关于node.js爬取Vue页面某数据爬取不到的问题？

Z时代
2024-02-18
分类：IT

爬取的目标页面：https://music.gala.com/artists/jaq

想要获取的数据：Total Listens

我的代码：

const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
// One record per artist page; crawl() appends to this and it is printed at the end.
const info = [];
// Artist detail-page URLs derived from the discover listing.
const hrefLine = [];

/**
 * Entry point: loads the artist listing page, derives one detail-page URL per
 * artist card, then hands the URL list to crawl() and prints the collected
 * records.
 */
(async () => {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto('https://music.gala.com/discover?page=artists', {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });
    const html = await page.content();
    const $ = cheerio.load(html);
    for (const artist of $('.artist-card')) {
      const name = $(artist).find('.artist-card__artist').text().trim();
      // A card without a name would produce the bare ".../artists/" URL, which
      // is not an artist page, so skip it instead of crawling it.
      if (name === '') {
        continue;
      }
      // The URL slug is built from the display name: every run of '.',
      // whitespace, '!' or '&' collapses into a single '-', then lowercased.
      const slug = name.replace(/[.\s!&]+/g, '-').toLowerCase();
      hrefLine.push(`https://music.gala.com/artists/${slug}`);
    }
  } finally {
    // Release the listing browser even when navigation or parsing throws.
    await browser.close();
  }
  await crawl(hrefLine);
  console.log(info);
})().catch((error) => {
  // Without this handler a failure would surface as an unhandled rejection.
  console.error(`Crawl aborted: ${error}`);
  process.exitCode = 1;
});
/**
 * Visits each artist page in turn, scrapes name, level, total listens and
 * collector count from the rendered HTML, and appends one record per page to
 * the module-level `info` array.
 *
 * A failure on one URL is logged and does not stop the remaining URLs.
 *
 * @param {string[]} hrefLine - Artist page URLs to visit, in order.
 * @returns {Promise<void>} Resolves after every URL has been attempted and the
 *   browser has been closed.
 */
async function crawl(hrefLine) {
  const totalListensSelector = 'div.leading-10 span';
  const renderTimeoutMs = 15000;

  const browser = await puppeteer.launch({ args: ['--no-sandbox'] });
  try {
    const page = await browser.newPage();
    for (const url of hrefLine) {
      console.log(`Crawling ${url}...`);
      try {
        await page.goto(url, { waitUntil: 'networkidle0' });

        // The page is a client-rendered SPA: the Total Listens element can
        // exist with empty text when the network goes idle and only be filled
        // in afterwards. Poll inside the page until the text is non-empty
        // before taking the HTML snapshot.
        try {
          await page.waitForFunction(
            (selector) =>
              Array.from(document.querySelectorAll(selector)).some(
                (el) => el.textContent.trim() !== '',
              ),
            { timeout: renderTimeoutMs },
            totalListensSelector,
          );
        } catch (waitError) {
          // Best effort: keep the other fields even if this one never renders.
          console.warn(
            `Total Listens not rendered within ${renderTimeoutMs}ms on ${url}: ${waitError}`,
          );
        }

        const html = await page.content();
        const $ = cheerio.load(html);
        const name1 = $('.self-start').eq(0).text();
        const collectorsDiv = $('div').filter(function () {
          return $(this).text().includes('Collectors');
        });
        const Total_Listens = $(totalListensSelector).text();
        const level = $('#__nuxt div.hero div.capitalize').text();
        // NOTE(review): index 11 is tied to the page's current DOM nesting
        // (every ancestor div of the label also matches the filter) — confirm
        // it still points at the "Collectors" label if the layout changes.
        const Collectors = collectorsDiv.eq(11).next('div').text();
        console.log(`${level}艺术家：${name1}的歌曲总共被听了${Total_Listens}次，已被${Collectors}人收藏`);
        const per_art = {
          name1,
          level,
          Total_Listens,
          Collectors,
        };
        info.push(per_art);
        console.log(per_art);
      } catch (error) {
        console.error(`Failed to crawl ${url}: ${error}`);
      }
    }
  } finally {
    // Close the browser even if opening the page or the loop itself throws.
    await browser.close();
  }
}

其他数据都可以正常爬取到，只有 Total Listens 无论怎么爬取，得到的都是空字符串。不知道是不是双向绑定或者虚拟 DOM 导致的问题，请各位大佬赐教，十分感谢！

回答：

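SPA 的渲染是异步的，需要时间：`networkidle0` 只表示网络已空闲，并不代表页面数据已经渲染完成。所以不能只按顺序读取页面内容，而要在每一步先等待标志性元素出现（并且内容不为空），再继续往后执行。例如，在调用 `page.content()` 之前，先用 `page.waitForSelector` 或 `page.waitForFunction` 等到 `div.leading-10 span` 的文本不为空。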

以上是关于node.js爬取Vue页面某数据爬取不到的问题？的全部内容，来源链接： utcz.com/p/934006.html

关于node.js爬取Vue页面某数据爬取不到的问题？

回答：

其他人也看了：