关于node.js爬取Vue页面某数据爬取不到的问题?

爬取的目标页面:https://music.gala.com/artists/jaq

想要获取的数据:Total Listens

我的代码:

const puppeteer = require('puppeteer');

const cheerio = require('cheerio');

const info = [];

const hrefLine = [];

(async () => {

const browser = await puppeteer.launch({ headless: true });

const page = await browser.newPage();

await page.goto('https://music.gala.com/discover?page=artists', { waitUntil: 'networkidle0', timeout: 60000 });

const html = await page.content();

const $ = cheerio.load(html);

const artists = $('.artist-card');

for (const artist of artists) {

const name = $(artist).find('.artist-card__artist').text().trim();

const _name = name.replace(/[.\s!&]+/g, '-').toLowerCase();

const href = `https://music.gala.com/artists/${_name}`;

hrefLine.push(href);

}

await browser.close();

await crawl(hrefLine);

console.log(info);

})();

async function crawl(hrefLine) {

const browser = await puppeteer.launch({args: ['--no-sandbox']});

const page = await browser.newPage();

for (let i = 0; i < hrefLine.length; i++) {

const url = hrefLine[i];

console.log(`Crawling ${url}...`);

try {

await page.goto(url, { waitUntil: 'networkidle0' });

const html = await page.content();

const $ = cheerio.load(html);

const name1 = $('.self-start').eq(0).text();

const collectorsDiv = $('div').filter(function() {

return $(this).text().includes('Collectors');

});

const Total_Listens = $('div.leading-10 span').text();

const level = $('#__nuxt div.hero div.capitalize').text();

const Collectors = collectorsDiv.eq(11).next('div').text();

console.log(`${level}艺术家:${name1}的歌曲总共被听了${Total_Listens}次,已被${Collectors}人收藏`);

const per_art = {

name1,

level,

Total_Listens,

Collectors

};

info.push(per_art);

console.log(per_art);

} catch (error) {

console.error(`Failed to crawl ${url}: ${error}`);

}

}

await browser.close();

}

其他数据都可以爬取到,就这个Total Listens怎么爬都是空字符串,不知道是否由于双向绑定或者是虚拟dom的问题,请各位大佬赐教,十分感谢!


回答:

SPA 渲染需要时间,所以不能只管顺序读写,要在每一步不断检查标志物,直到其出现,再往后执行。

以上是 关于node.js爬取Vue页面某数据爬取不到的问题? 的全部内容, 来源链接: utcz.com/p/934006.html

回到顶部