关于 Node.js 爬取 Vue 页面时某项数据爬取不到的问题
爬取的目标页面:https://music.gala.com/artists/jaq
想要获取的数据:Total Listens
我的代码:
const puppeteer = require('puppeteer');const cheerio = require('cheerio');
// Records collected by crawl(); one object per artist page.
const info = [];
// Artist page URLs derived from the cards on the listing page.
const hrefLine = [];

/**
 * Entry point.
 *
 * Loads the artist listing page in headless Chromium, derives one artist page
 * URL per `.artist-card` element, hands the URL list to `crawl()` and finally
 * prints everything that was collected in `info`.
 */
(async () => {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto('https://music.gala.com/discover?page=artists', {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });
    const html = await page.content();
    const $ = cheerio.load(html);
    const artists = $('.artist-card');
    for (const artist of artists) {
      const name = $(artist).find('.artist-card__artist').text().trim();
      if (name === '') {
        // A card without a name would produce ".../artists/", which is not an
        // artist page, so it is skipped instead of being crawled.
        continue;
      }
      // The artist URL is guessed from the display name: runs of '.',
      // whitespace, '!' and '&' collapse into a single '-'.
      // NOTE(review): presumably this matches the site's slug rules — confirm,
      // or read the link target from the card if it exposes one.
      const slug = name.replace(/[.\s!&]+/g, '-').toLowerCase();
      hrefLine.push(`https://music.gala.com/artists/${slug}`);
    }
  } finally {
    // Always release the browser, even when navigation or parsing throws.
    await browser.close();
  }
  await crawl(hrefLine);
  console.log(info);
})().catch((error) => {
  // Report the failure explicitly instead of leaving an unhandled rejection.
  console.error(`Crawler failed: ${error}`);
  process.exitCode = 1;
});
/**
 * Waits until at least one element matching `selector` has non-blank text.
 *
 * The wait is best-effort: when the timeout expires a warning is logged and
 * `false` is returned, so the caller can still scrape whatever is available.
 *
 * @param {object} page - Puppeteer page to poll.
 * @param {string} selector - CSS selector of the element(s) to watch.
 * @param {number} [timeout=15000] - Maximum wait in milliseconds.
 * @returns {Promise<boolean>} `true` if text appeared, `false` on timeout/error.
 */
async function waitForNonEmptyText(page, selector, timeout = 15000) {
  try {
    await page.waitForFunction(
      // Runs inside the page: true once any matching node has real text.
      (sel) =>
        Array.from(document.querySelectorAll(sel)).some(
          (el) => el.textContent.trim() !== '',
        ),
      { timeout },
      selector,
    );
    return true;
  } catch (error) {
    console.warn(`No text appeared in "${selector}" within ${timeout}ms: ${error}`);
    return false;
  }
}

/**
 * Visits every URL in `hrefLine` and appends one record
 * `{ name1, level, Total_Listens, Collectors }` per page to the module-level
 * `info` array. A page that fails is logged and skipped; the loop continues.
 *
 * @param {string[]} hrefLine - Artist page URLs to visit, in order.
 * @returns {Promise<void>}
 */
async function crawl(hrefLine) {
  const totalListensSelector = 'div.leading-10 span';
  const browser = await puppeteer.launch({ args: ['--no-sandbox'] });
  try {
    const page = await browser.newPage();
    for (const url of hrefLine) {
      console.log(`Crawling ${url}...`);
      try {
        await page.goto(url, { waitUntil: 'networkidle0' });
        // 'networkidle0' only means the network went quiet; the SPA may not
        // have rendered the listen counter yet, which is why it used to be
        // scraped as an empty string. Poll for the text before snapshotting.
        await waitForNonEmptyText(page, totalListensSelector);
        const html = await page.content();
        const $ = cheerio.load(html);
        const name1 = $('.self-start').eq(0).text();
        const collectorsDiv = $('div').filter(function () {
          return $(this).text().includes('Collectors');
        });
        const Total_Listens = $(totalListensSelector).text();
        const level = $('#__nuxt div.hero div.capitalize').text();
        // NOTE(review): index 11 is tied to the current DOM nesting of the
        // page — confirm it still points at the "Collectors" label.
        const Collectors = collectorsDiv.eq(11).next('div').text();
        console.log(`${level}艺术家:${name1}的歌曲总共被听了${Total_Listens}次,已被${Collectors}人收藏`);
        const per_art = {
          name1,
          level,
          Total_Listens,
          Collectors,
        };
        info.push(per_art);
        console.log(per_art);
      } catch (error) {
        console.error(`Failed to crawl ${url}: ${error}`);
      }
    }
  } finally {
    // Close the browser even if opening the page or the loop itself throws.
    await browser.close();
  }
}
其他数据都可以爬取到,唯独 Total Listens 无论怎么爬取都是空字符串。不知道是否是双向绑定或虚拟 DOM 导致的问题,请各位大佬赐教,十分感谢!
回答:
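SPA 渲染需要时间: networkidle0 只表示网络已经空闲,并不保证页面上的数据已经渲染完成。所以不能只管顺序读写,要在每一步不断检查标志物(目标元素),直到其出现且内容非空,再往后执行。例如在 Puppeteer 中,可以在调用 page.content() 之前先用 page.waitForSelector() 或 page.waitForFunction() 等待 Total Listens 所在的元素出现文本。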
以上是 关于node.js爬取Vue页面某数据爬取不到的问题? 的全部内容, 来源链接: utcz.com/p/934006.html