Python crawler: error caused by NetEase Cloud Music's comment encryption?
I've recently been learning how to scrape the comments on NetEase Cloud Music songs and have run into a problem I'd like to ask for help with.
Taking this song as an example, the comments are clearly fetched through an encrypted ajax request.
After reading through some code written by others, I roughly imitated it, but ran into problems. Since SegmentFault's pre tag limits the length, pasting the code in one piece would hurt readability, so I'll paste it in blocks. The environment is Python 3; running it under Python 2 may cause character-encoding issues.
The NetEase Cloud Music comments to be scraped
Code
encrypt1.py, the module that handles the encryption
from Crypto.Cipher import AES
import codecs
import os
import base64
import json
import requests

def aesEncrypt(text, secKey):
    # Pad the plaintext to a multiple of 16 bytes, AES-CBC encrypt it, then base64-encode
    pad = 16 - len(text) % 16
    text = text + pad * chr(pad)
    encryptor = AES.new(secKey, AES.MODE_CBC, '0102030405060708')
    ciphertext = encryptor.encrypt(text)
    ciphertext = base64.b64encode(ciphertext)
    return str(ciphertext)

def rsaEncrypt(text, pubKey, modulus):
    # Textbook RSA on the reversed text: (text ** e) mod n, returned as a zero-padded hex string
    text = text[::-1]
    rs = int(codecs.encode(text.encode('utf-8'), 'hex'), 16) ** int(pubKey, 16) % int(modulus, 16)
    return format(rs, 'x').zfill(256)

def createSecretKey(size):
    # Random 16-character hex string used as the per-request AES key
    return (''.join(map(lambda xx: (hex(xx)[2:]), os.urandom(size))))[0:16]
encrypt2.py, the second encryption-handling module; it ties the three functions above together into the overall routine, and this is where the problem shows up: print(len(req.json())) outputs 0. Note: username and password must be filled in for the program to run.
from encrypt1 import *
import json
import requests

def readEver(songId):
    # Ajax comment endpoint for one song
    url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_' + str(songId) + '/?csrf_token='
    headers = {'Cookie': 'appver=1.5.0.75771;', 'Referer': 'http://music.163.com/'}
    text = {'username': '', 'password': '', 'rememberLogin': 'true'}
    text = json.dumps(text)
    nonce = '0CoJUm6Qyw8W8jud'
    secKey = createSecretKey(16)
    encText = aesEncrypt(aesEncrypt(text, nonce), secKey)  # params
    pubKey = '010001'
    modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
    encSecKey = rsaEncrypt(secKey, pubKey, modulus)  # encSecKey
    data = {'params': encText, 'encSecKey': encSecKey}
    req = requests.post(url, headers=headers, data=data)
    print(len(req.json()))  # this prints 0
    total = req.json()['total']
    if int(total) > 10000:
        print(songId, total)
    else:
        pass
The main crawler program
from bs4 import BeautifulSoup
from pprint import pprint
from encrypt1 import *
from encrypt2 import *
import requests

Default_Header = {
    'Referer': 'http://music.163.com/',
    'Host': 'music.163.com',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Iceweasel/38.3.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate'}

BASE_URL = 'http://music.163.com'
_session = requests.session()
_session.headers.update(Default_Header)

def getPage(pageIndex):
    # Fetch one page of the hot-playlist listing and walk every playlist on it
    pageUrl = 'http://music.163.com/discover/playlist/?order=hot&cat=全部&limit=35&offset=' + pageIndex
    soup = BeautifulSoup(_session.get(pageUrl).content)
    songList = soup.findAll('a', attrs={'class': 'tit f-thide s-fc0'})
    for i in songList:
        print(i['href'])
        getPlayList(i['href'])

def getPlayList(playListId):
    # Fetch a playlist page and query the comment count of every song in it
    playListUrl = BASE_URL + playListId
    soup = BeautifulSoup(_session.get(playListUrl).content)
    songList = soup.find('ul', attrs={'class': 'f-hide'})
    for i in songList.findAll('li'):
        startIndex = (i.find('a'))['href']
        songId = startIndex.split('=')[1]
        readEver(songId)

if __name__ == '__main__':
    for i in range(1, 43):
        getPage(str(i*35))
Error message
As mentioned above, the error happens because req.json() comes back with length 0, so req.json()['total'] raises an exception (see the debugging sketch after the traceback).
Traceback (most recent call last):
  File "wangyi.py", line 40, in <module>
    getPage(str(i*35))
  File "wangyi.py", line 24, in getPage
    getPlayList(i['href'])
  File "wangyi.py", line 33, in getPlayList
    readEver(songId)
  File "/home/testenv/encrypt.py", line 40, in readEver
    total = req.json()['total']
  File "/home/testenv/.local/lib/python3.5/site-packages/requests/models.py", line 850, in json
    return complexjson.loads(self.text, **kwargs)
  File "/usr/lib/python3.5/json/__init__.py", line 319, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python3.5/json/decoder.py", line 339, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/lib/python3.5/json/decoder.py", line 357, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
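One way to narrow this down before touching the encryption code is to look at the raw response instead of calling .json() straight away. Below is a minimal debugging sketch; debugPost is a hypothetical helper meant to stand in for the requests.post / req.json() lines inside readEver, and it only reads attributes that a requests.Response actually has.

import requests

def debugPost(url, headers, data):
    # Post the encrypted form data and show what actually comes back,
    # instead of parsing the body as JSON blindly.
    req = requests.post(url, headers=headers, data=data)
    print(req.status_code)        # a non-200 status usually means the request was rejected
    print(repr(req.text[:200]))   # an empty body or an HTML error page explains the JSONDecodeError
    if req.text.strip():
        return req.json()         # only parse when the body is non-empty
    return None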
Problem description
It's clearly something going wrong in the encryption routine. How should I go about fixing this?
Answer:
I wrote a comment crawler a while ago and it didn't seem to involve any encryption; has NetEase changed its approach? You can take a look at what I wrote for reference; the link is here.
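The crawler referred to in this answer is not reproduced on the page. Purely as an illustration of the kind of unencrypted request it might have used, here is a minimal sketch; the /api/v1/resource/comments path and the example song id are assumptions based on older write-ups and may no longer be accepted by the server.

import requests

# Assumed legacy, unencrypted comment endpoint; not guaranteed to still work.
songId = 26584163  # assumed example song id
url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_{}?limit=20&offset=0'.format(songId)
headers = {'Referer': 'http://music.163.com/'}
resp = requests.get(url, headers=headers)
print(resp.status_code)
print(resp.text[:200])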
Answer:
If the requests are encrypted and you still want to do this in Python with plain HTTP requests, then you have to analyze its JavaScript encryption library; since the front end can display the data, the front end must be handling the decryption.
If you don't want to go to that much trouble, you can let a browser load the page first and then grab the content from it, which is more convenient.
You can use selenium to fetch the comments; a rough sketch follows.
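A minimal sketch of that approach, assuming selenium and a matching browser driver are installed; the song id, the iframe name 'g_iframe', and the 'itm' class used for a single comment are all assumptions and may need adjusting against the live page.

from selenium import webdriver
import time

driver = webdriver.Firefox()
driver.get('http://music.163.com/#/song?id=26584163')  # assumed example song page
time.sleep(5)  # crude wait for the page and the ajax-loaded comments

# The song page renders its real content inside an iframe, commonly named 'g_iframe'.
driver.switch_to.frame('g_iframe')

# 'itm' is an assumed class name for one comment block.
for item in driver.find_elements_by_class_name('itm'):
    print(item.text)

driver.quit()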