python爬取B站原视频的实例代码

python

这篇文章主要介绍了 Python 爬取B站原视频的实例代码,帮助大家更好地理解和使用 Python 爬虫,感兴趣的朋友可以了解一下。

B站原视频爬取,我就不多说了,直接上代码,直接运行就好。
B站是把视频和音频分开的,需要把两者合并起来才能使用,这一点需要分析才能看出来。另外,登录这一块是比较难的。

import argparse
import os
import re
import subprocess

import prettytable
from DecryptLogin import login



class Bilibili():
    """Download every video uploaded by one Bilibili user.

    The class logs in through ``DecryptLogin``, lists the target user's
    submissions via the Bilibili web APIs, downloads the media with
    ``aria2c`` and, when the video and audio streams are served separately
    (a ``dash`` entry in the player response), merges them with ``ffmpeg``.
    Both tools are looked up under ``tools/`` in the current working
    directory.
    """

    # (qn code, label) pairs, lowest quality first.
    _QUALITIES = [
        ("16", "流畅 360P"),
        ("32", "清晰 480P"),
        ("64", "高清 720P"),
        ("74", "高清 720P60"),
        ("80", "高清 1080P"),
        ("112", "高清 1080P+"),
        ("116", "高清 1080P60"),
    ]
    # Per the original author's note, non-member accounts can only download
    # up to 1080P, so that entry is the one requested.
    _DEFAULT_QUALITY = _QUALITIES[-3]

    # Characters removed from titles before they are used as file names:
    # quotes, the characters Windows forbids in file names, and whitespace.
    # NOTE(review): the published listing of this pattern was garbled, so
    # this character class is a reconstruction -- confirm against upstream.
    _TITLE_JUNK = re.compile(r"[‘’'\"\\/:*?<>|\s]")

    # aria2c documents 1-16 as the valid range for -x.
    _MAX_CONNECTIONS = 16

    def __init__(self, username, password, **kwargs):
        """Log in and remember the API endpoints.

        Args:
            username: Bilibili account name passed to ``DecryptLogin``.
            password: Bilibili account password passed to ``DecryptLogin``.
            **kwargs: Accepted for compatibility; unused.
        """
        self.username = username
        self.password = password
        self.session = Bilibili.login(username, password)
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
        }
        self.user_info_url = "http://api.bilibili.com/x/space/acc/info"
        self.submit_videos_url = "http://space.bilibili.com/ajax/member/getSubmitVideos"
        self.view_url = "http://api.bilibili.com/x/web-interface/view"
        self.video_player_url = "http://api.bilibili.com/x/player/playurl"

    def run(self):
        """Interactive main loop: ask for a user id, show the profile, download.

        Loops forever; interrupt the process (Ctrl+C) to leave.
        """
        while True:
            userid = input("请输入目标用户ID(例:345993405)(我的一个LOL好友凯撒可以关注他一下 谢谢) ——> ").strip()
            if not userid:
                # An empty id cannot name a user or a download directory.
                continue
            user_info = self.__getUserInfo(userid)
            tb = prettytable.PrettyTable()
            tb.field_names = list(user_info.keys())
            tb.add_row(list(user_info.values()))
            print("获取的用户信息如下:")
            print(tb)
            is_download = input("是否下载该用户的所有视频(y/n, 默认: y) ——> ")
            # Empty input means "yes" (the documented default).
            if is_download.strip().lower() in ("", "y", "yes"):
                self.__downloadVideos(userid)

    def __getUserInfo(self, userid):
        """Return the basic profile of ``userid`` as a dict keyed by display label."""
        params = {"mid": userid, "jsonp": "jsonp"}
        res = self.session.get(self.user_info_url, params=params, headers=self.headers)
        data = res.json()["data"]
        return {
            "用户名": data["name"],
            "性别": data["sex"],
            "个性签名": data["sign"],
            "用户等级": data["level"],
            "生日": data["birthday"],
        }

    def __downloadVideos(self, userid):
        """Download all videos of ``userid`` into a directory named after the id.

        Videos whose download or merge fails are reported and skipped instead
        of aborting the whole run.
        """
        os.makedirs(userid, exist_ok=True)
        # The API wants the quality code only, not the (code, label) pair.
        qn = self._DEFAULT_QUALITY[0]

        # Collect the basic information of every submitted video.
        aids = self._fetch_aids(userid)
        titles, cid_parts_per_video = [], []
        for aid in aids:
            cid_parts, title = self._fetch_video_meta(aid)
            cid_parts_per_video.append(cid_parts)
            titles.append(title)
        print("共获取到用户ID<%s>的<%d>个视频..." % (userid, len(titles)))

        resolved = [
            self._fetch_links(aid, cid_parts, qn)
            for aid, cid_parts in zip(aids, cid_parts_per_video)
        ]

        # Start downloading.
        aria2c_path = os.path.join(os.getcwd(), "tools/aria2c")
        ffmpeg_path = os.path.join(os.getcwd(), "tools/ffmpeg")
        for aid, title, (links, is_dash) in zip(aids, titles, resolved):
            print("正在下载视频<%s>..." % title)
            if not self._download_one(userid, title, aid, links, is_dash, aria2c_path, ffmpeg_path):
                print("视频<%s>下载失败, 已跳过..." % title)
        print("所有视频下载完成, 该用户所有视频保存在<%s>文件夹中..." % (userid))

    def _fetch_aids(self, userid):
        """Return the ``aid`` of every video submitted by ``userid``, page by page."""
        aids = []
        params = {"mid": userid, "pagesize": 30, "tid": 0, "page": 1, "order": "pubdate"}
        while True:
            res = self.session.get(self.submit_videos_url, headers=self.headers, params=params)
            data = res.json()["data"]
            vlist = data["vlist"] or []
            aids.extend(item["aid"] for item in vlist)
            # Stop on an empty page too: otherwise a reported count larger
            # than what the API actually serves would loop forever.
            if not vlist or len(aids) >= int(data["count"]):
                return aids
            params["page"] += 1

    def _fetch_video_meta(self, aid):
        """Return ``(cid_parts, title)`` for one video.

        ``cid_parts`` is a list of ``[cid, part]`` pairs, one per page of the
        video; ``title`` is already sanitized for use as a file name.
        """
        res = self.session.get(self.view_url, headers=self.headers, params={"aid": aid})
        data = res.json()["data"]  # parse the response body once
        cid_parts = [[page["cid"], page["part"]] for page in data["pages"]]
        # Fall back to the aid when sanitizing leaves nothing to name the file.
        title = self._sanitize_title(data["title"]) or "av{}".format(aid)
        return cid_parts, title

    def _fetch_links(self, aid, cid_parts, qn):
        """Resolve the download URLs of one video.

        Returns:
            ``(links, is_dash)``. When ``is_dash`` is true, ``links`` is
            ``[video_urls, audio_urls]``; otherwise it is a flat list of
            URLs for a single file. ``links`` is empty when the video has
            no pages.

        NOTE(review): as in the original code, each page overwrites the
        result of the previous one, so only the last page of a multi-part
        video is downloaded.
        """
        links, is_dash = [], False
        for cid, _part in cid_parts:
            params = {"avid": aid, "cid": cid, "qn": qn, "otype": "json", "fnver": 0, "fnval": 16}
            res = self.session.get(self.video_player_url, params=params, headers=self.headers)
            data = res.json()["data"]
            if "dash" in data:
                video, audio = data["dash"]["video"][0], data["dash"]["audio"][0]
                links = [
                    [video["baseUrl"]] + list(video["backup_url"] or []),
                    [audio["baseUrl"]] + list(audio["backup_url"] or []),
                ]
                is_dash = True
            else:
                durl = data["durl"][-1]
                links = [durl["url"]] + list(durl["backup_url"] or [])
                # Keep the flag in step with the shape of ``links``.
                is_dash = False
        return links, is_dash

    def _download_one(self, userid, title, aid, links, is_dash, aria2c_path, ffmpeg_path):
        """Download (and merge, for DASH) one video; return True on success."""
        if not links:
            return False
        flv_path = os.path.join(userid, title + ".flv")
        mp4_path = os.path.join(userid, title + ".mp4")

        if not is_dash:
            command = self._aria2c_command(aria2c_path, links, userid, title + ".flv", aid)
            if not self._run_tool(command):
                return False
            # os.replace also succeeds when the target already exists.
            os.replace(flv_path, mp4_path)
            return True

        link_v, link_a = links
        aac_path = os.path.join(userid, title + ".aac")
        # -- video stream
        if not self._run_tool(self._aria2c_command(aria2c_path, link_v, userid, title + ".flv", aid)):
            return False
        # -- audio stream
        if not self._run_tool(self._aria2c_command(aria2c_path, link_a, userid, title + ".aac", aid)):
            return False
        # -- merge; ffmpeg's stderr is discarded rather than piped, because a
        # pipe that nobody reads can fill up and block the child forever.
        merge = [ffmpeg_path, "-i", flv_path, "-i", aac_path, "-c", "copy", "-f", "mp4", "-y", mp4_path]
        if not self._run_tool(merge, stderr=subprocess.DEVNULL):
            # Keep the inputs so the merge can be retried by hand.
            return False
        os.remove(flv_path)
        os.remove(aac_path)
        return True

    @staticmethod
    def _sanitize_title(title):
        """Strip quotes, file-name-illegal characters and whitespace from ``title``."""
        return Bilibili._TITLE_JUNK.sub("", title)

    @staticmethod
    def _aria2c_command(aria2c_path, links, directory, filename, aid):
        """Build the aria2c argument list for one file served by ``links``.

        An argument list is used instead of a shell string so that titles
        and URLs are never interpreted by a shell.
        """
        connections = max(1, min(len(links), Bilibili._MAX_CONNECTIONS))
        return [
            aria2c_path, "-c", "-k", "1M",
            "-x", str(connections),
            "-d", directory,
            "-o", filename,
            "--referer=https://www.bilibili.com/video/av{}".format(aid),
        ] + list(links)

    @staticmethod
    def _run_tool(argv, stderr=None):
        """Echo and run an external command; return True if it exited with 0."""
        print(" ".join(str(arg) for arg in argv))
        try:
            return subprocess.call(argv, stderr=stderr) == 0
        except OSError as err:
            # Typically the executable is missing from tools/.
            print("无法执行<%s>: %s" % (argv[0], err))
            return False

    @staticmethod
    def login(username, password):
        """Log in to Bilibili via the open-source DecryptLogin library; return the session."""
        # Inside this function ``login`` is the module imported from
        # DecryptLogin, not this method: class attributes are not in scope here.
        _, session = login.Login().bilibili(username, password)
        return session

161

162

def build_arg_parser():
    """Build the command-line parser for the Bilibili login credentials."""
    parser = argparse.ArgumentParser(description="下载B站指定用户的所有视频(仅支持Windows下使用)")
    parser.add_argument("--username", dest="username", help="B站登录用户名", type=str, required=True)
    parser.add_argument("--password", dest="password", help="B站登录密码", type=str, required=True)
    return parser


def main(argv=None):
    """Parse the credentials and start the interactive downloader.

    Args:
        argv: Argument list to parse; ``None`` means the real command line.
            The original script parsed a hard-coded placeholder list, which
            silently ignored whatever the user passed.
    """
    args = build_arg_parser().parse_args(argv)
    bili = Bilibili(args.username, args.password)
    bili.run()


if __name__ == "__main__":
    main()

把账号密码填上就行。以上就是python 爬取B站原视频的实例代码的详细内容

福利:如果你的学习时间不是很紧张,并且又想快速地提高Python开发、Python爬虫、Python大数据分析、人工智能等技术,这里给大家分享一套系统教学资源,加一下我建的Python技术学习群:九三七六六七五零九,一起学习。群里有相关开发工具、学习教程!

以上是 python爬取B站原视频的实例代码 的全部内容, 来源链接: utcz.com/z/530878.html

回到顶部