Python 爬取“顶点小说网”《纯阳剑尊》的示例代码

爬取“顶点小说网”《纯阳剑尊》

代码

import requests

from bs4 import BeautifulSoup

# Request headers carrying a browser-like User-Agent (anti-scraping workaround).
headers = {
    # Adjacent string literals are joined at compile time. A backslash
    # continuation placed inside the literal would instead embed the next
    # line's leading whitespace in the header value.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    ),
}

def open_url(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body as ``str``, decoded with the encoding that
        ``requests`` detects from the content.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()
    # Use the encoding detected from the body rather than the one derived
    # from the response headers.
    response.encoding = response.apparent_encoding
    return response.text

def get_title(url):
    """Extract the chapter title from a page's HTML.

    Args:
        url: HTML text of the chapter page. Despite the parameter name this
            is markup, not an address; ``main`` passes the result of
            ``open_url`` here.

    Returns:
        The text of the ``<h1>`` inside the first ``<dd>`` element, with a
        newline before and after it.

    Raises:
        ValueError: If the page has no ``<dd>`` element or that element has
            no ``<h1>``.
    """
    soup = BeautifulSoup(url, 'lxml')
    title_tag = soup.find('dd')
    heading = title_tag.h1 if title_tag is not None else None
    if heading is None:
        # Raise a descriptive error rather than an AttributeError on None.
        raise ValueError('chapter title not found: page has no <dd><h1> element')
    return '\n' + heading.get_text() + '\n'

def get_texts(url):
    """Collect the elements that hold the chapter body.

    Args:
        url: HTML text of the chapter page (markup, not an address).

    Returns:
        The result of ``find_all`` for every ``<dd id="contents">`` element
        in the parsed page.
    """
    return BeautifulSoup(url, 'lxml').find_all('dd', id="contents")

def save_title(filename, title):
    """Append the chapter title to a UTF-8 text file.

    Args:
        filename: Path of the output file.
        title: Title text to append.
    """
    with open(filename, mode='a+', encoding='utf-8') as out:
        out.write(title)

def save_text(filename, text):
    """Append a piece of chapter text to a UTF-8 text file.

    Args:
        filename: Path of the output file.
        text: Text to append.
    """
    with open(filename, mode='a+', encoding='utf-8') as out:
        out.write(text)

def main():
    """Ask for a chapter number, download that chapter and append it to a file.

    The chapter number is read from standard input and must be an integer
    in the range 1-802 shown in the prompt. On invalid input a message is
    printed and nothing is downloaded. On success the title and body are
    appended to ``纯阳剑尊.txt`` in the current directory.
    """
    first_chapter, last_chapter = 1, 802

    answer = input('《纯阳剑尊》你想要下载第几章?(1-802)')
    try:
        num = int(answer)
    except ValueError:
        print('请输入 1 到 802 之间的整数!')
        return
    if not first_chapter <= num <= last_chapter:
        # A number outside the advertised range would fetch an unrelated page.
        print('请输入 1 到 802 之间的整数!')
        return

    # The page id is derived as 8184027 + chapter number.
    # NOTE(review): assumes the site numbers chapter pages consecutively - confirm.
    number = 8184027 + num
    url = 'https://www.23us.so/files/article/html/15/15905/' + str(number) + '.html'
    filename = '纯阳剑尊.txt'

    html = open_url(url)
    # Parse both parts before writing, so a page that lacks the expected
    # title markup does not leave a partial chapter in the file.
    title = get_title(html)
    tags = get_texts(html)

    save_title(filename, title)
    for text_tag in tags:
        save_text(filename, text_tag.get_text() + '\n')

    print('第{}章已经下载完成!'.format(num))

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

爬取结果:

以上是 Python 爬取“顶点小说网”《纯阳剑尊》的示例代码的全部内容，来源链接：utcz.com/z/358478.html

回到顶部