【Python】爬取微博头条但无法导入数据库
# mimosa# 2021/7/19
# utf-8
import requests
from bs4 import BeautifulSoup
import pymysql
news_title = []
news_url = []
num = (-1)
main_url = 'https://s.weibo.com'
header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36'
} # 请求头
# 抓取数据
def get_url(url):
url = requests.get(url, headers=header)
url.encoding = 'utf-8'
return url
# 解析网页
def sp_url(urls):
soup = BeautifulSoup(urls.text, 'lxml')
data = soup.select('#pl_top_realtimehot > table > tbody > tr > td.td-02 > a')
return soup, data
target_url = 'https://s.weibo.com/top/summary/'
target_urls = get_url(target_url)
target_soup, target_data = sp_url(target_urls)
for item in target_data:
result_title = {item.get_text(), }
result_url = {main_url + item.get('href'), }
num = num + 1
news_title.append(1)
news_title[num] = result_title
news_url.append(1)
news_url[num] = result_url
# print(news_title, news_url)
db = pymysql.Connect(host="localhost", user="root", password="root", database="weibo", autocommit=True) # 打开数据库链接
cursor = db.cursor() # 获取操作游标
sql = "insert into HOT(title, href)""values(%s, %s)" # 出入一条记录
sql_data = (news_title, news_url)
try:
cursor.execute(sql, sql_data)
db.commit()
except:
db.rollback()
db.close() # 关闭数据库链接
数据库名字为’weibo’表名为’hot’,代码运行以后使用select*from hot命令表依然是空的。
回答:
大概率db插入报错rollback了, try...except...把错误输出来, 隐藏错误是个不太好的习惯, 导致不容易发现问题.
以上是 【Python】爬取微博头条但无法导入数据库 的全部内容, 来源链接: utcz.com/p/938036.html