【Python】爬取微博头条但无法导入数据库

【Python】爬取微博头条但无法导入数据库

# mimosa

# 2021/7/19

# utf-8

import requests

from bs4 import BeautifulSoup

import pymysql

news_title = []

news_url = []

num = (-1)

main_url = 'https://s.weibo.com'

header = {

'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36'

} # 请求头

# 抓取数据

def get_url(url):

url = requests.get(url, headers=header)

url.encoding = 'utf-8'

return url

# 解析网页

def sp_url(urls):

soup = BeautifulSoup(urls.text, 'lxml')

data = soup.select('#pl_top_realtimehot > table > tbody > tr > td.td-02 > a')

return soup, data

target_url = 'https://s.weibo.com/top/summary/'

target_urls = get_url(target_url)

target_soup, target_data = sp_url(target_urls)

for item in target_data:

result_title = {item.get_text(), }

result_url = {main_url + item.get('href'), }

num = num + 1

news_title.append(1)

news_title[num] = result_title

news_url.append(1)

news_url[num] = result_url

# print(news_title, news_url)

db = pymysql.Connect(host="localhost", user="root", password="root", database="weibo", autocommit=True) # 打开数据库链接

cursor = db.cursor() # 获取操作游标

sql = "insert into HOT(title, href)""values(%s, %s)" # 出入一条记录

sql_data = (news_title, news_url)

try:

cursor.execute(sql, sql_data)

db.commit()

except:

db.rollback()

db.close() # 关闭数据库链接

数据库名字为’weibo’表名为’hot’,代码运行以后使用select*from hot命令表依然是空的。


回答:

大概率db插入报错rollback了, try...except...把错误输出来, 隐藏错误是个不太好的习惯, 导致不容易发现问题.

以上是 【Python】爬取微博头条但无法导入数据库 的全部内容, 来源链接: utcz.com/p/938036.html

回到顶部