【Python】爬取微博头条但无法导入数据库
# mimosa# 2021/7/19
# utf-8
import requests
from bs4 import BeautifulSoup
import pymysql
# Accumulators for the scraped hot-search entries.
news_title = []
news_url = []
num = -1  # index of the most recently stored entry

# Base site URL; hrefs scraped from the page are relative to it.
main_url = 'https://s.weibo.com'

# Request headers: present a desktop-browser User-Agent to the server.
header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36'
}
# Fetch a page from the site.
def get_url(url, timeout=10):
    """Fetch *url* and return the requests Response, decoded as UTF-8.

    A timeout is supplied so a stalled connection cannot hang the script
    forever; network errors propagate to the caller as requests exceptions.
    The original bound the Response to a local also named ``url``, shadowing
    the parameter — renamed for clarity.
    """
    response = requests.get(url, headers=header, timeout=timeout)
    response.encoding = 'utf-8'
    return response
# Parse the fetched page.
def sp_url(urls):
    """Parse a fetched Response and extract the hot-search anchor tags.

    Returns a ``(soup, anchors)`` tuple: the full BeautifulSoup document
    plus the ``<a>`` elements from the realtime-hot table.
    """
    document = BeautifulSoup(urls.text, 'lxml')
    anchors = document.select('#pl_top_realtimehot > table > tbody > tr > td.td-02 > a')
    return document, anchors
# Fetch the hot-search summary page and collect (title, url) values.
# BUG FIX: the original wrote ``{item.get_text(), }`` — a one-element SET —
# into each slot, and grew the lists via a placeholder ``append(1)`` followed
# by an index overwrite. The database insert downstream needs plain strings,
# appended directly.
target_url = 'https://s.weibo.com/top/summary/'
target_urls = get_url(target_url)
target_soup, target_data = sp_url(target_urls)
for item in target_data:
    news_title.append(item.get_text())
    news_url.append(main_url + item.get('href'))
# print(news_title, news_url)
def _plain_text(value):
    """Unwrap an accidental one-element set into its string; pass strings through."""
    return next(iter(value)) if isinstance(value, set) else value

# Open the database connection (autocommit is on, but we commit explicitly).
db = pymysql.Connect(host="localhost", user="root", password="root", database="weibo", autocommit=True)
cursor = db.cursor()  # operation cursor
# BUG FIX: the original query was missing the space before VALUES and, worse,
# passed the two whole lists as the parameters of a single-row insert —
# every execute failed, and the bare ``except:`` silently rolled it back,
# which is why the table stayed empty.
sql = "INSERT INTO HOT (title, href) VALUES (%s, %s)"
rows = [(_plain_text(t), _plain_text(u)) for t, u in zip(news_title, news_url)]
try:
    cursor.executemany(sql, rows)  # one row per hot-search entry
    db.commit()
except pymysql.MySQLError as err:
    # Surface the real error instead of swallowing it, then roll back.
    print("insert failed:", err)
    db.rollback()
finally:
    cursor.close()
    db.close()  # always release the connection
数据库名字为 'weibo',表名为 'hot'。代码运行以后使用 select * from hot 命令查询,表依然是空的。
回答:
大概率是 db 插入时报错后 rollback 了。把 try...except... 捕获到的错误输出来看看;隐藏错误是个不太好的习惯,会导致不容易发现问题。
以上是 【Python】爬取微博头条但无法导入数据库 的全部内容, 来源链接: utcz.com/p/938036.html