Python爬取惠农网苹果数据,看看新鲜的水果价格如何[Python基础]

python

前言

本文的文字及图片来源于网络,仅供学习、交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理。

本次目标

爬取惠农网信息

目标网址

https://www.cnhnb.com/

 

环境

Python3.6

PyCharm

爬虫代码

导入工具

import requests

import parsel

import csv

import time

请求头

# Request headers sent with every listing-page fetch.
# NOTE(review): the Cookie embeds a captured login session (hnUserTicket /
# hnUserId); it presumably expires and must be refreshed by hand - confirm.
headers = {
    "Cookie": (
        "deviceIdRenew=1; "
        "Hm_lvt_91cf34f62b9bedb16460ca36cf192f4c=1604579356,1604659451; "
        "deviceId=d1dd5b9-d191-406b-971d-391916a0e; "
        "sessionId=S_0KH64T2IHLHSO77N; "
        "lmvid=b24dcd0ad2a8f0b783f248c7ff2675a8; "
        "lmvid.sig=w1UBnTUKSDq-GfAlx6TyR_K7SjyujGIlF-1kRjTrOAI; "
        "hnUserTicket=b80e6b3a-38a3-432c-816d-aeb0376228bd; "
        "hnUserId=870048250; "
        "Hm_lpvt_91cf34f62b9bedb16460ca36cf192f4c=1604659904"
    ),
    "Host": "www.cnhnb.com",
    "Referer": "https://www.cnhnb.com/supply/pingguo/",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/81.0.4044.138 Safari/537.36"
    ),
}

解析网站、爬取数据

def get_page_url(page_url, page_id):
    """Scrape one supply listing and append it as a row to the CSV output.

    Fetches ``page_url``, extracts the title, update time, price and shipping
    address, then POSTs to the ``wechatcall`` gateway endpoint to obtain the
    seller's contact text. The resulting row is written through the
    module-level ``csv_writer`` and printed. The module-level ``headers``
    dict is used for the page request.

    Args:
        page_url: URL of the listing detail page to fetch.
        page_id: Listing identifier, interpolated into the ``referer`` and
            ``x-client-page`` headers as ``/gongying/<page_id>/``.

    Returns:
        None.

    Raises:
        AttributeError, IndexError: If the first link in the layout list is
            missing or its href has fewer than three ``/``-separated parts.
        requests.RequestException: On network failure or timeout.
    """
    # Without a timeout a stalled connection would block the scraper forever.
    timeout = 10

    page = requests.get(url=page_url, headers=headers, timeout=timeout)
    selector = parsel.Selector(page.text)

    # The third "/"-separated segment of the first list link is sent as
    # "userId" to the contact endpoint below.
    num_id = selector.css(
        "#__layout > div > div > div > div > ul > li:nth-child(1) > a::attr(href)"
    ).get().split("/")[2]

    # A missing optional text node yields "" instead of raising on None.
    title = (selector.css(".proinfo-title::text").get() or "").strip()  # title

    # str.strip("更新时间:") treats its argument as a *set of characters* and
    # trims them from both ends; remove the label as a real prefix instead.
    update_time = (selector.css(".update-time::text").get() or "").strip()  # update time
    label = "更新时间"
    if update_time.startswith(label):
        update_time = update_time[len(label):].lstrip(":：").strip()

    price = (selector.css(".priceTxt .orange .fs30::text").get() or "").strip()  # price
    ads = selector.css("div:nth-child(5) > span.fs14.gray6::text").get()  # shipping address

    get_phone_url = "https://gateway.cnhnb.com/banana/im/operate/wechatcall"
    data = {
        "businessType": "1",
        "sourceFrom": "2",
        # The source had doubled quotes here (a syntax error). The value
        # matches "x-client-ticket" below.
        "ticket": "b80e6b3a-38a3-432c-816d-aeb0376228bd",
        "userId": "{}".format(num_id),
    }
    # NOTE(review): these look like headers captured from a browser session
    # (fixed trace id, nonce, timestamp, ticket); kept verbatim - confirm
    # which ones the endpoint actually requires.
    head = {
        "authority": "gateway.cnhnb.com",
        "method": "POST",
        "path": "/banana/im/operate/wechatcall",
        "scheme": "https",
        "accept": "application/json, text/plain, */*",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "content-length": "98",
        "content-type": "application/json",
        "origin": "https://www.cnhnb.com",
        "pragma": "no-cache",
        "referer": "https://www.cnhnb.com/gongying/{}/".format(page_id),
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
        "x-b3-traceid": "0KH64WG5WL1GXPNG",
        "x-client-appid": "5",
        "x-client-id": "c10e4e9a-5e19-4ba2-a934-c8c5c56680f5",
        "x-client-nonce": "62f080cd-ad30-4590-b362-b1c9e660a8d5",
        "x-client-page": "/gongying/{}/".format(page_id),
        "x-client-sid": "S_0KH64W0GT18JX07L",
        "x-client-ticket": "b80e6b3a-38a3-432c-816d-aeb0376228bd",
        "x-client-time": "1604659611092",
        "x-hn-job": "If you see these message, I hope you dont hack us, I hope you can join us! Please visit https://www.cnhnkj.com/job.html",
    }

    contact_response = requests.post(
        url=get_phone_url, json=data, headers=head, timeout=timeout
    )
    html_data = contact_response.json()

    # errorCode 0 carries the contact text; any other code is recorded with
    # the fixed placeholder string.
    if html_data["errorCode"] == 0:
        contact = html_data["data"]["messageTitle"]
    else:
        contact = "商家设置防打扰"

    dit = {
        "标题": title,
        "更新时间": update_time,
        "价格": price,
        "发货地址": ads,
        "联系方式": contact,
    }
    csv_writer.writerow(dit)
    print(dit)

保存数据

# Output CSV, opened in append mode so rows from earlier runs are kept.
# "utf-8-sig" is used as the encoding and newline="" leaves line endings to
# the csv module.
# NOTE(review): the handle is never closed in the visible code; it must stay
# open while get_page_url() writes rows - confirm the driver closes it.
f = open("惠农网信息.csv", mode="a", encoding="utf-8-sig", newline="")
csv_writer = csv.DictWriter(f, fieldnames=["标题", "更新时间", "价格", "发货地址", "联系方式"])

# An append-mode stream is positioned at end-of-file, so offset 0 means the
# file is new or empty. Writing the header unconditionally would insert a
# duplicate header row on every run after the first.
if f.tell() == 0:
    csv_writer.writeheader()

效果图

 

以上是 Python爬取惠农网苹果数据,看看新鲜的水果价格如何[Python基础] 的全部内容, 来源链接: utcz.com/z/530248.html

回到顶部