Python爬取惠农网苹果数据,看看新鲜的水果价格如何[Python基础]
前言
本文的文字及图片来源于网络,仅供学习、交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理
本次目标
爬取惠农网信息
受害者地址
https://www.cnhnb.com/
环境
Python3.6
pycharm
爬虫代码
导入工具
import csv
import time

import parsel
import requests
请求头
# Request headers sent with every detail-page download.
# The Cookie is a fixed set of name=value pairs (it includes the sessionId and
# hnUserTicket values); the pairs are joined with "; " to form the header value.
headers = {
    "Cookie": "; ".join(
        [
            "deviceIdRenew=1",
            "Hm_lvt_91cf34f62b9bedb16460ca36cf192f4c=1604579356,1604659451",
            "deviceId=d1dd5b9-d191-406b-971d-391916a0e",
            "sessionId=S_0KH64T2IHLHSO77N",
            "lmvid=b24dcd0ad2a8f0b783f248c7ff2675a8",
            "lmvid.sig=w1UBnTUKSDq-GfAlx6TyR_K7SjyujGIlF-1kRjTrOAI",
            "hnUserTicket=b80e6b3a-38a3-432c-816d-aeb0376228bd",
            "hnUserId=870048250",
            "Hm_lpvt_91cf34f62b9bedb16460ca36cf192f4c=1604659904",
        ]
    ),
    "Host": "www.cnhnb.com",
    "Referer": "https://www.cnhnb.com/supply/pingguo/",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/81.0.4044.138 Safari/537.36"
    ),
}
解析网站、爬取数据
def get_page_url(page_url, page_id):
    """Scrape one supply detail page and append its record to the CSV.

    Downloads ``page_url`` with the module-level ``headers``, extracts the
    listing fields with CSS selectors, then POSTs to the ``wechatcall``
    gateway endpoint to look up the seller's contact. Exactly one row is
    written through the module-level ``csv_writer`` and echoed to stdout.

    Args:
        page_url: URL of the detail page to download.
        page_id: Listing id; interpolated into the ``referer`` and
            ``x-client-page`` headers of the contact request.

    Returns:
        None. The result is the row written to ``csv_writer``.

    Raises:
        AttributeError: If one of the selectors that is post-processed
            (seller link, title, update time, price) matches nothing,
            because ``.get()`` then returns ``None``.
        IndexError: If the seller link has fewer than three ``/``-separated
            parts.
        KeyError: If the gateway JSON lacks ``errorCode`` (or ``data`` /
            ``messageTitle`` on success).
    """
    # Same value as the hnUserTicket entry in the module-level cookie; the
    # gateway request carries it both in the JSON body and in a header.
    ticket = "b80e6b3a-38a3-432c-816d-aeb0376228bd"

    response_2 = requests.get(url=page_url, headers=headers)
    selector_2 = parsel.Selector(response_2.text)

    # The seller id is the third "/"-separated part of the first link in the list.
    num_id = (
        selector_2.css(
            "#__layout > div > div > div > div > ul > li:nth-child(1) > a::attr(href)"
        )
        .get()
        .split("/")[2]
    )
    # Title
    title = selector_2.css(".proinfo-title::text").get().strip()
    # Update time. NOTE: str.strip() removes any of these characters from both
    # ends (it is a character set, not a prefix).
    update_time = selector_2.css(".update-time::text").get().strip("更新时间:")
    # Price
    price = selector_2.css(".priceTxt .orange .fs30::text").get().strip() + "元"
    # Shipping address (left as None when the selector matches nothing)
    ads = selector_2.css("div:nth-child(5) > span.fs14.gray6::text").get()

    get_phone_url = "https://gateway.cnhnb.com/banana/im/operate/wechatcall"
    data = {
        "businessType": "1",
        "sourceFrom": "2",
        "ticket": ticket,
        "userId": "{}".format(num_id),
    }
    # NOTE(review): "authority"/"method"/"path"/"scheme" look like HTTP/2
    # pseudo-header names and the fixed "content-length" looks copied from a
    # browser capture -- confirm whether the endpoint needs them.
    head = {
        "authority": "gateway.cnhnb.com",
        "method": "POST",
        "path": "/banana/im/operate/wechatcall",
        "scheme": "https",
        "accept": "application/json, text/plain, */*",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "content-length": "98",
        "content-type": "application/json",
        "origin": "https://www.cnhnb.com",
        "pragma": "no-cache",
        "referer": "https://www.cnhnb.com/gongying/{}/".format(page_id),
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
        "x-b3-traceid": "0KH64WG5WL1GXPNG",
        "x-client-appid": "5",
        "x-client-id": "c10e4e9a-5e19-4ba2-a934-c8c5c56680f5",
        "x-client-nonce": "62f080cd-ad30-4590-b362-b1c9e660a8d5",
        "x-client-page": "/gongying/{}/".format(page_id),
        "x-client-sid": "S_0KH64W0GT18JX07L",
        "x-client-ticket": ticket,
        "x-client-time": "1604659611092",
        "x-hn-job": "If you see these message, I hope you dont hack us, I hope you can join us! Please visit https://www.cnhnkj.com/job.html",
    }
    response_3 = requests.post(url=get_phone_url, json=data, headers=head)
    html_data = response_3.json()

    # errorCode 0 means the gateway returned a contact; any other code is
    # recorded with the fixed placeholder text instead.
    if html_data["errorCode"] == 0:
        contact = html_data["data"]["messageTitle"]
    else:
        contact = "商家设置防打扰"

    dit = {
        "标题": title,
        "更新时间": update_time,
        "价格": price,
        "发货地址": ads,
        "联系方式": contact,
    }
    csv_writer.writerow(dit)
    print(dit)
保存数据
# Output CSV, opened in append mode so rows from repeated runs accumulate.
# newline="" lets the csv module control line endings itself.
# The handle stays open because get_page_url writes through csv_writer;
# call f.close() once scraping has finished.
f = open("惠农网信息.csv", mode="a", encoding="utf-8-sig", newline="")
csv_writer = csv.DictWriter(
    f,
    fieldnames=["标题", "更新时间", "价格", "发货地址", "联系方式"],
)
# In append mode the stream starts at the end of the file, so tell() is 0 only
# for a new or empty file. Writing the header only then avoids inserting a
# duplicate header row on every re-run.
if f.tell() == 0:
    csv_writer.writeheader()
效果图
以上是 Python爬取惠农网苹果数据,看看新鲜的水果价格如何[Python基础] 的全部内容, 来源链接: utcz.com/z/530248.html