用BS4将信息写入文件时遇到的问题

代码如下:

import requests

import codecs

from bs4 import BeautifulSoup

i = 0

fp1 = codecs.open('D:/Program Files/python/abcd11.txt', 'w', 'utf-8')

answer_soup = BeautifulSoup(open('D:/Program Files/python/abcd2.txt'))

author_tag = answer_soup.find_all("h3", class_ = "zm-item-answer-author-wrap")

print type(author_tag)

for s in author_tag:

fp1.write(s.text)

print s.text

print i

i = i + 1

fp1.close

运行结果如下:
图片描述

为什么都能print出来,但却不能写入到文件中呢?

打开文件的空白的。

我自己实在是不知道是为什么了,在另一段代码中是可行的的啊。(第一段的写入文件是正常的,第二段写入就是空白的了)

--coding:utf-8--

import requests

import codecs

import ConfigParser

import json

from bs4 import BeautifulSoup

global session

global cookies

i = 0

url = 'http://www.zhihu.com/question/20899988'

fp = codecs.open('D:/Program Files/python/abcd.txt', 'w', 'utf-8')

fp1 = codecs.open('D:/Program Files/python/abcd1.txt', 'w', 'utf-8')

fp2 = codecs.open('D:/Program Files/python/abcd2.txt', 'w', 'utf-8')

cf = ConfigParser.ConfigParser()

cf.read("D:/Program Files/python/config.ini")

cookies = cf._sections["cookies"]

email = cf.get("info", "email")

password = cf.get("info", "password")

cookies = dict(cookies)

print cookies

s = requests.session()

login_data = {"email": email, "password": password}

header = {

'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",

'Host': "www.zhihu.com",

'Referer': "http://www.zhihu.com/",

'X-Requested-With': "XMLHttpRequest"

}

r = s.post('http://www.zhihu.com/login', data = login_data, headers = header)

if r.json()["r"] == 1:

print "Login Failed, reason is:"

for m in r.json()["msg"]:

print r.json()["msg"][m]

print "Use cookies"

has_cookies = False

for key in cookies:

if key != 'name' and cookies[key] != '':

has_cookies = True

break

if has_cookies == False:

raise ValueError("请填写config.ini文件中的cookies项.")

session = s

print r.text

soup = BeautifulSoup(r.content)

if session == None:

create_session()

s2 = session

has_cookies = False

for key in cookies:

if key != 'name' and cookies[key] != '':

has_cookies = True

r2 = s2.get(url,cookies = cookies)

break

if has_cookies == False:

r2 = s2.get(url)

# print "aaaaaaaaaaaaaaaaaaaa"

soup2 = BeautifulSoup(r2.content)

author_tag = soup2.find_all("h3", class_ = "zm-item-answer-author-wrap")

for ss in author_tag:

fp.write(ss.text)

print ss.text

print type(ss.text)

i = i + 1

s3 = session

post_url = "http://www.zhihu.com/node/QuestionAnswerListV2"

_xsrf = soup2.find("input", attrs = {'name': '_xsrf'})["value"]

i = 1

offset = i * 50

params = json.dumps({"url_token":int(url[-8:-1] + url[-1]), "pagesize":50, "offset": offset})

data = {

'_xsrf': _xsrf,

'method': "next",

'params': params

}

header = {

'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",

'Host': "www.zhihu.com",

'Referer': url

}

has_cookies = False

for key in cookies:

if key != 'name' and cookies[key] != '':

has_cookies = True

r3 = s3.post(post_url, data = data, headers = header, cookies = cookies)

break

if has_cookies == False:

r3 = s3.post(post_url, data = data, headers = header)

answer_list = r3.json()["msg"]

for j in range(7):

fp2.write(answer_list[j])

soup = BeautifulSoup(soup.encode("utf-8"))

print j

answer_soup = BeautifulSoup(open('D:/Program Files/python/abcd2.txt'))

i = 0

author_tag = answer_soup.find_all("h3", class_ = "zm-item-answer-author-wrap")

for ss in author_tag:

fp1.write(ss.text)

print ss.text

print i

i = i + 1

fp.close

fp1.close

fp2.close

小白第一次提问,python爬虫也是感兴趣刚学……

有问题大家尽管批评,我改……

以上是 用BS4将信息写入文件时遇到的问题 的全部内容, 来源链接: utcz.com/a/161526.html

回到顶部