python 爬虫问题求教望解答疑惑

python 爬虫问题求教望解答疑惑

import requests

from bs4 import BeautifulSoup

# Download the page at `url`, call wrap() on the string of every <pre> tag, and print the <pre> tags.
def html(url):

resp=requests.get(url)

soup=BeautifulSoup(resp.text,'html.parser')

# A single <code> tag is created here, before the loop; the same object is passed to every wrap() call below.
code_tag=soup.new_tag("code")

pres=soup.find_all("pre")

for s in pres:

s.string.wrap(code_tag)

print(pres)

# Run the function against the sample blog post.
html('https://www.cnblogs.com/wind666/p/11992837.html')

返回结果

[<pre></pre>, <pre></pre>, <pre></pre>, <pre><code># 想得到1-10的平方组成的list

list_1_10 = [x**2 for x in range(1,11)]

print(list_1_10)# 想得到1-10中为偶数的平方组成的list

example = [i**2 for i in range(1,11) if i%2 == 0 ]

print(example)# 想得到多重嵌套中的数是2的倍数的平方组成的list

example2 = [[1,2,3],[4,5,6],[7,8,9],[10]]

example3 = [j**2 for i in example2 for j in i if j%2 == 0]

print(example3)# 想得到多重嵌套的list中一重嵌套中list长度大于1的list中的数为2的倍数的平方组成的list

example4 = [[1,2,3],[4,5,6],[7,8,9],[10]]

exmaple5 = [j**2 for i in example2 if len(i)&gt;1 for j in i if j%2 == 0]

print(exmaple5)</code></pre>]

请问大神:为什么前几个 pre 标签都没有被加上 code 标签,只有最后一个加上了?而且前几个 pre 标签内的字符串都被清空了,全部跑到了最后一个 code 标签里。明明 for 循环对每个 pre 都执行了 wrap 操作。希望大神能够解答,谢谢!


回答:

def html(url):
    """Fetch *url* and wrap the content of every ``<pre>`` in its own ``<code>`` tag.

    The resulting list of ``<pre>`` tags is printed; nothing is returned.

    :param url: Address of the page to download and parse.
    """
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html.parser')
    pres = soup.find_all("pre")

    for pre in pres:
        # A tag can sit in only one place in the tree, so every <pre> needs a
        # fresh <code>. Creating one tag before the loop and reusing it moves
        # that tag from <pre> to <pre> and leaves the earlier ones empty.
        code_tag = soup.new_tag("code")

        text = pre.string
        if text is not None:
            text.wrap(code_tag)
        elif pre.contents:
            # .string is None when <pre> holds several children; move them all
            # into the new <code> instead of failing on None.wrap().
            for child in list(pre.contents):
                code_tag.append(child.extract())
            pre.append(code_tag)
        # An empty <pre> has nothing to wrap and is left untouched.

    print(repr(pres))


分析:

s.string 的类型为 bs4.element.NavigableString,它的 wrap 方法源码如下:

def wrap(self, wrap_inside):

"""

Wrap this PageElement inside another one.

:param wrap_inside: A PageElement.

:return: `wrap_inside`, occupying the position in the tree that used to be occupied by `self`, and with `self` inside it.

"""

# Put wrap_inside at the position self occupies; the returned element is kept as `me` so it can be re-attached.
me = self.replace_with(wrap_inside)

# Nest the original element inside the wrapper.
wrap_inside.append(me)

return wrap_inside

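当第一次执行完 wrap 操作后,code_tag 已经被插入到文档树中(位于第一个 pre 内),并且包含了该 pre 的字符串。原代码在循环外只创建了一个 code_tag,循环中每次传入的都是同一个对象;而文档树中的一个节点只能有一个父节点,所以下一次调用 wrap 时,replace_with 会先把 code_tag 从上一个 pre 中移走,再放到当前 pre 中,然后把当前字符串追加进去。如此反复,前面的 pre 都变成了空标签,所有字符串都累积在最后一个 pre 的同一个 code 标签里。解决办法就是在循环内每次用 soup.new_tag("code") 创建一个新的标签。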

以上是 python 爬虫问题求教望解答疑惑 的全部内容, 来源链接: utcz.com/p/937879.html

回到顶部