python去除html标签[python高级教程]

python去除html标签[python高级教程]

使用 Python 标准库的 HTMLParser 去除 HTML 标签的示例（自己写的，若有不足请指正）：

#! /usr/bin/env python

#coding=utf-8

# blueel 2013-01-19

try:
    # Python 3 location of the parser class.
    from html.parser import HTMLParser
except ImportError:
    # Python 2 fallback (the module name this file originally used).
    from HTMLParser import HTMLParser

class MLStripper(HTMLParser):
    """HTML parser that discards markup and keeps only the text content.

    Markup is supplied through ``feed()``; every text chunk reported to
    ``handle_data`` is appended to ``self.fed`` and ``get_data()`` joins
    the chunks into one string.
    """

    def __init__(self):
        # Run the base initializer rather than calling reset() directly.
        # On Python 3 the initializer sets attributes (such as
        # ``convert_charrefs``) that feed() reads; skipping it makes
        # feed() raise AttributeError.  The explicit-class call form is
        # used because it also works with Python 2's HTMLParser.
        HTMLParser.__init__(self)
        # Text chunks collected so far, in document order.
        self.fed = []

    def handle_data(self, d):
        """Record one chunk of text found outside of tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all collected text chunks joined into a single string."""
        return "".join(self.fed)

def strip_tags(html):
    """Return the text content of ``html`` with all tags removed.

    Args:
        html: Markup string to strip.

    Returns:
        The text chunks found outside of tags, concatenated in order.
    """
    s = MLStripper()
    s.feed(html)
    # feed() may hold back trailing input it cannot classify yet (for
    # example text ending in an unterminated "&"); close() forces the
    # parser to process whatever is still buffered so it is not dropped.
    s.close()
    return s.get_data()

调用示例：

# Sample markup: nested <em>/<span>/<label> elements wrapping a price line.
html = '<em productIndex="0" class="valor-dividido" style="display:block"><span>ou <span><label productIndex="0" class="skuBestInstallmentNumber">12</label>X</span> de <span> <label productIndex="0" class="skuBestInstallmentValue">R$  116,58</label></span> sem juros</span></em>'

# Call form of print: valid syntax on Python 3 and, with a single
# argument, prints the same text on Python 2.
print(strip_tags(html))


以上是 python去除html标签[python高级教程] 的全部内容, 来源链接: utcz.com/z/540355.html

回到顶部