Python解析生成XML-ElementTree VS minidom

python

OS:Windows 7

关键字:Python3.4,XML,ElementTree,minidom

本文介绍用Python解析生成以下XML:

<Persons>

<Person>

<Name>LDL</Name>

<Description Language='English'><![CDATA[cdata text]]></Description>

</Person>

<Person>

<Name>China</Name>

<Description Language='Chinese'><![CDATA[cdata text]]></Description>

</Person>

</Persons>

1.创建一个xml文件名为src.xml,内容如上,放到c:\temp

2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。

ElementTreeSample.py如下:

# -*- coding: utf-8 -*-

"""

Sample of xml.etree.ElementTree

@author: ldlchina

"""

import os

import sys

import logging

import traceback

import xml.etree.ElementTree as ET

import time

def copy_node(src_node, target_node):
    """Recursively copy attributes, child elements and leaf text of an element.

    Args:
        src_node: The ``xml.etree.ElementTree.Element`` to copy from.
        target_node: The element to populate; it is modified in place.

    Note:
        Text is copied only for elements that have no children, and ``tail``
        is never copied, so whitespace between elements (line breaks and
        indentation) does not appear in the copy.
    """
    # Copy attributes.
    for key, value in src_node.items():
        target_node.set(key, value)

    # len(element) is the number of direct child elements.
    if len(src_node) > 0:
        for child in src_node:
            target_child = ET.Element(child.tag)
            target_node.append(target_child)
            copy_node(child, target_child)
    else:
        target_node.text = src_node.text

def read_write_xml(src, target):
    """Parse ``src`` with ElementTree and write a copy of its tree to ``target``.

    Prints the time spent in ``copy_node`` in milliseconds and logs the
    target path after the file has been written.

    Args:
        src: Path of the XML file to read.
        target: Path of the XML file to create.
    """
    root = ET.parse(src).getroot()
    target_root = ET.Element(root.tag)

    # perf_counter() is the clock intended for measuring short durations;
    # the wall clock returned by time.time() may be too coarse for this.
    start_time = time.perf_counter() * 1000
    copy_node(root, target_root)
    end_time = time.perf_counter() * 1000
    print('copy_node:' + str(end_time - start_time))

    target_tree = ET.ElementTree(target_root)
    target_tree.write(target)
    logging.info(target)

def main():
    """Configure logging, run the ElementTree copy and print the elapsed time.

    The source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the debugging defaults under
    ``C:/temp`` are used. Exceptions are logged with their traceback instead
    of being propagated, and the function waits for a line of input before
    returning.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger: the log file sits next to this script.
        # splitext swaps only the extension; str.replace('.py', '.log') would
        # also rewrite a '.py' occurring earlier in the path and would return
        # the script's own path when it has no '.py' in it.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler so records also reach the console.
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        logger = logging.getLogger('')
        logger.addHandler(ch)

        if len(sys.argv) > 2:
            src, target = sys.argv[1], sys.argv[2]
        else:
            # For debugging
            src = 'C:/temp/src.xml'
            target = 'C:/temp/target-tree.xml'

        # Generate results
        start_time = time.perf_counter() * 1000
        read_write_xml(src, target)
        end_time = time.perf_counter() * 1000
        print('read_write_xml:' + str(end_time - start_time))
    except Exception:
        # logging.exception appends the active traceback to the message.
        logging.exception('Failed to copy the XML file')

    input('Press any key to exit...')

# Run the sample only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。

MinidomSample.py如下:

# -*- coding: utf-8 -*-

"""

Sample of xml.dom.minidom

@author: ldlchina

"""

import os

import sys

import logging

import traceback

import xml.dom.minidom as MD

import time

def get_text(n):
    """Return the concatenated text held directly by the node ``n``.

    Only text nodes and CDATA sections that are immediate children of ``n``
    contribute; text nested inside child elements is not included.

    Args:
        n: A minidom node whose ``childNodes`` are inspected.

    Returns:
        The joined ``data`` of the matching children, or ``""`` if there
        are none.
    """
    return "".join(
        node.data
        for node in n.childNodes
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
    )

def copy_node(target_doc, src_node, target_node):
    """Recursively copy attributes, text, CDATA and elements of a DOM node.

    Args:
        target_doc: The ``Document`` used to create the new nodes.
        src_node: The node (or ``Document``) to copy from.
        target_node: The node (or ``Document``) that receives the copies.

    Note:
        Child nodes of any other type (for example comments) are skipped.
    """
    # The top-level call passes a Document as src_node; it is excluded here
    # before attributes are queried.
    if not isinstance(src_node, MD.Document) and src_node.hasAttributes():
        for name, value in src_node.attributes.items():
            target_node.setAttribute(name, value)

    for node in src_node.childNodes:
        if node.nodeType == node.TEXT_NODE:
            target_child = target_doc.createTextNode(node.nodeValue)
            target_node.appendChild(target_child)
        elif node.nodeType == node.CDATA_SECTION_NODE:
            target_child = target_doc.createCDATASection(node.nodeValue)
            target_node.appendChild(target_child)
        elif node.nodeType == node.ELEMENT_NODE:
            target_child = target_doc.createElement(node.nodeName)
            target_node.appendChild(target_child)
            copy_node(target_doc, node, target_child)

def read_write_xml(src, target):
    """Parse ``src`` with minidom and write a copy of its tree to ``target``.

    Prints the time spent in ``copy_node`` in milliseconds and logs the
    target path after the file has been written.

    Args:
        src: Path of the XML file to read.
        target: Path of the XML file to create.
    """
    doc = MD.parse(src)
    target_doc = MD.Document()

    # perf_counter() is the clock intended for measuring short durations;
    # the wall clock returned by time.time() may be too coarse for this.
    start_time = time.perf_counter() * 1000
    copy_node(target_doc, doc, target_doc)
    end_time = time.perf_counter() * 1000
    print('copy_node: ' + str(end_time - start_time))

    # Write to file. The output carries no XML declaration, so it is written
    # as UTF-8 (the encoding XML parsers assume by default) rather than in
    # the platform's default encoding; ``with`` closes the file even if the
    # write raises.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(target_doc.documentElement.toxml())

    logging.info(target)

def main():
    """Configure logging, run the minidom copy and print the elapsed time.

    The source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the debugging defaults under
    ``C:/temp`` are used. Exceptions are logged with their traceback instead
    of being propagated, and the function waits for a line of input before
    returning.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger: the log file sits next to this script.
        # splitext swaps only the extension; str.replace('.py', '.log') would
        # also rewrite a '.py' occurring earlier in the path and would return
        # the script's own path when it has no '.py' in it.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler so records also reach the console.
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        logger = logging.getLogger('')
        logger.addHandler(ch)

        if len(sys.argv) > 2:
            src, target = sys.argv[1], sys.argv[2]
        else:
            # For debugging
            src = 'C:/temp/src.xml'
            target = 'C:/temp/target-dom.xml'

        # Generate results
        start_time = time.perf_counter() * 1000
        read_write_xml(src, target)
        end_time = time.perf_counter() * 1000
        print('read_write_xml: ' + str(end_time - start_time))
    except Exception:
        # logging.exception appends the active traceback to the message.
        logging.exception('Failed to copy the XML file')

    input('Press any key to exit...')

# Run the sample only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

4.运行ElementTreeSample.py,得到XML如下:

<Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>

5.运行MinidomSample.py,得到XML如下:

<Persons>

<Person>

<Name>LDL</Name>

<Description Language="English"><![CDATA[cdata text]]></Description>

</Person>

<Person>

<Name>China</Name>

<Description Language="Chinese"><![CDATA[cdata text]]></Description>

</Person>

</Persons>

ElementTree VS minidom:

1.ElementTree执行速度会比minidom快一些。

2.按上面的示例代码,ElementTree生成的XML不保留原文件的换行和缩进,minidom则会保留。

3.ElementTree不支持CDATA(解析时CDATA内容会被当作普通文本,输出时不再带有CDATA标记),minidom支持。

以上是 Python解析生成XML-ElementTree VS minidom 的全部内容, 来源链接: utcz.com/z/389261.html

回到顶部