此外,如果没有简单的方法可以做到这一点,Python 2.4.4是否支持任何替代序列化格式?
我最近写了一些代码将XML转换为Python数据结构,尽管我确实需要处理属性。我使用了xml.dom.minidom
而不是ElementTree
,出于类似的原因。虽然我没有在Python 2.4.4上测试过,但我认为它会生效。但我没有编写反向XML生成器,不过你可以使用我包含的“lispy_string”函数来完成这个任务。
我还包含了一些针对我所编写应用程序的快捷方式(在docstring中解释),但听起来你也可能会发现这些快捷方式很有用。本质上,一个XML树技术上被翻译成一个字典列表的字典列表,等等。我省略了创建中间列表,除非必要,所以你可以通过dictname[element1][element2]
来引用元素,而不是dictname[element1][0][element2][0]
等。
属性处理有点笨拙,我强烈建议在处理属性之前阅读代码。
import sys
from xml.dom import minidom
def dappend(dictionary, key, item):
"""Append item to dictionary at key. Only create a list if there is more than one item for the given key.
dictionary[key]=item if key doesn't exist.
dictionary[key].append(item) if key exists."""
if key in dictionary.keys():
if not isinstance(dictionary[key], list):
lst=[]
lst.append(dictionary[key])
lst.append(item)
dictionary[key]=lst
else:
dictionary[key].append(item)
else:
dictionary.setdefault(key, item)
def node_attributes(node):
"""Return an attribute dictionary """
if node.hasAttributes():
return dict([(str(attr), str(node.attributes[attr].value)) for attr in node.attributes.keys()])
else:
return None
def attr_str(node):
return "%s-attrs" % str(node.nodeName)
def hasAttributes(node):
if node.nodeType == node.ELEMENT_NODE:
if node.hasAttributes():
return True
return False
def with_attributes(node, values):
if hasAttributes(node):
if isinstance(values, dict):
dappend(values, '#attributes', node_attributes(node))
return { str(node.nodeName): values }
elif isinstance(values, str):
return { str(node.nodeName): values,
attr_str(node): node_attributes(node)}
else:
return { str(node.nodeName): values }
def xmldom2dict(node):
"""Given an xml dom node tree,
return a python dictionary corresponding to the tree structure of the XML.
This parser does not make lists unless they are needed. For example:
'<list><item>1</item><item>2</item></list>' becomes:
{ 'list' : { 'item' : ['1', '2'] } }
BUT
'<list><item>1</item></list>' would be:
{ 'list' : { 'item' : '1' } }
This is a shortcut for a particular problem and probably not a good long-term design.
"""
if not node.hasChildNodes():
if node.nodeType == node.TEXT_NODE:
if node.data.strip() != '':
return str(node.data.strip())
else:
return None
else:
return with_attributes(node, None)
else:
#recursively create the list of child nodes
childlist=[xmldom2dict(child) for child in node.childNodes if (xmldom2dict(child) != None and child.nodeType != child.COMMENT_NODE)]
if len(childlist)==1:
return with_attributes(node, childlist[0])
else:
#if False not in [isinstance(child, dict) for child in childlist]:
new_dict={}
for child in childlist:
if isinstance(child, dict):
for k in child:
dappend(new_dict, k, child[k])
elif isinstance(child, str):
dappend(new_dict, '#text', child)
else:
print "ERROR"
return with_attributes(node, new_dict)
def load(fname):
return xmldom2dict(minidom.parse(fname))
def lispy_string(node, lst=None, level=0):
if lst==None:
lst=[]
if not isinstance(node, dict) and not isinstance(node, list):
lst.append(' "%s"' % node)
elif isinstance(node, dict):
for key in node.keys():
lst.append("\n%s(%s" % (spaces(level), key))
lispy_print(node[key], lst, level+2)
lst.append(")")
elif isinstance(node, list):
lst.append(" [")
for item in node:
lispy_print(item, lst, level)
lst.append("]")
return lst
if __name__=='__main__':
data = minidom.parse(sys.argv[1])
d=xmldom2dict(data)
print d
问题Serialize Python dictionary to XML列举了一些XML序列化的方式。至于替代性的序列化格式,我想pickle
模块是一个不错的工具。
Python中的字典是无序的,记住这一点。我有一段非常基本的代码,它很小,并且不需要任何外部模块。坏处是它不支持任何类型的XML属性,但是你说:
我不担心属性
所以这是代码:
def d2x(d, root="root"):
op = lambda tag: '<' + tag + '>'
cl = lambda tag: '</' + tag + '>\n'
ml = lambda v,xml: xml + op(key) + str(v) + cl(key)
xml = op(root) + '\n' if root else ""
for key,vl in d.iteritems():
vtype = type(vl)
if vtype is list:
for v in vl:
xml = ml(v,xml)
if vtype is dict: xml = ml('\n' + d2x(vl,None),xml)
if vtype is not list and vtype is not dict: xml = ml(vl,xml)
xml += cl(root) if root else ""
return xml
mydict = {
"boolean":False,
"integer":12,
"float":3.1,
"listitems":["item1","item2"],
"string":"Hello world",
"dictionary":{
"key1":1,
"key2":2,
"dictindict":{
"a":"aaa",
"b":"bbb"
}
}
}
print d2x (mydict,"superxml")
<superxml>
<string>Hello world</string>
<dictionary>
<key2>2</key2>
<key1>1</key1>
<dictindict>
<a>aaa</a>
<b>bbb</b>
</dictindict>
</dictionary>
<float>3.1</float>
<listitems>item1</listitems>
<listitems>item2</listitems>
<boolean>False</boolean>
<integer>12</integer>
</superxml>
对于将Python字典序列化为XML,以下Python类适用于我。与其他解决方案相比,它的优点是非常简单,并且可以进行正确的XML编码。该脚本基于此答案。它只有一个扩展:通过将list_mappings
字典传递给构造函数,您可以指定如何命名单个列表项(在下面示例中的children
属性内的child
)。
from xml.dom.minidom import Document
class DictToXML(object):
default_list_item_name = "item"
def __init__(self, structure, list_mappings={}):
self.doc = Document()
if len(structure) == 1:
rootName = str(list(structure.keys())[0])
self.root = self.doc.createElement(rootName)
self.list_mappings = list_mappings
self.doc.appendChild(self.root)
self.build(self.root, structure[rootName])
def build(self, father, structure):
if type(structure) == dict:
for k in structure:
tag = self.doc.createElement(k)
father.appendChild(tag)
self.build(tag, structure[k])
elif type(structure) == list:
tag_name = self.default_list_item_name
if father.tagName in self.list_mappings:
tag_name = self.list_mappings[father.tagName]
for l in structure:
tag = self.doc.createElement(tag_name)
self.build(tag, l)
father.appendChild(tag)
else:
data = str(structure)
tag = self.doc.createTextNode(data)
father.appendChild(tag)
def display(self):
print(self.doc.toprettyxml(indent=" "))
def get_string(self):
return self.doc.toprettyxml(indent=" ")
if __name__ == '__main__':
example = {'sibling': {'couple': {'mother': 'mom', 'father': 'dad', 'children': [{'child': 'foo'},
{'child': 'bar'}]}}}
xml = DictToXML(example)
xml.display()
<?xml version="1.0" ?>
<sibling>
<couple>
<children>
<child>
<name>foo</name>
</child>
<child>
<name>bar</name>
</child>
</children>
<father>dad</father>
<mother>mom</mother>
</couple>
</sibling>
Grey的链接包含了一些看起来非常强大的解决方案。如果你想自己动手,你可以递归地使用xml.dom.node的childNode成员,当node.childNode = None时终止。