我找到了下面这个将 XSD 转换为 CSV 的解析器:
有了以下代码,多节点 XML 也可以解析。
import pandas as pd
from bs4 import BeautifulSoup
def xsd_to_dict(xsd_path):
    """Flatten the named complex types of an XSD file into column lists.

    The schema is parsed with BeautifulSoup's ``html.parser`` and every
    ``xs:complextype`` tag is serialised back to text. The markup is then
    stripped so that each child is left as one ``tag attr="value" ...`` line;
    every such line that carries a ``name`` attribute becomes one output row.

    Args:
        xsd_path: Path of the XSD file to read.

    Returns:
        A dict with the keys ``"xpath"``, ``"xmlFieldName"`` and
        ``"dataPath"``, each mapped to a list holding one entry per row: the
        complex type's name, the child's ``name`` and the child's ``type``.
        Values keep the double quotes of the markup, a ``:`` inside a value
        is replaced by ``-`` and a missing ``type`` is stored as ``"None"``.
        The dict is empty when no row is found.
    """
    super_dict = {}
    # Close the file as soon as the document has been parsed.
    with open(xsd_path) as xsd_file:
        soup = BeautifulSoup(xsd_file, "html.parser")

    for complex_type in soup.find_all('xs:complextype'):
        # Closing tags have to be removed before the bare ">" is dropped;
        # otherwise their remains ("</xs:attribute") stay glued to the last
        # attribute value of the line.
        stripped = (
            ",".join(str(complex_type).split("\n"))
            .replace("</xs:sequence>", "")
            .replace("<xs:sequence>", "")
            .replace("<xs:", "")
            .replace("</xs:complextype>", "")
            .replace("</xs:element>", "")
            .replace("</xs:attribute>", "")
            .replace(">", "")
            .replace("sequence", "")
        )
        xsd_parsed = [line for line in stripped.split(",") if line != ""]

        header = xsd_parsed[0]
        # An opening tag that is just "complextype" has no attributes, hence
        # no name to report its children under.
        if len(header) <= len("complextype") + 1:
            continue

        # Only the lines after the opening tag describe children.
        for line in xsd_parsed[1:]:
            # 'name="x"' -> 'name:"x"'. Tokens that are not exactly one
            # key=value pair (the tag name, indentation) collapse to "".
            pairs = "|".join(
                ":".join(token.split("=")) if len(token.split("=")) == 2 else ""
                for token in line.split(" ")
            )

            flattened = {}
            for part in (header + pairs).replace("=", ":").split("|"):
                key, *rest = part.split(":")
                # Re-join with "-" so a prefixed value such as xs:string
                # stays one field.
                flattened[key] = "-".join(rest) if rest else key

            xml_name = flattened.get("name")
            if xml_name is None:
                continue

            row = {
                "xpath": str(flattened.get("complextype name")),
                "xmlFieldName": str(xml_name),
                "dataPath": str(flattened.get("type")),
            }
            for column, value in row.items():
                super_dict.setdefault(column, []).append(value)
    return super_dict
def xsd_to_csv(xsd_path):
    """Write the rows extracted from an XSD file to a CSV file.

    The CSV path is the XSD path with its final extension replaced by
    ``.csv``.

    Args:
        xsd_path: Path of the XSD file to convert.

    Returns:
        The string ``"done"`` once the CSV has been written.
    """
    from pathlib import Path

    # Swap only the final extension. str.replace(".xsd", ".csv") would
    # rewrite every ".xsd" in the path and, when the name holds no ".xsd" at
    # all (e.g. "SCHEMA.XSD"), return the input path itself, so the CSV
    # would overwrite the schema.
    csv_path = Path(xsd_path).with_suffix(".csv")
    pd.DataFrame(xsd_to_dict(xsd_path)).to_csv(csv_path)
    return "done"
# Run the conversion for the sample schema file.
xsd_to_csv(xsd_path="CustomersOrders.xsd")
输入:https://learn.microsoft.com/en-us/dotnet/csharp/programming-guide/concepts/linq/sample-xsd-file-customers-and-orders1
输出:
,xpath,xmlFieldName,dataPath
0,"""CustomerType""","""CompanyName""","""xs-string"""
1,"""CustomerType""","""ContactName""","""xs-string"""
2,"""CustomerType""","""ContactTitle""","""xs-string"""
3,"""CustomerType""","""Phone""","""xs-string"""
4,"""CustomerType""","""Fax""","""xs-string"""
5,"""CustomerType""","""FullAddress""","""AddressType"""
6,"""CustomerType""","""CustomerID""","""xs-token""</xs-attribute"
7,"""AddressType""","""Address""","""xs-string"""
8,"""AddressType""","""City""","""xs-string"""
9,"""AddressType""","""Region""","""xs-string"""
10,"""AddressType""","""PostalCode""","""xs-string"""
11,"""AddressType""","""Country""","""xs-string"""
12,"""AddressType""","""CustomerID""","""xs-token""</xs-attribute"
13,"""OrderType""","""CustomerID""","""xs-token"""
14,"""OrderType""","""EmployeeID""","""xs-token"""
15,"""OrderType""","""OrderDate""","""xs-dateTime"""
16,"""OrderType""","""RequiredDate""","""xs-dateTime"""
17,"""OrderType""","""ShipInfo""","""ShipInfoType"""
18,"""ShipInfoType""","""ShipVia""","""xs-integer"""
19,"""ShipInfoType""","""Freight""","""xs-decimal"""
20,"""ShipInfoType""","""ShipName""","""xs-string"""
21,"""ShipInfoType""","""ShipAddress""","""xs-string"""
22,"""ShipInfoType""","""ShipCity""","""xs-string"""
23,"""ShipInfoType""","""ShipRegion""","""xs-string"""
24,"""ShipInfoType""","""ShipPostalCode""","""xs-string"""
25,"""ShipInfoType""","""ShipCountry""","""xs-string"""
26,"""ShipInfoType""","""ShippedDate""","""xs-dateTime""
建议使用 lxml。您需要先安装它,这需要一点时间,但是之后您将拥有一个非常强大的包,具有出色的 XPath 支持、模式验证等功能。接着,可以参考 lxml 提供的教程,它将回答您所有的问题。 - Jan Vlcinsky