2009年5月14日:我已经改用混合方法...请参见下面的代码。
这个版本有两者的优点:
* XmlReader/XmlTextReader(内存效率-->速度);和
* XmlSerializer(代码生成-->开发灵活性和便利性)。
它使用XmlTextReader遍历文档,并创建“文档小片段”,然后使用XmlSerializer和使用XSD.EXE生成的“XML绑定”类反序列化它们。
我想这个技巧是通用的,而且很快...我正在解析一个包含56,000个GML要素的201 MB XML文档,大约需要7秒钟...这个应用程序的旧VB6实现需要几分钟(甚至几个小时)才能解析大的数据提取...所以我走得很顺利。
再次感谢论坛网友们捐赠宝贵的时间。我非常感激。
祝大家好运。Keith。
using System;
using System.Reflection;
using System.Xml;
using System.Xml.Serialization;
using System.IO;
using System.Collections.Generic;
using nrw_rime_extract.utils;
using nrw_rime_extract.xml.generated_bindings;
namespace nrw_rime_extract.xml
{
internal interface ExtractXmlReader
{
rimeType read(string xmlFilename);
}
internal class ExtractXmlReader_XmlSerializerImpl : ExtractXmlReader
{
private Log log = Log.getInstance();
public rimeType read(string xmlFilename)
{
log.write(
string.Format(
"DEBUG: ExtractXmlReader_XmlSerializerImpl.read({0})",
xmlFilename));
using (Stream stream = new FileStream(xmlFilename, FileMode.Open))
{
return read(stream);
}
}
internal rimeType read(Stream xmlInputStream)
{
XmlSerializer serializer = new XmlSerializer(typeof(rimeType));
serializer.UnknownNode += new XmlNodeEventHandler(handleUnknownNode);
serializer.UnknownAttribute +=
new XmlAttributeEventHandler(handleUnknownAttribute);
return (rimeType)serializer.Deserialize(xmlInputStream);
}
protected void handleUnknownNode(object sender, XmlNodeEventArgs e)
{
log.write(
string.Format(
"XML_ERROR: Unknown Node at line {0} position {1} : {2}\t{3}",
e.LineNumber, e.LinePosition, e.Name, e.Text));
}
protected void handleUnknownAttribute(object sender, XmlAttributeEventArgs e)
{
log.write(
string.Format(
"XML_ERROR: Unknown Attribute at line {0} position {1} : {2}='{3}'",
e.LineNumber, e.LinePosition, e.Attr.Name, e.Attr.Value));
}
}
internal class ExtractXmlReader_XmlTextReaderXmlSerializerHybridImpl :
ExtractXmlReader
{
private Log log = Log.getInstance();
public rimeType read(string xmlFilename)
{
log.write(
string.Format(
"DEBUG: ExtractXmlReader_XmlTextReaderXmlSerializerHybridImpl." +
"read({0})",
xmlFilename));
using (XmlReader reader = XmlReader.Create(xmlFilename))
{
return read(reader);
}
}
public rimeType read(XmlReader reader)
{
rimeType result = new rimeType();
Dictionary<Type, XmlSerializer> serializers =
new Dictionary<Type, XmlSerializer>();
serializers.Add(typeof(featureClassType),
newSerializer(typeof(featureClassType)));
serializers.Add(typeof(featureType),
newSerializer(typeof(featureType)));
List<featureClassType> featureClasses = new List<featureClassType>();
List<featureType> features = new List<featureType>();
while (!reader.EOF)
{
if (reader.MoveToContent() != XmlNodeType.Element)
{
reader.Read();
continue;
}
if (reader.Name.Equals("featureClass"))
{
using (
StringReader elementReader =
new StringReader(reader.ReadOuterXml()))
{
XmlSerializer deserializer =
serializers[typeof (featureClassType)];
featureClasses.Add(
(featureClassType)
deserializer.Deserialize(elementReader));
}
continue;
}
if (reader.Name.Equals("feature"))
{
using (
StringReader elementReader =
new StringReader(reader.ReadOuterXml()))
{
XmlSerializer deserializer =
serializers[typeof (featureType)];
features.Add(
(featureType)
deserializer.Deserialize(elementReader));
}
continue;
}
log.write(
"WARNING: unknown element '" + reader.Name +
"' was skipped during parsing.");
reader.Read();
}
result.featureClasses = featureClasses.ToArray();
result.features = features.ToArray();
return result;
}
private XmlSerializer newSerializer(Type elementType)
{
XmlSerializer serializer = new XmlSerializer(elementType);
serializer.UnknownNode += new XmlNodeEventHandler(handleUnknownNode);
serializer.UnknownAttribute +=
new XmlAttributeEventHandler(handleUnknownAttribute);
return serializer;
}
protected void handleUnknownNode(object sender, XmlNodeEventArgs e)
{
log.write(
string.Format(
"XML_ERROR: Unknown Node at line {0} position {1} : {2}\t{3}",
e.LineNumber, e.LinePosition, e.Name, e.Text));
}
protected void handleUnknownAttribute(object sender, XmlAttributeEventArgs e)
{
log.write(
string.Format(
"XML_ERROR: Unknown Attribute at line {0} position {1} : {2}='{3}'",
e.LineNumber, e.LinePosition, e.Attr.Name, e.Attr.Value));
}
}
}