一个可以在顶部存在命名空间声明时正常工作的解决方案:
大多数答案中给出的命令,在 XML 顶部声明了命名空间时无法直接使用。请看下面的例子:
输入的xml:
<elem1 xmlns="urn:x" xmlns:prefix="urn:y">
<elem2 attr1="false" attr2="value2">
elem2 value
</elem2>
<elem2 attr1="true" attr2="value2.1">
elem2.1 value
</elem2>
<prefix:elem3>
elem3 value
</prefix:elem3>
</elem1>
无法工作:
xmlstarlet sel -t -v "/elem1" input.xml
xmllint -xpath "/elem1" input.xml
解决方案:
java ExtractXpath.java "/" example-inputs/input.xml
java ExtractXpath.java "/elem1" input.xml
java ExtractXpath.java "//elem2[@attr2='value2']" input.xml
java ExtractXpath.java "/elem1/elem2/@attr2" input.xml
java ExtractXpath.java "/elem1/elem3/text()" input.xml
java ExtractXpath.java "name(/elem1/elem3)" input.xml
java ExtractXpath.java "name(*/elem3)" input.xml
java ExtractXpath.java "count(/elem2)" input.xml
ExtractXpath.java:
import java.io.File;
import java.io.FileInputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathEvaluationResult;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Command-line tool that evaluates an XPath expression against an XML file and prints
 * every match to standard output.
 *
 * <p>Usage: {@code java ExtractXpath.java <xpath> <xmlFile>}. Each match is wrapped between
 * a {@code ==MATCH_START==} line and a {@code ==MATCH_END==} line.
 */
public class ExtractXpath {

    /**
     * Parses the XML file, evaluates the XPath expression and prints the formatted matches.
     *
     * @param args exactly two values: the XPath expression and the path of the XML file
     * @throws Exception if the file cannot be read or parsed, or the expression is invalid
     */
    public static void main(String[] args) throws Exception {
        assertThat(args.length == 2, "Wrong number of args");
        String xpath = args[0];
        File file = new File(args[1]);
        assertThat(file.isFile(), file.getAbsolutePath() + " is not a file.");

        // NOTE(review): the factory is deliberately left at its defaults (namespace awareness is
        // not switched on). Presumably that is what lets un-prefixed XPath steps match elements
        // that live in a default namespace - confirm before calling setNamespaceAware(true).
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = builderFactory.newDocumentBuilder();

        Document xmlDocument;
        // Close the stream as soon as parsing is done instead of leaking the file handle.
        try (FileInputStream fileIS = new FileInputStream(file)) {
            xmlDocument = builder.parse(fileIS);
        }

        XPath xPath = XPathFactory.newInstance().newXPath();
        XPathExpression xpathExpression = xPath.compile(xpath);
        // First evaluation only discovers the result type; the typed evaluation happens below.
        XPathEvaluationResult<?> xpathEvalResult = xpathExpression.evaluateExpression(xmlDocument);
        System.out.println(
                applyXpathExpression(xmlDocument, xpathExpression, xpathEvalResult.type().name()));
    }

    /**
     * Evaluates {@code expr} against {@code xmlDocument} using the given result type and renders
     * every match wrapped in {@code ==MATCH_START==} / {@code ==MATCH_END==} marker lines.
     *
     * @param xmlDocument   document the expression is evaluated against
     * @param expr          compiled XPath expression
     * @param xpathTypeName result type name, compared with the local parts of the
     *                      {@link XPathConstants} return types
     * @return all matches joined by newlines; an empty string when there is no match
     * @throws RuntimeException if {@code xpathTypeName} is not one of the handled types
     */
    private static String applyXpathExpression(Document xmlDocument, XPathExpression expr, String xpathTypeName)
            throws TransformerConfigurationException, TransformerException, XPathExpressionException {
        List<String> matches = new ArrayList<>();
        if (xpathTypeName.equals(XPathConstants.NODESET.getLocalPart())) {
            NodeList nodeList = (NodeList) expr.evaluate(xmlDocument, XPathConstants.NODESET);
            for (int i = 0; i < nodeList.getLength(); i++) {
                matches.add(convertNodeToString(nodeList.item(i)));
            }
        } else if (xpathTypeName.equals(XPathConstants.STRING.getLocalPart())) {
            matches.add((String) expr.evaluate(xmlDocument, XPathConstants.STRING));
        } else if (xpathTypeName.equals(XPathConstants.NUMBER.getLocalPart())) {
            matches.add(((Number) expr.evaluate(xmlDocument, XPathConstants.NUMBER)).toString());
        } else if (xpathTypeName.equals(XPathConstants.BOOLEAN.getLocalPart())) {
            matches.add(((Boolean) expr.evaluate(xmlDocument, XPathConstants.BOOLEAN)).toString());
        } else if (xpathTypeName.equals(XPathConstants.NODE.getLocalPart())) {
            System.err.println("WARNING found xpathTypeName=NODE");
            matches.add(convertNodeToString((Node) expr.evaluate(xmlDocument, XPathConstants.NODE)));
        } else {
            throw new RuntimeException("Unexpected xpath type name: " + xpathTypeName + ". This should normally not happen");
        }
        return matches.stream()
                .map(str -> "==MATCH_START==\n" + str + "\n==MATCH_END==")
                .collect(Collectors.joining("\n"));
    }

    /**
     * Renders a DOM node as text: attribute and text nodes yield their value, element and
     * document nodes are serialized as indented XML without an XML declaration, and any other
     * node type falls back to its node value after a warning on standard error.
     *
     * @param node node to render
     * @return textual form of the node
     */
    private static String convertNodeToString(Node node) throws TransformerConfigurationException, TransformerException {
        short nType = node.getNodeType();
        switch (nType) {
            case Node.ATTRIBUTE_NODE, Node.TEXT_NODE -> {
                return node.getNodeValue();
            }
            case Node.ELEMENT_NODE, Node.DOCUMENT_NODE -> {
                StringWriter writer = new StringWriter();
                Transformer trans = TransformerFactory.newInstance().newTransformer();
                trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
                trans.setOutputProperty(OutputKeys.INDENT, "yes");
                trans.transform(new DOMSource(node), new StreamResult(writer));
                return writer.toString();
            }
            default -> {
                System.err.println("WARNING: FIXME: Node type:" + nType + " could possibly be handled in a better way.");
                return node.getNodeValue();
            }
        }
    }

    /**
     * Prints {@code msg} followed by the usage line to standard error and terminates the JVM
     * with exit status -1 when the condition does not hold.
     *
     * @param b   condition that must be true for the program to continue
     * @param msg message printed when the condition is false
     */
    private static void assertThat(boolean b, String msg) {
        if (!b) {
            System.err.println(msg + "\n\nUSAGE: program xpath xmlFile");
            System.exit(-1);
        }
    }
}
/**
 * {@link NamespaceContext} that answers prefix and namespace lookups by delegating to the
 * namespace lookup methods of a DOM {@link Document}.
 *
 * <p>NOTE(review): nothing in the visible part of this file instantiates this class - confirm
 * whether it is still needed.
 */
class NamespaceResolver implements NamespaceContext {
    private final Document sourceDocument;

    /**
     * @param document document whose namespace declarations are consulted for every lookup
     */
    public NamespaceResolver(Document document) {
        sourceDocument = document;
    }

    /**
     * Returns the namespace URI bound to {@code prefix} according to the source document.
     *
     * @param prefix prefix to resolve; the empty string asks for the default namespace
     * @return whatever {@link Document#lookupNamespaceURI(String)} reports, possibly {@code null}
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            // NamespaceContext documents IllegalArgumentException for a null prefix; previously
            // this surfaced as an accidental NullPointerException.
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) {
            // The DOM lookup expects null, not "", when asking for the default namespace.
            return sourceDocument.lookupNamespaceURI(null);
        }
        return sourceDocument.lookupNamespaceURI(prefix);
    }

    /**
     * Returns a prefix bound to {@code namespaceURI} according to the source document.
     *
     * @param namespaceURI namespace URI to look up
     * @return whatever {@link Document#lookupPrefix(String)} reports, possibly {@code null}
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public String getPrefix(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("namespaceURI must not be null");
        }
        return sourceDocument.lookupPrefix(namespaceURI);
    }

    /**
     * Returns an iterator over the prefix found by {@link #getPrefix(String)}, or an empty
     * iterator when no prefix is found. Only the single prefix reported by the DOM lookup is
     * returned, never {@code null}.
     *
     * @param namespaceURI namespace URI to look up
     * @return a read-only iterator with zero or one prefix
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        String prefix = getPrefix(namespaceURI);
        List<String> prefixes = (prefix == null) ? List.<String>of() : List.of(prefix);
        return prefixes.iterator();
    }
}
为了方便使用,可以创建一个名为 xpath-extract 的命令:
#!/bin/bash
# Thin wrapper around ExtractXpath.java: forwards the XPath expression (first argument)
# and the XML file path (second argument) to the Java single-file launcher.
xpath_expression="$1"
xml_file="$2"
java ExtractXpath.java "$xpath_expression" "$xml_file"
xpath 产生的“噪音”输出到标准错误(STDERR),而不是标准输出(STDOUT)。 - miken32