请查看这篇文章。我展示了如何使用SAX解析器处理XLSX文件。
https://dev59.com/JnfZa4cB1Zd3GeqPT6C0#44969009
简而言之,我扩展了org.xml.sax.helpers.DefaultHandler
,它处理XLSX文件的XML结构。它是一种事件解析器 - SAX。
class SheetHandler extends DefaultHandler {
private static final String ROW_EVENT = "row";
private static final String CELL_EVENT = "c";
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private List<String> cellCache = new LinkedList<>();
private List<String[]> rowCache = new LinkedList<>();
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if (CELL_EVENT.equals(name)) {
String cellType = attributes.getValue("t");
if(cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
} else if (ROW_EVENT.equals(name)) {
if (!cellCache.isEmpty()) {
rowCache.add(cellCache.toArray(new String[cellCache.size()]));
}
cellCache.clear();
}
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
if(nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
if(name.equals("v")) {
cellCache.add(lastContents);
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
lastContents += new String(ch, start, length);
}
public List<String[]> getRowCache() {
return rowCache;
}
}
然后我解析代表XLSX文件的XML。
private List<String []> processFirstSheet(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
SheetHandler handler = new SheetHandler(sst);
XMLReader parser = fetchSheetParser(handler);
Iterator<InputStream> sheetIterator = r.getSheetsData();
if (!sheetIterator.hasNext()) {
return Collections.emptyList();
}
InputStream sheetInputStream = sheetIterator.next();
BufferedInputStream bisSheet = new BufferedInputStream(sheetInputStream);
InputSource sheetSource = new InputSource(bisSheet);
parser.parse(sheetSource);
List<String []> res = handler.getRowCache();
bisSheet.close();
return res;
}
public XMLReader fetchSheetParser(ContentHandler handler) throws SAXException {
XMLReader parser = new SAXParser();
parser.setContentHandler(handler);
return parser;
}