From: Tim Allison Date: Thu, 18 Feb 2016 01:49:59 +0000 (+0000) Subject: 59021 -- fix content extraction from namespaced elements in XSSFEventBasedExcelExtractor X-Git-Tag: REL_3_14_FINAL~30 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=0b0911db6b99fe04c15e25653afe6350d4b70583;p=poi.git 59021 -- fix content extraction from namespaced elements in XSSFEventBasedExcelExtractor git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1730992 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index 24a0564d16..6ad897c3f2 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -41,7 +41,7 @@ import org.xml.sax.helpers.DefaultHandler; */ public class XSSFSheetXMLHandler extends DefaultHandler { private static final POILogger logger = POILogFactory.getLogger(XSSFSheetXMLHandler.class); - + static final String SPREADSHEETML_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; /** * These are the different kinds of cells we support. * We keep track of the current one between @@ -186,17 +186,21 @@ public class XSSFSheetXMLHandler extends DefaultHandler { @Override @SuppressWarnings("unused") - public void startElement(String uri, String localName, String name, + public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { - if (isTextTag(name)) { + if (uri != null && ! 
uri.equals(SPREADSHEETML_NS)) { + return; + } + + if (isTextTag(localName)) { vIsOpen = true; // Clear contents cache value.setLength(0); - } else if ("is".equals(name)) { + } else if ("is".equals(localName)) { // Inline string outer tag isIsOpen = true; - } else if ("f".equals(name)) { + } else if ("f".equals(localName)) { // Clear contents cache formula.setLength(0); @@ -231,14 +235,14 @@ public class XSSFSheetXMLHandler extends DefaultHandler { fIsOpen = true; } } - else if("oddHeader".equals(name) || "evenHeader".equals(name) || - "firstHeader".equals(name) || "firstFooter".equals(name) || - "oddFooter".equals(name) || "evenFooter".equals(name)) { + else if("oddHeader".equals(localName) || "evenHeader".equals(localName) || + "firstHeader".equals(localName) || "firstFooter".equals(localName) || + "oddFooter".equals(localName) || "evenFooter".equals(localName)) { hfIsOpen = true; // Clear contents cache headerFooter.setLength(0); } - else if("row".equals(name)) { + else if("row".equals(localName)) { String rowNumStr = attributes.getValue("r"); if(rowNumStr != null) { rowNum = Integer.parseInt(rowNumStr) - 1; @@ -248,7 +252,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { output.startRow(rowNum); } // c => cell - else if ("c".equals(name)) { + else if ("c".equals(localName)) { // Set up defaults. 
this.nextDataType = xssfDataType.NUMBER; this.formatIndex = -1; @@ -269,11 +273,13 @@ public class XSSFSheetXMLHandler extends DefaultHandler { else { // Number, but almost certainly with a special style or format XSSFCellStyle style = null; - if (cellStyleStr != null) { - int styleIndex = Integer.parseInt(cellStyleStr); - style = stylesTable.getStyleAt(styleIndex); - } else if (stylesTable.getNumCellStyles() > 0) { - style = stylesTable.getStyleAt(0); + if (stylesTable != null) { + if (cellStyleStr != null) { + int styleIndex = Integer.parseInt(cellStyleStr); + style = stylesTable.getStyleAt(styleIndex); + } else if (stylesTable.getNumCellStyles() > 0) { + style = stylesTable.getStyleAt(0); + } } if (style != null) { this.formatIndex = style.getDataFormat(); @@ -286,12 +292,17 @@ public class XSSFSheetXMLHandler extends DefaultHandler { } @Override - public void endElement(String uri, String localName, String name) + public void endElement(String uri, String localName, String qName) throws SAXException { + + if (uri != null && ! 
uri.equals(SPREADSHEETML_NS)) { + return; + } + String thisStr = null; // v => contents of a cell - if (isTextTag(name)) { + if (isTextTag(localName)) { vIsOpen = false; // Process the value contents as required, now we have it all @@ -364,11 +375,11 @@ public class XSSFSheetXMLHandler extends DefaultHandler { // Output output.cell(cellRef, thisStr, comment); - } else if ("f".equals(name)) { + } else if ("f".equals(localName)) { fIsOpen = false; - } else if ("is".equals(name)) { + } else if ("is".equals(localName)) { isIsOpen = false; - } else if ("row".equals(name)) { + } else if ("row".equals(localName)) { // Handle any "missing" cells which had comments attached checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); @@ -377,19 +388,19 @@ public class XSSFSheetXMLHandler extends DefaultHandler { // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well nextRowNum = rowNum + 1; - } else if ("sheetData".equals(name)) { + } else if ("sheetData".equals(localName)) { // Handle any "missing" cells which had comments attached checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); } - else if("oddHeader".equals(name) || "evenHeader".equals(name) || - "firstHeader".equals(name)) { + else if("oddHeader".equals(localName) || "evenHeader".equals(localName) || + "firstHeader".equals(localName)) { hfIsOpen = false; - output.headerFooter(headerFooter.toString(), true, name); + output.headerFooter(headerFooter.toString(), true, localName); } - else if("oddFooter".equals(name) || "evenFooter".equals(name) || - "firstFooter".equals(name)) { + else if("oddFooter".equals(localName) || "evenFooter".equals(localName) || + "firstFooter".equals(localName)) { hfIsOpen = false; - output.headerFooter(headerFooter.toString(), false, name); + output.headerFooter(headerFooter.toString(), false, localName); } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java 
b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java index b56b3791f0..2be777d6f0 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java @@ -337,4 +337,21 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { extractor.close(); } } + + /** + * Bug 59021: cell text must still be extracted when the sheet XML uses namespaced elements. + */ + public void test59021() throws Exception { + XSSFEventBasedExcelExtractor ex = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.openSamplePackage("59021.xlsx")); + try { + String text = ex.getText(); + assertTrue("can't find Abkhazia", text.contains("Abkhazia - Fixed")); + assertTrue("can't find 10/02/2016", text.contains("10/02/2016")); + } finally { + // close the extractor even when an assertion above fails + ex.close(); + } + } } diff --git a/test-data/spreadsheet/59021.xlsx b/test-data/spreadsheet/59021.xlsx new file mode 100644 index 0000000000..0f485d270e Binary files /dev/null and b/test-data/spreadsheet/59021.xlsx differ