From: Nick Burch Date: Fri, 26 Feb 2016 23:32:17 +0000 (+0000) Subject: Refactor to pull out the list of Excel 97+ directory entry names to a common place... X-Git-Tag: REL_3_14_FINAL~14 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=7fdd90fecb225bc43ae1e2e3b804d842c3dc1561;p=poi.git Refactor to pull out the list of Excel 97+ directory entry names to a common place, avoiding duplication. Also starts on unit testing #59074 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732579 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java index 4d0b894e01..1f0b543a45 100644 --- a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java +++ b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java @@ -25,6 +25,7 @@ import org.apache.poi.hssf.eventusermodel.HSSFUserException; import org.apache.poi.hssf.record.*; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES; /** * Low level event based HSSF reader. Pass either a DocumentInputStream to @@ -59,20 +60,20 @@ public class HSSFEventFactory { */ public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { // some old documents have "WORKBOOK" or "BOOK" - final String name; + String name = null; Set entryNames = dir.getEntryNames(); - if (entryNames.contains("Workbook")) { - name = "Workbook"; - } else if (entryNames.contains("WORKBOOK")) { - name = "WORKBOOK"; - } else if (entryNames.contains("BOOK")) { - name = "BOOK"; - } else { - name = "Workbook"; + for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) { + if (entryNames.contains(potentialName)) { + name = potentialName; + break; + } + } + // If in doubt, go for the default + if (name == null) { + name = WORKBOOK_DIR_ENTRY_NAMES[0]; } InputStream in = dir.createDocumentInputStream(name); - processEvents(req, in); } diff --git a/src/java/org/apache/poi/hssf/model/InternalWorkbook.java b/src/java/org/apache/poi/hssf/model/InternalWorkbook.java index 93c8da16f4..b932836cb6 100644 --- a/src/java/org/apache/poi/hssf/model/InternalWorkbook.java +++ b/src/java/org/apache/poi/hssf/model/InternalWorkbook.java @@ -123,6 +123,16 @@ public final class InternalWorkbook { */ private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31; + /** + * Normally, the Workbook will be in a POIFS Stream called + * "Workbook". However, some weird XLS generators use "WORKBOOK" + * or "BOOK". + */ + public static final String[] WORKBOOK_DIR_ENTRY_NAMES = { + "Workbook", // as per BIFF8 spec + "WORKBOOK", // Typically from third party programs + "BOOK", // Typically odd Crystal Reports exports + }; private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class); private static final int DEBUG = POILogger.DEBUG; diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index ecf681c7f1..ec66d5d4a6 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -17,6 +17,8 @@ package org.apache.poi.hssf.usermodel; +import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; @@ -95,7 +97,6 @@ import org.apache.poi.util.LittleEndian; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; - /** * High level representation of a workbook. This is the first object most users * will construct whether they are reading or writing a workbook. It is also the @@ -243,17 +244,6 @@ public final class HSSFWorkbook extends POIDocument implements org.apache.poi.ss this(fs.getRoot(), fs, preserveNodes); } - /** - * Normally, the Workbook will be in a POIFS Stream - * called "Workbook". However, some weird XLS generators use "WORKBOOK" - */ - private static final String[] WORKBOOK_DIR_ENTRY_NAMES = { - "Workbook", // as per BIFF8 spec - "WORKBOOK", // Typically from third party programs - "BOOK", // Typically odd Crystal Reports exports - }; - - public static String getWorkbookDirEntryName(DirectoryNode directory) { for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) { diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 6b8e881860..73efc94d38 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -66,6 +66,8 @@ import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFRelation; import org.apache.xmlbeans.XmlException; +import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES; + /** * Figures out the correct POITextExtractor for your supplied * document, and returns it. @@ -301,13 +303,13 @@ public class ExtractorFactory { { // Look for certain entries in the stream, to figure it // out from - if (poifsDir.hasEntry("Workbook") || - // some XLS files have different entry-names - poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) { - if (getPreferEventExtractor()) { - return new EventBasedExcelExtractor(poifsDir); + for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) { + if (poifsDir.hasEntry(workbookName)) { + if (getPreferEventExtractor()) { + return new EventBasedExcelExtractor(poifsDir); + } + return new ExcelExtractor(poifsDir); } - return new ExcelExtractor(poifsDir); } if (poifsDir.hasEntry("WordDocument")) { diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 19f20bdcc5..00fea52a4e 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -54,6 +54,7 @@ import org.apache.poi.xssf.extractor.XSSFExcelExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.xmlbeans.XmlException; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; /** @@ -920,4 +921,23 @@ public class TestExtractorFactory { } } } + + /** + * #59074 - No supported documents found in the OLE2 stream on + * a valid Excel file + */ + @Ignore + @Test + public void a() throws Exception { + POITextExtractor ext = ExtractorFactory.createExtractor( + POIDataSamples.getSpreadSheetInstance().getFile("59074.xls")); + assertNotNull(ext); + + String text = ext.getText(); + ext.close(); + +System.err.println(text); + assertNotNull(text); + assertTrue(text.contains("test")); + } } diff --git a/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java b/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java index 15a8954888..59a52b225c 100644 --- a/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java +++ b/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java @@ -36,6 +36,7 @@ public class TestBiffViewer extends BaseXLSIteratingTest { EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well EXCLUDED.add("password.xls"); EXCLUDED.add("46904.xls"); + EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95 EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2