source.dussan.org Git - poi.git/commitdiff
Refactor to pull out the list of Excel 97+ directory entry names to a common place...
author Nick Burch <nick@apache.org>
Fri, 26 Feb 2016 23:32:17 +0000 (23:32 +0000)
committer Nick Burch <nick@apache.org>
Fri, 26 Feb 2016 23:32:17 +0000 (23:32 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732579 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
src/java/org/apache/poi/hssf/model/InternalWorkbook.java
src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java

index 4d0b894e01577e42c43002001b0a3ce33216cefa..1f0b543a4581aa7d8e43c8e92f9bf8614123a882 100644 (file)
@@ -25,6 +25,7 @@ import org.apache.poi.hssf.eventusermodel.HSSFUserException;
 import org.apache.poi.hssf.record.*;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
 
 /**
  * Low level event based HSSF reader.  Pass either a DocumentInputStream to
@@ -59,20 +60,20 @@ public class HSSFEventFactory {
     */
     public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
         // some old documents have "WORKBOOK" or "BOOK"
-        final String name;
+        String name = null;
         Set<String> entryNames = dir.getEntryNames();
-        if (entryNames.contains("Workbook")) {
-            name = "Workbook";
-        } else if (entryNames.contains("WORKBOOK")) {
-            name = "WORKBOOK";
-        } else if (entryNames.contains("BOOK")) {
-            name = "BOOK";
-        } else {
-            name = "Workbook";
+        for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) {
+            if (entryNames.contains(potentialName)) {
+                name = potentialName;
+                break;
+            }
+        }
+        // If in doubt, go for the default
+        if (name == null) {
+            name = WORKBOOK_DIR_ENTRY_NAMES[0];
         }
 
         InputStream in = dir.createDocumentInputStream(name);
-
         processEvents(req, in);
     }
 
index 93c8da16f4b458f61079b8277d165fda2953ebe2..b932836cb65c2d9c553169d1d42ea430ef481d2c 100644 (file)
@@ -123,6 +123,16 @@ public final class InternalWorkbook {
      */
     private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
 
+    /**
+     * Normally, the Workbook will be in a POIFS Stream called 
+     *  "Workbook". However, some weird XLS generators use "WORKBOOK"
+     *  or "BOOK".
+     */
+    public static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
+        "Workbook", // as per BIFF8 spec
+        "WORKBOOK", // Typically from third party programs
+        "BOOK",     // Typically odd Crystal Reports exports
+    };
 
     private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class);
     private static final int DEBUG = POILogger.DEBUG;
index ecf681c7f1be2be587f634deb4253da2273f1eec..ec66d5d4a652490176ac79375a255d4cf54d5aee 100644 (file)
@@ -17,6 +17,8 @@
 
 package org.apache.poi.hssf.usermodel;
 
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.FileNotFoundException;
@@ -95,7 +97,6 @@ import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
 
-
 /**
  * High level representation of a workbook.  This is the first object most users
  * will construct whether they are reading or writing a workbook.  It is also the
@@ -243,17 +244,6 @@ public final class HSSFWorkbook extends POIDocument implements org.apache.poi.ss
         this(fs.getRoot(), fs, preserveNodes);
     }
 
-    /**
-     * Normally, the Workbook will be in a POIFS Stream
-     * called "Workbook". However, some weird XLS generators use "WORKBOOK"
-     */
-    private static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
-        "Workbook", // as per BIFF8 spec
-        "WORKBOOK", // Typically from third party programs
-        "BOOK",     // Typically odd Crystal Reports exports
-    };
-
-
     public static String getWorkbookDirEntryName(DirectoryNode directory) {
 
         for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {
index 6b8e881860505b75d1f37bdf5a93c010c6025842..73efc94d38049f89f63f3ceb0a7c736b57328914 100644 (file)
@@ -66,6 +66,8 @@ import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 import org.apache.poi.xwpf.usermodel.XWPFRelation;
 import org.apache.xmlbeans.XmlException;
 
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+
 /**
  * Figures out the correct POITextExtractor for your supplied
  *  document, and returns it.
@@ -301,13 +303,13 @@ public class ExtractorFactory {
     {
         // Look for certain entries in the stream, to figure it
         // out from
-        if (poifsDir.hasEntry("Workbook") ||
-                // some XLS files have different entry-names
-                poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) {
-            if (getPreferEventExtractor()) {
-                return new EventBasedExcelExtractor(poifsDir);
+        for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
+            if (poifsDir.hasEntry(workbookName)) {
+                if (getPreferEventExtractor()) {
+                    return new EventBasedExcelExtractor(poifsDir);
+                }
+                return new ExcelExtractor(poifsDir);
             }
-            return new ExcelExtractor(poifsDir);
         }
 
         if (poifsDir.hasEntry("WordDocument")) {
index 19f20bdcc5b2ab64f6415da21f88df9e9897b1fd..00fea52a4e0639035f14a9a199a37e5ca901793c 100644 (file)
@@ -54,6 +54,7 @@ import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 import org.apache.xmlbeans.XmlException;
 import org.junit.BeforeClass;
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -920,4 +921,23 @@ public class TestExtractorFactory {
             }
         }
     }
+    
+    /**
+     *  #59074 - No supported documents found in the OLE2 stream on
+     *   a valid Excel file
+     */
+    @Ignore
+    @Test
+    public void a() throws Exception {
+        POITextExtractor ext =  ExtractorFactory.createExtractor(
+                POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
+        assertNotNull(ext);
+        
+        String text = ext.getText();
+        ext.close();
+        
+System.err.println(text);
+        assertNotNull(text);
+        assertTrue(text.contains("test"));
+    }
 }
index 15a89548888a9a8f48dc7a238cf00142106d7850..59a52b225caaa4feaae5eb7fdf35bebfa1762b1a 100644 (file)
@@ -36,6 +36,7 @@ public class TestBiffViewer extends BaseXLSIteratingTest {
                EXCLUDED.add("43493.xls");      // HSSFWorkbook cannot open it as well
                EXCLUDED.add("password.xls"); 
                EXCLUDED.add("46904.xls");
+        EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95
         EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header 
                EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
         EXCLUDED.add("testEXCEL_2.xls");  // Biff 2 / Excel 2, pre-OLE2