source.dussan.org Git - poi.git/commitdiff
Integration tests: Expect exception for old word documents and still run the text...
author: Dominik Stadler <centic@apache.org>
Sun, 22 Mar 2015 21:47:44 +0000 (21:47 +0000)
committer: Dominik Stadler <centic@apache.org>
Sun, 22 Mar 2015 21:47:44 +0000 (21:47 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1668483 13f79535-47bb-0310-9956-ffa450edef68

src/integrationtest/org/apache/poi/TestAllFiles.java
src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java
test-data/document/52117.doc [new file with mode: 0644]

index e8de685f9c22b5f35611330816867b700f73c436..8a66024f7ba073785c0c7494960be7fc2acc56bf 100644 (file)
@@ -31,6 +31,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.poi.hwpf.OldWordFileFormatException;
 import org.apache.poi.stress.*;
 import org.apache.tools.ant.DirectoryScanner;
 import org.junit.Test;
@@ -162,6 +163,20 @@ public class TestAllFiles {
         HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler());
     }
 
+    // Old Word Documents where we can at least extract some text.
+    // For these files handleFile() is expected to fail (the test asserts that it
+    // does not succeed), while handleExtracting() is still run when an
+    // OldWordFileFormatException is caught.
+    // Entries are paths relative to ROOT_DIR, in the same form as the "file"
+    // value they are compared against.
+    private static final Set<String> OLD_FILES = new HashSet<String>();
+    static {
+        OLD_FILES.add("document/Bug49933.doc");
+        OLD_FILES.add("document/Bug51944.doc");
+        OLD_FILES.add("document/Word6.doc");
+        OLD_FILES.add("document/Word6_sections.doc");
+        OLD_FILES.add("document/Word6_sections2.doc");
+        OLD_FILES.add("document/Word95.doc");
+        OLD_FILES.add("document/word95err.doc");
+        OLD_FILES.add("hpsf/TestMickey.doc");
+        OLD_FILES.add("document/52117.doc");
+    }
+
     private static final Set<String> EXPECTED_FAILURES = new HashSet<String>();
     static {
         // password protected files
@@ -202,15 +217,7 @@ public class TestAllFiles {
         EXPECTED_FAILURES.add("spreadsheet/43493.xls");
         EXPECTED_FAILURES.add("spreadsheet/46904.xls");
         EXPECTED_FAILURES.add("document/56880.doc");
-        EXPECTED_FAILURES.add("document/Bug49933.doc");
         EXPECTED_FAILURES.add("document/Bug50955.doc");
-        EXPECTED_FAILURES.add("document/Bug51944.doc");
-        EXPECTED_FAILURES.add("document/Word6.doc");
-        EXPECTED_FAILURES.add("document/Word6_sections.doc");
-        EXPECTED_FAILURES.add("document/Word6_sections2.doc");
-        EXPECTED_FAILURES.add("document/Word95.doc");
-        EXPECTED_FAILURES.add("document/word95err.doc");
-        EXPECTED_FAILURES.add("hpsf/TestMickey.doc");
         EXPECTED_FAILURES.add("slideshow/PPT95.ppt");
         EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx");
         EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx");
@@ -269,17 +276,29 @@ public class TestAllFiles {
         File inputFile = new File(ROOT_DIR, file);
 
         try {
-            InputStream stream = new BufferedInputStream(new FileInputStream(inputFile),100);
+            InputStream stream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024);
             try {
                 handler.handleFile(stream);
 
                 assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", 
                         EXPECTED_FAILURES.contains(file));
+                assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", 
+                        OLD_FILES.contains(file));
             } finally {
                 stream.close();
             }
 
             handler.handleExtracting(inputFile);
+        } catch (OldWordFileFormatException e) {
+            // for old word files we should still support extracting text
+            if(OLD_FILES.contains(file)) {
+                handler.handleExtracting(inputFile);
+            } else {
+                // check if we expect failure for this file
+                if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
+                    throw new Exception("While handling " + file, e);
+                }
+            }
         } catch (Exception e) {
             // check if we expect failure for this file
             if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
index 8a27e6d0e9c2c39da95b992d0c51f4620eb790e8..8819083771d6df96d0804fcfb44250e5e64a3fbf 100644 (file)
@@ -28,8 +28,10 @@ import java.io.InputStream;
 import java.util.HashSet;\r
 import java.util.Set;\r
 \r
+import org.apache.poi.POIOLE2TextExtractor;\r
 import org.apache.poi.POITextExtractor;\r
 import org.apache.poi.extractor.ExtractorFactory;\r
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;\r
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;\r
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;\r
 import org.apache.xmlbeans.XmlException;\r
@@ -89,6 +91,19 @@ public abstract class AbstractFileHandler implements FileHandler {
             assertEquals("File should not be modified by extractor", modified, file.lastModified());\r
             \r
             handleExtractingAsStream(file);\r
+            \r
+            if(extractor instanceof POIOLE2TextExtractor) {\r
+               HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor)extractor);\r
+               try {\r
+                       assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());\r
+                       assertNotNull(hpsfExtractor.getSummaryInformationText());\r
+                       String text = hpsfExtractor.getText();\r
+                       //System.out.println(text);\r
+                       assertNotNull(text);\r
+               } finally {\r
+                       hpsfExtractor.close();\r
+               }\r
+            }\r
         } catch (IllegalArgumentException e) {\r
             if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) {\r
                 throw new Exception("While handling " + file, e);\r
index 3a223674cdd43a887a07189fdcaaf5021f264edd..a56ddd2dc6b96a51eaf9439d24cf3eb80c7041ae 100644 (file)
@@ -63,12 +63,10 @@ public class HWPFFileHandler extends POIFSFileHandler {
         docTextWriter.close();
        }
 
-
-
        // a test-case to test this locally without executing the full TestAllFiles
        @Test
        public void test() throws Exception {
-               File file = new File("test-data/document/51921-Word-Crash067.doc");
+               File file = new File("test-data/document/52117.doc");
 
                InputStream stream = new FileInputStream(file);
                try {
@@ -91,4 +89,10 @@ public class HWPFFileHandler extends POIFSFileHandler {
                        stream.close();
                }
        }
+
+       // a test-case to run only the text-extraction step (handleExtracting) locally
+       // on test-data/document/52117.doc, without executing the full TestAllFiles
+       @Test
+       public void testExtractingOld() throws Exception {
+               File file = new File("test-data/document/52117.doc");
+               handleExtracting(file);
+       }
 }
diff --git a/test-data/document/52117.doc b/test-data/document/52117.doc
new file mode 100644 (file)
index 0000000..4f966c0
Binary files /dev/null and b/test-data/document/52117.doc differ