source.dussan.org Git - poi.git/commitdiff
Bug 61267: detect Word v2 files and report that they are not supported in Apache POI
author Dominik Stadler <centic@apache.org>
Mon, 2 Apr 2018 17:15:42 +0000 (17:15 +0000)
committer Dominik Stadler <centic@apache.org>
Mon, 2 Apr 2018 17:15:42 +0000 (17:15 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1828176 13f79535-47bb-0310-9956-ffa450edef68

src/integrationtest/org/apache/poi/TestAllFiles.java
src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java
src/java/org/apache/poi/poifs/filesystem/FileMagic.java
src/java/org/apache/poi/poifs/storage/HeaderBlock.java
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java
test-data/document/word2.doc [new file with mode: 0644]

index 77651274e44005732ef40556eb75d669ef53333b..333a8ebcdc611aa4d1f74a552130388c881844e8 100644 (file)
@@ -302,6 +302,8 @@ public class TestAllFiles {
         "spreadsheet/poc-xmlbomb.xlsx",  // contains xml-entity-expansion
         "spreadsheet/poc-xmlbomb-empty.xlsx",  // contains xml-entity-expansion
         "spreadsheet/poc-shared-strings.xlsx",  // contains shared-string-entity-expansion
+        "document/61612a.docx",
+        "document/word2.doc",
 
         // old Excel files, which we only support simple text extraction of
         "spreadsheet/testEXCEL_2.xls",
index a5baa4adbc5aba6a0c832351a05d0eb6db6514b4..680cac839854dbce48fed32d778dbd0fe13f9ec2 100644 (file)
@@ -54,7 +54,8 @@ public class HPSFFileHandler extends POIFSFileHandler {
         "spreadsheet/55982.xls",
         "spreadsheet/testEXCEL_3.xls",
         "spreadsheet/testEXCEL_4.xls",
-        "hpsf/Test_Humor-Generation.ppt"
+        "hpsf/Test_Humor-Generation.ppt",
+        "document/word2.doc"
     );
     
     static final Set<String> EXCLUDES_HANDLE_FILE = unmodifiableHashSet(
index 6bde1ced8e407cd1d47dd3d11ca804d18e799afc..765cf6e32338d6be9d3837697d7e4b814c58fe80 100644 (file)
@@ -77,10 +77,11 @@ public enum FileMagic {
     PDF("%PDF"),
     /** Some different HTML documents */
     HTML("<!DOCTYP".getBytes(UTF_8), "<html".getBytes(UTF_8)),
+    WORD2(new byte[]{ (byte)0xdb, (byte)0xa5, 0x2d, 0x00}),
     // keep UNKNOWN always as last enum!
     /** UNKNOWN magic */
     UNKNOWN(new byte[0]);
-    
+
     final byte[][] magic;
     
     FileMagic(long magic) {
index c833138a240466cb9074070c72dac7a81b0c9ed9..778590a8990371e4ff560cff87f1fccc8472d69b 100644 (file)
@@ -136,6 +136,9 @@ public final class HeaderBlock implements HeaderBlockConstants {
           case MSWRITE:
            throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
                + "Apache POI doesn't currently support this format");
+          case WORD2:
+                  throw new NotOLE2FileException("The supplied data appears to be an old Word version 2 file. "
+                          + "Apache POI doesn't currently support this format");
        case BIFF2:
        case BIFF3:
        case BIFF4:
index 50ddda3297e51c07c4e2d41a3d97663eb8e9f708..d5702d9a9c70a0393f4e48a140cfc2d993fb136c 100644 (file)
@@ -44,7 +44,7 @@ public final class TestHWPFOldDocument extends HWPFTestCase {
         // Can't open as HWPFDocument
         HWPFTestDataSamples.openSampleFile("Word6.doc");
     }
-    
+
     @Test
     public void testWord6hwpfOld() throws IOException {
         // Open
@@ -60,8 +60,21 @@ public final class TestHWPFOldDocument extends HWPFTestCase {
         doc.close();
     }
 
-    
-    
+    /**
+     * Test a simple Word 2 document
+     */
+    @Test(expected=IllegalArgumentException.class)
+    public void testWord2hwpf() throws IOException {
+        // Can't open as HWPFDocument
+        HWPFTestDataSamples.openSampleFile("word2.doc");
+    }
+
+    @Test(expected=RuntimeException.class)
+    public void testWord2hwpfOld() throws IOException {
+        // Open
+        HWPFTestDataSamples.openOldSampleFile("word2.doc");
+    }
+
     /**
      * Test a simple Word 95 document
      */
diff --git a/test-data/document/word2.doc b/test-data/document/word2.doc
new file mode 100644 (file)
index 0000000..e100a61
Binary files /dev/null and b/test-data/document/word2.doc differ