From: Andreas Beeker <kiwiwings@apache.org>
Date: Tue, 18 Dec 2018 23:55:58 +0000 (+0000)
Subject: #62886 - Regression extracting text from corrupted docx files
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=bd951d284af2b0990739222962999dbbbd01a871;p=poi.git

#62886 - Regression extracting text from corrupted docx files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1849252 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/src/ooxml/java/org/apache/poi/openxml4j/util/ZipArchiveThresholdInputStream.java b/src/ooxml/java/org/apache/poi/openxml4j/util/ZipArchiveThresholdInputStream.java
index 725c7a4835..728bf9849a 100644
--- a/src/ooxml/java/org/apache/poi/openxml4j/util/ZipArchiveThresholdInputStream.java
+++ b/src/ooxml/java/org/apache/poi/openxml4j/util/ZipArchiveThresholdInputStream.java
@@ -20,6 +20,7 @@ package org.apache.poi.openxml4j.util;
 import static org.apache.poi.openxml4j.util.ZipSecureFile.MAX_ENTRY_SIZE;
 import static org.apache.poi.openxml4j.util.ZipSecureFile.MIN_INFLATE_RATIO;
 
+import java.io.EOFException;
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -144,6 +145,8 @@ public class ZipArchiveThresholdInputStream extends FilterInputStream {
                         "No valid entries or contents found, this is not a valid OOXML (Office Open XML) file", ze);
             }
             throw ze;
+        } catch (EOFException e) {
+            return null;
         }
     }
 
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/TestXWPFBugs.java b/src/ooxml/testcases/org/apache/poi/xwpf/TestXWPFBugs.java
index 42d675c28c..a77eedc999 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/TestXWPFBugs.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/TestXWPFBugs.java
@@ -43,13 +43,24 @@ import org.junit.Test;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
 
 public class TestXWPFBugs {
+    private static final POIDataSamples samples = POIDataSamples.getDocumentInstance();
+
+    @Test
+    public void truncatedDocx() throws Exception {
+        try (InputStream truncated = samples.openResourceAsStream("truncated62886.docx");
+             OPCPackage pkg = OPCPackage.open(truncated);
+             XWPFWordExtractor extractor = new XWPFWordExtractor(pkg)) {
+            assertNotNull(extractor.getText()); // bug 62886: extraction from the truncated sample must give a non-null result
+        }
+    }
+
     /**
      * A word document that's encrypted with non-standard
      * Encryption options, and no cspname section. See bug 53475
      */
     @Test
     public void bug53475NoCSPName() throws Exception {
-        File file = POIDataSamples.getDocumentInstance().getFile("bug53475-password-is-solrcell.docx");
+        File file = samples.getFile("bug53475-password-is-solrcell.docx");
         POIFSFileSystem filesystem = new POIFSFileSystem(file, true);
 
         // Check the encryption details
@@ -84,7 +95,7 @@ public class TestXWPFBugs {
         int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES");
         Assume.assumeTrue("Please install JCE Unlimited Strength Jurisdiction Policy files for AES 256", maxKeyLen == 2147483647);
 
-        File file = POIDataSamples.getDocumentInstance().getFile("bug53475-password-is-pass.docx");
+        File file = samples.getFile("bug53475-password-is-pass.docx");
         POIFSFileSystem filesystem = new POIFSFileSystem(file, true);
 
         // Check the encryption details
@@ -117,7 +128,7 @@ public class TestXWPFBugs {
     public void bug59058() throws IOException, XmlException {
         String files[] = { "bug57031.docx", "bug59058.docx" };
         for (String f : files) {
-            ZipFile zf = new ZipFile(POIDataSamples.getDocumentInstance().getFile(f));
+            ZipFile zf = new ZipFile(samples.getFile(f));
             ZipArchiveEntry entry = zf.getEntry("word/document.xml");
             DocumentDocument document = DocumentDocument.Factory.parse(zf.getInputStream(entry));
             assertNotNull(document);
diff --git a/test-data/document/truncated62886.docx b/test-data/document/truncated62886.docx
new file mode 100644
index 0000000000..4998b45ad6
Binary files /dev/null and b/test-data/document/truncated62886.docx differ