]> source.dussan.org Git - poi.git/commitdiff
Test for another type of xml-bomb
authorDominik Stadler <centic@apache.org>
Thu, 6 Apr 2017 21:50:03 +0000 (21:50 +0000)
committerDominik Stadler <centic@apache.org>
Thu, 6 Apr 2017 21:50:03 +0000 (21:50 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1790473 13f79535-47bb-0310-9956-ffa450edef68

src/integrationtest/org/apache/poi/TestAllFiles.java
src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java
src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
test-data/spreadsheet/poc-xmlbomb-empty.xlsx [new file with mode: 0644]

index 19edc1455b1c192d4a1859ed988c991737b1b209..e61df18ce15a744b1187c57e21d282ccc75e1b03 100644 (file)
@@ -282,6 +282,7 @@ public class TestAllFiles {
         "poifs/unknown_properties.msg", // POIFS properties corrupted
         "poifs/only-zero-byte-streams.ole2", // No actual contents
         "spreadsheet/poc-xmlbomb.xlsx",  // contains xml-entity-expansion
+        "spreadsheet/poc-xmlbomb-empty.xlsx",  // contains xml-entity-expansion
         "spreadsheet/poc-shared-strings.xlsx",  // contains shared-string-entity-expansion
         "spreadsheet/60255_extra_drawingparts.xlsx", // Non-drawing drawing
         
index 87aacd161d8b4802dc7928c2f6f540dfd4e3472c..aa47a7267055879bb9cea22b6cb47490317dcb6d 100644 (file)
@@ -129,6 +129,7 @@ public class XSSFFileHandler extends SpreadsheetHandler {
         EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764-2.xlsx");
         EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx");
         EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx");
+        EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb-empty.xlsx");
         // strict OOXML
         EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/57914.xlsx");
         EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SampleSS.strict.xlsx");
@@ -136,7 +137,7 @@ public class XSSFFileHandler extends SpreadsheetHandler {
         EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample.strict.xlsx");
         // TODO: good to ignore?
         EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample-beta.xlsx");
-        
+
         // corrupt/invalid
         EXPECTED_ADDITIONAL_FAILURES.add("openxml4j/invalid.xlsx");
     }
index 6e4eb8d4ab2e8b7c4b27b8105e4b50b6b9c6c711..ec0b14b1e38d1de046d108366d9985696d391c16 100644 (file)
@@ -957,6 +957,7 @@ public class TestExtractorFactory {
         "poifs/unknown_properties.msg", // POIFS properties corrupted
         "poifs/only-zero-byte-streams.ole2", // No actual contents
         "spreadsheet/poc-xmlbomb.xlsx",  // contains xml-entity-expansion
+        "spreadsheet/poc-xmlbomb-empty.xlsx",  // contains xml-entity-expansion
         "spreadsheet/poc-shared-strings.xlsx",  // contains shared-string-entity-expansion
 
         // old Excel files, which we only support simple text extraction of
index 5f83bc52d0abec0c8e45ce113a5d52bc21a785e7..f537f139f2f492e96f0edf4fc263d8b6ac4f7b53 100644 (file)
 
 package org.apache.poi.openxml4j.opc;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.reflect.InvocationTargetException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.List;
-import java.util.TreeMap;
-import java.util.regex.Pattern;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipOutputStream;
-
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.POITestCase;
-import org.apache.poi.POIXMLException;
-import org.apache.poi.UnsupportedFileFormatException;
+import org.apache.poi.*;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
-import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
-import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
-import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.*;
 import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
 import org.apache.poi.openxml4j.opc.internal.FileHelper;
 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
@@ -62,11 +29,9 @@ import org.apache.poi.openxml4j.opc.internal.ZipHelper;
 import org.apache.poi.openxml4j.util.ZipSecureFile;
 import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.ss.usermodel.WorkbookFactory;
-import org.apache.poi.util.DocumentHelper;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.apache.poi.util.TempFile;
+import org.apache.poi.util.*;
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.apache.xmlbeans.XmlException;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.w3c.dom.Document;
@@ -74,6 +39,21 @@ import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
+import java.io.*;
+import java.lang.reflect.InvocationTargetException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.List;
+import java.util.TreeMap;
+import java.util.regex.Pattern;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+import static org.junit.Assert.*;
+
 public final class TestPackage {
     private static final POILogger logger = POILogFactory.getLogger(TestPackage.class);
 
@@ -103,10 +83,6 @@ public final class TestPackage {
        /**
         * Test that when we create a new Package, we give it
         *  the correct default content types
-        * @throws IllegalAccessException 
-        * @throws NoSuchFieldException 
-        * @throws IllegalArgumentException 
-        * @throws SecurityException 
         */
     @Test
        public void createGetsContentTypes()
@@ -195,7 +171,6 @@ public final class TestPackage {
         * Tests that we can create a new package, add a core
         *  document and another part, save and re-load and
         *  have everything setup as expected
-        * @throws SAXException 
         */
     @Test
        public void createPackageWithCoreDocument() throws IOException, InvalidFormatException, URISyntaxException, SAXException {
@@ -410,7 +385,6 @@ public final class TestPackage {
 
     /**
      * TODO: fix and enable
-     * @throws URISyntaxException 
      */
     @Test
     @Ignore
@@ -835,10 +809,39 @@ public final class TestPackage {
         wb.close();
         zipFile.close();
     }
+
+    /**
+     * Feeds each of the zip-/xml-bomb sample files to openZipBombFile(),
+     * one after the other.
+     */
+    @Test
+       public void zipBombSampleFiles() throws IOException, OpenXML4JException, XmlException {
+               final String[] bombSamples = {
+                       "poc-shared-strings.xlsx",
+                       "poc-xmlbomb.xlsx",
+                       "poc-xmlbomb-empty.xlsx",
+               };
+               for (String sample : bombSamples) {
+                       openZipBombFile(sample);
+               }
+       }
+
+       /**
+        * Opens the given sample file both as a workbook and through the text
+        * extractor and expects the attempt to be rejected.
+        *
+        * Passes only if an IllegalStateException mentioning the overall
+        * extracted-text size limit is thrown, or a POIXMLException that
+        * checkForZipBombException() accepts; otherwise the test fails.
+        *
+        * @param file name of the sample file to open
+        */
+       private void openZipBombFile(String file) throws IOException, OpenXML4JException, XmlException {
+               try {
+                       Workbook wb = XSSFTestDataSamples.openSampleWorkbook(file);
+                       wb.close();
+
+                       // run the extractor on the file under test, not on one fixed sample
+                       POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile(file));
+                       // assert before the try so a null extractor is not masked by an NPE in finally
+                       assertNotNull(extractor);
+                       try {
+                               extractor.getText();
+                       } finally {
+                               extractor.close();
+                       }
+
+                       fail("Should catch an exception because of a ZipBomb");
+               } catch (IllegalStateException e) {
+                       // getMessage() may be null; rethrow the original instead of hitting an NPE
+                       String msg = e.getMessage();
+                       if (msg == null || !msg.contains("The text would exceed the max allowed overall size of extracted text.")) {
+                               throw e;
+                       }
+               } catch (POIXMLException e) {
+                       checkForZipBombException(e);
+               }
+       }
     
     @Test
-    public void zipBombCheckSizes()
-    throws IOException, EncryptedDocumentException, InvalidFormatException {
+    public void zipBombCheckSizes() throws IOException, EncryptedDocumentException, InvalidFormatException {
         File file = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx");
 
         try {
@@ -897,13 +900,15 @@ public final class TestPackage {
         if(e instanceof InvocationTargetException) {
             InvocationTargetException t = (InvocationTargetException)e;
             IOException t2 = (IOException)t.getTargetException();
-            if(t2.getMessage().startsWith("Zip bomb detected!")) {
+            if(t2.getMessage().startsWith("Zip bomb detected!") ||
+                                       t2.getMessage().startsWith("The parser has encountered more than \"4,096\" entity expansions in this document;")) {
                 return;
             }
         }
         
         String msg = e.getMessage();
-        if(msg != null && msg.startsWith("Zip bomb detected!")) {
+        if(msg != null && (msg.startsWith("Zip bomb detected!") ||
+                               msg.startsWith("The parser has encountered more than \"4,096\" entity expansions in this document;"))) {
             return;
         }
         
diff --git a/test-data/spreadsheet/poc-xmlbomb-empty.xlsx b/test-data/spreadsheet/poc-xmlbomb-empty.xlsx
new file mode 100644 (file)
index 0000000..3feb645
Binary files /dev/null and b/test-data/spreadsheet/poc-xmlbomb-empty.xlsx differ