From 77614b8c49b132b37c9cf2532bf4d9da3985391e Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Thu, 6 Apr 2017 21:50:03 +0000 Subject: [PATCH] Test for another type of xml-bomb git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1790473 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/TestAllFiles.java | 1 + .../apache/poi/stress/XSSFFileHandler.java | 3 +- .../poi/extractor/TestExtractorFactory.java | 1 + .../apache/poi/openxml4j/opc/TestPackage.java | 109 +++++++++--------- test-data/spreadsheet/poc-xmlbomb-empty.xlsx | Bin 0 -> 5193 bytes 5 files changed, 61 insertions(+), 53 deletions(-) create mode 100644 test-data/spreadsheet/poc-xmlbomb-empty.xlsx diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 19edc1455b..e61df18ce1 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -282,6 +282,7 @@ public class TestAllFiles { "poifs/unknown_properties.msg", // POIFS properties corrupted "poifs/only-zero-byte-streams.ole2", // No actual contents "spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion + "spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion "spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion "spreadsheet/60255_extra_drawingparts.xlsx", // Non-drawing drawing diff --git a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java index 87aacd161d..aa47a72670 100644 --- a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java @@ -129,6 +129,7 @@ public class XSSFFileHandler extends SpreadsheetHandler { EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764-2.xlsx"); EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx"); EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb-empty.xlsx"); // strict OOXML EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/57914.xlsx"); EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SampleSS.strict.xlsx"); @@ -136,7 +137,7 @@ public class XSSFFileHandler extends SpreadsheetHandler { EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample.strict.xlsx"); // TODO: good to ignore? EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample-beta.xlsx"); - + // corrupt/invalid EXPECTED_ADDITIONAL_FAILURES.add("openxml4j/invalid.xlsx"); } diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 6e4eb8d4ab..ec0b14b1e3 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -957,6 +957,7 @@ public class TestExtractorFactory { "poifs/unknown_properties.msg", // POIFS properties corrupted "poifs/only-zero-byte-streams.ole2", // No actual contents "spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion + "spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion "spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion // old Excel files, which we only support simple text extraction of diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java index 5f83bc52d0..f537f139f2 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java @@ -17,44 +17,11 @@ package org.apache.poi.openxml4j.opc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.lang.reflect.InvocationTargetException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.List; -import java.util.TreeMap; -import java.util.regex.Pattern; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; -import java.util.zip.ZipOutputStream; - -import org.apache.poi.EncryptedDocumentException; -import org.apache.poi.POIDataSamples; -import org.apache.poi.POITestCase; -import org.apache.poi.POIXMLException; -import org.apache.poi.UnsupportedFileFormatException; +import org.apache.poi.*; +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.openxml4j.exceptions.InvalidOperationException; -import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; -import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException; -import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; +import org.apache.poi.openxml4j.exceptions.*; import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; import org.apache.poi.openxml4j.opc.internal.FileHelper; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; @@ -62,11 +29,9 @@ import org.apache.poi.openxml4j.opc.internal.ZipHelper; import org.apache.poi.openxml4j.util.ZipSecureFile; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; -import org.apache.poi.util.DocumentHelper; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; -import org.apache.poi.util.TempFile; +import org.apache.poi.util.*; +import org.apache.poi.xssf.XSSFTestDataSamples; +import org.apache.xmlbeans.XmlException; import org.junit.Ignore; import org.junit.Test; import org.w3c.dom.Document; @@ -74,6 +39,21 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import java.io.*; +import java.lang.reflect.InvocationTargetException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.TreeMap; +import java.util.regex.Pattern; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + +import static org.junit.Assert.*; + public final class TestPackage { private static final POILogger logger = POILogFactory.getLogger(TestPackage.class); @@ -103,10 +83,6 @@ public final class TestPackage { /** * Test that when we create a new Package, we give it * the correct default content types - * @throws IllegalAccessException - * @throws NoSuchFieldException - * @throws IllegalArgumentException - * @throws SecurityException */ @Test public void createGetsContentTypes() @@ -195,7 +171,6 @@ public final class TestPackage { * Tests that we can create a new package, add a core * document and another part, save and re-load and * have everything setup as expected - * @throws SAXException */ @Test public void createPackageWithCoreDocument() throws IOException, InvalidFormatException, URISyntaxException, SAXException { @@ -410,7 +385,6 @@ public final class TestPackage { /** * TODO: fix and enable - * @throws URISyntaxException */ @Test @Ignore @@ -835,10 +809,39 @@ public final class TestPackage { wb.close(); zipFile.close(); } + + @Test + public void zipBombSampleFiles() throws IOException, OpenXML4JException, XmlException { + openZipBombFile("poc-shared-strings.xlsx"); + openZipBombFile("poc-xmlbomb.xlsx"); + openZipBombFile("poc-xmlbomb-empty.xlsx"); + } + + private void openZipBombFile(String file) throws IOException, OpenXML4JException, XmlException { + try { + Workbook wb = XSSFTestDataSamples.openSampleWorkbook(file); + wb.close(); + + POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx")); + try { + assertNotNull(extractor); + extractor.getText(); + } finally { + extractor.close(); + } + + fail("Should catch an exception because of a ZipBomb"); + } catch (IllegalStateException e) { + if(!e.getMessage().contains("The text would exceed the max allowed overall size of extracted text.")) { + throw e; + } + } catch (POIXMLException e) { + checkForZipBombException(e); + } + } @Test - public void zipBombCheckSizes() - throws IOException, EncryptedDocumentException, InvalidFormatException { + public void zipBombCheckSizes() throws IOException, EncryptedDocumentException, InvalidFormatException { File file = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx"); try { @@ -897,13 +900,15 @@ public final class TestPackage { if(e instanceof InvocationTargetException) { InvocationTargetException t = (InvocationTargetException)e; IOException t2 = (IOException)t.getTargetException(); - if(t2.getMessage().startsWith("Zip bomb detected!")) { + if(t2.getMessage().startsWith("Zip bomb detected!") || + t2.getMessage().startsWith("The parser has encountered more than \"4,096\" entity expansions in this document;")) { return; } } String msg = e.getMessage(); - if(msg != null && msg.startsWith("Zip bomb detected!")) { + if(msg != null && (msg.startsWith("Zip bomb detected!") || + msg.startsWith("The parser has encountered more than \"4,096\" entity expansions in this document;"))) { return; } diff --git a/test-data/spreadsheet/poc-xmlbomb-empty.xlsx b/test-data/spreadsheet/poc-xmlbomb-empty.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3feb64586eaac0ef3f5506c40b161322743fdd33 GIT binary patch literal 5193 zcmdT|cRUpCA3rXU6|y_4B73ilGE(*^^K3aIm z-^sW1t$x4%f6x7K_qylvem>9h{(PR#cvKaD=SYD701iOv@zEU<(ngQ~1^}RS4gk0W z004BQp|&uHEzD5e%?{$I&*^GoT?gl}YUCygKpo;{mXFAb!0)Y@GQ+HaN}r$0$7?q| zZAwWJIj%P!W?>`+`L~-G!w{Y>DpLD-FJH|AWQS^`*h}8XzO#hpkCV;XAmNjtgAPxj z5g5!f;JX3L5AZMEi{Z@Bgzp0)TqFu4`d=}3!voCYZ;<77CYp(dQ~4aqV8E{NQo@o-)M_|1h$Er zOz8d4xyG!#H)W|Okg~$sO2OYke>%AGbdYDpm z4OXhR^wrMDESPWt!Wi2h*_WqwSE1Jn;|CLevULH?gemlassq&Scf#)Hb$HLAr!CR$ zOZ2Olv7OzC-=_2`q5T&B{PcmB40jzZSk6jIQ_ zom1pONRXDvc=OU44g;&XsDmN=8jvkn-FYe+HNyy%BahZ?FE8zlzI3}}9WkP7M~lfC ztV3eG)`rmsWe%SO1h!rYw@Fguq>Ok;;J)%g?t-hciJ(!mCS+7qn<)QW;sKxRx8^Ql zs7&^yxBIiLc3bVMqtj8Vl2l9M*mt;S+_w$b8hG@l5Cd91!?z-anng6q$5=$U=FG># zlv4CL!dNzhaI$u)`4?6!HNlHynZvZ1@z-~5{ow7?gJ2g16I9V;q@z8E_*X(6Kph|_ zjKsyLsl4YVYMnYFRhYUf#KhbwNFqZUc8$q|zg}RShkE$6B_8lr-MWubG{w{-)G=zf zW)ku-CD)I`qR}cb)0HR#aIJtU9ui>?kK823Y9VQt)lg{bqs31m*P_y3DG(4_1^ZTj z6sINnv(y(XYzpLg1Yrbz3BHc9*Iz&62zJWgEWWZG3-<|9<8Q%aH*O+X^UYWZUJ5oX zlg0NsT=kqUz`ol+@B$x*dWs@OKs||O?-uaoehEC6pt`Ar(oD(oD(8#B66w8rgLJVUQ z-qi_NYW+@qz%gom>o^r2bXR1?pgB0)<+gnDTpED2V><;em)FvOB0|IqU~v!gXPig}NwUSY}g3 zh|RV2%wsPz4jW+hc5iI&FtT2DXT^njP>AFr4`uEX64LV&;|$y9Zwf&#_7puN-w}<_ z+`>6vDDDvTC2)=2el8#y8M|ITjbDgI!H+}AAd$@1v0F)2k5%>g zkgh6xYaGT03~)Mk=NO%RBQv$t(VAzc{)|2{4j?z*Qa&B2;M9Tk=NdvFfb^6IM+;*I zi0OTpgQcyx;|UMzy|0TJD(yO<*f7A1%IJ90E z<;LUOhv@cHL;rW+_?E3D@LOh*Vh81Y!NdB{&LXAd&6_=QkH*(lUB~H?Y?%v>>9r~AA0P+Bj>a{p#`6?49-1U|Q4H^R zqIvm0ABSnYDjJEcyN6BcN=)8AbHHgV%PJ?Yi9=t2&*(r#@Q>|bZq|@fqbKOAjB}F) zB2aAXQ;}KOqzV1>MzAD)hI2AW{Oj+pDVxkhwmE0a3?qbl=PbJ$@$%!XluL;APZGCf=0-B~onP7M75z)FY8R z+SB8Ep8_Xta9wCN4u|>b1_X&-+DH;%2}3fmY`_eHGh?(B%9Gy%1denxq|6xMV-Jj0 z(q1_#P5?D}=pd^>!E6O_YB=S`4an}y(lc&S%r0v-}HF`>jjm2{kv5L&xX27pH z-bvlE$F{!X_9ZoFI^36MJ3YYK-bvSBDqZg#qnY#=C>$nN`>*Ag=DsO#mZtD-I z(|9T@zrL&!>u}!W*=?r#(}RvLdKGVW55UVmVyG9F`&P-*T2}Mx6p>1aviZyGvfiIz zRq?k$&Tm?UY;8Sy@$~CmUcQNE%ZYCOE>=*pAD_#7nT`5ivq1#T9bFbV6ZkIeUCW}+ zE{F`H7I~-Mv|g;JYvC#x?mnuAnjQfsmVs#<8`$@r4e@N`ws}5O-Yh>@Tuk{O{(-7e zQ83fgcAB1|ykg9X#_{E2dWDYZ+@Kp1ME+g@Z*>Q%f~cWBNmc~j&gvY?Rd=avlvnme zmir=()10N9Fv*~jIpV#2Q<4pN>D<~!GMA2j^WS_1GDIT!j&`Gc_~NPm{u;i+?XBc&$zFe_^JrRGql;;#EDD}avJ$G)^`tKKd4+UwkOa@)rQlv{8MtC?ks z$`C7d94gL1k;JTJ%X@f`S;Cp{BMN+W1&w!%pTAgrrFGp4`qi38{?*t~s!ufCUr5aK(yochL&z_a>t4Dh2r?lA2hC^t zOd%PE{)qAfO{kcx%P@r#QX2CaI7b>`XZgKTQpvLn=DnM%70k^ee|-y_{c{vrCIhpHd!7$={V_*q!@+(!4NL8{sYr$-4c zn4i$J&@(&hamIZ4Uh}@vi|t(3=dK)-$@&z!o?c!?iLrP!_elj@0iJTM1s>yPa^$9MB=dSey6k?O{n*ghnEbSbf3p=AW+ zmO$r%jhFR>a#fkp%yCYsmyfFpgotoQZITfa=NgOI)P?!h`U6MGR6ZzSnaSZc5nni$ zrBbO&T{ej;Bi$#Wt<&70=bR$8RNMEws*ZchJzH#B)3Uj5h}q)NqV~t6xzNBNcON}5 zrf+5P@!?6$*DXch=0&6<%$zQijmm8@QCu;3><#zWTM%;ii`j65i&3ESZI&ASgj*C& zNN3ucX~))mZ|gEXtv$B_*BKU(FE)!DO6ZKbHHOZ~?NztM&|a6|eRdqE2Yg8t(D=m1 zy^MYN-g@Lw;e&5$LaGWFn74rc{h0%OKhcBUC(HNb`!A0kXAAsUyPtIMr-R66UD6C4 z<$sj%|5o~YM}LYrQTpU3=6fCgzm%Oc3a8^l862HXerbpQt?&1S_!M@cujW_SKkf0E z(!Y0)XXp>;sPg;_wMLa&&eTU ze}(;;Y=3eL0G_lFe{U|%)=5U&7u}lu(Q2HX`0pj^*$N3brz`v{SbvB9ULBl4wb4&M zXyw0d{|x*maVJUXbeu3vi>{Ra0sl-