diff options
author | Andreas Beeker <kiwiwings@apache.org> | 2018-04-25 10:03:39 +0000 |
---|---|---|
committer | Andreas Beeker <kiwiwings@apache.org> | 2018-04-25 10:03:39 +0000 |
commit | 4e26e6a8d8fea055f17fe856c01f359362c4ff58 (patch) | |
tree | e1e3896e4899eff93db1682b5d7e03232f544415 /src/ooxml/testcases/org/apache/poi/openxml4j | |
parent | a276d5d3f6b3c332cde1d2cf586379c6af4e3a0b (diff) | |
download | poi-4e26e6a8d8fea055f17fe856c01f359362c4ff58.tar.gz poi-4e26e6a8d8fea055f17fe856c01f359362c4ff58.zip |
Bug 62187 - commit Commons Compress unrelated changes
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1830061 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/ooxml/testcases/org/apache/poi/openxml4j')
4 files changed, 373 insertions, 426 deletions
diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java index 8cdcd61ad9..b37de1816e 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java @@ -17,6 +17,7 @@ package org.apache.poi.openxml4j.opc; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -34,13 +35,14 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PushbackInputStream; -import java.lang.reflect.InvocationTargetException; import java.net.URI; import java.net.URISyntaxException; +import java.util.Arrays; import java.util.Enumeration; import java.util.HashMap; import java.util.List; import java.util.TreeMap; +import java.util.function.BiConsumer; import java.util.regex.Pattern; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ -53,7 +55,6 @@ import org.apache.poi.POITextExtractor; import org.apache.poi.POIXMLException; import org.apache.poi.UnsupportedFileFormatException; import org.apache.poi.extractor.ExtractorFactory; -import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; @@ -66,6 +67,8 @@ import org.apache.poi.openxml4j.opc.internal.FileHelper; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; import org.apache.poi.openxml4j.opc.internal.ZipHelper; import org.apache.poi.openxml4j.util.ZipSecureFile; +import org.apache.poi.sl.usermodel.SlideShow; +import org.apache.poi.sl.usermodel.SlideShowFactory; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.util.DocumentHelper; @@ -74,17 +77,26 @@ import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.apache.poi.util.TempFile; import org.apache.poi.xssf.XSSFTestDataSamples; +import org.apache.poi.xwpf.usermodel.XWPFRelation; import org.apache.xmlbeans.XmlException; +import org.hamcrest.Description; +import org.hamcrest.TypeSafeMatcher; import org.junit.Ignore; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; public final class TestPackage { private static final POILogger logger = POILogFactory.getLogger(TestPackage.class); + @Rule + public ExpectedException expectedEx = ExpectedException.none(); + /** * Test that just opening and closing the file doesn't alter the document. */ @@ -114,7 +126,7 @@ public final class TestPackage { */ @Test public void createGetsContentTypes() - throws IOException, InvalidFormatException, SecurityException, IllegalArgumentException, NoSuchFieldException, IllegalAccessException { + throws IOException, InvalidFormatException, SecurityException, IllegalArgumentException { File targetFile = OpenXML4JTestDataSamples.getOutputFile("TestCreatePackageTMP.docx"); // Zap the target file, in case of an earlier run @@ -596,7 +608,7 @@ public final class TestPackage { } @Test - public void getPartsByName() throws IOException, InvalidFormatException { + public void getPartsByName() throws InvalidFormatException { String filepath = OpenXML4JTestDataSamples.getSampleFileName("sample.docx"); @SuppressWarnings("resource") @@ -653,7 +665,7 @@ public final class TestPackage { @Test public void replaceContentType() - throws IOException, InvalidFormatException, SecurityException, IllegalArgumentException, NoSuchFieldException, IllegalAccessException { + throws IOException, InvalidFormatException, SecurityException, IllegalArgumentException { InputStream is = OpenXML4JTestDataSamples.openSampleStream("sample.xlsx"); @SuppressWarnings("resource") OPCPackage p = OPCPackage.open(is); @@ -760,163 +772,175 @@ public final class TestPackage { } } - @Test(expected=IOException.class) + /** + * Zip bomb handling test + * + * see bug #50090 / #56865 + */ + @Test public void zipBombCreateAndHandle() throws IOException, EncryptedDocumentException, InvalidFormatException { - // #50090 / #56865 - ZipFile zipFile = ZipHelper.openZipFile(OpenXML4JTestDataSamples.getSampleFile("sample.xlsx")); - assertNotNull(zipFile); - ByteArrayOutputStream bos = new ByteArrayOutputStream(2500000); - ZipOutputStream append = new ZipOutputStream(bos); - // first, copy contents from existing war - Enumeration<? extends ZipEntry> entries = zipFile.entries(); - while (entries.hasMoreElements()) { - ZipEntry e2 = entries.nextElement(); - ZipEntry e = new ZipEntry(e2.getName()); - e.setTime(e2.getTime()); - e.setComment(e2.getComment()); - e.setSize(e2.getSize()); - - append.putNextEntry(e); - if (!e.isDirectory()) { - InputStream is = zipFile.getInputStream(e); - if (e.getName().equals("[Content_Types].xml")) { - ByteArrayOutputStream bos2 = new ByteArrayOutputStream(); - IOUtils.copy(is, bos2); - long size = bos2.size()-"</Types>".length(); - append.write(bos2.toByteArray(), 0, (int)size); - byte spam[] = new byte[0x7FFF]; - for (int i=0; i<spam.length; i++) spam[i] = ' '; - // 0x7FFF0000 is the maximum for 32-bit zips, but less still works - while (size < 0x7FFF00) { - append.write(spam); - size += spam.length; - } - append.write("</Types>".getBytes("UTF-8")); - size += 8; - e.setSize(size); - } else { - IOUtils.copy(is, append); - } - is.close(); - } - append.closeEntry(); - } - - append.close(); - zipFile.close(); - byte buf[] = bos.toByteArray(); - //noinspection UnusedAssignment - bos = null; - - Workbook wb = WorkbookFactory.create(new ByteArrayInputStream(buf)); - wb.getSheetAt(0); - wb.close(); - zipFile.close(); + try (ZipFile zipFile = ZipHelper.openZipFile(OpenXML4JTestDataSamples.getSampleFile("sample.xlsx")); + ZipOutputStream append = new ZipOutputStream(bos)) { + assertNotNull(zipFile); + + // first, copy contents from existing war + Enumeration<? extends ZipEntry> entries = zipFile.entries(); + while (entries.hasMoreElements()) { + final ZipEntry eIn = entries.nextElement(); + final ZipEntry eOut = new ZipEntry(eIn.getName()); + eOut.setTime(eIn.getTime()); + eOut.setComment(eIn.getComment()); + eOut.setSize(eIn.getSize()); + + append.putNextEntry(eOut); + if (!eOut.isDirectory()) { + try (InputStream is = zipFile.getInputStream(eIn)) { + if (eOut.getName().equals("[Content_Types].xml")) { + ByteArrayOutputStream bos2 = new ByteArrayOutputStream(); + IOUtils.copy(is, bos2); + long size = bos2.size() - "</Types>".length(); + append.write(bos2.toByteArray(), 0, (int) size); + byte spam[] = new byte[0x7FFF]; + Arrays.fill(spam, (byte) ' '); + // 0x7FFF0000 is the maximum for 32-bit zips, but less still works + while (size < 0x7FFF00) { + append.write(spam); + size += spam.length; + } + append.write("</Types>".getBytes("UTF-8")); + size += 8; + eOut.setSize(size); + } else { + IOUtils.copy(is, append); + } + } + } + append.closeEntry(); + } + } + + expectedEx.expect(IOException.class); + expectedEx.expectMessage("Zip bomb detected!"); + + try (Workbook wb = WorkbookFactory.create(new ByteArrayInputStream(bos.toByteArray()))) { + wb.getSheetAt(0); + } } - @Test - public void zipBombSampleFiles() throws IOException, OpenXML4JException, XmlException { - openZipBombFile("poc-shared-strings.xlsx"); - openZipBombFile("poc-xmlbomb.xlsx"); - openZipBombFile("poc-xmlbomb-empty.xlsx"); + @Test + public void testZipEntityExpansionTerminates() throws IOException, OpenXML4JException, XmlException { + expectedEx.expect(IllegalStateException.class); + expectedEx.expectMessage("The text would exceed the max allowed overall size of extracted text."); + openXmlBombFile("poc-shared-strings.xlsx"); } - private void openZipBombFile(String file) throws IOException, OpenXML4JException, XmlException { - try { - Workbook wb = XSSFTestDataSamples.openSampleWorkbook(file); - wb.close(); + @Test + public void testZipEntityExpansionSharedStringTableEvents() throws IOException, OpenXML4JException, XmlException { + boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); + ExtractorFactory.setThreadPrefersEventExtractors(true); + try { + expectedEx.expect(IllegalStateException.class); + expectedEx.expectMessage("The text would exceed the max allowed overall size of extracted text."); + openXmlBombFile("poc-shared-strings.xlsx"); + } finally { + ExtractorFactory.setThreadPrefersEventExtractors(before); + } + } - try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx"))) { - assertNotNull(extractor); - extractor.getText(); - } - fail("Should catch an exception because of a ZipBomb"); - } catch (IllegalStateException e) { - if(!e.getMessage().contains("The text would exceed the max allowed overall size of extracted text.")) { - throw e; - } - } catch (POIXMLException e) { - checkForZipBombException(e); + @Test + public void testZipEntityExpansionExceedsMemory() throws IOException, OpenXML4JException, XmlException { + expectedEx.expect(POIXMLException.class); + expectedEx.expectMessage("Unable to parse xml bean"); + expectedEx.expectCause(getCauseMatcher(SAXParseException.class, "The parser has encountered more than")); + openXmlBombFile("poc-xmlbomb.xlsx"); + } + + @Test + public void testZipEntityExpansionExceedsMemory2() throws IOException, OpenXML4JException, XmlException { + expectedEx.expect(POIXMLException.class); + expectedEx.expectMessage("Unable to parse xml bean"); + expectedEx.expectCause(getCauseMatcher(SAXParseException.class, "The parser has encountered more than")); + openXmlBombFile("poc-xmlbomb-empty.xlsx"); + } + + private void openXmlBombFile(String file) throws IOException, OpenXML4JException, XmlException { + final double minInf = ZipSecureFile.getMinInflateRatio(); + ZipSecureFile.setMinInflateRatio(0.002); + try (POITextExtractor extractor = ExtractorFactory.createExtractor(XSSFTestDataSamples.getSampleFile(file))) { + assertNotNull(extractor); + extractor.getText(); + } finally { + ZipSecureFile.setMinInflateRatio(minInf); } } - + @Test - public void zipBombCheckSizes() throws IOException, EncryptedDocumentException, InvalidFormatException { - File file = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx"); + public void zipBombCheckSizesWithinLimits() throws IOException, EncryptedDocumentException, InvalidFormatException { + getZipStatsAndConsume((max_size, min_ratio) -> { + // use values close to, but within the limits + ZipSecureFile.setMinInflateRatio(min_ratio - 0.002); + assertEquals(min_ratio - 0.002, ZipSecureFile.getMinInflateRatio(), 0.00001); + ZipSecureFile.setMaxEntrySize(max_size + 1); + assertEquals(max_size + 1, ZipSecureFile.getMaxEntrySize()); + }); + } - try { - double min_ratio = Double.MAX_VALUE; - long max_size = 0; - ZipFile zf = ZipHelper.openZipFile(file); - assertNotNull(zf); - Enumeration<? extends ZipEntry> entries = zf.entries(); - while (entries.hasMoreElements()) { - ZipEntry ze = entries.nextElement(); - double ratio = (double)ze.getCompressedSize() / (double)ze.getSize(); - min_ratio = Math.min(min_ratio, ratio); - max_size = Math.max(max_size, ze.getSize()); - } - zf.close(); - - // use values close to, but within the limits - ZipSecureFile.setMinInflateRatio(min_ratio-0.002); - assertEquals(min_ratio-0.002, ZipSecureFile.getMinInflateRatio(), 0.00001); - ZipSecureFile.setMaxEntrySize(max_size+1); - assertEquals(max_size+1, ZipSecureFile.getMaxEntrySize()); - - WorkbookFactory.create(file, null, true).close(); - - // check ratio out of bounds - ZipSecureFile.setMinInflateRatio(min_ratio+0.002); - try { - WorkbookFactory.create(file, null, true).close(); - // this is a bit strange, as there will be different exceptions thrown - // depending if this executed via "ant test" or within eclipse - // maybe a difference in JDK ... - } catch (InvalidFormatException | POIXMLException e) { - checkForZipBombException(e); - } + @Test + public void zipBombCheckSizesRatioTooSmall() throws IOException, EncryptedDocumentException, InvalidFormatException { + expectedEx.expect(POIXMLException.class); + expectedEx.expectMessage("You can adjust this limit via ZipSecureFile.setMinInflateRatio()"); + getZipStatsAndConsume((max_size, min_ratio) -> { + // check ratio out of bounds + ZipSecureFile.setMinInflateRatio(min_ratio+0.002); + }); + } + @Test + public void zipBombCheckSizesSizeTooBig() throws IOException, EncryptedDocumentException, InvalidFormatException { + expectedEx.expect(POIXMLException.class); + expectedEx.expectMessage("You can adjust this limit via ZipSecureFile.setMaxEntrySize()"); + getZipStatsAndConsume((max_size, min_ratio) -> { // check max entry size ouf of bounds - ZipSecureFile.setMinInflateRatio(min_ratio-0.002); - ZipSecureFile.setMaxEntrySize(max_size-1); - try { - WorkbookFactory.create(file, null, true).close(); - } catch (InvalidFormatException | POIXMLException e) { - checkForZipBombException(e); - } - } finally { - // reset otherwise a lot of ooxml tests will fail - ZipSecureFile.setMinInflateRatio(0.01d); - ZipSecureFile.setMaxEntrySize(0xFFFFFFFFL); - } - } + ZipSecureFile.setMinInflateRatio(min_ratio-0.002); + ZipSecureFile.setMaxEntrySize(max_size-100); + }); + } - private void checkForZipBombException(Throwable e) { - // unwrap InvocationTargetException as they usually contain the nested exception in the "target" member - if(e instanceof InvocationTargetException) { - e = ((InvocationTargetException)e).getTargetException(); - } - - String msg = e.getMessage(); - if(msg != null && (msg.startsWith("Zip bomb detected!") || - msg.contains("The parser has encountered more than \"4,096\" entity expansions in this document;") || - msg.contains("The parser has encountered more than \"4096\" entity expansions in this document;"))) { - return; - } - - // recursively check the causes for the message as it can be nested further down in the exception-tree - if(e.getCause() != null && e.getCause() != e) { - checkForZipBombException(e.getCause()); - return; - } + private void getZipStatsAndConsume(BiConsumer<Long,Double> ratioCon) throws IOException, InvalidFormatException { + // use a test file with a xml file bigger than 100k (ZipArchiveThresholdInputStream.GRACE_ENTRY_SIZE) + final File file = XSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx"); - throw new IllegalStateException("Expected to catch an Exception because of a detected Zip Bomb, but did not find the related error message in the exception", e); - } + double min_ratio = Double.MAX_VALUE; + long max_size = 0; + try (ZipFile zf = ZipHelper.openZipFile(file)) { + assertNotNull(zf); + Enumeration<? extends ZipEntry> entries = zf.entries(); + while (entries.hasMoreElements()) { + ZipEntry ze = entries.nextElement(); + if (ze.getSize() == 0) { + continue; + } + // add zip entry header ~ 30 bytes + long size = ze.getSize()+30; + double ratio = ze.getCompressedSize() / (double)size; + min_ratio = Math.min(min_ratio, ratio); + max_size = Math.max(max_size, size); + } + } + ratioCon.accept(max_size, min_ratio); + + //noinspection EmptyTryBlock,unused + try (Workbook wb = WorkbookFactory.create(file, null, true)) { + } finally { + // reset otherwise a lot of ooxml tests will fail + ZipSecureFile.setMinInflateRatio(0.01d); + ZipSecureFile.setMaxEntrySize(0xFFFFFFFFL); + } + } @Test public void testConstructors() throws IOException { @@ -926,10 +950,6 @@ public final class TestPackage { assertNotNull(zipFile.getName()); zipFile.close(); - zipFile = new ZipSecureFile(file, ZipFile.OPEN_READ); - assertNotNull(zipFile.getName()); - zipFile.close(); - zipFile = new ZipSecureFile(file.getAbsolutePath()); assertNotNull(zipFile.getName()); zipFile.close(); @@ -948,7 +968,7 @@ public final class TestPackage { // bug 60128 @Test(expected=NotOfficeXmlFileException.class) - public void testCorruptFile() throws IOException, InvalidFormatException { + public void testCorruptFile() throws InvalidFormatException { File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx"); OPCPackage.open(file, PackageAccess.READ); } @@ -976,4 +996,148 @@ public final class TestPackage { } } } + + @Test + public void testBug56479() throws Exception { + InputStream is = OpenXML4JTestDataSamples.openSampleStream("dcterms_bug_56479.zip"); + OPCPackage p = OPCPackage.open(is); + + // Check we found the contents of it + boolean foundCoreProps = false, foundDocument = false, foundTheme1 = false; + for (final PackagePart part : p.getParts()) { + final String partName = part.getPartName().toString(); + final String contentType = part.getContentType(); + if ("/docProps/core.xml".equals(partName)) { + assertEquals(ContentTypes.CORE_PROPERTIES_PART, contentType); + foundCoreProps = true; + } + if ("/word/document.xml".equals(partName)) { + assertEquals(XWPFRelation.DOCUMENT.getContentType(), contentType); + foundDocument = true; + } + if ("/word/theme/theme1.xml".equals(partName)) { + assertEquals(XWPFRelation.THEME.getContentType(), contentType); + foundTheme1 = true; + } + } + assertTrue("Core not found in " + p.getParts(), foundCoreProps); + assertFalse("Document should not be found in " + p.getParts(), foundDocument); + assertFalse("Theme1 should not found in " + p.getParts(), foundTheme1); + p.close(); + is.close(); + } + + @Test + public void unparseableCentralDirectory() throws IOException { + File f = OpenXML4JTestDataSamples.getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx"); + SlideShow<?,?> ppt = SlideShowFactory.create(f, null, true); + ppt.close(); + } + + @Test + public void testClosingStreamOnException() throws IOException { + InputStream is = OpenXML4JTestDataSamples.openSampleStream("dcterms_bug_56479.zip"); + File tmp = File.createTempFile("poi-test-truncated-zip", ""); + // create a corrupted zip file by truncating a valid zip file to the first 100 bytes + OutputStream os = new FileOutputStream(tmp); + for (int i = 0; i < 100; i++) { + os.write(is.read()); + } + os.flush(); + os.close(); + is.close(); + + // feed the corrupted zip file to OPCPackage + try { + OPCPackage.open(tmp, PackageAccess.READ); + } catch (Exception e) { + // expected: the zip file is invalid + // this test does not care if open() throws an exception or not. + } + // If the stream is not closed on exception, it will keep a file descriptor to tmp, + // and requests to the OS to delete the file will fail. + assertTrue("Can't delete tmp file", tmp.delete()); + } + + /** + * If ZipPackage is passed an invalid file, a call to close + * (eg from the OPCPackage open method) should tidy up the + * stream / file the broken file is being read from. + * See bug #60128 for more + */ + @Test(expected = NotOfficeXmlFileException.class) + public void testTidyStreamOnInvalidFile1() throws Exception { + openInvalidFile("SampleSS.ods", false); + } + + @Test(expected = NotOfficeXmlFileException.class) + public void testTidyStreamOnInvalidFile2() throws Exception { + openInvalidFile("SampleSS.ods", true); + } + + @Test(expected = NotOfficeXmlFileException.class) + public void testTidyStreamOnInvalidFile3() throws Exception { + openInvalidFile("SampleSS.txt", false); + } + + @Test(expected = NotOfficeXmlFileException.class) + public void testTidyStreamOnInvalidFile4() throws Exception { + openInvalidFile("SampleSS.txt", true); + } + + private static void openInvalidFile(final String name, final boolean useStream) throws IOException, InvalidFormatException { + // Spreadsheet has a good mix of alternate file types + final POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); + ZipPackage pkgTest = null; + try (final InputStream is = (useStream) ? files.openResourceAsStream(name) : null) { + try (final ZipPackage pkg = (useStream) ? new ZipPackage(is, PackageAccess.READ) : new ZipPackage(files.getFile(name), PackageAccess.READ)) { + pkgTest = pkg; + assertNotNull(pkg.getZipArchive()); +// assertFalse(pkg.getZipArchive().isClosed()); + pkg.getParts(); + fail("Shouldn't work"); + } + } finally { + if (pkgTest != null) { + assertNotNull(pkgTest.getZipArchive()); + assertTrue(pkgTest.getZipArchive().isClosed()); + } + } + } + + @SuppressWarnings("SameParameterValue") + private static <T extends Throwable> AnyCauseMatcher<T> getCauseMatcher(Class<T> cause, String message) { + // junit is only using hamcrest-core, so instead of adding hamcrest-beans, we provide the throwable + // search with the basics... + // see https://stackoverflow.com/a/47703937/2066598 + return new AnyCauseMatcher<>(cause, message); + } + + private static class AnyCauseMatcher<T extends Throwable> extends TypeSafeMatcher<T> { + private final Class<T> expectedType; + private final String expectedMessage; + + AnyCauseMatcher(Class<T> expectedType, String expectedMessage) { + this.expectedType = expectedType; + this.expectedMessage = expectedMessage; + } + + @Override + protected boolean matchesSafely(final Throwable root) { + for (Throwable t = root; t != null; t = t.getCause()) { + if (t.getClass().isAssignableFrom(expectedType) && t.getMessage().contains(expectedMessage)) { + return true; + } + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("expects type ") + .appendValue(expectedType) + .appendText(" and a message ") + .appendValue(expectedMessage); + } + } } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java deleted file mode 100644 index 88c5bbb1ad..0000000000 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java +++ /dev/null @@ -1,246 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.openxml4j.opc; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; - -import org.apache.poi.POIDataSamples; -import org.apache.poi.POITextExtractor; -import org.apache.poi.POIXMLException; -import org.apache.poi.extractor.ExtractorFactory; -import org.apache.poi.hssf.HSSFTestDataSamples; -import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; -import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; -import org.apache.poi.sl.usermodel.SlideShow; -import org.apache.poi.sl.usermodel.SlideShowFactory; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.ss.usermodel.WorkbookFactory; -import org.apache.poi.xssf.XSSFTestDataSamples; -import org.apache.poi.xwpf.usermodel.XWPFRelation; -import org.apache.xmlbeans.XmlException; -import org.junit.Test; - -public class TestZipPackage { - @Test - public void testBug56479() throws Exception { - InputStream is = OpenXML4JTestDataSamples.openSampleStream("dcterms_bug_56479.zip"); - OPCPackage p = OPCPackage.open(is); - - // Check we found the contents of it - boolean foundCoreProps = false, foundDocument = false, foundTheme1 = false; - for (final PackagePart part : p.getParts()) { - final String partName = part.getPartName().toString(); - final String contentType = part.getContentType(); - if ("/docProps/core.xml".equals(partName)) { - assertEquals(ContentTypes.CORE_PROPERTIES_PART, contentType); - foundCoreProps = true; - } - if ("/word/document.xml".equals(partName)) { - assertEquals(XWPFRelation.DOCUMENT.getContentType(), contentType); - foundDocument = true; - } - if ("/word/theme/theme1.xml".equals(partName)) { - assertEquals(XWPFRelation.THEME.getContentType(), contentType); - foundTheme1 = true; - } - } - assertTrue("Core not found in " + p.getParts(), foundCoreProps); - assertFalse("Document should not be found in " + p.getParts(), foundDocument); - assertFalse("Theme1 should not found in " + p.getParts(), foundTheme1); - p.close(); - is.close(); - } - - @Test - public void testZipEntityExpansionTerminates() throws IOException { - try { - Workbook wb = XSSFTestDataSamples.openSampleWorkbook("poc-xmlbomb.xlsx"); - wb.close(); - fail("Should catch exception due to entity expansion limitations"); - } catch (POIXMLException e) { - assertEntityLimitReached(e); - } - } - - private void assertEntityLimitReached(Exception e) throws UnsupportedEncodingException { - ByteArrayOutputStream str = new ByteArrayOutputStream(); - try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(str, "UTF-8"))) { - e.printStackTrace(writer); - } - String string = new String(str.toByteArray(), "UTF-8"); - assertTrue("Had: " + string, string.contains("The parser has encountered more than")); - } - - @Test - public void testZipEntityExpansionExceedsMemory() throws Exception { - try { - Workbook wb = WorkbookFactory.create(XSSFTestDataSamples.openSamplePackage("poc-xmlbomb.xlsx")); - wb.close(); - fail("Should catch exception due to entity expansion limitations"); - } catch (POIXMLException e) { - assertEntityLimitReached(e); - } - - try { - try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-xmlbomb.xlsx"))) { - assertNotNull(extractor); - - try { - extractor.getText(); - } catch (IllegalStateException e) { - // expected due to shared strings expansion - } - } - } catch (POIXMLException e) { - assertEntityLimitReached(e); - } - } - - @Test - public void testZipEntityExpansionSharedStringTable() throws Exception { - Workbook wb = WorkbookFactory.create(XSSFTestDataSamples.openSamplePackage("poc-shared-strings.xlsx")); - wb.close(); - - try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx"))) { - assertNotNull(extractor); - - try { - extractor.getText(); - } catch (IllegalStateException e) { - // expected due to shared strings expansion - } - } - } - - @Test - public void testZipEntityExpansionSharedStringTableEvents() throws Exception { - boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); - ExtractorFactory.setThreadPrefersEventExtractors(true); - try { - try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx"))) { - assertNotNull(extractor); - - try { - extractor.getText(); - } catch (IllegalStateException e) { - // expected due to shared strings expansion - } - } - } catch (XmlException e) { - assertEntityLimitReached(e); - } finally { - ExtractorFactory.setThreadPrefersEventExtractors(before); - } - } - - @Test - public void unparseableCentralDirectory() throws IOException { - File f = OpenXML4JTestDataSamples.getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx"); - SlideShow<?,?> ppt = SlideShowFactory.create(f, null, true); - ppt.close(); - } - - @Test - public void testClosingStreamOnException() throws IOException { - InputStream is = OpenXML4JTestDataSamples.openSampleStream("dcterms_bug_56479.zip"); - File tmp = File.createTempFile("poi-test-truncated-zip", ""); - // create a corrupted zip file by truncating a valid zip file to the first 100 bytes - OutputStream os = new FileOutputStream(tmp); - for (int i = 0; i < 100; i++) { - os.write(is.read()); - } - os.flush(); - os.close(); - is.close(); - - // feed the corrupted zip file to OPCPackage - try { - OPCPackage.open(tmp, PackageAccess.READ); - } catch (Exception e) { - // expected: the zip file is invalid - // this test does not care if open() throws an exception or not. - } - // If the stream is not closed on exception, it will keep a file descriptor to tmp, - // and requests to the OS to delete the file will fail. - assertTrue("Can't delete tmp file", tmp.delete()); - } - - /** - * If ZipPackage is passed an invalid file, a call to close - * (eg from the OPCPackage open method) should tidy up the - * stream / file the broken file is being read from. - * See bug #60128 for more - */ - @Test - public void testTidyStreamOnInvalidFile() throws Exception { - // Spreadsheet has a good mix of alternate file types - POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); - - File[] notValidF = new File[] { - files.getFile("SampleSS.ods"), files.getFile("SampleSS.txt") - }; - InputStream[] notValidS = new InputStream[] { - files.openResourceAsStream("SampleSS.ods"), files.openResourceAsStream("SampleSS.txt") - }; - - for (File notValid : notValidF) { - ZipPackage pkg = new ZipPackage(notValid, PackageAccess.READ); - assertNotNull(pkg.getZipArchive()); - assertFalse(pkg.getZipArchive().isClosed()); - try { - pkg.getParts(); - fail("Shouldn't work"); - } catch (NotOfficeXmlFileException e) { - // expected here - } - pkg.close(); - - assertNotNull(pkg.getZipArchive()); - assertTrue(pkg.getZipArchive().isClosed()); - } - for (InputStream notValid : notValidS) { - ZipPackage pkg = new ZipPackage(notValid, PackageAccess.READ); - assertNotNull(pkg.getZipArchive()); - assertFalse(pkg.getZipArchive().isClosed()); - try { - pkg.getParts(); - fail("Shouldn't work"); - } catch (NotOfficeXmlFileException e) { - // expected here - } - pkg.close(); - - assertNotNull(pkg.getZipArchive()); - assertTrue(pkg.getZipArchive().isClosed()); - } - } -} diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/ZipFileAssert.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/ZipFileAssert.java index 330720fd30..3706085c19 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/ZipFileAssert.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/ZipFileAssert.java @@ -18,6 +18,7 @@ package org.apache.poi.openxml4j.opc; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -31,20 +32,26 @@ import java.util.TreeMap; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import org.junit.Assert; - import junit.framework.AssertionFailedError; +import org.apache.poi.util.IOUtils; +import org.junit.Assert; +import org.xmlunit.builder.DiffBuilder; +import org.xmlunit.builder.Input; +import org.xmlunit.diff.Comparison; +import org.xmlunit.diff.ComparisonResult; +import org.xmlunit.diff.DefaultNodeMatcher; +import org.xmlunit.diff.Diff; +import org.xmlunit.diff.DifferenceEvaluator; +import org.xmlunit.diff.ElementSelectors; /** * Compare the contents of 2 zip files. */ -public class ZipFileAssert { +public final class ZipFileAssert { private ZipFileAssert() { } - static final int BUFFER_SIZE = 2048; - - protected static void equals( + private static void equals( TreeMap<String, ByteArrayOutputStream> file1, TreeMap<String, ByteArrayOutputStream> file2) { Set<String> listFile1 = file1.keySet(); @@ -52,32 +59,37 @@ public class ZipFileAssert { for (String fileName : listFile1) { // extract the contents for both - ByteArrayOutputStream contain2 = file2.get(fileName); ByteArrayOutputStream contain1 = file1.get(fileName); + ByteArrayOutputStream contain2 = file2.get(fileName); assertNotNull(fileName + " not found in 2nd zip", contain2); // no need to check for contain1. The key come from it - if ((fileName.endsWith(".xml")) || fileName.endsWith(".rels")) { + if (fileName.matches(".*\\.(xml|rels)$")) { // we have a xml file - // TODO - // YK: the original OpenXML4J version attempted to compare xml using xmlunit (http://xmlunit.sourceforge.net), - // but POI does not depend on this library + final Diff diff = DiffBuilder. + compare(Input.fromByteArray(contain1.toByteArray())). + withTest(Input.fromByteArray(contain2.toByteArray())). + ignoreWhitespace(). + checkForSimilar(). + withDifferenceEvaluator(new IgnoreXMLDeclEvaluator()). + withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndAllAttributes, ElementSelectors.byNameAndText)). + build(); + assertFalse(fileName+": "+diff.toString(), diff.hasDifferences()); } else { // not xml, may be an image or other binary format - Assert.assertEquals(fileName + " does not have the same size in both zip:", contain2.size(), contain1.size()); + Assert.assertEquals(fileName + " does not have the same size in both zip:", contain1.size(), contain2.size()); assertArrayEquals("contents differ", contain1.toByteArray(), contain2.toByteArray()); } } } - protected static TreeMap<String, ByteArrayOutputStream> decompress( + private static TreeMap<String, ByteArrayOutputStream> decompress( File filename) throws IOException { // store the zip content in memory // let s assume it is not Go ;-) TreeMap<String, ByteArrayOutputStream> zipContent = new TreeMap<>(); - byte data[] = new byte[BUFFER_SIZE]; /* Open file to decompress */ FileInputStream file_decompress = new FileInputStream(filename); @@ -89,20 +101,12 @@ public class ZipFileAssert { /* Processing entries of the zip file */ ZipEntry entree; - int count; while ((entree = zis.getNextEntry()) != null) { /* Create a array for the current entry */ ByteArrayOutputStream byteArray = new ByteArrayOutputStream(); + IOUtils.copy(zis, byteArray); zipContent.put(entree.getName(), byteArray); - - /* copy in memory */ - while ((count = zis.read(data, 0, BUFFER_SIZE)) != -1) { - byteArray.write(data, 0, count); - } - /* Flush the buffer */ - byteArray.flush(); - byteArray.close(); } zis.close(); @@ -136,4 +140,29 @@ public class ZipFileAssert { throw new AssertionFailedError(e.toString()); } } + + private static class IgnoreXMLDeclEvaluator implements DifferenceEvaluator { + public ComparisonResult evaluate(final Comparison comparison, final ComparisonResult outcome) { + if (outcome != ComparisonResult.EQUAL) { + // only evaluate differences + switch (comparison.getType()) { + case CHILD_NODELIST_SEQUENCE: + case XML_STANDALONE: + case NAMESPACE_PREFIX: + return ComparisonResult.SIMILAR; + case TEXT_VALUE: + switch (comparison.getControlDetails().getTarget().getParentNode().getNodeName()) { + case "dcterms:created": + case "dc:creator": + return ComparisonResult.SIMILAR; + } + break; + default: + break; + } + } + + return outcome; + } + } } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/internal/marshallers/TestZipPackagePropertiesMarshaller.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/internal/marshallers/TestZipPackagePropertiesMarshaller.java index 4ff321e370..f9abb2fe8b 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/internal/marshallers/TestZipPackagePropertiesMarshaller.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/internal/marshallers/TestZipPackagePropertiesMarshaller.java @@ -17,11 +17,8 @@ package org.apache.poi.openxml4j.opc.internal.marshallers; -import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.PackagingURIHelper; -import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; -import org.apache.poi.openxml4j.opc.internal.PartMarshaller; -import org.junit.Test; +import static org.apache.poi.openxml4j.opc.PackagingURIHelper.PACKAGE_RELATIONSHIPS_ROOT_URI; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -29,8 +26,11 @@ import java.io.OutputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; -import static org.apache.poi.openxml4j.opc.PackagingURIHelper.PACKAGE_RELATIONSHIPS_ROOT_URI; -import static org.junit.Assert.assertTrue; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; +import org.apache.poi.openxml4j.opc.internal.PartMarshaller; +import org.junit.Test; public class TestZipPackagePropertiesMarshaller { private PartMarshaller marshaller = new ZipPackagePropertiesMarshaller(); @@ -58,7 +58,7 @@ public class TestZipPackagePropertiesMarshaller { marshaller.marshall(new PackagePropertiesPart(null, PackagingURIHelper.createPartName(PACKAGE_RELATIONSHIPS_ROOT_URI)), new ZipOutputStream(new ByteArrayOutputStream()) { @Override - public void putNextEntry(ZipEntry e) throws IOException { + public void putNextEntry(final ZipEntry archiveEntry) throws IOException { throw new IOException("TestException"); } }); |