From: Andreas Beeker Date: Mon, 28 Mar 2016 22:49:45 +0000 (+0000) Subject: some zips can't be opened via ZipFile in JDK6, as the central directory X-Git-Tag: REL_3_15_BETA2~397 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=1e65636048704a6410bff54ee245ec083585a366;p=poi.git some zips can't be opened via ZipFile in JDK6, as the central directory contains either non-latin entries or the compression type can't be handled. The workaround is to iterate over the stream and not the directory. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1736933 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java index c5ac56391d..7e2d546ec7 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java @@ -18,6 +18,7 @@ package org.apache.poi.openxml4j.opc; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -88,6 +89,7 @@ public final class ZipPackage extends Package { */ ZipPackage(InputStream in, PackageAccess access) throws IOException { super(access); + @SuppressWarnings("resource") ThresholdInputStream zis = ZipHelper.openZipStream(in); this.zipArchive = new ZipInputStreamZipEntrySource(zis); } @@ -101,18 +103,7 @@ public final class ZipPackage extends Package { * The package access mode. */ ZipPackage(String path, PackageAccess access) { - super(access); - - final ZipFile zipFile; - - try { - zipFile = ZipHelper.openZipFile(path); - } catch (IOException e) { - throw new InvalidOperationException( - "Can't open the specified file: '" + path + "'", e); - } - - this.zipArchive = new ZipFileZipEntrySource(zipFile); + this(new File(path), access); } /** @@ -123,19 +114,33 @@ public final class ZipPackage extends Package { * @param access * The package access mode. 
*/ + @SuppressWarnings("resource") ZipPackage(File file, PackageAccess access) { super(access); - final ZipFile zipFile; - + ZipEntrySource ze; try { - zipFile = ZipHelper.openZipFile(file); + final ZipFile zipFile = ZipHelper.openZipFile(file); + ze = new ZipFileZipEntrySource(zipFile); } catch (IOException e) { - throw new InvalidOperationException( - "Can't open the specified file: '" + file + "'", e); + // probably not happening with write access - not sure how to handle the default read-write access ... + if (access == PackageAccess.WRITE) { + throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e); + } + logger.log(POILogger.ERROR, "Error in zip file "+file+" - falling back to stream processing (i.e. ignoring zip central directory)"); + // some zips can't be opened via ZipFile in JDK6, as the central directory + // contains either non-latin entries or the compression type can't be handled + // the workaround is to iterate over the stream and not the directory + FileInputStream fis; + try { + fis = new FileInputStream(file); + ThresholdInputStream zis = ZipHelper.openZipStream(fis); + ze = new ZipInputStreamZipEntrySource(zis); + } catch (IOException e2) { + throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e); + } } - - this.zipArchive = new ZipFileZipEntrySource(zipFile); + this.zipArchive = ze; } /** diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java index 786a69f0c4..56536d2382 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java @@ -221,6 +221,7 @@ public final class ZipHelper { * The stream to open. * @return The zip stream freshly open. 
*/ + @SuppressWarnings("resource") public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { // Peek at the first few bytes to sanity check InputStream checkedStream = prepareToCheckHeader(stream); @@ -228,8 +229,7 @@ public final class ZipHelper { // Open as a proper zip stream InputStream zis = new ZipInputStream(checkedStream); - ThresholdInputStream tis = ZipSecureFile.addThreshold(zis); - return tis; + return ZipSecureFile.addThreshold(zis); } /** @@ -262,8 +262,6 @@ public final class ZipHelper { * @return The zip archive freshly open. */ public static ZipFile openZipFile(String path) throws IOException { - File f = new File(path); - - return openZipFile(f); + return openZipFile(new File(path)); } } diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 0be9740b78..4a4a61e45e 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -32,6 +32,7 @@ import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POITextExtractor; import org.apache.poi.POIXMLException; import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.UnsupportedFileFormatException; import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.hslf.extractor.PowerPointExtractor; @@ -643,10 +644,7 @@ public class TestExtractorFactory { public void testPackage() throws Exception { // Excel POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); - assertTrue( - extractor - instanceof XSSFExcelExtractor - ); + assertTrue(extractor instanceof XSSFExcelExtractor); extractor.close(); extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString())); assertTrue(extractor.getText().length() > 
200); @@ -654,48 +652,33 @@ public class TestExtractorFactory { // Word extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString())); - assertTrue( - extractor - instanceof XWPFWordExtractor - ); + assertTrue(extractor instanceof XWPFWordExtractor); extractor.close(); extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString())); - assertTrue( - extractor.getText().length() > 120 - ); + assertTrue(extractor.getText().length() > 120); extractor.close(); // PowerPoint extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString())); - assertTrue( - extractor - instanceof XSLFPowerPointExtractor - ); + assertTrue(extractor instanceof XSLFPowerPointExtractor); extractor.close(); extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString())); - assertTrue( - extractor.getText().length() > 120 - ); + assertTrue(extractor.getText().length() > 120); extractor.close(); // Visio extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString())); - assertTrue( - extractor - instanceof XDGFVisioExtractor - ); - assertTrue( - extractor.getText().length() > 20 - ); + assertTrue(extractor instanceof XDGFVisioExtractor); + assertTrue(extractor.getText().length() > 20); extractor.close(); // Text try { ExtractorFactory.createExtractor(OPCPackage.open(txt.toString())); fail(); - } catch(InvalidOperationException e) { + } catch(UnsupportedFileFormatException e) { // Good } } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java index 0834ad223a..d2e1cff350 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java @@ -41,8 +41,6 @@ import java.util.List; import java.util.TreeMap; import java.util.regex.Pattern; import java.util.zip.ZipEntry; -import java.util.zip.ZipError; -import java.util.zip.ZipException; import 
java.util.zip.ZipFile; import java.util.zip.ZipOutputStream; @@ -50,6 +48,7 @@ import org.apache.poi.EncryptedDocumentException; import org.apache.poi.POIDataSamples; import org.apache.poi.POITestCase; import org.apache.poi.POIXMLException; +import org.apache.poi.UnsupportedFileFormatException; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; @@ -744,7 +743,7 @@ public final class TestPackage { try { OPCPackage.open(files.getFile("SampleSS.txt")); fail("Shouldn't be able to open Plain Text"); - } catch (InvalidOperationException e) { + } catch (UnsupportedFileFormatException e) { // Unhelpful low-level error, sorry } } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java index d292749e29..b83f6f972d 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; @@ -35,6 +36,8 @@ import org.apache.poi.POIXMLException; import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; +import org.apache.poi.sl.usermodel.SlideShow; +import org.apache.poi.sl.usermodel.SlideShowFactory; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.xssf.XSSFTestDataSamples; @@ -164,4 +167,11 @@ public class TestZipPackage { ExtractorFactory.setThreadPrefersEventExtractors(before); } } + + @Test + public void unparseableCentralDirectory() throws 
IOException { + File f = OpenXML4JTestDataSamples.getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx"); + SlideShow ppt = SlideShowFactory.create(f); + ppt.close(); + } } diff --git a/test-data/openxml4j/at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx b/test-data/openxml4j/at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx new file mode 100644 index 0000000000..7f9ff0c6f6 Binary files /dev/null and b/test-data/openxml4j/at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx differ