… contains either non-latin entries or the compression type can't be handled; the workaround is to iterate over the stream and not the directory. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1736933 13f79535-47bb-0310-9956-ffa450edef68 tags/REL_3_15_BETA2
@@ -18,6 +18,7 @@ | |||
package org.apache.poi.openxml4j.opc; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.OutputStream; | |||
@@ -88,6 +89,7 @@ public final class ZipPackage extends Package { | |||
*/ | |||
// Builds a package backed by a zip *stream* rather than a zip file:
// the caller's stream is wrapped by ZipHelper.openZipStream and the result is
// exposed to the rest of the class through a ZipInputStreamZipEntrySource.
// Declares IOException, which ZipHelper.openZipStream may throw.
ZipPackage(InputStream in, PackageAccess access) throws IOException { | |||
super(access); | |||
// The wrapped stream is not closed here because it is handed to the entry
// source stored in zipArchive; presumably that is why the "resource"
// warning is suppressed on this local.
@SuppressWarnings("resource") | |||
ThresholdInputStream zis = ZipHelper.openZipStream(in); | |||
this.zipArchive = new ZipInputStreamZipEntrySource(zis); | |||
} | |||
@@ -101,18 +103,7 @@ public final class ZipPackage extends Package { | |||
* The package access mode. | |||
*/ | |||
// Path-based constructor.
// NOTE(review): this span comes from a diff hunk (-101,18 +103,7) whose +/- markers
// were lost, so old and new statements appear together. The body from super(access)
// through the ZipFileZipEntrySource assignment looks like the removed implementation,
// and the single this(new File(path), access) call looks like its replacement, i.e. the
// constructor now delegates to the File-based overload — confirm against the commit.
ZipPackage(String path, PackageAccess access) { | |||
super(access); | |||
final ZipFile zipFile; | |||
try { | |||
zipFile = ZipHelper.openZipFile(path); | |||
} catch (IOException e) { | |||
// An I/O failure while opening is rethrown as InvalidOperationException with the cause kept.
throw new InvalidOperationException( | |||
"Can't open the specified file: '" + path + "'", e); | |||
} | |||
this.zipArchive = new ZipFileZipEntrySource(zipFile); | |||
// Delegation to the File overload; a this(...) call cannot coexist with the
// super(access) call above, which supports reading the lines above as removed.
this(new File(path), access); | |||
} | |||
/** | |||
@@ -123,19 +114,33 @@ public final class ZipPackage extends Package { | |||
* @param access | |||
* The package access mode. | |||
*/ | |||
// File-based constructor with a stream fallback.
// First tries ZipHelper.openZipFile(file) and wraps the result in a
// ZipFileZipEntrySource. If that throws IOException and the access mode is not
// WRITE, it logs an error and instead opens the file as a FileInputStream,
// wraps it with ZipHelper.openZipStream and uses a ZipInputStreamZipEntrySource.
// Failures are reported as InvalidOperationException.
// NOTE(review): this span comes from a diff hunk (-123,19 +114,33) whose +/- markers
// were lost. The declarations/assignments of an outer "final ZipFile zipFile", the
// two-line throw directly inside the first catch, and the assignment
// "this.zipArchive = new ZipFileZipEntrySource(zipFile)" look like the removed
// version; the "ze" variable and the fallback look like the added version — confirm.
@SuppressWarnings("resource") | |||
ZipPackage(File file, PackageAccess access) { | |||
super(access); | |||
final ZipFile zipFile; | |||
ZipEntrySource ze; | |||
try { | |||
zipFile = ZipHelper.openZipFile(file); | |||
final ZipFile zipFile = ZipHelper.openZipFile(file); | |||
ze = new ZipFileZipEntrySource(zipFile); | |||
} catch (IOException e) { | |||
throw new InvalidOperationException( | |||
"Can't open the specified file: '" + file + "'", e); | |||
// probably not happening with write access - not sure how to handle the default read-write access ... | |||
// Only PackageAccess.WRITE fails fast here; any other access mode takes the stream fallback below.
if (access == PackageAccess.WRITE) { | |||
throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e); | |||
} | |||
// NOTE(review): the exception e is not passed to the logger, so the original
// failure reason is not part of this log entry — confirm whether that is intended.
logger.log(POILogger.ERROR, "Error in zip file "+file+" - falling back to stream processing (i.e. ignoring zip central directory)"); | |||
// some zips can't be opened via ZipFile in JDK6, as the central directory | |||
// contains either non-latin entries or the compression type can't be handled | |||
// the workaround is to iterate over the stream and not the directory | |||
FileInputStream fis; | |||
try { | |||
fis = new FileInputStream(file); | |||
// NOTE(review): if openZipStream throws after fis was opened, nothing in this
// block closes fis — possible file-handle leak; confirm and close on failure.
ThresholdInputStream zis = ZipHelper.openZipStream(fis); | |||
ze = new ZipInputStreamZipEntrySource(zis); | |||
} catch (IOException e2) { | |||
// NOTE(review): the rethrown cause is the outer exception e (from the ZipFile
// attempt); e2 from the stream attempt is discarded — confirm this is deliberate.
throw new InvalidOperationException("Can't open the specified file: '" + file + "'", e); | |||
} | |||
} | |||
this.zipArchive = new ZipFileZipEntrySource(zipFile); | |||
// Whichever entry source was built above (file-based or stream-based) becomes the archive.
this.zipArchive = ze; | |||
} | |||
/** |
@@ -221,6 +221,7 @@ public final class ZipHelper { | |||
* The stream to open. | |||
* @return The zip stream freshly open. | |||
*/ | |||
@SuppressWarnings("resource") | |||
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { | |||
// Peek at the first few bytes to sanity check | |||
InputStream checkedStream = prepareToCheckHeader(stream); | |||
@@ -228,8 +229,7 @@ public final class ZipHelper { | |||
// Open as a proper zip stream | |||
InputStream zis = new ZipInputStream(checkedStream); | |||
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis); | |||
return tis; | |||
return ZipSecureFile.addThreshold(zis); | |||
} | |||
/** | |||
@@ -262,8 +262,6 @@ public final class ZipHelper { | |||
* @return The zip archive freshly open. | |||
*/ | |||
// Convenience overload: converts the path string to a File and delegates to
// openZipFile(File). Declares IOException, which the delegate may throw.
// NOTE(review): this span comes from a diff hunk (-262,8 +262,6) whose +/- markers
// were lost; the two-statement form (local "f" plus return) looks like the removed
// version and the one-line return like its replacement — both do the same thing.
public static ZipFile openZipFile(String path) throws IOException { | |||
File f = new File(path); | |||
return openZipFile(f); | |||
return openZipFile(new File(path)); | |||
} | |||
} |
@@ -32,6 +32,7 @@ import org.apache.poi.POIOLE2TextExtractor; | |||
import org.apache.poi.POITextExtractor; | |||
import org.apache.poi.POIXMLException; | |||
import org.apache.poi.POIXMLTextExtractor; | |||
import org.apache.poi.UnsupportedFileFormatException; | |||
import org.apache.poi.hdgf.extractor.VisioTextExtractor; | |||
import org.apache.poi.hpbf.extractor.PublisherTextExtractor; | |||
import org.apache.poi.hslf.extractor.PowerPointExtractor; | |||
@@ -643,10 +644,7 @@ public class TestExtractorFactory { | |||
public void testPackage() throws Exception { | |||
// Excel | |||
POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ)); | |||
assertTrue( | |||
extractor | |||
instanceof XSSFExcelExtractor | |||
); | |||
assertTrue(extractor instanceof XSSFExcelExtractor); | |||
extractor.close(); | |||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString())); | |||
assertTrue(extractor.getText().length() > 200); | |||
@@ -654,48 +652,33 @@ public class TestExtractorFactory { | |||
// Word | |||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString())); | |||
assertTrue( | |||
extractor | |||
instanceof XWPFWordExtractor | |||
); | |||
assertTrue(extractor instanceof XWPFWordExtractor); | |||
extractor.close(); | |||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString())); | |||
assertTrue( | |||
extractor.getText().length() > 120 | |||
); | |||
assertTrue(extractor.getText().length() > 120); | |||
extractor.close(); | |||
// PowerPoint | |||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString())); | |||
assertTrue( | |||
extractor | |||
instanceof XSLFPowerPointExtractor | |||
); | |||
assertTrue(extractor instanceof XSLFPowerPointExtractor); | |||
extractor.close(); | |||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString())); | |||
assertTrue( | |||
extractor.getText().length() > 120 | |||
); | |||
assertTrue(extractor.getText().length() > 120); | |||
extractor.close(); | |||
// Visio | |||
extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString())); | |||
assertTrue( | |||
extractor | |||
instanceof XDGFVisioExtractor | |||
); | |||
assertTrue( | |||
extractor.getText().length() > 20 | |||
); | |||
assertTrue(extractor instanceof XDGFVisioExtractor); | |||
assertTrue(extractor.getText().length() > 20); | |||
extractor.close(); | |||
// Text | |||
try { | |||
ExtractorFactory.createExtractor(OPCPackage.open(txt.toString())); | |||
fail(); | |||
} catch(InvalidOperationException e) { | |||
} catch(UnsupportedFileFormatException e) { | |||
// Good | |||
} | |||
} |
@@ -41,8 +41,6 @@ import java.util.List; | |||
import java.util.TreeMap; | |||
import java.util.regex.Pattern; | |||
import java.util.zip.ZipEntry; | |||
import java.util.zip.ZipError; | |||
import java.util.zip.ZipException; | |||
import java.util.zip.ZipFile; | |||
import java.util.zip.ZipOutputStream; | |||
@@ -50,6 +48,7 @@ import org.apache.poi.EncryptedDocumentException; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.POITestCase; | |||
import org.apache.poi.POIXMLException; | |||
import org.apache.poi.UnsupportedFileFormatException; | |||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException; | |||
@@ -744,7 +743,7 @@ public final class TestPackage { | |||
try { | |||
OPCPackage.open(files.getFile("SampleSS.txt")); | |||
fail("Shouldn't be able to open Plain Text"); | |||
} catch (InvalidOperationException e) { | |||
} catch (UnsupportedFileFormatException e) { | |||
// Unhelpful low-level error, sorry | |||
} | |||
} |
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue; | |||
import static org.junit.Assert.fail; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.File; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.OutputStreamWriter; | |||
@@ -35,6 +36,8 @@ import org.apache.poi.POIXMLException; | |||
import org.apache.poi.extractor.ExtractorFactory; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; | |||
import org.apache.poi.sl.usermodel.SlideShow; | |||
import org.apache.poi.sl.usermodel.SlideShowFactory; | |||
import org.apache.poi.ss.usermodel.Workbook; | |||
import org.apache.poi.ss.usermodel.WorkbookFactory; | |||
import org.apache.poi.xssf.XSSFTestDataSamples; | |||
@@ -164,4 +167,11 @@ public class TestZipPackage { | |||
ExtractorFactory.setThreadPrefersEventExtractors(before); | |||
} | |||
} | |||
// Smoke test: loads a sample .pptx through SlideShowFactory.create(File) and closes it.
// There are no assertions; the test passes as long as no exception is thrown.
// Presumably the sample (file name ends in "jdk6error") is an archive whose central
// directory cannot be read via ZipFile, exercising the stream fallback — confirm.
@Test | |||
public void unparseableCentralDirectory() throws IOException { | |||
File f = OpenXML4JTestDataSamples.getSampleFile("at.pzp.www_uploads_media_PP_Scheinecker-jdk6error.pptx"); | |||
SlideShow<?,?> ppt = SlideShowFactory.create(f); | |||
// Closing releases the slide show opened above.
ppt.close(); | |||
} | |||
} |