import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
// At this point, we should have loaded the content type part
if (this.contentTypeManager == null) {
// Is it a different Zip-based format?
+ int numEntries = 0;
boolean hasMimetype = false;
boolean hasSettingsXML = false;
entries = this.zipArchive.getEntries();
if (entry.getName().equals("settings.xml")) {
hasSettingsXML = true;
}
+ numEntries++;
}
if (hasMimetype && hasSettingsXML) {
throw new ODFNotOfficeXmlFileException(
"The supplied data appears to be in ODF (Open Document) Format. " +
"Formats like these (eg ODS, ODP) are not supported, try Apache ODFToolkit");
}
+ if (numEntries == 0) {
+ throw new NotOfficeXmlFileException(
+ "No valid entries or contents found, this is not a valid OOXML " +
+ "(Office Open XML) file");
+ }
// Fallback exception
throw new InvalidFormatException(
package org.apache.poi.openxml4j.opc.internal;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.PushbackInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Enumeration;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.ZipPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
+import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.poifs.storage.HeaderBlockConstants;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LittleEndian;
public final class ZipHelper {
/**
return null;
}
}
+
+ /**
+ * Verifies that the given stream starts with a Zip structure.
+ *
+ * Warning - this will consume the first few bytes of the stream,
+ * you should push-back or reset the stream after use!
+ */
+ public static void verifyZipHeader(InputStream stream)
+ throws NotOfficeXmlFileException, IOException {
+ // Grab the first 8 bytes
+ byte[] data = new byte[8];
+ IOUtils.readFully(stream, data);
+
+ // OLE2?
+ long signature = LittleEndian.getLong(data);
+ if (signature == HeaderBlockConstants._signature) {
+ throw new OLE2NotOfficeXmlFileException(
+ "The supplied data appears to be in the OLE2 Format. " +
+ "You are calling the part of POI that deals with OOXML "+
+ "(Office Open XML) Documents. You need to call a different " +
+ "part of POI to process this data (eg HSSF instead of XSSF)");
+ }
+
+ // Raw XML?
+ byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
+ if (data[0] == RAW_XML_FILE_HEADER[0] &&
+ data[1] == RAW_XML_FILE_HEADER[1] &&
+ data[2] == RAW_XML_FILE_HEADER[2] &&
+ data[3] == RAW_XML_FILE_HEADER[3] &&
+ data[4] == RAW_XML_FILE_HEADER[4]) {
+ throw new NotOfficeXmlFileException(
+ "The supplied data appears to be a raw XML file. " +
+ "Formats such as Office 2003 XML are not supported");
+ }
+
+ // Don't check for a Zip header, as to maintain backwards
+ // compatibility we need to let them seek over junk at the
+ // start before beginning processing.
+
+ // Put things back
+ if (stream instanceof PushbackInputStream) {
+ ((PushbackInputStream)stream).unread(data);
+ } else if (stream.markSupported()) {
+ stream.reset();
+ } else if (stream instanceof FileInputStream) {
+ // File open check, about to be closed, nothing to do
+ } else {
+ // Oh dear... I hope you know what you're doing!
+ }
+ }
+
+ private static InputStream prepareToCheckHeader(InputStream stream) {
+ if (stream instanceof PushbackInputStream) {
+ return stream;
+ }
+ if (stream.markSupported()) {
+ stream.mark(8);
+ return stream;
+ }
+ return new PushbackInputStream(stream, 8);
+ }
/**
* Opens the specified stream as a secure zip
* @return The zip stream freshly open.
*/
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
- InputStream zis = new ZipInputStream(stream);
+ // Peek at the first few bytes to sanity check
+ InputStream checkedStream = prepareToCheckHeader(stream);
+ verifyZipHeader(checkedStream);
+
+ // Open as a proper zip stream
+ InputStream zis = new ZipInputStream(checkedStream);
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
return tis;
}
if (!file.exists()) {
return null;
}
+
+ // Peek at the first few bytes to sanity check
+ FileInputStream input = new FileInputStream(file);
+ verifyZipHeader(input);
+ input.close();
+ // Open as a proper zip file
return new ZipSecureFile(file);
}
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
POIDataSamples files = POIDataSamples.getSpreadSheetInstance();
// OLE2 - Stream
-// try {
-// OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
-// fail("Shouldn't be able to open OLE2");
-// } catch (OLE2NotOfficeXmlFileException e) {
-// // TODO Check details
-// }
-
+ try {
+ OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
+ fail("Shouldn't be able to open OLE2");
+ } catch (OLE2NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
+ assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
+ }
// OLE2 - File
+ try {
+ OPCPackage.open(files.getFile("SampleSS.xls"));
+ fail("Shouldn't be able to open OLE2");
+ } catch (OLE2NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
+ assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
+ }
+
+ // Raw XML - Stream
+ try {
+ OPCPackage.open(files.openResourceAsStream("SampleSS.xml"));
+ fail("Shouldn't be able to open XML");
+ } catch (NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
+ assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
+ }
+ // Raw XML - File
+ try {
+ OPCPackage.open(files.getFile("SampleSS.xml"));
+ fail("Shouldn't be able to open XML");
+ } catch (NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
+ assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
+ }
// ODF / ODS - Stream
try {
// Plain Text - Stream
// Plain Text - File
-
- // Raw XML - Stream
- // Raw XML - File
}
@Test(expected=IOException.class)