]> source.dussan.org Git - poi.git/commitdiff
Before opening an OOXML file, check the first few bytes, and give helpful exceptions...
authorNick Burch <nick@apache.org>
Tue, 15 Mar 2016 12:30:45 +0000 (12:30 +0000)
committerNick Burch <nick@apache.org>
Tue, 15 Mar 2016 12:30:45 +0000 (12:30 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1735069 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java

index 549d2e576ffddfa8931d0fe27b3bbdb215e15f62..c5ac56391d072ff8d942614653efb62ac523f16e 100644 (file)
@@ -28,6 +28,7 @@ import java.util.zip.ZipOutputStream;
 
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
 import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
@@ -194,6 +195,7 @@ public final class ZipPackage extends Package {
         // At this point, we should have loaded the content type part
         if (this.contentTypeManager == null) {
             // Is it a different Zip-based format?
+            int numEntries = 0;
             boolean hasMimetype = false;
             boolean hasSettingsXML = false;
             entries = this.zipArchive.getEntries();
@@ -205,12 +207,18 @@ public final class ZipPackage extends Package {
                 if (entry.getName().equals("settings.xml")) {
                     hasSettingsXML = true;
                 }
+                numEntries++;
             }
             if (hasMimetype && hasSettingsXML) {
                 throw new ODFNotOfficeXmlFileException(
                    "The supplied data appears to be in ODF (Open Document) Format. " +
                    "Formats like these (eg ODS, ODP) are not supported, try Apache ODFToolkit");
             }
+            if (numEntries == 0) {
+                throw new NotOfficeXmlFileException(
+                   "No valid entries or contents found, this is not a valid OOXML " +
+                   "(Office Open XML) file");
+            }
 
             // Fallback exception
             throw new InvalidFormatException(
index 2a536dc7bfb0cdbb571e70539cc7f696e92ff88f..786a69f0c49032d5ca879d32cb192452dcd7a075 100644 (file)
 package org.apache.poi.openxml4j.opc.internal;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Enumeration;
@@ -27,12 +29,18 @@ import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 import java.util.zip.ZipInputStream;
 
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
 import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
 import org.apache.poi.openxml4j.opc.ZipPackage;
 import org.apache.poi.openxml4j.util.ZipSecureFile;
 import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
+import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.poifs.storage.HeaderBlockConstants;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LittleEndian;
 
 public final class ZipHelper {
     /**
@@ -144,6 +152,67 @@ public final class ZipHelper {
             return null;
         }
     }
+    
+    /**
+     * Verifies that the given stream starts with a Zip structure.
+     * 
+     * Warning - this will consume the first few bytes of the stream,
+     *  you should push-back or reset the stream after use!
+     */
+    public static void verifyZipHeader(InputStream stream) 
+            throws NotOfficeXmlFileException, IOException {
+        // Grab the first 8 bytes
+        byte[] data = new byte[8];
+        IOUtils.readFully(stream, data);
+        
+        // OLE2?
+        long signature = LittleEndian.getLong(data);
+        if (signature == HeaderBlockConstants._signature) {
+            throw new OLE2NotOfficeXmlFileException(
+                "The supplied data appears to be in the OLE2 Format. " +
+                "You are calling the part of POI that deals with OOXML "+
+                "(Office Open XML) Documents. You need to call a different " +
+                "part of POI to process this data (eg HSSF instead of XSSF)");
+        }
+        
+        // Raw XML?
+        byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
+        if (data[0] == RAW_XML_FILE_HEADER[0] &&
+            data[1] == RAW_XML_FILE_HEADER[1] &&
+            data[2] == RAW_XML_FILE_HEADER[2] &&
+            data[3] == RAW_XML_FILE_HEADER[3] &&
+            data[4] == RAW_XML_FILE_HEADER[4]) {
+            throw new NotOfficeXmlFileException(
+                "The supplied data appears to be a raw XML file. " +
+                "Formats such as Office 2003 XML are not supported");
+        }
+
+        // Don't check for a Zip header, as to maintain backwards
+        //  compatibility we need to let them seek over junk at the
+        //  start before beginning processing.
+        
+        // Put things back
+        if (stream instanceof PushbackInputStream) {
+            ((PushbackInputStream)stream).unread(data);
+        } else if (stream.markSupported()) {
+            stream.reset();
+        } else if (stream instanceof FileInputStream) {
+            // File open check, about to be closed, nothing to do
+        } else {
+            // Oh dear... I hope you know what you're doing!
+        }
+    }
+    
+    private static InputStream prepareToCheckHeader(InputStream stream) {
+        if (stream instanceof PushbackInputStream) {
+            return stream;
+        }
+        if (stream.markSupported()) {
+            stream.mark(8);
+            return stream;
+        }
+        return new PushbackInputStream(stream, 8);
+    }
 
     /**
      * Opens the specified stream as a secure zip
@@ -153,7 +222,12 @@ public final class ZipHelper {
      * @return The zip stream freshly open.
      */
     public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
-        InputStream zis = new ZipInputStream(stream);
+        // Peek at the first few bytes to sanity check
+        InputStream checkedStream = prepareToCheckHeader(stream);
+        verifyZipHeader(checkedStream);
+        
+        // Open as a proper zip stream
+        InputStream zis = new ZipInputStream(checkedStream);
         ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
         return tis;
     }
@@ -170,7 +244,13 @@ public final class ZipHelper {
         if (!file.exists()) {
             return null;
         }
+        
+        // Peek at the first few bytes to sanity check
+        FileInputStream input = new FileInputStream(file);
+        verifyZipHeader(input);
+        input.close();
 
+        // Open as a proper zip file
         return new ZipSecureFile(file);
     }
 
index 7a113d72863156297127d1b8e91a9742bc0e7496..9cde41486d3e69a96a4440248dfada3eb78abc1f 100644 (file)
@@ -51,6 +51,7 @@ import org.apache.poi.POIXMLException;
 import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
 import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
 import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
 import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
@@ -679,14 +680,38 @@ public final class TestPackage {
         POIDataSamples files = POIDataSamples.getSpreadSheetInstance();
         
         // OLE2 - Stream
-//        try {
-//            OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
-//            fail("Shouldn't be able to open OLE2");
-//        } catch (OLE2NotOfficeXmlFileException e) {
-//            // TODO Check details
-//        }
-        
+        try {
+            OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
+            fail("Shouldn't be able to open OLE2");
+        } catch (OLE2NotOfficeXmlFileException e) {
+            assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
+            assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
+        }
         // OLE2 - File
+        try {
+            OPCPackage.open(files.getFile("SampleSS.xls"));
+            fail("Shouldn't be able to open OLE2");
+        } catch (OLE2NotOfficeXmlFileException e) {
+            assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
+            assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
+        }
+        
+        // Raw XML - Stream
+        try {
+            OPCPackage.open(files.openResourceAsStream("SampleSS.xml"));
+            fail("Shouldn't be able to open XML");
+        } catch (NotOfficeXmlFileException e) {
+            assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
+            assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
+        }
+        // Raw XML - File
+        try {
+            OPCPackage.open(files.getFile("SampleSS.xml"));
+            fail("Shouldn't be able to open XML");
+        } catch (NotOfficeXmlFileException e) {
+            assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
+            assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
+        }
         
         // ODF / ODS - Stream
         try {
@@ -707,9 +732,6 @@ public final class TestPackage {
         
         // Plain Text - Stream
         // Plain Text - File
-        
-        // Raw XML - Stream
-        // Raw XML - File
     }
 
     @Test(expected=IOException.class)