From: Nick Burch Date: Sun, 30 Nov 2014 16:59:06 +0000 (+0000) Subject: More detection for older Excel formats when opening the POIFS Stream, and a more... X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=687321411365f660244df0bbe66f824889bd0da9;p=poi.git More detection for older Excel formats when opening the POIFS Stream, and a more specific exception for non-OLE2 files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642565 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java index c42e3adb6d..931e93d032 100644 --- a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java @@ -24,6 +24,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import org.apache.poi.hssf.OldExcelFormatException; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.CodepageRecord; import org.apache.poi.hssf.record.FormulaRecord; @@ -37,6 +38,7 @@ import org.apache.poi.hssf.record.RecordInputStream; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentNode; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.NotOLE2FileException; import org.apache.poi.ss.usermodel.Cell; /** @@ -65,12 +67,10 @@ public class OldExcelExtractor { public OldExcelExtractor(File f) throws IOException { try { open(new NPOIFSFileSystem(f)); - } catch (IOException e) { - if (e.getMessage().startsWith("Invalid header signature")) { - open(new FileInputStream(f)); - } else { - throw e; - } + } catch (OldExcelFormatException oe) { + open(new FileInputStream(f)); + } catch (NotOLE2FileException e) { + open(new FileInputStream(f)); } } public OldExcelExtractor(NPOIFSFileSystem fs) throws IOException { diff --git a/src/java/org/apache/poi/poifs/filesystem/NotOLE2FileException.java b/src/java/org/apache/poi/poifs/filesystem/NotOLE2FileException.java new file mode 100644 index 0000000000..18387cfb77 --- /dev/null +++ b/src/java/org/apache/poi/poifs/filesystem/NotOLE2FileException.java @@ -0,0 +1,30 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.poifs.filesystem; + +import java.io.IOException; + +/** + * This exception is thrown when we try to open a file that doesn't + * seem to actually be an OLE2 file after all + */ +public class NotOLE2FileException extends IOException { + public NotOLE2FileException(String s) { + super(s); + } +} diff --git a/src/java/org/apache/poi/poifs/filesystem/OfficeXmlFileException.java b/src/java/org/apache/poi/poifs/filesystem/OfficeXmlFileException.java index b83ccd0f27..927fd94f79 100644 --- a/src/java/org/apache/poi/poifs/filesystem/OfficeXmlFileException.java +++ b/src/java/org/apache/poi/poifs/filesystem/OfficeXmlFileException.java @@ -1,4 +1,3 @@ - /* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -16,20 +15,14 @@ limitations under the License. ==================================================================== */ - - package org.apache.poi.poifs.filesystem; /** * This exception is thrown when we try to open a file that's actually * an Office 2007+ XML file, rather than an OLE2 file (which is what - * POI works with) - * - * @author Nick Burch + * POIFS works with) */ - -public class OfficeXmlFileException extends IllegalArgumentException -{ +public class OfficeXmlFileException extends IllegalArgumentException { public OfficeXmlFileException(String s) { super(s); } diff --git a/src/java/org/apache/poi/poifs/storage/HeaderBlock.java b/src/java/org/apache/poi/poifs/storage/HeaderBlock.java index fc76c26ddc..560111fa01 100644 --- a/src/java/org/apache/poi/poifs/storage/HeaderBlock.java +++ b/src/java/org/apache/poi/poifs/storage/HeaderBlock.java @@ -23,8 +23,10 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.Arrays; +import org.apache.poi.hssf.OldExcelFormatException; import org.apache.poi.poifs.common.POIFSBigBlockSize; import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.poifs.filesystem.NotOLE2FileException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.util.HexDump; import org.apache.poi.util.IOUtils; @@ -124,20 +126,45 @@ public final class HeaderBlock implements HeaderBlockConstants { if (signature != _signature) { // Is it one of the usual suspects? byte[] OOXML_FILE_HEADER = POIFSConstants.OOXML_FILE_HEADER; - if(_data[0] == OOXML_FILE_HEADER[0] && + if (_data[0] == OOXML_FILE_HEADER[0] && _data[1] == OOXML_FILE_HEADER[1] && _data[2] == OOXML_FILE_HEADER[2] && _data[3] == OOXML_FILE_HEADER[3]) { throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. You are calling the part of POI that deals with OLE2 Office Documents. You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); } - if ((signature & 0xFF8FFFFFFFFFFFFFL) == 0x0010000200040009L) { - // BIFF2 raw stream starts with BOF (sid=0x0009, size=0x0004, data=0x00t0) - throw new IllegalArgumentException("The supplied data appears to be in BIFF2 format. " - + "POI only supports BIFF8 format"); - } + + if (_data[0] == 0x09 && _data[1] == 0x00 && // sid=0x0009 + _data[2] == 0x04 && _data[3] == 0x00 && // size=0x0004 + _data[4] == 0x00 && _data[5] == 0x00 && // unused + (_data[6] == 0x01 || _data[6] == 0x02 || _data[6] == 0x04) && + _data[7] == 0x00) { + // BIFF2 raw stream + throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. " + + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); + } + if (_data[0] == 0x09 && _data[1] == 0x02 && // sid=0x0209 + _data[2] == 0x06 && _data[3] == 0x00 && // size=0x0006 + _data[4] == 0x00 && _data[5] == 0x00 && // unused + (_data[6] == 0x01 || _data[6] == 0x02 || _data[6] == 0x04) && + _data[7] == 0x00) { + // BIFF3 raw stream + throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. " + + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); + } + if (_data[0] == 0x09 && _data[1] == 0x04 && // sid=0x0409 + _data[2] == 0x06 && _data[3] == 0x00 && // size=0x0006 + _data[4] == 0x00 && _data[5] == 0x00) { // unused + if (((_data[6] == 0x01 || _data[6] == 0x02 || _data[6] == 0x04) && + _data[7] == 0x00) || + (_data[6] == 0x00 && _data[7] == 0x01)) { + // BIFF4 raw stream + throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. " + + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); + } + } // Give a generic error if the OLE2 signature isn't found - throw new IOException("Invalid header signature; read " + throw new NotOLE2FileException("Invalid header signature; read " + longToHex(signature) + ", expected " + longToHex(_signature) + " - Your file appears " + "not to be a valid OLE2 document");