git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1804854 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_17_FINAL
@@ -17,22 +17,22 @@ | |||
package org.apache.poi.poifs.filesystem; | |||
import org.apache.poi.EncryptedDocumentException; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
import org.apache.poi.poifs.crypt.Decryptor; | |||
import org.apache.poi.poifs.crypt.EncryptionInfo; | |||
import org.apache.poi.util.IOUtils; | |||
import java.io.FilterInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.security.GeneralSecurityException; | |||
import org.apache.poi.EncryptedDocumentException; | |||
import org.apache.poi.poifs.crypt.Decryptor; | |||
import org.apache.poi.poifs.crypt.EncryptionInfo; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.Removal; | |||
/** | |||
* A small base class for the various factories, e.g. WorkbookFactory, | |||
* SlideShowFactory to combine common code here. | |||
*/ | |||
@Internal | |||
public class DocumentFactoryHelper { | |||
/** | |||
* Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using | |||
@@ -81,36 +81,19 @@ public class DocumentFactoryHelper { | |||
/** | |||
* Checks that the supplied InputStream (which MUST | |||
* support mark and reset, or be a PushbackInputStream) | |||
* has a OOXML (zip) header at the start of it. | |||
* If your InputStream does not support mark / reset, | |||
* then wrap it in a PushBackInputStream, then be | |||
* support mark and reset) has a OOXML (zip) header at the start of it.<p> | |||
* | |||
* If unsure if your InputStream does support mark / reset, | |||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make | |||
* sure to always use that, and not the original! | |||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||
* | |||
* @param inp An InputStream which supports either mark/reset | |||
* | |||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == FileMagic.OOXML instead | |||
*/ | |||
@Deprecated | |||
@Removal(version="4.0") | |||
public static boolean hasOOXMLHeader(InputStream inp) throws IOException { | |||
// We want to peek at the first 4 bytes | |||
inp.mark(4); | |||
byte[] header = new byte[4]; | |||
int bytesRead = IOUtils.readFully(inp, header); | |||
// Wind back those 4 bytes | |||
if(inp instanceof PushbackInputStream) { | |||
PushbackInputStream pin = (PushbackInputStream)inp; | |||
pin.unread(header, 0, bytesRead); | |||
} else { | |||
inp.reset(); | |||
} | |||
// Did it match the ooxml zip signature? | |||
return ( | |||
bytesRead == 4 && | |||
header[0] == POIFSConstants.OOXML_FILE_HEADER[0] && | |||
header[1] == POIFSConstants.OOXML_FILE_HEADER[1] && | |||
header[2] == POIFSConstants.OOXML_FILE_HEADER[2] && | |||
header[3] == POIFSConstants.OOXML_FILE_HEADER[3] | |||
); | |||
return FileMagic.valueOf(inp) == FileMagic.OOXML; | |||
} | |||
} |
@@ -0,0 +1,155 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.poifs.filesystem; | |||
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER; | |||
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER; | |||
import java.io.BufferedInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.util.LocaleUtil; | |||
/** | |||
* The file magic number, i.e. the file identification based on the first bytes | |||
* of the file | |||
*/ | |||
public enum FileMagic { | |||
/** OLE2 / BIFF8+ stream used for Office 97 and higher documents */ | |||
OLE2(HeaderBlockConstants._signature), | |||
/** OOXML / ZIP stream */ | |||
OOXML(OOXML_FILE_HEADER), | |||
/** XML file */ | |||
XML(RAW_XML_FILE_HEADER), | |||
/** BIFF2 raw stream - for Excel 2 */ | |||
BIFF2(new byte[]{ | |||
0x09, 0x00, // sid=0x0009 | |||
0x04, 0x00, // size=0x0004 | |||
0x00, 0x00, // unused | |||
0x70, 0x00 // 0x70 = multiple values | |||
}), | |||
/** BIFF3 raw stream - for Excel 3 */ | |||
BIFF3(new byte[]{ | |||
0x09, 0x02, // sid=0x0209 | |||
0x06, 0x00, // size=0x0006 | |||
0x00, 0x00, // unused | |||
0x70, 0x00 // 0x70 = multiple values | |||
}), | |||
/** BIFF4 raw stream - for Excel 4 */ | |||
BIFF4(new byte[]{ | |||
0x09, 0x04, // sid=0x0409 | |||
0x06, 0x00, // size=0x0006 | |||
0x00, 0x00, // unused | |||
0x70, 0x00 // 0x70 = multiple values | |||
},new byte[]{ | |||
0x09, 0x04, // sid=0x0409 | |||
0x06, 0x00, // size=0x0006 | |||
0x00, 0x00, // unused | |||
0x00, 0x01 | |||
}), | |||
/** Old MS Write raw stream */ | |||
MSWRITE( | |||
new byte[]{0x31, (byte)0xbe, 0x00, 0x00 }, | |||
new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }), | |||
/** RTF document */ | |||
RTF("{\\rtf"), | |||
/** PDF document */ | |||
PDF("%PDF"), | |||
// keep UNKNOWN always as last enum! | |||
/** UNKNOWN magic */ | |||
UNKNOWN(new byte[0]); | |||
final byte[][] magic; | |||
FileMagic(long magic) { | |||
this.magic = new byte[1][8]; | |||
LittleEndian.putLong(this.magic[0], 0, magic); | |||
} | |||
FileMagic(byte[]... magic) { | |||
this.magic = magic; | |||
} | |||
FileMagic(String magic) { | |||
this(magic.getBytes(LocaleUtil.CHARSET_1252)); | |||
} | |||
public static FileMagic valueOf(byte[] magic) { | |||
for (FileMagic fm : values()) { | |||
int i=0; | |||
boolean found = true; | |||
for (byte[] ma : fm.magic) { | |||
for (byte m : ma) { | |||
byte d = magic[i++]; | |||
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) { | |||
found = false; | |||
break; | |||
} | |||
} | |||
if (found) { | |||
return fm; | |||
} | |||
} | |||
} | |||
return UNKNOWN; | |||
} | |||
/** | |||
* Get the file magic of the supplied InputStream (which MUST | |||
* support mark and reset).<p> | |||
* | |||
* If unsure if your InputStream does support mark / reset, | |||
* use {@link #prepareToCheckMagic(InputStream)} to wrap it and make | |||
* sure to always use that, and not the original!<p> | |||
* | |||
* Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean, | |||
* that the ZIP stream has leading junk bytes | |||
* | |||
* @param inp An InputStream which supports either mark/reset | |||
*/ | |||
public static FileMagic valueOf(InputStream inp) throws IOException { | |||
if (!inp.markSupported()) { | |||
throw new IOException("getFileMagic() only operates on streams which support mark(int)"); | |||
} | |||
// Grab the first 8 bytes | |||
byte[] data = IOUtils.peekFirst8Bytes(inp); | |||
return FileMagic.valueOf(data); | |||
} | |||
/** | |||
* Checks if an {@link InputStream} can be reseted (i.e. used for checking the header magic) and wraps it if not | |||
* | |||
* @param stream stream to be checked for wrapping | |||
* @return a mark enabled stream | |||
*/ | |||
public static InputStream prepareToCheckMagic(InputStream stream) { | |||
if (stream.markSupported()) { | |||
return stream; | |||
} | |||
// we used to process the data via a PushbackInputStream, but user code could provide a too small one | |||
// so we use a BufferedInputStream instead now | |||
return new BufferedInputStream(stream); | |||
} | |||
} |
@@ -19,6 +19,7 @@ | |||
package org.apache.poi.poifs.filesystem; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.Closeable; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
@@ -26,7 +27,6 @@ import java.io.FileOutputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.OutputStream; | |||
import java.io.PushbackInputStream; | |||
import java.nio.ByteBuffer; | |||
import java.nio.channels.Channels; | |||
import java.nio.channels.FileChannel; | |||
@@ -51,14 +51,13 @@ import org.apache.poi.poifs.storage.BATBlock.BATBlockAndIndex; | |||
import org.apache.poi.poifs.storage.BlockAllocationTableReader; | |||
import org.apache.poi.poifs.storage.BlockAllocationTableWriter; | |||
import org.apache.poi.poifs.storage.HeaderBlock; | |||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||
import org.apache.poi.poifs.storage.HeaderBlockWriter; | |||
import org.apache.poi.util.CloseIgnoringInputStream; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LongField; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
import org.apache.poi.util.Removal; | |||
/** | |||
* <p>This is the main class of the POIFS system; it manages the entire | |||
@@ -353,44 +352,38 @@ public class NPOIFSFileSystem extends BlockStore | |||
/** | |||
* Checks that the supplied InputStream (which MUST | |||
* support mark and reset, or be a PushbackInputStream) | |||
* has a POIFS (OLE2) header at the start of it. | |||
* If your InputStream does not support mark / reset, | |||
* then wrap it in a PushBackInputStream, then be | |||
* sure to always use that and not the original! | |||
* support mark and reset) has a POIFS (OLE2) header at the start of it. | |||
* If unsure if your InputStream does support mark / reset, | |||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make | |||
* sure to always use that, and not the original! | |||
* | |||
* After the method call, the InputStream is at the | |||
* same position as of the time of entering the method. | |||
* | |||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||
* @param inp An InputStream which supports mark/reset | |||
* | |||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||
*/ | |||
@Deprecated | |||
@Removal(version="4.0") | |||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | |||
// We want to peek at the first 8 bytes | |||
inp.mark(8); | |||
byte[] header = new byte[8]; | |||
int bytesRead = IOUtils.readFully(inp, header); | |||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header); | |||
// Wind back those 8 bytes | |||
if(inp instanceof PushbackInputStream) { | |||
PushbackInputStream pin = (PushbackInputStream)inp; | |||
pin.unread(header, 0, bytesRead); | |||
} else { | |||
inp.reset(); | |||
} | |||
// Did it match the signature? | |||
return (signature.get() == HeaderBlockConstants._signature); | |||
return FileMagic.valueOf(inp) == FileMagic.OLE2; | |||
} | |||
/** | |||
* Checks if the supplied first 8 bytes of a stream / file | |||
* has a POIFS (OLE2) header. | |||
* | |||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||
*/ | |||
@Deprecated | |||
@Removal(version="4.0") | |||
public static boolean hasPOIFSHeader(byte[] header8Bytes) { | |||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); | |||
return (signature.get() == HeaderBlockConstants._signature); | |||
try { | |||
return hasPOIFSHeader(new ByteArrayInputStream(header8Bytes)); | |||
} catch (IOException e) { | |||
throw new RuntimeException("invalid header check", e); | |||
} | |||
} | |||
/** |
@@ -42,16 +42,14 @@ import org.apache.poi.poifs.storage.BlockAllocationTableWriter; | |||
import org.apache.poi.poifs.storage.BlockList; | |||
import org.apache.poi.poifs.storage.BlockWritable; | |||
import org.apache.poi.poifs.storage.HeaderBlock; | |||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||
import org.apache.poi.poifs.storage.HeaderBlockWriter; | |||
import org.apache.poi.poifs.storage.RawDataBlockList; | |||
import org.apache.poi.poifs.storage.SmallBlockTableReader; | |||
import org.apache.poi.poifs.storage.SmallBlockTableWriter; | |||
import org.apache.poi.util.CloseIgnoringInputStream; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.LongField; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
import org.apache.poi.util.Removal; | |||
/** | |||
* <p>This is the main class of the POIFS system; it manages the entire | |||
@@ -200,27 +198,34 @@ public class OPOIFSFileSystem | |||
/** | |||
* Checks that the supplied InputStream (which MUST | |||
* support mark and reset, or be a PushbackInputStream) | |||
* has a POIFS (OLE2) header at the start of it. | |||
* If your InputStream does not support mark / reset, | |||
* then wrap it in a PushBackInputStream, then be | |||
* support mark and reset) has a POIFS (OLE2) header at the start of it. | |||
* If unsure if your InputStream does support mark / reset, | |||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make | |||
* sure to always use that, and not the original! | |||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||
* | |||
* After the method call, the InputStream is at the | |||
* same position as of the time of entering the method. | |||
* | |||
* @param inp An InputStream which supports either mark/reset | |||
* | |||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||
*/ | |||
@Deprecated | |||
@Removal(version="4.0") | |||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | |||
// We want to peek at the first 8 bytes | |||
byte[] header = IOUtils.peekFirst8Bytes(inp); | |||
return hasPOIFSHeader(header); | |||
return NPOIFSFileSystem.hasPOIFSHeader(inp); | |||
} | |||
/** | |||
* Checks if the supplied first 8 bytes of a stream / file | |||
* has a POIFS (OLE2) header. | |||
* | |||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||
*/ | |||
@Deprecated | |||
@Removal(version="4.0") | |||
public static boolean hasPOIFSHeader(byte[] header8Bytes) { | |||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); | |||
// Did it match the signature? | |||
return (signature.get() == HeaderBlockConstants._signature); | |||
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes); | |||
} | |||
/** |
@@ -114,27 +114,6 @@ public class POIFSFileSystem | |||
super(file); | |||
} | |||
/** | |||
* Checks that the supplied InputStream (which MUST | |||
* support mark and reset, or be a PushbackInputStream) | |||
* has a POIFS (OLE2) header at the start of it. | |||
* If your InputStream does not support mark / reset, | |||
* then wrap it in a PushBackInputStream, then be | |||
* sure to always use that, and not the original! | |||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||
*/ | |||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | |||
return NPOIFSFileSystem.hasPOIFSHeader(inp); | |||
} | |||
/** | |||
* Checks if the supplied first 8 bytes of a stream / file | |||
* has a POIFS (OLE2) header. | |||
*/ | |||
public static boolean hasPOIFSHeader(byte[] header8Bytes) { | |||
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes); | |||
} | |||
/** | |||
* Creates a new {@link POIFSFileSystem} in a new {@link File}. | |||
* Use {@link #POIFSFileSystem(File)} to open an existing File, |
@@ -17,8 +17,8 @@ | |||
package org.apache.poi.poifs.macros; | |||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase; | |||
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; | |||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.ByteArrayOutputStream; | |||
@@ -27,7 +27,6 @@ import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.nio.charset.Charset; | |||
import java.util.HashMap; | |||
import java.util.Map; | |||
@@ -38,6 +37,7 @@ import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | |||
import org.apache.poi.poifs.filesystem.DocumentNode; | |||
import org.apache.poi.poifs.filesystem.Entry; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | |||
import org.apache.poi.util.CodePageUtil; | |||
@@ -67,13 +67,12 @@ public class VBAMacroReader implements Closeable { | |||
private NPOIFSFileSystem fs; | |||
public VBAMacroReader(InputStream rstream) throws IOException { | |||
PushbackInputStream stream = new PushbackInputStream(rstream, 8); | |||
byte[] header8 = IOUtils.peekFirst8Bytes(stream); | |||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { | |||
fs = new NPOIFSFileSystem(stream); | |||
InputStream is = FileMagic.prepareToCheckMagic(rstream); | |||
FileMagic fm = FileMagic.valueOf(is); | |||
if (fm == FileMagic.OLE2) { | |||
fs = new NPOIFSFileSystem(is); | |||
} else { | |||
openOOXML(stream); | |||
openOOXML(is); | |||
} | |||
} | |||
@@ -26,6 +26,7 @@ import java.util.Arrays; | |||
import org.apache.poi.hssf.OldExcelFormatException; | |||
import org.apache.poi.poifs.common.POIFSBigBlockSize; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.poifs.filesystem.NotOLE2FileException; | |||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | |||
import org.apache.poi.util.HexDump; | |||
@@ -40,41 +41,6 @@ import org.apache.poi.util.ShortField; | |||
* The block containing the archive header | |||
*/ | |||
public final class HeaderBlock implements HeaderBlockConstants { | |||
private static final byte[] MAGIC_BIFF2 = { | |||
0x09, 0x00, // sid=0x0009 | |||
0x04, 0x00, // size=0x0004 | |||
0x00, 0x00, // unused | |||
0x70, 0x00 // 0x70 = multiple values | |||
}; | |||
private static final byte[] MAGIC_BIFF3 = { | |||
0x09, 0x02, // sid=0x0209 | |||
0x06, 0x00, // size=0x0006 | |||
0x00, 0x00, // unused | |||
0x70, 0x00 // 0x70 = multiple values | |||
}; | |||
private static final byte[] MAGIC_BIFF4a = { | |||
0x09, 0x04, // sid=0x0409 | |||
0x06, 0x00, // size=0x0006 | |||
0x00, 0x00, // unused | |||
0x70, 0x00 // 0x70 = multiple values | |||
}; | |||
private static final byte[] MAGIC_BIFF4b = { | |||
0x09, 0x04, // sid=0x0409 | |||
0x06, 0x00, // size=0x0006 | |||
0x00, 0x00, // unused | |||
0x00, 0x01 | |||
}; | |||
private static final byte[] MAGIC_MSWRITEa = { | |||
0x31, (byte)0xbe, 0x00, 0x00 | |||
}; | |||
private static final byte[] MAGIC_MSWRITEb = { | |||
0x32, (byte)0xbe, 0x00, 0x00 | |||
}; | |||
private static final byte _default_value = ( byte ) 0xFF; | |||
/** | |||
@@ -151,53 +117,35 @@ public final class HeaderBlock implements HeaderBlockConstants { | |||
this._data = data.clone(); | |||
// verify signature | |||
long signature = LittleEndian.getLong(_data, _signature_offset); | |||
if (signature != _signature) { | |||
// Is it one of the usual suspects? | |||
if (cmp(POIFSConstants.OOXML_FILE_HEADER, data)) { | |||
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " | |||
+ "You are calling the part of POI that deals with OLE2 Office Documents. " | |||
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); | |||
} | |||
if (cmp(POIFSConstants.RAW_XML_FILE_HEADER, data)) { | |||
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " | |||
+ "Formats such as Office 2003 XML are not supported"); | |||
} | |||
// Old MS Write raw stream | |||
if (cmp(MAGIC_MSWRITEa, data) || cmp(MAGIC_MSWRITEb, data)) { | |||
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " | |||
+ "Apache POI doesn't currently support this format"); | |||
} | |||
// BIFF2 raw stream | |||
if (cmp(MAGIC_BIFF2, data)) { | |||
throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. " | |||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||
} | |||
// BIFF3 raw stream | |||
if (cmp(MAGIC_BIFF3, data)) { | |||
throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. " | |||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||
} | |||
// BIFF4 raw stream | |||
if (cmp(MAGIC_BIFF4a, data) || cmp(MAGIC_BIFF4b, data)) { | |||
throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. " | |||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||
} | |||
// Give a generic error if the OLE2 signature isn't found | |||
throw new NotOLE2FileException("Invalid header signature; read " | |||
+ HexDump.longToHex(signature) + ", expected " | |||
+ HexDump.longToHex(_signature) + " - Your file appears " | |||
+ "not to be a valid OLE2 document"); | |||
} | |||
FileMagic fm = FileMagic.valueOf(data); | |||
switch (fm) { | |||
case OLE2: | |||
break; | |||
case OOXML: | |||
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " | |||
+ "You are calling the part of POI that deals with OLE2 Office Documents. " | |||
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); | |||
case XML: | |||
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " | |||
+ "Formats such as Office 2003 XML are not supported"); | |||
case MSWRITE: | |||
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " | |||
+ "Apache POI doesn't currently support this format"); | |||
case BIFF2: | |||
case BIFF3: | |||
case BIFF4: | |||
throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. " | |||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||
default: | |||
// Give a generic error if the OLE2 signature isn't found | |||
String exp = HexDump.longToHex(_signature); | |||
String act = HexDump.longToHex(LittleEndian.getLong(data, 0)); | |||
throw new NotOLE2FileException( | |||
"Invalid header signature; read " + act + ", expected " + exp + | |||
" - Your file appears not to be a valid OLE2 document"); | |||
} | |||
// Figure out our block size | |||
if (_data[30] == 12) { | |||
this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; | |||
@@ -434,15 +382,4 @@ public final class HeaderBlock implements HeaderBlockConstants { | |||
stream.write(0); | |||
} | |||
} | |||
private static boolean cmp(byte[] magic, byte[] data) { | |||
int i=0; | |||
for (byte m : magic) { | |||
byte d = data[i++]; | |||
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
} |
@@ -20,7 +20,6 @@ import java.io.File; | |||
import java.io.FileNotFoundException; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.lang.reflect.InvocationTargetException; | |||
import java.lang.reflect.Method; | |||
@@ -30,6 +29,7 @@ import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; | |||
import org.apache.poi.poifs.crypt.Decryptor; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | |||
import org.apache.poi.util.IOUtils; | |||
@@ -94,9 +94,7 @@ public class SlideShowFactory { | |||
* Creates the appropriate HSLFSlideShow / XMLSlideShow from | |||
* the given InputStream. | |||
* | |||
* <p>Your input stream MUST either support mark/reset, or | |||
* be wrapped as a {@link PushbackInputStream}! Note that | |||
* using an {@link InputStream} has a higher memory footprint | |||
* <p>Note that using an {@link InputStream} has a higher memory footprint | |||
* than using a {@link File}.</p> | |||
* | |||
* <p>Note that in order to properly release resources the | |||
@@ -118,9 +116,8 @@ public class SlideShowFactory { | |||
/** | |||
* Creates the appropriate HSLFSlideShow / XMLSlideShow from | |||
* the given InputStream, which may be password protected. | |||
* <p>Your input stream MUST either support mark/reset, or | |||
* be wrapped as a {@link PushbackInputStream}! Note that | |||
* using an {@link InputStream} has a higher memory footprint | |||
* | |||
* <p>Note that using an {@link InputStream} has a higher memory footprint | |||
* than using a {@link File}.</p> | |||
* | |||
* <p>Note that in order to properly release resources the | |||
@@ -137,23 +134,18 @@ public class SlideShowFactory { | |||
* @throws EncryptedDocumentException If the wrong password is given for a protected file | |||
*/ | |||
public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException { | |||
// If clearly doesn't do mark/reset, wrap up | |||
if (! inp.markSupported()) { | |||
inp = new PushbackInputStream(inp, 8); | |||
} | |||
// Ensure that there is at least some data there | |||
byte[] header8 = IOUtils.peekFirst8Bytes(inp); | |||
// Try to create | |||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { | |||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); | |||
InputStream is = FileMagic.prepareToCheckMagic(inp); | |||
FileMagic fm = FileMagic.valueOf(is); | |||
switch (fm) { | |||
case OLE2: | |||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is); | |||
return create(fs, password); | |||
case OOXML: | |||
return createXSLFSlideShow(is); | |||
default: | |||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||
} | |||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { | |||
return createXSLFSlideShow(inp); | |||
} | |||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||
} | |||
/** |
@@ -21,7 +21,6 @@ import java.io.File; | |||
import java.io.FileNotFoundException; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.util.ArrayList; | |||
import java.util.Iterator; | |||
@@ -45,8 +44,8 @@ import org.apache.poi.poifs.crypt.Decryptor; | |||
import org.apache.poi.poifs.crypt.EncryptionInfo; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | |||
import org.apache.poi.poifs.filesystem.Entry; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.NotOLE2FileException; | |||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; | |||
@@ -175,21 +174,20 @@ public class ExtractorFactory { | |||
} | |||
public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { | |||
// Figure out the kind of stream | |||
// If clearly doesn't do mark/reset, wrap up | |||
if (! inp.markSupported()) { | |||
inp = new PushbackInputStream(inp, 8); | |||
} | |||
InputStream is = FileMagic.prepareToCheckMagic(inp); | |||
if (NPOIFSFileSystem.hasPOIFSHeader(inp)) { | |||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); | |||
FileMagic fm = FileMagic.valueOf(is); | |||
switch (fm) { | |||
case OLE2: | |||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is); | |||
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); | |||
return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs); | |||
case OOXML: | |||
return createExtractor(OPCPackage.open(is)); | |||
default: | |||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||
} | |||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { | |||
return createExtractor(OPCPackage.open(inp)); | |||
} | |||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||
} | |||
/** |
@@ -22,7 +22,6 @@ import java.io.FileInputStream; | |||
import java.io.FileNotFoundException; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.net.URI; | |||
import java.net.URISyntaxException; | |||
import java.util.Enumeration; | |||
@@ -38,12 +37,11 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | |||
import org.apache.poi.openxml4j.opc.ZipPackage; | |||
import org.apache.poi.openxml4j.util.ZipSecureFile; | |||
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.Removal; | |||
@Internal | |||
public final class ZipHelper { | |||
/** | |||
* Forward slash use to convert part name between OPC and zip item naming | |||
@@ -172,59 +170,29 @@ public final class ZipHelper { | |||
* Warning - this will consume the first few bytes of the stream, | |||
* you should push-back or reset the stream after use! | |||
*/ | |||
public static void verifyZipHeader(InputStream stream) | |||
throws NotOfficeXmlFileException, IOException { | |||
// Grab the first 8 bytes | |||
byte[] data = new byte[8]; | |||
IOUtils.readFully(stream, data); | |||
// OLE2? | |||
long signature = LittleEndian.getLong(data); | |||
if (signature == HeaderBlockConstants._signature) { | |||
public static void verifyZipHeader(InputStream stream) throws NotOfficeXmlFileException, IOException { | |||
InputStream is = FileMagic.prepareToCheckMagic(stream); | |||
FileMagic fm = FileMagic.valueOf(is); | |||
switch (fm) { | |||
case OLE2: | |||
throw new OLE2NotOfficeXmlFileException( | |||
"The supplied data appears to be in the OLE2 Format. " + | |||
"You are calling the part of POI that deals with OOXML "+ | |||
"(Office Open XML) Documents. You need to call a different " + | |||
"part of POI to process this data (eg HSSF instead of XSSF)"); | |||
} | |||
// Raw XML? | |||
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER; | |||
if (data[0] == RAW_XML_FILE_HEADER[0] && | |||
data[1] == RAW_XML_FILE_HEADER[1] && | |||
data[2] == RAW_XML_FILE_HEADER[2] && | |||
data[3] == RAW_XML_FILE_HEADER[3] && | |||
data[4] == RAW_XML_FILE_HEADER[4]) { | |||
case XML: | |||
throw new NotOfficeXmlFileException( | |||
"The supplied data appears to be a raw XML file. " + | |||
"Formats such as Office 2003 XML are not supported"); | |||
default: | |||
case OOXML: | |||
case UNKNOWN: | |||
// Don't check for a Zip header, as to maintain backwards | |||
// compatibility we need to let them seek over junk at the | |||
// start before beginning processing. | |||
break; | |||
} | |||
// Don't check for a Zip header, as to maintain backwards | |||
// compatibility we need to let them seek over junk at the | |||
// start before beginning processing. | |||
// Put things back | |||
if (stream instanceof PushbackInputStream) { | |||
((PushbackInputStream)stream).unread(data); | |||
} else if (stream.markSupported()) { | |||
stream.reset(); | |||
} else if (stream instanceof FileInputStream) { | |||
// File open check, about to be closed, nothing to do | |||
} else { | |||
// Oh dear... I hope you know what you're doing! | |||
} | |||
} | |||
private static InputStream prepareToCheckHeader(InputStream stream) { | |||
if (stream instanceof PushbackInputStream) { | |||
return stream; | |||
} | |||
if (stream.markSupported()) { | |||
stream.mark(8); | |||
return stream; | |||
} | |||
return new PushbackInputStream(stream, 8); | |||
} | |||
/** | |||
@@ -237,7 +205,7 @@ public final class ZipHelper { | |||
@SuppressWarnings("resource") | |||
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { | |||
// Peek at the first few bytes to sanity check | |||
InputStream checkedStream = prepareToCheckHeader(stream); | |||
InputStream checkedStream = FileMagic.prepareToCheckMagic(stream); | |||
verifyZipHeader(checkedStream); | |||
// Open as a proper zip stream |
@@ -198,10 +198,11 @@ public class ZipSecureFile extends ZipFile { | |||
public static class ThresholdInputStream extends PushbackInputStream { | |||
long counter = 0; | |||
long markPos = 0; | |||
ThresholdInputStream cis; | |||
public ThresholdInputStream(InputStream is, ThresholdInputStream cis) { | |||
super(is,1); | |||
super(is); | |||
this.cis = cis; | |||
} | |||
@@ -225,14 +226,15 @@ public class ZipSecureFile extends ZipFile { | |||
@Override | |||
public long skip(long n) throws IOException { | |||
counter = 0; | |||
return in.skip(n); | |||
long s = in.skip(n); | |||
counter += s; | |||
return s; | |||
} | |||
@Override | |||
public synchronized void reset() throws IOException { | |||
counter = 0; | |||
in.reset(); | |||
counter = markPos; | |||
super.reset(); | |||
} | |||
public void advance(int advance) throws IOException { | |||
@@ -263,10 +265,10 @@ public class ZipSecureFile extends ZipFile { | |||
} | |||
// one of the limits was reached, report it | |||
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data. " | |||
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk. " | |||
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit. " | |||
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + (((double)cis.counter)/counter) | |||
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data.\n" | |||
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk.\n" | |||
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit.\n" | |||
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + ratio + "\n" | |||
+ "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO); | |||
} | |||
@@ -322,6 +324,7 @@ public class ZipSecureFile extends ZipFile { | |||
@Override | |||
public synchronized void mark(int readlimit) { | |||
markPos = counter; | |||
in.mark(readlimit); | |||
} | |||
} |
@@ -16,11 +16,11 @@ | |||
==================================================================== */ | |||
package org.apache.poi.ss.usermodel; | |||
import java.io.BufferedInputStream; | |||
import java.io.File; | |||
import java.io.FileNotFoundException; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import org.apache.poi.EmptyFileException; | |||
import org.apache.poi.EncryptedDocumentException; | |||
@@ -32,6 +32,7 @@ import org.apache.poi.openxml4j.opc.PackageAccess; | |||
import org.apache.poi.poifs.crypt.Decryptor; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
@@ -127,7 +128,7 @@ public class WorkbookFactory { | |||
* the given InputStream. | |||
* | |||
* <p>Your input stream MUST either support mark/reset, or | |||
* be wrapped as a {@link PushbackInputStream}! Note that | |||
* be wrapped as a {@link BufferedInputStream}! Note that | |||
* using an {@link InputStream} has a higher memory footprint | |||
* than using a {@link File}.</p> | |||
* | |||
@@ -150,16 +151,15 @@ public class WorkbookFactory { | |||
/** | |||
* Creates the appropriate HSSFWorkbook / XSSFWorkbook from | |||
* the given InputStream, which may be password protected. | |||
* <p>Your input stream MUST either support mark/reset, or | |||
* be wrapped as a {@link PushbackInputStream}! Note that | |||
* using an {@link InputStream} has a higher memory footprint | |||
* than using a {@link File}.</p> | |||
* the given InputStream, which may be password protected.<p> | |||
* | |||
* Note that using an {@link InputStream} has a higher memory footprint | |||
* than using a {@link File}.<p> | |||
* | |||
* <p>Note that in order to properly release resources the | |||
* Note that in order to properly release resources the | |||
* Workbook should be closed after use. Note also that loading | |||
* from an InputStream requires more memory than loading | |||
* from a File, so prefer {@link #create(File)} where possible.</p> | |||
* from a File, so prefer {@link #create(File)} where possible. | |||
* | |||
* @param inp The {@link InputStream} to read data from. | |||
* @param password The password that should be used or null if no password is necessary. | |||
@@ -172,23 +172,19 @@ public class WorkbookFactory { | |||
* @throws EmptyFileException If an empty stream is given | |||
*/ | |||
public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { | |||
// If clearly doesn't do mark/reset, wrap up | |||
if (! inp.markSupported()) { | |||
inp = new PushbackInputStream(inp, 8); | |||
} | |||
// Ensure that there is at least some data there | |||
byte[] header8 = IOUtils.peekFirst8Bytes(inp); | |||
// Try to create | |||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { | |||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); | |||
InputStream is = FileMagic.prepareToCheckMagic(inp); | |||
FileMagic fm = FileMagic.valueOf(is); | |||
switch (fm) { | |||
case OLE2: | |||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is); | |||
return create(fs, password); | |||
case OOXML: | |||
return new XSSFWorkbook(OPCPackage.open(is)); | |||
default: | |||
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||
} | |||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { | |||
return new XSSFWorkbook(OPCPackage.open(inp)); | |||
} | |||
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||
} | |||
/** |
@@ -20,7 +20,6 @@ package org.apache.poi.xssf.usermodel; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import javax.xml.namespace.QName; | |||
@@ -29,7 +28,7 @@ import org.apache.poi.POIXMLException; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.ss.usermodel.ObjectData; | |||
import org.apache.poi.util.IOUtils; | |||
@@ -161,17 +160,8 @@ public class XSSFObjectData extends XSSFSimpleShape implements ObjectData { | |||
InputStream is = null; | |||
try { | |||
is = getObjectPart().getInputStream(); | |||
// If clearly doesn't do mark/reset, wrap up | |||
if (! is.markSupported()) { | |||
is = new PushbackInputStream(is, 8); | |||
} | |||
// Ensure that there is at least some data there | |||
byte[] header8 = IOUtils.peekFirst8Bytes(is); | |||
// Try to create | |||
return NPOIFSFileSystem.hasPOIFSHeader(header8); | |||
is = FileMagic.prepareToCheckMagic(is); | |||
return FileMagic.valueOf(is) == FileMagic.OLE2; | |||
} catch (IOException e) { | |||
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e); | |||
return false; |
@@ -19,68 +19,70 @@ | |||
package org.apache.poi; | |||
import static org.junit.Assert.assertArrayEquals; | |||
import static org.junit.Assert.assertEquals; | |||
import static org.junit.Assert.assertFalse; | |||
import static org.junit.Assert.assertTrue; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.util.Arrays; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.util.IOUtils; | |||
import org.junit.Test; | |||
/** | |||
* Class to test that HXF correctly detects OOXML | |||
* documents | |||
*/ | |||
public class TestDetectAsOOXML extends TestCase | |||
{ | |||
public void testOpensProperly() throws Exception | |||
{ | |||
public class TestDetectAsOOXML { | |||
@Test | |||
public void testOpensProperly() throws IOException, InvalidFormatException { | |||
OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx")); | |||
} | |||
public void testDetectAsPOIFS() throws Exception { | |||
InputStream in; | |||
// ooxml file is | |||
in = new PushbackInputStream( | |||
HSSFTestDataSamples.openSampleFileStream("SampleSS.xlsx"), 10 | |||
); | |||
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||
in.close(); | |||
// xls file isn't | |||
in = new PushbackInputStream( | |||
HSSFTestDataSamples.openSampleFileStream("SampleSS.xls"), 10 | |||
); | |||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||
in.close(); | |||
// text file isn't | |||
in = new PushbackInputStream( | |||
HSSFTestDataSamples.openSampleFileStream("SampleSS.txt"), 10 | |||
); | |||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||
in.close(); | |||
@Test | |||
public void testDetectAsPOIFS() throws IOException { | |||
Object fileAndMagic[][] = { | |||
{ "SampleSS.xlsx", FileMagic.OOXML }, | |||
{ "SampleSS.xls", FileMagic.OLE2 }, | |||
{ "SampleSS.txt", FileMagic.UNKNOWN } | |||
}; | |||
for (Object fm[] : fileAndMagic) { | |||
InputStream is = HSSFTestDataSamples.openSampleFileStream((String)fm[0]); | |||
is = FileMagic.prepareToCheckMagic(is); | |||
FileMagic act = FileMagic.valueOf(is); | |||
if (act == FileMagic.OOXML) { | |||
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(is)); | |||
} | |||
assertEquals("file magic failed for "+fm[0], fm[1], act); | |||
is.close(); | |||
} | |||
} | |||
@Test | |||
public void testFileCorruption() throws Exception { | |||
// create test InputStream | |||
byte[] testData = { (byte)1, (byte)2, (byte)3 }; | |||
byte[] testData = { 1, 2, 3 }; | |||
ByteArrayInputStream testInput = new ByteArrayInputStream(testData); | |||
InputStream is = FileMagic.prepareToCheckMagic(testInput); | |||
// detect header | |||
InputStream in = new PushbackInputStream(testInput, 10); | |||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(is)); | |||
// check if InputStream is still intact | |||
byte[] test = new byte[3]; | |||
assertEquals(3, in.read(test)); | |||
assertTrue(Arrays.equals(testData, test)); | |||
assertEquals(-1, in.read()); | |||
in.close(); | |||
byte[] act = IOUtils.toByteArray(is); | |||
assertArrayEquals(testData, act); | |||
assertEquals(-1, is.read()); | |||
is.close(); | |||
} | |||
} |
@@ -17,11 +17,50 @@ | |||
package org.apache.poi.openxml4j.opc; | |||
import org.apache.poi.*; | |||
import static org.junit.Assert.assertEquals; | |||
import static org.junit.Assert.assertFalse; | |||
import static org.junit.Assert.assertNotNull; | |||
import static org.junit.Assert.assertNull; | |||
import static org.junit.Assert.assertTrue; | |||
import static org.junit.Assert.fail; | |||
import java.io.BufferedInputStream; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.FileOutputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.OutputStream; | |||
import java.io.PushbackInputStream; | |||
import java.lang.reflect.InvocationTargetException; | |||
import java.net.URI; | |||
import java.net.URISyntaxException; | |||
import java.util.Enumeration; | |||
import java.util.HashMap; | |||
import java.util.List; | |||
import java.util.TreeMap; | |||
import java.util.regex.Pattern; | |||
import java.util.zip.ZipEntry; | |||
import java.util.zip.ZipFile; | |||
import java.util.zip.ZipOutputStream; | |||
import org.apache.poi.EncryptedDocumentException; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.POITestCase; | |||
import org.apache.poi.POITextExtractor; | |||
import org.apache.poi.POIXMLException; | |||
import org.apache.poi.UnsupportedFileFormatException; | |||
import org.apache.poi.extractor.ExtractorFactory; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; | |||
import org.apache.poi.openxml4j.exceptions.*; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException; | |||
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; | |||
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException; | |||
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; | |||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; | |||
import org.apache.poi.openxml4j.opc.internal.FileHelper; | |||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; | |||
@@ -29,7 +68,11 @@ import org.apache.poi.openxml4j.opc.internal.ZipHelper; | |||
import org.apache.poi.openxml4j.util.ZipSecureFile; | |||
import org.apache.poi.ss.usermodel.Workbook; | |||
import org.apache.poi.ss.usermodel.WorkbookFactory; | |||
import org.apache.poi.util.*; | |||
import org.apache.poi.util.DocumentHelper; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
import org.apache.poi.util.TempFile; | |||
import org.apache.poi.xssf.XSSFTestDataSamples; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.junit.Ignore; | |||
@@ -39,21 +82,6 @@ import org.w3c.dom.Element; | |||
import org.w3c.dom.NodeList; | |||
import org.xml.sax.SAXException; | |||
import java.io.*; | |||
import java.lang.reflect.InvocationTargetException; | |||
import java.net.URI; | |||
import java.net.URISyntaxException; | |||
import java.util.Enumeration; | |||
import java.util.HashMap; | |||
import java.util.List; | |||
import java.util.TreeMap; | |||
import java.util.regex.Pattern; | |||
import java.util.zip.ZipEntry; | |||
import java.util.zip.ZipFile; | |||
import java.util.zip.ZipOutputStream; | |||
import static org.junit.Assert.*; | |||
public final class TestPackage { | |||
private static final POILogger logger = POILogFactory.getLogger(TestPackage.class); | |||
@@ -947,20 +975,32 @@ public final class TestPackage { | |||
} | |||
// bug 60128 | |||
@Test | |||
@Test(expected=NotOfficeXmlFileException.class) | |||
public void testCorruptFile() throws IOException, InvalidFormatException { | |||
OPCPackage pkg = null; | |||
File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx"); | |||
OPCPackage.open(file, PackageAccess.READ); | |||
} | |||
// bug 61381 | |||
@Test | |||
public void testTooShortFilterStreams() throws IOException, InvalidFormatException { | |||
File xssf = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx"); | |||
File hssf = POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls"); | |||
InputStream isList[] = { | |||
new PushbackInputStream(new FileInputStream(xssf), 2), | |||
new BufferedInputStream(new FileInputStream(xssf), 2), | |||
new PushbackInputStream(new FileInputStream(hssf), 2), | |||
new BufferedInputStream(new FileInputStream(hssf), 2), | |||
}; | |||
try { | |||
pkg = OPCPackage.open(file, PackageAccess.READ); | |||
} catch (NotOfficeXmlFileException e) { | |||
/*System.out.println(e.getClass().getName()); | |||
System.out.println(e.getMessage()); | |||
e.printStackTrace();*/ | |||
// ignore exception | |||
for (InputStream is : isList) { | |||
WorkbookFactory.create(is).close(); | |||
} | |||
} finally { | |||
if (pkg != null) { | |||
pkg.close(); | |||
for (InputStream is : isList) { | |||
IOUtils.closeQuietly(is); | |||
} | |||
} | |||
} |
@@ -20,7 +20,6 @@ package org.apache.poi.hwpf; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.security.GeneralSecurityException; | |||
import org.apache.poi.EncryptedDocumentException; | |||
@@ -47,6 +46,7 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | |||
import org.apache.poi.poifs.filesystem.FileMagic; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.util.BoundedInputStream; | |||
import org.apache.poi.util.IOUtils; | |||
@@ -116,22 +116,14 @@ public abstract class HWPFDocumentCore extends POIDocument { | |||
* POIFSFileSystem from it, and returns that. | |||
*/ | |||
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { | |||
// Open a PushbackInputStream, so we can peek at the first few bytes | |||
PushbackInputStream pis = new PushbackInputStream(istream,6); | |||
byte[] first6 = IOUtils.toByteArray(pis, 6); | |||
// Does it start with {\rtf ? If so, it's really RTF | |||
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' | |||
&& first6[3] == 't' && first6[4] == 'f') { | |||
throw new IllegalArgumentException("The document is really a RTF file"); | |||
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) { | |||
throw new IllegalArgumentException("The document is really a PDF file"); | |||
} | |||
// OK, so it's neither RTF nor PDF | |||
// Open a POIFSFileSystem on the (pushed back) stream | |||
pis.unread(first6); | |||
return new POIFSFileSystem(pis); | |||
InputStream is = FileMagic.prepareToCheckMagic(istream); | |||
FileMagic fm = FileMagic.valueOf(is); | |||
if (fm != FileMagic.OLE2) { | |||
throw new IllegalArgumentException("The document is really a "+fm+" file"); | |||
} | |||
return new POIFSFileSystem(is); | |||
} | |||
/** |
@@ -22,7 +22,6 @@ import static org.apache.poi.POITestCase.assertContains; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.util.Arrays; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
@@ -86,8 +85,9 @@ public class TestOfficeXMLException extends TestCase { | |||
// text file isn't | |||
confirmIsPOIFS("SampleSS.txt", false); | |||
} | |||
private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException { | |||
InputStream in = new PushbackInputStream(openSampleStream(sampleFileName), 10); | |||
InputStream in = FileMagic.prepareToCheckMagic(openSampleStream(sampleFileName)); | |||
try { | |||
boolean actualResult; | |||
try { | |||
@@ -108,7 +108,7 @@ public class TestOfficeXMLException extends TestCase { | |||
InputStream testInput = new ByteArrayInputStream(testData); | |||
// detect header | |||
InputStream in = new PushbackInputStream(testInput, 10); | |||
InputStream in = FileMagic.prepareToCheckMagic(testInput); | |||
assertFalse(POIFSFileSystem.hasPOIFSHeader(in)); | |||
// check if InputStream is still intact | |||
@@ -126,7 +126,7 @@ public class TestOfficeXMLException extends TestCase { | |||
InputStream testInput = new ByteArrayInputStream(testData); | |||
// detect header | |||
InputStream in = new PushbackInputStream(testInput, 10); | |||
InputStream in = FileMagic.prepareToCheckMagic(testInput); | |||
assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in)); | |||
// check if InputStream is still intact |