git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1804854 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_17_FINAL
package org.apache.poi.poifs.filesystem; | package org.apache.poi.poifs.filesystem; | ||||
import org.apache.poi.EncryptedDocumentException; | |||||
import org.apache.poi.poifs.common.POIFSConstants; | |||||
import org.apache.poi.poifs.crypt.Decryptor; | |||||
import org.apache.poi.poifs.crypt.EncryptionInfo; | |||||
import org.apache.poi.util.IOUtils; | |||||
import java.io.FilterInputStream; | import java.io.FilterInputStream; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.security.GeneralSecurityException; | import java.security.GeneralSecurityException; | ||||
import org.apache.poi.EncryptedDocumentException; | |||||
import org.apache.poi.poifs.crypt.Decryptor; | |||||
import org.apache.poi.poifs.crypt.EncryptionInfo; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.Removal; | |||||
/** | /** | ||||
* A small base class for the various factories, e.g. WorkbookFactory, | * A small base class for the various factories, e.g. WorkbookFactory, | ||||
* SlideShowFactory to combine common code here. | * SlideShowFactory to combine common code here. | ||||
*/ | */ | ||||
@Internal | |||||
public class DocumentFactoryHelper { | public class DocumentFactoryHelper { | ||||
/** | /** | ||||
* Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using | * Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using | ||||
/** | /** | ||||
* Checks that the supplied InputStream (which MUST | * Checks that the supplied InputStream (which MUST | ||||
* support mark and reset, or be a PushbackInputStream) | |||||
* has a OOXML (zip) header at the start of it. | |||||
* If your InputStream does not support mark / reset, | |||||
* then wrap it in a PushBackInputStream, then be | |||||
* support mark and reset) has a OOXML (zip) header at the start of it.<p> | |||||
* | |||||
* If unsure if your InputStream does support mark / reset, | |||||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make | |||||
* sure to always use that, and not the original! | * sure to always use that, and not the original! | ||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||||
* | |||||
* @param inp An InputStream which supports either mark/reset | |||||
* | |||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == FileMagic.OOXML instead | |||||
*/ | */ | ||||
@Deprecated | |||||
@Removal(version="4.0") | |||||
public static boolean hasOOXMLHeader(InputStream inp) throws IOException { | public static boolean hasOOXMLHeader(InputStream inp) throws IOException { | ||||
// We want to peek at the first 4 bytes | |||||
inp.mark(4); | |||||
byte[] header = new byte[4]; | |||||
int bytesRead = IOUtils.readFully(inp, header); | |||||
// Wind back those 4 bytes | |||||
if(inp instanceof PushbackInputStream) { | |||||
PushbackInputStream pin = (PushbackInputStream)inp; | |||||
pin.unread(header, 0, bytesRead); | |||||
} else { | |||||
inp.reset(); | |||||
} | |||||
// Did it match the ooxml zip signature? | |||||
return ( | |||||
bytesRead == 4 && | |||||
header[0] == POIFSConstants.OOXML_FILE_HEADER[0] && | |||||
header[1] == POIFSConstants.OOXML_FILE_HEADER[1] && | |||||
header[2] == POIFSConstants.OOXML_FILE_HEADER[2] && | |||||
header[3] == POIFSConstants.OOXML_FILE_HEADER[3] | |||||
); | |||||
return FileMagic.valueOf(inp) == FileMagic.OOXML; | |||||
} | } | ||||
} | } |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.poifs.filesystem; | |||||
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER; | |||||
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER; | |||||
import java.io.BufferedInputStream; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||||
import org.apache.poi.util.IOUtils; | |||||
import org.apache.poi.util.LittleEndian; | |||||
import org.apache.poi.util.LocaleUtil; | |||||
/** | |||||
* The file magic number, i.e. the file identification based on the first bytes | |||||
* of the file | |||||
*/ | |||||
public enum FileMagic { | |||||
/** OLE2 / BIFF8+ stream used for Office 97 and higher documents */ | |||||
OLE2(HeaderBlockConstants._signature), | |||||
/** OOXML / ZIP stream */ | |||||
OOXML(OOXML_FILE_HEADER), | |||||
/** XML file */ | |||||
XML(RAW_XML_FILE_HEADER), | |||||
/** BIFF2 raw stream - for Excel 2 */ | |||||
BIFF2(new byte[]{ | |||||
0x09, 0x00, // sid=0x0009 | |||||
0x04, 0x00, // size=0x0004 | |||||
0x00, 0x00, // unused | |||||
0x70, 0x00 // 0x70 = multiple values | |||||
}), | |||||
/** BIFF3 raw stream - for Excel 3 */ | |||||
BIFF3(new byte[]{ | |||||
0x09, 0x02, // sid=0x0209 | |||||
0x06, 0x00, // size=0x0006 | |||||
0x00, 0x00, // unused | |||||
0x70, 0x00 // 0x70 = multiple values | |||||
}), | |||||
/** BIFF4 raw stream - for Excel 4 */ | |||||
BIFF4(new byte[]{ | |||||
0x09, 0x04, // sid=0x0409 | |||||
0x06, 0x00, // size=0x0006 | |||||
0x00, 0x00, // unused | |||||
0x70, 0x00 // 0x70 = multiple values | |||||
},new byte[]{ | |||||
0x09, 0x04, // sid=0x0409 | |||||
0x06, 0x00, // size=0x0006 | |||||
0x00, 0x00, // unused | |||||
0x00, 0x01 | |||||
}), | |||||
/** Old MS Write raw stream */ | |||||
MSWRITE( | |||||
new byte[]{0x31, (byte)0xbe, 0x00, 0x00 }, | |||||
new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }), | |||||
/** RTF document */ | |||||
RTF("{\\rtf"), | |||||
/** PDF document */ | |||||
PDF("%PDF"), | |||||
// keep UNKNOWN always as last enum! | |||||
/** UNKNOWN magic */ | |||||
UNKNOWN(new byte[0]); | |||||
final byte[][] magic; | |||||
FileMagic(long magic) { | |||||
this.magic = new byte[1][8]; | |||||
LittleEndian.putLong(this.magic[0], 0, magic); | |||||
} | |||||
FileMagic(byte[]... magic) { | |||||
this.magic = magic; | |||||
} | |||||
FileMagic(String magic) { | |||||
this(magic.getBytes(LocaleUtil.CHARSET_1252)); | |||||
} | |||||
public static FileMagic valueOf(byte[] magic) { | |||||
for (FileMagic fm : values()) { | |||||
int i=0; | |||||
boolean found = true; | |||||
for (byte[] ma : fm.magic) { | |||||
for (byte m : ma) { | |||||
byte d = magic[i++]; | |||||
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) { | |||||
found = false; | |||||
break; | |||||
} | |||||
} | |||||
if (found) { | |||||
return fm; | |||||
} | |||||
} | |||||
} | |||||
return UNKNOWN; | |||||
} | |||||
/** | |||||
* Get the file magic of the supplied InputStream (which MUST | |||||
* support mark and reset).<p> | |||||
* | |||||
* If unsure if your InputStream does support mark / reset, | |||||
* use {@link #prepareToCheckMagic(InputStream)} to wrap it and make | |||||
* sure to always use that, and not the original!<p> | |||||
* | |||||
* Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean, | |||||
* that the ZIP stream has leading junk bytes | |||||
* | |||||
* @param inp An InputStream which supports either mark/reset | |||||
*/ | |||||
public static FileMagic valueOf(InputStream inp) throws IOException { | |||||
if (!inp.markSupported()) { | |||||
throw new IOException("getFileMagic() only operates on streams which support mark(int)"); | |||||
} | |||||
// Grab the first 8 bytes | |||||
byte[] data = IOUtils.peekFirst8Bytes(inp); | |||||
return FileMagic.valueOf(data); | |||||
} | |||||
/** | |||||
* Checks if an {@link InputStream} can be reseted (i.e. used for checking the header magic) and wraps it if not | |||||
* | |||||
* @param stream stream to be checked for wrapping | |||||
* @return a mark enabled stream | |||||
*/ | |||||
public static InputStream prepareToCheckMagic(InputStream stream) { | |||||
if (stream.markSupported()) { | |||||
return stream; | |||||
} | |||||
// we used to process the data via a PushbackInputStream, but user code could provide a too small one | |||||
// so we use a BufferedInputStream instead now | |||||
return new BufferedInputStream(stream); | |||||
} | |||||
} |
package org.apache.poi.poifs.filesystem; | package org.apache.poi.poifs.filesystem; | ||||
import java.io.ByteArrayInputStream; | |||||
import java.io.Closeable; | import java.io.Closeable; | ||||
import java.io.File; | import java.io.File; | ||||
import java.io.FileInputStream; | import java.io.FileInputStream; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.OutputStream; | import java.io.OutputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.nio.ByteBuffer; | import java.nio.ByteBuffer; | ||||
import java.nio.channels.Channels; | import java.nio.channels.Channels; | ||||
import java.nio.channels.FileChannel; | import java.nio.channels.FileChannel; | ||||
import org.apache.poi.poifs.storage.BlockAllocationTableReader; | import org.apache.poi.poifs.storage.BlockAllocationTableReader; | ||||
import org.apache.poi.poifs.storage.BlockAllocationTableWriter; | import org.apache.poi.poifs.storage.BlockAllocationTableWriter; | ||||
import org.apache.poi.poifs.storage.HeaderBlock; | import org.apache.poi.poifs.storage.HeaderBlock; | ||||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||||
import org.apache.poi.poifs.storage.HeaderBlockWriter; | import org.apache.poi.poifs.storage.HeaderBlockWriter; | ||||
import org.apache.poi.util.CloseIgnoringInputStream; | import org.apache.poi.util.CloseIgnoringInputStream; | ||||
import org.apache.poi.util.IOUtils; | import org.apache.poi.util.IOUtils; | ||||
import org.apache.poi.util.Internal; | import org.apache.poi.util.Internal; | ||||
import org.apache.poi.util.LongField; | |||||
import org.apache.poi.util.POILogFactory; | import org.apache.poi.util.POILogFactory; | ||||
import org.apache.poi.util.POILogger; | import org.apache.poi.util.POILogger; | ||||
import org.apache.poi.util.Removal; | |||||
/** | /** | ||||
* <p>This is the main class of the POIFS system; it manages the entire | * <p>This is the main class of the POIFS system; it manages the entire | ||||
/** | /** | ||||
* Checks that the supplied InputStream (which MUST | * Checks that the supplied InputStream (which MUST | ||||
* support mark and reset, or be a PushbackInputStream) | |||||
* has a POIFS (OLE2) header at the start of it. | |||||
* If your InputStream does not support mark / reset, | |||||
* then wrap it in a PushBackInputStream, then be | |||||
* sure to always use that and not the original! | |||||
* support mark and reset) has a POIFS (OLE2) header at the start of it. | |||||
* If unsure if your InputStream does support mark / reset, | |||||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make | |||||
* sure to always use that, and not the original! | |||||
* | * | ||||
* After the method call, the InputStream is at the | * After the method call, the InputStream is at the | ||||
* same position as of the time of entering the method. | * same position as of the time of entering the method. | ||||
* | * | ||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||||
* @param inp An InputStream which supports mark/reset | |||||
* | |||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||||
*/ | */ | ||||
@Deprecated | |||||
@Removal(version="4.0") | |||||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | ||||
// We want to peek at the first 8 bytes | |||||
inp.mark(8); | |||||
byte[] header = new byte[8]; | |||||
int bytesRead = IOUtils.readFully(inp, header); | |||||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header); | |||||
// Wind back those 8 bytes | |||||
if(inp instanceof PushbackInputStream) { | |||||
PushbackInputStream pin = (PushbackInputStream)inp; | |||||
pin.unread(header, 0, bytesRead); | |||||
} else { | |||||
inp.reset(); | |||||
} | |||||
// Did it match the signature? | |||||
return (signature.get() == HeaderBlockConstants._signature); | |||||
return FileMagic.valueOf(inp) == FileMagic.OLE2; | |||||
} | } | ||||
/** | /** | ||||
* Checks if the supplied first 8 bytes of a stream / file | * Checks if the supplied first 8 bytes of a stream / file | ||||
* has a POIFS (OLE2) header. | * has a POIFS (OLE2) header. | ||||
* | |||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||||
*/ | */ | ||||
@Deprecated | |||||
@Removal(version="4.0") | |||||
public static boolean hasPOIFSHeader(byte[] header8Bytes) { | public static boolean hasPOIFSHeader(byte[] header8Bytes) { | ||||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); | |||||
return (signature.get() == HeaderBlockConstants._signature); | |||||
try { | |||||
return hasPOIFSHeader(new ByteArrayInputStream(header8Bytes)); | |||||
} catch (IOException e) { | |||||
throw new RuntimeException("invalid header check", e); | |||||
} | |||||
} | } | ||||
/** | /** |
import org.apache.poi.poifs.storage.BlockList; | import org.apache.poi.poifs.storage.BlockList; | ||||
import org.apache.poi.poifs.storage.BlockWritable; | import org.apache.poi.poifs.storage.BlockWritable; | ||||
import org.apache.poi.poifs.storage.HeaderBlock; | import org.apache.poi.poifs.storage.HeaderBlock; | ||||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||||
import org.apache.poi.poifs.storage.HeaderBlockWriter; | import org.apache.poi.poifs.storage.HeaderBlockWriter; | ||||
import org.apache.poi.poifs.storage.RawDataBlockList; | import org.apache.poi.poifs.storage.RawDataBlockList; | ||||
import org.apache.poi.poifs.storage.SmallBlockTableReader; | import org.apache.poi.poifs.storage.SmallBlockTableReader; | ||||
import org.apache.poi.poifs.storage.SmallBlockTableWriter; | import org.apache.poi.poifs.storage.SmallBlockTableWriter; | ||||
import org.apache.poi.util.CloseIgnoringInputStream; | import org.apache.poi.util.CloseIgnoringInputStream; | ||||
import org.apache.poi.util.IOUtils; | |||||
import org.apache.poi.util.LongField; | |||||
import org.apache.poi.util.POILogFactory; | import org.apache.poi.util.POILogFactory; | ||||
import org.apache.poi.util.POILogger; | import org.apache.poi.util.POILogger; | ||||
import org.apache.poi.util.Removal; | |||||
/** | /** | ||||
* <p>This is the main class of the POIFS system; it manages the entire | * <p>This is the main class of the POIFS system; it manages the entire | ||||
/** | /** | ||||
* Checks that the supplied InputStream (which MUST | * Checks that the supplied InputStream (which MUST | ||||
* support mark and reset, or be a PushbackInputStream) | |||||
* has a POIFS (OLE2) header at the start of it. | |||||
* If your InputStream does not support mark / reset, | |||||
* then wrap it in a PushBackInputStream, then be | |||||
* support mark and reset) has a POIFS (OLE2) header at the start of it. | |||||
* If unsure if your InputStream does support mark / reset, | |||||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make | |||||
* sure to always use that, and not the original! | * sure to always use that, and not the original! | ||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||||
* | |||||
* After the method call, the InputStream is at the | |||||
* same position as of the time of entering the method. | |||||
* | |||||
* @param inp An InputStream which supports either mark/reset | |||||
* | |||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||||
*/ | */ | ||||
@Deprecated | |||||
@Removal(version="4.0") | |||||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | ||||
// We want to peek at the first 8 bytes | |||||
byte[] header = IOUtils.peekFirst8Bytes(inp); | |||||
return hasPOIFSHeader(header); | |||||
return NPOIFSFileSystem.hasPOIFSHeader(inp); | |||||
} | } | ||||
/** | /** | ||||
* Checks if the supplied first 8 bytes of a stream / file | * Checks if the supplied first 8 bytes of a stream / file | ||||
* has a POIFS (OLE2) header. | * has a POIFS (OLE2) header. | ||||
* | |||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead | |||||
*/ | */ | ||||
@Deprecated | |||||
@Removal(version="4.0") | |||||
public static boolean hasPOIFSHeader(byte[] header8Bytes) { | public static boolean hasPOIFSHeader(byte[] header8Bytes) { | ||||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); | |||||
// Did it match the signature? | |||||
return (signature.get() == HeaderBlockConstants._signature); | |||||
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes); | |||||
} | } | ||||
/** | /** |
super(file); | super(file); | ||||
} | } | ||||
/** | |||||
* Checks that the supplied InputStream (which MUST | |||||
* support mark and reset, or be a PushbackInputStream) | |||||
* has a POIFS (OLE2) header at the start of it. | |||||
* If your InputStream does not support mark / reset, | |||||
* then wrap it in a PushBackInputStream, then be | |||||
* sure to always use that, and not the original! | |||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream | |||||
*/ | |||||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { | |||||
return NPOIFSFileSystem.hasPOIFSHeader(inp); | |||||
} | |||||
/** | |||||
* Checks if the supplied first 8 bytes of a stream / file | |||||
* has a POIFS (OLE2) header. | |||||
*/ | |||||
public static boolean hasPOIFSHeader(byte[] header8Bytes) { | |||||
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes); | |||||
} | |||||
/** | /** | ||||
* Creates a new {@link POIFSFileSystem} in a new {@link File}. | * Creates a new {@link POIFSFileSystem} in a new {@link File}. | ||||
* Use {@link #POIFSFileSystem(File)} to open an existing File, | * Use {@link #POIFSFileSystem(File)} to open an existing File, |
package org.apache.poi.poifs.macros; | package org.apache.poi.poifs.macros; | ||||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase; | |||||
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; | import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; | ||||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase; | |||||
import java.io.ByteArrayInputStream; | import java.io.ByteArrayInputStream; | ||||
import java.io.ByteArrayOutputStream; | import java.io.ByteArrayOutputStream; | ||||
import java.io.FileInputStream; | import java.io.FileInputStream; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.nio.charset.Charset; | import java.nio.charset.Charset; | ||||
import java.util.HashMap; | import java.util.HashMap; | ||||
import java.util.Map; | import java.util.Map; | ||||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | import org.apache.poi.poifs.filesystem.DocumentInputStream; | ||||
import org.apache.poi.poifs.filesystem.DocumentNode; | import org.apache.poi.poifs.filesystem.DocumentNode; | ||||
import org.apache.poi.poifs.filesystem.Entry; | import org.apache.poi.poifs.filesystem.Entry; | ||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | ||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | ||||
import org.apache.poi.util.CodePageUtil; | import org.apache.poi.util.CodePageUtil; | ||||
private NPOIFSFileSystem fs; | private NPOIFSFileSystem fs; | ||||
public VBAMacroReader(InputStream rstream) throws IOException { | public VBAMacroReader(InputStream rstream) throws IOException { | ||||
PushbackInputStream stream = new PushbackInputStream(rstream, 8); | |||||
byte[] header8 = IOUtils.peekFirst8Bytes(stream); | |||||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { | |||||
fs = new NPOIFSFileSystem(stream); | |||||
InputStream is = FileMagic.prepareToCheckMagic(rstream); | |||||
FileMagic fm = FileMagic.valueOf(is); | |||||
if (fm == FileMagic.OLE2) { | |||||
fs = new NPOIFSFileSystem(is); | |||||
} else { | } else { | ||||
openOOXML(stream); | |||||
openOOXML(is); | |||||
} | } | ||||
} | } | ||||
import org.apache.poi.hssf.OldExcelFormatException; | import org.apache.poi.hssf.OldExcelFormatException; | ||||
import org.apache.poi.poifs.common.POIFSBigBlockSize; | import org.apache.poi.poifs.common.POIFSBigBlockSize; | ||||
import org.apache.poi.poifs.common.POIFSConstants; | import org.apache.poi.poifs.common.POIFSConstants; | ||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.poifs.filesystem.NotOLE2FileException; | import org.apache.poi.poifs.filesystem.NotOLE2FileException; | ||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | ||||
import org.apache.poi.util.HexDump; | import org.apache.poi.util.HexDump; | ||||
* The block containing the archive header | * The block containing the archive header | ||||
*/ | */ | ||||
public final class HeaderBlock implements HeaderBlockConstants { | public final class HeaderBlock implements HeaderBlockConstants { | ||||
private static final byte[] MAGIC_BIFF2 = { | |||||
0x09, 0x00, // sid=0x0009 | |||||
0x04, 0x00, // size=0x0004 | |||||
0x00, 0x00, // unused | |||||
0x70, 0x00 // 0x70 = multiple values | |||||
}; | |||||
private static final byte[] MAGIC_BIFF3 = { | |||||
0x09, 0x02, // sid=0x0209 | |||||
0x06, 0x00, // size=0x0006 | |||||
0x00, 0x00, // unused | |||||
0x70, 0x00 // 0x70 = multiple values | |||||
}; | |||||
private static final byte[] MAGIC_BIFF4a = { | |||||
0x09, 0x04, // sid=0x0409 | |||||
0x06, 0x00, // size=0x0006 | |||||
0x00, 0x00, // unused | |||||
0x70, 0x00 // 0x70 = multiple values | |||||
}; | |||||
private static final byte[] MAGIC_BIFF4b = { | |||||
0x09, 0x04, // sid=0x0409 | |||||
0x06, 0x00, // size=0x0006 | |||||
0x00, 0x00, // unused | |||||
0x00, 0x01 | |||||
}; | |||||
private static final byte[] MAGIC_MSWRITEa = { | |||||
0x31, (byte)0xbe, 0x00, 0x00 | |||||
}; | |||||
private static final byte[] MAGIC_MSWRITEb = { | |||||
0x32, (byte)0xbe, 0x00, 0x00 | |||||
}; | |||||
private static final byte _default_value = ( byte ) 0xFF; | private static final byte _default_value = ( byte ) 0xFF; | ||||
/** | /** | ||||
this._data = data.clone(); | this._data = data.clone(); | ||||
// verify signature | // verify signature | ||||
long signature = LittleEndian.getLong(_data, _signature_offset); | |||||
if (signature != _signature) { | |||||
// Is it one of the usual suspects? | |||||
if (cmp(POIFSConstants.OOXML_FILE_HEADER, data)) { | |||||
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " | |||||
+ "You are calling the part of POI that deals with OLE2 Office Documents. " | |||||
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); | |||||
} | |||||
if (cmp(POIFSConstants.RAW_XML_FILE_HEADER, data)) { | |||||
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " | |||||
+ "Formats such as Office 2003 XML are not supported"); | |||||
} | |||||
// Old MS Write raw stream | |||||
if (cmp(MAGIC_MSWRITEa, data) || cmp(MAGIC_MSWRITEb, data)) { | |||||
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " | |||||
+ "Apache POI doesn't currently support this format"); | |||||
} | |||||
// BIFF2 raw stream | |||||
if (cmp(MAGIC_BIFF2, data)) { | |||||
throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. " | |||||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||||
} | |||||
// BIFF3 raw stream | |||||
if (cmp(MAGIC_BIFF3, data)) { | |||||
throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. " | |||||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||||
} | |||||
// BIFF4 raw stream | |||||
if (cmp(MAGIC_BIFF4a, data) || cmp(MAGIC_BIFF4b, data)) { | |||||
throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. " | |||||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||||
} | |||||
// Give a generic error if the OLE2 signature isn't found | |||||
throw new NotOLE2FileException("Invalid header signature; read " | |||||
+ HexDump.longToHex(signature) + ", expected " | |||||
+ HexDump.longToHex(_signature) + " - Your file appears " | |||||
+ "not to be a valid OLE2 document"); | |||||
} | |||||
FileMagic fm = FileMagic.valueOf(data); | |||||
switch (fm) { | |||||
case OLE2: | |||||
break; | |||||
case OOXML: | |||||
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " | |||||
+ "You are calling the part of POI that deals with OLE2 Office Documents. " | |||||
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); | |||||
case XML: | |||||
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " | |||||
+ "Formats such as Office 2003 XML are not supported"); | |||||
case MSWRITE: | |||||
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " | |||||
+ "Apache POI doesn't currently support this format"); | |||||
case BIFF2: | |||||
case BIFF3: | |||||
case BIFF4: | |||||
throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. " | |||||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); | |||||
default: | |||||
// Give a generic error if the OLE2 signature isn't found | |||||
String exp = HexDump.longToHex(_signature); | |||||
String act = HexDump.longToHex(LittleEndian.getLong(data, 0)); | |||||
throw new NotOLE2FileException( | |||||
"Invalid header signature; read " + act + ", expected " + exp + | |||||
" - Your file appears not to be a valid OLE2 document"); | |||||
} | |||||
// Figure out our block size | // Figure out our block size | ||||
if (_data[30] == 12) { | if (_data[30] == 12) { | ||||
this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; | this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; | ||||
stream.write(0); | stream.write(0); | ||||
} | } | ||||
} | } | ||||
private static boolean cmp(byte[] magic, byte[] data) { | |||||
int i=0; | |||||
for (byte m : magic) { | |||||
byte d = data[i++]; | |||||
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) { | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
} | } |
import java.io.FileNotFoundException; | import java.io.FileNotFoundException; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.lang.reflect.InvocationTargetException; | import java.lang.reflect.InvocationTargetException; | ||||
import java.lang.reflect.Method; | import java.lang.reflect.Method; | ||||
import org.apache.poi.poifs.crypt.Decryptor; | import org.apache.poi.poifs.crypt.Decryptor; | ||||
import org.apache.poi.poifs.filesystem.DirectoryNode; | import org.apache.poi.poifs.filesystem.DirectoryNode; | ||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | ||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | ||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | ||||
import org.apache.poi.util.IOUtils; | import org.apache.poi.util.IOUtils; | ||||
* Creates the appropriate HSLFSlideShow / XMLSlideShow from | * Creates the appropriate HSLFSlideShow / XMLSlideShow from | ||||
* the given InputStream. | * the given InputStream. | ||||
* | * | ||||
* <p>Your input stream MUST either support mark/reset, or | |||||
* be wrapped as a {@link PushbackInputStream}! Note that | |||||
* using an {@link InputStream} has a higher memory footprint | |||||
* <p>Note that using an {@link InputStream} has a higher memory footprint | |||||
* than using a {@link File}.</p> | * than using a {@link File}.</p> | ||||
* | * | ||||
* <p>Note that in order to properly release resources the | * <p>Note that in order to properly release resources the | ||||
/** | /** | ||||
* Creates the appropriate HSLFSlideShow / XMLSlideShow from | * Creates the appropriate HSLFSlideShow / XMLSlideShow from | ||||
* the given InputStream, which may be password protected. | * the given InputStream, which may be password protected. | ||||
* <p>Your input stream MUST either support mark/reset, or | |||||
* be wrapped as a {@link PushbackInputStream}! Note that | |||||
* using an {@link InputStream} has a higher memory footprint | |||||
* | |||||
* <p>Note that using an {@link InputStream} has a higher memory footprint | |||||
* than using a {@link File}.</p> | * than using a {@link File}.</p> | ||||
* | * | ||||
* <p>Note that in order to properly release resources the | * <p>Note that in order to properly release resources the | ||||
* @throws EncryptedDocumentException If the wrong password is given for a protected file | * @throws EncryptedDocumentException If the wrong password is given for a protected file | ||||
*/ | */ | ||||
public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException { | public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException { | ||||
// If clearly doesn't do mark/reset, wrap up | |||||
if (! inp.markSupported()) { | |||||
inp = new PushbackInputStream(inp, 8); | |||||
} | |||||
// Ensure that there is at least some data there | |||||
byte[] header8 = IOUtils.peekFirst8Bytes(inp); | |||||
// Try to create | |||||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { | |||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); | |||||
InputStream is = FileMagic.prepareToCheckMagic(inp); | |||||
FileMagic fm = FileMagic.valueOf(is); | |||||
switch (fm) { | |||||
case OLE2: | |||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is); | |||||
return create(fs, password); | return create(fs, password); | ||||
case OOXML: | |||||
return createXSLFSlideShow(is); | |||||
default: | |||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||||
} | } | ||||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { | |||||
return createXSLFSlideShow(inp); | |||||
} | |||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||||
} | } | ||||
/** | /** |
import java.io.FileNotFoundException; | import java.io.FileNotFoundException; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.util.ArrayList; | import java.util.ArrayList; | ||||
import java.util.Iterator; | import java.util.Iterator; | ||||
import org.apache.poi.poifs.crypt.EncryptionInfo; | import org.apache.poi.poifs.crypt.EncryptionInfo; | ||||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | import org.apache.poi.poifs.filesystem.DirectoryEntry; | ||||
import org.apache.poi.poifs.filesystem.DirectoryNode; | import org.apache.poi.poifs.filesystem.DirectoryNode; | ||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | |||||
import org.apache.poi.poifs.filesystem.Entry; | import org.apache.poi.poifs.filesystem.Entry; | ||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | ||||
import org.apache.poi.poifs.filesystem.NotOLE2FileException; | import org.apache.poi.poifs.filesystem.NotOLE2FileException; | ||||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; | import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; | ||||
} | } | ||||
public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { | public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { | ||||
// Figure out the kind of stream | |||||
// If clearly doesn't do mark/reset, wrap up | |||||
if (! inp.markSupported()) { | |||||
inp = new PushbackInputStream(inp, 8); | |||||
} | |||||
InputStream is = FileMagic.prepareToCheckMagic(inp); | |||||
if (NPOIFSFileSystem.hasPOIFSHeader(inp)) { | |||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); | |||||
FileMagic fm = FileMagic.valueOf(is); | |||||
switch (fm) { | |||||
case OLE2: | |||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is); | |||||
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); | boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); | ||||
return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs); | return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs); | ||||
case OOXML: | |||||
return createExtractor(OPCPackage.open(is)); | |||||
default: | |||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||||
} | } | ||||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { | |||||
return createExtractor(OPCPackage.open(inp)); | |||||
} | |||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||||
} | } | ||||
/** | /** |
import java.io.FileNotFoundException; | import java.io.FileNotFoundException; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.net.URI; | import java.net.URI; | ||||
import java.net.URISyntaxException; | import java.net.URISyntaxException; | ||||
import java.util.Enumeration; | import java.util.Enumeration; | ||||
import org.apache.poi.openxml4j.opc.ZipPackage; | import org.apache.poi.openxml4j.opc.ZipPackage; | ||||
import org.apache.poi.openxml4j.util.ZipSecureFile; | import org.apache.poi.openxml4j.util.ZipSecureFile; | ||||
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream; | import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream; | ||||
import org.apache.poi.poifs.common.POIFSConstants; | |||||
import org.apache.poi.poifs.storage.HeaderBlockConstants; | |||||
import org.apache.poi.util.IOUtils; | |||||
import org.apache.poi.util.LittleEndian; | |||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.Removal; | import org.apache.poi.util.Removal; | ||||
@Internal | |||||
public final class ZipHelper { | public final class ZipHelper { | ||||
/** | /** | ||||
* Forward slash use to convert part name between OPC and zip item naming | * Forward slash use to convert part name between OPC and zip item naming | ||||
* Warning - this will consume the first few bytes of the stream, | * Warning - this will consume the first few bytes of the stream, | ||||
* you should push-back or reset the stream after use! | * you should push-back or reset the stream after use! | ||||
*/ | */ | ||||
public static void verifyZipHeader(InputStream stream) | |||||
throws NotOfficeXmlFileException, IOException { | |||||
// Grab the first 8 bytes | |||||
byte[] data = new byte[8]; | |||||
IOUtils.readFully(stream, data); | |||||
// OLE2? | |||||
long signature = LittleEndian.getLong(data); | |||||
if (signature == HeaderBlockConstants._signature) { | |||||
public static void verifyZipHeader(InputStream stream) throws NotOfficeXmlFileException, IOException { | |||||
InputStream is = FileMagic.prepareToCheckMagic(stream); | |||||
FileMagic fm = FileMagic.valueOf(is); | |||||
switch (fm) { | |||||
case OLE2: | |||||
throw new OLE2NotOfficeXmlFileException( | throw new OLE2NotOfficeXmlFileException( | ||||
"The supplied data appears to be in the OLE2 Format. " + | "The supplied data appears to be in the OLE2 Format. " + | ||||
"You are calling the part of POI that deals with OOXML "+ | "You are calling the part of POI that deals with OOXML "+ | ||||
"(Office Open XML) Documents. You need to call a different " + | "(Office Open XML) Documents. You need to call a different " + | ||||
"part of POI to process this data (eg HSSF instead of XSSF)"); | "part of POI to process this data (eg HSSF instead of XSSF)"); | ||||
} | |||||
// Raw XML? | |||||
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER; | |||||
if (data[0] == RAW_XML_FILE_HEADER[0] && | |||||
data[1] == RAW_XML_FILE_HEADER[1] && | |||||
data[2] == RAW_XML_FILE_HEADER[2] && | |||||
data[3] == RAW_XML_FILE_HEADER[3] && | |||||
data[4] == RAW_XML_FILE_HEADER[4]) { | |||||
case XML: | |||||
throw new NotOfficeXmlFileException( | throw new NotOfficeXmlFileException( | ||||
"The supplied data appears to be a raw XML file. " + | "The supplied data appears to be a raw XML file. " + | ||||
"Formats such as Office 2003 XML are not supported"); | "Formats such as Office 2003 XML are not supported"); | ||||
default: | |||||
case OOXML: | |||||
case UNKNOWN: | |||||
// Don't check for a Zip header, as to maintain backwards | |||||
// compatibility we need to let them seek over junk at the | |||||
// start before beginning processing. | |||||
break; | |||||
} | } | ||||
// Don't check for a Zip header, as to maintain backwards | |||||
// compatibility we need to let them seek over junk at the | |||||
// start before beginning processing. | |||||
// Put things back | |||||
if (stream instanceof PushbackInputStream) { | |||||
((PushbackInputStream)stream).unread(data); | |||||
} else if (stream.markSupported()) { | |||||
stream.reset(); | |||||
} else if (stream instanceof FileInputStream) { | |||||
// File open check, about to be closed, nothing to do | |||||
} else { | |||||
// Oh dear... I hope you know what you're doing! | |||||
} | |||||
} | |||||
private static InputStream prepareToCheckHeader(InputStream stream) { | |||||
if (stream instanceof PushbackInputStream) { | |||||
return stream; | |||||
} | |||||
if (stream.markSupported()) { | |||||
stream.mark(8); | |||||
return stream; | |||||
} | |||||
return new PushbackInputStream(stream, 8); | |||||
} | } | ||||
/** | /** | ||||
@SuppressWarnings("resource") | @SuppressWarnings("resource") | ||||
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { | public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { | ||||
// Peek at the first few bytes to sanity check | // Peek at the first few bytes to sanity check | ||||
InputStream checkedStream = prepareToCheckHeader(stream); | |||||
InputStream checkedStream = FileMagic.prepareToCheckMagic(stream); | |||||
verifyZipHeader(checkedStream); | verifyZipHeader(checkedStream); | ||||
// Open as a proper zip stream | // Open as a proper zip stream |
public static class ThresholdInputStream extends PushbackInputStream { | public static class ThresholdInputStream extends PushbackInputStream { | ||||
long counter = 0; | long counter = 0; | ||||
long markPos = 0; | |||||
ThresholdInputStream cis; | ThresholdInputStream cis; | ||||
public ThresholdInputStream(InputStream is, ThresholdInputStream cis) { | public ThresholdInputStream(InputStream is, ThresholdInputStream cis) { | ||||
super(is,1); | |||||
super(is); | |||||
this.cis = cis; | this.cis = cis; | ||||
} | } | ||||
@Override | @Override | ||||
public long skip(long n) throws IOException { | public long skip(long n) throws IOException { | ||||
counter = 0; | |||||
return in.skip(n); | |||||
long s = in.skip(n); | |||||
counter += s; | |||||
return s; | |||||
} | } | ||||
@Override | @Override | ||||
public synchronized void reset() throws IOException { | public synchronized void reset() throws IOException { | ||||
counter = 0; | |||||
in.reset(); | |||||
counter = markPos; | |||||
super.reset(); | |||||
} | } | ||||
public void advance(int advance) throws IOException { | public void advance(int advance) throws IOException { | ||||
} | } | ||||
// one of the limits was reached, report it | // one of the limits was reached, report it | ||||
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data. " | |||||
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk. " | |||||
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit. " | |||||
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + (((double)cis.counter)/counter) | |||||
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data.\n" | |||||
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk.\n" | |||||
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit.\n" | |||||
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + ratio + "\n" | |||||
+ "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO); | + "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO); | ||||
} | } | ||||
@Override | @Override | ||||
public synchronized void mark(int readlimit) { | public synchronized void mark(int readlimit) { | ||||
markPos = counter; | |||||
in.mark(readlimit); | in.mark(readlimit); | ||||
} | } | ||||
} | } |
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.ss.usermodel; | package org.apache.poi.ss.usermodel; | ||||
import java.io.BufferedInputStream; | |||||
import java.io.File; | import java.io.File; | ||||
import java.io.FileNotFoundException; | import java.io.FileNotFoundException; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import org.apache.poi.EmptyFileException; | import org.apache.poi.EmptyFileException; | ||||
import org.apache.poi.EncryptedDocumentException; | import org.apache.poi.EncryptedDocumentException; | ||||
import org.apache.poi.poifs.crypt.Decryptor; | import org.apache.poi.poifs.crypt.Decryptor; | ||||
import org.apache.poi.poifs.filesystem.DirectoryNode; | import org.apache.poi.poifs.filesystem.DirectoryNode; | ||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | ||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | ||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | ||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | import org.apache.poi.poifs.filesystem.POIFSFileSystem; | ||||
* the given InputStream. | * the given InputStream. | ||||
* | * | ||||
* <p>Your input stream MUST either support mark/reset, or | * <p>Your input stream MUST either support mark/reset, or | ||||
* be wrapped as a {@link PushbackInputStream}! Note that | |||||
* be wrapped as a {@link BufferedInputStream}! Note that | |||||
* using an {@link InputStream} has a higher memory footprint | * using an {@link InputStream} has a higher memory footprint | ||||
* than using a {@link File}.</p> | * than using a {@link File}.</p> | ||||
* | * | ||||
/** | /** | ||||
* Creates the appropriate HSSFWorkbook / XSSFWorkbook from | * Creates the appropriate HSSFWorkbook / XSSFWorkbook from | ||||
* the given InputStream, which may be password protected. | |||||
* <p>Your input stream MUST either support mark/reset, or | |||||
* be wrapped as a {@link PushbackInputStream}! Note that | |||||
* using an {@link InputStream} has a higher memory footprint | |||||
* than using a {@link File}.</p> | |||||
* the given InputStream, which may be password protected.<p> | |||||
* | |||||
* Note that using an {@link InputStream} has a higher memory footprint | |||||
* than using a {@link File}.<p> | |||||
* | * | ||||
* <p>Note that in order to properly release resources the | |||||
* Note that in order to properly release resources the | |||||
* Workbook should be closed after use. Note also that loading | * Workbook should be closed after use. Note also that loading | ||||
* from an InputStream requires more memory than loading | * from an InputStream requires more memory than loading | ||||
* from a File, so prefer {@link #create(File)} where possible.</p> | |||||
* from a File, so prefer {@link #create(File)} where possible. | |||||
* | * | ||||
* @param inp The {@link InputStream} to read data from. | * @param inp The {@link InputStream} to read data from. | ||||
* @param password The password that should be used or null if no password is necessary. | * @param password The password that should be used or null if no password is necessary. | ||||
* @throws EmptyFileException If an empty stream is given | * @throws EmptyFileException If an empty stream is given | ||||
*/ | */ | ||||
public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { | public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { | ||||
// If clearly doesn't do mark/reset, wrap up | |||||
if (! inp.markSupported()) { | |||||
inp = new PushbackInputStream(inp, 8); | |||||
} | |||||
// Ensure that there is at least some data there | |||||
byte[] header8 = IOUtils.peekFirst8Bytes(inp); | |||||
// Try to create | |||||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { | |||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); | |||||
InputStream is = FileMagic.prepareToCheckMagic(inp); | |||||
FileMagic fm = FileMagic.valueOf(is); | |||||
switch (fm) { | |||||
case OLE2: | |||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is); | |||||
return create(fs, password); | return create(fs, password); | ||||
case OOXML: | |||||
return new XSSFWorkbook(OPCPackage.open(is)); | |||||
default: | |||||
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||||
} | } | ||||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { | |||||
return new XSSFWorkbook(OPCPackage.open(inp)); | |||||
} | |||||
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); | |||||
} | } | ||||
/** | /** |
import java.io.ByteArrayOutputStream; | import java.io.ByteArrayOutputStream; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import javax.xml.namespace.QName; | import javax.xml.namespace.QName; | ||||
import org.apache.poi.openxml4j.opc.PackagePart; | import org.apache.poi.openxml4j.opc.PackagePart; | ||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | ||||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | import org.apache.poi.poifs.filesystem.DirectoryEntry; | ||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | import org.apache.poi.poifs.filesystem.POIFSFileSystem; | ||||
import org.apache.poi.ss.usermodel.ObjectData; | import org.apache.poi.ss.usermodel.ObjectData; | ||||
import org.apache.poi.util.IOUtils; | import org.apache.poi.util.IOUtils; | ||||
InputStream is = null; | InputStream is = null; | ||||
try { | try { | ||||
is = getObjectPart().getInputStream(); | is = getObjectPart().getInputStream(); | ||||
// If clearly doesn't do mark/reset, wrap up | |||||
if (! is.markSupported()) { | |||||
is = new PushbackInputStream(is, 8); | |||||
} | |||||
// Ensure that there is at least some data there | |||||
byte[] header8 = IOUtils.peekFirst8Bytes(is); | |||||
// Try to create | |||||
return NPOIFSFileSystem.hasPOIFSHeader(header8); | |||||
is = FileMagic.prepareToCheckMagic(is); | |||||
return FileMagic.valueOf(is) == FileMagic.OLE2; | |||||
} catch (IOException e) { | } catch (IOException e) { | ||||
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e); | LOG.log(POILogger.WARN, "can't determine if directory entry exists", e); | ||||
return false; | return false; |
package org.apache.poi; | package org.apache.poi; | ||||
import static org.junit.Assert.assertArrayEquals; | |||||
import static org.junit.Assert.assertEquals; | |||||
import static org.junit.Assert.assertFalse; | |||||
import static org.junit.Assert.assertTrue; | |||||
import java.io.ByteArrayInputStream; | import java.io.ByteArrayInputStream; | ||||
import java.io.IOException; | |||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.util.Arrays; | |||||
import junit.framework.TestCase; | |||||
import org.apache.poi.hssf.HSSFTestDataSamples; | import org.apache.poi.hssf.HSSFTestDataSamples; | ||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||||
import org.apache.poi.openxml4j.opc.OPCPackage; | import org.apache.poi.openxml4j.opc.OPCPackage; | ||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; | ||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.util.IOUtils; | |||||
import org.junit.Test; | |||||
/** | /** | ||||
* Class to test that HXF correctly detects OOXML | * Class to test that HXF correctly detects OOXML | ||||
* documents | * documents | ||||
*/ | */ | ||||
public class TestDetectAsOOXML extends TestCase | |||||
{ | |||||
public void testOpensProperly() throws Exception | |||||
{ | |||||
public class TestDetectAsOOXML { | |||||
@Test | |||||
public void testOpensProperly() throws IOException, InvalidFormatException { | |||||
OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx")); | OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx")); | ||||
} | } | ||||
public void testDetectAsPOIFS() throws Exception { | |||||
InputStream in; | |||||
// ooxml file is | |||||
in = new PushbackInputStream( | |||||
HSSFTestDataSamples.openSampleFileStream("SampleSS.xlsx"), 10 | |||||
); | |||||
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||||
in.close(); | |||||
// xls file isn't | |||||
in = new PushbackInputStream( | |||||
HSSFTestDataSamples.openSampleFileStream("SampleSS.xls"), 10 | |||||
); | |||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||||
in.close(); | |||||
// text file isn't | |||||
in = new PushbackInputStream( | |||||
HSSFTestDataSamples.openSampleFileStream("SampleSS.txt"), 10 | |||||
); | |||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||||
in.close(); | |||||
@Test | |||||
public void testDetectAsPOIFS() throws IOException { | |||||
Object fileAndMagic[][] = { | |||||
{ "SampleSS.xlsx", FileMagic.OOXML }, | |||||
{ "SampleSS.xls", FileMagic.OLE2 }, | |||||
{ "SampleSS.txt", FileMagic.UNKNOWN } | |||||
}; | |||||
for (Object fm[] : fileAndMagic) { | |||||
InputStream is = HSSFTestDataSamples.openSampleFileStream((String)fm[0]); | |||||
is = FileMagic.prepareToCheckMagic(is); | |||||
FileMagic act = FileMagic.valueOf(is); | |||||
if (act == FileMagic.OOXML) { | |||||
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(is)); | |||||
} | |||||
assertEquals("file magic failed for "+fm[0], fm[1], act); | |||||
is.close(); | |||||
} | |||||
} | } | ||||
@Test | |||||
public void testFileCorruption() throws Exception { | public void testFileCorruption() throws Exception { | ||||
// create test InputStream | // create test InputStream | ||||
byte[] testData = { (byte)1, (byte)2, (byte)3 }; | |||||
byte[] testData = { 1, 2, 3 }; | |||||
ByteArrayInputStream testInput = new ByteArrayInputStream(testData); | ByteArrayInputStream testInput = new ByteArrayInputStream(testData); | ||||
InputStream is = FileMagic.prepareToCheckMagic(testInput); | |||||
// detect header | // detect header | ||||
InputStream in = new PushbackInputStream(testInput, 10); | |||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); | |||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(is)); | |||||
// check if InputStream is still intact | // check if InputStream is still intact | ||||
byte[] test = new byte[3]; | |||||
assertEquals(3, in.read(test)); | |||||
assertTrue(Arrays.equals(testData, test)); | |||||
assertEquals(-1, in.read()); | |||||
in.close(); | |||||
byte[] act = IOUtils.toByteArray(is); | |||||
assertArrayEquals(testData, act); | |||||
assertEquals(-1, is.read()); | |||||
is.close(); | |||||
} | } | ||||
} | } |
package org.apache.poi.openxml4j.opc; | package org.apache.poi.openxml4j.opc; | ||||
import org.apache.poi.*; | |||||
import static org.junit.Assert.assertEquals; | |||||
import static org.junit.Assert.assertFalse; | |||||
import static org.junit.Assert.assertNotNull; | |||||
import static org.junit.Assert.assertNull; | |||||
import static org.junit.Assert.assertTrue; | |||||
import static org.junit.Assert.fail; | |||||
import java.io.BufferedInputStream; | |||||
import java.io.ByteArrayInputStream; | |||||
import java.io.ByteArrayOutputStream; | |||||
import java.io.File; | |||||
import java.io.FileInputStream; | |||||
import java.io.FileOutputStream; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.io.OutputStream; | |||||
import java.io.PushbackInputStream; | |||||
import java.lang.reflect.InvocationTargetException; | |||||
import java.net.URI; | |||||
import java.net.URISyntaxException; | |||||
import java.util.Enumeration; | |||||
import java.util.HashMap; | |||||
import java.util.List; | |||||
import java.util.TreeMap; | |||||
import java.util.regex.Pattern; | |||||
import java.util.zip.ZipEntry; | |||||
import java.util.zip.ZipFile; | |||||
import java.util.zip.ZipOutputStream; | |||||
import org.apache.poi.EncryptedDocumentException; | |||||
import org.apache.poi.POIDataSamples; | |||||
import org.apache.poi.POITestCase; | |||||
import org.apache.poi.POITextExtractor; | |||||
import org.apache.poi.POIXMLException; | |||||
import org.apache.poi.UnsupportedFileFormatException; | |||||
import org.apache.poi.extractor.ExtractorFactory; | import org.apache.poi.extractor.ExtractorFactory; | ||||
import org.apache.poi.hssf.HSSFTestDataSamples; | import org.apache.poi.hssf.HSSFTestDataSamples; | ||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; | import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; | ||||
import org.apache.poi.openxml4j.exceptions.*; | |||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException; | |||||
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; | |||||
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException; | |||||
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; | |||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||||
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; | import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; | ||||
import org.apache.poi.openxml4j.opc.internal.FileHelper; | import org.apache.poi.openxml4j.opc.internal.FileHelper; | ||||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; | import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; | ||||
import org.apache.poi.openxml4j.util.ZipSecureFile; | import org.apache.poi.openxml4j.util.ZipSecureFile; | ||||
import org.apache.poi.ss.usermodel.Workbook; | import org.apache.poi.ss.usermodel.Workbook; | ||||
import org.apache.poi.ss.usermodel.WorkbookFactory; | import org.apache.poi.ss.usermodel.WorkbookFactory; | ||||
import org.apache.poi.util.*; | |||||
import org.apache.poi.util.DocumentHelper; | |||||
import org.apache.poi.util.IOUtils; | |||||
import org.apache.poi.util.POILogFactory; | |||||
import org.apache.poi.util.POILogger; | |||||
import org.apache.poi.util.TempFile; | |||||
import org.apache.poi.xssf.XSSFTestDataSamples; | import org.apache.poi.xssf.XSSFTestDataSamples; | ||||
import org.apache.xmlbeans.XmlException; | import org.apache.xmlbeans.XmlException; | ||||
import org.junit.Ignore; | import org.junit.Ignore; | ||||
import org.w3c.dom.NodeList; | import org.w3c.dom.NodeList; | ||||
import org.xml.sax.SAXException; | import org.xml.sax.SAXException; | ||||
import java.io.*; | |||||
import java.lang.reflect.InvocationTargetException; | |||||
import java.net.URI; | |||||
import java.net.URISyntaxException; | |||||
import java.util.Enumeration; | |||||
import java.util.HashMap; | |||||
import java.util.List; | |||||
import java.util.TreeMap; | |||||
import java.util.regex.Pattern; | |||||
import java.util.zip.ZipEntry; | |||||
import java.util.zip.ZipFile; | |||||
import java.util.zip.ZipOutputStream; | |||||
import static org.junit.Assert.*; | |||||
public final class TestPackage { | public final class TestPackage { | ||||
private static final POILogger logger = POILogFactory.getLogger(TestPackage.class); | private static final POILogger logger = POILogFactory.getLogger(TestPackage.class); | ||||
} | } | ||||
// bug 60128 | // bug 60128 | ||||
@Test | |||||
@Test(expected=NotOfficeXmlFileException.class) | |||||
public void testCorruptFile() throws IOException, InvalidFormatException { | public void testCorruptFile() throws IOException, InvalidFormatException { | ||||
OPCPackage pkg = null; | |||||
File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx"); | File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx"); | ||||
OPCPackage.open(file, PackageAccess.READ); | |||||
} | |||||
// bug 61381 | |||||
@Test | |||||
public void testTooShortFilterStreams() throws IOException, InvalidFormatException { | |||||
File xssf = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx"); | |||||
File hssf = POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls"); | |||||
InputStream isList[] = { | |||||
new PushbackInputStream(new FileInputStream(xssf), 2), | |||||
new BufferedInputStream(new FileInputStream(xssf), 2), | |||||
new PushbackInputStream(new FileInputStream(hssf), 2), | |||||
new BufferedInputStream(new FileInputStream(hssf), 2), | |||||
}; | |||||
try { | try { | ||||
pkg = OPCPackage.open(file, PackageAccess.READ); | |||||
} catch (NotOfficeXmlFileException e) { | |||||
/*System.out.println(e.getClass().getName()); | |||||
System.out.println(e.getMessage()); | |||||
e.printStackTrace();*/ | |||||
// ignore exception | |||||
for (InputStream is : isList) { | |||||
WorkbookFactory.create(is).close(); | |||||
} | |||||
} finally { | } finally { | ||||
if (pkg != null) { | |||||
pkg.close(); | |||||
for (InputStream is : isList) { | |||||
IOUtils.closeQuietly(is); | |||||
} | } | ||||
} | } | ||||
} | } |
import java.io.ByteArrayOutputStream; | import java.io.ByteArrayOutputStream; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.security.GeneralSecurityException; | import java.security.GeneralSecurityException; | ||||
import org.apache.poi.EncryptedDocumentException; | import org.apache.poi.EncryptedDocumentException; | ||||
import org.apache.poi.poifs.filesystem.DirectoryNode; | import org.apache.poi.poifs.filesystem.DirectoryNode; | ||||
import org.apache.poi.poifs.filesystem.DocumentEntry; | import org.apache.poi.poifs.filesystem.DocumentEntry; | ||||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | import org.apache.poi.poifs.filesystem.DocumentInputStream; | ||||
import org.apache.poi.poifs.filesystem.FileMagic; | |||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | import org.apache.poi.poifs.filesystem.POIFSFileSystem; | ||||
import org.apache.poi.util.BoundedInputStream; | import org.apache.poi.util.BoundedInputStream; | ||||
import org.apache.poi.util.IOUtils; | import org.apache.poi.util.IOUtils; | ||||
* POIFSFileSystem from it, and returns that. | * POIFSFileSystem from it, and returns that. | ||||
*/ | */ | ||||
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { | public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { | ||||
// Open a PushbackInputStream, so we can peek at the first few bytes | |||||
PushbackInputStream pis = new PushbackInputStream(istream,6); | |||||
byte[] first6 = IOUtils.toByteArray(pis, 6); | |||||
// Does it start with {\rtf ? If so, it's really RTF | |||||
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' | |||||
&& first6[3] == 't' && first6[4] == 'f') { | |||||
throw new IllegalArgumentException("The document is really a RTF file"); | |||||
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) { | |||||
throw new IllegalArgumentException("The document is really a PDF file"); | |||||
} | |||||
// OK, so it's neither RTF nor PDF | |||||
// Open a POIFSFileSystem on the (pushed back) stream | |||||
pis.unread(first6); | |||||
return new POIFSFileSystem(pis); | |||||
InputStream is = FileMagic.prepareToCheckMagic(istream); | |||||
FileMagic fm = FileMagic.valueOf(is); | |||||
if (fm != FileMagic.OLE2) { | |||||
throw new IllegalArgumentException("The document is really a "+fm+" file"); | |||||
} | |||||
return new POIFSFileSystem(is); | |||||
} | } | ||||
/** | /** |
import java.io.ByteArrayInputStream; | import java.io.ByteArrayInputStream; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.io.PushbackInputStream; | |||||
import java.util.Arrays; | import java.util.Arrays; | ||||
import org.apache.poi.hssf.HSSFTestDataSamples; | import org.apache.poi.hssf.HSSFTestDataSamples; | ||||
// text file isn't | // text file isn't | ||||
confirmIsPOIFS("SampleSS.txt", false); | confirmIsPOIFS("SampleSS.txt", false); | ||||
} | } | ||||
private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException { | private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException { | ||||
InputStream in = new PushbackInputStream(openSampleStream(sampleFileName), 10); | |||||
InputStream in = FileMagic.prepareToCheckMagic(openSampleStream(sampleFileName)); | |||||
try { | try { | ||||
boolean actualResult; | boolean actualResult; | ||||
try { | try { | ||||
InputStream testInput = new ByteArrayInputStream(testData); | InputStream testInput = new ByteArrayInputStream(testData); | ||||
// detect header | // detect header | ||||
InputStream in = new PushbackInputStream(testInput, 10); | |||||
InputStream in = FileMagic.prepareToCheckMagic(testInput); | |||||
assertFalse(POIFSFileSystem.hasPOIFSHeader(in)); | assertFalse(POIFSFileSystem.hasPOIFSHeader(in)); | ||||
// check if InputStream is still intact | // check if InputStream is still intact | ||||
InputStream testInput = new ByteArrayInputStream(testData); | InputStream testInput = new ByteArrayInputStream(testData); | ||||
// detect header | // detect header | ||||
InputStream in = new PushbackInputStream(testInput, 10); | |||||
InputStream in = FileMagic.prepareToCheckMagic(testInput); | |||||
assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in)); | assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in)); | ||||
// check if InputStream is still intact | // check if InputStream is still intact |