diff options
-rw-r--r-- | src/java/org/apache/poi/POIDocument.java | 2 | ||||
-rw-r--r-- | src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java | 6 | ||||
-rw-r--r-- | src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java | 13 | ||||
-rw-r--r-- | src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java | 6 | ||||
-rw-r--r-- | src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java | 121 | ||||
-rw-r--r-- | src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java | 403 | ||||
-rw-r--r-- | src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java | 69 | ||||
-rw-r--r-- | test-data/document/password_password_cryptoapi.doc | bin | 0 -> 27136 bytes | |||
-rw-r--r-- | test-data/document/password_tika_binaryrc4.doc | bin | 0 -> 22016 bytes |
9 files changed, 399 insertions, 221 deletions
diff --git a/src/java/org/apache/poi/POIDocument.java b/src/java/org/apache/poi/POIDocument.java index dc626da49b..774507722a 100644 --- a/src/java/org/apache/poi/POIDocument.java +++ b/src/java/org/apache/poi/POIDocument.java @@ -195,7 +195,7 @@ public abstract class POIDocument implements Closeable { NPOIFSFileSystem encPoifs = null; String step = "getting"; try { - if (encryptionInfo != null) { + if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) { step = "getting encrypted"; String encryptedStream = null; for (String s : encryptedStreamNames) { diff --git a/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java b/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java index 382ae2f130..f589f02cb9 100644 --- a/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java +++ b/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java @@ -32,7 +32,11 @@ public final class Biff8EncryptionKey { * @param password pass <code>null</code> to clear user password (and use default) */ public static void setCurrentUserPassword(String password) { - _userPasswordTLS.set(password); + if (password == null) { + _userPasswordTLS.remove(); + } else { + _userPasswordTLS.set(password); + } } /** diff --git a/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java b/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java index e8895c1abf..c70105fb93 100644 --- a/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java +++ b/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java @@ -122,8 +122,11 @@ public class EncryptionInfo implements Cloneable { } else if ( 2 <= versionMajor && versionMajor <= 4 && versionMinor == 2) { - encryptionMode = (preferredEncryptionMode == cryptoAPI) ? cryptoAPI : standard; encryptionFlags = dis.readInt(); + encryptionMode = ( + preferredEncryptionMode == cryptoAPI + || !flagAES.isSet(encryptionFlags)) + ? 
cryptoAPI : standard; } else if ( versionMajor == agile.versionMajor && versionMinor == agile.versionMinor){ @@ -268,6 +271,14 @@ public class EncryptionInfo implements Cloneable { return encryptionMode; } + /** + * @return true, if Document Summary / Summary are encrypted and stored in the {@code EncryptedStream} stream, + * otherwise the Summaries aren't encrypted and located in their usual streams + */ + public boolean isDocPropsEncrypted() { + return !flagDocProps.isSet(getEncryptionFlags()); + } + @Override public EncryptionInfo clone() throws CloneNotSupportedException { EncryptionInfo other = (EncryptionInfo)super.clone(); diff --git a/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java b/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java index 1cc6b1b2f4..8be9ab3faa 100644 --- a/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java +++ b/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java @@ -51,9 +51,9 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable { super(stream, size, chunkSize); } - public BinaryRC4CipherInputStream(InputStream stream) + public BinaryRC4CipherInputStream(InputStream stream, int size, int initialPos) throws GeneralSecurityException { - super(stream, Integer.MAX_VALUE, chunkSize); + super(stream, size, chunkSize, initialPos); } } @@ -141,7 +141,7 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable { @Override public InputStream getDataStream(InputStream stream, int size, int initialPos) throws IOException, GeneralSecurityException { - return new BinaryRC4CipherInputStream(stream); + return new BinaryRC4CipherInputStream(stream, size, initialPos); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index 7ec8fb6efc..25e83b95ef 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ 
b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -18,6 +18,7 @@ package org.apache.poi.hwpf; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -25,9 +26,29 @@ import java.io.OutputStream; import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.hpsf.SummaryInformation; -import org.apache.poi.hwpf.model.*; +import org.apache.poi.hwpf.model.BookmarksTables; +import org.apache.poi.hwpf.model.CHPBinTable; +import org.apache.poi.hwpf.model.ComplexFileTable; +import org.apache.poi.hwpf.model.DocumentProperties; +import org.apache.poi.hwpf.model.EscherRecordHolder; +import org.apache.poi.hwpf.model.FSPADocumentPart; +import org.apache.poi.hwpf.model.FSPATable; +import org.apache.poi.hwpf.model.FieldsTables; +import org.apache.poi.hwpf.model.FontTable; +import org.apache.poi.hwpf.model.ListTables; +import org.apache.poi.hwpf.model.NoteType; +import org.apache.poi.hwpf.model.NotesTables; +import org.apache.poi.hwpf.model.PAPBinTable; +import org.apache.poi.hwpf.model.PicturesTable; +import org.apache.poi.hwpf.model.RevisionMarkAuthorTable; +import org.apache.poi.hwpf.model.SavedByTable; +import org.apache.poi.hwpf.model.SectionTable; +import org.apache.poi.hwpf.model.SinglentonTextPiece; +import org.apache.poi.hwpf.model.StyleSheet; +import org.apache.poi.hwpf.model.SubdocumentType; +import org.apache.poi.hwpf.model.TextPiece; +import org.apache.poi.hwpf.model.TextPieceTable; import org.apache.poi.hwpf.model.io.HWPFFileSystem; -import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.hwpf.usermodel.Bookmarks; import org.apache.poi.hwpf.usermodel.BookmarksImpl; import org.apache.poi.hwpf.usermodel.Field; @@ -40,13 +61,12 @@ import org.apache.poi.hwpf.usermodel.OfficeDrawings; import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl; import org.apache.poi.hwpf.usermodel.Range; import 
org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.poifs.crypt.EncryptionInfo; import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.EntryUtils; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.util.IOUtils; import org.apache.poi.util.Internal; /** @@ -59,8 +79,6 @@ public final class HWPFDocument extends HWPFDocumentCore { private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable"; private static final String STREAM_DATA = "Data"; - private static final String STREAM_TABLE_0 = "0Table"; - private static final String STREAM_TABLE_1 = "1Table"; /** table stream buffer*/ protected byte[] _tableStream; @@ -178,11 +196,7 @@ public final class HWPFDocument extends HWPFDocumentCore { } // use the fib to determine the name of the table stream. - String name = STREAM_TABLE_0; - if (_fib.getFibBase().isFWhichTblStm()) - { - name = STREAM_TABLE_1; - } + String name = (_fib.getFibBase().isFWhichTblStm()) ? STREAM_TABLE_1 : STREAM_TABLE_0; // Grab the table stream. if (!directory.hasEntry(name)) { @@ -190,25 +204,12 @@ public final class HWPFDocument extends HWPFDocumentCore { } // read in the table stream. - InputStream is = directory.createDocumentInputStream(name); - _tableStream = IOUtils.toByteArray(is); - is.close(); + _tableStream = getDocumentEntryBytes(name, _fib.getFibBase().getLKey(), Integer.MAX_VALUE); _fib.fillVariableFields(_mainStream, _tableStream); // read in the data stream. 
- InputStream dis = null; - try { - DocumentEntry dataProps = (DocumentEntry)directory.getEntry(STREAM_DATA); - dis = directory.createDocumentInputStream(STREAM_DATA); - _dataStream = IOUtils.toByteArray(dis, dataProps.getSize()); - } catch(IOException e) { - _dataStream = new byte[0]; - } finally { - if (dis != null) { - dis.close(); - } - } + _dataStream = directory.hasEntry(STREAM_DATA) ? getDocumentEntryBytes(STREAM_DATA, 0, Integer.MAX_VALUE) : new byte[0]; // Get the cp of the start of text in the main stream // The latest spec doc says this is always zero! @@ -233,8 +234,7 @@ public final class HWPFDocument extends HWPFDocumentCore { */ boolean preserveBinTables = false; try { - preserveBinTables = Boolean.parseBoolean( System - .getProperty( PROPERTY_PRESERVE_BIN_TABLES ) ); + preserveBinTables = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) ); } catch ( Exception exc ) { // ignore; } @@ -250,8 +250,7 @@ public final class HWPFDocument extends HWPFDocumentCore { */ boolean preserveTextTable = false; try { - preserveTextTable = Boolean.parseBoolean( System - .getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) ); + preserveTextTable = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) ); } catch ( Exception exc ) { // ignore; } @@ -612,8 +611,8 @@ public final class HWPFDocument extends HWPFDocumentCore { private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException { // initialize our streams for writing. 
HWPFFileSystem docSys = new HWPFFileSystem(); - HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT); - HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1); + ByteArrayOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT); + ByteArrayOutputStream tableStream = docSys.getStream(STREAM_TABLE_1); //HWPFOutputStream dataStream = docSys.getStream("Data"); int tableOffset = 0; @@ -630,13 +629,13 @@ public final class HWPFDocument extends HWPFDocumentCore { // it after we write everything else. byte[] placeHolder = new byte[fibSize]; wordDocumentStream.write(placeHolder); - int mainOffset = wordDocumentStream.getOffset(); + int mainOffset = wordDocumentStream.size(); // write out the StyleSheet. _fib.setFcStshf(tableOffset); _ss.writeTo(tableStream); - _fib.setLcbStshf(tableStream.getOffset() - tableOffset); - tableOffset = tableStream.getOffset(); + _fib.setLcbStshf(tableStream.size() - tableOffset); + tableOffset = tableStream.size(); // get fcMin and fcMac because we will be writing the actual text with the // complex table. @@ -654,9 +653,9 @@ public final class HWPFDocument extends HWPFDocumentCore { // write out the Complex table, includes text. _fib.setFcClx(tableOffset); _cft.writeTo(wordDocumentStream, tableStream); - _fib.setLcbClx(tableStream.getOffset() - tableOffset); - tableOffset = tableStream.getOffset(); - int fcMac = wordDocumentStream.getOffset(); + _fib.setLcbClx(tableStream.size() - tableOffset); + tableOffset = tableStream.size(); + int fcMac = wordDocumentStream.size(); /* * dop (document properties record) Written immediately after the end of @@ -670,8 +669,8 @@ public final class HWPFDocument extends HWPFDocumentCore { // write out the DocumentProperties. 
_fib.setFcDop(tableOffset); _dop.writeTo(tableStream); - _fib.setLcbDop(tableStream.getOffset() - tableOffset); - tableOffset = tableStream.getOffset(); + _fib.setLcbDop(tableStream.size() - tableOffset); + tableOffset = tableStream.size(); /* * plcfBkmkf (table recording beginning CPs of bookmarks) Written @@ -683,7 +682,7 @@ public final class HWPFDocument extends HWPFDocumentCore { if ( _bookmarksTables != null ) { _bookmarksTables.writePlcfBkmkf( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); } /* @@ -696,7 +695,7 @@ public final class HWPFDocument extends HWPFDocumentCore { if ( _bookmarksTables != null ) { _bookmarksTables.writePlcfBkmkl( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); } /* @@ -710,8 +709,8 @@ public final class HWPFDocument extends HWPFDocumentCore { // write out the CHPBinTable. _fib.setFcPlcfbteChpx(tableOffset); _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable()); - _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset); - tableOffset = tableStream.getOffset(); + _fib.setLcbPlcfbteChpx(tableStream.size() - tableOffset); + tableOffset = tableStream.size(); /* * plcfbtePapx (bin table for PAP FKPs) Written immediately after the @@ -724,8 +723,8 @@ public final class HWPFDocument extends HWPFDocumentCore { // write out the PAPBinTable. 
_fib.setFcPlcfbtePapx(tableOffset); _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable()); - _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset); - tableOffset = tableStream.getOffset(); + _fib.setLcbPlcfbtePapx(tableStream.size() - tableOffset); + tableOffset = tableStream.size(); /* * plcfendRef (endnote reference position table) Written immediately @@ -739,7 +738,7 @@ public final class HWPFDocument extends HWPFDocumentCore { */ _endnotesTables.writeRef( _fib, tableStream ); _endnotesTables.writeTxt( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); /* * plcffld*** (table of field positions and statuses for annotation @@ -753,7 +752,7 @@ public final class HWPFDocument extends HWPFDocumentCore { if ( _fieldsTables != null ) { _fieldsTables.write( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); } /* @@ -768,7 +767,7 @@ public final class HWPFDocument extends HWPFDocumentCore { */ _footnotesTables.writeRef( _fib, tableStream ); _footnotesTables.writeTxt( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); /* * plcfsed (section table) Written immediately after the previously @@ -781,8 +780,8 @@ public final class HWPFDocument extends HWPFDocumentCore { // write out the SectionTable. 
_fib.setFcPlcfsed(tableOffset); _st.writeTo(wordDocumentStream, tableStream); - _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset); - tableOffset = tableStream.getOffset(); + _fib.setLcbPlcfsed(tableStream.size() - tableOffset); + tableOffset = tableStream.size(); // write out the list tables if ( _lt != null ) @@ -800,7 +799,7 @@ public final class HWPFDocument extends HWPFDocumentCore { * Specification; Page 25 of 210 */ _lt.writeListDataTo( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); /* * plflfo (more list formats) Written immediately after the end of @@ -814,7 +813,7 @@ public final class HWPFDocument extends HWPFDocumentCore { * Specification; Page 26 of 210 */ _lt.writeListOverridesTo( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); } /* @@ -827,7 +826,7 @@ public final class HWPFDocument extends HWPFDocumentCore { if ( _bookmarksTables != null ) { _bookmarksTables.writeSttbfBkmk( _fib, tableStream ); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); } /* @@ -843,9 +842,9 @@ public final class HWPFDocument extends HWPFDocumentCore { { _fib.setFcSttbSavedBy(tableOffset); _sbt.writeTo(tableStream); - _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset); + _fib.setLcbSttbSavedBy(tableStream.size() - tableOffset); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); } // write out the revision mark authors table. @@ -853,21 +852,21 @@ public final class HWPFDocument extends HWPFDocumentCore { { _fib.setFcSttbfRMark(tableOffset); _rmat.writeTo(tableStream); - _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset); + _fib.setLcbSttbfRMark(tableStream.size() - tableOffset); - tableOffset = tableStream.getOffset(); + tableOffset = tableStream.size(); } // write out the FontTable. 
_fib.setFcSttbfffn(tableOffset); _ft.writeTo(tableStream); - _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset); - tableOffset = tableStream.getOffset(); + _fib.setLcbSttbfffn(tableStream.size() - tableOffset); + tableOffset = tableStream.size(); // set some variables in the FileInformationBlock. _fib.getFibBase().setFcMin(fcMin); _fib.getFibBase().setFcMac(fcMac); - _fib.setCbMac(wordDocumentStream.getOffset()); + _fib.setCbMac(wordDocumentStream.size()); // make sure that the table, doc and data streams use big blocks. byte[] mainBuf = wordDocumentStream.toByteArray(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java index 69c1997cb7..c52abc101e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java @@ -17,13 +17,19 @@ package org.apache.poi.hwpf; +import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; +import java.security.GeneralSecurityException; +import org.apache.poi.EncryptedDocumentException; import org.apache.poi.POIDocument; +import org.apache.poi.hpsf.PropertySet; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; import org.apache.poi.hwpf.model.CHPBinTable; +import org.apache.poi.hwpf.model.FibBase; import org.apache.poi.hwpf.model.FileInformationBlock; import org.apache.poi.hwpf.model.FontTable; import org.apache.poi.hwpf.model.ListTables; @@ -34,145 +40,242 @@ import org.apache.poi.hwpf.model.TextPieceTable; import org.apache.poi.hwpf.usermodel.ObjectPoolImpl; import org.apache.poi.hwpf.usermodel.ObjectsPool; import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.poifs.crypt.ChunkedCipherInputStream; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.crypt.EncryptionInfo; +import 
org.apache.poi.poifs.crypt.EncryptionMode; import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.util.BoundedInputStream; import org.apache.poi.util.IOUtils; import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndianByteArrayInputStream; /** * This class holds much of the core of a Word document, but * without some of the table structure information. * You generally want to work with one of - * {@link HWPFDocument} or {@link HWPFOldDocument} + * {@link HWPFDocument} or {@link HWPFOldDocument} */ -public abstract class HWPFDocumentCore extends POIDocument -{ +public abstract class HWPFDocumentCore extends POIDocument { protected static final String STREAM_OBJECT_POOL = "ObjectPool"; protected static final String STREAM_WORD_DOCUMENT = "WordDocument"; + protected static final String STREAM_TABLE_0 = "0Table"; + protected static final String STREAM_TABLE_1 = "1Table"; - /** Holds OLE2 objects */ - protected ObjectPoolImpl _objectPool; - - /** The FIB */ - protected FileInformationBlock _fib; - - /** Holds styles for this document.*/ - protected StyleSheet _ss; - - /** Contains formatting properties for text*/ - protected CHPBinTable _cbt; - - /** Contains formatting properties for paragraphs*/ - protected PAPBinTable _pbt; - - /** Contains formatting properties for sections.*/ - protected SectionTable _st; - - /** Holds fonts for this document.*/ - protected FontTable _ft; - - /** Hold list tables */ - protected ListTables _lt; - - /** main document stream buffer*/ - protected byte[] _mainStream; - - protected HWPFDocumentCore() - { - super((DirectoryNode)null); - } - - /** - * Takes an InputStream, verifies that it's not RTF or PDF, builds a - * POIFSFileSystem from it, and returns that. 
- */ - public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { - // Open a PushbackInputStream, so we can peek at the first few bytes - PushbackInputStream pis = new PushbackInputStream(istream,6); - byte[] first6 = IOUtils.toByteArray(pis, 6); - - // Does it start with {\rtf ? If so, it's really RTF - if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' - && first6[3] == 't' && first6[4] == 'f') { - throw new IllegalArgumentException("The document is really a RTF file"); - } else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) { - throw new IllegalArgumentException("The document is really a PDF file"); - } - - // OK, so it's neither RTF nor PDF - // Open a POIFSFileSystem on the (pushed back) stream - pis.unread(first6); - return new POIFSFileSystem(pis); - } - - /** - * This constructor loads a Word document from an InputStream. - * - * @param istream The InputStream that contains the Word document. - * @throws IOException If there is an unexpected IOException from the passed - * in InputStream. - */ - public HWPFDocumentCore(InputStream istream) throws IOException - { - //do Ole stuff - this( verifyAndBuildPOIFS(istream) ); - } - - /** - * This constructor loads a Word document from a POIFSFileSystem - * - * @param pfilesystem The POIFSFileSystem that contains the Word document. - * @throws IOException If there is an unexpected IOException from the passed - * in POIFSFileSystem. - */ - public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException - { - this(pfilesystem.getRoot()); - } - - /** - * This constructor loads a Word document from a specific point - * in a POIFSFileSystem, probably not the default. - * Used typically to open embeded documents. - * - * @param directory The DirectoryNode that contains the Word document. - * @throws IOException If there is an unexpected IOException from the passed - * in POIFSFileSystem. 
- */ - public HWPFDocumentCore(DirectoryNode directory) throws IOException { - // Sort out the hpsf properties - super(directory); - - // read in the main stream. - DocumentEntry documentProps = (DocumentEntry)directory.getEntry("WordDocument"); - DocumentInputStream dis = null; - try { - dis = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT); - _mainStream = IOUtils.toByteArray(dis, documentProps.getSize()); - } finally { - if (dis != null) { - dis.close(); + private static final int FIB_BASE_LEN = 68; + + /** Holds OLE2 objects */ + protected ObjectPoolImpl _objectPool; + + /** The FIB */ + protected FileInformationBlock _fib; + + /** Holds styles for this document.*/ + protected StyleSheet _ss; + + /** Contains formatting properties for text*/ + protected CHPBinTable _cbt; + + /** Contains formatting properties for paragraphs*/ + protected PAPBinTable _pbt; + + /** Contains formatting properties for sections.*/ + protected SectionTable _st; + + /** Holds fonts for this document.*/ + protected FontTable _ft; + + /** Hold list tables */ + protected ListTables _lt; + + /** main document stream buffer*/ + protected byte[] _mainStream; + + private EncryptionInfo _encryptionInfo; + + protected HWPFDocumentCore() { + super((DirectoryNode)null); + } + + /** + * Takes an InputStream, verifies that it's not RTF or PDF, builds a + * POIFSFileSystem from it, and returns that. + */ + public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { + // Open a PushbackInputStream, so we can peek at the first few bytes + PushbackInputStream pis = new PushbackInputStream(istream,6); + byte[] first6 = IOUtils.toByteArray(pis, 6); + + // Does it start with {\rtf ? 
If so, it's really RTF + if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' + && first6[3] == 't' && first6[4] == 'f') { + throw new IllegalArgumentException("The document is really a RTF file"); + } else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) { + throw new IllegalArgumentException("The document is really a PDF file"); + } + + // OK, so it's neither RTF nor PDF + // Open a POIFSFileSystem on the (pushed back) stream + pis.unread(first6); + return new POIFSFileSystem(pis); + } + + /** + * This constructor loads a Word document from an InputStream. + * + * @param istream The InputStream that contains the Word document. + * @throws IOException If there is an unexpected IOException from the passed + * in InputStream. + */ + public HWPFDocumentCore(InputStream istream) throws IOException { + //do Ole stuff + this( verifyAndBuildPOIFS(istream) ); + } + + /** + * This constructor loads a Word document from a POIFSFileSystem + * + * @param pfilesystem The POIFSFileSystem that contains the Word document. + * @throws IOException If there is an unexpected IOException from the passed + * in POIFSFileSystem. + */ + public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException { + this(pfilesystem.getRoot()); + } + + /** + * This constructor loads a Word document from a specific point + * in a POIFSFileSystem, probably not the default. + * Used typically to open embeded documents. + * + * @param directory The DirectoryNode that contains the Word document. + * @throws IOException If there is an unexpected IOException from the passed + * in POIFSFileSystem. + */ + public HWPFDocumentCore(DirectoryNode directory) throws IOException { + // Sort out the hpsf properties + super(directory); + + // read in the main stream. 
+ _mainStream = getDocumentEntryBytes(STREAM_WORD_DOCUMENT, FIB_BASE_LEN, Integer.MAX_VALUE); + _fib = new FileInformationBlock(_mainStream); + + DirectoryEntry objectPoolEntry = null; + if (directory.hasEntry(STREAM_OBJECT_POOL)) { + objectPoolEntry = (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL); } + _objectPool = new ObjectPoolImpl(objectPoolEntry); } - // Create our FIB, and check for the doc being encrypted - _fib = new FileInformationBlock(_mainStream); + /** + * For a given named property entry, either return it or null if + * if it wasn't found + * + * @param setName The property to read + * @return The value of the given property or null if it wasn't found. + */ + @Override + protected PropertySet getPropertySet(String setName) { + EncryptionInfo ei; + try { + ei = getEncryptionInfo(); + } catch (IOException e) { + throw new RuntimeException(e); + } + return (ei == null) + ? super.getPropertySet(setName) + : super.getPropertySet(setName, ei); + } + + protected EncryptionInfo getEncryptionInfo() throws IOException { + if (_encryptionInfo != null) { + return _encryptionInfo; + } + + // Create our FIB, and check for the doc being encrypted + byte[] fibBaseBytes = (_mainStream != null) ? _mainStream : getDocumentEntryBytes(STREAM_WORD_DOCUMENT, -1, FIB_BASE_LEN); + FibBase fibBase = new FibBase( fibBaseBytes, 0 ); + if (!fibBase.isFEncrypted()) { + return null; + } - DirectoryEntry objectPoolEntry; - try { - objectPoolEntry = (DirectoryEntry) directory - .getEntry(STREAM_OBJECT_POOL); - } catch (FileNotFoundException exc) { - objectPoolEntry = null; + String tableStrmName = fibBase.isFWhichTblStm() ? STREAM_TABLE_1 : STREAM_TABLE_0; + byte[] tableStream = getDocumentEntryBytes(tableStrmName, -1, fibBase.getLKey()); + LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(tableStream); + EncryptionMode em = fibBase.isFObfuscated() ? 
EncryptionMode.xor : null; + EncryptionInfo ei = new EncryptionInfo(leis, em); + Decryptor dec = ei.getDecryptor(); + dec.setChunkSize(512); + try { + String pass = Biff8EncryptionKey.getCurrentUserPassword(); + if (pass == null) { + pass = Decryptor.DEFAULT_PASSWORD; + } + if (!dec.verifyPassword(pass)) { + throw new EncryptedDocumentException("document is encrypted, password is invalid - use Biff8EncryptionKey.setCurrentUserPasswort() to set password before opening"); + } + } catch (GeneralSecurityException e) { + throw new IOException(e.getMessage(), e); + } + _encryptionInfo = ei; + return ei; } - _objectPool = new ObjectPoolImpl(objectPoolEntry); - } - /** + /** + * Reads OLE Stream into byte array - if an {@link EncryptionInfo} is available, + * decrypt the bytes starting at encryptionOffset. If encryptionOffset = -1, then do not try + * to decrypt the bytes + * + * @param name the name of the stream + * @param encryptionOffset the offset from which to start decrypting, use {@code -1} for no decryption + * @param len length of the bytes to be read, use {@link Integer#MAX_VALUE} for all bytes + * @return the read bytes + * @throws IOException if the stream can't be found + */ + protected byte[] getDocumentEntryBytes(String name, int encryptionOffset, int len) throws IOException { + DirectoryNode dir = getDirectory(); + DocumentEntry documentProps = (DocumentEntry)dir.getEntry(name); + DocumentInputStream dis = dir.createDocumentInputStream(documentProps); + EncryptionInfo ei = (encryptionOffset > -1) ? 
getEncryptionInfo() : null; + int streamSize = documentProps.getSize(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.min(streamSize,len)); + + InputStream is = dis; + try { + if (ei != null) { + try { + Decryptor dec = ei.getDecryptor(); + is = dec.getDataStream(dis, streamSize, 0); + if (encryptionOffset > 0) { + ChunkedCipherInputStream cis = (ChunkedCipherInputStream)is; + byte plain[] = new byte[encryptionOffset]; + cis.readPlain(plain, 0, encryptionOffset); + bos.write(plain); + } + } catch (GeneralSecurityException e) { + throw new IOException(e.getMessage(), e); + } + } + // This simplifies a few combinations, so we actually always try to copy len bytes + // regardless if encryptionOffset is greater than 0 + if (len < Integer.MAX_VALUE) { + is = new BoundedInputStream(is, len); + } + IOUtils.copy(is, bos); + return bos.toByteArray(); + } finally { + IOUtils.closeQuietly(is); + IOUtils.closeQuietly(dis); + } + } + + + /** * Returns the range which covers the whole of the document, but excludes * any headers and footers. 
*/ @@ -198,43 +301,35 @@ public abstract class HWPFDocumentCore extends POIDocument @Internal public abstract StringBuilder getText(); - public CHPBinTable getCharacterTable() - { - return _cbt; - } - - public PAPBinTable getParagraphTable() - { - return _pbt; - } - - public SectionTable getSectionTable() - { - return _st; - } - - public StyleSheet getStyleSheet() - { - return _ss; - } - - public ListTables getListTables() - { - return _lt; - } - - public FontTable getFontTable() - { - return _ft; - } - - public FileInformationBlock getFileInformationBlock() - { - return _fib; - } - - public ObjectsPool getObjectsPool() - { + public CHPBinTable getCharacterTable() { + return _cbt; + } + + public PAPBinTable getParagraphTable() { + return _pbt; + } + + public SectionTable getSectionTable() { + return _st; + } + + public StyleSheet getStyleSheet() { + return _ss; + } + + public ListTables getListTables() { + return _lt; + } + + public FontTable getFontTable() { + return _ft; + } + + public FileInformationBlock getFileInformationBlock() { + return _fib; + } + + public ObjectsPool getObjectsPool() { return _objectPool; } @@ -244,4 +339,4 @@ public abstract class HWPFDocumentCore extends POIDocument public byte[] getMainStream() { return _mainStream; } -} +}
\ No newline at end of file diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java b/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java new file mode 100644 index 0000000000..875fb9ec7b --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java @@ -0,0 +1,69 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hwpf; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; +import org.apache.poi.hwpf.extractor.WordExtractor; +import org.junit.AfterClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class HWPFTestEncryption { + @AfterClass + public static void clearPass() { + Biff8EncryptionKey.setCurrentUserPassword(null); + } + + @Parameter(value = 0) + public String file; + + @Parameter(value = 1) + public String password; + + @Parameter(value = 2) + public String expected; + + @Parameters(name="{0}") + public static Collection<String[]> data() { + return Arrays.asList( + new String[]{ "password_tika_binaryrc4.doc", "tika", "This is an encrypted Word 2007 File." 
}, + new String[]{ "password_password_cryptoapi.doc", "password", "This is a test" } + ); + } + + @Test + public void extract() throws IOException { + Biff8EncryptionKey.setCurrentUserPassword(password); + HWPFDocument docD = HWPFTestDataSamples.openSampleFile(file); + WordExtractor we = new WordExtractor(docD); + String actual = we.getText().trim(); + assertEquals(expected, actual); + we.close(); + docD.close(); + } +} diff --git a/test-data/document/password_password_cryptoapi.doc b/test-data/document/password_password_cryptoapi.doc Binary files differ new file mode 100644 index 0000000000..7ef0128582 --- /dev/null +++ b/test-data/document/password_password_cryptoapi.doc diff --git a/test-data/document/password_tika_binaryrc4.doc b/test-data/document/password_tika_binaryrc4.doc Binary files differ new file mode 100644 index 0000000000..b407783d21 --- /dev/null +++ b/test-data/document/password_tika_binaryrc4.doc |