aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/java/org/apache/poi/POIDocument.java2
-rw-r--r--src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java6
-rw-r--r--src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java13
-rw-r--r--src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java6
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java121
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java403
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java69
-rw-r--r--test-data/document/password_password_cryptoapi.docbin0 -> 27136 bytes
-rw-r--r--test-data/document/password_tika_binaryrc4.docbin0 -> 22016 bytes
9 files changed, 399 insertions, 221 deletions
diff --git a/src/java/org/apache/poi/POIDocument.java b/src/java/org/apache/poi/POIDocument.java
index dc626da49b..774507722a 100644
--- a/src/java/org/apache/poi/POIDocument.java
+++ b/src/java/org/apache/poi/POIDocument.java
@@ -195,7 +195,7 @@ public abstract class POIDocument implements Closeable {
NPOIFSFileSystem encPoifs = null;
String step = "getting";
try {
- if (encryptionInfo != null) {
+ if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
step = "getting encrypted";
String encryptedStream = null;
for (String s : encryptedStreamNames) {
diff --git a/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java b/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java
index 382ae2f130..f589f02cb9 100644
--- a/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java
+++ b/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java
@@ -32,7 +32,11 @@ public final class Biff8EncryptionKey {
* @param password pass <code>null</code> to clear user password (and use default)
*/
public static void setCurrentUserPassword(String password) {
- _userPasswordTLS.set(password);
+ if (password == null) {
+ _userPasswordTLS.remove();
+ } else {
+ _userPasswordTLS.set(password);
+ }
}
/**
diff --git a/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java b/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java
index e8895c1abf..c70105fb93 100644
--- a/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java
+++ b/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java
@@ -122,8 +122,11 @@ public class EncryptionInfo implements Cloneable {
} else if (
2 <= versionMajor && versionMajor <= 4
&& versionMinor == 2) {
- encryptionMode = (preferredEncryptionMode == cryptoAPI) ? cryptoAPI : standard;
encryptionFlags = dis.readInt();
+ encryptionMode = (
+ preferredEncryptionMode == cryptoAPI
+ || !flagAES.isSet(encryptionFlags))
+ ? cryptoAPI : standard;
} else if (
versionMajor == agile.versionMajor
&& versionMinor == agile.versionMinor){
@@ -268,6 +271,14 @@ public class EncryptionInfo implements Cloneable {
return encryptionMode;
}
+ /**
+ * Tells whether the document property sets are stored encrypted.
+ * The result is the negation of the {@code flagDocProps} bit in the encryption flags.
+ *
+ * @return {@code true}, if Document Summary / Summary are encrypted and stored in the {@code EncryptedStream} stream,
+ * otherwise the Summaries aren't encrypted and are located in their usual streams
+ */
+ public boolean isDocPropsEncrypted() {
+ return !flagDocProps.isSet(getEncryptionFlags());
+ }
+
@Override
public EncryptionInfo clone() throws CloneNotSupportedException {
EncryptionInfo other = (EncryptionInfo)super.clone();
diff --git a/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java b/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java
index 1cc6b1b2f4..8be9ab3faa 100644
--- a/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java
+++ b/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java
@@ -51,9 +51,9 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
super(stream, size, chunkSize);
}
- public BinaryRC4CipherInputStream(InputStream stream)
+ public BinaryRC4CipherInputStream(InputStream stream, int size, int initialPos)
throws GeneralSecurityException {
- super(stream, Integer.MAX_VALUE, chunkSize);
+ super(stream, size, chunkSize, initialPos);
}
}
@@ -141,7 +141,7 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
@Override
public InputStream getDataStream(InputStream stream, int size, int initialPos)
throws IOException, GeneralSecurityException {
- return new BinaryRC4CipherInputStream(stream);
+ return new BinaryRC4CipherInputStream(stream, size, initialPos);
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
index 7ec8fb6efc..25e83b95ef 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
@@ -18,6 +18,7 @@
package org.apache.poi.hwpf;
import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -25,9 +26,29 @@ import java.io.OutputStream;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.SummaryInformation;
-import org.apache.poi.hwpf.model.*;
+import org.apache.poi.hwpf.model.BookmarksTables;
+import org.apache.poi.hwpf.model.CHPBinTable;
+import org.apache.poi.hwpf.model.ComplexFileTable;
+import org.apache.poi.hwpf.model.DocumentProperties;
+import org.apache.poi.hwpf.model.EscherRecordHolder;
+import org.apache.poi.hwpf.model.FSPADocumentPart;
+import org.apache.poi.hwpf.model.FSPATable;
+import org.apache.poi.hwpf.model.FieldsTables;
+import org.apache.poi.hwpf.model.FontTable;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.model.NoteType;
+import org.apache.poi.hwpf.model.NotesTables;
+import org.apache.poi.hwpf.model.PAPBinTable;
+import org.apache.poi.hwpf.model.PicturesTable;
+import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
+import org.apache.poi.hwpf.model.SavedByTable;
+import org.apache.poi.hwpf.model.SectionTable;
+import org.apache.poi.hwpf.model.SinglentonTextPiece;
+import org.apache.poi.hwpf.model.StyleSheet;
+import org.apache.poi.hwpf.model.SubdocumentType;
+import org.apache.poi.hwpf.model.TextPiece;
+import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
-import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.BookmarksImpl;
import org.apache.poi.hwpf.usermodel.Field;
@@ -40,13 +61,12 @@ import org.apache.poi.hwpf.usermodel.OfficeDrawings;
import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.EntryUtils;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
/**
@@ -59,8 +79,6 @@ public final class HWPFDocument extends HWPFDocumentCore {
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
private static final String STREAM_DATA = "Data";
- private static final String STREAM_TABLE_0 = "0Table";
- private static final String STREAM_TABLE_1 = "1Table";
/** table stream buffer*/
protected byte[] _tableStream;
@@ -178,11 +196,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
}
// use the fib to determine the name of the table stream.
- String name = STREAM_TABLE_0;
- if (_fib.getFibBase().isFWhichTblStm())
- {
- name = STREAM_TABLE_1;
- }
+ String name = (_fib.getFibBase().isFWhichTblStm()) ? STREAM_TABLE_1 : STREAM_TABLE_0;
// Grab the table stream.
if (!directory.hasEntry(name)) {
@@ -190,25 +204,12 @@ public final class HWPFDocument extends HWPFDocumentCore {
}
// read in the table stream.
- InputStream is = directory.createDocumentInputStream(name);
- _tableStream = IOUtils.toByteArray(is);
- is.close();
+ _tableStream = getDocumentEntryBytes(name, _fib.getFibBase().getLKey(), Integer.MAX_VALUE);
_fib.fillVariableFields(_mainStream, _tableStream);
// read in the data stream.
- InputStream dis = null;
- try {
- DocumentEntry dataProps = (DocumentEntry)directory.getEntry(STREAM_DATA);
- dis = directory.createDocumentInputStream(STREAM_DATA);
- _dataStream = IOUtils.toByteArray(dis, dataProps.getSize());
- } catch(IOException e) {
- _dataStream = new byte[0];
- } finally {
- if (dis != null) {
- dis.close();
- }
- }
+ _dataStream = directory.hasEntry(STREAM_DATA) ? getDocumentEntryBytes(STREAM_DATA, 0, Integer.MAX_VALUE) : new byte[0];
// Get the cp of the start of text in the main stream
// The latest spec doc says this is always zero!
@@ -233,8 +234,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
boolean preserveBinTables = false;
try {
- preserveBinTables = Boolean.parseBoolean( System
- .getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
+ preserveBinTables = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
} catch ( Exception exc ) {
// ignore;
}
@@ -250,8 +250,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
boolean preserveTextTable = false;
try {
- preserveTextTable = Boolean.parseBoolean( System
- .getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
+ preserveTextTable = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
} catch ( Exception exc ) {
// ignore;
}
@@ -612,8 +611,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException {
// initialize our streams for writing.
HWPFFileSystem docSys = new HWPFFileSystem();
- HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
- HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
+ ByteArrayOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
+ ByteArrayOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
//HWPFOutputStream dataStream = docSys.getStream("Data");
int tableOffset = 0;
@@ -630,13 +629,13 @@ public final class HWPFDocument extends HWPFDocumentCore {
// it after we write everything else.
byte[] placeHolder = new byte[fibSize];
wordDocumentStream.write(placeHolder);
- int mainOffset = wordDocumentStream.getOffset();
+ int mainOffset = wordDocumentStream.size();
// write out the StyleSheet.
_fib.setFcStshf(tableOffset);
_ss.writeTo(tableStream);
- _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
+ _fib.setLcbStshf(tableStream.size() - tableOffset);
+ tableOffset = tableStream.size();
// get fcMin and fcMac because we will be writing the actual text with the
// complex table.
@@ -654,9 +653,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the Complex table, includes text.
_fib.setFcClx(tableOffset);
_cft.writeTo(wordDocumentStream, tableStream);
- _fib.setLcbClx(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- int fcMac = wordDocumentStream.getOffset();
+ _fib.setLcbClx(tableStream.size() - tableOffset);
+ tableOffset = tableStream.size();
+ int fcMac = wordDocumentStream.size();
/*
* dop (document properties record) Written immediately after the end of
@@ -670,8 +669,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the DocumentProperties.
_fib.setFcDop(tableOffset);
_dop.writeTo(tableStream);
- _fib.setLcbDop(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
+ _fib.setLcbDop(tableStream.size() - tableOffset);
+ tableOffset = tableStream.size();
/*
* plcfBkmkf (table recording beginning CPs of bookmarks) Written
@@ -683,7 +682,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null )
{
_bookmarksTables.writePlcfBkmkf( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
}
/*
@@ -696,7 +695,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null )
{
_bookmarksTables.writePlcfBkmkl( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
}
/*
@@ -710,8 +709,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the CHPBinTable.
_fib.setFcPlcfbteChpx(tableOffset);
_cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
- _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
+ _fib.setLcbPlcfbteChpx(tableStream.size() - tableOffset);
+ tableOffset = tableStream.size();
/*
* plcfbtePapx (bin table for PAP FKPs) Written immediately after the
@@ -724,8 +723,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the PAPBinTable.
_fib.setFcPlcfbtePapx(tableOffset);
_pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
- _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
+ _fib.setLcbPlcfbtePapx(tableStream.size() - tableOffset);
+ tableOffset = tableStream.size();
/*
* plcfendRef (endnote reference position table) Written immediately
@@ -739,7 +738,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
_endnotesTables.writeRef( _fib, tableStream );
_endnotesTables.writeTxt( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
/*
* plcffld*** (table of field positions and statuses for annotation
@@ -753,7 +752,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _fieldsTables != null )
{
_fieldsTables.write( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
}
/*
@@ -768,7 +767,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
_footnotesTables.writeRef( _fib, tableStream );
_footnotesTables.writeTxt( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
/*
* plcfsed (section table) Written immediately after the previously
@@ -781,8 +780,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the SectionTable.
_fib.setFcPlcfsed(tableOffset);
_st.writeTo(wordDocumentStream, tableStream);
- _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
+ _fib.setLcbPlcfsed(tableStream.size() - tableOffset);
+ tableOffset = tableStream.size();
// write out the list tables
if ( _lt != null )
@@ -800,7 +799,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
* Specification; Page 25 of 210
*/
_lt.writeListDataTo( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
/*
* plflfo (more list formats) Written immediately after the end of
@@ -814,7 +813,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
* Specification; Page 26 of 210
*/
_lt.writeListOverridesTo( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
}
/*
@@ -827,7 +826,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null )
{
_bookmarksTables.writeSttbfBkmk( _fib, tableStream );
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
}
/*
@@ -843,9 +842,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
{
_fib.setFcSttbSavedBy(tableOffset);
_sbt.writeTo(tableStream);
- _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);
+ _fib.setLcbSttbSavedBy(tableStream.size() - tableOffset);
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
}
// write out the revision mark authors table.
@@ -853,21 +852,21 @@ public final class HWPFDocument extends HWPFDocumentCore {
{
_fib.setFcSttbfRMark(tableOffset);
_rmat.writeTo(tableStream);
- _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);
+ _fib.setLcbSttbfRMark(tableStream.size() - tableOffset);
- tableOffset = tableStream.getOffset();
+ tableOffset = tableStream.size();
}
// write out the FontTable.
_fib.setFcSttbfffn(tableOffset);
_ft.writeTo(tableStream);
- _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
+ _fib.setLcbSttbfffn(tableStream.size() - tableOffset);
+ tableOffset = tableStream.size();
// set some variables in the FileInformationBlock.
_fib.getFibBase().setFcMin(fcMin);
_fib.getFibBase().setFcMac(fcMac);
- _fib.setCbMac(wordDocumentStream.getOffset());
+ _fib.setCbMac(wordDocumentStream.size());
// make sure that the table, doc and data streams use big blocks.
byte[] mainBuf = wordDocumentStream.toByteArray();
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
index 69c1997cb7..c52abc101e 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
@@ -17,13 +17,19 @@
package org.apache.poi.hwpf;
+import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
+import java.security.GeneralSecurityException;
+import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hwpf.model.CHPBinTable;
+import org.apache.poi.hwpf.model.FibBase;
import org.apache.poi.hwpf.model.FileInformationBlock;
import org.apache.poi.hwpf.model.FontTable;
import org.apache.poi.hwpf.model.ListTables;
@@ -34,145 +40,242 @@ import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.usermodel.ObjectPoolImpl;
import org.apache.poi.hwpf.usermodel.ObjectsPool;
import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.poifs.crypt.ChunkedCipherInputStream;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.crypt.EncryptionMode;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.BoundedInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndianByteArrayInputStream;
/**
* This class holds much of the core of a Word document, but
* without some of the table structure information.
* You generally want to work with one of
- * {@link HWPFDocument} or {@link HWPFOldDocument}
+ * {@link HWPFDocument} or {@link HWPFOldDocument}
*/
-public abstract class HWPFDocumentCore extends POIDocument
-{
+public abstract class HWPFDocumentCore extends POIDocument {
protected static final String STREAM_OBJECT_POOL = "ObjectPool";
protected static final String STREAM_WORD_DOCUMENT = "WordDocument";
+ protected static final String STREAM_TABLE_0 = "0Table";
+ protected static final String STREAM_TABLE_1 = "1Table";
- /** Holds OLE2 objects */
- protected ObjectPoolImpl _objectPool;
-
- /** The FIB */
- protected FileInformationBlock _fib;
-
- /** Holds styles for this document.*/
- protected StyleSheet _ss;
-
- /** Contains formatting properties for text*/
- protected CHPBinTable _cbt;
-
- /** Contains formatting properties for paragraphs*/
- protected PAPBinTable _pbt;
-
- /** Contains formatting properties for sections.*/
- protected SectionTable _st;
-
- /** Holds fonts for this document.*/
- protected FontTable _ft;
-
- /** Hold list tables */
- protected ListTables _lt;
-
- /** main document stream buffer*/
- protected byte[] _mainStream;
-
- protected HWPFDocumentCore()
- {
- super((DirectoryNode)null);
- }
-
- /**
- * Takes an InputStream, verifies that it's not RTF or PDF, builds a
- * POIFSFileSystem from it, and returns that.
- */
- public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
- // Open a PushbackInputStream, so we can peek at the first few bytes
- PushbackInputStream pis = new PushbackInputStream(istream,6);
- byte[] first6 = IOUtils.toByteArray(pis, 6);
-
- // Does it start with {\rtf ? If so, it's really RTF
- if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
- && first6[3] == 't' && first6[4] == 'f') {
- throw new IllegalArgumentException("The document is really a RTF file");
- } else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
- throw new IllegalArgumentException("The document is really a PDF file");
- }
-
- // OK, so it's neither RTF nor PDF
- // Open a POIFSFileSystem on the (pushed back) stream
- pis.unread(first6);
- return new POIFSFileSystem(pis);
- }
-
- /**
- * This constructor loads a Word document from an InputStream.
- *
- * @param istream The InputStream that contains the Word document.
- * @throws IOException If there is an unexpected IOException from the passed
- * in InputStream.
- */
- public HWPFDocumentCore(InputStream istream) throws IOException
- {
- //do Ole stuff
- this( verifyAndBuildPOIFS(istream) );
- }
-
- /**
- * This constructor loads a Word document from a POIFSFileSystem
- *
- * @param pfilesystem The POIFSFileSystem that contains the Word document.
- * @throws IOException If there is an unexpected IOException from the passed
- * in POIFSFileSystem.
- */
- public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException
- {
- this(pfilesystem.getRoot());
- }
-
- /**
- * This constructor loads a Word document from a specific point
- * in a POIFSFileSystem, probably not the default.
- * Used typically to open embeded documents.
- *
- * @param directory The DirectoryNode that contains the Word document.
- * @throws IOException If there is an unexpected IOException from the passed
- * in POIFSFileSystem.
- */
- public HWPFDocumentCore(DirectoryNode directory) throws IOException {
- // Sort out the hpsf properties
- super(directory);
-
- // read in the main stream.
- DocumentEntry documentProps = (DocumentEntry)directory.getEntry("WordDocument");
- DocumentInputStream dis = null;
- try {
- dis = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT);
- _mainStream = IOUtils.toByteArray(dis, documentProps.getSize());
- } finally {
- if (dis != null) {
- dis.close();
+ private static final int FIB_BASE_LEN = 68;
+
+ /** Holds OLE2 objects */
+ protected ObjectPoolImpl _objectPool;
+
+ /** The FIB */
+ protected FileInformationBlock _fib;
+
+ /** Holds styles for this document.*/
+ protected StyleSheet _ss;
+
+ /** Contains formatting properties for text*/
+ protected CHPBinTable _cbt;
+
+ /** Contains formatting properties for paragraphs*/
+ protected PAPBinTable _pbt;
+
+ /** Contains formatting properties for sections.*/
+ protected SectionTable _st;
+
+ /** Holds fonts for this document.*/
+ protected FontTable _ft;
+
+ /** Hold list tables */
+ protected ListTables _lt;
+
+ /** main document stream buffer*/
+ protected byte[] _mainStream;
+
+ private EncryptionInfo _encryptionInfo;
+
+ protected HWPFDocumentCore() {
+ super((DirectoryNode)null);
+ }
+
+ /**
+ * Takes an InputStream, verifies that it's not RTF or PDF, builds a
+ * POIFSFileSystem from it, and returns that.
+ */
+ public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
+ // Open a PushbackInputStream, so we can peek at the first few bytes
+ PushbackInputStream pis = new PushbackInputStream(istream,6);
+ byte[] first6 = IOUtils.toByteArray(pis, 6);
+
+ // Does it start with {\rtf ? If so, it's really RTF
+ if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
+ && first6[3] == 't' && first6[4] == 'f') {
+ throw new IllegalArgumentException("The document is really a RTF file");
+ } else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
+ throw new IllegalArgumentException("The document is really a PDF file");
+ }
+
+ // OK, so it's neither RTF nor PDF
+ // Open a POIFSFileSystem on the (pushed back) stream
+ pis.unread(first6);
+ return new POIFSFileSystem(pis);
+ }
+
+ /**
+ * This constructor loads a Word document from an InputStream.
+ *
+ * @param istream The InputStream that contains the Word document.
+ * @throws IOException If there is an unexpected IOException from the passed
+ * in InputStream.
+ */
+ public HWPFDocumentCore(InputStream istream) throws IOException {
+ //do Ole stuff
+ this( verifyAndBuildPOIFS(istream) );
+ }
+
+ /**
+ * This constructor loads a Word document from a POIFSFileSystem
+ *
+ * @param pfilesystem The POIFSFileSystem that contains the Word document.
+ * @throws IOException If there is an unexpected IOException from the passed
+ * in POIFSFileSystem.
+ */
+ public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
+ this(pfilesystem.getRoot());
+ }
+
+ /**
+ * This constructor loads a Word document from a specific point
+ * in a POIFSFileSystem, probably not the default.
+ * Typically used to open embedded documents.
+ *
+ * @param directory The DirectoryNode that contains the Word document.
+ * @throws IOException If there is an unexpected IOException from the passed
+ * in POIFSFileSystem.
+ */
+ public HWPFDocumentCore(DirectoryNode directory) throws IOException {
+ // Sort out the hpsf properties
+ super(directory);
+
+ // read in the main stream; FIB_BASE_LEN is passed as the encryption offset,
+ // i.e. the offset from which decryption starts if the document is encrypted
+ _mainStream = getDocumentEntryBytes(STREAM_WORD_DOCUMENT, FIB_BASE_LEN, Integer.MAX_VALUE);
+ _fib = new FileInformationBlock(_mainStream);
+
+ // the object pool is optional - a null entry is handed over if the directory has none
+ DirectoryEntry objectPoolEntry = null;
+ if (directory.hasEntry(STREAM_OBJECT_POOL)) {
+ objectPoolEntry = (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL);
}
+ _objectPool = new ObjectPoolImpl(objectPoolEntry);
}
- // Create our FIB, and check for the doc being encrypted
- _fib = new FileInformationBlock(_mainStream);
+ /**
+ * For a given named property entry, either return it or null
+ * if it wasn't found. If the document has encryption info, the lookup
+ * is delegated to the encryption-aware variant of the super class.
+ *
+ * @param setName The property to read
+ * @return The value of the given property or null if it wasn't found.
+ * @throws RuntimeException wrapping the IOException raised while determining the encryption info
+ */
+ @Override
+ protected PropertySet getPropertySet(String setName) {
+ EncryptionInfo ei;
+ try {
+ ei = getEncryptionInfo();
+ } catch (IOException e) {
+ // the overridden signature declares no checked exception, hence the unchecked wrapper
+ throw new RuntimeException(e);
+ }
+ return (ei == null)
+ ? super.getPropertySet(setName)
+ : super.getPropertySet(setName, ei);
+ }
+
+ /**
+ * Determines the encryption info of this document and caches it once the password was verified.
+ * <p>
+ * The FIB base is taken from the already loaded main stream or, if that isn't available yet,
+ * from the first {@code FIB_BASE_LEN} undecrypted bytes of the WordDocument stream. The encryption
+ * header is parsed from the first {@code lKey} undecrypted bytes of the table stream.
+ * The password is taken from {@link Biff8EncryptionKey#getCurrentUserPassword()} and falls back to
+ * {@link Decryptor#DEFAULT_PASSWORD} if none is set.
+ *
+ * @return the encryption info or {@code null} if the FIB base doesn't flag the document as encrypted
+ * @throws IOException if a stream can't be read or the password verification fails with a
+ * {@link GeneralSecurityException}
+ * @throws EncryptedDocumentException if the password is invalid
+ */
+ protected EncryptionInfo getEncryptionInfo() throws IOException {
+ if (_encryptionInfo != null) {
+ return _encryptionInfo;
+ }
+
+ // Parse only the FIB base, and check for the doc being encrypted
+ byte[] fibBaseBytes = (_mainStream != null) ? _mainStream : getDocumentEntryBytes(STREAM_WORD_DOCUMENT, -1, FIB_BASE_LEN);
+ FibBase fibBase = new FibBase( fibBaseBytes, 0 );
+ if (!fibBase.isFEncrypted()) {
+ return null;
+ }
- DirectoryEntry objectPoolEntry;
- try {
- objectPoolEntry = (DirectoryEntry) directory
- .getEntry(STREAM_OBJECT_POOL);
- } catch (FileNotFoundException exc) {
- objectPoolEntry = null;
+ String tableStrmName = fibBase.isFWhichTblStm() ? STREAM_TABLE_1 : STREAM_TABLE_0;
+ byte[] tableStream = getDocumentEntryBytes(tableStrmName, -1, fibBase.getLKey());
+ LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(tableStream);
+ // obfuscated documents use xor, otherwise no preferred mode is handed over
+ EncryptionMode em = fibBase.isFObfuscated() ? EncryptionMode.xor : null;
+ EncryptionInfo ei = new EncryptionInfo(leis, em);
+ Decryptor dec = ei.getDecryptor();
+ dec.setChunkSize(512);
+ try {
+ String pass = Biff8EncryptionKey.getCurrentUserPassword();
+ if (pass == null) {
+ pass = Decryptor.DEFAULT_PASSWORD;
+ }
+ if (!dec.verifyPassword(pass)) {
+ throw new EncryptedDocumentException("document is encrypted, password is invalid - use Biff8EncryptionKey.setCurrentUserPassword() to set password before opening");
+ }
+ } catch (GeneralSecurityException e) {
+ throw new IOException(e.getMessage(), e);
+ }
+ // cache only after the password was verified successfully
+ _encryptionInfo = ei;
+ return ei;
}
- _objectPool = new ObjectPoolImpl(objectPoolEntry);
- }
- /**
+ /**
+ * Reads OLE Stream into byte array - if an {@link EncryptionInfo} is available,
+ * decrypt the bytes starting at encryptionOffset. If encryptionOffset = -1, then do not try
+ * to decrypt the bytes
+ *
+ * @param name the name of the stream
+ * @param encryptionOffset the offset from which to start decrypting, use {@code -1} for no decryption
+ * @param len length of the bytes to be read, use {@link Integer#MAX_VALUE} for all bytes
+ * @return the read bytes
+ * @throws IOException if the stream can't be found or read, or if setting up the decryption fails
+ */
+ protected byte[] getDocumentEntryBytes(String name, int encryptionOffset, int len) throws IOException {
+ DirectoryNode dir = getDirectory();
+ DocumentEntry documentProps = (DocumentEntry)dir.getEntry(name);
+ // Resolve the encryption info and size the buffer BEFORE opening the stream:
+ // both steps may throw (e.g. on an invalid password) and the stream opened
+ // below is only closed by the try/finally, so it must not be opened earlier.
+ EncryptionInfo ei = (encryptionOffset > -1) ? getEncryptionInfo() : null;
+ int streamSize = documentProps.getSize();
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.min(streamSize,len));
+
+ DocumentInputStream dis = dir.createDocumentInputStream(documentProps);
+ InputStream is = dis;
+ try {
+ if (ei != null) {
+ try {
+ Decryptor dec = ei.getDecryptor();
+ is = dec.getDataStream(dis, streamSize, 0);
+ if (encryptionOffset > 0) {
+ // the leading encryptionOffset bytes are stored unencrypted, copy them as-is
+ ChunkedCipherInputStream cis = (ChunkedCipherInputStream)is;
+ byte plain[] = new byte[encryptionOffset];
+ cis.readPlain(plain, 0, encryptionOffset);
+ bos.write(plain);
+ }
+ } catch (GeneralSecurityException e) {
+ throw new IOException(e.getMessage(), e);
+ }
+ }
+ // This simplifies a few combinations, so we actually always try to copy len bytes
+ // regardless if encryptionOffset is greater than 0
+ if (len < Integer.MAX_VALUE) {
+ is = new BoundedInputStream(is, len);
+ }
+ IOUtils.copy(is, bos);
+ return bos.toByteArray();
+ } finally {
+ IOUtils.closeQuietly(is);
+ IOUtils.closeQuietly(dis);
+ }
+ }
+
+
+ /**
* Returns the range which covers the whole of the document, but excludes
* any headers and footers.
*/
@@ -198,43 +301,35 @@ public abstract class HWPFDocumentCore extends POIDocument
@Internal
public abstract StringBuilder getText();
- public CHPBinTable getCharacterTable()
- {
- return _cbt;
- }
-
- public PAPBinTable getParagraphTable()
- {
- return _pbt;
- }
-
- public SectionTable getSectionTable()
- {
- return _st;
- }
-
- public StyleSheet getStyleSheet()
- {
- return _ss;
- }
-
- public ListTables getListTables()
- {
- return _lt;
- }
-
- public FontTable getFontTable()
- {
- return _ft;
- }
-
- public FileInformationBlock getFileInformationBlock()
- {
- return _fib;
- }
-
- public ObjectsPool getObjectsPool()
- {
+ public CHPBinTable getCharacterTable() {
+ return _cbt;
+ }
+
+ public PAPBinTable getParagraphTable() {
+ return _pbt;
+ }
+
+ public SectionTable getSectionTable() {
+ return _st;
+ }
+
+ public StyleSheet getStyleSheet() {
+ return _ss;
+ }
+
+ public ListTables getListTables() {
+ return _lt;
+ }
+
+ public FontTable getFontTable() {
+ return _ft;
+ }
+
+ public FileInformationBlock getFileInformationBlock() {
+ return _fib;
+ }
+
+ public ObjectsPool getObjectsPool() {
return _objectPool;
}
@@ -244,4 +339,4 @@ public abstract class HWPFDocumentCore extends POIDocument
public byte[] getMainStream() {
return _mainStream;
}
-}
+} \ No newline at end of file
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java b/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java
new file mode 100644
index 0000000000..875fb9ec7b
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java
@@ -0,0 +1,69 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.junit.AfterClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Opens password protected sample documents and compares the extracted text
+ * with the expected content. Each parameter set consists of the sample file name,
+ * its password and the expected (trimmed) text.
+ */
+@RunWith(Parameterized.class)
+public class HWPFTestEncryption {
+ /** Resets the user password, so it doesn't affect tests running afterwards */
+ @AfterClass
+ public static void clearPass() {
+ Biff8EncryptionKey.setCurrentUserPassword(null);
+ }
+
+ @Parameter(value = 0)
+ public String file;
+
+ @Parameter(value = 1)
+ public String password;
+
+ @Parameter(value = 2)
+ public String expected;
+
+ @Parameters(name="{0}")
+ public static Collection<String[]> data() {
+ return Arrays.asList(
+ new String[]{ "password_tika_binaryrc4.doc", "tika", "This is an encrypted Word 2007 File." },
+ new String[]{ "password_password_cryptoapi.doc", "password", "This is a test" }
+ );
+ }
+
+ @Test
+ public void extract() throws IOException {
+ Biff8EncryptionKey.setCurrentUserPassword(password);
+ HWPFDocument docD = HWPFTestDataSamples.openSampleFile(file);
+ // close extractor and document even if the extraction or the assertion fails
+ try {
+ WordExtractor we = new WordExtractor(docD);
+ try {
+ String actual = we.getText().trim();
+ assertEquals(expected, actual);
+ } finally {
+ we.close();
+ }
+ } finally {
+ docD.close();
+ }
+ }
+}
diff --git a/test-data/document/password_password_cryptoapi.doc b/test-data/document/password_password_cryptoapi.doc
new file mode 100644
index 0000000000..7ef0128582
--- /dev/null
+++ b/test-data/document/password_password_cryptoapi.doc
Binary files differ
diff --git a/test-data/document/password_tika_binaryrc4.doc b/test-data/document/password_tika_binaryrc4.doc
new file mode 100644
index 0000000000..b407783d21
--- /dev/null
+++ b/test-data/document/password_tika_binaryrc4.doc
Binary files differ