Decryption for Binary RC4 and CryptoAPI (... XOR is missing) git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1797837 13f79535-47bb-0310-9956-ffa450edef68 tags/REL_3_17_BETA1
@@ -195,7 +195,7 @@ public abstract class POIDocument implements Closeable { | |||
NPOIFSFileSystem encPoifs = null; | |||
String step = "getting"; | |||
try { | |||
if (encryptionInfo != null) { | |||
if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) { | |||
step = "getting encrypted"; | |||
String encryptedStream = null; | |||
for (String s : encryptedStreamNames) { |
@@ -32,7 +32,11 @@ public final class Biff8EncryptionKey { | |||
* @param password pass <code>null</code> to clear user password (and use default) | |||
*/ | |||
public static void setCurrentUserPassword(String password) { | |||
_userPasswordTLS.set(password); | |||
if (password == null) { | |||
_userPasswordTLS.remove(); | |||
} else { | |||
_userPasswordTLS.set(password); | |||
} | |||
} | |||
/** |
@@ -122,8 +122,11 @@ public class EncryptionInfo implements Cloneable { | |||
} else if ( | |||
2 <= versionMajor && versionMajor <= 4 | |||
&& versionMinor == 2) { | |||
encryptionMode = (preferredEncryptionMode == cryptoAPI) ? cryptoAPI : standard; | |||
encryptionFlags = dis.readInt(); | |||
encryptionMode = ( | |||
preferredEncryptionMode == cryptoAPI | |||
|| !flagAES.isSet(encryptionFlags)) | |||
? cryptoAPI : standard; | |||
} else if ( | |||
versionMajor == agile.versionMajor | |||
&& versionMinor == agile.versionMinor){ | |||
@@ -268,6 +271,14 @@ public class EncryptionInfo implements Cloneable { | |||
return encryptionMode; | |||
} | |||
/** | |||
* @return true, if Document Summary / Summary are encrypted and stored in the {@code EncryptedStream} stream, | |||
* otherwise the Summaries aren't encrypted and located in their usual streams | |||
*/ | |||
public boolean isDocPropsEncrypted() { | |||
return !flagDocProps.isSet(getEncryptionFlags()); | |||
} | |||
@Override | |||
public EncryptionInfo clone() throws CloneNotSupportedException { | |||
EncryptionInfo other = (EncryptionInfo)super.clone(); |
@@ -51,9 +51,9 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable { | |||
super(stream, size, chunkSize); | |||
} | |||
public BinaryRC4CipherInputStream(InputStream stream) | |||
public BinaryRC4CipherInputStream(InputStream stream, int size, int initialPos) | |||
throws GeneralSecurityException { | |||
super(stream, Integer.MAX_VALUE, chunkSize); | |||
super(stream, size, chunkSize, initialPos); | |||
} | |||
} | |||
@@ -141,7 +141,7 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable { | |||
@Override | |||
public InputStream getDataStream(InputStream stream, int size, int initialPos) | |||
throws IOException, GeneralSecurityException { | |||
return new BinaryRC4CipherInputStream(stream); | |||
return new BinaryRC4CipherInputStream(stream, size, initialPos); | |||
} | |||
@@ -18,6 +18,7 @@ | |||
package org.apache.poi.hwpf; | |||
import java.io.ByteArrayInputStream; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.File; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
@@ -25,9 +26,29 @@ import java.io.OutputStream; | |||
import org.apache.poi.hpsf.DocumentSummaryInformation; | |||
import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.hwpf.model.*; | |||
import org.apache.poi.hwpf.model.BookmarksTables; | |||
import org.apache.poi.hwpf.model.CHPBinTable; | |||
import org.apache.poi.hwpf.model.ComplexFileTable; | |||
import org.apache.poi.hwpf.model.DocumentProperties; | |||
import org.apache.poi.hwpf.model.EscherRecordHolder; | |||
import org.apache.poi.hwpf.model.FSPADocumentPart; | |||
import org.apache.poi.hwpf.model.FSPATable; | |||
import org.apache.poi.hwpf.model.FieldsTables; | |||
import org.apache.poi.hwpf.model.FontTable; | |||
import org.apache.poi.hwpf.model.ListTables; | |||
import org.apache.poi.hwpf.model.NoteType; | |||
import org.apache.poi.hwpf.model.NotesTables; | |||
import org.apache.poi.hwpf.model.PAPBinTable; | |||
import org.apache.poi.hwpf.model.PicturesTable; | |||
import org.apache.poi.hwpf.model.RevisionMarkAuthorTable; | |||
import org.apache.poi.hwpf.model.SavedByTable; | |||
import org.apache.poi.hwpf.model.SectionTable; | |||
import org.apache.poi.hwpf.model.SinglentonTextPiece; | |||
import org.apache.poi.hwpf.model.StyleSheet; | |||
import org.apache.poi.hwpf.model.SubdocumentType; | |||
import org.apache.poi.hwpf.model.TextPiece; | |||
import org.apache.poi.hwpf.model.TextPieceTable; | |||
import org.apache.poi.hwpf.model.io.HWPFFileSystem; | |||
import org.apache.poi.hwpf.model.io.HWPFOutputStream; | |||
import org.apache.poi.hwpf.usermodel.Bookmarks; | |||
import org.apache.poi.hwpf.usermodel.BookmarksImpl; | |||
import org.apache.poi.hwpf.usermodel.Field; | |||
@@ -40,13 +61,12 @@ import org.apache.poi.hwpf.usermodel.OfficeDrawings; | |||
import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl; | |||
import org.apache.poi.hwpf.usermodel.Range; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
import org.apache.poi.poifs.crypt.EncryptionInfo; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.filesystem.Entry; | |||
import org.apache.poi.poifs.filesystem.EntryUtils; | |||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.Internal; | |||
/** | |||
@@ -59,8 +79,6 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable"; | |||
private static final String STREAM_DATA = "Data"; | |||
private static final String STREAM_TABLE_0 = "0Table"; | |||
private static final String STREAM_TABLE_1 = "1Table"; | |||
/** table stream buffer*/ | |||
protected byte[] _tableStream; | |||
@@ -178,11 +196,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
} | |||
// use the fib to determine the name of the table stream. | |||
String name = STREAM_TABLE_0; | |||
if (_fib.getFibBase().isFWhichTblStm()) | |||
{ | |||
name = STREAM_TABLE_1; | |||
} | |||
String name = (_fib.getFibBase().isFWhichTblStm()) ? STREAM_TABLE_1 : STREAM_TABLE_0; | |||
// Grab the table stream. | |||
if (!directory.hasEntry(name)) { | |||
@@ -190,25 +204,12 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
} | |||
// read in the table stream. | |||
InputStream is = directory.createDocumentInputStream(name); | |||
_tableStream = IOUtils.toByteArray(is); | |||
is.close(); | |||
_tableStream = getDocumentEntryBytes(name, _fib.getFibBase().getLKey(), Integer.MAX_VALUE); | |||
_fib.fillVariableFields(_mainStream, _tableStream); | |||
// read in the data stream. | |||
InputStream dis = null; | |||
try { | |||
DocumentEntry dataProps = (DocumentEntry)directory.getEntry(STREAM_DATA); | |||
dis = directory.createDocumentInputStream(STREAM_DATA); | |||
_dataStream = IOUtils.toByteArray(dis, dataProps.getSize()); | |||
} catch(IOException e) { | |||
_dataStream = new byte[0]; | |||
} finally { | |||
if (dis != null) { | |||
dis.close(); | |||
} | |||
} | |||
_dataStream = directory.hasEntry(STREAM_DATA) ? getDocumentEntryBytes(STREAM_DATA, 0, Integer.MAX_VALUE) : new byte[0]; | |||
// Get the cp of the start of text in the main stream | |||
// The latest spec doc says this is always zero! | |||
@@ -233,8 +234,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
*/ | |||
boolean preserveBinTables = false; | |||
try { | |||
preserveBinTables = Boolean.parseBoolean( System | |||
.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) ); | |||
preserveBinTables = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) ); | |||
} catch ( Exception exc ) { | |||
// ignore; | |||
} | |||
@@ -250,8 +250,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
*/ | |||
boolean preserveTextTable = false; | |||
try { | |||
preserveTextTable = Boolean.parseBoolean( System | |||
.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) ); | |||
preserveTextTable = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) ); | |||
} catch ( Exception exc ) { | |||
// ignore; | |||
} | |||
@@ -612,8 +611,8 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException { | |||
// initialize our streams for writing. | |||
HWPFFileSystem docSys = new HWPFFileSystem(); | |||
HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT); | |||
HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1); | |||
ByteArrayOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT); | |||
ByteArrayOutputStream tableStream = docSys.getStream(STREAM_TABLE_1); | |||
//HWPFOutputStream dataStream = docSys.getStream("Data"); | |||
int tableOffset = 0; | |||
@@ -630,13 +629,13 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
// it after we write everything else. | |||
byte[] placeHolder = new byte[fibSize]; | |||
wordDocumentStream.write(placeHolder); | |||
int mainOffset = wordDocumentStream.getOffset(); | |||
int mainOffset = wordDocumentStream.size(); | |||
// write out the StyleSheet. | |||
_fib.setFcStshf(tableOffset); | |||
_ss.writeTo(tableStream); | |||
_fib.setLcbStshf(tableStream.getOffset() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
_fib.setLcbStshf(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.size(); | |||
// get fcMin and fcMac because we will be writing the actual text with the | |||
// complex table. | |||
@@ -654,9 +653,9 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
// write out the Complex table, includes text. | |||
_fib.setFcClx(tableOffset); | |||
_cft.writeTo(wordDocumentStream, tableStream); | |||
_fib.setLcbClx(tableStream.getOffset() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
int fcMac = wordDocumentStream.getOffset(); | |||
_fib.setLcbClx(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.size(); | |||
int fcMac = wordDocumentStream.size(); | |||
/* | |||
* dop (document properties record) Written immediately after the end of | |||
@@ -670,8 +669,8 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
// write out the DocumentProperties. | |||
_fib.setFcDop(tableOffset); | |||
_dop.writeTo(tableStream); | |||
_fib.setLcbDop(tableStream.getOffset() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
_fib.setLcbDop(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.size(); | |||
/* | |||
* plcfBkmkf (table recording beginning CPs of bookmarks) Written | |||
@@ -683,7 +682,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
if ( _bookmarksTables != null ) | |||
{ | |||
_bookmarksTables.writePlcfBkmkf( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
} | |||
/* | |||
@@ -696,7 +695,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
if ( _bookmarksTables != null ) | |||
{ | |||
_bookmarksTables.writePlcfBkmkl( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
} | |||
/* | |||
@@ -710,8 +709,8 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
// write out the CHPBinTable. | |||
_fib.setFcPlcfbteChpx(tableOffset); | |||
_cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable()); | |||
_fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
_fib.setLcbPlcfbteChpx(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.size(); | |||
/* | |||
* plcfbtePapx (bin table for PAP FKPs) Written immediately after the | |||
@@ -724,8 +723,8 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
// write out the PAPBinTable. | |||
_fib.setFcPlcfbtePapx(tableOffset); | |||
_pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable()); | |||
_fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
_fib.setLcbPlcfbtePapx(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.size(); | |||
/* | |||
* plcfendRef (endnote reference position table) Written immediately | |||
@@ -739,7 +738,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
*/ | |||
_endnotesTables.writeRef( _fib, tableStream ); | |||
_endnotesTables.writeTxt( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
/* | |||
* plcffld*** (table of field positions and statuses for annotation | |||
@@ -753,7 +752,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
if ( _fieldsTables != null ) | |||
{ | |||
_fieldsTables.write( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
} | |||
/* | |||
@@ -768,7 +767,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
*/ | |||
_footnotesTables.writeRef( _fib, tableStream ); | |||
_footnotesTables.writeTxt( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
/* | |||
* plcfsed (section table) Written immediately after the previously | |||
@@ -781,8 +780,8 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
// write out the SectionTable. | |||
_fib.setFcPlcfsed(tableOffset); | |||
_st.writeTo(wordDocumentStream, tableStream); | |||
_fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
_fib.setLcbPlcfsed(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.size(); | |||
// write out the list tables | |||
if ( _lt != null ) | |||
@@ -800,7 +799,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
* Specification; Page 25 of 210 | |||
*/ | |||
_lt.writeListDataTo( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
/* | |||
* plflfo (more list formats) Written immediately after the end of | |||
@@ -814,7 +813,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
* Specification; Page 26 of 210 | |||
*/ | |||
_lt.writeListOverridesTo( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
} | |||
/* | |||
@@ -827,7 +826,7 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
if ( _bookmarksTables != null ) | |||
{ | |||
_bookmarksTables.writeSttbfBkmk( _fib, tableStream ); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
} | |||
/* | |||
@@ -843,9 +842,9 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
{ | |||
_fib.setFcSttbSavedBy(tableOffset); | |||
_sbt.writeTo(tableStream); | |||
_fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset); | |||
_fib.setLcbSttbSavedBy(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
} | |||
// write out the revision mark authors table. | |||
@@ -853,21 +852,21 @@ public final class HWPFDocument extends HWPFDocumentCore { | |||
{ | |||
_fib.setFcSttbfRMark(tableOffset); | |||
_rmat.writeTo(tableStream); | |||
_fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset); | |||
_fib.setLcbSttbfRMark(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
tableOffset = tableStream.size(); | |||
} | |||
// write out the FontTable. | |||
_fib.setFcSttbfffn(tableOffset); | |||
_ft.writeTo(tableStream); | |||
_fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset); | |||
tableOffset = tableStream.getOffset(); | |||
_fib.setLcbSttbfffn(tableStream.size() - tableOffset); | |||
tableOffset = tableStream.size(); | |||
// set some variables in the FileInformationBlock. | |||
_fib.getFibBase().setFcMin(fcMin); | |||
_fib.getFibBase().setFcMac(fcMac); | |||
_fib.setCbMac(wordDocumentStream.getOffset()); | |||
_fib.setCbMac(wordDocumentStream.size()); | |||
// make sure that the table, doc and data streams use big blocks. | |||
byte[] mainBuf = wordDocumentStream.toByteArray(); |
@@ -17,13 +17,19 @@ | |||
package org.apache.poi.hwpf; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.FileNotFoundException; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.PushbackInputStream; | |||
import java.security.GeneralSecurityException; | |||
import org.apache.poi.EncryptedDocumentException; | |||
import org.apache.poi.POIDocument; | |||
import org.apache.poi.hpsf.PropertySet; | |||
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; | |||
import org.apache.poi.hwpf.model.CHPBinTable; | |||
import org.apache.poi.hwpf.model.FibBase; | |||
import org.apache.poi.hwpf.model.FileInformationBlock; | |||
import org.apache.poi.hwpf.model.FontTable; | |||
import org.apache.poi.hwpf.model.ListTables; | |||
@@ -34,145 +40,242 @@ import org.apache.poi.hwpf.model.TextPieceTable; | |||
import org.apache.poi.hwpf.usermodel.ObjectPoolImpl; | |||
import org.apache.poi.hwpf.usermodel.ObjectsPool; | |||
import org.apache.poi.hwpf.usermodel.Range; | |||
import org.apache.poi.poifs.crypt.ChunkedCipherInputStream; | |||
import org.apache.poi.poifs.crypt.Decryptor; | |||
import org.apache.poi.poifs.crypt.EncryptionInfo; | |||
import org.apache.poi.poifs.crypt.EncryptionMode; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.util.BoundedInputStream; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndianByteArrayInputStream; | |||
/** | |||
* This class holds much of the core of a Word document, but | |||
* without some of the table structure information. | |||
* You generally want to work with one of | |||
* {@link HWPFDocument} or {@link HWPFOldDocument} | |||
* {@link HWPFDocument} or {@link HWPFOldDocument} | |||
*/ | |||
public abstract class HWPFDocumentCore extends POIDocument | |||
{ | |||
public abstract class HWPFDocumentCore extends POIDocument { | |||
protected static final String STREAM_OBJECT_POOL = "ObjectPool"; | |||
protected static final String STREAM_WORD_DOCUMENT = "WordDocument"; | |||
protected static final String STREAM_TABLE_0 = "0Table"; | |||
protected static final String STREAM_TABLE_1 = "1Table"; | |||
/** Holds OLE2 objects */ | |||
protected ObjectPoolImpl _objectPool; | |||
/** The FIB */ | |||
protected FileInformationBlock _fib; | |||
/** Holds styles for this document.*/ | |||
protected StyleSheet _ss; | |||
/** Contains formatting properties for text*/ | |||
protected CHPBinTable _cbt; | |||
/** Contains formatting properties for paragraphs*/ | |||
protected PAPBinTable _pbt; | |||
/** Contains formatting properties for sections.*/ | |||
protected SectionTable _st; | |||
/** Holds fonts for this document.*/ | |||
protected FontTable _ft; | |||
/** Hold list tables */ | |||
protected ListTables _lt; | |||
/** main document stream buffer*/ | |||
protected byte[] _mainStream; | |||
protected HWPFDocumentCore() | |||
{ | |||
super((DirectoryNode)null); | |||
} | |||
/** | |||
* Takes an InputStream, verifies that it's not RTF or PDF, builds a | |||
* POIFSFileSystem from it, and returns that. | |||
*/ | |||
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { | |||
// Open a PushbackInputStream, so we can peek at the first few bytes | |||
PushbackInputStream pis = new PushbackInputStream(istream,6); | |||
byte[] first6 = IOUtils.toByteArray(pis, 6); | |||
// Does it start with {\rtf ? If so, it's really RTF | |||
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' | |||
&& first6[3] == 't' && first6[4] == 'f') { | |||
throw new IllegalArgumentException("The document is really a RTF file"); | |||
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) { | |||
throw new IllegalArgumentException("The document is really a PDF file"); | |||
} | |||
// OK, so it's neither RTF nor PDF | |||
// Open a POIFSFileSystem on the (pushed back) stream | |||
pis.unread(first6); | |||
return new POIFSFileSystem(pis); | |||
} | |||
/** | |||
* This constructor loads a Word document from an InputStream. | |||
* | |||
* @param istream The InputStream that contains the Word document. | |||
* @throws IOException If there is an unexpected IOException from the passed | |||
* in InputStream. | |||
*/ | |||
public HWPFDocumentCore(InputStream istream) throws IOException | |||
{ | |||
//do Ole stuff | |||
this( verifyAndBuildPOIFS(istream) ); | |||
} | |||
/** | |||
* This constructor loads a Word document from a POIFSFileSystem | |||
* | |||
* @param pfilesystem The POIFSFileSystem that contains the Word document. | |||
* @throws IOException If there is an unexpected IOException from the passed | |||
* in POIFSFileSystem. | |||
*/ | |||
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException | |||
{ | |||
this(pfilesystem.getRoot()); | |||
} | |||
/** | |||
* This constructor loads a Word document from a specific point | |||
* in a POIFSFileSystem, probably not the default. | |||
* Used typically to open embeded documents. | |||
* | |||
* @param directory The DirectoryNode that contains the Word document. | |||
* @throws IOException If there is an unexpected IOException from the passed | |||
* in POIFSFileSystem. | |||
*/ | |||
public HWPFDocumentCore(DirectoryNode directory) throws IOException { | |||
// Sort out the hpsf properties | |||
super(directory); | |||
// read in the main stream. | |||
DocumentEntry documentProps = (DocumentEntry)directory.getEntry("WordDocument"); | |||
DocumentInputStream dis = null; | |||
try { | |||
dis = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT); | |||
_mainStream = IOUtils.toByteArray(dis, documentProps.getSize()); | |||
} finally { | |||
if (dis != null) { | |||
dis.close(); | |||
private static final int FIB_BASE_LEN = 68; | |||
/** Holds OLE2 objects */ | |||
protected ObjectPoolImpl _objectPool; | |||
/** The FIB */ | |||
protected FileInformationBlock _fib; | |||
/** Holds styles for this document.*/ | |||
protected StyleSheet _ss; | |||
/** Contains formatting properties for text*/ | |||
protected CHPBinTable _cbt; | |||
/** Contains formatting properties for paragraphs*/ | |||
protected PAPBinTable _pbt; | |||
/** Contains formatting properties for sections.*/ | |||
protected SectionTable _st; | |||
/** Holds fonts for this document.*/ | |||
protected FontTable _ft; | |||
/** Hold list tables */ | |||
protected ListTables _lt; | |||
/** main document stream buffer*/ | |||
protected byte[] _mainStream; | |||
private EncryptionInfo _encryptionInfo; | |||
protected HWPFDocumentCore() { | |||
super((DirectoryNode)null); | |||
} | |||
/** | |||
* Takes an InputStream, verifies that it's not RTF or PDF, builds a | |||
* POIFSFileSystem from it, and returns that. | |||
*/ | |||
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { | |||
// Open a PushbackInputStream, so we can peek at the first few bytes | |||
PushbackInputStream pis = new PushbackInputStream(istream,6); | |||
byte[] first6 = IOUtils.toByteArray(pis, 6); | |||
// Does it start with {\rtf ? If so, it's really RTF | |||
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' | |||
&& first6[3] == 't' && first6[4] == 'f') { | |||
throw new IllegalArgumentException("The document is really a RTF file"); | |||
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) { | |||
throw new IllegalArgumentException("The document is really a PDF file"); | |||
} | |||
// OK, so it's neither RTF nor PDF | |||
// Open a POIFSFileSystem on the (pushed back) stream | |||
pis.unread(first6); | |||
return new POIFSFileSystem(pis); | |||
} | |||
/**
 * Loads a Word document from an InputStream. The stream is first checked
 * to be neither RTF nor PDF and then wrapped into a POIFSFileSystem.
 *
 * @param istream The InputStream that contains the Word document.
 * @throws IOException If there is an unexpected IOException from the passed
 *         in InputStream.
 */
public HWPFDocumentCore(InputStream istream) throws IOException {
    // build the OLE2 file system, then delegate to the file system based constructor
    this(verifyAndBuildPOIFS(istream));
}
/**
 * Loads a Word document from the root directory of a POIFSFileSystem.
 *
 * @param pfilesystem The POIFSFileSystem that contains the Word document.
 * @throws IOException If there is an unexpected IOException from the passed
 *         in POIFSFileSystem.
 */
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
    // the document is expected directly below the root of the file system
    this(pfilesystem.getRoot());
}
/** | |||
* This constructor loads a Word document from a specific point | |||
* in a POIFSFileSystem, probably not the default. | |||
* Used typically to open embeded documents. | |||
* | |||
* @param directory The DirectoryNode that contains the Word document. | |||
* @throws IOException If there is an unexpected IOException from the passed | |||
* in POIFSFileSystem. | |||
*/ | |||
public HWPFDocumentCore(DirectoryNode directory) throws IOException { | |||
// Sort out the hpsf properties | |||
super(directory); | |||
// read in the main stream. | |||
_mainStream = getDocumentEntryBytes(STREAM_WORD_DOCUMENT, FIB_BASE_LEN, Integer.MAX_VALUE); | |||
_fib = new FileInformationBlock(_mainStream); | |||
DirectoryEntry objectPoolEntry = null; | |||
if (directory.hasEntry(STREAM_OBJECT_POOL)) { | |||
objectPoolEntry = (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL); | |||
} | |||
_objectPool = new ObjectPoolImpl(objectPoolEntry); | |||
} | |||
// Create our FIB, and check for the doc being encrypted | |||
_fib = new FileInformationBlock(_mainStream); | |||
/** | |||
* For a given named property entry, either return it or null if | |||
* if it wasn't found | |||
* | |||
* @param setName The property to read | |||
* @return The value of the given property or null if it wasn't found. | |||
*/ | |||
@Override | |||
protected PropertySet getPropertySet(String setName) { | |||
EncryptionInfo ei; | |||
try { | |||
ei = getEncryptionInfo(); | |||
} catch (IOException e) { | |||
throw new RuntimeException(e); | |||
} | |||
return (ei == null) | |||
? super.getPropertySet(setName) | |||
: super.getPropertySet(setName, ei); | |||
} | |||
protected EncryptionInfo getEncryptionInfo() throws IOException { | |||
if (_encryptionInfo != null) { | |||
return _encryptionInfo; | |||
} | |||
// Create our FIB, and check for the doc being encrypted | |||
byte[] fibBaseBytes = (_mainStream != null) ? _mainStream : getDocumentEntryBytes(STREAM_WORD_DOCUMENT, -1, FIB_BASE_LEN); | |||
FibBase fibBase = new FibBase( fibBaseBytes, 0 ); | |||
if (!fibBase.isFEncrypted()) { | |||
return null; | |||
} | |||
DirectoryEntry objectPoolEntry; | |||
try { | |||
objectPoolEntry = (DirectoryEntry) directory | |||
.getEntry(STREAM_OBJECT_POOL); | |||
} catch (FileNotFoundException exc) { | |||
objectPoolEntry = null; | |||
String tableStrmName = fibBase.isFWhichTblStm() ? STREAM_TABLE_1 : STREAM_TABLE_0; | |||
byte[] tableStream = getDocumentEntryBytes(tableStrmName, -1, fibBase.getLKey()); | |||
LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(tableStream); | |||
EncryptionMode em = fibBase.isFObfuscated() ? EncryptionMode.xor : null; | |||
EncryptionInfo ei = new EncryptionInfo(leis, em); | |||
Decryptor dec = ei.getDecryptor(); | |||
dec.setChunkSize(512); | |||
try { | |||
String pass = Biff8EncryptionKey.getCurrentUserPassword(); | |||
if (pass == null) { | |||
pass = Decryptor.DEFAULT_PASSWORD; | |||
} | |||
if (!dec.verifyPassword(pass)) { | |||
throw new EncryptedDocumentException("document is encrypted, password is invalid - use Biff8EncryptionKey.setCurrentUserPasswort() to set password before opening"); | |||
} | |||
} catch (GeneralSecurityException e) { | |||
throw new IOException(e.getMessage(), e); | |||
} | |||
_encryptionInfo = ei; | |||
return ei; | |||
} | |||
_objectPool = new ObjectPoolImpl(objectPoolEntry); | |||
} | |||
/** | |||
/** | |||
* Reads OLE Stream into byte array - if an {@link EncryptionInfo} is available, | |||
* decrypt the bytes starting at encryptionOffset. If encryptionOffset = -1, then do not try | |||
* to decrypt the bytes | |||
* | |||
* @param name the name of the stream | |||
* @param encryptionOffset the offset from which to start decrypting, use {@code -1} for no decryption | |||
* @param len length of the bytes to be read, use {@link Integer#MAX_VALUE} for all bytes | |||
* @return the read bytes | |||
* @throws IOException if the stream can't be found | |||
*/ | |||
protected byte[] getDocumentEntryBytes(String name, int encryptionOffset, int len) throws IOException { | |||
DirectoryNode dir = getDirectory(); | |||
DocumentEntry documentProps = (DocumentEntry)dir.getEntry(name); | |||
DocumentInputStream dis = dir.createDocumentInputStream(documentProps); | |||
EncryptionInfo ei = (encryptionOffset > -1) ? getEncryptionInfo() : null; | |||
int streamSize = documentProps.getSize(); | |||
ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.min(streamSize,len)); | |||
InputStream is = dis; | |||
try { | |||
if (ei != null) { | |||
try { | |||
Decryptor dec = ei.getDecryptor(); | |||
is = dec.getDataStream(dis, streamSize, 0); | |||
if (encryptionOffset > 0) { | |||
ChunkedCipherInputStream cis = (ChunkedCipherInputStream)is; | |||
byte plain[] = new byte[encryptionOffset]; | |||
cis.readPlain(plain, 0, encryptionOffset); | |||
bos.write(plain); | |||
} | |||
} catch (GeneralSecurityException e) { | |||
throw new IOException(e.getMessage(), e); | |||
} | |||
} | |||
// This simplifies a few combinations, so we actually always try to copy len bytes | |||
// regardless if encryptionOffset is greater than 0 | |||
if (len < Integer.MAX_VALUE) { | |||
is = new BoundedInputStream(is, len); | |||
} | |||
IOUtils.copy(is, bos); | |||
return bos.toByteArray(); | |||
} finally { | |||
IOUtils.closeQuietly(is); | |||
IOUtils.closeQuietly(dis); | |||
} | |||
} | |||
/** | |||
* Returns the range which covers the whole of the document, but excludes | |||
* any headers and footers. | |||
*/ | |||
@@ -198,43 +301,35 @@ public abstract class HWPFDocumentCore extends POIDocument | |||
@Internal | |||
public abstract StringBuilder getText(); | |||
public CHPBinTable getCharacterTable() | |||
{ | |||
return _cbt; | |||
} | |||
public PAPBinTable getParagraphTable() | |||
{ | |||
return _pbt; | |||
} | |||
public SectionTable getSectionTable() | |||
{ | |||
return _st; | |||
} | |||
public StyleSheet getStyleSheet() | |||
{ | |||
return _ss; | |||
} | |||
public ListTables getListTables() | |||
{ | |||
return _lt; | |||
} | |||
public FontTable getFontTable() | |||
{ | |||
return _ft; | |||
} | |||
public FileInformationBlock getFileInformationBlock() | |||
{ | |||
return _fib; | |||
} | |||
public ObjectsPool getObjectsPool() | |||
{ | |||
public CHPBinTable getCharacterTable() { | |||
return _cbt; | |||
} | |||
public PAPBinTable getParagraphTable() { | |||
return _pbt; | |||
} | |||
public SectionTable getSectionTable() { | |||
return _st; | |||
} | |||
public StyleSheet getStyleSheet() { | |||
return _ss; | |||
} | |||
public ListTables getListTables() { | |||
return _lt; | |||
} | |||
public FontTable getFontTable() { | |||
return _ft; | |||
} | |||
public FileInformationBlock getFileInformationBlock() { | |||
return _fib; | |||
} | |||
public ObjectsPool getObjectsPool() { | |||
return _objectPool; | |||
} | |||
@@ -244,4 +339,4 @@ public abstract class HWPFDocumentCore extends POIDocument | |||
public byte[] getMainStream() { | |||
return _mainStream; | |||
} | |||
} | |||
} |
@@ -0,0 +1,69 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.hwpf; | |||
import static org.junit.Assert.assertEquals; | |||
import java.io.IOException; | |||
import java.util.Arrays; | |||
import java.util.Collection; | |||
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; | |||
import org.apache.poi.hwpf.extractor.WordExtractor; | |||
import org.junit.AfterClass; | |||
import org.junit.Test; | |||
import org.junit.runner.RunWith; | |||
import org.junit.runners.Parameterized; | |||
import org.junit.runners.Parameterized.Parameter; | |||
import org.junit.runners.Parameterized.Parameters; | |||
@RunWith(Parameterized.class) | |||
public class HWPFTestEncryption { | |||
@AfterClass | |||
public static void clearPass() { | |||
Biff8EncryptionKey.setCurrentUserPassword(null); | |||
} | |||
@Parameter(value = 0) | |||
public String file; | |||
@Parameter(value = 1) | |||
public String password; | |||
@Parameter(value = 2) | |||
public String expected; | |||
@Parameters(name="{0}") | |||
public static Collection<String[]> data() { | |||
return Arrays.asList( | |||
new String[]{ "password_tika_binaryrc4.doc", "tika", "This is an encrypted Word 2007 File." }, | |||
new String[]{ "password_password_cryptoapi.doc", "password", "This is a test" } | |||
); | |||
} | |||
@Test | |||
public void extract() throws IOException { | |||
Biff8EncryptionKey.setCurrentUserPassword(password); | |||
HWPFDocument docD = HWPFTestDataSamples.openSampleFile(file); | |||
WordExtractor we = new WordExtractor(docD); | |||
String actual = we.getText().trim(); | |||
assertEquals(expected, actual); | |||
we.close(); | |||
docD.close(); | |||
} | |||
} |