aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java30
-rw-r--r--poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java53
2 files changed, 68 insertions, 15 deletions
diff --git a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java
index 3377c8cd9c..c20d39ff66 100644
--- a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java
+++ b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java
@@ -20,56 +20,56 @@ import java.util.Arrays;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
+import org.apache.poi.util.RecordFormatException;
import static java.lang.Integer.toHexString;
import static org.apache.logging.log4j.util.Unbox.box;
-public class NilPICFAndBinData
-{
-
+public class NilPICFAndBinData {
private static final Logger LOGGER = LogManager.getLogger(NilPICFAndBinData.class);
+ // upper bound accepted for the lcb and cbHeader length fields read in fillFields
+ private static final int MAX_SIZE = 100_000;
+
private byte[] _binData;
- public NilPICFAndBinData( byte[] data, int offset )
- {
+ public NilPICFAndBinData( byte[] data, int offset ) {
fillFields( data, offset );
}
- public void fillFields( byte[] data, int offset )
- {
+ /**
+ * Parses the structure found at {@code offset} in {@code data}: an int lcb
+ * (total length), an unsigned short cbHeader (header length) and, after the
+ * header, {@code lcb - cbHeader} bytes of binary data that are copied into
+ * this object. A cbHeader other than 0x44 is logged as a warning and parsing
+ * continues.
+ *
+ * @param data buffer containing the structure
+ * @param offset index of the first byte of the structure within {@code data}
+ * @throws RecordFormatException if lcb is smaller than cbHeader
+ */
+ public void fillFields( byte[] data, int offset ) {
int lcb = LittleEndian.getInt( data, offset );
int cbHeader = LittleEndian.getUShort( data, offset
+ LittleEndianConsts.INT_SIZE );
- if ( cbHeader != 0x44 )
- {
+ if ( cbHeader != 0x44 ) {
LOGGER.atWarn().log("NilPICFAndBinData at offset {} cbHeader 0x{} != 0x44", box(offset), toHexString(cbHeader));
}
+ // make sure these do not cause OOM if passed as invalid or extremely large values
+ IOUtils.safelyAllocateCheck(lcb, MAX_SIZE);
+ IOUtils.safelyAllocateCheck(cbHeader, MAX_SIZE);
+
+ // a total length below the header length would give a negative payload size and
+ // make Arrays.copyOfRange fail with an opaque IllegalArgumentException (from > to)
+ if ( lcb < cbHeader ) {
+ throw new RecordFormatException( "NilPICFAndBinData at offset " + offset + ": lcb " + lcb + " is smaller than cbHeader " + cbHeader );
+ }
+
// skip the 62 ignored bytes
int binaryLength = lcb - cbHeader;
this._binData = Arrays.copyOfRange(data, offset + cbHeader,
offset + cbHeader + binaryLength);
}
- public byte[] getBinData()
- {
+ public byte[] getBinData() { // accessor for the payload stored by fillFields
return _binData; // the internal array itself is returned, not a copy
}
- public byte[] serialize()
- {
+ public byte[] serialize() { // builds a new array: 0x44 header bytes followed by the payload
byte[] bs = new byte[_binData.length + 0x44];
LittleEndian.putInt( bs, 0, _binData.length + 0x44 ); // total length, header included
System.arraycopy( _binData, 0, bs, 0x44, _binData.length ); // NOTE(review): bytes 4..0x43 stay zero, so the cbHeader value that fillFields reads after the length is never written - confirm whether 0x44 should be stored there
return bs;
}
- public int serialize( byte[] data, int offset )
- {
+ public int serialize( byte[] data, int offset ) {
LittleEndian.putInt( data, offset, _binData.length + 0x44 );
System.arraycopy( _binData, 0, data, offset + 0x44, _binData.length );
return 0x44 + _binData.length;
diff --git a/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java b/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java
index 9ffea802cf..64d89486d0 100644
--- a/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java
+++ b/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java
@@ -20,10 +20,29 @@ import static org.apache.poi.hwpf.HWPFTestDataSamples.openSampleFile;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FilenameFilter;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.stream.Stream;
+
+import org.apache.commons.io.filefilter.SuffixFileFilter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.util.RecordFormatException;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
public class TestWordToTextConverter {
+ // logger named after this test class so that its output is not attributed to WordToTextConverter
+ private static final Logger LOG = LogManager.getLogger(TestWordToTextConverter.class);
/**
* [FAILING] Bug 47731 - Word Extractor considers text copied from some
@@ -60,4 +79,38 @@ public class TestWordToTextConverter {
assertNotNull(WordToTextConverter.getText(doc));
}
}
+
+ /**
+ * Smoke test over the sample documents: each file must either convert to a
+ * non-null text or fail with one of the exception types caught below.
+ *
+ * @param file sample document supplied by {@link #files()}
+ */
+ @ParameterizedTest
+ @MethodSource("files")
+ void testAllFiles(File file) throws Exception {
+ LOG.info("Testing {}", file);
+ try (FileInputStream stream = new FileInputStream(file)) {
+ InputStream is = FileMagic.prepareToCheckMagic(stream);
+ FileMagic fm = FileMagic.valueOf(is);
+
+ // only files detected as OLE2 are opened as Word documents
+ if (fm != FileMagic.OLE2) {
+ LOG.info("Skip non-doc file {}", file);
+ return;
+ }
+
+ try (HWPFDocument doc = new HWPFDocument(is)) {
+ String foundText = WordToTextConverter.getText(doc);
+ assertNotNull(foundText);
+ } catch (OldWordFileFormatException | EncryptedDocumentException | RecordFormatException ignored) {
+ // these three exception types are tolerated; any other exception fails the test
+ }
+ }
+ }
+
+ /**
+ * Supplies one argument set per file ending in {@code .doc} found in the
+ * {@code document} sub-directory of the test-data directory. The directory
+ * comes from the system property named by {@code POIDataSamples.TEST_PROPERTY},
+ * falling back to {@code test-data} or {@code ../test-data}.
+ *
+ * @return the sample files, sorted so that the parameter order does not depend on the file system
+ */
+ public static Stream<Arguments> files() {
+ String dataDirName = System.getProperty(POIDataSamples.TEST_PROPERTY,
+ new File("test-data").exists() ? "test-data" : "../test-data");
+
+ File documentDir = new File(dataDirName, "document");
+ File[] documents = documentDir.listFiles(
+ (FilenameFilter) new SuffixFileFilter(".doc"));
+ // listFiles returns null when the path is not a directory or cannot be read
+ assertNotNull(documents, "Cannot list sample documents in " + documentDir.getAbsolutePath());
+
+ // File.listFiles makes no ordering guarantee, so sort for a reproducible run order
+ return Arrays.stream(documents).sorted().map(Arguments::of);
+ }
}