diff options
-rw-r--r-- | poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java | 3 | ||||
-rw-r--r-- | poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java | 2 | ||||
-rw-r--r-- | poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java | 22 | ||||
-rw-r--r-- | poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java | 2 | ||||
-rw-r--r-- | poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java | 18 | ||||
-rw-r--r-- | test-data/document/clusterfuzz-testcase-minimized-POIHWPFFuzzer-5074346559012864.doc | bin | 0 -> 9729 bytes | |||
-rw-r--r-- | test-data/document/clusterfuzz-testcase-minimized-POIXWPFFuzzer-6733884933668864.docx | bin | 0 -> 20757 bytes | |||
-rw-r--r-- | test-data/slideshow/clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt | bin | 0 -> 9728 bytes | |||
-rw-r--r-- | test-data/spreadsheet/clusterfuzz-testcase-minimized-XLSX2CSVFuzzer-6594557414080512.xlsx | bin | 0 -> 1523 bytes | |||
-rw-r--r-- | test-data/spreadsheet/stress.xls | bin | 97280 -> 98816 bytes |
10 files changed, 35 insertions, 12 deletions
diff --git a/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java b/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java index e4f6e08ef7..ada90eb75d 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java +++ b/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java @@ -198,6 +198,9 @@ public abstract class POIXMLRelation { * @since 3.16-beta3 */ public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { + if (corePart == null) { + throw new IllegalArgumentException("Core-Part cannot be empty"); + } PackageRelationshipCollection prc = corePart.getRelationshipsByType(getRelation()); Iterator<PackageRelationship> it = prc.iterator(); diff --git a/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java b/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java index 2de39e3669..a648f26d9f 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java +++ b/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java @@ -229,7 +229,7 @@ public final class POIXMLExtractorFactory implements ExtractorProvider { // Grab the core document part, and try to identify from that final PackagePart corePart = pkg.getPart(core.getRelationship(0)); - final String contentType = corePart.getContentType(); + final String contentType = corePart == null ? null : corePart.getContentType(); // Is it XSSF? for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { diff --git a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java index ddd7ff68d5..8cb7579621 100644 --- a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java +++ b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java @@ -22,8 +22,6 @@ import java.lang.reflect.Method; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.dom.DOMSource; @@ -53,10 +51,11 @@ import org.w3c.dom.Document; import org.w3c.dom.Element; @Beta -public class WordToTextConverter extends AbstractWordConverter -{ +public class WordToTextConverter extends AbstractWordConverter { private static final Logger LOG = LogManager.getLogger(WordToTextConverter.class); + private static final int MAX_NESTED_CHILD_NODES = 500; + public static String getText( DirectoryNode root ) throws Exception { final HWPFDocumentCore wordDocument = AbstractWordUtils.loadDoc( root ); @@ -109,7 +108,7 @@ public class WordToTextConverter extends AbstractWordConverter serializer.transform( domSource, streamResult ); } - private static Document process( File docFile ) throws IOException, ParserConfigurationException { + private static Document process( File docFile ) throws IOException { try (final HWPFDocumentCore wordDocument = AbstractWordUtils.loadDoc( docFile )) { WordToTextConverter wordToTextConverter = new WordToTextConverter( XMLHelper.newDocumentBuilder().newDocument()); @@ -118,7 +117,7 @@ public class WordToTextConverter extends AbstractWordConverter } } - private AtomicInteger noteCounters = new AtomicInteger( 1 ); + private final AtomicInteger noteCounters = new AtomicInteger( 1 ); private Element notes; @@ -130,11 +129,8 @@ public class WordToTextConverter extends AbstractWordConverter * Creates new instance of {@link WordToTextConverter}. Can be used for * output several {@link HWPFDocument}s into single text document. * - * @throws ParserConfigurationException - * if an internal {@link DocumentBuilder} cannot be created */ - public WordToTextConverter() throws ParserConfigurationException - { + public WordToTextConverter() { this.textDocumentFacade = new TextDocumentFacade( XMLHelper.newDocumentBuilder().newDocument() ); } @@ -312,6 +308,12 @@ public class WordToTextConverter extends AbstractWordConverter Element note = textDocumentFacade.createBlock(); notes.appendChild( note ); + // avoid StackOverflowException with very deeply nested files (mostly synthetic test files via fuzzing) + // if this limit is reached in real-word documents, we can make this configurable + if (note.getParentNode() != null && note.getParentNode().getChildNodes().getLength() > MAX_NESTED_CHILD_NODES) { + throw new IllegalStateException("Had more than the limit of " + MAX_NESTED_CHILD_NODES + " nested child notes"); + } + note.appendChild( textDocumentFacade.createText( "^" + noteIndex + "\t " ) ); processCharacters( wordDocument, Integer.MIN_VALUE, noteTextRange, note ); diff --git a/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java b/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java index 7848f688cd..ef146dda23 100644 --- a/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java +++ b/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java @@ -35,6 +35,7 @@ import java.util.stream.Stream; import org.apache.poi.POIDataSamples; import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException; +import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.OldPowerPointFormatException; import org.apache.poi.util.IOUtils; import org.apache.commons.io.output.NullPrintStream; @@ -65,6 +66,7 @@ public abstract class BaseTestPPTIterating { EXCLUDED.put("clusterfuzz-testcase-minimized-POIHSLFFuzzer-6710128412590080.ppt", RuntimeException.class); EXCLUDED.put("clusterfuzz-testcase-minimized-POIFuzzer-5429732352851968.ppt", FileNotFoundException.class); EXCLUDED.put("clusterfuzz-testcase-minimized-POIFuzzer-5681320547975168.ppt", FileNotFoundException.class); + EXCLUDED.put("clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt", RuntimeException.class); } public static Stream<Arguments> files() { diff --git a/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java b/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java index 74df2761a6..da4d2a3289 100644 --- a/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java +++ b/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java @@ -50,6 +50,8 @@ public final class EscherContainerRecord extends EscherRecord implements Iterabl private static final Logger LOGGER = LogManager.getLogger(EscherContainerRecord.class); + private static final int MAX_NESTED_CHILD_NODES = 1000; + /** * in case if document contains any charts we have such document structure: * BOF @@ -86,12 +88,26 @@ public final class EscherContainerRecord extends EscherRecord implements Iterabl @Override public int fillFields(byte[] data, int pOffset, EscherRecordFactory recordFactory) { + return fillFields(data, pOffset, recordFactory, 0); + } + + private int fillFields(byte[] data, int pOffset, EscherRecordFactory recordFactory, int nesting) { + if (nesting > MAX_NESTED_CHILD_NODES) { + throw new IllegalStateException("Had more than the limit of " + MAX_NESTED_CHILD_NODES + " nested child notes"); + } int bytesRemaining = readHeader(data, pOffset); int bytesWritten = 8; int offset = pOffset + 8; while (bytesRemaining > 0 && offset < data.length) { EscherRecord child = recordFactory.createRecord(data, offset); - int childBytesWritten = child.fillFields(data, offset, recordFactory); + + final int childBytesWritten; + if (child instanceof EscherContainerRecord) { + childBytesWritten = ((EscherContainerRecord)child).fillFields(data, offset, recordFactory, nesting + 1); + } else { + childBytesWritten = child.fillFields(data, offset, recordFactory); + } + bytesWritten += childBytesWritten; offset += childBytesWritten; bytesRemaining -= childBytesWritten; diff --git a/test-data/document/clusterfuzz-testcase-minimized-POIHWPFFuzzer-5074346559012864.doc b/test-data/document/clusterfuzz-testcase-minimized-POIHWPFFuzzer-5074346559012864.doc Binary files differnew file mode 100644 index 0000000000..add1da339f --- /dev/null +++ b/test-data/document/clusterfuzz-testcase-minimized-POIHWPFFuzzer-5074346559012864.doc diff --git a/test-data/document/clusterfuzz-testcase-minimized-POIXWPFFuzzer-6733884933668864.docx b/test-data/document/clusterfuzz-testcase-minimized-POIXWPFFuzzer-6733884933668864.docx Binary files differnew file mode 100644 index 0000000000..36132483c9 --- /dev/null +++ b/test-data/document/clusterfuzz-testcase-minimized-POIXWPFFuzzer-6733884933668864.docx diff --git a/test-data/slideshow/clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt b/test-data/slideshow/clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt Binary files differnew file mode 100644 index 0000000000..9bc426a2b2 --- /dev/null +++ b/test-data/slideshow/clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt diff --git a/test-data/spreadsheet/clusterfuzz-testcase-minimized-XLSX2CSVFuzzer-6594557414080512.xlsx b/test-data/spreadsheet/clusterfuzz-testcase-minimized-XLSX2CSVFuzzer-6594557414080512.xlsx Binary files differnew file mode 100644 index 0000000000..b644793380 --- /dev/null +++ b/test-data/spreadsheet/clusterfuzz-testcase-minimized-XLSX2CSVFuzzer-6594557414080512.xlsx diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls Binary files differindex 0c0044cfc4..5841042e61 100644 --- a/test-data/spreadsheet/stress.xls +++ b/test-data/spreadsheet/stress.xls |