From a9aad913bf20cad7d120b3ee64e661def0f02289 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 30 Oct 2015 17:26:37 +0000 Subject: [PATCH] Add a limit of the max number of characters that can be extracted to avoid sending applications out of memory with very large documents git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1711520 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/POIXMLTextExtractor.java | 15 +++++++++ .../poi/openxml4j/util/ZipSecureFile.java | 31 +++++++++++++++++++ .../XSSFEventBasedExcelExtractor.java | 3 ++ .../xssf/extractor/XSSFExcelExtractor.java | 22 ++++++++----- 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java index 705bf42a6b..a0f4359287 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java @@ -23,6 +23,7 @@ import org.apache.poi.POIXMLProperties.CoreProperties; import org.apache.poi.POIXMLProperties.CustomProperties; import org.apache.poi.POIXMLProperties.ExtendedProperties; import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.util.ZipSecureFile; public abstract class POIXMLTextExtractor extends POITextExtractor { /** The POIXMLDocument that's open */ @@ -88,4 +89,18 @@ public abstract class POIXMLTextExtractor extends POITextExtractor { } super.close(); } + + protected void checkMaxTextSize(StringBuffer text, String string) { + if(string == null) { + return; + } + + int size = text.length() + string.length(); + if(size > ZipSecureFile.getMaxTextSize()) { + throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. " + + "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. 
" + "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. " + "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize()); + } + } } diff --git a/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java b/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java index b06ddcb25a..e3b899373e 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java @@ -50,6 +50,9 @@ public class ZipSecureFile extends ZipFile { // don't alert for expanded sizes smaller than 100k private static long GRACE_ENTRY_SIZE = 100*1024; + // The default maximum size of extracted text + private static long MAX_TEXT_SIZE = 10*1024*1024; + /** * Sets the ratio between de- and inflated bytes to detect zipbomb. * It defaults to 1% (= 0.01d), i.e. when the compression is better than @@ -100,6 +103,34 @@ public class ZipSecureFile extends ZipFile { return MAX_ENTRY_SIZE; } + /** + * Sets the maximum number of characters of text that are + * extracted before an exception is thrown during extracting + * text from documents. + * + * This can be used to limit memory consumption and protect against + * security vulnerabilities when documents are provided by users. + * + * @param maxTextSize the max. number of characters of text to extract + */ + public static void setMaxTextSize(long maxTextSize) { + if (maxTextSize < 0 || maxTextSize > 0xFFFFFFFFl) { + throw new IllegalArgumentException("Max text size is bounded [0-4GB]."); + } + MAX_TEXT_SIZE = maxTextSize; + } + + /** + * Returns the current maximum allowed text size. + * + * See setMaxTextSize() for details. + * + * @return The max accepted text size. 
+ */ + public static long getMaxTextSize() { + return MAX_TEXT_SIZE; + } + public ZipSecureFile(File file, int mode) throws IOException { super(file, mode); } diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java index c52bed687d..18db97f433 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java @@ -283,11 +283,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor output.append('\t'); } if (formattedValue != null) { + checkMaxTextSize(output, formattedValue); output.append(formattedValue); } if (includeCellComments && comment != null) { String commentText = comment.getString().getString().replace('\n', ' '); output.append(formattedValue != null ? " Comment by " : "Comment by "); + checkMaxTextSize(output, commentText); if (commentText.startsWith(comment.getAuthor() + ": ")) { output.append(commentText); } else { @@ -363,6 +365,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor * Append the cell contents we have collected. */ private void appendCellText(StringBuffer buffer) { + checkMaxTextSize(buffer, output.toString()); buffer.append(output); } diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java index 39ef5be8a4..c66ad71b37 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -168,7 +168,9 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor // Is it a formula one? 
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) { if (formulasNotResults) { - text.append(cell.getCellFormula()); + String contents = cell.getCellFormula(); + checkMaxTextSize(text, contents); + text.append(contents); } else { if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) { handleStringCell(text, cell); @@ -188,6 +190,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor // Replace any newlines with spaces, otherwise it // breaks the output String commentText = comment.getString().getString().replace('\n', ' '); + checkMaxTextSize(text, commentText); text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText); } @@ -230,8 +233,11 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor } private void handleStringCell(StringBuffer text, Cell cell) { - text.append(cell.getRichStringCellValue().getString()); + String contents = cell.getRichStringCellValue().getString(); + checkMaxTextSize(text, contents); + text.append(contents); } + private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) { int type = cell.getCellType(); if (type == Cell.CELL_TYPE_FORMULA) { @@ -242,16 +248,18 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor CellStyle cs = cell.getCellStyle(); if (cs != null && cs.getDataFormatString() != null) { - text.append(formatter.formatRawCellContents( - cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString() - )); + String contents = formatter.formatRawCellContents( + cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString()); + checkMaxTextSize(text, contents); + text.append(contents); return; } } // No supported styling applies to this cell - XSSFCell xcell = (XSSFCell)cell; - text.append( xcell.getRawValue() ); + String contents = ((XSSFCell)cell).getRawValue(); + checkMaxTextSize(text, contents); + text.append( contents ); } private String extractHeaderFooter(HeaderFooter hf) { -- 2.39.5