aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDominik Stadler <centic@apache.org>2024-07-15 05:41:04 +0000
committerDominik Stadler <centic@apache.org>2024-07-15 05:41:04 +0000
commite2044c958b2103c3534e5211fb4ed452c3bb758a (patch)
tree0f6904c2129f19ba9cf544f586450addc88aa370
parent5085e3d1b2af1ac20e6158b0a00797a0d3b6f2ba (diff)
downloadpoi-e2044c958b2103c3534e5211fb4ed452c3bb758a.tar.gz
poi-e2044c958b2103c3534e5211fb4ed452c3bb758a.zip
Bug 66425: Avoid exceptions found via poi-fuzz
Prevent too much memory usage Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=67413 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1919237 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java1
-rw-r--r--poi-integration/src/test/resources/log4j2-test.xml1
-rw-r--r--poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordUtils.java279
-rw-r--r--test-data/document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.docbin0 -> 46603 bytes
-rw-r--r--test-data/spreadsheet/stress.xlsbin68608 -> 68608 bytes
5 files changed, 126 insertions, 155 deletions
diff --git a/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java b/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java
index e54f7da1d6..73b27a979e 100644
--- a/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java
+++ b/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java
@@ -138,6 +138,7 @@ public class TestAllFiles {
"spreadsheet/clusterfuzz-testcase-minimized-POIXSSFFuzzer-5089447305609216.xlsx",
"spreadsheet/clusterfuzz-testcase-minimized-POIXSSFFuzzer-5089447305609216.xlsx",
"spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4651309315719168.xls",
+ "document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc",
});
private static final Set<String> EXPECTED_FAILURES = StressTestUtils.unmodifiableHashSet(
diff --git a/poi-integration/src/test/resources/log4j2-test.xml b/poi-integration/src/test/resources/log4j2-test.xml
index 3cbbac8cea..43fff138fa 100644
--- a/poi-integration/src/test/resources/log4j2-test.xml
+++ b/poi-integration/src/test/resources/log4j2-test.xml
@@ -37,6 +37,7 @@
<Logger name="org.apache.poi.xssf.usermodel.XSSFWorkbook" level="ERROR" />
<Logger name="org.apache.poi.hslf.usermodel.HSLFGroupShape" level="WARN" />
<Logger name="org.apache.poi.hslf.record.Record" level="ERROR" />
+ <Logger name="org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor" level="FATAL" />
<!-- Change to DEBUG or another level to get log output -->
<Root level="ERROR">
diff --git a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordUtils.java b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordUtils.java
index 7f6a3e0d24..6668382682 100644
--- a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordUtils.java
+++ b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordUtils.java
@@ -40,6 +40,7 @@ import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.Beta;
+import org.apache.poi.util.IOUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
@@ -49,8 +50,7 @@ import org.w3c.dom.NodeList;
import static org.apache.logging.log4j.util.Unbox.box;
@Beta
-public class AbstractWordUtils
-{
+public class AbstractWordUtils {
static final String EMPTY = "";
private static final Logger LOG = LogManager.getLogger(AbstractWordUtils.class);
@@ -59,6 +59,17 @@ public class AbstractWordUtils
public static final int TWIPS_PER_PT = 20;
/**
+ * Limit the amount of main memory which can be used for bullet-information
+ *
+ * if this is too small it can be raised via IOUtils.setByteArrayMaxOverride()
+ *
+ * the chosen limit is fairly arbitrarily, but should allow almost all valid
+ * documents to be processed, but should prevent from causing unexpected high
+ * memory allocation with malicious files.
+ */
+ private static final int MAX_BULLET_BUFFER_SIZE = 1_000_000;
+
+ /**
* Creates array of all possible cell edges. In HTML (and FO) cells from
* different rows and same column should have same width, otherwise spanning
* shall be used.
@@ -67,15 +78,12 @@ public class AbstractWordUtils
* table to build cell edges array from
* @return array of cell edges (including leftest one) in twips
*/
- static int[] buildTableCellEdgesArray( Table table )
- {
+ static int[] buildTableCellEdgesArray( Table table ) {
Set<Integer> edges = new TreeSet<>();
- for ( int r = 0; r < table.numRows(); r++ )
- {
+ for ( int r = 0; r < table.numRows(); r++ ) {
TableRow tableRow = table.getRow( r );
- for ( int c = 0; c < tableRow.numCells(); c++ )
- {
+ for ( int c = 0; c < tableRow.numCells(); c++ ) {
TableCell tableCell = tableRow.getCell( c );
edges.add(tableCell.getLeftEdge());
@@ -85,16 +93,14 @@ public class AbstractWordUtils
Integer[] sorted = edges.toArray(new Integer[0]);
int[] result = new int[sorted.length];
- for ( int i = 0; i < sorted.length; i++ )
- {
+ for ( int i = 0; i < sorted.length; i++ ) {
result[i] = sorted[i];
}
return result;
}
- static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
- {
+ static boolean canBeMerged( Node node1, Node node2, String requiredTagName ) {
if ( node1.getNodeType() != Node.ELEMENT_NODE
|| node2.getNodeType() != Node.ELEMENT_NODE )
return false;
@@ -112,8 +118,7 @@ public class AbstractWordUtils
if ( attributes1.getLength() != attributes2.getLength() )
return false;
- for ( int i = 0; i < attributes1.getLength(); i++ )
- {
+ for ( int i = 0; i < attributes1.getLength(); i++ ) {
final Attr attr1 = (Attr) attributes1.item( i );
final Attr attr2;
if ( isNotEmpty( attr1.getNamespaceURI() ) )
@@ -130,11 +135,9 @@ public class AbstractWordUtils
return true;
}
- static void compactChildNodesR( Element parentElement, String childTagName )
- {
+ static void compactChildNodesR( Element parentElement, String childTagName ) {
NodeList childNodes = parentElement.getChildNodes();
- for ( int i = 0; i < childNodes.getLength() - 1; i++ )
- {
+ for ( int i = 0; i < childNodes.getLength() - 1; i++ ) {
Node child1 = childNodes.item( i );
Node child2 = childNodes.item( i + 1 );
if ( !AbstractWordUtils.canBeMerged( child1, child2, childTagName ) )
@@ -148,23 +151,19 @@ public class AbstractWordUtils
}
childNodes = parentElement.getChildNodes();
- for ( int i = 0; i < childNodes.getLength() - 1; i++ )
- {
+ for ( int i = 0; i < childNodes.getLength() - 1; i++ ) {
Node child = childNodes.item( i );
- if ( child instanceof Element )
- {
+ if ( child instanceof Element ) {
compactChildNodesR( (Element) child, childTagName );
}
}
}
- public static String getBorderType( BorderCode borderCode )
- {
+ public static String getBorderType( BorderCode borderCode ) {
if ( borderCode == null )
throw new IllegalArgumentException( "borderCode is null" );
- switch ( borderCode.getBorderType() )
- {
+ switch ( borderCode.getBorderType() ) {
case 3:
case 10:
case 11:
@@ -199,8 +198,7 @@ public class AbstractWordUtils
}
}
- public static String getBorderWidth( BorderCode borderCode )
- {
+ public static String getBorderWidth( BorderCode borderCode ) {
int lineWidth = borderCode.getLineWidth();
int pt = lineWidth / 8;
int pte = lineWidth - pt * 8;
@@ -208,47 +206,35 @@ public class AbstractWordUtils
return pt + "." + 1000 / 8 * pte + "pt";
}
- public static class NumberingState
- {
-
+ public static class NumberingState {
private final Map<String, Integer> levels = new HashMap<>();
-
}
public static String getBulletText( NumberingState numberingState,
- HWPFList list, char level )
- {
+ HWPFList list, char level ) {
StringBuilder bulletBuffer = new StringBuilder();
char[] xst = list.getNumberText( level ).toCharArray();
- for ( char element : xst )
- {
- if ( element < 9 )
- {
+ for ( char element : xst ) {
+ if ( element < 9 ) {
int lsid = list.getLsid();
final String key = lsid + "#" + ( (int) element );
int num;
if ( !list.isStartAtOverridden( element )
- && numberingState.levels.containsKey( key ) )
- {
+ && numberingState.levels.containsKey( key ) ) {
num = numberingState.levels.get( key );
- if ( level == element )
- {
+ if ( level == element ) {
num++;
numberingState.levels.put( key, num );
}
- }
- else
- {
+ } else {
num = list.getStartAt( element );
numberingState.levels.put( key, num );
}
- if ( level == element )
- {
+ if ( level == element ) {
// cleaning states of nested levels to reset numbering
- for ( int i = element + 1; i < 9; i++ )
- {
+ for ( int i = element + 1; i < 9; i++ ) {
final String childKey = lsid + "#" + i;
numberingState.levels.remove( childKey );
}
@@ -256,31 +242,32 @@ public class AbstractWordUtils
bulletBuffer.append( NumberFormatter.getNumber( num,
list.getNumberFormat( level ) ) );
- }
- else
- {
+ } else {
bulletBuffer.append( element );
}
+
+ // ensure this buffer does not grow to much, this should avoid cases where
+ // this can "explode", i.e. small input file consumes huge amounts of
+ // main memory
+ IOUtils.safelyAllocateCheck(bulletBuffer.length(), MAX_BULLET_BUFFER_SIZE);
}
byte follow = list.getTypeOfCharFollowingTheNumber( level );
- switch ( follow )
- {
- case 0:
- bulletBuffer.append( "\t" );
- break;
- case 1:
- bulletBuffer.append( " " );
- break;
- default:
- break;
+ switch ( follow ) {
+ case 0:
+ bulletBuffer.append( "\t" );
+ break;
+ case 1:
+ bulletBuffer.append( " " );
+ break;
+ default:
+ break;
}
return bulletBuffer.toString();
}
- public static String getColor( int ico )
- {
+ public static String getColor( int ico ) {
switch ( ico ) {
case 2:
return "blue";
@@ -318,8 +305,7 @@ public class AbstractWordUtils
}
}
- public static String getOpacity( int argbValue )
- {
+ public static String getOpacity( int argbValue ) {
int opacity = (int) ( ( argbValue & 0xFF000000L) >>> 24 );
if ( opacity == 0 || opacity == 0xFF )
return ".0";
@@ -327,8 +313,7 @@ public class AbstractWordUtils
return "" + ( opacity / (float) 0xFF );
}
- public static String getColor24( int argbValue )
- {
+ public static String getColor24( int argbValue ) {
if ( argbValue == -1 )
throw new IllegalArgumentException( "This colorref is empty" );
@@ -337,96 +322,88 @@ public class AbstractWordUtils
| ( bgrValue & 0xFF0000 ) >> 16;
// http://www.w3.org/TR/REC-html40/types.html#h-6.5
- switch ( rgbValue )
- {
- case 0xFFFFFF:
- return "white";
- case 0xC0C0C0:
- return "silver";
- case 0x808080:
- return "gray";
- case 0x000000:
- return "black";
- case 0xFF0000:
- return "red";
- case 0x800000:
- return "maroon";
- case 0xFFFF00:
- return "yellow";
- case 0x808000:
- return "olive";
- case 0x00FF00:
- return "lime";
- case 0x008000:
- return "green";
- case 0x00FFFF:
- return "aqua";
- case 0x008080:
- return "teal";
- case 0x0000FF:
- return "blue";
- case 0x000080:
- return "navy";
- case 0xFF00FF:
- return "fuchsia";
- case 0x800080:
- return "purple";
+ switch ( rgbValue ) {
+ case 0xFFFFFF:
+ return "white";
+ case 0xC0C0C0:
+ return "silver";
+ case 0x808080:
+ return "gray";
+ case 0x000000:
+ return "black";
+ case 0xFF0000:
+ return "red";
+ case 0x800000:
+ return "maroon";
+ case 0xFFFF00:
+ return "yellow";
+ case 0x808000:
+ return "olive";
+ case 0x00FF00:
+ return "lime";
+ case 0x008000:
+ return "green";
+ case 0x00FFFF:
+ return "aqua";
+ case 0x008080:
+ return "teal";
+ case 0x0000FF:
+ return "blue";
+ case 0x000080:
+ return "navy";
+ case 0xFF00FF:
+ return "fuchsia";
+ case 0x800080:
+ return "purple";
}
StringBuilder result = new StringBuilder( "#" );
String hex = Integer.toHexString( rgbValue );
- for ( int i = hex.length(); i < 6; i++ )
- {
+ for ( int i = hex.length(); i < 6; i++ ) {
result.append( '0' );
}
result.append( hex );
return result.toString();
}
- public static String getJustification( int js )
- {
- switch ( js )
- {
- case 0:
- case 7:
- return "start";
- case 1:
- case 5:
- return "center";
- case 2:
- case 8:
- return "end";
- case 3:
- case 4:
- case 9:
- return "justify";
- case 6:
- return "left";
+ public static String getJustification( int js ) {
+ switch ( js ) {
+ case 0:
+ case 7:
+ return "start";
+ case 1:
+ case 5:
+ return "center";
+ case 2:
+ case 8:
+ return "end";
+ case 3:
+ case 4:
+ case 9:
+ return "justify";
+ case 6:
+ return "left";
}
return "";
}
- public static String getLanguage( int languageCode )
- {
- switch ( languageCode )
- {
- case 1024:
- return EMPTY;
- case 1033:
- return "en-us";
- case 1049:
- return "ru-ru";
- case 2057:
- return "en-uk";
- default:
- LOG.atWarn().log("Unknown or unmapped language code: {}", box(languageCode));
- return EMPTY;
+ public static String getLanguage( int languageCode ) {
+ switch ( languageCode ) {
+ case 1024:
+ return EMPTY;
+ case 1033:
+ return "en-us";
+ case 1049:
+ return "ru-ru";
+ case 2057:
+ return "en-uk";
+ default:
+ LOG.atWarn().log("Unknown or unmapped language code: {}", box(languageCode));
+ return EMPTY;
}
}
- public static String getListItemNumberLabel( int number, int format )
- {
-
+ public static String getListItemNumberLabel( int number, int format ) {
if ( format != 0 )
LOG.atInfo().log("NYI: toListItemNumberLabel(): {}", box(format));
@@ -444,35 +421,27 @@ public class AbstractWordUtils
}
public static HWPFDocumentCore loadDoc( final DirectoryNode root )
- throws IOException
- {
- try
- {
+ throws IOException {
+ try {
return new HWPFDocument( root );
- }
- catch ( OldWordFileFormatException exc )
- {
+ } catch ( OldWordFileFormatException exc ) {
return new HWPFOldDocument( root );
}
}
- public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
- {
+ public static HWPFDocumentCore loadDoc( File docFile ) throws IOException {
try (InputStream istream = Files.newInputStream(docFile.toPath())) {
return loadDoc(istream);
}
}
public static HWPFDocumentCore loadDoc( InputStream inputStream )
- throws IOException
- {
+ throws IOException {
return loadDoc( HWPFDocumentCore.verifyAndBuildPOIFS( inputStream ) );
}
public static HWPFDocumentCore loadDoc(
- final POIFSFileSystem poifsFileSystem ) throws IOException
- {
+ final POIFSFileSystem poifsFileSystem ) throws IOException {
return loadDoc( poifsFileSystem.getRoot() );
}
-
}
diff --git a/test-data/document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc b/test-data/document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc
new file mode 100644
index 0000000000..ae2c91023d
--- /dev/null
+++ b/test-data/document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc
Binary files differ
diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls
index efe00e3845..a94383250f 100644
--- a/test-data/spreadsheet/stress.xls
+++ b/test-data/spreadsheet/stress.xls
Binary files differ