diff options
author | Dominik Stadler <centic@apache.org> | 2024-07-18 07:09:32 +0000 |
---|---|---|
committer | Dominik Stadler <centic@apache.org> | 2024-07-18 07:09:32 +0000 |
commit | 0dea4a301c971c42ceaf0ed43cc6a3545026507e (patch) | |
tree | 7317833310a67bdf288fac2f901d364dbc600e65 | |
parent | 0dac5680c31bc7b2014ce6b76cabf91adc39a908 (diff) | |
download | poi-0dea4a301c971c42ceaf0ed43cc6a3545026507e.tar.gz poi-0dea4a301c971c42ceaf0ed43cc6a3545026507e.zip |
Bug 66425: Avoid exceptions found via poi-fuzz
Processing formats uses regular expressions. Very complex formats
can recurse very deeply and thus can cause StackOVerflows depending
on the used stack-size.
In order to handle this a bit more gracefully, we now catch this
and report a better exception with details about the parsed
format and potential mitigation.
Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=66137
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1919342 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java | 1 | ||||
-rw-r--r-- | poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java | 3 | ||||
-rw-r--r-- | poi/src/main/java/org/apache/poi/ss/format/CellFormat.java | 30 | ||||
-rw-r--r-- | poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java | 12 | ||||
-rw-r--r-- | test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls | bin | 0 -> 66050 bytes | |||
-rw-r--r-- | test-data/spreadsheet/stress.xls | bin | 109568 -> 110080 bytes |
6 files changed, 34 insertions, 12 deletions
diff --git a/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java b/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java index 19b38600ac..54f97e5e7b 100644 --- a/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java +++ b/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java @@ -136,6 +136,7 @@ public class TestAllFiles { "spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4977868385681408.xls", "spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4651309315719168.xls", "document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc", + "spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls" }); private static final Set<String> EXPECTED_FAILURES = StressTestUtils.unmodifiableHashSet( diff --git a/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java b/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java index 980e1298b8..45c75ffc87 100644 --- a/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java +++ b/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java @@ -46,7 +46,8 @@ public class TestExcelConverterSuite { // not failing, but requires more memory "ex45698-22488.xls", // broken documents - "clusterfuzz-testcase-minimized-POIHSSFFuzzer-5436547081830400.xls" + "clusterfuzz-testcase-minimized-POIHSSFFuzzer-5436547081830400.xls", + "clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls" ); public static Stream<Arguments> files() { diff --git a/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java b/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java index c95caca273..9a420540a5 100644 --- a/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java +++ b/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java @@ -28,7 +28,6 @@ import java.util.regex.Pattern; import javax.swing.JLabel; -import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.poi.ss.usermodel.Cell; @@ -182,19 +181,28 @@ public class CellFormat { Matcher m = ONE_PART.matcher(format); List<CellFormatPart> parts = new ArrayList<>(); - while (m.find()) { - try { - String valueDesc = m.group(); + try { + while (m.find()) { + try { + String valueDesc = m.group(); - // Strip out the semicolon if it's there - if (valueDesc.endsWith(";")) - valueDesc = valueDesc.substring(0, valueDesc.length() - 1); + // Strip out the semicolon if it's there + if (valueDesc.endsWith(";")) + valueDesc = valueDesc.substring(0, valueDesc.length() - 1); - parts.add(new CellFormatPart(locale, valueDesc)); - } catch (RuntimeException e) { - LOG.warn("Invalid format: {}", CellFormatter.quote(m.group()), e); - parts.add(null); + parts.add(new CellFormatPart(locale, valueDesc)); + } catch (RuntimeException e) { + LOG.warn("Invalid format: {}", CellFormatter.quote(m.group()), e); + parts.add(null); + } } + } catch (StackOverflowError e) { + // very complex formats can cause the regex-parsing to exceed the available stack + // we want to handle this more gracefully by catching it and reporting a bit more + // details in the error message + throw new IllegalStateException("The provided format is too complex: " + format + + ", you can try to increase Java Stack size via commandline argument '-Xss' " + + "to allow handling this format"); } formatPartCount = parts.size(); diff --git a/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 495ceb9837..8e27789f97 100644 --- a/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -382,4 +382,16 @@ final class TestExcelExtractor { assertContains(txt, "Macro2"); } } + + @Test + void testStackOverflowInRegex() throws IOException { + try (ExcelExtractor extractor = createExtractor("clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls")) { + extractor.getText(); + } catch (IllegalStateException e) { + // we either get a StackOverflow or a parsing error depending on the stack-size of the current JVM, + // so we expect both here + assertTrue(e.getMessage().contains("Provided formula is too complex") || + e.getMessage().contains("Did not have a ExtendedFormatRecord")); + } + } } diff --git a/test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls b/test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls Binary files differnew file mode 100644 index 0000000000..baa032472e --- /dev/null +++ b/test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls Binary files differindex 124e07a16f..f4efb868ba 100644 --- a/test-data/spreadsheet/stress.xls +++ b/test-data/spreadsheet/stress.xls |