aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDominik Stadler <centic@apache.org>2024-07-18 07:09:32 +0000
committerDominik Stadler <centic@apache.org>2024-07-18 07:09:32 +0000
commit0dea4a301c971c42ceaf0ed43cc6a3545026507e (patch)
tree7317833310a67bdf288fac2f901d364dbc600e65
parent0dac5680c31bc7b2014ce6b76cabf91adc39a908 (diff)
downloadpoi-0dea4a301c971c42ceaf0ed43cc6a3545026507e.tar.gz
poi-0dea4a301c971c42ceaf0ed43cc6a3545026507e.zip
Bug 66425: Avoid exceptions found via poi-fuzz
Processing formats uses regular expressions. Very complex formats can recurse very deeply and thus can cause StackOVerflows depending on the used stack-size. In order to handle this a bit more gracefully, we now catch this and report a better exception with details about the parsed format and potential mitigation. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=66137 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1919342 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java1
-rw-r--r--poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java3
-rw-r--r--poi/src/main/java/org/apache/poi/ss/format/CellFormat.java30
-rw-r--r--poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java12
-rw-r--r--test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xlsbin0 -> 66050 bytes
-rw-r--r--test-data/spreadsheet/stress.xlsbin109568 -> 110080 bytes
6 files changed, 34 insertions, 12 deletions
diff --git a/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java b/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java
index 19b38600ac..54f97e5e7b 100644
--- a/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java
+++ b/poi-integration/src/test/java/org/apache/poi/stress/TestAllFiles.java
@@ -136,6 +136,7 @@ public class TestAllFiles {
"spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4977868385681408.xls",
"spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4651309315719168.xls",
"document/clusterfuzz-testcase-POIHWPFFuzzer-5696094627495936.doc",
+ "spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls"
});
private static final Set<String> EXPECTED_FAILURES = StressTestUtils.unmodifiableHashSet(
diff --git a/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java b/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java
index 980e1298b8..45c75ffc87 100644
--- a/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java
+++ b/poi-scratchpad/src/test/java/org/apache/poi/hssf/converter/TestExcelConverterSuite.java
@@ -46,7 +46,8 @@ public class TestExcelConverterSuite {
// not failing, but requires more memory
"ex45698-22488.xls",
// broken documents
- "clusterfuzz-testcase-minimized-POIHSSFFuzzer-5436547081830400.xls"
+ "clusterfuzz-testcase-minimized-POIHSSFFuzzer-5436547081830400.xls",
+ "clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls"
);
public static Stream<Arguments> files() {
diff --git a/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java b/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java
index c95caca273..9a420540a5 100644
--- a/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java
+++ b/poi/src/main/java/org/apache/poi/ss/format/CellFormat.java
@@ -28,7 +28,6 @@ import java.util.regex.Pattern;
import javax.swing.JLabel;
-import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.ss.usermodel.Cell;
@@ -182,19 +181,28 @@ public class CellFormat {
Matcher m = ONE_PART.matcher(format);
List<CellFormatPart> parts = new ArrayList<>();
- while (m.find()) {
- try {
- String valueDesc = m.group();
+ try {
+ while (m.find()) {
+ try {
+ String valueDesc = m.group();
- // Strip out the semicolon if it's there
- if (valueDesc.endsWith(";"))
- valueDesc = valueDesc.substring(0, valueDesc.length() - 1);
+ // Strip out the semicolon if it's there
+ if (valueDesc.endsWith(";"))
+ valueDesc = valueDesc.substring(0, valueDesc.length() - 1);
- parts.add(new CellFormatPart(locale, valueDesc));
- } catch (RuntimeException e) {
- LOG.warn("Invalid format: {}", CellFormatter.quote(m.group()), e);
- parts.add(null);
+ parts.add(new CellFormatPart(locale, valueDesc));
+ } catch (RuntimeException e) {
+ LOG.warn("Invalid format: {}", CellFormatter.quote(m.group()), e);
+ parts.add(null);
+ }
}
+ } catch (StackOverflowError e) {
+ // very complex formats can cause the regex-parsing to exceed the available stack
+ // we want to handle this more gracefully by catching it and reporting a bit more
+ // details in the error message
+ throw new IllegalStateException("The provided format is too complex: " + format +
+ ", you can try to increase Java Stack size via commandline argument '-Xss' " +
+ "to allow handling this format");
}
formatPartCount = parts.size();
diff --git a/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java
index 495ceb9837..8e27789f97 100644
--- a/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java
+++ b/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java
@@ -382,4 +382,16 @@ final class TestExcelExtractor {
assertContains(txt, "Macro2");
}
}
+
+ @Test
+ void testStackOverflowInRegex() throws IOException {
+ try (ExcelExtractor extractor = createExtractor("clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls")) {
+ extractor.getText();
+ } catch (IllegalStateException e) {
+ // we either get a StackOverflow or a parsing error depending on the stack-size of the current JVM,
+ // so we expect both here
+ assertTrue(e.getMessage().contains("Provided formula is too complex") ||
+ e.getMessage().contains("Did not have a ExtendedFormatRecord"));
+ }
+ }
}
diff --git a/test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls b/test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls
new file mode 100644
index 0000000000..baa032472e
--- /dev/null
+++ b/test-data/spreadsheet/clusterfuzz-testcase-minimized-POIHSSFFuzzer-4657005060816896.xls
Binary files differ
diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls
index 124e07a16f..f4efb868ba 100644
--- a/test-data/spreadsheet/stress.xls
+++ b/test-data/spreadsheet/stress.xls
Binary files differ