From 8d7af95fede762f946afec8539752271bc29dfec Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sat, 18 Sep 2021 20:06:25 +0000 Subject: [PATCH] [bug-65581] support configurable temp file threshold git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1893421 13f79535-47bb-0310-9956-ffa450edef68 --- .../openxml4j/util/ZipArchiveFakeEntry.java | 67 ++++++++++++++++--- .../util/ZipInputStreamZipEntrySource.java | 29 ++++++++ .../poi/xssf/usermodel/TestXSSFWorkbook.java | 21 ++++++ poi-ooxml/src/test/resources/log4j2-test.xml | 13 ++++ 4 files changed, 119 insertions(+), 11 deletions(-) create mode 100644 poi-ooxml/src/test/resources/log4j2-test.xml diff --git a/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipArchiveFakeEntry.java b/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipArchiveFakeEntry.java index 927f403500..acca8d0a09 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipArchiveFakeEntry.java +++ b/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipArchiveFakeEntry.java @@ -17,37 +17,82 @@ package org.apache.poi.openxml4j.util; -import java.io.IOException; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.InputStream; +import java.io.IOException; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.poi.util.IOUtils; - +import org.apache.poi.util.TempFile; /** * So we can close the real zip entry and still * effectively work with it. - * Holds the (decompressed!) data in memory, so + * Holds the (decompressed!) data in memory (or since POI 5.1.0, possibly in a temp file), so * close this as soon as you can! 
+ * @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int) */ -/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry { - private final byte[] data; +/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry implements Closeable { + private static Logger LOG = LogManager.getLogger(ZipArchiveFakeEntry.class); + private byte[] data; + private File tempFile; ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException { super(entry.getName()); final long entrySize = entry.getSize(); - if (entrySize < -1 || entrySize>=Integer.MAX_VALUE) { - throw new IOException("ZIP entry size is too large or invalid"); - } + final int threshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles(); + if (threshold >= 0 && entrySize >= threshold) { + tempFile = TempFile.createTempFile("poi-zip-entry", ".tmp"); + LOG.atInfo().log("created for temp file {} for zip entry {} of size {} bytes", + tempFile.getAbsolutePath(), entry.getName(), entrySize); + IOUtils.copy(inp, tempFile); + } else { + if (entrySize < -1 || entrySize >= Integer.MAX_VALUE) { + throw new IOException("ZIP entry size is too large or invalid"); + } - // Grab the de-compressed contents for later - data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize); + // Grab the de-compressed contents for later + data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize); + } } + /** + * Returns zip entry. 
+ * @return input stream + * @throws RuntimeException since POI 5.1.0, + * a RuntimeException can occur if the optional temp file has been removed + * @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int) + */ public InputStream getInputStream() { - return new UnsynchronizedByteArrayInputStream(data); + if (tempFile != null) { + try { + return new FileInputStream(tempFile); + } catch (FileNotFoundException e) { + throw new RuntimeException("temp file " + tempFile.getAbsolutePath() + " is missing"); + } + } else { + return new UnsynchronizedByteArrayInputStream(data); + } + } + + /** + * Deletes any temp files and releases any byte arrays. + * @throws IOException + * @since POI 5.1.0 + */ + @Override + public void close() throws IOException { + data = null; + if (tempFile != null) { + tempFile.delete(); + } } } diff --git a/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipInputStreamZipEntrySource.java b/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipInputStreamZipEntrySource.java index ee25086f78..fedaff1990 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipInputStreamZipEntrySource.java +++ b/poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipInputStreamZipEntrySource.java @@ -34,15 +34,40 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; * done, to free up that memory! 
*/ public class ZipInputStreamZipEntrySource implements ZipEntrySource { + private static int thresholdForTempFiles = -1; private final Map zipEntries = new HashMap<>(); private InputStream streamToClose; + /** + * Set the threshold at which a zip entry is regarded as too large for holding in memory + * and the data is put in a temp file instead + * @param thresholdBytes number of bytes at which a zip entry is regarded as too large for holding in memory + * and the data is put in a temp file instead - defaults to -1 meaning temp files are not used + * and that zip entries with more than 2GB of data after decompressing will fail, 0 means all + * zip entries are stored in temp files. A threshold like 50000000 (approx 50MB) is recommended + * @since POI 5.1.0 + */ + public static void setThresholdBytesForTempFiles(int thresholdBytes) { + thresholdForTempFiles = thresholdBytes; + } + + /** + * Get the threshold at which a zip entry is regarded as too large for holding in memory + * and the data is put in a temp file instead (defaults to -1 meaning temp files are not used) + * @return threshold in bytes + * @since POI 5.1.0 + */ + public static int getThresholdBytesForTempFiles() { + return thresholdForTempFiles; + } + /** * Reads all the entries from the ZipInputStream * into memory, and don't close (since POI 4.0.1) the source stream. * We'll then eat lots of memory, but be able to * work with the entries at-will. 
+ * @see #setThresholdBytesForTempFiles */ public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException { for (;;) { @@ -69,6 +94,10 @@ public class ZipInputStreamZipEntrySource implements ZipEntrySource { @Override public void close() throws IOException { + for (ZipArchiveFakeEntry entry : zipEntries.values()) { + entry.close(); + } + // Free the memory zipEntries.clear(); diff --git a/poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java b/poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java index 58beb8896b..9845af910d 100644 --- a/poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java +++ b/poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java @@ -53,6 +53,7 @@ import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.internal.FileHelper; import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; +import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource; import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook; import org.apache.poi.ss.usermodel.*; import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; @@ -167,6 +168,26 @@ public final class TestXSSFWorkbook extends BaseTestXWorkbook { } } + @Test + void existingWithZipEntryTempFiles() throws Exception { + int defaultThreshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles(); + ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(100); + try (XSSFWorkbook workbook = openSampleWorkbook("Formatting.xlsx"); + OPCPackage pkg = OPCPackage.open(openSampleFileStream("Formatting.xlsx"))) { + assertNotNull(workbook.getSharedStringSource()); + assertNotNull(workbook.getStylesSource()); + + // And check a few low level bits too + PackagePart wbPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/workbook.xml")); + + // Links to the three sheets, shared, styles 
and themes + assertTrue(wbPart.hasRelationships()); + assertEquals(6, wbPart.getRelationships().size()); + } finally { + ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(defaultThreshold); + } + } + @Test void getCellStyleAt() throws IOException{ try (XSSFWorkbook workbook = new XSSFWorkbook()) { diff --git a/poi-ooxml/src/test/resources/log4j2-test.xml b/poi-ooxml/src/test/resources/log4j2-test.xml new file mode 100644 index 0000000000..35a6a3ccdf --- /dev/null +++ b/poi-ooxml/src/test/resources/log4j2-test.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file -- 2.39.5