]> source.dussan.org Git - poi.git/commitdiff
[bug-65581] support configurable temp file threshold
authorPJ Fanning <fanningpj@apache.org>
Sat, 18 Sep 2021 20:06:25 +0000 (20:06 +0000)
committerPJ Fanning <fanningpj@apache.org>
Sat, 18 Sep 2021 20:06:25 +0000 (20:06 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1893421 13f79535-47bb-0310-9956-ffa450edef68

poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipArchiveFakeEntry.java
poi-ooxml/src/main/java/org/apache/poi/openxml4j/util/ZipInputStreamZipEntrySource.java
poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFWorkbook.java
poi-ooxml/src/test/resources/log4j2-test.xml [new file with mode: 0644]

index 927f4035002b8c43a3f94819b673d79f3f9f3c13..acca8d0a09ddbe2e6d8b7dc7696ae709ab43db52 100644 (file)
 
 package org.apache.poi.openxml4j.util;
 
-import java.io.IOException;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.InputStream;
+import java.io.IOException;
 
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.poi.util.IOUtils;
-
+import org.apache.poi.util.TempFile;
 
 /**
  * So we can close the real zip entry and still
  *  effectively work with it.
- * Holds the (decompressed!) data in memory, so
+ * Holds the (decompressed!) data in memory (or since POI 5.1.0, possibly in a temp file), so
  *  close this as soon as you can!
+ * @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
  */
-/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry {
-    private final byte[] data;
+/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry implements Closeable {
+    private static Logger LOG = LogManager.getLogger(ZipArchiveFakeEntry.class);
+    private byte[] data;
+    private File tempFile;
 
     ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException {
         super(entry.getName());
 
         final long entrySize = entry.getSize();
 
-        if (entrySize < -1 || entrySize>=Integer.MAX_VALUE) {
-            throw new IOException("ZIP entry size is too large or invalid");
-        }
+        final int threshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
+        if (threshold >= 0 && entrySize >= threshold) {
+            tempFile = TempFile.createTempFile("poi-zip-entry", ".tmp");
+            LOG.atInfo().log("created for temp file {} for zip entry {} of size {} bytes",
+                    tempFile.getAbsolutePath(), entry.getName(), entrySize);
+            IOUtils.copy(inp, tempFile);
+        } else {
+            if (entrySize < -1 || entrySize >= Integer.MAX_VALUE) {
+                throw new IOException("ZIP entry size is too large or invalid");
+            }
 
-        // Grab the de-compressed contents for later
-        data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
+            // Grab the de-compressed contents for later
+            data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
+        }
     }
 
+    /**
+     * Returns an input stream over this zip entry's data.
+     * @return input stream
+     * @throws RuntimeException since POI 5.1.0,
+     * a RuntimeException can occur if the optional temp file has been removed
+     * @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
+     */
     public InputStream getInputStream() {
-        return new UnsynchronizedByteArrayInputStream(data);
+        if (tempFile != null) {
+            try {
+                return new FileInputStream(tempFile);
+            } catch (FileNotFoundException e) {
+                throw new RuntimeException("temp file " + tempFile.getAbsolutePath() + " is missing");
+            }
+        } else {
+            return new UnsynchronizedByteArrayInputStream(data);
+        }
+    }
+
+    /**
+     * Deletes any temp files and releases any byte arrays.
+     * @throws IOException declared for {@link Closeable}; this implementation does not currently throw it
+     * @since POI 5.1.0
+     */
+    @Override
+    public void close() throws IOException {
+        data = null;
+        if (tempFile != null) {
+            tempFile.delete();
+        }
     }
 }
index ee25086f7802aba1ba3bf9322858830d00f84c6b..fedaff19908e13e21248fc26c9f9c3ff96038d11 100644 (file)
@@ -34,15 +34,40 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
  *  done, to free up that memory!
  */
 public class ZipInputStreamZipEntrySource implements ZipEntrySource {
+    private static int thresholdForTempFiles = -1;
     private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>();
 
     private InputStream streamToClose;
 
+    /**
+     * Set the threshold at which a zip entry is regarded as too large for holding in memory
+     * and the data is put in a temp file instead
+     * @param thresholdBytes number of bytes at which a zip entry is regarded as too large for holding in memory
+     *                       and the data is put in a temp file instead - defaults to -1 meaning temp files are not used
+     *                       and that zip entries with more than 2GB of data after decompressing will fail, 0 means all
+     *                       zip entries are stored in temp files. A threshold like 50000000 (approx 50MB) is recommended.
+     * @since POI 5.1.0
+     */
+    public static void setThresholdBytesForTempFiles(int thresholdBytes) {
+        thresholdForTempFiles = thresholdBytes;
+    }
+
+    /**
+     * Get the threshold at which a zip entry is regarded as too large for holding in memory
+     * and the data is put in a temp file instead (defaults to -1 meaning temp files are not used)
+     * @return threshold in bytes
+     * @since POI 5.1.0
+     */
+    public static int getThresholdBytesForTempFiles() {
+        return thresholdForTempFiles;
+    }
+
     /**
      * Reads all the entries from the ZipInputStream 
      *  into memory, and don't close (since POI 4.0.1) the source stream.
      * We'll then eat lots of memory, but be able to
      *  work with the entries at-will.
+     * @see #setThresholdBytesForTempFiles
      */
     public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException {
         for (;;) {
@@ -69,6 +94,10 @@ public class ZipInputStreamZipEntrySource implements ZipEntrySource {
 
     @Override
     public void close() throws IOException {
+        for (ZipArchiveFakeEntry entry : zipEntries.values()) {
+            entry.close();
+        }
+
         // Free the memory
         zipEntries.clear();
 
index 58beb8896b40d8e3e8982f662129ace058e456f4..9845af910db692b8e67bed56678036e242a13c63 100644 (file)
@@ -53,6 +53,7 @@ import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.internal.FileHelper;
 import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart;
 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
+import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource;
 import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook;
 import org.apache.poi.ss.usermodel.*;
 import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
@@ -167,6 +168,26 @@ public final class TestXSSFWorkbook extends BaseTestXWorkbook {
         }
     }
 
+    @Test
+    void existingWithZipEntryTempFiles() throws Exception {
+        int defaultThreshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
+        ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(100);
+        try (XSSFWorkbook workbook = openSampleWorkbook("Formatting.xlsx");
+             OPCPackage pkg = OPCPackage.open(openSampleFileStream("Formatting.xlsx"))) {
+            assertNotNull(workbook.getSharedStringSource());
+            assertNotNull(workbook.getStylesSource());
+
+            // And check a few low level bits too
+            PackagePart wbPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/workbook.xml"));
+
+            // Links to the three sheets, shared, styles and themes
+            assertTrue(wbPart.hasRelationships());
+            assertEquals(6, wbPart.getRelationships().size());
+        } finally {
+            ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(defaultThreshold);
+        }
+    }
+
     @Test
     void getCellStyleAt() throws IOException{
         try (XSSFWorkbook workbook = new XSSFWorkbook()) {
diff --git a/poi-ooxml/src/test/resources/log4j2-test.xml b/poi-ooxml/src/test/resources/log4j2-test.xml
new file mode 100644 (file)
index 0000000..35a6a3c
--- /dev/null
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Configuration status="WARN">
+    <Appenders>
+        <Console name="Console" target="SYSTEM_OUT">
+            <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
+        </Console>
+    </Appenders>
+    <Loggers>
+        <Root level="info">
+            <AppenderRef ref="Console"/>
+        </Root>
+    </Loggers>
+</Configuration>
\ No newline at end of file