package org.apache.poi.openxml4j.util;
-import java.io.IOException;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.InputStream;
+import java.io.IOException;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.poi.util.IOUtils;
-
+import org.apache.poi.util.TempFile;
/**
* So we can close the real zip entry and still
* effectively work with it.
- * Holds the (decompressed!) data in memory, so
+ * Holds the (decompressed!) data in memory (or since POI 5.1.0, possibly in a temp file), so
* close this as soon as you can!
+ * @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
*/
-/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry {
- private final byte[] data;
+/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry implements Closeable {
+ private static final Logger LOG = LogManager.getLogger(ZipArchiveFakeEntry.class);
+ // Decompressed contents when held in memory; null when a temp file is used, and after close()
+ private byte[] data;
+ // Temp file holding the decompressed contents; null when the data is held in memory
+ private File tempFile;
+
+ /**
+ * Captures the decompressed contents of a zip entry, either in a byte array or,
+ * when the configured threshold is non-negative and the declared entry size
+ * reaches it, in a temp file.
+ *
+ * @param entry the real zip entry; its name and declared size are used
+ * @param inp stream the decompressed entry data is read from
+ * @throws IOException if the data is to be held in memory and the declared size is
+ * below -1 or at least {@link Integer#MAX_VALUE}, or if reading/copying the data fails
+ */
ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException {
super(entry.getName());
final long entrySize = entry.getSize();
- if (entrySize < -1 || entrySize>=Integer.MAX_VALUE) {
- throw new IOException("ZIP entry size is too large or invalid");
- }
+ final int threshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
+ // An unknown size (-1) never reaches a non-negative threshold, so such entries stay in memory
+ if (threshold >= 0 && entrySize >= threshold) {
+ tempFile = TempFile.createTempFile("poi-zip-entry", ".tmp");
+ LOG.atInfo().log("created for temp file {} for zip entry {} of size {} bytes",
+ tempFile.getAbsolutePath(), entry.getName(), entrySize);
+ boolean copied = false;
+ try {
+ IOUtils.copy(inp, tempFile);
+ copied = true;
+ } finally {
+ // If the copy failed the constructor is about to throw, so no caller will ever
+ // be able to close() this entry - remove the temp file here instead of leaking it
+ if (!copied && !tempFile.delete()) {
+ LOG.atWarn().log("unable to delete temp file {}", tempFile.getAbsolutePath());
+ }
+ }
+ } else {
+ if (entrySize < -1 || entrySize >= Integer.MAX_VALUE) {
+ throw new IOException("ZIP entry size is too large or invalid");
+ }
- // Grab the de-compressed contents for later
- data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
+ // Grab the de-compressed contents for later
+ data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize);
+ }
}
+ /**
+ * Returns a new stream over the decompressed data of this zip entry, read either
+ * from the temp file or from the in-memory byte array.
+ * @return input stream
+ * @throws RuntimeException since POI 5.1.0,
+ * a RuntimeException can occur if the optional temp file has been removed
+ * (the original {@link FileNotFoundException} is attached as the cause)
+ * @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int)
+ */
public InputStream getInputStream() {
- return new UnsynchronizedByteArrayInputStream(data);
+ if (tempFile != null) {
+ try {
+ return new FileInputStream(tempFile);
+ } catch (FileNotFoundException e) {
+ // keep the cause so the underlying reason stays visible in stack traces
+ throw new RuntimeException("temp file " + tempFile.getAbsolutePath() + " is missing", e);
+ }
+ } else {
+ return new UnsynchronizedByteArrayInputStream(data);
+ }
+ }
+
+ /**
+ * Deletes any temp files and releases any byte arrays.
+ * A temp file that cannot be deleted is reported with a warning in the log;
+ * a temp file that is already gone (e.g. on a repeated call) is ignored.
+ * @throws IOException declared by {@link Closeable}; not thrown by this implementation
+ * @since POI 5.1.0
+ */
+ @Override
+ public void close() throws IOException {
+ data = null;
+ // tempFile is deliberately left set so that a later getInputStream() still
+ // reports the missing temp file
+ if (tempFile != null && tempFile.exists() && !tempFile.delete()) {
+ LOG.atWarn().log("unable to delete temp file {}", tempFile.getAbsolutePath());
+ }
}
}
* done, to free up that memory!
*/
public class ZipInputStreamZipEntrySource implements ZipEntrySource {
+ private static int thresholdForTempFiles = -1;
private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>();
private InputStream streamToClose;
+ /**
+ * Set the threshold at which a zip entry is regarded as too large for holding in memory
+ * and the data is put in a temp file instead. The setting is static, so it applies to
+ * every zip entry read after the call.
+ * @param thresholdBytes number of bytes at which a zip entry is regarded as too large for holding in memory
+ * and the data is put in a temp file instead - defaults to -1 meaning temp files are not used
+ * and that zip entries with more than 2GB of data after decompressing will fail, 0 means every
+ * zip entry that reports a size is stored in a temp file (an entry whose size is reported
+ * as -1, i.e. unknown, never reaches the threshold and is still held in memory).
+ * A threshold like 50000000 (approx. 50MB) is recommended
+ * @since POI 5.1.0
+ */
+ public static void setThresholdBytesForTempFiles(int thresholdBytes) {
+ thresholdForTempFiles = thresholdBytes;
+ }
+
+ /**
+ * Get the threshold at which a zip entry is regarded as too large for holding in memory
+ * and the data is put in a temp file instead (defaults to -1 meaning temp files are not used)
+ * @return threshold in bytes, as last passed to {@link #setThresholdBytesForTempFiles(int)}
+ * @since POI 5.1.0
+ */
+ public static int getThresholdBytesForTempFiles() {
+ return thresholdForTempFiles;
+ }
+
/**
* Reads all the entries from the ZipInputStream
* into memory, and don't close (since POI 4.0.1) the source stream.
* We'll then eat lots of memory, but be able to
* work with the entries at-will.
+ * @see #setThresholdBytesForTempFiles
*/
public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException {
for (;;) {
@Override
public void close() throws IOException {
+ for (ZipArchiveFakeEntry entry : zipEntries.values()) {
+ entry.close();
+ }
+
// Free the memory
zipEntries.clear();
import org.apache.poi.openxml4j.opc.internal.FileHelper;
import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
+import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource;
import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
}
}
+ /**
+ * Opens the sample workbook and package with a 100-byte temp-file threshold for
+ * zip entries and repeats a few basic checks; the previous threshold is restored
+ * afterwards.
+ */
+ @Test
+ void existingWithZipEntryTempFiles() throws Exception {
+ // Remember the current setting so it can be put back whatever happens below
+ final int savedThreshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles();
+ ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(100);
+ try {
+ try (XSSFWorkbook wb = openSampleWorkbook("Formatting.xlsx");
+ OPCPackage opc = OPCPackage.open(openSampleFileStream("Formatting.xlsx"))) {
+ assertNotNull(wb.getSharedStringSource());
+ assertNotNull(wb.getStylesSource());
+
+ // And check a few low level bits too
+ final PackagePart workbookPart =
+ opc.getPart(PackagingURIHelper.createPartName("/xl/workbook.xml"));
+
+ // Links to the three sheets, shared, styles and themes
+ final boolean hasRelationships = workbookPart.hasRelationships();
+ assertTrue(hasRelationships);
+ final int relationshipCount = workbookPart.getRelationships().size();
+ assertEquals(6, relationshipCount);
+ }
+ } finally {
+ // Runs after both resources have been closed, exactly as in a combined try/finally
+ ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(savedThreshold);
+ }
+ }
+
@Test
void getCellStyleAt() throws IOException{
try (XSSFWorkbook workbook = new XSSFWorkbook()) {