git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1893421 13f79535-47bb-0310-9956-ffa450edef68tags/REL_5_2_0
@@ -17,37 +17,82 @@ | |||
package org.apache.poi.openxml4j.util; | |||
import java.io.IOException; | |||
import java.io.Closeable; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.FileNotFoundException; | |||
import java.io.InputStream; | |||
import java.io.IOException; | |||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; | |||
import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; | |||
import org.apache.logging.log4j.LogManager; | |||
import org.apache.logging.log4j.Logger; | |||
import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.util.TempFile; | |||
/** | |||
* So we can close the real zip entry and still | |||
* effectively work with it. | |||
* Holds the (decompressed!) data in memory, so | |||
* Holds the (decompressed!) data in memory (or since POI 5.1.0, possibly in a temp file), so | |||
* close this as soon as you can! | |||
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int) | |||
*/ | |||
/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry { | |||
private final byte[] data; | |||
/* package */ class ZipArchiveFakeEntry extends ZipArchiveEntry implements Closeable { | |||
private static Logger LOG = LogManager.getLogger(ZipArchiveFakeEntry.class); | |||
private byte[] data; | |||
private File tempFile; | |||
ZipArchiveFakeEntry(ZipArchiveEntry entry, InputStream inp) throws IOException { | |||
super(entry.getName()); | |||
final long entrySize = entry.getSize(); | |||
if (entrySize < -1 || entrySize>=Integer.MAX_VALUE) { | |||
throw new IOException("ZIP entry size is too large or invalid"); | |||
} | |||
final int threshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles(); | |||
if (threshold >= 0 && entrySize >= threshold) { | |||
tempFile = TempFile.createTempFile("poi-zip-entry", ".tmp"); | |||
LOG.atInfo().log("created for temp file {} for zip entry {} of size {} bytes", | |||
tempFile.getAbsolutePath(), entry.getName(), entrySize); | |||
IOUtils.copy(inp, tempFile); | |||
} else { | |||
if (entrySize < -1 || entrySize >= Integer.MAX_VALUE) { | |||
throw new IOException("ZIP entry size is too large or invalid"); | |||
} | |||
// Grab the de-compressed contents for later | |||
data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize); | |||
// Grab the de-compressed contents for later | |||
data = (entrySize == -1) ? IOUtils.toByteArray(inp) : IOUtils.toByteArray(inp, (int)entrySize); | |||
} | |||
} | |||
/** | |||
* Returns zip entry. | |||
* @return input stream | |||
* @throws RuntimeException since POI 5.1.0, | |||
* a RuntimeException can occur if the optional temp file has been removed | |||
* @see ZipInputStreamZipEntrySource#setThresholdBytesForTempFiles(int) | |||
*/ | |||
public InputStream getInputStream() { | |||
return new UnsynchronizedByteArrayInputStream(data); | |||
if (tempFile != null) { | |||
try { | |||
return new FileInputStream(tempFile); | |||
} catch (FileNotFoundException e) { | |||
throw new RuntimeException("temp file " + tempFile.getAbsolutePath() + " is missing"); | |||
} | |||
} else { | |||
return new UnsynchronizedByteArrayInputStream(data); | |||
} | |||
} | |||
/** | |||
* Deletes any temp files and releases any byte arrays. | |||
* @throws IOException | |||
* @since POI 5.1.0 | |||
*/ | |||
@Override | |||
public void close() throws IOException { | |||
data = null; | |||
if (tempFile != null) { | |||
tempFile.delete(); | |||
} | |||
} | |||
} |
@@ -34,15 +34,40 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; | |||
* done, to free up that memory! | |||
*/ | |||
public class ZipInputStreamZipEntrySource implements ZipEntrySource { | |||
private static int thresholdForTempFiles = -1; | |||
private final Map<String, ZipArchiveFakeEntry> zipEntries = new HashMap<>(); | |||
private InputStream streamToClose; | |||
/** | |||
* Set the threshold at which it a zip entry is regarded as too large for holding in memory | |||
* and the data is put in a temp file instead | |||
* @param thresholdBytes number of bytes at which a zip entry is regarded as too large for holding in memory | |||
* and the data is put in a temp file instead - defaults to -1 meaning temp files are not used | |||
* and that zip entries with more than 2GB of data after decompressing will fail, 0 means all | |||
* zip entries are stored in temp files. A threshold like 50000000 (approx 50Mb is recommended) | |||
* @since POI 5.1.0 | |||
*/ | |||
public static void setThresholdBytesForTempFiles(int thresholdBytes) { | |||
thresholdForTempFiles = thresholdBytes; | |||
} | |||
/** | |||
* Get the threshold at which it a zip entry is regarded as too large for holding in memory | |||
* and the data is put in a temp file instead (defaults to -1 meaning temp files are not used) | |||
* @return threshold in bytes | |||
* @since POI 5.1.0 | |||
*/ | |||
public static int getThresholdBytesForTempFiles() { | |||
return thresholdForTempFiles; | |||
} | |||
/** | |||
* Reads all the entries from the ZipInputStream
* into memory, and doesn't close (since POI 4.0.1) the source stream.
* We'll then eat lots of memory, but be able to
* work with the entries at-will.
* @see #setThresholdBytesForTempFiles | |||
*/ | |||
public ZipInputStreamZipEntrySource(ZipArchiveThresholdInputStream inp) throws IOException { | |||
for (;;) { | |||
@@ -69,6 +94,10 @@ public class ZipInputStreamZipEntrySource implements ZipEntrySource { | |||
@Override | |||
public void close() throws IOException { | |||
for (ZipArchiveFakeEntry entry : zipEntries.values()) { | |||
entry.close(); | |||
} | |||
// Free the memory | |||
zipEntries.clear(); | |||
@@ -53,6 +53,7 @@ import org.apache.poi.openxml4j.opc.PackagingURIHelper; | |||
import org.apache.poi.openxml4j.opc.internal.FileHelper; | |||
import org.apache.poi.openxml4j.opc.internal.MemoryPackagePart; | |||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; | |||
import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource; | |||
import org.apache.poi.ss.tests.usermodel.BaseTestXWorkbook; | |||
import org.apache.poi.ss.usermodel.*; | |||
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; | |||
@@ -167,6 +168,26 @@ public final class TestXSSFWorkbook extends BaseTestXWorkbook { | |||
} | |||
} | |||
@Test | |||
void existingWithZipEntryTempFiles() throws Exception { | |||
int defaultThreshold = ZipInputStreamZipEntrySource.getThresholdBytesForTempFiles(); | |||
ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(100); | |||
try (XSSFWorkbook workbook = openSampleWorkbook("Formatting.xlsx"); | |||
OPCPackage pkg = OPCPackage.open(openSampleFileStream("Formatting.xlsx"))) { | |||
assertNotNull(workbook.getSharedStringSource()); | |||
assertNotNull(workbook.getStylesSource()); | |||
// And check a few low level bits too | |||
PackagePart wbPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/workbook.xml")); | |||
// Links to the three sheets, shared, styles and themes | |||
assertTrue(wbPart.hasRelationships()); | |||
assertEquals(6, wbPart.getRelationships().size()); | |||
} finally { | |||
ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(defaultThreshold); | |||
} | |||
} | |||
@Test | |||
void getCellStyleAt() throws IOException{ | |||
try (XSSFWorkbook workbook = new XSSFWorkbook()) { |
@@ -0,0 +1,13 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<Configuration status="WARN"> | |||
<Appenders> | |||
<Console name="Console" target="SYSTEM_OUT"> | |||
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/> | |||
</Console> | |||
</Appenders> | |||
<Loggers> | |||
<Root level="info"> | |||
<AppenderRef ref="Console"/> | |||
</Root> | |||
</Loggers> | |||
</Configuration> |