diff options
author | Andreas Beeker <kiwiwings@apache.org> | 2020-04-28 23:08:05 +0000 |
---|---|---|
committer | Andreas Beeker <kiwiwings@apache.org> | 2020-04-28 23:08:05 +0000 |
commit | 8b4f463ed3d11034888c672228e5057db802a92d (patch) | |
tree | 723277d09a44869d3befba9061ae5df87268ca8a /src/java/org/apache/poi | |
parent | 23acaff78d1e2a08a879a1422e4ed18ff6290e12 (diff) | |
download | poi-8b4f463ed3d11034888c672228e5057db802a92d.tar.gz poi-8b4f463ed3d11034888c672228e5057db802a92d.zip |
#64387 - Big POIFS stream result in OOM
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1877144 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/poi')
5 files changed, 195 insertions, 171 deletions
diff --git a/src/java/org/apache/poi/poifs/filesystem/BlockStore.java b/src/java/org/apache/poi/poifs/filesystem/BlockStore.java index a56d111f3f..bdd016f860 100644 --- a/src/java/org/apache/poi/poifs/filesystem/BlockStore.java +++ b/src/java/org/apache/poi/poifs/filesystem/BlockStore.java @@ -1,4 +1,3 @@ - /* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -32,53 +31,59 @@ public abstract class BlockStore { * Returns the size of the blocks managed through the block store. */ protected abstract int getBlockStoreBlockSize(); - + /** * Load the block at the given offset. */ protected abstract ByteBuffer getBlockAt(final int offset) throws IOException; - + /** * Extends the file if required to hold blocks up to - * the specified offset, and return the block from there. + * the specified offset, and return the block from there. */ protected abstract ByteBuffer createBlockIfNeeded(final int offset) throws IOException; - + + /** + * Releases a mmap-ed buffer, which you are sure won't be used again + * @param buffer the buffer + */ + protected abstract void releaseBuffer(ByteBuffer buffer); + /** * Returns the BATBlock that handles the specified offset, * and the relative index within it */ protected abstract BATBlockAndIndex getBATBlockAndIndex(final int offset); - + /** * Works out what block follows the specified one. */ protected abstract int getNextBlock(final int offset); - + /** * Changes the record of what block follows the specified one. */ protected abstract void setNextBlock(final int offset, final int nextBlock); - + /** * Finds a free block, and returns its offset. * This method will extend the file/stream if needed, and if doing * so, allocate new FAT blocks to address the extra space. */ protected abstract int getFreeBlock() throws IOException; - + /** - * Creates a Detector for loops in the chain + * Creates a Detector for loops in the chain */ protected abstract ChainLoopDetector getChainLoopDetector() throws IOException; - + /** * Used to detect if a chain has a loop in it, so * we can bail out with an error rather than - * spinning away for ever... + * spinning away for ever... */ protected class ChainLoopDetector { - private boolean[] used_blocks; + private final boolean[] used_blocks; protected ChainLoopDetector(long rawSize) { int blkSize = getBlockStoreBlockSize(); int numBlocks = (int)(rawSize / blkSize); @@ -94,11 +99,11 @@ public abstract class BlockStore { // blocks we've allocated for them, so are safe return; } - + // Claiming an existing block, ensure there's no loop if(used_blocks[offset]) { throw new IllegalStateException( - "Potential loop detected - Block " + offset + + "Potential loop detected - Block " + offset + " was already claimed but was just requested again" ); } diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java index 23d9b3e2b0..22577980a3 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java @@ -83,8 +83,8 @@ public class POIFSFileSystem extends BlockStore private POIFSMiniStore _mini_store; private PropertyTable _property_table; - private List<BATBlock> _xbat_blocks; - private List<BATBlock> _bat_blocks; + private final List<BATBlock> _xbat_blocks; + private final List<BATBlock> _bat_blocks; private HeaderBlock _header; private DirectoryNode _root; @@ -113,7 +113,7 @@ public class POIFSFileSystem extends BlockStore protected void createNewDataSource() { // Data needs to initially hold just the header block, // a single bat block, and an empty properties section - long blockSize = ArithmeticUtils.mulAndCheck((long)bigBlockSize.getBigBlockSize(), (long)3); + long blockSize = ArithmeticUtils.mulAndCheck(bigBlockSize.getBigBlockSize(), 3L); _data = new ByteArrayBackedDataSource(IOUtils.safelyAllocate(blockSize, MAX_RECORD_LENGTH)); } @@ -409,7 +409,7 @@ public class POIFSFileSystem extends BlockStore // Ensure there's a spot in the file for it ByteBuffer buffer = ByteBuffer.allocate(bigBlockSize.getBigBlockSize()); // Header isn't in BATs - long writeTo = ArithmeticUtils.mulAndCheck((1 + (long)offset), (long)bigBlockSize.getBigBlockSize()); + long writeTo = ArithmeticUtils.mulAndCheck(1L + offset, bigBlockSize.getBigBlockSize()); _data.write(buffer, writeTo); // All done return newBAT; @@ -937,6 +937,12 @@ public class POIFSFileSystem extends BlockStore return _header; } + @Override + protected void releaseBuffer(ByteBuffer buffer) { + if (_data instanceof FileBackedDataSource) { + ((FileBackedDataSource)_data).releaseBuffer(buffer); + } + } private static void sanityCheckBlockCount(int block_count) throws IOException { if (block_count <= 0) { diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSMiniStore.java b/src/java/org/apache/poi/poifs/filesystem/POIFSMiniStore.java index 5bd83cbdf5..069ebc0292 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSMiniStore.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSMiniStore.java @@ -37,11 +37,11 @@ import org.apache.poi.poifs.storage.HeaderBlock; */ public class POIFSMiniStore extends BlockStore { - private POIFSFileSystem _filesystem; + private final POIFSFileSystem _filesystem; private POIFSStream _mini_stream; - private List<BATBlock> _sbat_blocks; - private HeaderBlock _header; - private RootProperty _root; + private final List<BATBlock> _sbat_blocks; + private final HeaderBlock _header; + private final RootProperty _root; POIFSMiniStore(POIFSFileSystem filesystem, RootProperty root, List<BATBlock> sbats, HeaderBlock header) @@ -93,7 +93,7 @@ public class POIFSMiniStore extends BlockStore if (! firstInStore) { try { return getBlockAt(offset); - } catch(NoSuchElementException e) {} + } catch(NoSuchElementException ignored) {} } // Need to extend the stream @@ -259,4 +259,9 @@ public class POIFSMiniStore extends BlockStore // RootProperty.setSize does the sbat -> bytes conversion for us _filesystem._get_property_table().getRoot().setSize(blocksUsed); } + + @Override + protected void releaseBuffer(ByteBuffer buffer) { + _filesystem.releaseBuffer(buffer); + } } diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSStream.java b/src/java/org/apache/poi/poifs/filesystem/POIFSStream.java index 87bd12092d..356b78c9f7 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSStream.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSStream.java @@ -47,7 +47,7 @@ import org.apache.poi.poifs.storage.HeaderBlock; public class POIFSStream implements Iterable<ByteBuffer> { - private BlockStore blockStore; + private final BlockStore blockStore; private int startBlock; private OutputStream outStream; @@ -140,8 +140,8 @@ public class POIFSStream implements Iterable<ByteBuffer> /** * Class that handles a streaming read of one stream */ - protected class StreamBlockByteBufferIterator implements Iterator<ByteBuffer> { - private ChainLoopDetector loopDetector; + private class StreamBlockByteBufferIterator implements Iterator<ByteBuffer> { + private final ChainLoopDetector loopDetector; private int nextBlock; StreamBlockByteBufferIterator(int firstBlock) { @@ -221,6 +221,9 @@ public class POIFSStream implements Iterable<ByteBuffer> nextBlock = blockStore.getNextBlock(thisBlock); } + if (buffer != null) { + blockStore.releaseBuffer(buffer); + } buffer = blockStore.createBlockIfNeeded(thisBlock); // Update pointers diff --git a/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java index ee08de13d7..5b77db4d92 100644 --- a/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java +++ b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java @@ -17,10 +17,6 @@ package org.apache.poi.poifs.nio; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; - import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -30,151 +26,160 @@ import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; -import java.util.ArrayList; -import java.util.List; +import java.util.IdentityHashMap; + +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; /** * A POIFS {@link DataSource} backed by a File */ public class FileBackedDataSource extends DataSource { - private final static POILogger logger = POILogFactory.getLogger( FileBackedDataSource.class ); - - private FileChannel channel; - private boolean writable; - // remember file base, which needs to be closed too - private RandomAccessFile srcFile; - - // Buffers which map to a file-portion are not closed automatically when the Channel is closed - // therefore we need to keep the list of mapped buffers and do some ugly reflection to try to - // clean the buffer during close(). - // See https://bz.apache.org/bugzilla/show_bug.cgi?id=58480, - // http://stackoverflow.com/questions/3602783/file-access-synchronized-on-java-object and - // http://bugs.java.com/view_bug.do?bug_id=4724038 for related discussions - private List<ByteBuffer> buffersToClean = new ArrayList<>(); - - public FileBackedDataSource(File file) throws FileNotFoundException { - this(newSrcFile(file, "r"), true); - } - - public FileBackedDataSource(File file, boolean readOnly) throws FileNotFoundException { - this(newSrcFile(file, readOnly ? "r" : "rw"), readOnly); - } - - public FileBackedDataSource(RandomAccessFile srcFile, boolean readOnly) { - this(srcFile.getChannel(), readOnly); - this.srcFile = srcFile; - } - - public FileBackedDataSource(FileChannel channel, boolean readOnly) { - this.channel = channel; - this.writable = !readOnly; - } - - public boolean isWriteable() { - return this.writable; - } - - public FileChannel getChannel() { - return this.channel; - } - - @Override - public ByteBuffer read(int length, long position) throws IOException { - if(position >= size()) { - throw new IndexOutOfBoundsException("Position " + position + " past the end of the file"); - } - - // TODO Could we do the read-only case with MapMode.PRIVATE instead? - // See https://docs.oracle.com/javase/7/docs/api/java/nio/channels/FileChannel.MapMode.html#PRIVATE - // Or should we have 3 modes instead of the current boolean - - // read-write, read-only, read-to-write-elsewhere? - - // Do we read or map (for read/write)? - ByteBuffer dst; - if (writable) { - dst = channel.map(FileChannel.MapMode.READ_WRITE, position, length); - - // remember this buffer for cleanup - buffersToClean.add(dst); - } else { - // allocate the buffer on the heap if we cannot map the data in directly - channel.position(position); - dst = ByteBuffer.allocate(length); - - // Read the contents and check that we could read some data - int worked = IOUtils.readFully(channel, dst); - if(worked == -1) { - throw new IndexOutOfBoundsException("Position " + position + " past the end of the file"); - } - } - - // make it ready for reading - dst.position(0); - - // All done - return dst; - } - - @Override - public void write(ByteBuffer src, long position) throws IOException { - channel.write(src, position); - } - - @Override - public void copyTo(OutputStream stream) throws IOException { - // Wrap the OutputSteam as a channel - try (WritableByteChannel out = Channels.newChannel(stream)) { - // Now do the transfer - channel.transferTo(0, channel.size(), out); - } - } - - @Override - public long size() throws IOException { - return channel.size(); - } - - @Override - public void close() throws IOException { - // also ensure that all buffers are unmapped so we do not keep files locked on Windows - // We consider it a bug if a Buffer is still in use now! - for(ByteBuffer buffer : buffersToClean) { - unmap(buffer); - } - buffersToClean.clear(); - - if (srcFile != null) { - // see http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4796385 - srcFile.close(); - } else { - channel.close(); - } - } - - private static RandomAccessFile newSrcFile(File file, String mode) throws FileNotFoundException { - if(!file.exists()) { - throw new FileNotFoundException(file.toString()); + private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class); + + private final FileChannel channel; + private final boolean writable; + // remember file base, which needs to be closed too + private RandomAccessFile srcFile; + + // Buffers which map to a file-portion are not closed automatically when the Channel is closed + // therefore we need to keep the list of mapped buffers and do some ugly reflection to try to + // clean the buffer during close(). + // See https://bz.apache.org/bugzilla/show_bug.cgi?id=58480, + // http://stackoverflow.com/questions/3602783/file-access-synchronized-on-java-object and + // http://bugs.java.com/view_bug.do?bug_id=4724038 for related discussions + // https://stackoverflow.com/questions/36077641/java-when-does-direct-buffer-released + private final IdentityHashMap<ByteBuffer,ByteBuffer> buffersToClean = new IdentityHashMap<>(); + + public FileBackedDataSource(File file) throws FileNotFoundException { + this(newSrcFile(file, "r"), true); + } + + public FileBackedDataSource(File file, boolean readOnly) throws FileNotFoundException { + this(newSrcFile(file, readOnly ? "r" : "rw"), readOnly); + } + + public FileBackedDataSource(RandomAccessFile srcFile, boolean readOnly) { + this(srcFile.getChannel(), readOnly); + this.srcFile = srcFile; + } + + public FileBackedDataSource(FileChannel channel, boolean readOnly) { + this.channel = channel; + this.writable = !readOnly; + } + + public boolean isWriteable() { + return this.writable; + } + + public FileChannel getChannel() { + return this.channel; + } + + @Override + public ByteBuffer read(int length, long position) throws IOException { + if (position >= size()) { + throw new IndexOutOfBoundsException("Position " + position + " past the end of the file"); + } + + // TODO Could we do the read-only case with MapMode.PRIVATE instead? + // See https://docs.oracle.com/javase/7/docs/api/java/nio/channels/FileChannel.MapMode.html#PRIVATE + // Or should we have 3 modes instead of the current boolean - + // read-write, read-only, read-to-write-elsewhere? + + // Do we read or map (for read/write)? + ByteBuffer dst; + if (writable) { + dst = channel.map(FileChannel.MapMode.READ_WRITE, position, length); + + // remember this buffer for cleanup + buffersToClean.put(dst,dst); + } else { + // allocate the buffer on the heap if we cannot map the data in directly + channel.position(position); + dst = ByteBuffer.allocate(length); + + // Read the contents and check that we could read some data + int worked = IOUtils.readFully(channel, dst); + if (worked == -1) { + throw new IndexOutOfBoundsException("Position " + position + " past the end of the file"); + } + } + + // make it ready for reading + dst.position(0); + + // All done + return dst; + } + + @Override + public void write(ByteBuffer src, long position) throws IOException { + channel.write(src, position); + } + + @Override + public void copyTo(OutputStream stream) throws IOException { + // Wrap the OutputSteam as a channel + try (WritableByteChannel out = Channels.newChannel(stream)) { + // Now do the transfer + channel.transferTo(0, channel.size(), out); + } + } + + @Override + public long size() throws IOException { + return channel.size(); + } + + public void releaseBuffer(ByteBuffer buffer) { + ByteBuffer previous = buffersToClean.remove(buffer); + if (previous != null) { + unmap(previous); + } + } + + @Override + public void close() throws IOException { + // also ensure that all buffers are unmapped so we do not keep files locked on Windows + // We consider it a bug if a Buffer is still in use now! + buffersToClean.forEach((k,v) -> unmap(v)); + buffersToClean.clear(); + + if (srcFile != null) { + // see http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4796385 + srcFile.close(); + } else { + channel.close(); + } + } + + private static RandomAccessFile newSrcFile(File file, String mode) throws FileNotFoundException { + if (!file.exists()) { + throw new FileNotFoundException(file.toString()); } return new RandomAccessFile(file, mode); - } - - // need to use reflection to avoid depending on the sun.nio internal API - // unfortunately this might break silently with newer/other Java implementations, - // but we at least have unit-tests which will indicate this when run on Windows - private static void unmap(final ByteBuffer buffer) { - // not necessary for HeapByteBuffer, avoid lots of log-output on this class - if(buffer.getClass().getName().endsWith("HeapByteBuffer")) { - return; - } - - if (CleanerUtil.UNMAP_SUPPORTED) { - try { - CleanerUtil.getCleaner().freeBuffer(buffer); - } catch (IOException e) { - logger.log(POILogger.WARN, "Failed to unmap the buffer", e); - } - } else { - logger.log(POILogger.DEBUG, CleanerUtil.UNMAP_NOT_SUPPORTED_REASON); - } - } + } + + // need to use reflection to avoid depending on the sun.nio internal API + // unfortunately this might break silently with newer/other Java implementations, + // but we at least have unit-tests which will indicate this when run on Windows + private static void unmap(final ByteBuffer buffer) { + // not necessary for HeapByteBuffer, avoid lots of log-output on this class + if (buffer.getClass().getName().endsWith("HeapByteBuffer")) { + return; + } + + if (CleanerUtil.UNMAP_SUPPORTED) { + try { + CleanerUtil.getCleaner().freeBuffer(buffer); + } catch (IOException e) { + logger.log(POILogger.WARN, "Failed to unmap the buffer", e); + } + } else { + logger.log(POILogger.DEBUG, CleanerUtil.UNMAP_NOT_SUPPORTED_REASON); + } + } } |