diff options
author | Dominik Stadler <centic@apache.org> | 2020-05-16 13:06:07 +0000 |
---|---|---|
committer | Dominik Stadler <centic@apache.org> | 2020-05-16 13:06:07 +0000 |
commit | 3cad9e685173bd33e13bd2407f0d8f6cfe0c5522 (patch) | |
tree | 5d0e97314f1457559b6a609ccc72d4be3106ff1e | |
parent | ff919eb0e4b25a7ba9a97ad6c8fdc64a381a96b0 (diff) | |
download | poi-3cad9e685173bd33e13bd2407f0d8f6cfe0c5522.tar.gz poi-3cad9e685173bd33e13bd2407f0d8f6cfe0c5522.zip |
Bug 64322: Optimize performance of reading ole2 files
Remember channel-size to no re-read it for every read-access,
but reset it if we extend the size of the file
profiling indicates Channel.size() sometimes has similar runtime
overhead as position() or read()!
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1877816 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java | 18 | ||||
-rw-r--r-- | src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java | 91 | ||||
-rw-r--r-- | test-data/poifs/64322.ole2 | bin | 0 -> 3867136 bytes |
3 files changed, 96 insertions, 13 deletions
diff --git a/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java index 5b77db4d92..2d4af278c1 100644 --- a/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java +++ b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java @@ -39,6 +39,8 @@ public class FileBackedDataSource extends DataSource { private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class); private final FileChannel channel; + private Long channelSize; + private final boolean writable; // remember file base, which needs to be closed too private RandomAccessFile srcFile; @@ -97,8 +99,9 @@ public class FileBackedDataSource extends DataSource { // remember this buffer for cleanup buffersToClean.put(dst,dst); } else { - // allocate the buffer on the heap if we cannot map the data in directly channel.position(position); + + // allocate the buffer on the heap if we cannot map the data in directly dst = ByteBuffer.allocate(length); // Read the contents and check that we could read some data @@ -118,6 +121,11 @@ public class FileBackedDataSource extends DataSource { @Override public void write(ByteBuffer src, long position) throws IOException { channel.write(src, position); + + // we have to re-read size if we write "after" the recorded one + if(channelSize != null && position >= channelSize) { + channelSize = null; + } } @Override @@ -131,7 +139,13 @@ public class FileBackedDataSource extends DataSource { @Override public long size() throws IOException { - return channel.size(); + // this is called often and profiling showed that channel.size() + // was taking a large part of processing-time, so we only read it + // once + if(channelSize == null) { + channelSize = channel.size(); + } + return channelSize; } public void releaseBuffer(ByteBuffer buffer) { diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java b/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java index 1fffceafeb..5153cb37e1 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java @@ -24,11 +24,18 @@ import static org.junit.Assert.fail; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; +import java.util.HashMap; import org.apache.poi.POIDataSamples; +import org.apache.poi.hpsf.NoPropertySetStreamException; +import org.apache.poi.hpsf.Property; +import org.apache.poi.hpsf.PropertySet; +import org.apache.poi.hpsf.Section; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.poifs.common.POIFSBigBlockSize; import org.apache.poi.poifs.common.POIFSConstants; @@ -177,7 +184,7 @@ public final class TestPOIFSFileSystem { fail("File is corrupt and shouldn't have been opened"); } } - + /** * Tests that we can write and read a file that contains XBATs * as well as regular BATs. @@ -191,19 +198,19 @@ public final class TestPOIFSFileSystem { fs.getRoot().createDocument( "BIG", new ByteArrayInputStream(hugeStream) ); - + ByteArrayOutputStream baos = new ByteArrayOutputStream(); fs.writeFilesystem(baos); byte[] fsData = baos.toByteArray(); - - + + // Check the header was written properly - InputStream inp = new ByteArrayInputStream(fsData); + InputStream inp = new ByteArrayInputStream(fsData); HeaderBlock header = new HeaderBlock(inp); assertEquals(109+21, header.getBATCount()); assertEquals(1, header.getXBATCount()); - - + + // We should have 21 BATs in the XBAT ByteBuffer xbatData = ByteBuffer.allocate(512); xbatData.put(fsData, (1+header.getXBATIndex())*512, 512); @@ -216,19 +223,19 @@ public final class TestPOIFSFileSystem { assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i)); } assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127)); - - + + // Now load it and check fs = new POIFSFileSystem( new ByteArrayInputStream(fsData) ); - + DirectoryNode root = fs.getRoot(); assertEquals(1, root.getEntryCount()); DocumentNode big = (DocumentNode)root.getEntry("BIG"); assertEquals(hugeStream.length, big.getSize()); } - + /** * Most OLE2 files use 512byte blocks. However, a small number * use 4k blocks. Check that we can open these. @@ -293,4 +300,66 @@ public final class TestPOIFSFileSystem { assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("foobaa".getBytes(UTF_8))); } + + @Test + public void test64322() throws NoPropertySetStreamException, IOException { + try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.getFile("64322.ole2"))) { + int count = recurseDir(poiFS.getRoot()); + + assertEquals("Expecting a fixed number of entries being found in the test-document", + 1285, count); + } + } + + @Test + public void test64322a() throws NoPropertySetStreamException, IOException { + try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.openResourceAsStream("64322.ole2"))) { + int count = recurseDir(poiFS.getRoot()); + + assertEquals("Expecting a fixed number of entries being found in the test-document", + 1285, count); + } + } + + private static int recurseDir(DirectoryEntry dir) throws IOException, NoPropertySetStreamException { + int count = 0; + for (Entry entry : dir) { + count++; + if (entry instanceof DirectoryEntry) { + count += recurseDir((DirectoryEntry) entry); + } + if (entry instanceof DocumentEntry) { + DocumentEntry de = (DocumentEntry) entry; + HashMap<String, String> props = new HashMap<>(); + try (DocumentInputStream dis = new DocumentInputStream(de)) { + props.put("name", de.getName()); + + if (PropertySet.isPropertySetStream(dis)) { + dis.mark(10000000); + PropertySet ps = null; + try { + ps = new PropertySet(dis); + + } catch (UnsupportedEncodingException e) { + // ignore + } + if (ps != null) { + for (Section section : ps.getSections()) { + for (Property p : section.getProperties()) { + String prop = section.getDictionary() != null + ? section.getDictionary().get(p.getID()) + : String.valueOf(p.getID()); + if (p.getValue() != null) + props.put("property_" + prop, p.getValue().toString()); + } + } + } + dis.reset(); + } + } + assertTrue(props.size() > 0); + } + } + return count; + } } diff --git a/test-data/poifs/64322.ole2 b/test-data/poifs/64322.ole2 Binary files differnew file mode 100644 index 0000000000..abd421124f --- /dev/null +++ b/test-data/poifs/64322.ole2 |