author     Dominik Stadler <centic@apache.org>  2020-05-16 13:06:07 +0000
committer  Dominik Stadler <centic@apache.org>  2020-05-16 13:06:07 +0000
commit     3cad9e685173bd33e13bd2407f0d8f6cfe0c5522 (patch)
tree       5d0e97314f1457559b6a609ccc72d4be3106ff1e
parent     ff919eb0e4b25a7ba9a97ad6c8fdc64a381a96b0 (diff)
Bug 64322: Optimize performance of reading ole2 files
Remember the channel size so we do not need to re-read it for every read access, but reset it if we extend the size of the file.

Profiling indicates Channel.size() sometimes has similar runtime overhead as position() or read()!

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1877816 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--  src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java            |  18
-rw-r--r--  src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java  |  91
-rw-r--r--  test-data/poifs/64322.ole2                                              | bin 0 -> 3867136 bytes
3 files changed, 96 insertions(+), 13 deletions(-)
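The caching idea behind the patch can be summarised outside of POI as follows. This is only a minimal, self-contained sketch under simplified names (CachedSizeChannel is hypothetical and not part of the POI codebase); the authoritative change is the diff below.

    // Illustrative sketch of the lazy size-caching pattern: cache the result of
    // FileChannel.size() and invalidate it when a write may have grown the file.
    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.nio.channels.FileChannel;

    class CachedSizeChannel {
        private final FileChannel channel;
        private Long channelSize;   // lazily cached, reset when the file may grow

        CachedSizeChannel(FileChannel channel) {
            this.channel = channel;
        }

        long size() throws IOException {
            // size() is called very often; avoid the repeated system call
            if (channelSize == null) {
                channelSize = channel.size();
            }
            return channelSize;
        }

        void write(ByteBuffer src, long position) throws IOException {
            channel.write(src, position);
            // writing at or past the cached size may extend the file,
            // so force a re-read on the next size() call
            if (channelSize != null && position >= channelSize) {
                channelSize = null;
            }
        }
    }

Reads only consult the cached value, so the per-read cost of Channel.size() disappears, while writes past the known end of the file conservatively drop the cache.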
diff --git a/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java
index 5b77db4d92..2d4af278c1 100644
--- a/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java
+++ b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java
@@ -39,6 +39,8 @@ public class FileBackedDataSource extends DataSource {
private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class);
private final FileChannel channel;
+ private Long channelSize;
+
private final boolean writable;
// remember file base, which needs to be closed too
private RandomAccessFile srcFile;
@@ -97,8 +99,9 @@ public class FileBackedDataSource extends DataSource {
// remember this buffer for cleanup
buffersToClean.put(dst,dst);
} else {
- // allocate the buffer on the heap if we cannot map the data in directly
channel.position(position);
+
+ // allocate the buffer on the heap if we cannot map the data in directly
dst = ByteBuffer.allocate(length);
// Read the contents and check that we could read some data
@@ -118,6 +121,11 @@ public class FileBackedDataSource extends DataSource {
@Override
public void write(ByteBuffer src, long position) throws IOException {
channel.write(src, position);
+
+ // we have to re-read size if we write "after" the recorded one
+ if(channelSize != null && position >= channelSize) {
+ channelSize = null;
+ }
}
@Override
@@ -131,7 +139,13 @@ public class FileBackedDataSource extends DataSource {
@Override
public long size() throws IOException {
- return channel.size();
+ // this is called often and profiling showed that channel.size()
+ // was taking a large part of processing-time, so we only read it
+ // once
+ if(channelSize == null) {
+ channelSize = channel.size();
+ }
+ return channelSize;
}
public void releaseBuffer(ByteBuffer buffer) {
diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java b/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java
index 1fffceafeb..5153cb37e1 100644
--- a/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java
+++ b/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java
@@ -24,11 +24,18 @@ import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.util.HashMap;
import org.apache.poi.POIDataSamples;
+import org.apache.poi.hpsf.NoPropertySetStreamException;
+import org.apache.poi.hpsf.Property;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.Section;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants;
@@ -177,7 +184,7 @@ public final class TestPOIFSFileSystem {
fail("File is corrupt and shouldn't have been opened");
}
}
-
+
/**
* Tests that we can write and read a file that contains XBATs
* as well as regular BATs.
@@ -191,19 +198,19 @@ public final class TestPOIFSFileSystem {
fs.getRoot().createDocument(
"BIG", new ByteArrayInputStream(hugeStream)
);
-
+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
fs.writeFilesystem(baos);
byte[] fsData = baos.toByteArray();
-
-
+
+
// Check the header was written properly
- InputStream inp = new ByteArrayInputStream(fsData);
+ InputStream inp = new ByteArrayInputStream(fsData);
HeaderBlock header = new HeaderBlock(inp);
assertEquals(109+21, header.getBATCount());
assertEquals(1, header.getXBATCount());
-
-
+
+
// We should have 21 BATs in the XBAT
ByteBuffer xbatData = ByteBuffer.allocate(512);
xbatData.put(fsData, (1+header.getXBATIndex())*512, 512);
@@ -216,19 +223,19 @@ public final class TestPOIFSFileSystem {
assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i));
}
assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127));
-
-
+
+
// Now load it and check
fs = new POIFSFileSystem(
new ByteArrayInputStream(fsData)
);
-
+
DirectoryNode root = fs.getRoot();
assertEquals(1, root.getEntryCount());
DocumentNode big = (DocumentNode)root.getEntry("BIG");
assertEquals(hugeStream.length, big.getSize());
}
-
+
/**
* Most OLE2 files use 512byte blocks. However, a small number
* use 4k blocks. Check that we can open these.
@@ -293,4 +300,66 @@ public final class TestPOIFSFileSystem {
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("foobaa".getBytes(UTF_8)));
}
+
+ @Test
+ public void test64322() throws NoPropertySetStreamException, IOException {
+ try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.getFile("64322.ole2"))) {
+ int count = recurseDir(poiFS.getRoot());
+
+ assertEquals("Expecting a fixed number of entries being found in the test-document",
+ 1285, count);
+ }
+ }
+
+ @Test
+ public void test64322a() throws NoPropertySetStreamException, IOException {
+ try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.openResourceAsStream("64322.ole2"))) {
+ int count = recurseDir(poiFS.getRoot());
+
+ assertEquals("Expecting a fixed number of entries being found in the test-document",
+ 1285, count);
+ }
+ }
+
+ private static int recurseDir(DirectoryEntry dir) throws IOException, NoPropertySetStreamException {
+ int count = 0;
+ for (Entry entry : dir) {
+ count++;
+ if (entry instanceof DirectoryEntry) {
+ count += recurseDir((DirectoryEntry) entry);
+ }
+ if (entry instanceof DocumentEntry) {
+ DocumentEntry de = (DocumentEntry) entry;
+ HashMap<String, String> props = new HashMap<>();
+ try (DocumentInputStream dis = new DocumentInputStream(de)) {
+ props.put("name", de.getName());
+
+ if (PropertySet.isPropertySetStream(dis)) {
+ dis.mark(10000000);
+ PropertySet ps = null;
+ try {
+ ps = new PropertySet(dis);
+
+ } catch (UnsupportedEncodingException e) {
+ // ignore
+ }
+ if (ps != null) {
+ for (Section section : ps.getSections()) {
+ for (Property p : section.getProperties()) {
+ String prop = section.getDictionary() != null
+ ? section.getDictionary().get(p.getID())
+ : String.valueOf(p.getID());
+ if (p.getValue() != null)
+ props.put("property_" + prop, p.getValue().toString());
+ }
+ }
+ }
+ dis.reset();
+ }
+ }
+ assertTrue(props.size() > 0);
+ }
+ }
+ return count;
+ }
}
diff --git a/test-data/poifs/64322.ole2 b/test-data/poifs/64322.ole2
new file mode 100644
index 0000000000..abd421124f
--- /dev/null
+++ b/test-data/poifs/64322.ole2
Binary files differ