From c0e28795c1b2400f90dd6a6dd3687674c694aa4b Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sat, 18 Dec 2010 10:18:43 +0000 Subject: [PATCH] Start on lower memory POIFS implementation - data source to provide common access to array of bytes and files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1050607 13f79535-47bb-0310-9956-ffa450edef68 --- .../poifs/nio/ByteArrayBackedDataSource.java | 83 +++++++++++++++++++ .../org/apache/poi/poifs/nio/DataSource.java | 31 +++++++ .../poi/poifs/nio/FileBackedDataSource.java | 48 +++++++++++ .../poi/poifs/storage/HeaderBlockReader.java | 68 +++++++++------ src/java/org/apache/poi/util/IOUtils.java | 25 ++++++ .../apache/poi/poifs/nio/TestDataSource.java | 38 +++++++++ 6 files changed, 268 insertions(+), 25 deletions(-) create mode 100644 src/java/org/apache/poi/poifs/nio/ByteArrayBackedDataSource.java create mode 100644 src/java/org/apache/poi/poifs/nio/DataSource.java create mode 100644 src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java create mode 100644 src/testcases/org/apache/poi/poifs/nio/TestDataSource.java diff --git a/src/java/org/apache/poi/poifs/nio/ByteArrayBackedDataSource.java b/src/java/org/apache/poi/poifs/nio/ByteArrayBackedDataSource.java new file mode 100644 index 0000000000..8fbb3ce102 --- /dev/null +++ b/src/java/org/apache/poi/poifs/nio/ByteArrayBackedDataSource.java @@ -0,0 +1,83 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.poifs.nio; + +import java.nio.ByteBuffer; + +/** + * A POIFS {@link DataSource} backed by a byte array. + */ +public class ByteArrayBackedDataSource extends DataSource { + private byte[] buffer; + private long size; + + public ByteArrayBackedDataSource(byte[] data) { + this.buffer = data; + this.size = data.length; + } + + public void read(ByteBuffer dst, long position) { + if(position + dst.capacity() > size) { + throw new IndexOutOfBoundsException( + "Unable to read " + dst.capacity() + " bytes from " + + position + " in stream of length " + size + ); + } + dst.put(buffer, (int)position, dst.capacity()); + } + + public void write(ByteBuffer src, long position) { + // Extend if needed + long endPosition = position + src.capacity(); + if(endPosition > buffer.length) { + extend(endPosition); + } + + // Now copy + src.get(buffer, (int)position, src.capacity()); + + // Update size if needed + if(endPosition > size) { + size = endPosition; + } + } + + private void extend(long length) { + // Consider extending by a bit more than requested + long difference = length - buffer.length; + if(difference < buffer.length*0.25) { + difference = (long)(buffer.length*0.25); + } + if(difference < 4096) { + difference = 4096; + } + + byte[] nb = new byte[(int)(difference+buffer.length)]; + System.arraycopy(buffer, 0, nb, 0, (int)size); + buffer = nb; + } + + public long size() { + return size; + } + + public void close() { + buffer = null; + size = -1; + } +} diff --git 
a/src/java/org/apache/poi/poifs/nio/DataSource.java b/src/java/org/apache/poi/poifs/nio/DataSource.java new file mode 100644 index 0000000000..1264b0922d --- /dev/null +++ b/src/java/org/apache/poi/poifs/nio/DataSource.java @@ -0,0 +1,31 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.poifs.nio; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Common definition of how we read and write bytes + */ +public abstract class DataSource { + abstract void read(ByteBuffer dst, long position) throws IOException; + abstract void write(ByteBuffer src, long position) throws IOException; + abstract long size() throws IOException; + abstract void close() throws IOException; +} diff --git a/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java new file mode 100644 index 0000000000..7f5e8e6354 --- /dev/null +++ b/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java @@ -0,0 +1,48 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.poifs.nio; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; + +/** + * A POIFS {@link DataSource} backed by a File + */ +public class FileBackedDataSource extends DataSource { + private FileChannel file; + public FileBackedDataSource(FileChannel file) { + this.file = file; + } + + public void read(ByteBuffer dst, long position) throws IOException { + file.read(dst, position); + } + + public void write(ByteBuffer src, long position) throws IOException { + file.write(src, position); + } + + public long size() throws IOException { + return file.size(); + } + + public void close() throws IOException { + file.close(); + } +} diff --git a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java index 46e6e57f7f..3569dfc243 100644 --- a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java +++ b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java @@ -30,6 +30,7 @@ import static org.apache.poi.poifs.storage.HeaderBlockConstants._xbat_start_offs import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import org.apache.poi.poifs.common.POIFSBigBlockSize; import org.apache.poi.poifs.common.POIFSConstants; @@ -83,6 +84,10 @@ public final class HeaderBlockReader { * (Number of DIFAT Sectors in Microsoft parlance) */ private final int _xbat_count; + + /** + * The data + */ private final byte[] _data; /** @@ -93,26 +98,36 @@ public final class HeaderBlockReader { * @exception IOException on errors or bad data */ public HeaderBlockReader(InputStream stream) throws IOException { - // At this point, we don't know how big our - // block sizes are - // So, read the first 32 bytes to check, then - // read the rest of the block - byte[] blockStart = new byte[32]; - int bsCount = IOUtils.readFully(stream, blockStart); - if(bsCount 
!= 32) { - throw alertShortRead(bsCount, 32); + // Grab the first 512 bytes + // (For 4096 sized blocks, the remaining 3584 bytes are zero) + // Then, process the contents + this(readFirst512(stream)); + + // Fetch the rest of the block if needed + if(bigBlockSize.getBigBlockSize() != 512) { + int rest = bigBlockSize.getBigBlockSize() - 512; + byte[] tmp = new byte[rest]; + IOUtils.readFully(stream, tmp); } - + } + + public HeaderBlockReader(ByteBuffer buffer) throws IOException { + this(buffer.array()); + } + + private HeaderBlockReader(byte[] data) throws IOException { + this._data = data; + // verify signature - long signature = LittleEndian.getLong(blockStart, _signature_offset); + long signature = LittleEndian.getLong(_data, _signature_offset); if (signature != _signature) { // Is it one of the usual suspects? byte[] OOXML_FILE_HEADER = POIFSConstants.OOXML_FILE_HEADER; - if(blockStart[0] == OOXML_FILE_HEADER[0] && - blockStart[1] == OOXML_FILE_HEADER[1] && - blockStart[2] == OOXML_FILE_HEADER[2] && - blockStart[3] == OOXML_FILE_HEADER[3]) { + if(_data[0] == OOXML_FILE_HEADER[0] && + _data[1] == OOXML_FILE_HEADER[1] && + _data[2] == OOXML_FILE_HEADER[2] && + _data[3] == OOXML_FILE_HEADER[3]) { throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. You are calling the part of POI that deals with OLE2 Office Documents. You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); } if ((signature & 0xFF8FFFFFFFFFFFFFL) == 0x0010000200040009L) { @@ -129,22 +144,14 @@ public final class HeaderBlockReader { // Figure out our block size - switch (blockStart[30]) { + switch (_data[30]) { case 12: bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; break; case 9: bigBlockSize = POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS; break; default: throw new IOException("Unsupported blocksize (2^" - + blockStart[30] + "). 
Expected 2^9 or 2^12."); - } - _data = new byte[ bigBlockSize.getBigBlockSize() ]; - System.arraycopy(blockStart, 0, _data, 0, blockStart.length); - - // Now we can read the rest of our header - int byte_count = IOUtils.readFully(stream, _data, blockStart.length, _data.length - blockStart.length); - if (byte_count+bsCount != bigBlockSize.getBigBlockSize()) { - throw alertShortRead(byte_count, bigBlockSize.getBigBlockSize()); + + _data[30] + "). Expected 2^9 or 2^12."); } _bat_count = getInt(_bat_count_offset, _data); @@ -154,6 +161,17 @@ public final class HeaderBlockReader { _xbat_start = getInt(_xbat_start_offset, _data); _xbat_count = getInt(_xbat_count_offset, _data); } + + private static byte[] readFirst512(InputStream stream) throws IOException { + // Grab the first 512 bytes + // (For 4096 sized blocks, the remaining 3584 bytes are zero) + byte[] data = new byte[512]; + int bsCount = IOUtils.readFully(stream, data); + if(bsCount != 512) { + throw alertShortRead(bsCount, 512); + } + return data; + } private static int getInt(int offset, byte[] data) { return LittleEndian.getInt(data, offset); @@ -216,7 +234,7 @@ public final class HeaderBlockReader { for (int j = 0; j < _max_bats_in_header; j++) { result[ j ] = LittleEndian.getInt(_data, offset); - offset += LittleEndianConsts.INT_SIZE; + offset += LittleEndianConsts.INT_SIZE; } return result; } diff --git a/src/java/org/apache/poi/util/IOUtils.java b/src/java/org/apache/poi/util/IOUtils.java index a4bf7b0aea..4428c9c544 100644 --- a/src/java/org/apache/poi/util/IOUtils.java +++ b/src/java/org/apache/poi/util/IOUtils.java @@ -21,6 +21,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; public final class IOUtils { private IOUtils() { @@ -74,6 +76,29 @@ public final class IOUtils { } } } + + /** + * Same as the normal channel.read(b), but tries to ensure + * 
that the entire len number of bytes is read. + * <p>
+ * If the end of file is reached before any bytes are read, returns -1. If + * the end of the file is reached after some bytes are read, returns the + * number of bytes read. If the end of the file isn't reached before len + * bytes have been read, will return len bytes. + */ + public static int readFully(ReadableByteChannel channel, ByteBuffer b) throws IOException { + int total = 0; + while (true) { + int got = channel.read(b); + if (got < 0) { + return (total == 0) ? -1 : total; + } + total += got; + if (total == b.capacity()) { + return total; + } + } + } /** * Copies all the data from the given InputStream to the OutputStream. It diff --git a/src/testcases/org/apache/poi/poifs/nio/TestDataSource.java b/src/testcases/org/apache/poi/poifs/nio/TestDataSource.java new file mode 100644 index 0000000000..df039ee1da --- /dev/null +++ b/src/testcases/org/apache/poi/poifs/nio/TestDataSource.java @@ -0,0 +1,38 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + +package org.apache.poi.poifs.nio; + +import java.io.IOException; + +import junit.framework.TestCase; + +/** + * Tests for the datasource implementations + */ +public class TestDataSource extends TestCase +{ + public void testFile() throws IOException { + // TODO + } + + public void testByteArray() throws IOException { + // TODO + } +} -- 2.39.5