<changes>
<release version="3.6-beta1" date="2009-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">48085 - improved error checking in BlockAllocationTableReader to trap unreasonable field values</action>
<action dev="POI-DEVELOPERS" type="fix">47924 - fixed logic for matching cells and comments in HSSFCell.getCellComment()</action>
<action dev="POI-DEVELOPERS" type="add">47942 - added implementation of protection features to XLSX and DOCX files</action>
<action dev="POI-DEVELOPERS" type="fix">48070 - preserve leading and trailing white spaces in XSSFRichTextString</action>
* @author Marc Johnson (mjohnson at apache dot org)
*/
public final class BlockAllocationTableReader {
- private IntList _entries;
+
+ /**
+ * Maximum number size (in blocks) of the allocation table as supported by
+ * POI.<br/>
+ *
+ * This constant has been chosen to help POI identify corrupted data in the
+ * header block (rather than crash immediately with {@link OutOfMemoryError}
+ * ). It's not clear if the compound document format actually specifies any
+ * upper limits. For files with 512 byte blocks, having an allocation table
+ * of 65,335 blocks would correspond to a total file size of 4GB. Needless
+ * to say, POI probably cannot handle files anywhere near that size.
+ */
+ private static final int MAX_BLOCK_COUNT = 65535;
+ private final IntList _entries;
/**
* create a BlockAllocationTableReader for an existing filesystem. Side
* @exception IOException if, in trying to create the table, we
* encounter logic errors
*/
-
- public BlockAllocationTableReader(final int block_count,
- final int [] block_array,
- final int xbat_count,
- final int xbat_index,
- final BlockList raw_block_list)
- throws IOException
- {
+ public BlockAllocationTableReader(int block_count, int [] block_array,
+ int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException {
this();
- if (block_count <= 0)
- {
+ if (block_count <= 0) {
throw new IOException(
"Illegal block count; minimum count is 1, got " + block_count
+ " instead");
}
+ if (block_count > MAX_BLOCK_COUNT) {
+ throw new IOException("Block count " + block_count
+ + " is too high. POI maximum is " + MAX_BLOCK_COUNT + ".");
+ }
+
// acquire raw data blocks containing the BAT block data
RawDataBlock blocks[] = new RawDataBlock[ block_count ];
int limit = Math.min(block_count, block_array.length);
*
* @exception IOException
*/
-
- BlockAllocationTableReader(final ListManagedBlock [] blocks,
- final BlockList raw_block_list)
- throws IOException
- {
+ BlockAllocationTableReader(ListManagedBlock[] blocks, BlockList raw_block_list)
+ throws IOException {
this();
setEntries(blocks, raw_block_list);
}
- BlockAllocationTableReader()
- {
+ BlockAllocationTableReader() {
_entries = new IntList();
}
*
* @exception IOException if there is a problem acquiring the blocks
*/
- ListManagedBlock [] fetchBlocks(final int startBlock,
- final int headerPropertiesStartBlock,
- final BlockList blockList)
- throws IOException
- {
+ ListManagedBlock[] fetchBlocks(int startBlock, int headerPropertiesStartBlock,
+ BlockList blockList) throws IOException {
List<ListManagedBlock> blocks = new ArrayList<ListManagedBlock>();
int currentBlock = startBlock;
boolean firstPass = true;
// Sometimes we have data, header, end
// For those cases, stop at the header, not the end
while (currentBlock != POIFSConstants.END_OF_CHAIN) {
- try {
- // Grab the data at the current block offset
- dataBlock = blockList.remove(currentBlock);
- blocks.add(dataBlock);
- // Now figure out which block we go to next
+ try {
+ // Grab the data at the current block offset
+ dataBlock = blockList.remove(currentBlock);
+ blocks.add(dataBlock);
+ // Now figure out which block we go to next
currentBlock = _entries.get(currentBlock);
firstPass = false;
- } catch(IOException e) {
- if(currentBlock == headerPropertiesStartBlock) {
- // Special case where things are in the wrong order
- System.err.println("Warning, header block comes after data blocks in POIFS block listing");
- currentBlock = POIFSConstants.END_OF_CHAIN;
- } else if(currentBlock == 0 && firstPass) {
- // Special case where the termination isn't done right
- // on an empty set
- System.err.println("Warning, incorrectly terminated empty data blocks in POIFS block listing (should end at -2, ended at 0)");
- currentBlock = POIFSConstants.END_OF_CHAIN;
- } else {
- // Ripple up
- throw e;
- }
- }
+ } catch(IOException e) {
+ if(currentBlock == headerPropertiesStartBlock) {
+ // Special case where things are in the wrong order
+ System.err.println("Warning, header block comes after data blocks in POIFS block listing");
+ currentBlock = POIFSConstants.END_OF_CHAIN;
+ } else if(currentBlock == 0 && firstPass) {
+ // Special case where the termination isn't done right
+ // on an empty set
+ System.err.println("Warning, incorrectly terminated empty data blocks in POIFS block listing (should end at -2, ended at 0)");
+ currentBlock = POIFSConstants.END_OF_CHAIN;
+ } else {
+ // Ripple up
+ throw e;
+ }
+ }
}
return blocks.toArray(new ListManagedBlock[blocks.size()]);
*
* @return true if the specific block is used, else false
*/
- boolean isUsed(final int index)
- {
- boolean rval = false;
+ boolean isUsed(int index) {
- try
- {
- rval = _entries.get(index) != -1;
+ try {
+ return _entries.get(index) != -1;
} catch (IndexOutOfBoundsException e) {
// ignored
+ return false;
}
- return rval;
}
/**
*
* @exception IOException if the current block is unused
*/
- int getNextBlockIndex(final int index)
- throws IOException
- {
- if (isUsed(index))
- {
+ int getNextBlockIndex(int index) throws IOException {
+ if (isUsed(index)) {
return _entries.get(index);
}
throw new IOException("index " + index + " is unused");
* @param raw_blocks the list of blocks being managed. Unused
* blocks will be eliminated from the list
*/
- private void setEntries(final ListManagedBlock [] blocks,
- final BlockList raw_blocks)
- throws IOException
- {
+ private void setEntries(ListManagedBlock[] blocks, BlockList raw_blocks) throws IOException {
int limit = BATBlock.entriesPerBlock();
for (int block_index = 0; block_index < blocks.length; block_index++)
* What big block size the file uses. Most files
* use 512 bytes, but a few use 4096
*/
- private int bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE;
+ private final int bigBlockSize;
/** number of big block allocation table blocks (int) */
- private int _bat_count;
+ private final int _bat_count;
/** start of the property set block (int index of the property set
* chain's first big block)
*/
- private int _property_start;
+ private final int _property_start;
/** start of the small block allocation table (int index of small
* block allocation table's first big block)
*/
- private int _sbat_start;
+ private final int _sbat_start;
/** big block index for extension to the big block allocation table */
- private int _xbat_start;
- private int _xbat_count;
- private byte[] _data;
+ private final int _xbat_start;
+ private final int _xbat_count;
+ private final byte[] _data;
/**
* create a new HeaderBlockReader from an InputStream
byte[] blockStart = new byte[32];
int bsCount = IOUtils.readFully(stream, blockStart);
if(bsCount != 32) {
- alertShortRead(bsCount);
- }
-
- // Figure out our block size
- if(blockStart[30] == 12) {
- bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE;
- }
- _data = new byte[ bigBlockSize ];
- System.arraycopy(blockStart, 0, _data, 0, blockStart.length);
-
- // Now we can read the rest of our header
- int byte_count = IOUtils.readFully(stream, _data, blockStart.length, _data.length - blockStart.length);
- if (byte_count+bsCount != bigBlockSize) {
- alertShortRead(byte_count);
+ throw alertShortRead(bsCount, 32);
}
// verify signature
- long signature = LittleEndian.getLong(_data, _signature_offset);
+ long signature = LittleEndian.getLong(blockStart, _signature_offset);
if (signature != _signature) {
// Is it one of the usual suspects?
byte[] OOXML_FILE_HEADER = POIFSConstants.OOXML_FILE_HEADER;
- if(_data[0] == OOXML_FILE_HEADER[0] &&
- _data[1] == OOXML_FILE_HEADER[1] &&
- _data[2] == OOXML_FILE_HEADER[2] &&
- _data[3] == OOXML_FILE_HEADER[3]) {
+ if(blockStart[0] == OOXML_FILE_HEADER[0] &&
+ blockStart[1] == OOXML_FILE_HEADER[1] &&
+ blockStart[2] == OOXML_FILE_HEADER[2] &&
+ blockStart[3] == OOXML_FILE_HEADER[3]) {
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. You are calling the part of POI that deals with OLE2 Office Documents. You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
}
if ((signature & 0xFF8FFFFFFFFFFFFFL) == 0x0010000200040009L) {
+ longToHex(signature) + ", expected "
+ longToHex(_signature));
}
+
+
+ // Figure out our block size
+ switch (blockStart[30]) {
+ case 12:
+ bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE; break;
+ case 9:
+ bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE; break;
+ default:
+ throw new IOException("Unsupported blocksize (2^"
+ + blockStart[30] + "). Expected 2^9 or 2^12.");
+ }
+ _data = new byte[ bigBlockSize ];
+ System.arraycopy(blockStart, 0, _data, 0, blockStart.length);
+
+ // Now we can read the rest of our header
+ int byte_count = IOUtils.readFully(stream, _data, blockStart.length, _data.length - blockStart.length);
+ if (byte_count+bsCount != bigBlockSize) {
+ throw alertShortRead(byte_count, bigBlockSize);
+ }
+
_bat_count = getInt(_bat_count_offset, _data);
_property_start = getInt(_property_start_offset, _data);
_sbat_start = getInt(_sbat_start_offset, _data);
_xbat_start = getInt(_xbat_start_offset, _data);
_xbat_count = getInt(_xbat_count_offset, _data);
}
-
+
private static int getInt(int offset, byte[] data) {
return LittleEndian.getInt(data, offset);
}
return new String(HexDump.longToHex(value));
}
- private void alertShortRead(int pRead) throws IOException {
+ private static IOException alertShortRead(int pRead, int expectedReadSize) {
int read;
if (pRead < 0) {
//Can't have -1 bytes read in the error message!
}
String type = " byte" + (read == 1 ? (""): ("s"));
- throw new IOException("Unable to read entire header; "
+ return new IOException("Unable to read entire header; "
+ read + type + " read; expected "
- + bigBlockSize + " bytes");
+ + expectedReadSize + " bytes");
}
/**
public int getXBATIndex() {
return _xbat_start;
}
-
+
/**
* @return The Big Block size, normally 512 bytes, sometimes 4096 bytes
*/
return bigBlockSize;
}
}
-
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
+import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.util.HexRead;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
small_blocks.remove(j);
fail("removing block " + j + " should have failed");
} catch (IOException ignored) {
+ // expected during successful test
}
}
}
}
}
}
+
+ /**
+ * Bugzilla 48085 describes an error where a corrupted Excel file causes POI to throw an
+ * {@link OutOfMemoryError}.
+ */
+ public void testBadSectorAllocationTableSize_bug48085() {
+ int BLOCK_SIZE = 512;
+ // 512 bytes take from the start of bugzilla attachment 24444
+ byte[] initData = HexRead.readFromString(
+
+ "D0 CF 11 E0 A1 B1 1A E1 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 3E 20 03 20 FE FF 09 20" +
+ "06 20 20 20 20 20 20 20 20 20 20 20 01 20 20 20 01 20 20 20 20 20 20 20 20 10 20 20 02 20 20 20" +
+ "02 20 20 20 FE FF FF FF 20 20 20 20 20 20 20 20 "
+ );
+ // the rest of the block is 'FF'
+ byte[] data = new byte[BLOCK_SIZE];
+ Arrays.fill(data, (byte)0xFF);
+ System.arraycopy(initData, 0, data, 0, initData.length);
+
+ // similar code to POIFSFileSystem.<init>:
+ InputStream stream = new ByteArrayInputStream(data);
+ HeaderBlockReader hb;
+ RawDataBlockList dataBlocks;
+ try {
+ hb = new HeaderBlockReader(stream);
+ dataBlocks = new RawDataBlockList(stream, BLOCK_SIZE);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ try {
+ new BlockAllocationTableReader(hb.getBATCount(), hb.getBATArray(), hb.getXBATCount(),
+ hb.getXBATIndex(), dataBlocks);
+ } catch (IOException e) {
+ // expected during successful test
+ assertEquals("Block count 538976257 is too high. POI maximum is 65535.", e.getMessage());
+ } catch (OutOfMemoryError e) {
+ if (e.getStackTrace()[1].getMethodName().equals("testBadSectorAllocationTableSize")) {
+ throw new AssertionFailedError("Identified bug 48085");
+ }
+ }
+ }
}