/**
* This class provides methods to read a DocumentEntry managed by a
* {@link POIFSFileSystem} instance.
- *
- * @author Marc Johnson (mjohnson at apache dot org)
*/
public final class NDocumentInputStream extends InputStream implements LittleEndianInput {
/** returned by read operations if we're at end of document */
private NPOIFSMiniStore _mini_store;
private NPropertyTable _property_table;
+ private List<BATBlock> _xbat_blocks;
private List<BATBlock> _bat_blocks;
private HeaderBlock _header;
private DirectoryNode _root;
_header = new HeaderBlock(bigBlockSize);
_property_table = new NPropertyTable(_header);
_mini_store = new NPOIFSMiniStore(this, _property_table.getRoot(), new ArrayList<BATBlock>(), _header);
+ _xbat_blocks = new ArrayList<BATBlock>();
_bat_blocks = new ArrayList<BATBlock>();
_root = null;
}
// Read the FAT blocks
for(int fatAt : _header.getBATArray()) {
- loopDetector.claim(fatAt);
- ByteBuffer fatData = getBlockAt(fatAt);
- BATBlock bat = BATBlock.createBATBlock(bigBlockSize, fatData);
- bat.setOurBlockIndex(fatAt);
- _bat_blocks.add(bat);
+ readBAT(fatAt, loopDetector);
}
- // Now read the XFAT blocks
+ // Now read the XFAT blocks, and the FATs within them
BATBlock xfat;
int nextAt = _header.getXBATIndex();
for(int i=0; i<_header.getXBATCount(); i++) {
xfat = BATBlock.createBATBlock(bigBlockSize, fatData);
xfat.setOurBlockIndex(nextAt);
nextAt = xfat.getValueAt(bigBlockSize.getXBATEntriesPerBlock());
+ _xbat_blocks.add(xfat);
- _bat_blocks.add(xfat);
+ for(int j=0; j<bigBlockSize.getXBATEntriesPerBlock(); j++) {
+ int fatAt = xfat.getValueAt(j);
+ if(fatAt == POIFSConstants.UNUSED_BLOCK) break;
+ readBAT(fatAt, loopDetector);
+ }
}
// We're now able to load streams
nextAt = getNextBlock(nextAt);
}
}
/**
 * Loads a single BAT block and appends it to the list of BAT blocks
 * held by this object.
 *
 * The block index is first claimed with the supplied loop detector,
 * then the raw block is fetched, wrapped as a BATBlock, tagged with
 * its own block index and added to the end of _bat_blocks.
 *
 * @param batAt the block index at which the BAT is stored
 * @param loopDetector the detector with which batAt is claimed before
 *        the block is read (presumably rejects an index that was
 *        already claimed - confirm against ChainLoopDetector)
 * @throws IOException declared by this method; presumably raised if
 *         the block cannot be read - confirm against getBlockAt
 */
+ private void readBAT(int batAt, ChainLoopDetector loopDetector) throws IOException {
+ loopDetector.claim(batAt);
+ ByteBuffer fatData = getBlockAt(batAt);
+ BATBlock bat = BATBlock.createBATBlock(bigBlockSize, fatData);
+ bat.setOurBlockIndex(batAt);
+ _bat_blocks.add(bat);
+ }
/**
* Load the block at the given offset.
/**
* Calculates the maximum size of a file which is addressable given the
* number of FAT (BAT and XBAT) sectors specified.
- * The actual file size will be between [size of fatCount-1 blocks] and
- * [size of fatCount blocks].
- * For 512 byte block sizes, this means we may over-estimate by up to 65kb.
- * For 4096 byte block sizes, this means we may over-estimate by up to 4mb
+ *
+ * For files with 109 or fewer BATs:
+ * The actual file size will be between [size of fatCount-1 blocks] and
+ * [size of fatCount blocks].
+ * For 512 byte block sizes, this means we may over-estimate by up to 65kb.
+ * For 4096 byte block sizes, this means we may over-estimate by up to 4mb
+ *
+ * For files with more than 109 BATs (i.e. has XBATs):
+ * Each XBAT can hold 127/1023 BATs, which in turn address 128/1024 blocks.
+ * For 512 byte block sizes, this means we may over-estimate by up to 8mb
+ * For 4096 byte block sizes, this means we may over-estimate by up to 4gb,
+ * but only for files of more than 436mb in size
*/
public static int calculateMaximumSize(final POIFSBigBlockSize bigBlockSize,
final int numBAT, final int numXBAT) {
int size = 1; // Header isn't FAT addressed
+
+ // The header contains up to 109 BATs, each of which can
+ // address 128/1024 blocks
size += (numBAT * bigBlockSize.getBATEntriesPerBlock());
- size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock());
+
+ // Each XBAT holds up to 127/1023 BATs, each of which can
+ // address 128/1024 blocks
+ size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock() *
+ bigBlockSize.getBATEntriesPerBlock());
+
+ // So far we've been in sector counts, turn into bytes
// NOTE(review): everything here is int arithmetic. If the larger block
// size is 4096 bytes (as the 128/1024 figures suggest), a single XBAT
// already contributes 1023 * 1024 * 4096 bytes, which is greater than
// Integer.MAX_VALUE, so the product below would wrap around - confirm
// whether callers need a long-based variant.
return size * bigBlockSize.getBigBlockSize();
}
public static int calculateMaximumSize(final HeaderBlock header)
final HeaderBlock header, final List<BATBlock> bats) {
POIFSBigBlockSize bigBlockSize = header.getBigBlockSize();
- // Are we in the BAT or XBAT range
- int batRangeEndsAt = bigBlockSize.getBATEntriesPerBlock() *
- header.getBATCount();
-
- if(offset < batRangeEndsAt) {
- int whichBAT = (int)Math.floor(offset / bigBlockSize.getBATEntriesPerBlock());
- int index = offset % bigBlockSize.getBATEntriesPerBlock();
- return new BATBlockAndIndex( index, bats.get(whichBAT) );
- }
-
- // XBATs hold slightly less
- int relOffset = offset - batRangeEndsAt;
- int whichXBAT = (int)Math.floor(relOffset / bigBlockSize.getXBATEntriesPerBlock());
- int index = relOffset % bigBlockSize.getXBATEntriesPerBlock();
- return new BATBlockAndIndex(
- index,
- bats.get(header.getBATCount() + whichXBAT)
- );
+ int whichBAT = (int)Math.floor(offset / bigBlockSize.getBATEntriesPerBlock());
+ int index = offset % bigBlockSize.getBATEntriesPerBlock();
+ return new BATBlockAndIndex( index, bats.get(whichBAT) );
}
/**
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4, 0)
);
- // Once we get into XBAT blocks, they address a little bit less
+ // One XBAT block holds 127/1023 individual BAT blocks, so they can address
+ // a fairly hefty amount of space themselves
assertEquals(
512 + 109*512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 0)
);
assertEquals(
- 512 + 109*512*128 + 512*127,
+ 512 + 109*512*128 + 512*127*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
);
assertEquals(
- 4096 + 109*4096*1024 + 4096*1023,
+ 4096 + 109*4096*1024 + 4096*1023*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
);
assertEquals(
- 512 + 109*512*128 + 3*512*127,
+ 512 + 109*512*128 + 3*512*127*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
);
assertEquals(
- 4096 + 109*4096*1024 + 3*4096*1023,
+ 4096 + 109*4096*1024 + 3*4096*1023*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
);
}
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
- // And finally one with XBATs too
- // This is a naughty file, but we should be able to cope...
- // (We'll decide everything is XBAT not BAT)
- header.setBATCount(0);
+ // The XBAT count makes no difference, as we flatten in memory
+ header.setBATCount(1);
+ header.setXBATCount(1);
offset = 0;
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 127;
- assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
- assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
+ assertEquals(127, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
+ assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 128;
- assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
+ assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 129;
- assertEquals(2, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
+ assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 1023;
- assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
- assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
+ assertEquals(1023, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
+ assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 1024;
- assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
+ assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
// Bigger block size, back to real BATs