123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.poifs.storage;
-
- import java.io.IOException;
-
- import java.util.*;
-
- import org.apache.poi.poifs.common.POIFSBigBlockSize;
- import org.apache.poi.poifs.common.POIFSConstants;
- import org.apache.poi.util.IntList;
- import org.apache.poi.util.LittleEndian;
- import org.apache.poi.util.LittleEndianConsts;
-
- /**
- * This class manages and creates the Block Allocation Table, which is
- * basically a set of linked lists of block indices.
- * <P>
- * Each block of the filesystem has an index. The first block, the
- * header, is skipped; the first block after the header is index 0,
- * the next is index 1, and so on.
- * <P>
- * A block's index is also its index into the Block Allocation
- * Table. The entry that it finds in the Block Allocation Table is the
- * index of the next block in the linked list of blocks making up a
- * file, or it is set to -2: end of list.
- *
- * @author Marc Johnson (mjohnson at apache dot org)
- */
- public final class BlockAllocationTableReader {
-
- /**
- * Maximum number size (in blocks) of the allocation table as supported by
- * POI.<br/>
- *
- * This constant has been chosen to help POI identify corrupted data in the
- * header block (rather than crash immediately with {@link OutOfMemoryError}
- * ). It's not clear if the compound document format actually specifies any
- * upper limits. For files with 512 byte blocks, having an allocation table
- * of 65,335 blocks would correspond to a total file size of 4GB. Needless
- * to say, POI probably cannot handle files anywhere near that size.
- */
- private static final int MAX_BLOCK_COUNT = 65535;
- private final IntList _entries;
- private POIFSBigBlockSize bigBlockSize;
-
- /**
- * create a BlockAllocationTableReader for an existing filesystem. Side
- * effect: when this method finishes, the BAT blocks will have
- * been removed from the raw block list, and any blocks labeled as
- * 'unused' in the block allocation table will also have been
- * removed from the raw block list.
- *
- * @param block_count the number of BAT blocks making up the block
- * allocation table
- * @param block_array the array of BAT block indices from the
- * filesystem's header
- * @param xbat_count the number of XBAT blocks
- * @param xbat_index the index of the first XBAT block
- * @param raw_block_list the list of RawDataBlocks
- *
- * @exception IOException if, in trying to create the table, we
- * encounter logic errors
- */
- public BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize, int block_count, int [] block_array,
- int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException {
- this(bigBlockSize);
-
- sanityCheckBlockCount(block_count);
-
- // We want to get the whole of the FAT table
- // To do this:
- // * Work through raw_block_list, which points to the
- // first (up to) 109 BAT blocks
- // * Jump to the XBAT offset, and read in XBATs which
- // point to more BAT blocks
- int limit = Math.min(block_count, block_array.length);
- int block_index;
-
- // This will hold all of the BAT blocks in order
- RawDataBlock blocks[] = new RawDataBlock[ block_count ];
-
- // Process the first (up to) 109 BAT blocks
- for (block_index = 0; block_index < limit; block_index++)
- {
- // Check that the sector number of the BAT block is a valid one
- int nextOffset = block_array[ block_index ];
- if(nextOffset > raw_block_list.blockCount()) {
- throw new IOException("Your file contains " + raw_block_list.blockCount() +
- " sectors, but the initial DIFAT array at index " + block_index +
- " referenced block # " + nextOffset + ". This isn't allowed and " +
- " your file is corrupt");
- }
- // Record the sector number of this BAT block
- blocks[ block_index ] =
- ( RawDataBlock ) raw_block_list.remove(nextOffset);
- }
-
- // Process additional BAT blocks via the XBATs
- if (block_index < block_count)
- {
-
- // must have extended blocks
- if (xbat_index < 0)
- {
- throw new IOException(
- "BAT count exceeds limit, yet XBAT index indicates no valid entries");
- }
- int chain_index = xbat_index;
- int max_entries_per_block = bigBlockSize.getXBATEntriesPerBlock();
- int chain_index_offset = bigBlockSize.getNextXBATChainOffset();
-
- // Each XBAT block contains either:
- // (maximum number of sector indexes) + index of next XBAT
- // some sector indexes + FREE sectors to max # + EndOfChain
- for (int j = 0; j < xbat_count; j++)
- {
- limit = Math.min(block_count - block_index,
- max_entries_per_block);
- byte[] data = raw_block_list.remove(chain_index).getData();
- int offset = 0;
-
- for (int k = 0; k < limit; k++)
- {
- blocks[ block_index++ ] =
- ( RawDataBlock ) raw_block_list
- .remove(LittleEndian.getInt(data, offset));
- offset += LittleEndianConsts.INT_SIZE;
- }
- chain_index = LittleEndian.getInt(data, chain_index_offset);
- if (chain_index == POIFSConstants.END_OF_CHAIN)
- {
- break;
- }
- }
- }
- if (block_index != block_count)
- {
- throw new IOException("Could not find all blocks");
- }
-
- // Now that we have all of the raw data blocks which make
- // up the FAT, go through and create the indices
- setEntries(blocks, raw_block_list);
- }
-
- /**
- * create a BlockAllocationTableReader from an array of raw data blocks
- *
- * @param blocks the raw data
- * @param raw_block_list the list holding the managed blocks
- *
- * @exception IOException
- */
- BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize, ListManagedBlock[] blocks, BlockList raw_block_list)
- throws IOException {
- this(bigBlockSize);
- setEntries(blocks, raw_block_list);
- }
-
- BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize) {
- this.bigBlockSize = bigBlockSize;
- _entries = new IntList();
- }
-
- public static void sanityCheckBlockCount(int block_count) throws IOException {
- if (block_count <= 0) {
- throw new IOException(
- "Illegal block count; minimum count is 1, got " +
- block_count + " instead"
- );
- }
- if (block_count > MAX_BLOCK_COUNT) {
- throw new IOException(
- "Block count " + block_count +
- " is too high. POI maximum is " + MAX_BLOCK_COUNT + "."
- );
- }
- }
-
- /**
- * walk the entries from a specified point and return the
- * associated blocks. The associated blocks are removed from the
- * block list
- *
- * @param startBlock the first block in the chain
- * @param blockList the raw data block list
- *
- * @return array of ListManagedBlocks, in their correct order
- *
- * @exception IOException if there is a problem acquiring the blocks
- */
- ListManagedBlock[] fetchBlocks(int startBlock, int headerPropertiesStartBlock,
- BlockList blockList) throws IOException {
- List<ListManagedBlock> blocks = new ArrayList<ListManagedBlock>();
- int currentBlock = startBlock;
- boolean firstPass = true;
- ListManagedBlock dataBlock = null;
-
- // Process the chain from the start to the end
- // Normally we have header, data, end
- // Sometimes we have data, header, end
- // For those cases, stop at the header, not the end
- while (currentBlock != POIFSConstants.END_OF_CHAIN) {
- try {
- // Grab the data at the current block offset
- dataBlock = blockList.remove(currentBlock);
- blocks.add(dataBlock);
- // Now figure out which block we go to next
- currentBlock = _entries.get(currentBlock);
- firstPass = false;
- } catch(IOException e) {
- if(currentBlock == headerPropertiesStartBlock) {
- // Special case where things are in the wrong order
- System.err.println("Warning, header block comes after data blocks in POIFS block listing");
- currentBlock = POIFSConstants.END_OF_CHAIN;
- } else if(currentBlock == 0 && firstPass) {
- // Special case where the termination isn't done right
- // on an empty set
- System.err.println("Warning, incorrectly terminated empty data blocks in POIFS block listing (should end at -2, ended at 0)");
- currentBlock = POIFSConstants.END_OF_CHAIN;
- } else {
- // Ripple up
- throw e;
- }
- }
- }
-
- return blocks.toArray(new ListManagedBlock[blocks.size()]);
- }
-
- // methods for debugging reader
-
- /**
- * determine whether the block specified by index is used or not
- *
- * @param index index of block in question
- *
- * @return true if the specific block is used, else false
- */
- boolean isUsed(int index) {
-
- try {
- return _entries.get(index) != -1;
- } catch (IndexOutOfBoundsException e) {
- // ignored
- return false;
- }
- }
-
- /**
- * return the next block index
- *
- * @param index of the current block
- *
- * @return index of the next block (may be
- * POIFSConstants.END_OF_CHAIN, indicating end of chain
- * (duh))
- *
- * @exception IOException if the current block is unused
- */
- int getNextBlockIndex(int index) throws IOException {
- if (isUsed(index)) {
- return _entries.get(index);
- }
- throw new IOException("index " + index + " is unused");
- }
-
- /**
- * Convert an array of blocks into a set of integer indices
- *
- * @param blocks the array of blocks containing the indices
- * @param raw_blocks the list of blocks being managed. Unused
- * blocks will be eliminated from the list
- */
- private void setEntries(ListManagedBlock[] blocks, BlockList raw_blocks) throws IOException {
- int limit = bigBlockSize.getBATEntriesPerBlock();
-
- for (int block_index = 0; block_index < blocks.length; block_index++)
- {
- byte[] data = blocks[ block_index ].getData();
- int offset = 0;
-
- for (int k = 0; k < limit; k++)
- {
- int entry = LittleEndian.getInt(data, offset);
-
- if (entry == POIFSConstants.UNUSED_BLOCK)
- {
- raw_blocks.zap(_entries.size());
- }
- _entries.add(entry);
- offset += LittleEndianConsts.INT_SIZE;
- }
-
- // discard block
- blocks[ block_index ] = null;
- }
- raw_blocks.setBAT(this);
- }
- }
|