You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

BlockAllocationTableReader.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.poifs.storage;
  16. import java.io.IOException;
  17. import java.util.*;
  18. import org.apache.poi.poifs.common.POIFSBigBlockSize;
  19. import org.apache.poi.poifs.common.POIFSConstants;
  20. import org.apache.poi.util.IntList;
  21. import org.apache.poi.util.LittleEndian;
  22. import org.apache.poi.util.LittleEndianConsts;
  23. /**
  24. * This class manages and creates the Block Allocation Table, which is
  25. * basically a set of linked lists of block indices.
  26. * <P>
  27. * Each block of the filesystem has an index. The first block, the
  28. * header, is skipped; the first block after the header is index 0,
  29. * the next is index 1, and so on.
  30. * <P>
  31. * A block's index is also its index into the Block Allocation
  32. * Table. The entry that it finds in the Block Allocation Table is the
  33. * index of the next block in the linked list of blocks making up a
  34. * file, or it is set to -2: end of list.
  35. *
  36. * @author Marc Johnson (mjohnson at apache dot org)
  37. */
  38. public final class BlockAllocationTableReader {
  39. /**
  40. * Maximum number size (in blocks) of the allocation table as supported by
  41. * POI.<br/>
  42. *
  43. * This constant has been chosen to help POI identify corrupted data in the
  44. * header block (rather than crash immediately with {@link OutOfMemoryError}
  45. * ). It's not clear if the compound document format actually specifies any
  46. * upper limits. For files with 512 byte blocks, having an allocation table
  47. * of 65,335 blocks would correspond to a total file size of 4GB. Needless
  48. * to say, POI probably cannot handle files anywhere near that size.
  49. */
  50. private static final int MAX_BLOCK_COUNT = 65535;
  51. private final IntList _entries;
  52. private POIFSBigBlockSize bigBlockSize;
  53. /**
  54. * create a BlockAllocationTableReader for an existing filesystem. Side
  55. * effect: when this method finishes, the BAT blocks will have
  56. * been removed from the raw block list, and any blocks labeled as
  57. * 'unused' in the block allocation table will also have been
  58. * removed from the raw block list.
  59. *
  60. * @param block_count the number of BAT blocks making up the block
  61. * allocation table
  62. * @param block_array the array of BAT block indices from the
  63. * filesystem's header
  64. * @param xbat_count the number of XBAT blocks
  65. * @param xbat_index the index of the first XBAT block
  66. * @param raw_block_list the list of RawDataBlocks
  67. *
  68. * @exception IOException if, in trying to create the table, we
  69. * encounter logic errors
  70. */
  71. public BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize, int block_count, int [] block_array,
  72. int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException {
  73. this(bigBlockSize);
  74. sanityCheckBlockCount(block_count);
  75. // We want to get the whole of the FAT table
  76. // To do this:
  77. // * Work through raw_block_list, which points to the
  78. // first (up to) 109 BAT blocks
  79. // * Jump to the XBAT offset, and read in XBATs which
  80. // point to more BAT blocks
  81. int limit = Math.min(block_count, block_array.length);
  82. int block_index;
  83. // This will hold all of the BAT blocks in order
  84. RawDataBlock blocks[] = new RawDataBlock[ block_count ];
  85. // Process the first (up to) 109 BAT blocks
  86. for (block_index = 0; block_index < limit; block_index++)
  87. {
  88. // Check that the sector number of the BAT block is a valid one
  89. int nextOffset = block_array[ block_index ];
  90. if(nextOffset > raw_block_list.blockCount()) {
  91. throw new IOException("Your file contains " + raw_block_list.blockCount() +
  92. " sectors, but the initial DIFAT array at index " + block_index +
  93. " referenced block # " + nextOffset + ". This isn't allowed and " +
  94. " your file is corrupt");
  95. }
  96. // Record the sector number of this BAT block
  97. blocks[ block_index ] =
  98. ( RawDataBlock ) raw_block_list.remove(nextOffset);
  99. }
  100. // Process additional BAT blocks via the XBATs
  101. if (block_index < block_count)
  102. {
  103. // must have extended blocks
  104. if (xbat_index < 0)
  105. {
  106. throw new IOException(
  107. "BAT count exceeds limit, yet XBAT index indicates no valid entries");
  108. }
  109. int chain_index = xbat_index;
  110. int max_entries_per_block = bigBlockSize.getXBATEntriesPerBlock();
  111. int chain_index_offset = bigBlockSize.getNextXBATChainOffset();
  112. // Each XBAT block contains either:
  113. // (maximum number of sector indexes) + index of next XBAT
  114. // some sector indexes + FREE sectors to max # + EndOfChain
  115. for (int j = 0; j < xbat_count; j++)
  116. {
  117. limit = Math.min(block_count - block_index,
  118. max_entries_per_block);
  119. byte[] data = raw_block_list.remove(chain_index).getData();
  120. int offset = 0;
  121. for (int k = 0; k < limit; k++)
  122. {
  123. blocks[ block_index++ ] =
  124. ( RawDataBlock ) raw_block_list
  125. .remove(LittleEndian.getInt(data, offset));
  126. offset += LittleEndianConsts.INT_SIZE;
  127. }
  128. chain_index = LittleEndian.getInt(data, chain_index_offset);
  129. if (chain_index == POIFSConstants.END_OF_CHAIN)
  130. {
  131. break;
  132. }
  133. }
  134. }
  135. if (block_index != block_count)
  136. {
  137. throw new IOException("Could not find all blocks");
  138. }
  139. // Now that we have all of the raw data blocks which make
  140. // up the FAT, go through and create the indices
  141. setEntries(blocks, raw_block_list);
  142. }
  143. /**
  144. * create a BlockAllocationTableReader from an array of raw data blocks
  145. *
  146. * @param blocks the raw data
  147. * @param raw_block_list the list holding the managed blocks
  148. *
  149. * @exception IOException
  150. */
  151. BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize, ListManagedBlock[] blocks, BlockList raw_block_list)
  152. throws IOException {
  153. this(bigBlockSize);
  154. setEntries(blocks, raw_block_list);
  155. }
  156. BlockAllocationTableReader(POIFSBigBlockSize bigBlockSize) {
  157. this.bigBlockSize = bigBlockSize;
  158. _entries = new IntList();
  159. }
  160. public static void sanityCheckBlockCount(int block_count) throws IOException {
  161. if (block_count <= 0) {
  162. throw new IOException(
  163. "Illegal block count; minimum count is 1, got " +
  164. block_count + " instead"
  165. );
  166. }
  167. if (block_count > MAX_BLOCK_COUNT) {
  168. throw new IOException(
  169. "Block count " + block_count +
  170. " is too high. POI maximum is " + MAX_BLOCK_COUNT + "."
  171. );
  172. }
  173. }
  174. /**
  175. * walk the entries from a specified point and return the
  176. * associated blocks. The associated blocks are removed from the
  177. * block list
  178. *
  179. * @param startBlock the first block in the chain
  180. * @param blockList the raw data block list
  181. *
  182. * @return array of ListManagedBlocks, in their correct order
  183. *
  184. * @exception IOException if there is a problem acquiring the blocks
  185. */
  186. ListManagedBlock[] fetchBlocks(int startBlock, int headerPropertiesStartBlock,
  187. BlockList blockList) throws IOException {
  188. List<ListManagedBlock> blocks = new ArrayList<ListManagedBlock>();
  189. int currentBlock = startBlock;
  190. boolean firstPass = true;
  191. ListManagedBlock dataBlock = null;
  192. // Process the chain from the start to the end
  193. // Normally we have header, data, end
  194. // Sometimes we have data, header, end
  195. // For those cases, stop at the header, not the end
  196. while (currentBlock != POIFSConstants.END_OF_CHAIN) {
  197. try {
  198. // Grab the data at the current block offset
  199. dataBlock = blockList.remove(currentBlock);
  200. blocks.add(dataBlock);
  201. // Now figure out which block we go to next
  202. currentBlock = _entries.get(currentBlock);
  203. firstPass = false;
  204. } catch(IOException e) {
  205. if(currentBlock == headerPropertiesStartBlock) {
  206. // Special case where things are in the wrong order
  207. System.err.println("Warning, header block comes after data blocks in POIFS block listing");
  208. currentBlock = POIFSConstants.END_OF_CHAIN;
  209. } else if(currentBlock == 0 && firstPass) {
  210. // Special case where the termination isn't done right
  211. // on an empty set
  212. System.err.println("Warning, incorrectly terminated empty data blocks in POIFS block listing (should end at -2, ended at 0)");
  213. currentBlock = POIFSConstants.END_OF_CHAIN;
  214. } else {
  215. // Ripple up
  216. throw e;
  217. }
  218. }
  219. }
  220. return blocks.toArray(new ListManagedBlock[blocks.size()]);
  221. }
  222. // methods for debugging reader
  223. /**
  224. * determine whether the block specified by index is used or not
  225. *
  226. * @param index index of block in question
  227. *
  228. * @return true if the specific block is used, else false
  229. */
  230. boolean isUsed(int index) {
  231. try {
  232. return _entries.get(index) != -1;
  233. } catch (IndexOutOfBoundsException e) {
  234. // ignored
  235. return false;
  236. }
  237. }
  238. /**
  239. * return the next block index
  240. *
  241. * @param index of the current block
  242. *
  243. * @return index of the next block (may be
  244. * POIFSConstants.END_OF_CHAIN, indicating end of chain
  245. * (duh))
  246. *
  247. * @exception IOException if the current block is unused
  248. */
  249. int getNextBlockIndex(int index) throws IOException {
  250. if (isUsed(index)) {
  251. return _entries.get(index);
  252. }
  253. throw new IOException("index " + index + " is unused");
  254. }
  255. /**
  256. * Convert an array of blocks into a set of integer indices
  257. *
  258. * @param blocks the array of blocks containing the indices
  259. * @param raw_blocks the list of blocks being managed. Unused
  260. * blocks will be eliminated from the list
  261. */
  262. private void setEntries(ListManagedBlock[] blocks, BlockList raw_blocks) throws IOException {
  263. int limit = bigBlockSize.getBATEntriesPerBlock();
  264. for (int block_index = 0; block_index < blocks.length; block_index++)
  265. {
  266. byte[] data = blocks[ block_index ].getData();
  267. int offset = 0;
  268. for (int k = 0; k < limit; k++)
  269. {
  270. int entry = LittleEndian.getInt(data, offset);
  271. if (entry == POIFSConstants.UNUSED_BLOCK)
  272. {
  273. raw_blocks.zap(_entries.size());
  274. }
  275. _entries.add(entry);
  276. offset += LittleEndianConsts.INT_SIZE;
  277. }
  278. // discard block
  279. blocks[ block_index ] = null;
  280. }
  281. raw_blocks.setBAT(this);
  282. }
  283. }