You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PAPFormattedDiskPage.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hwpf.model;
  16. import java.io.ByteArrayOutputStream;
  17. import java.io.IOException;
  18. import java.util.ArrayList;
  19. import java.util.Arrays;
  20. import java.util.Collections;
  21. import java.util.List;
  22. import org.apache.poi.util.IOUtils;
  23. import org.apache.poi.util.Internal;
  24. import org.apache.poi.util.LittleEndian;
  25. /**
  26. * Represents a PAP FKP. The style properties for paragraph and character runs
  27. * are stored in fkps. There are PAP fkps for paragraph properties and CHP fkps
  28. * for character run properties. The first part of the fkp for both CHP and PAP
  29. * fkps consists of an array of 4 byte int offsets in the main stream for that
  30. * Paragraph's or Character run's text. The ending offset is the next
  31. * value in the array. For example, if an fkp has X number of Paragraph's
  32. * stored in it then there are (x + 1) 4 byte ints in the beginning array. The
  33. * number X is determined by the last byte in a 512 byte fkp.
  34. *
  35. * CHP and PAP fkps also store the compressed styles(grpprl) that correspond to
  36. * the offsets on the front of the fkp. The offset of the grpprls is determined
  37. * differently for CHP fkps and PAP fkps.
  38. *
  39. * @author Ryan Ackley
  40. */
  41. @Internal
  42. public final class PAPFormattedDiskPage extends FormattedDiskPage {
  43. private static final int BX_SIZE = 13;
  44. private static final int FC_SIZE = 4;
  45. private ArrayList<PAPX> _papxList = new ArrayList<>();
  46. private ArrayList<PAPX> _overFlow;
  47. public PAPFormattedDiskPage() { }
  48. /**
  49. * Creates a PAPFormattedDiskPage from a 512 byte array
  50. */
  51. public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
  52. int offset, CharIndexTranslator translator ) {
  53. super( documentStream, offset );
  54. for ( int x = 0; x < _crun; x++ )
  55. {
  56. int bytesStartAt = getStart( x );
  57. int bytesEndAt = getEnd( x );
  58. // int charStartAt = translator.getCharIndex( bytesStartAt );
  59. // int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt
  60. // );
  61. // PAPX papx = new PAPX( charStartAt, charEndAt, getGrpprl( x ),
  62. // getParagraphHeight( x ), dataStream );
  63. // _papxList.add( papx );
  64. for ( int[] range : translator.getCharIndexRanges( bytesStartAt,
  65. bytesEndAt ) )
  66. {
  67. PAPX papx = new PAPX( range[0], range[1], getGrpprl( x ),
  68. getParagraphHeight( x ), dataStream );
  69. _papxList.add( papx );
  70. }
  71. }
  72. _fkp = null;
  73. }
  74. /**
  75. * Fills the queue for writing.
  76. *
  77. * @param filler a List of PAPXs
  78. */
  79. public void fill(List<PAPX> filler)
  80. {
  81. _papxList.addAll(filler);
  82. }
  83. /**
  84. * Used when writing out a Word docunment. This method is part of a sequence
  85. * that is necessary because there is no easy and efficient way to
  86. * determine the number PAPX's that will fit into one FKP. THe sequence is
  87. * as follows:
  88. *
  89. * fill()
  90. * toByteArray()
  91. * getOverflow()
  92. *
  93. * @return The remaining PAPXs that didn't fit into this FKP.
  94. */
  95. ArrayList<PAPX> getOverflow()
  96. {
  97. return _overFlow;
  98. }
  99. /**
  100. * Gets the PAPX at index.
  101. * @param index The index to get the PAPX for.
  102. * @return The PAPX at index.
  103. */
  104. public PAPX getPAPX(int index)
  105. {
  106. return _papxList.get(index);
  107. }
  108. public List<PAPX> getPAPXs()
  109. {
  110. return Collections.unmodifiableList( _papxList );
  111. }
  112. /**
  113. * Gets the papx grpprl for the paragraph at index in this fkp.
  114. *
  115. * @param index The index of the papx to get.
  116. * @return a papx grpprl.
  117. */
  118. protected byte[] getGrpprl(int index)
  119. {
  120. int papxOffset = 2 * LittleEndian.getUByte(_fkp, _offset + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE)));
  121. int size = 2 * LittleEndian.getUByte(_fkp, _offset + papxOffset);
  122. if(size == 0)
  123. {
  124. size = 2 * LittleEndian.getUByte(_fkp, _offset + ++papxOffset);
  125. }
  126. else
  127. {
  128. size--;
  129. }
  130. byte[] papx = IOUtils.safelyAllocate(size, 512);
  131. System.arraycopy(_fkp, _offset + ++papxOffset, papx, 0, size);
  132. return papx;
  133. }
  134. /**
  135. * Creates a byte array representation of this data structure. Suitable for
  136. * writing to a Word document.
  137. *
  138. * @param dataStream required if PAPX is too big to fit in FKP
  139. *
  140. * @return A byte array representing this data structure.
  141. * @throws IOException
  142. * if an I/O error occurs.
  143. */
  144. protected byte[] toByteArray( ByteArrayOutputStream dataStream,
  145. CharIndexTranslator translator ) throws IOException
  146. {
  147. byte[] buf = new byte[512];
  148. int size = _papxList.size();
  149. int grpprlOffset = 0;
  150. int bxOffset = 0;
  151. int fcOffset = 0;
  152. byte[] lastGrpprl = new byte[0];
  153. // total size is currently the size of one FC
  154. int totalSize = FC_SIZE;
  155. int index = 0;
  156. for ( ; index < size; index++ )
  157. {
  158. byte[] grpprl = _papxList.get( index ).getGrpprl();
  159. int grpprlLength = grpprl.length;
  160. // is grpprl huge?
  161. if ( grpprlLength > 488 )
  162. {
  163. grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
  164. }
  165. // check to see if we have enough room for an FC, a BX, and the
  166. // grpprl
  167. // and the 1 byte size of the grpprl.
  168. int addition = 0;
  169. if ( !Arrays.equals( grpprl, lastGrpprl ) )
  170. {
  171. addition = ( FC_SIZE + BX_SIZE + grpprlLength + 1 );
  172. }
  173. else
  174. {
  175. addition = ( FC_SIZE + BX_SIZE );
  176. }
  177. totalSize += addition;
  178. // if size is uneven we will have to add one so the first grpprl
  179. // falls
  180. // on a word boundary
  181. if ( totalSize > 511 + ( index % 2 ) )
  182. {
  183. totalSize -= addition;
  184. break;
  185. }
  186. // grpprls must fall on word boundaries
  187. if ( grpprlLength % 2 > 0 )
  188. {
  189. totalSize += 1;
  190. }
  191. else
  192. {
  193. totalSize += 2;
  194. }
  195. lastGrpprl = grpprl;
  196. }
  197. // see if we couldn't fit some
  198. if ( index != size )
  199. {
  200. _overFlow = new ArrayList<>();
  201. _overFlow.addAll( _papxList.subList( index, size ) );
  202. }
  203. // index should equal number of papxs that will be in this fkp now.
  204. buf[511] = (byte) index;
  205. bxOffset = ( FC_SIZE * index ) + FC_SIZE;
  206. grpprlOffset = 511;
  207. PAPX papx = null;
  208. lastGrpprl = new byte[0];
  209. for ( int x = 0; x < index; x++ )
  210. {
  211. papx = _papxList.get( x );
  212. byte[] phe = papx.getParagraphHeight().toByteArray();
  213. byte[] grpprl = papx.getGrpprl();
  214. // is grpprl huge?
  215. if ( grpprl.length > 488 )
  216. {
  217. // if so do we have storage at getHugeGrpprlOffset()
  218. // int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
  219. // if ( hugeGrpprlOffset == -1 ) // then we have no storage...
  220. // {
  221. // throw new UnsupportedOperationException(
  222. // "This Paragraph has no dataStream storage." );
  223. // }
  224. // we have some storage...
  225. // get the size of the existing storage
  226. // int maxHugeGrpprlSize = LittleEndian.getUShort( dataStream,
  227. // hugeGrpprlOffset );
  228. //
  229. // if ( maxHugeGrpprlSize < grpprl.length - 2 )
  230. // { // grpprl.length-2 because we don't store the istd
  231. // throw new UnsupportedOperationException(
  232. // "This Paragraph's dataStream storage is too small." );
  233. // }
  234. // store grpprl at hugeGrpprlOffset
  235. // grpprl.length-2 because we don't store the istd
  236. // System.arraycopy( grpprl, 2, dataStream, hugeGrpprlOffset +
  237. // 2,
  238. // grpprl.length - 2 );
  239. // LittleEndian.putUShort( dataStream, hugeGrpprlOffset,
  240. // grpprl.length - 2 );
  241. byte[] hugePapx = new byte[grpprl.length - 2];
  242. System.arraycopy( grpprl, 2, hugePapx, 0, grpprl.length - 2 );
  243. int dataStreamOffset = dataStream.size();
  244. dataStream.write( hugePapx );
  245. // grpprl = grpprl containing only a sprmPHugePapx2
  246. int istd = LittleEndian.getUShort( grpprl, 0 );
  247. grpprl = new byte[8];
  248. LittleEndian.putUShort( grpprl, 0, istd );
  249. LittleEndian.putUShort( grpprl, 2, 0x6646 ); // sprmPHugePapx2
  250. LittleEndian.putInt( grpprl, 4, dataStreamOffset );
  251. }
  252. boolean same = Arrays.equals( lastGrpprl, grpprl );
  253. if ( !same )
  254. {
  255. grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) );
  256. grpprlOffset -= ( grpprlOffset % 2 );
  257. }
  258. // LittleEndian.putInt( buf, fcOffset, papx.getStartBytes() );
  259. LittleEndian.putInt( buf, fcOffset,
  260. translator.getByteIndex( papx.getStart() ) );
  261. buf[bxOffset] = (byte) ( grpprlOffset / 2 );
  262. System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length );
  263. /*
  264. * refer to the section on PAPX in the spec. Places a size on the
  265. * front of the PAPX. Has to do with how the grpprl stays on word
  266. * boundaries.
  267. */
  268. if ( !same )
  269. {
  270. int copyOffset = grpprlOffset;
  271. if ( ( grpprl.length % 2 ) > 0 )
  272. {
  273. buf[copyOffset++] = (byte) ( ( grpprl.length + 1 ) / 2 );
  274. }
  275. else
  276. {
  277. buf[++copyOffset] = (byte) ( ( grpprl.length ) / 2 );
  278. copyOffset++;
  279. }
  280. System.arraycopy( grpprl, 0, buf, copyOffset, grpprl.length );
  281. lastGrpprl = grpprl;
  282. }
  283. bxOffset += BX_SIZE;
  284. fcOffset += FC_SIZE;
  285. }
  286. if (papx != null) {
  287. // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
  288. LittleEndian.putInt(buf, fcOffset, translator.getByteIndex(papx.getEnd()));
  289. }
  290. return buf;
  291. }
  292. /**
  293. * Used to get the ParagraphHeight of a PAPX at a particular index.
  294. * @param index
  295. * @return The ParagraphHeight
  296. */
  297. private ParagraphHeight getParagraphHeight(int index)
  298. {
  299. int pheOffset = _offset + 1 + (((_crun + 1) * 4) + (index * 13));
  300. return new ParagraphHeight(_fkp, pheOffset);
  301. }
  302. }