You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PAPFormattedDiskPage.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hwpf.model;
  16. import java.io.ByteArrayOutputStream;
  17. import java.io.IOException;
  18. import java.util.ArrayList;
  19. import java.util.Arrays;
  20. import java.util.Collections;
  21. import java.util.List;
  22. import org.apache.poi.util.IOUtils;
  23. import org.apache.poi.util.Internal;
  24. import org.apache.poi.util.LittleEndian;
  25. /**
  26. * Represents a PAP FKP. The style properties for paragraph and character runs
  27. * are stored in fkps. There are PAP fkps for paragraph properties and CHP fkps
  28. * for character run properties. The first part of the fkp for both CHP and PAP
  29. * fkps consists of an array of 4 byte int offsets in the main stream for that
  30. * Paragraph's or Character run's text. The ending offset is the next
  31. * value in the array. For example, if an fkp has X number of Paragraph's
  32. * stored in it then there are (x + 1) 4 byte ints in the beginning array. The
  33. * number X is determined by the last byte in a 512 byte fkp.
  34. *
  35. * CHP and PAP fkps also store the compressed styles(grpprl) that correspond to
  36. * the offsets on the front of the fkp. The offset of the grpprls is determined
  37. * differently for CHP fkps and PAP fkps.
  38. */
  39. @Internal
  40. public final class PAPFormattedDiskPage extends FormattedDiskPage {
  41. private static final int BX_SIZE = 13;
  42. private static final int FC_SIZE = 4;
  43. private ArrayList<PAPX> _papxList = new ArrayList<>();
  44. private ArrayList<PAPX> _overFlow;
  45. public PAPFormattedDiskPage() { }
  46. /**
  47. * Creates a PAPFormattedDiskPage from a 512 byte array
  48. */
  49. public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
  50. int offset, CharIndexTranslator translator ) {
  51. super( documentStream, offset );
  52. for ( int x = 0; x < _crun; x++ )
  53. {
  54. int bytesStartAt = getStart( x );
  55. int bytesEndAt = getEnd( x );
  56. // int charStartAt = translator.getCharIndex( bytesStartAt );
  57. // int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt
  58. // );
  59. // PAPX papx = new PAPX( charStartAt, charEndAt, getGrpprl( x ),
  60. // getParagraphHeight( x ), dataStream );
  61. // _papxList.add( papx );
  62. for ( int[] range : translator.getCharIndexRanges( bytesStartAt,
  63. bytesEndAt ) )
  64. {
  65. PAPX papx = new PAPX( range[0], range[1], getGrpprl( x ),
  66. getParagraphHeight( x ), dataStream );
  67. _papxList.add( papx );
  68. }
  69. }
  70. _fkp = null;
  71. }
  72. /**
  73. * Fills the queue for writing.
  74. *
  75. * @param filler a List of PAPXs
  76. */
  77. public void fill(List<PAPX> filler)
  78. {
  79. _papxList.addAll(filler);
  80. }
  81. /**
  82. * Used when writing out a Word docunment. This method is part of a sequence
  83. * that is necessary because there is no easy and efficient way to
  84. * determine the number PAPX's that will fit into one FKP. THe sequence is
  85. * as follows:
  86. *
  87. * fill()
  88. * toByteArray()
  89. * getOverflow()
  90. *
  91. * @return The remaining PAPXs that didn't fit into this FKP.
  92. */
  93. ArrayList<PAPX> getOverflow()
  94. {
  95. return _overFlow;
  96. }
  97. /**
  98. * Gets the PAPX at index.
  99. * @param index The index to get the PAPX for.
  100. * @return The PAPX at index.
  101. */
  102. public PAPX getPAPX(int index)
  103. {
  104. return _papxList.get(index);
  105. }
  106. public List<PAPX> getPAPXs()
  107. {
  108. return Collections.unmodifiableList( _papxList );
  109. }
  110. /**
  111. * Gets the papx grpprl for the paragraph at index in this fkp.
  112. *
  113. * @param index The index of the papx to get.
  114. * @return a papx grpprl.
  115. */
  116. protected byte[] getGrpprl(int index)
  117. {
  118. int papxOffset = 2 * LittleEndian.getUByte(_fkp, _offset + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE)));
  119. int size = 2 * LittleEndian.getUByte(_fkp, _offset + papxOffset);
  120. if(size == 0) {
  121. size = 2 * LittleEndian.getUByte(_fkp, _offset + ++papxOffset);
  122. } else {
  123. size--;
  124. }
  125. return IOUtils.safelyClone(_fkp, _offset + papxOffset + 1, size, 512);
  126. }
  127. /**
  128. * Creates a byte array representation of this data structure. Suitable for
  129. * writing to a Word document.
  130. *
  131. * @param dataStream required if PAPX is too big to fit in FKP
  132. *
  133. * @return A byte array representing this data structure.
  134. * @throws IOException
  135. * if an I/O error occurs.
  136. */
  137. protected byte[] toByteArray( ByteArrayOutputStream dataStream,
  138. CharIndexTranslator translator ) throws IOException
  139. {
  140. byte[] buf = new byte[512];
  141. int size = _papxList.size();
  142. int grpprlOffset = 0;
  143. int bxOffset = 0;
  144. int fcOffset = 0;
  145. byte[] lastGrpprl = new byte[0];
  146. // total size is currently the size of one FC
  147. int totalSize = FC_SIZE;
  148. int index = 0;
  149. for ( ; index < size; index++ )
  150. {
  151. byte[] grpprl = _papxList.get( index ).getGrpprl();
  152. int grpprlLength = grpprl.length;
  153. // is grpprl huge?
  154. if ( grpprlLength > 488 )
  155. {
  156. grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
  157. }
  158. // check to see if we have enough room for an FC, a BX, and the
  159. // grpprl
  160. // and the 1 byte size of the grpprl.
  161. int addition = 0;
  162. if ( !Arrays.equals( grpprl, lastGrpprl ) )
  163. {
  164. addition = ( FC_SIZE + BX_SIZE + grpprlLength + 1 );
  165. }
  166. else
  167. {
  168. addition = ( FC_SIZE + BX_SIZE );
  169. }
  170. totalSize += addition;
  171. // if size is uneven we will have to add one so the first grpprl
  172. // falls
  173. // on a word boundary
  174. if ( totalSize > 511 + ( index % 2 ) )
  175. {
  176. totalSize -= addition;
  177. break;
  178. }
  179. // grpprls must fall on word boundaries
  180. if ( grpprlLength % 2 > 0 )
  181. {
  182. totalSize += 1;
  183. }
  184. else
  185. {
  186. totalSize += 2;
  187. }
  188. lastGrpprl = grpprl;
  189. }
  190. // see if we couldn't fit some
  191. if ( index != size )
  192. {
  193. _overFlow = new ArrayList<>();
  194. _overFlow.addAll( _papxList.subList( index, size ) );
  195. }
  196. // index should equal number of papxs that will be in this fkp now.
  197. buf[511] = (byte) index;
  198. bxOffset = ( FC_SIZE * index ) + FC_SIZE;
  199. grpprlOffset = 511;
  200. PAPX papx = null;
  201. lastGrpprl = new byte[0];
  202. for ( int x = 0; x < index; x++ )
  203. {
  204. papx = _papxList.get( x );
  205. byte[] phe = papx.getParagraphHeight().toByteArray();
  206. byte[] grpprl = papx.getGrpprl();
  207. // is grpprl huge?
  208. if ( grpprl.length > 488 )
  209. {
  210. // if so do we have storage at getHugeGrpprlOffset()
  211. // int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
  212. // if ( hugeGrpprlOffset == -1 ) // then we have no storage...
  213. // {
  214. // throw new UnsupportedOperationException(
  215. // "This Paragraph has no dataStream storage." );
  216. // }
  217. // we have some storage...
  218. // get the size of the existing storage
  219. // int maxHugeGrpprlSize = LittleEndian.getUShort( dataStream,
  220. // hugeGrpprlOffset );
  221. //
  222. // if ( maxHugeGrpprlSize < grpprl.length - 2 )
  223. // { // grpprl.length-2 because we don't store the istd
  224. // throw new UnsupportedOperationException(
  225. // "This Paragraph's dataStream storage is too small." );
  226. // }
  227. // store grpprl at hugeGrpprlOffset
  228. // grpprl.length-2 because we don't store the istd
  229. // System.arraycopy( grpprl, 2, dataStream, hugeGrpprlOffset +
  230. // 2,
  231. // grpprl.length - 2 );
  232. // LittleEndian.putUShort( dataStream, hugeGrpprlOffset,
  233. // grpprl.length - 2 );
  234. byte[] hugePapx = Arrays.copyOfRange(grpprl, 2, grpprl.length);
  235. int dataStreamOffset = dataStream.size();
  236. dataStream.write( hugePapx );
  237. // grpprl = grpprl containing only a sprmPHugePapx2
  238. int istd = LittleEndian.getUShort( grpprl, 0 );
  239. grpprl = new byte[8];
  240. LittleEndian.putUShort( grpprl, 0, istd );
  241. LittleEndian.putUShort( grpprl, 2, 0x6646 ); // sprmPHugePapx2
  242. LittleEndian.putInt( grpprl, 4, dataStreamOffset );
  243. }
  244. boolean same = Arrays.equals( lastGrpprl, grpprl );
  245. if ( !same )
  246. {
  247. grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) );
  248. grpprlOffset -= ( grpprlOffset % 2 );
  249. }
  250. // LittleEndian.putInt( buf, fcOffset, papx.getStartBytes() );
  251. LittleEndian.putInt( buf, fcOffset,
  252. translator.getByteIndex( papx.getStart() ) );
  253. buf[bxOffset] = (byte) ( grpprlOffset / 2 );
  254. System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length );
  255. /*
  256. * refer to the section on PAPX in the spec. Places a size on the
  257. * front of the PAPX. Has to do with how the grpprl stays on word
  258. * boundaries.
  259. */
  260. if ( !same )
  261. {
  262. int copyOffset = grpprlOffset;
  263. if ( ( grpprl.length % 2 ) > 0 )
  264. {
  265. buf[copyOffset++] = (byte) ( ( grpprl.length + 1 ) / 2 );
  266. }
  267. else
  268. {
  269. buf[++copyOffset] = (byte) ( ( grpprl.length ) / 2 );
  270. copyOffset++;
  271. }
  272. System.arraycopy( grpprl, 0, buf, copyOffset, grpprl.length );
  273. lastGrpprl = grpprl;
  274. }
  275. bxOffset += BX_SIZE;
  276. fcOffset += FC_SIZE;
  277. }
  278. if (papx != null) {
  279. // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
  280. LittleEndian.putInt(buf, fcOffset, translator.getByteIndex(papx.getEnd()));
  281. }
  282. return buf;
  283. }
  284. /**
  285. * Used to get the ParagraphHeight of a PAPX at a particular index.
  286. * @param index
  287. * @return The ParagraphHeight
  288. */
  289. private ParagraphHeight getParagraphHeight(int index)
  290. {
  291. int pheOffset = _offset + 1 + (((_crun + 1) * 4) + (index * 13));
  292. return new ParagraphHeight(_fkp, pheOffset);
  293. }
  294. }