You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

SSTRecord.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hssf.record;
  16. import java.util.Iterator;
  17. import org.apache.poi.hssf.record.common.UnicodeString;
  18. import org.apache.poi.hssf.record.cont.ContinuableRecord;
  19. import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
  20. import org.apache.poi.util.IntMapper;
  21. import org.apache.poi.util.LittleEndianConsts;
  22. /**
  23. * Title: Static String Table Record (0x00FC)<p/>
  24. *
  25. * Description: This holds all the strings for LabelSSTRecords.
  26. * <P>
  27. * REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
  28. * 1-57231-498-2)
  29. * <P>
  30. *
  31. * @see org.apache.poi.hssf.record.LabelSSTRecord
  32. * @see org.apache.poi.hssf.record.ContinueRecord
  33. */
  34. public final class SSTRecord extends ContinuableRecord {
  35. public static final short sid = 0x00FC;
  36. private static final UnicodeString EMPTY_STRING = new UnicodeString("");
  37. // TODO - move these constants to test class (the only consumer)
  38. /** standard record overhead: two shorts (record id plus data space size)*/
  39. static final int STD_RECORD_OVERHEAD = 2 * LittleEndianConsts.SHORT_SIZE;
  40. /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
  41. static final int SST_RECORD_OVERHEAD = STD_RECORD_OVERHEAD + 2 * LittleEndianConsts.INT_SIZE;
  42. /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
  43. static final int MAX_DATA_SPACE = RecordInputStream.MAX_RECORD_DATA_SIZE - 8;
  44. /** union of strings in the SST and EXTSST */
  45. private int field_1_num_strings;
  46. /** according to docs ONLY SST */
  47. private int field_2_num_unique_strings;
  48. private IntMapper<UnicodeString> field_3_strings;
  49. private SSTDeserializer deserializer;
  50. /** Offsets from the beginning of the SST record (even across continuations) */
  51. int[] bucketAbsoluteOffsets;
  52. /** Offsets relative the start of the current SST or continue record */
  53. int[] bucketRelativeOffsets;
  54. public SSTRecord()
  55. {
  56. field_1_num_strings = 0;
  57. field_2_num_unique_strings = 0;
  58. field_3_strings = new IntMapper<UnicodeString>();
  59. deserializer = new SSTDeserializer(field_3_strings);
  60. }
  61. /**
  62. * Add a string.
  63. *
  64. * @param string string to be added
  65. *
  66. * @return the index of that string in the table
  67. */
  68. public int addString(UnicodeString string)
  69. {
  70. field_1_num_strings++;
  71. UnicodeString ucs = ( string == null ) ? EMPTY_STRING
  72. : string;
  73. int rval;
  74. int index = field_3_strings.getIndex(ucs);
  75. if ( index != -1 ) {
  76. rval = index;
  77. } else {
  78. // This is a new string -- we didn't see it among the
  79. // strings we've already collected
  80. rval = field_3_strings.size();
  81. field_2_num_unique_strings++;
  82. SSTDeserializer.addToStringTable( field_3_strings, ucs );
  83. }
  84. return rval;
  85. }
  86. /**
  87. * @return number of strings
  88. */
  89. public int getNumStrings()
  90. {
  91. return field_1_num_strings;
  92. }
  93. /**
  94. * @return number of unique strings
  95. */
  96. public int getNumUniqueStrings()
  97. {
  98. return field_2_num_unique_strings;
  99. }
  100. /**
  101. * Get a particular string by its index
  102. *
  103. * @param id index into the array of strings
  104. *
  105. * @return the desired string
  106. */
  107. public UnicodeString getString(int id )
  108. {
  109. return field_3_strings.get( id );
  110. }
  111. /**
  112. * Return a debugging string representation
  113. *
  114. * @return string representation
  115. */
  116. public String toString() {
  117. StringBuffer buffer = new StringBuffer();
  118. buffer.append( "[SST]\n" );
  119. buffer.append( " .numstrings = " )
  120. .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
  121. buffer.append( " .uniquestrings = " )
  122. .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
  123. for ( int k = 0; k < field_3_strings.size(); k++ )
  124. {
  125. UnicodeString s = field_3_strings.get( k );
  126. buffer.append( " .string_" + k + " = " )
  127. .append( s.getDebugInfo() ).append( "\n" );
  128. }
  129. buffer.append( "[/SST]\n" );
  130. return buffer.toString();
  131. }
  132. public short getSid() {
  133. return sid;
  134. }
  135. /**
  136. * Fill the fields from the data
  137. * <P>
  138. * The data consists of sets of string data. This string data is
  139. * arranged as follows:
  140. * <P>
  141. * <CODE><pre>
  142. * short string_length; // length of string data
  143. * byte string_flag; // flag specifying special string
  144. * // handling
  145. * short run_count; // optional count of formatting runs
  146. * int extend_length; // optional extension length
  147. * char[] string_data; // string data, can be byte[] or
  148. * // short[] (length of array is
  149. * // string_length)
  150. * int[] formatting_runs; // optional formatting runs (length of
  151. * // array is run_count)
  152. * byte[] extension; // optional extension (length of array
  153. * // is extend_length)
  154. * </pre></CODE>
  155. * <P>
  156. * The string_flag is bit mapped as follows:
  157. * <P>
  158. * <TABLE>
  159. * <TR>
  160. * <TH>Bit number</TH>
  161. * <TH>Meaning if 0</TH>
  162. * <TH>Meaning if 1</TH>
  163. * <TR>
  164. * <TR>
  165. * <TD>0</TD>
  166. * <TD>string_data is byte[]</TD>
  167. * <TD>string_data is short[]</TH>
  168. * <TR>
  169. * <TR>
  170. * <TD>1</TD>
  171. * <TD>Should always be 0</TD>
  172. * <TD>string_flag is defective</TH>
  173. * <TR>
  174. * <TR>
  175. * <TD>2</TD>
  176. * <TD>extension is not included</TD>
  177. * <TD>extension is included</TH>
  178. * <TR>
  179. * <TR>
  180. * <TD>3</TD>
  181. * <TD>formatting run data is not included</TD>
  182. * <TD>formatting run data is included</TH>
  183. * <TR>
  184. * <TR>
  185. * <TD>4</TD>
  186. * <TD>Should always be 0</TD>
  187. * <TD>string_flag is defective</TH>
  188. * <TR>
  189. * <TR>
  190. * <TD>5</TD>
  191. * <TD>Should always be 0</TD>
  192. * <TD>string_flag is defective</TH>
  193. * <TR>
  194. * <TR>
  195. * <TD>6</TD>
  196. * <TD>Should always be 0</TD>
  197. * <TD>string_flag is defective</TH>
  198. * <TR>
  199. * <TR>
  200. * <TD>7</TD>
  201. * <TD>Should always be 0</TD>
  202. * <TD>string_flag is defective</TH>
  203. * <TR>
  204. * </TABLE>
  205. * <P>
  206. * We can handle eating the overhead associated with bits 2 or 3
  207. * (or both) being set, but we have no idea what to do with the
  208. * associated data. The UnicodeString class can handle the byte[]
  209. * vs short[] nature of the actual string data
  210. *
  211. * @param in the RecordInputstream to read the record from
  212. */
  213. public SSTRecord(RecordInputStream in) {
  214. // this method is ALWAYS called after construction -- using
  215. // the nontrivial constructor, of course -- so this is where
  216. // we initialize our fields
  217. field_1_num_strings = in.readInt();
  218. field_2_num_unique_strings = in.readInt();
  219. field_3_strings = new IntMapper<UnicodeString>();
  220. deserializer = new SSTDeserializer(field_3_strings);
  221. // Bug 57456: some Excel Sheets send 0 as field=1, but have some random number in field_2,
  222. // we should not try to read the strings in this case.
  223. if(field_1_num_strings == 0) {
  224. field_2_num_unique_strings = 0;
  225. return;
  226. }
  227. deserializer.manufactureStrings( field_2_num_unique_strings, in );
  228. }
  229. /**
  230. * @return an iterator of the strings we hold. All instances are
  231. * UnicodeStrings
  232. */
  233. Iterator<UnicodeString> getStrings()
  234. {
  235. return field_3_strings.iterator();
  236. }
  237. /**
  238. * @return count of the strings we hold.
  239. */
  240. int countStrings() {
  241. return field_3_strings.size();
  242. }
  243. protected void serialize(ContinuableRecordOutput out) {
  244. SSTSerializer serializer = new SSTSerializer(field_3_strings, getNumStrings(), getNumUniqueStrings() );
  245. serializer.serialize(out);
  246. bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
  247. bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
  248. }
  249. SSTDeserializer getDeserializer() {
  250. return deserializer;
  251. }
  252. /**
  253. * Creates an extended string record based on the current contents of
  254. * the current SST record. The offset within the stream to the SST record
  255. * is required because the extended string record points directly to the
  256. * strings in the SST record.
  257. * <p>
  258. * NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
  259. * SERIALIZED.
  260. *
  261. * @param sstOffset The offset in the stream to the start of the
  262. * SST record.
  263. * @return The new SST record.
  264. */
  265. public ExtSSTRecord createExtSSTRecord(int sstOffset) {
  266. if (bucketAbsoluteOffsets == null || bucketAbsoluteOffsets == null)
  267. throw new IllegalStateException("SST record has not yet been serialized.");
  268. ExtSSTRecord extSST = new ExtSSTRecord();
  269. extSST.setNumStringsPerBucket((short)8);
  270. int[] absoluteOffsets = bucketAbsoluteOffsets.clone();
  271. int[] relativeOffsets = bucketRelativeOffsets.clone();
  272. for ( int i = 0; i < absoluteOffsets.length; i++ )
  273. absoluteOffsets[i] += sstOffset;
  274. extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
  275. return extSST;
  276. }
  277. /**
  278. * Calculates the size in bytes of the EXTSST record as it would be if the
  279. * record was serialized.
  280. *
  281. * @return The size of the ExtSST record in bytes.
  282. */
  283. public int calcExtSSTRecordSize() {
  284. return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
  285. }
  286. }