Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

TestUnicodeString.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hssf.record.common;
  16. import java.io.ByteArrayInputStream;
  17. import java.io.ByteArrayOutputStream;
  18. import junit.framework.TestCase;
  19. import org.apache.poi.hssf.record.ContinueRecord;
  20. import org.apache.poi.hssf.record.RecordInputStream;
  21. import org.apache.poi.hssf.record.SSTRecord;
  22. import org.apache.poi.hssf.record.common.UnicodeString.ExtRst;
  23. import org.apache.poi.hssf.record.common.UnicodeString.FormatRun;
  24. import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
  25. import org.apache.poi.util.LittleEndianInputStream;
  26. import org.apache.poi.util.LittleEndianOutputStream;
  27. /**
  28. * Tests that {@link UnicodeString} record size calculates correctly. The record size
  29. * is used when serializing {@link SSTRecord}s.
  30. *
  31. * @author Jason Height (jheight at apache.org)
  32. */
  33. public final class TestUnicodeString extends TestCase {
  34. private static final int MAX_DATA_SIZE = RecordInputStream.MAX_RECORD_DATA_SIZE;
  35. /** a 4 character string requiring 16 bit encoding */
  36. private static final String STR_16_BIT = "A\u591A\u8A00\u8A9E";
  37. private static void confirmSize(int expectedSize, UnicodeString s) {
  38. confirmSize(expectedSize, s, 0);
  39. }
  40. /**
  41. * Note - a value of zero for <tt>amountUsedInCurrentRecord</tt> would only ever occur just
  42. * after a {@link ContinueRecord} had been started. In the initial {@link SSTRecord} this
  43. * value starts at 8 (for the first {@link UnicodeString} written). In general, it can be
  44. * any value between 0 and {@link #MAX_DATA_SIZE}
  45. */
  46. private static void confirmSize(int expectedSize, UnicodeString s, int amountUsedInCurrentRecord) {
  47. ContinuableRecordOutput out = ContinuableRecordOutput.createForCountingOnly();
  48. out.writeContinue();
  49. for(int i=amountUsedInCurrentRecord; i>0; i--) {
  50. out.writeByte(0);
  51. }
  52. int size0 = out.getTotalSize();
  53. s.serialize(out);
  54. int size1 = out.getTotalSize();
  55. int actualSize = size1-size0;
  56. assertEquals(expectedSize, actualSize);
  57. }
  58. public void testSmallStringSize() {
  59. //Test a basic string
  60. UnicodeString s = makeUnicodeString("Test");
  61. confirmSize(7, s);
  62. //Test a small string that is uncompressed
  63. s = makeUnicodeString(STR_16_BIT);
  64. s.setOptionFlags((byte)0x01);
  65. confirmSize(11, s);
  66. //Test a compressed small string that has rich text formatting
  67. s.setString("Test");
  68. s.setOptionFlags((byte)0x8);
  69. UnicodeString.FormatRun r = new UnicodeString.FormatRun((short)0,(short)1);
  70. s.addFormatRun(r);
  71. UnicodeString.FormatRun r2 = new UnicodeString.FormatRun((short)2,(short)2);
  72. s.addFormatRun(r2);
  73. confirmSize(17, s);
  74. //Test a uncompressed small string that has rich text formatting
  75. s.setString(STR_16_BIT);
  76. s.setOptionFlags((byte)0x9);
  77. confirmSize(21, s);
  78. //Test a compressed small string that has rich text and extended text
  79. s.setString("Test");
  80. s.setOptionFlags((byte)0xC);
  81. confirmSize(17, s);
  82. // Extended phonetics data
  83. // Minimum size is 14
  84. // Also adds 4 bytes to hold the length
  85. s.setExtendedRst(
  86. new ExtRst()
  87. );
  88. confirmSize(35, s);
  89. //Test a uncompressed small string that has rich text and extended text
  90. s.setString(STR_16_BIT);
  91. s.setOptionFlags((byte)0xD);
  92. confirmSize(39, s);
  93. s.setExtendedRst(null);
  94. confirmSize(21, s);
  95. }
  96. public void testPerfectStringSize() {
  97. //Test a basic string
  98. UnicodeString s = makeUnicodeString(MAX_DATA_SIZE-2-1);
  99. confirmSize(MAX_DATA_SIZE, s);
  100. //Test an uncompressed string
  101. //Note that we can only ever get to a maximim size of 8227 since an uncompressed
  102. //string is writing double bytes.
  103. s = makeUnicodeString((MAX_DATA_SIZE-2-1)/2, true);
  104. s.setOptionFlags((byte)0x1);
  105. confirmSize(MAX_DATA_SIZE-1, s);
  106. }
  107. public void testPerfectRichStringSize() {
  108. //Test a rich text string
  109. UnicodeString s = makeUnicodeString(MAX_DATA_SIZE-2-1-8-2);
  110. s.addFormatRun(new UnicodeString.FormatRun((short)1,(short)0));
  111. s.addFormatRun(new UnicodeString.FormatRun((short)2,(short)1));
  112. s.setOptionFlags((byte)0x8);
  113. confirmSize(MAX_DATA_SIZE, s);
  114. //Test an uncompressed rich text string
  115. //Note that we can only ever get to a maximum size of 8227 since an uncompressed
  116. //string is writing double bytes.
  117. s = makeUnicodeString((MAX_DATA_SIZE-2-1-8-2)/2, true);
  118. s.addFormatRun(new UnicodeString.FormatRun((short)1,(short)0));
  119. s.addFormatRun(new UnicodeString.FormatRun((short)2,(short)1));
  120. s.setOptionFlags((byte)0x9);
  121. confirmSize(MAX_DATA_SIZE-1, s);
  122. }
  123. public void testContinuedStringSize() {
  124. //Test a basic string
  125. UnicodeString s = makeUnicodeString(MAX_DATA_SIZE-2-1+20);
  126. confirmSize(MAX_DATA_SIZE+4+1+20, s);
  127. }
  128. /** Tests that a string size calculation that fits neatly in two records, the second being a continue*/
  129. public void testPerfectContinuedStringSize() {
  130. //Test a basic string
  131. int strSize = MAX_DATA_SIZE*2;
  132. //String overhead
  133. strSize -= 3;
  134. //Continue Record overhead
  135. strSize -= 4;
  136. //Continue Record additional byte overhead
  137. strSize -= 1;
  138. UnicodeString s = makeUnicodeString(strSize);
  139. confirmSize(MAX_DATA_SIZE*2, s);
  140. }
  141. public void testFormatRun() throws Exception {
  142. FormatRun fr = new FormatRun((short)4, (short)0x15c);
  143. assertEquals(4, fr.getCharacterPos());
  144. assertEquals(0x15c, fr.getFontIndex());
  145. ByteArrayOutputStream baos = new ByteArrayOutputStream();
  146. LittleEndianOutputStream out = new LittleEndianOutputStream(baos);
  147. fr.serialize(out);
  148. byte[] b = baos.toByteArray();
  149. assertEquals(4, b.length);
  150. assertEquals(4, b[0]);
  151. assertEquals(0, b[1]);
  152. assertEquals(0x5c, b[2]);
  153. assertEquals(0x01, b[3]);
  154. LittleEndianInputStream inp = new LittleEndianInputStream(
  155. new ByteArrayInputStream(b)
  156. );
  157. fr = new FormatRun(inp);
  158. assertEquals(4, fr.getCharacterPos());
  159. assertEquals(0x15c, fr.getFontIndex());
  160. }
  161. public void testExtRstFromEmpty() throws Exception {
  162. ExtRst ext = new ExtRst();
  163. assertEquals(0, ext.getNumberOfRuns());
  164. assertEquals(0, ext.getFormattingFontIndex());
  165. assertEquals(0, ext.getFormattingOptions());
  166. assertEquals("", ext.getPhoneticText());
  167. assertEquals(0, ext.getPhRuns().length);
  168. assertEquals(10, ext.getDataSize()); // Excludes 4 byte header
  169. ByteArrayOutputStream baos = new ByteArrayOutputStream();
  170. LittleEndianOutputStream out = new LittleEndianOutputStream(baos);
  171. ContinuableRecordOutput cout = new ContinuableRecordOutput(out, 0xffff);
  172. ext.serialize(cout);
  173. cout.writeContinue();
  174. byte[] b = baos.toByteArray();
  175. assertEquals(20, b.length);
  176. // First 4 bytes from the outputstream
  177. assertEquals(-1, b[0]);
  178. assertEquals(-1, b[1]);
  179. assertEquals(14, b[2]);
  180. assertEquals(00, b[3]);
  181. // Reserved
  182. assertEquals(1, b[4]);
  183. assertEquals(0, b[5]);
  184. // Data size
  185. assertEquals(10, b[6]);
  186. assertEquals(00, b[7]);
  187. // Font*2
  188. assertEquals(0, b[8]);
  189. assertEquals(0, b[9]);
  190. assertEquals(0, b[10]);
  191. assertEquals(0, b[11]);
  192. // 0 Runs
  193. assertEquals(0, b[12]);
  194. assertEquals(0, b[13]);
  195. // Size=0, *2
  196. assertEquals(0, b[14]);
  197. assertEquals(0, b[15]);
  198. assertEquals(0, b[16]);
  199. assertEquals(0, b[17]);
  200. // Last 2 bytes from the outputstream
  201. assertEquals(ContinueRecord.sid, b[18]);
  202. assertEquals(0, b[19]);
  203. // Load in again and re-test
  204. byte[] data = new byte[14];
  205. System.arraycopy(b, 4, data, 0, data.length);
  206. LittleEndianInputStream inp = new LittleEndianInputStream(
  207. new ByteArrayInputStream(data)
  208. );
  209. ext = new ExtRst(inp, data.length);
  210. assertEquals(0, ext.getNumberOfRuns());
  211. assertEquals(0, ext.getFormattingFontIndex());
  212. assertEquals(0, ext.getFormattingOptions());
  213. assertEquals("", ext.getPhoneticText());
  214. assertEquals(0, ext.getPhRuns().length);
  215. }
  216. public void testExtRstFromData() throws Exception {
  217. byte[] data = new byte[] {
  218. 01, 00, 0x0C, 00,
  219. 00, 00, 0x37, 00,
  220. 00, 00,
  221. 00, 00, 00, 00,
  222. 00, 00 // Cruft at the end, as found from real files
  223. };
  224. assertEquals(16, data.length);
  225. LittleEndianInputStream inp = new LittleEndianInputStream(
  226. new ByteArrayInputStream(data)
  227. );
  228. ExtRst ext = new ExtRst(inp, data.length);
  229. assertEquals(0x0c, ext.getDataSize()); // Excludes 4 byte header
  230. assertEquals(0, ext.getNumberOfRuns());
  231. assertEquals(0x37, ext.getFormattingOptions());
  232. assertEquals(0, ext.getFormattingFontIndex());
  233. assertEquals("", ext.getPhoneticText());
  234. assertEquals(0, ext.getPhRuns().length);
  235. }
  236. public void testCorruptExtRstDetection() throws Exception {
  237. byte[] data = new byte[] {
  238. 0x79, 0x79, 0x11, 0x11,
  239. 0x22, 0x22, 0x33, 0x33,
  240. };
  241. assertEquals(8, data.length);
  242. LittleEndianInputStream inp = new LittleEndianInputStream(
  243. new ByteArrayInputStream(data)
  244. );
  245. ExtRst ext = new ExtRst(inp, data.length);
  246. // Will be empty
  247. assertEquals(ext, new ExtRst());
  248. // If written, will be the usual size
  249. assertEquals(10, ext.getDataSize()); // Excludes 4 byte header
  250. // Is empty
  251. assertEquals(0, ext.getNumberOfRuns());
  252. assertEquals(0, ext.getFormattingOptions());
  253. assertEquals(0, ext.getFormattingFontIndex());
  254. assertEquals("", ext.getPhoneticText());
  255. assertEquals(0, ext.getPhRuns().length);
  256. }
  257. private static UnicodeString makeUnicodeString(String s) {
  258. UnicodeString st = new UnicodeString(s);
  259. st.setOptionFlags((byte)0);
  260. return st;
  261. }
  262. private static UnicodeString makeUnicodeString(int numChars) {
  263. return makeUnicodeString(numChars, false);
  264. }
  265. /**
  266. * @param is16Bit if <code>true</code> the created string will have characters > 0x00FF
  267. * @return a string of the specified number of characters
  268. */
  269. private static UnicodeString makeUnicodeString(int numChars, boolean is16Bit) {
  270. StringBuffer b = new StringBuffer(numChars);
  271. int charBase = is16Bit ? 0x8A00 : 'A';
  272. for (int i=0;i<numChars;i++) {
  273. char ch = (char) ((i%16)+charBase);
  274. b.append(ch);
  275. }
  276. return makeUnicodeString(b.toString());
  277. }
  278. }