You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

SharedStringsTable.java 7.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xssf.model;
  16. import java.io.IOException;
  17. import java.io.InputStream;
  18. import java.io.OutputStream;
  19. import java.util.ArrayList;
  20. import java.util.HashMap;
  21. import java.util.List;
  22. import java.util.Map;
  23. import org.apache.xmlbeans.XmlException;
  24. import org.apache.xmlbeans.XmlOptions;
  25. import org.apache.poi.POIXMLDocumentPart;
  26. import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
  27. import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
  28. import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
  29. import org.apache.poi.openxml4j.opc.PackagePart;
  30. import org.apache.poi.openxml4j.opc.PackageRelationship;
  31. /**
  32. * Table of strings shared across all sheets in a workbook.
  33. * <p>
  34. * A workbook may contain thousands of cells containing string (non-numeric) data. Furthermore this data is very
  35. * likely to be repeated across many rows or columns. The goal of implementing a single string table that is shared
  36. * across the workbook is to improve performance in opening and saving the file by only reading and writing the
  37. * repetitive information once.
  38. * </p>
  39. * <p>
  40. * Consider for example a workbook summarizing information for cities within various countries. There may be a
  41. * column for the name of the country, a column for the name of each city in that country, and a column
  42. * containing the data for each city. In this case the country name is repetitive, being duplicated in many cells.
  43. * In many cases the repetition is extensive, and a tremendous savings is realized by making use of a shared string
  44. * table when saving the workbook. When displaying text in the spreadsheet, the cell table will just contain an
  45. * index into the string table as the value of a cell, instead of the full string.
  46. * </p>
  47. * <p>
  48. * The shared string table contains all the necessary information for displaying the string: the text, formatting
  49. * properties, and phonetic properties (for East Asian languages).
  50. * </p>
  51. *
  52. * @author Nick Birch
  53. * @author Yegor Kozlov
  54. */
  55. public class SharedStringsTable extends POIXMLDocumentPart {
  56. /**
  57. * Array of individual string items in the Shared String table.
  58. */
  59. private final List<CTRst> strings = new ArrayList<CTRst>();
  60. /**
  61. * Maps strings and their indexes in the <code>strings</code> arrays
  62. */
  63. private final Map<String, Integer> stmap = new HashMap<String, Integer>();
  64. /**
  65. * An integer representing the total count of strings in the workbook. This count does not
  66. * include any numbers, it counts only the total of text strings in the workbook.
  67. */
  68. private int count;
  69. /**
  70. * An integer representing the total count of unique strings in the Shared String Table.
  71. * A string is unique even if it is a copy of another string, but has different formatting applied
  72. * at the character level.
  73. */
  74. private int uniqueCount;
  75. public SstDocument _sstDoc;
  76. public SharedStringsTable() {
  77. super();
  78. _sstDoc = SstDocument.Factory.newInstance();
  79. _sstDoc.addNewSst();
  80. }
  81. public SharedStringsTable(PackagePart part, PackageRelationship rel) throws IOException {
  82. super(part, rel);
  83. readFrom(part.getInputStream());
  84. }
  85. /**
  86. * Read this shared strings table from an XML file.
  87. *
  88. * @param is The input stream containing the XML document.
  89. * @throws IOException if an error occurs while reading.
  90. */
  91. public void readFrom(InputStream is) throws IOException {
  92. try {
  93. int cnt = 0;
  94. _sstDoc = SstDocument.Factory.parse(is);
  95. CTSst sst = _sstDoc.getSst();
  96. count = (int)sst.getCount();
  97. uniqueCount = (int)sst.getUniqueCount();
  98. for (CTRst st : sst.getSiArray()) {
  99. stmap.put(st.toString(), cnt);
  100. strings.add(st);
  101. cnt++;
  102. }
  103. } catch (XmlException e) {
  104. throw new IOException(e.getLocalizedMessage());
  105. }
  106. }
  107. /**
  108. * Return a string item by index
  109. *
  110. * @param idx index of item to return.
  111. * @return the item at the specified position in this Shared String table.
  112. */
  113. public CTRst getEntryAt(int idx) {
  114. return strings.get(idx);
  115. }
  116. /**
  117. * Return an integer representing the total count of strings in the workbook. This count does not
  118. * include any numbers, it counts only the total of text strings in the workbook.
  119. *
  120. * @return the total count of strings in the workbook
  121. */
  122. public int getCount(){
  123. return count;
  124. }
  125. /**
  126. * Returns an integer representing the total count of unique strings in the Shared String Table.
  127. * A string is unique even if it is a copy of another string, but has different formatting applied
  128. * at the character level.
  129. *
  130. * @return the total count of unique strings in the workbook
  131. */
  132. public int getUniqueCount(){
  133. return uniqueCount;
  134. }
  135. /**
  136. * Add an entry to this Shared String table (a new value is appened to the end).
  137. *
  138. * <p>
  139. * If the Shared String table already contains this <code>CTRst</code> bean, its index is returned.
  140. * Otherwise a new entry is aded.
  141. * </p>
  142. *
  143. * @param st the entry to add
  144. * @return index the index of added entry
  145. */
  146. public int addEntry(CTRst st) {
  147. String s = st.toString();
  148. count++;
  149. if (stmap.containsKey(s)) {
  150. return stmap.get(s);
  151. }
  152. uniqueCount++;
  153. //create a CTRst bean attached to this SstDocument and copy the argument CTRst into it
  154. CTRst newSt = _sstDoc.getSst().addNewSi();
  155. newSt.set(st);
  156. int idx = strings.size();
  157. stmap.put(s, idx);
  158. strings.add(newSt);
  159. return idx;
  160. }
  161. /**
  162. * Provide low-level access to the underlying array of CTRst beans
  163. *
  164. * @return array of CTRst beans
  165. */
  166. public List<CTRst> getItems() {
  167. return strings;
  168. }
  169. /**
  170. * Write this table out as XML.
  171. *
  172. * @param out The stream to write to.
  173. * @throws IOException if an error occurs while writing.
  174. */
  175. public void writeTo(OutputStream out) throws IOException {
  176. XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS);
  177. // the following two lines turn off writing CDATA
  178. // see Bugzilla 48936
  179. options.setSaveCDataLengthThreshold(1000000);
  180. options.setSaveCDataEntityCountThreshold(-1);
  181. //re-create the sst table every time saving a workbook
  182. CTSst sst = _sstDoc.getSst();
  183. sst.setCount(count);
  184. sst.setUniqueCount(uniqueCount);
  185. _sstDoc.save(out, options);
  186. }
  187. @Override
  188. protected void commit() throws IOException {
  189. PackagePart part = getPackagePart();
  190. OutputStream out = part.getOutputStream();
  191. writeTo(out);
  192. out.close();
  193. }
  194. }