You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

SheetDataWriter.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /*
  2. * ====================================================================
  3. * Licensed to the Apache Software Foundation (ASF) under one or more
  4. * contributor license agreements. See the NOTICE file distributed with
  5. * this work for additional information regarding copyright ownership.
  6. * The ASF licenses this file to You under the Apache License, Version 2.0
  7. * (the "License"); you may not use this file except in compliance with
  8. * the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. * ====================================================================
  18. */
  19. package org.apache.poi.xssf.streaming;
  20. import java.io.BufferedWriter;
  21. import java.io.Closeable;
  22. import java.io.File;
  23. import java.io.FileInputStream;
  24. import java.io.FileOutputStream;
  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.io.OutputStream;
  28. import java.io.OutputStreamWriter;
  29. import java.io.Writer;
  30. import java.nio.charset.StandardCharsets;
  31. import java.util.Iterator;
  32. import org.apache.logging.log4j.LogManager;
  33. import org.apache.logging.log4j.Logger;
  34. import org.apache.poi.ss.usermodel.Cell;
  35. import org.apache.poi.ss.usermodel.CellStyle;
  36. import org.apache.poi.ss.usermodel.CellType;
  37. import org.apache.poi.ss.usermodel.FormulaError;
  38. import org.apache.poi.ss.util.CellReference;
  39. import org.apache.poi.util.CodepointsUtil;
  40. import org.apache.poi.util.TempFile;
  41. import org.apache.poi.xssf.model.SharedStringsTable;
  42. import org.apache.poi.xssf.usermodel.XSSFRichTextString;
  43. import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
  44. /**
  45. * Initially copied from BigGridDemo "SpreadsheetWriter".
  46. * Unlike the original code which wrote the entire document,
  47. * this class only writes the "sheetData" document fragment
  48. * so that it was renamed to "SheetDataWriter"
  49. */
  50. public class SheetDataWriter implements Closeable {
  51. private static final Logger LOG = LogManager.getLogger(SheetDataWriter.class);
  52. private final File _fd;
  53. protected final Writer _out;
  54. private int _rownum;
  55. private int _numberOfFlushedRows;
  56. private int _lowestIndexOfFlushedRows; // meaningful only of _numberOfFlushedRows>0
  57. private int _numberOfCellsOfLastFlushedRow; // meaningful only of _numberOfFlushedRows>0
  58. private int _numberLastFlushedRow = -1; // meaningful only of _numberOfFlushedRows>0
  59. /**
  60. * Table of strings shared across this workbook.
  61. * If two cells contain the same string, then the cell value is the same index into SharedStringsTable
  62. */
  63. private SharedStringsTable _sharedStringSource;
  64. public SheetDataWriter() throws IOException {
  65. _fd = createTempFile();
  66. _out = createWriter(_fd);
  67. }
  68. public SheetDataWriter(Writer writer) throws IOException {
  69. _fd = null;
  70. _out = writer;
  71. }
  72. public SheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException {
  73. this();
  74. this._sharedStringSource = sharedStringsTable;
  75. }
  76. /**
  77. * Create a temp file to write sheet data.
  78. * By default, temp files are created in the default temporary-file directory
  79. * with a prefix "poi-sxssf-sheet" and suffix ".xml". Subclasses can override
  80. * it and specify a different temp directory or filename or suffix, e.g. <code>.gz</code>
  81. *
  82. * @return temp file to write sheet data
  83. */
  84. public File createTempFile() throws IOException {
  85. return TempFile.createTempFile("poi-sxssf-sheet", ".xml");
  86. }
  87. /**
  88. * Create a writer for the sheet data.
  89. *
  90. * @param fd the file to write to
  91. */
  92. public Writer createWriter(File fd) throws IOException {
  93. FileOutputStream fos = new FileOutputStream(fd);
  94. OutputStream decorated;
  95. try {
  96. decorated = decorateOutputStream(fos);
  97. } catch (final IOException e) {
  98. fos.close();
  99. throw e;
  100. }
  101. return new BufferedWriter(
  102. new OutputStreamWriter(decorated, StandardCharsets.UTF_8));
  103. }
  104. /**
  105. * Override this to translate (such as encrypt or compress) the file output stream
  106. * as it is being written to disk.
  107. * The default behavior is to to pass the stream through unmodified.
  108. *
  109. * @param fos the stream to decorate
  110. * @return a decorated stream
  111. * @throws IOException if decorating the stream fails
  112. * @see #decorateInputStream(FileInputStream)
  113. */
  114. protected OutputStream decorateOutputStream(FileOutputStream fos) throws IOException {
  115. return fos;
  116. }
  117. /**
  118. * flush and close the temp data writer.
  119. * This method <em>must</em> be invoked before calling {@link #getWorksheetXMLInputStream()}
  120. */
  121. public void close() throws IOException {
  122. // this would break writing the same document multiple times: _out.flush();
  123. _out.close();
  124. }
  125. protected File getTempFile() {
  126. return _fd;
  127. }
  128. /**
  129. * @return a stream to read temp file with the sheet data
  130. */
  131. public InputStream getWorksheetXMLInputStream() throws IOException {
  132. File fd = getTempFile();
  133. FileInputStream fis = new FileInputStream(fd);
  134. try {
  135. return decorateInputStream(fis);
  136. } catch (IOException e) {
  137. fis.close();
  138. throw e;
  139. }
  140. }
  141. /**
  142. * Override this to translate (such as decrypt or expand) the file input stream
  143. * as it is being read from disk.
  144. * The default behavior is to to pass the stream through unmodified.
  145. *
  146. * @param fis the stream to decorate
  147. * @return a decorated stream
  148. * @throws IOException if decorating the stream fails
  149. * @see #decorateOutputStream(FileOutputStream)
  150. */
  151. protected InputStream decorateInputStream(FileInputStream fis) throws IOException {
  152. return fis;
  153. }
  154. public int getNumberOfFlushedRows() {
  155. return _numberOfFlushedRows;
  156. }
  157. public int getNumberOfCellsOfLastFlushedRow() {
  158. return _numberOfCellsOfLastFlushedRow;
  159. }
  160. public int getLowestIndexOfFlushedRows() {
  161. return _lowestIndexOfFlushedRows;
  162. }
  163. public int getLastFlushedRow() {
  164. return _numberLastFlushedRow;
  165. }
  166. @Override
  167. protected void finalize() throws Throwable {
  168. if (_fd.exists() && !_fd.delete()) {
  169. LOG.atError().log("Can't delete temporary encryption file: {}", _fd);
  170. }
  171. }
  172. /**
  173. * Write a row to the file
  174. *
  175. * @param rownum 0-based row number
  176. * @param row a row
  177. *
  178. * @throws IOException If an I/O error occurs
  179. */
  180. public void writeRow(int rownum, SXSSFRow row) throws IOException {
  181. if (_numberOfFlushedRows == 0)
  182. _lowestIndexOfFlushedRows = rownum;
  183. _numberLastFlushedRow = Math.max(rownum, _numberLastFlushedRow);
  184. _numberOfCellsOfLastFlushedRow = row.getLastCellNum();
  185. _numberOfFlushedRows++;
  186. beginRow(rownum, row);
  187. Iterator<Cell> cells = row.allCellsIterator();
  188. int columnIndex = 0;
  189. while (cells.hasNext()) {
  190. writeCell(columnIndex++, cells.next());
  191. }
  192. endRow();
  193. }
  194. void beginRow(int rownum, SXSSFRow row) throws IOException {
  195. _out.write("<row");
  196. writeAttribute("r", Integer.toString(rownum + 1));
  197. if (row.hasCustomHeight()) {
  198. writeAttribute("customHeight", "true");
  199. writeAttribute("ht", Float.toString(row.getHeightInPoints()));
  200. }
  201. if (row.getZeroHeight()) {
  202. writeAttribute("hidden", "true");
  203. }
  204. if (row.isFormatted()) {
  205. writeAttribute("s", Integer.toString(row.getRowStyleIndex()));
  206. writeAttribute("customFormat", "1");
  207. }
  208. if (row.getOutlineLevel() != 0) {
  209. writeAttribute("outlineLevel", Integer.toString(row.getOutlineLevel()));
  210. }
  211. if(row.getHidden() != null) {
  212. writeAttribute("hidden", row.getHidden() ? "1" : "0");
  213. }
  214. if(row.getCollapsed() != null) {
  215. writeAttribute("collapsed", row.getCollapsed() ? "1" : "0");
  216. }
  217. _out.write(">\n");
  218. this._rownum = rownum;
  219. }
  220. void endRow() throws IOException {
  221. _out.write("</row>\n");
  222. }
  223. public void writeCell(int columnIndex, Cell cell) throws IOException {
  224. if (cell == null) {
  225. return;
  226. }
  227. String ref = new CellReference(_rownum, columnIndex).formatAsString();
  228. _out.write("<c");
  229. writeAttribute("r", ref);
  230. CellStyle cellStyle = cell.getCellStyle();
  231. if (cellStyle.getIndex() != 0) {
  232. // need to convert the short to unsigned short as the indexes can be up to 64k
  233. // ideally we would use int for this index, but that would need changes to some more
  234. // APIs
  235. writeAttribute("s", Integer.toString(cellStyle.getIndex() & 0xffff));
  236. }
  237. CellType cellType = cell.getCellType();
  238. switch (cellType) {
  239. case BLANK: {
  240. _out.write('>');
  241. break;
  242. }
  243. case FORMULA: {
  244. switch(cell.getCachedFormulaResultType()) {
  245. case NUMERIC:
  246. writeAttribute("t", "n");
  247. break;
  248. case STRING:
  249. writeAttribute("t", STCellType.STR.toString());
  250. break;
  251. case BOOLEAN:
  252. writeAttribute("t", "b");
  253. break;
  254. case ERROR:
  255. writeAttribute("t", "e");
  256. break;
  257. }
  258. _out.write("><f>");
  259. outputEscapedString(cell.getCellFormula());
  260. _out.write("</f>");
  261. switch (cell.getCachedFormulaResultType()) {
  262. case NUMERIC:
  263. double nval = cell.getNumericCellValue();
  264. if (!Double.isNaN(nval)) {
  265. _out.write("<v>");
  266. _out.write(Double.toString(nval));
  267. _out.write("</v>");
  268. }
  269. break;
  270. case STRING:
  271. String value = cell.getStringCellValue();
  272. if(value != null && !value.isEmpty()) {
  273. _out.write("<v>");
  274. outputEscapedString(value);
  275. _out.write("</v>");
  276. }
  277. break;
  278. case BOOLEAN:
  279. _out.write("><v>");
  280. _out.write(cell.getBooleanCellValue() ? "1" : "0");
  281. _out.write("</v>");
  282. break;
  283. case ERROR: {
  284. FormulaError error = FormulaError.forInt(cell.getErrorCellValue());
  285. _out.write("><v>");
  286. outputEscapedString(error.getString());
  287. _out.write("</v>");
  288. break;
  289. }
  290. }
  291. break;
  292. }
  293. case STRING: {
  294. if (_sharedStringSource != null) {
  295. XSSFRichTextString rt = new XSSFRichTextString(cell.getStringCellValue());
  296. int sRef = _sharedStringSource.addSharedStringItem(rt);
  297. writeAttribute("t", STCellType.S.toString());
  298. _out.write("><v>");
  299. _out.write(String.valueOf(sRef));
  300. _out.write("</v>");
  301. } else {
  302. writeAttribute("t", "inlineStr");
  303. _out.write("><is><t");
  304. if (hasLeadingTrailingSpaces(cell.getStringCellValue())) {
  305. writeAttribute("xml:space", "preserve");
  306. }
  307. _out.write(">");
  308. outputEscapedString(cell.getStringCellValue());
  309. _out.write("</t></is>");
  310. }
  311. break;
  312. }
  313. case NUMERIC: {
  314. writeAttribute("t", "n");
  315. _out.write("><v>");
  316. _out.write(Double.toString(cell.getNumericCellValue()));
  317. _out.write("</v>");
  318. break;
  319. }
  320. case BOOLEAN: {
  321. writeAttribute("t", "b");
  322. _out.write("><v>");
  323. _out.write(cell.getBooleanCellValue() ? "1" : "0");
  324. _out.write("</v>");
  325. break;
  326. }
  327. case ERROR: {
  328. FormulaError error = FormulaError.forInt(cell.getErrorCellValue());
  329. writeAttribute("t", "e");
  330. _out.write("><v>");
  331. outputEscapedString(error.getString());
  332. _out.write("</v>");
  333. break;
  334. }
  335. default: {
  336. throw new IllegalStateException("Invalid cell type: " + cellType);
  337. }
  338. }
  339. _out.write("</c>");
  340. }
  341. private void writeAttribute(String name, String value) throws IOException {
  342. _out.write(' ');
  343. _out.write(name);
  344. _out.write("=\"");
  345. _out.write(value);
  346. _out.write('\"');
  347. }
  348. /**
  349. * @return whether the string has leading / trailing spaces that
  350. * need to be preserved with the xml:space=\"preserve\" attribute
  351. */
  352. boolean hasLeadingTrailingSpaces(String str) {
  353. if (str != null && str.length() > 0) {
  354. char firstChar = str.charAt(0);
  355. char lastChar = str.charAt(str.length() - 1);
  356. return Character.isWhitespace(firstChar) || Character.isWhitespace(lastChar) ;
  357. }
  358. return false;
  359. }
  360. protected void outputEscapedString(String s) throws IOException {
  361. if (s == null || s.length() == 0) {
  362. return;
  363. }
  364. for (Iterator<String> iter = CodepointsUtil.iteratorFor(s); iter.hasNext(); ) {
  365. String codepoint = iter.next();
  366. switch (codepoint) {
  367. case "<":
  368. _out.write("&lt;");
  369. break;
  370. case ">":
  371. _out.write("&gt;");
  372. break;
  373. case "&":
  374. _out.write("&amp;");
  375. break;
  376. case "\"":
  377. _out.write("&quot;");
  378. break;
  379. // Special characters
  380. case "\n":
  381. _out.write("&#xa;");
  382. break;
  383. case "\r":
  384. _out.write("&#xd;");
  385. break;
  386. case "\t":
  387. _out.write("&#x9;");
  388. break;
  389. case "\u00A0": // NO-BREAK SPACE
  390. _out.write("&#xa0;");
  391. break;
  392. default:
  393. if (codepoint.length() == 1) {
  394. char c = codepoint.charAt(0);
  395. // YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
  396. // the same rule applies to "not a character" symbols.
  397. if (replaceWithQuestionMark(c)) {
  398. _out.write('?');
  399. } else {
  400. _out.write(c);
  401. }
  402. } else {
  403. _out.write(codepoint);
  404. }
  405. break;
  406. }
  407. }
  408. }
  409. static boolean replaceWithQuestionMark(char c) {
  410. return c < ' ' || ('\uFFFE' <= c && c <= '\uFFFF');
  411. }
  412. /**
  413. * Deletes the temporary file that backed this sheet on disk.
  414. * @return true if the file was deleted, false if it wasn't.
  415. */
  416. boolean dispose() throws IOException {
  417. final boolean ret;
  418. try {
  419. _out.close();
  420. } finally {
  421. ret = _fd.delete();
  422. }
  423. return ret;
  424. }
  425. }