You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

XSSFFileHandler.java 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.stress;
  16. import org.apache.poi.POIXMLException;
  17. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  18. import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
  19. import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  20. import org.apache.poi.openxml4j.opc.OPCPackage;
  21. import org.apache.poi.util.IOUtils;
  22. import org.apache.poi.xssf.eventusermodel.XLSX2CSV;
  23. import org.apache.poi.xssf.eventusermodel.XSSFReader;
  24. import org.apache.poi.xssf.eventusermodel.examples.FromHowTo;
  25. import org.apache.poi.xssf.extractor.XSSFExportToXml;
  26. import org.apache.poi.xssf.usermodel.XSSFMap;
  27. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  28. import org.junit.Test;
  29. import org.xml.sax.SAXException;
  30. import javax.xml.parsers.ParserConfigurationException;
  31. import javax.xml.transform.TransformerException;
  32. import java.io.*;
  33. import java.util.HashSet;
  34. import java.util.Iterator;
  35. import java.util.Locale;
  36. import java.util.Set;
  37. import static org.junit.Assert.assertFalse;
  38. import static org.junit.Assert.assertNotNull;
  39. public class XSSFFileHandler extends SpreadsheetHandler {
  40. @Override
  41. public void handleFile(InputStream stream) throws Exception {
  42. // ignore password protected files
  43. if (POIXMLDocumentHandler.isEncrypted(stream)) return;
  44. final XSSFWorkbook wb;
  45. // make sure the potentially large byte-array is freed up quickly again
  46. {
  47. ByteArrayOutputStream out = new ByteArrayOutputStream();
  48. IOUtils.copy(stream, out);
  49. final byte[] bytes = out.toByteArray();
  50. checkXSSFReader(OPCPackage.open(new ByteArrayInputStream(bytes)));
  51. wb = new XSSFWorkbook(new ByteArrayInputStream(bytes));
  52. }
  53. // use the combined handler for HSSF/XSSF
  54. handleWorkbook(wb);
  55. // TODO: some documents fail currently...
  56. //XSSFFormulaEvaluator evaluator = new XSSFFormulaEvaluator(wb);
  57. //evaluator.evaluateAll();
  58. // also verify general POIFS-stuff
  59. new POIXMLDocumentHandler().handlePOIXMLDocument(wb);
  60. // and finally ensure that exporting to XML works
  61. exportToXML(wb);
  62. // this allows to trigger a heap-dump at this point to see which memory is still allocated
  63. //HeapDump.dumpHeap("/tmp/poi.hprof", false);
  64. }
  65. private void checkXSSFReader(OPCPackage p) throws IOException, OpenXML4JException {
  66. XSSFReader reader = new XSSFReader(p);
  67. // these can be null...
  68. InputStream sharedStringsData = reader.getSharedStringsData();
  69. if(sharedStringsData != null) {
  70. sharedStringsData.close();
  71. }
  72. reader.getSharedStringsTable();
  73. InputStream stylesData = reader.getStylesData();
  74. if(stylesData != null) {
  75. stylesData.close();
  76. }
  77. reader.getStylesTable();
  78. InputStream themesData = reader.getThemesData();
  79. if(themesData != null) {
  80. themesData.close();
  81. }
  82. assertNotNull(reader.getWorkbookData());
  83. Iterator<InputStream> sheetsData = reader.getSheetsData();
  84. while(sheetsData.hasNext()) {
  85. InputStream str = sheetsData.next();
  86. str.close();
  87. }
  88. }
  89. private void exportToXML(XSSFWorkbook wb) throws SAXException,
  90. ParserConfigurationException, TransformerException {
  91. for (XSSFMap map : wb.getCustomXMLMappings()) {
  92. XSSFExportToXml exporter = new XSSFExportToXml(map);
  93. ByteArrayOutputStream os = new ByteArrayOutputStream();
  94. exporter.exportToXML(os, true);
  95. }
  96. }
  97. private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<String>();
  98. static {
  99. // expected sheet-id not found
  100. // EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/52348.xlsx");
  101. // EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/59021.xlsx");
  102. // zip-bomb
  103. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx");
  104. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764-2.xlsx");
  105. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx");
  106. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx");
  107. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb-empty.xlsx");
  108. // strict OOXML
  109. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/57914.xlsx");
  110. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SampleSS.strict.xlsx");
  111. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SimpleStrict.xlsx");
  112. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample.strict.xlsx");
  113. // TODO: good to ignore?
  114. EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample-beta.xlsx");
  115. // corrupt/invalid
  116. EXPECTED_ADDITIONAL_FAILURES.add("openxml4j/invalid.xlsx");
  117. }
  118. @SuppressWarnings("resource")
  119. @Override
  120. public void handleAdditional(File file) throws Exception {
  121. // redirect stdout as the examples often write lots of text
  122. PrintStream oldOut = System.out;
  123. try {
  124. System.setOut(new NullPrintStream());
  125. FromHowTo.main(new String[]{file.getAbsolutePath()});
  126. XLSX2CSV.main(new String[]{file.getAbsolutePath()});
  127. assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
  128. EXPECTED_ADDITIONAL_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName()));
  129. } catch (OLE2NotOfficeXmlFileException e) {
  130. // we have some files that are not actually OOXML and thus cannot be tested here
  131. } catch (IllegalArgumentException e) {
  132. if(!EXPECTED_ADDITIONAL_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
  133. throw e;
  134. }
  135. } catch (InvalidFormatException e) {
  136. if(!EXPECTED_ADDITIONAL_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
  137. throw e;
  138. }
  139. } catch (IOException e) {
  140. if(!EXPECTED_ADDITIONAL_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
  141. throw e;
  142. }
  143. } catch (POIXMLException e) {
  144. if(!EXPECTED_ADDITIONAL_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
  145. throw e;
  146. }
  147. } finally {
  148. System.setOut(oldOut);
  149. }
  150. }
  151. // a test-case to test this locally without executing the full TestAllFiles
  152. @Test
  153. public void test() throws Exception {
  154. File file = new File("test-data/spreadsheet/ref-56737.xlsx");
  155. InputStream stream = new BufferedInputStream(new FileInputStream(file));
  156. try {
  157. handleFile(stream);
  158. } finally {
  159. stream.close();
  160. }
  161. handleExtracting(file);
  162. }
  163. @Test
  164. public void testAdditional() throws Exception {
  165. handleAdditional(new File("test-data/spreadsheet/poc-xmlbomb.xlsx"));
  166. }
  167. // need to override all methods to omit calls to UTF-handling methods
  168. static class NullPrintStream extends PrintStream {
  169. @SuppressWarnings("resource")
  170. NullPrintStream() {
  171. super(new OutputStream() {
  172. @Override
  173. public void write(int b) {}
  174. @Override
  175. public void write(byte[] b) {}
  176. @Override
  177. public void write(byte[] b, int off, int len) {}
  178. });
  179. }
  180. @Override
  181. public void write(int b) {}
  182. @Override
  183. public void write(byte[] buf, int off, int len) {}
  184. @Override
  185. public void print(boolean b) {}
  186. @Override
  187. public void print(char c) {}
  188. @Override
  189. public void print(int i) {}
  190. @Override
  191. public void print(long l) {}
  192. @Override
  193. public void print(float f) {}
  194. @Override
  195. public void print(double d) {}
  196. @Override
  197. public void print(char[] s) {}
  198. @Override
  199. public void print(String s) {}
  200. @Override
  201. public void print(Object obj) {}
  202. @Override
  203. public void println() {}
  204. @Override
  205. public void println(boolean x) {}
  206. @Override
  207. public void println(char x) {}
  208. @Override
  209. public void println(int x) {}
  210. @Override
  211. public void println(long x) {}
  212. @Override
  213. public void println(float x) {}
  214. @Override
  215. public void println(double x) {}
  216. @Override
  217. public void println(char[] x) {}
  218. @Override
  219. public void println(String x) {}
  220. @Override
  221. public void println(Object x) {}
  222. @Override
  223. public PrintStream printf(String format, Object... args) { return this; }
  224. @Override
  225. public PrintStream printf(Locale l, String format, Object... args) { return this; }
  226. @Override
  227. public PrintStream format(String format, Object... args) { return this; }
  228. @Override
  229. public PrintStream format(Locale l, String format, Object... args) { return this; }
  230. @Override
  231. public PrintStream append(CharSequence csq) { return this; }
  232. @Override
  233. public PrintStream append(CharSequence csq, int start, int end) { return this; }
  234. @Override
  235. public PrintStream append(char c) { return this; }
  236. @Override
  237. public void write(byte[] b) {}
  238. }
  239. }