Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

TestExcelExtractor.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hssf.extractor;
  16. import java.io.IOException;
  17. import java.io.InputStream;
  18. import junit.framework.TestCase;
  19. import org.apache.poi.POIDataSamples;
  20. import org.apache.poi.hssf.HSSFTestDataSamples;
  21. import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
  22. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  23. import org.apache.poi.poifs.filesystem.DirectoryNode;
  24. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  25. /**
  26. *
  27. */
  28. public final class TestExcelExtractor extends TestCase {
  29. private static ExcelExtractor createExtractor(String sampleFileName) {
  30. InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleFileName);
  31. try {
  32. return new ExcelExtractor(new POIFSFileSystem(is));
  33. } catch (IOException e) {
  34. throw new RuntimeException(e);
  35. }
  36. }
  37. public void testSimple() throws IOException {
  38. ExcelExtractor extractor = createExtractor("Simple.xls");
  39. try {
  40. assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
  41. // Now turn off sheet names
  42. extractor.setIncludeSheetNames(false);
  43. assertEquals("replaceMe\n", extractor.getText());
  44. } finally {
  45. extractor.close();
  46. }
  47. }
  48. public void testNumericFormula() {
  49. ExcelExtractor extractor = createExtractor("sumifformula.xls");
  50. assertEquals(
  51. "Sheet1\n" +
  52. "1000\t1\t5\n" +
  53. "2000\t2\n" +
  54. "3000\t3\n" +
  55. "4000\t4\n" +
  56. "5000\t5\n" +
  57. "Sheet2\nSheet3\n",
  58. extractor.getText()
  59. );
  60. extractor.setFormulasNotResults(true);
  61. assertEquals(
  62. "Sheet1\n" +
  63. "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
  64. "2000\t2\n" +
  65. "3000\t3\n" +
  66. "4000\t4\n" +
  67. "5000\t5\n" +
  68. "Sheet2\nSheet3\n",
  69. extractor.getText()
  70. );
  71. }
  72. public void testwithContinueRecords() {
  73. ExcelExtractor extractor = createExtractor("StringContinueRecords.xls");
  74. extractor.getText();
  75. // Has masses of text
  76. // Until we fixed bug #41064, this would've
  77. // failed by now
  78. assertTrue(extractor.getText().length() > 40960);
  79. }
  80. public void testStringConcat() {
  81. ExcelExtractor extractor = createExtractor("SimpleWithFormula.xls");
  82. // Comes out as NaN if treated as a number
  83. // And as XYZ if treated as a string
  84. assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", extractor.getText());
  85. extractor.setFormulasNotResults(true);
  86. assertEquals("Sheet1\nreplaceme\nreplaceme\nCONCATENATE(A1,A2)\nSheet2\nSheet3\n", extractor.getText());
  87. }
  88. public void testStringFormula() {
  89. ExcelExtractor extractor = createExtractor("StringFormulas.xls");
  90. // Comes out as NaN if treated as a number
  91. // And as XYZ if treated as a string
  92. assertEquals("Sheet1\nXYZ\nSheet2\nSheet3\n", extractor.getText());
  93. extractor.setFormulasNotResults(true);
  94. assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText());
  95. }
  96. public void testEventExtractor() throws Exception {
  97. // First up, a simple file with string
  98. // based formulas in it
  99. EventBasedExcelExtractor extractor = new EventBasedExcelExtractor(
  100. new POIFSFileSystem(
  101. HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
  102. )
  103. );
  104. try {
  105. extractor.setIncludeSheetNames(true);
  106. String text = extractor.getText();
  107. assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
  108. extractor.setIncludeSheetNames(false);
  109. extractor.setFormulasNotResults(true);
  110. text = extractor.getText();
  111. assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
  112. // Now, a slightly longer file with numeric formulas
  113. extractor = new EventBasedExcelExtractor(
  114. new POIFSFileSystem(
  115. HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
  116. )
  117. );
  118. extractor.setIncludeSheetNames(false);
  119. extractor.setFormulasNotResults(true);
  120. text = extractor.getText();
  121. assertEquals(
  122. "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
  123. "2000\t2\n" +
  124. "3000\t3\n" +
  125. "4000\t4\n" +
  126. "5000\t5\n",
  127. text
  128. );
  129. } finally {
  130. extractor.close();
  131. }
  132. }
  133. public void testWithComments() {
  134. ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
  135. extractor.setIncludeSheetNames(false);
  136. // Check without comments
  137. assertEquals(
  138. "1\tone\n" +
  139. "2\ttwo\n" +
  140. "3\tthree\n",
  141. extractor.getText()
  142. );
  143. // Now with
  144. extractor.setIncludeCellComments(true);
  145. assertEquals(
  146. "1\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
  147. "2\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
  148. "3\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
  149. extractor.getText()
  150. );
  151. }
  152. public void testWithBlank() {
  153. ExcelExtractor extractor = createExtractor("MissingBits.xls");
  154. String def = extractor.getText();
  155. extractor.setIncludeBlankCells(true);
  156. String padded = extractor.getText();
  157. assertTrue(def.startsWith(
  158. "Sheet1\n" +
  159. "&[TAB]\t\n" +
  160. "Hello\n" +
  161. "11\t23\n"
  162. ));
  163. assertTrue(padded.startsWith(
  164. "Sheet1\n" +
  165. "&[TAB]\t\n" +
  166. "Hello\n" +
  167. "11\t\t\t23\n"
  168. ));
  169. }
  170. public void testFormatting() throws Exception {
  171. ExcelExtractor extractor = createExtractor("Formatting.xls");
  172. extractor.setIncludeBlankCells(false);
  173. extractor.setIncludeSheetNames(false);
  174. String text = extractor.getText();
  175. // Note - not all the formats in the file
  176. // actually quite match what they claim to
  177. // be, as some are auto-local builtins...
  178. assertTrue(text.startsWith(
  179. "Dates, all 24th November 2006\n"
  180. ));
  181. assertTrue(
  182. text.indexOf(
  183. "yyyy/mm/dd\t2006/11/24\n"
  184. ) > -1
  185. );
  186. assertTrue(
  187. text.indexOf(
  188. "yyyy-mm-dd\t2006-11-24\n"
  189. ) > -1
  190. );
  191. assertTrue(
  192. text.indexOf(
  193. "dd-mm-yy\t24-11-06\n"
  194. ) > -1
  195. );
  196. assertTrue("Had: " + text + ", but should contain 'nn.nn\\t10.52\\n'",
  197. text.indexOf(
  198. "nn.nn\t10.52\n"
  199. ) > -1
  200. );
  201. assertTrue(
  202. text.indexOf(
  203. "nn.nnn\t10.520\n"
  204. ) > -1
  205. );
  206. assertTrue(
  207. text.indexOf(
  208. "\u00a3nn.nn\t\u00a310.52\n"
  209. ) > -1
  210. );
  211. }
  212. /**
  213. * Embeded in a non-excel file
  214. */
  215. public void testWithEmbeded() throws Exception {
  216. POIFSFileSystem fs = new POIFSFileSystem(
  217. POIDataSamples.getDocumentInstance().openResourceAsStream("word_with_embeded.doc")
  218. );
  219. DirectoryNode objPool = (DirectoryNode) fs.getRoot().getEntry("ObjectPool");
  220. DirectoryNode dirA = (DirectoryNode) objPool.getEntry("_1269427460");
  221. DirectoryNode dirB = (DirectoryNode) objPool.getEntry("_1269427461");
  222. HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
  223. HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
  224. ExcelExtractor exA = new ExcelExtractor(wbA);
  225. try {
  226. ExcelExtractor exB = new ExcelExtractor(wbB);
  227. try {
  228. assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
  229. exA.getText());
  230. assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
  231. assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
  232. exB.getText());
  233. assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
  234. } finally {
  235. exB.close();
  236. }
  237. } finally {
  238. exA.close();
  239. }
  240. }
  241. /**
  242. * Excel embeded in excel
  243. */
  244. public void testWithEmbededInOwn() throws Exception {
  245. POIDataSamples ssSamples = POIDataSamples.getSpreadSheetInstance();
  246. POIFSFileSystem fs = new POIFSFileSystem(
  247. ssSamples.openResourceAsStream("excel_with_embeded.xls")
  248. );
  249. DirectoryNode dirA = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B5");
  250. DirectoryNode dirB = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B4");
  251. HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
  252. HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
  253. ExcelExtractor exA = new ExcelExtractor(wbA);
  254. try {
  255. ExcelExtractor exB = new ExcelExtractor(wbB);
  256. try {
  257. assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
  258. exA.getText());
  259. assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
  260. assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
  261. exB.getText());
  262. assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
  263. // And the base file too
  264. ExcelExtractor ex = new ExcelExtractor(fs);
  265. try {
  266. assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
  267. ex.getText());
  268. assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
  269. } finally {
  270. ex.close();
  271. }
  272. } finally {
  273. exB.close();
  274. }
  275. } finally {
  276. exA.close();
  277. }
  278. }
  279. /**
  280. * Test that we get text from headers and footers
  281. */
  282. public void test45538() {
  283. String[] files = {
  284. "45538_classic_Footer.xls", "45538_form_Footer.xls",
  285. "45538_classic_Header.xls", "45538_form_Header.xls"
  286. };
  287. for(int i=0; i<files.length; i++) {
  288. ExcelExtractor extractor = createExtractor(files[i]);
  289. String text = extractor.getText();
  290. assertTrue("Unable to find expected word in text\n" + text, text.indexOf("testdoc") >=0);
  291. assertTrue("Unable to find expected word in text\n" + text, text.indexOf("test phrase") >= 0);
  292. }
  293. }
  294. public void testPassword() {
  295. Biff8EncryptionKey.setCurrentUserPassword("password");
  296. ExcelExtractor extractor = createExtractor("password.xls");
  297. String text = extractor.getText();
  298. Biff8EncryptionKey.setCurrentUserPassword(null);
  299. assertTrue(text.contains("ZIP"));
  300. }
  301. public void testNullPointerException() {
  302. ExcelExtractor extractor = createExtractor("ar.org.apsme.www_Form%20Inscripcion%20Curso%20NO%20Socios.xls");
  303. assertNotNull(extractor);
  304. assertNotNull(extractor.getText());
  305. }
  306. }