You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

TestXSLFPowerPointExtractor.java 8.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xslf.extractor;
  16. import org.apache.poi.POIDataSamples;
  17. import org.apache.poi.openxml4j.opc.OPCPackage;
  18. import org.apache.poi.xslf.XSLFSlideShow;
  19. import junit.framework.TestCase;
  20. /**
  21. * Tests for HXFPowerPointExtractor
  22. */
  23. public class TestXSLFPowerPointExtractor extends TestCase {
  24. /**
  25. * A simple file
  26. */
  27. private XSLFSlideShow xmlA;
  28. private OPCPackage pkg;
  29. private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
  30. protected void setUp() throws Exception {
  31. slTests = POIDataSamples.getSlideShowInstance();
  32. pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
  33. xmlA = new XSLFSlideShow(pkg);
  34. }
  35. /**
  36. * Get text out of the simple file
  37. */
  38. public void testGetSimpleText() throws Exception {
  39. new XSLFPowerPointExtractor(xmlA);
  40. new XSLFPowerPointExtractor(pkg);
  41. XSLFPowerPointExtractor extractor =
  42. new XSLFPowerPointExtractor(xmlA);
  43. extractor.getText();
  44. String text = extractor.getText();
  45. assertTrue(text.length() > 0);
  46. // Check Basics
  47. assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
  48. assertTrue(text.contains("amet\n\n"));
  49. // Our master text, for tests
  50. String masterText =
  51. "Click to edit Master title style\n" +
  52. "Click to edit Master text styles\n" +
  53. "Second level\n" +
  54. "Third level\n" +
  55. "Fourth level\n" +
  56. "Fifth level\n";
  57. // Just slides, no notes
  58. text = extractor.getText(true, false, false);
  59. assertEquals(
  60. "Lorem ipsum dolor sit amet\n" +
  61. "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
  62. "\n" +
  63. "Lorem ipsum dolor sit amet\n" +
  64. "Lorem\n" +
  65. "ipsum\n" +
  66. "dolor\n" +
  67. "sit\n" +
  68. "amet\n" +
  69. "\n"
  70. , text
  71. );
  72. // Just notes, no slides
  73. text = extractor.getText(false, true);
  74. assertEquals(
  75. "\n\n\n\n", text
  76. );
  77. // Both
  78. text = extractor.getText(true, true, false);
  79. assertEquals(
  80. "Lorem ipsum dolor sit amet\n" +
  81. "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
  82. "\n\n\n" +
  83. "Lorem ipsum dolor sit amet\n" +
  84. "Lorem\n" +
  85. "ipsum\n" +
  86. "dolor\n" +
  87. "sit\n" +
  88. "amet\n" +
  89. "\n\n\n"
  90. , text
  91. );
  92. // With Slides and Master Text
  93. text = extractor.getText(true, false, true);
  94. assertEquals(
  95. "Lorem ipsum dolor sit amet\n" +
  96. "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
  97. "\n" +
  98. masterText +
  99. "\n\n\n" +
  100. "Lorem ipsum dolor sit amet\n" +
  101. "Lorem\n" +
  102. "ipsum\n" +
  103. "dolor\n" +
  104. "sit\n" +
  105. "amet\n" +
  106. "\n" +
  107. masterText +
  108. "\n\n\n"
  109. , text
  110. );
  111. // With Slides, Notes and Master Text
  112. text = extractor.getText(true, true, true);
  113. assertEquals(
  114. "Lorem ipsum dolor sit amet\n" +
  115. "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
  116. "\n" +
  117. masterText +
  118. "\n\n\n\n\n" +
  119. "Lorem ipsum dolor sit amet\n" +
  120. "Lorem\n" +
  121. "ipsum\n" +
  122. "dolor\n" +
  123. "sit\n" +
  124. "amet\n" +
  125. "\n" +
  126. masterText +
  127. "\n\n\n\n\n"
  128. , text
  129. );
  130. // Via set defaults
  131. extractor.setSlidesByDefault(false);
  132. extractor.setNotesByDefault(true);
  133. text = extractor.getText();
  134. assertEquals(
  135. "\n\n\n\n", text
  136. );
  137. }
  138. public void testGetComments() throws Exception {
  139. XSLFSlideShow xml =
  140. new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
  141. XSLFPowerPointExtractor extractor =
  142. new XSLFPowerPointExtractor(xml);
  143. String text = extractor.getText();
  144. assertTrue(text.length() > 0);
  145. // Check comments are there
  146. assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
  147. assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
  148. // Check the authors came through too
  149. assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
  150. }
  151. public void testGetMasterText() throws Exception {
  152. XSLFSlideShow xml =
  153. new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
  154. XSLFPowerPointExtractor extractor =
  155. new XSLFPowerPointExtractor(xml);
  156. String text = extractor.getText();
  157. assertTrue(text.length() > 0);
  158. // Check master text is there
  159. assertTrue("Unable to find expected word in text\n" + text,
  160. text.contains("Footer from the master slide"));
  161. // Check the whole text
  162. assertEquals(
  163. "First page title\n" +
  164. "First page subtitle\n" +
  165. // "This text comes from the Master Slide\n" + // TODO
  166. // "This is the Master Title\n" + // TODO
  167. "\n" + // TODO Should be the above
  168. "2nd page subtitle\n" +
  169. // "This text comes from the Master Slide\n" + // TODO
  170. "Footer from the master slide\n"
  171. , text
  172. );
  173. }
  174. public void testTable() throws Exception {
  175. XSLFSlideShow xml =
  176. new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
  177. XSLFPowerPointExtractor extractor =
  178. new XSLFPowerPointExtractor(xml);
  179. String text = extractor.getText();
  180. assertTrue(text.length() > 0);
  181. // Check comments are there
  182. assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
  183. }
  184. /**
  185. * Test that we can get the text from macro enabled,
  186. * template, theme, slide enabled etc formats, as
  187. * well as from the normal file
  188. */
  189. public void testDifferentSubformats() throws Exception {
  190. String[] extensions = new String[] {
  191. "pptx", "pptm", "ppsm", "ppsx",
  192. "thmx",
  193. //"xps" // Doesn't have a core document
  194. };
  195. for(String extension : extensions) {
  196. String filename = "testPPT." + extension;
  197. XSLFSlideShow xml =
  198. new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
  199. XSLFPowerPointExtractor extractor =
  200. new XSLFPowerPointExtractor(xml);
  201. String text = extractor.getText();
  202. if(extension.equals("thmx")) {
  203. // Theme file doesn't have any textual content
  204. assertEquals(0, text.length());
  205. continue;
  206. }
  207. assertTrue(text.length() > 0);
  208. assertTrue(
  209. "Text missing for " + filename + "\n" + text,
  210. text.contains("Attachment Test")
  211. );
  212. assertTrue(
  213. "Text missing for " + filename + "\n" + text,
  214. text.contains("This is a test file data with the same content")
  215. );
  216. assertTrue(
  217. "Text missing for " + filename + "\n" + text,
  218. text.contains("content parsing")
  219. );
  220. assertTrue(
  221. "Text missing for " + filename + "\n" + text,
  222. text.contains("Different words to test against")
  223. );
  224. assertTrue(
  225. "Text missing for " + filename + "\n" + text,
  226. text.contains("Mystery")
  227. );
  228. }
  229. }
  230. }