==================================================================== */
package org.apache.poi.xslf.extractor;
+import static org.apache.poi.POITestCase.assertContains;
+import static org.apache.poi.POITestCase.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+
import org.apache.poi.POIDataSamples;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.xslf.usermodel.XSLFSlideShow;
-
-import junit.framework.TestCase;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.xmlbeans.XmlException;
+import org.junit.Test;
/**
- * Tests for HXFPowerPointExtractor
+ * Tests for XSLFPowerPointExtractor
*/
-public class TestXSLFPowerPointExtractor extends TestCase {
- /**
- * A simple file
- */
- private XSLFSlideShow xmlA;
- private OPCPackage pkg;
-
- private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
-
- protected void setUp() throws Exception {
- slTests = POIDataSamples.getSlideShowInstance();
- pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
- xmlA = new XSLFSlideShow(pkg);
- }
+public class TestXSLFPowerPointExtractor {
+ private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
 /**
 * Get text out of the simple file
+ * @throws IOException if the sample presentation cannot be opened
+ * @throws XmlException declared by this test; presumably raised while the
+ * package-based extractor parses the presentation XML - TODO confirm
+ * @throws OpenXML4JException declared by this test; presumably raised while the
+ * package-based extractor reads the OPC package - TODO confirm
 */
- public void testGetSimpleText() throws Exception {
- new XSLFPowerPointExtractor(xmlA).close();
+ @Test
+ public void testGetSimpleText()
+ throws IOException, XmlException, OpenXML4JException {
+ XMLSlideShow xmlA = openPPTX("sample.pptx");
+ @SuppressWarnings("resource")
+ OPCPackage pkg = xmlA.getPackage();
+
+ new XSLFPowerPointExtractor(xmlA).close();
 new XSLFPowerPointExtractor(pkg).close();
-
- XSLFPowerPointExtractor extractor =
+
+ XSLFPowerPointExtractor extractor =
 new XSLFPowerPointExtractor(xmlA);
 extractor.getText();
-
+
 String text = extractor.getText();
 assertTrue(text.length() > 0);
-
+
 // Check Basics
 assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
- assertTrue(text.contains("amet\n\n"));
+ assertContains(text, "amet\n\n");
 // Our placeholder master text
 // This shouldn't show up in the output
- String masterText =
- "Click to edit Master title style\n" +
- "Click to edit Master subtitle style\n" +
- "\n\n\n\n\n\n" +
- "Click to edit Master title style\n" +
- "Click to edit Master text styles\n" +
- "Second level\n" +
- "Third level\n" +
- "Fourth level\n" +
- "Fifth level\n";
-
+ // NOTE: the master text below is kept for reference only; nothing in this test asserts on it
+ // String masterText =
+ // "Click to edit Master title style\n" +
+ // "Click to edit Master subtitle style\n" +
+ // "\n\n\n\n\n\n" +
+ // "Click to edit Master title style\n" +
+ // "Click to edit Master text styles\n" +
+ // "Second level\n" +
+ // "Third level\n" +
+ // "Fourth level\n" +
+ // "Fifth level\n";
+
 // Just slides, no notes
 text = extractor.getText(true, false, false);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n"
- , text
- );
-
+ String slideText =
+ "Lorem ipsum dolor sit amet\n" +
+ "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
+ "\n" +
+ "Lorem ipsum dolor sit amet\n" +
+ "Lorem\n" +
+ "ipsum\n" +
+ "dolor\n" +
+ "sit\n" +
+ "amet\n" +
+ "\n";
+ assertEquals(slideText, text);
+
 // Just notes, no slides
 text = extractor.getText(false, true);
- assertEquals(
- "\n\n\n\n", text
- );
-
+ assertEquals("\n\n\n\n", text);
+
 // Both
 text = extractor.getText(true, true, false);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
+ String bothText =
+ "Lorem ipsum dolor sit amet\n" +
+ "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
 "\n\n\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n\n\n"
- , text
- );
-
+ "Lorem ipsum dolor sit amet\n" +
+ "Lorem\n" +
+ "ipsum\n" +
+ "dolor\n" +
+ "sit\n" +
+ "amet\n" +
+ "\n\n\n";
+ assertEquals(bothText, text);
+
 // With Slides and Master Text
- text = extractor.getText(true, false, true);
- assertEquals(
+ text = extractor.getText(true, false, true);
+ String smText =
 "Lorem ipsum dolor sit amet\n" +
 "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
 "\n" +
 "dolor\n" +
 "sit\n" +
 "amet\n" +
- "\n"
- , text
- );
-
+ "\n";
+ assertEquals(smText, text);
+
 // With Slides, Notes and Master Text
- text = extractor.getText(true, true, true);
- assertEquals(
+ text = extractor.getText(true, true, true);
+ String snmText =
 "Lorem ipsum dolor sit amet\n" +
 "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
 "\n" +
 "dolor\n" +
 "sit\n" +
 "amet\n" +
- "\n\n\n"
- , text
- );
-
+ "\n\n\n";
+ assertEquals(snmText, text);
+
 // Via set defaults
 extractor.setSlidesByDefault(false);
 extractor.setNotesByDefault(true);
 text = extractor.getText();
- assertEquals(
- "\n\n\n\n", text
- );
-
+ assertEquals("\n\n\n\n", text);
+
 extractor.close();
+ xmlA.close();
 }
-
- public void testGetComments() throws Exception {
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check comments are there
- assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
- assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
-
- // Check the authors came through too
- assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
-
- extractor.close();
- }
-
+
+ /**
+  * Checks that comment text and comment authors show up in the extracted text.
+  */
+ @Test
+ public void testGetComments() throws IOException {
+     XMLSlideShow xml = openPPTX("45545_Comment.pptx");
+     try {
+         XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
+         try {
+             String text = extractor.getText();
+             assertTrue(text.length() > 0);
+
+             // Check comments are there
+             assertContains(text, "testdoc");
+             assertContains(text, "test phrase");
+
+             // Check the authors came through too
+             assertContains(text, "XPVMWARE01");
+         } finally {
+             // Close even when an assertion fails, so the sample is not left open
+             extractor.close();
+         }
+     } finally {
+         xml.close();
+     }
+ }
+
+ /**
+ * Checks that master slide text is part of the extracted text when master
+ * text is enabled by default, and verifies the complete extracted text.
+ */
+ @Test
 public void testGetMasterText() throws Exception {
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
- extractor.setSlidesByDefault(true);
- extractor.setNotesByDefault(false);
- extractor.setMasterByDefault(true);
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check master text is there
- assertTrue("Unable to find expected word in text\n" + text,
- text.contains("Footer from the master slide"));
-
- // Theme text shouldn't show up
- String themeText =
- "Theme Master Title\n" +
- "Theme Master first level\n" +
- "And the 2nd level\n" +
- "Our 3rd level goes here\n" +
- "And onto the 4th, such fun....\n" +
- "Finally is the Fifth level\n";
-
- // Check the whole text
- assertEquals(
+ XMLSlideShow xml = openPPTX("WithMaster.pptx");
+ XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
+ extractor.setSlidesByDefault(true);
+ extractor.setNotesByDefault(false);
+ extractor.setMasterByDefault(true);
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Check master text is there
+ assertContains(text, "Footer from the master slide");
+
+ // Theme text shouldn't show up
+ // String themeText =
+ // "Theme Master Title\n" +
+ // "Theme Master first level\n" +
+ // "And the 2nd level\n" +
+ // "Our 3rd level goes here\n" +
+ // "And onto the 4th, such fun....\n" +
+ // "Finally is the Fifth level\n";
+
+ // Check the whole text
+ String wholeText =
 "First page title\n" +
 "First page subtitle\n" +
 "This is the Master Title\n" +
 "2nd page subtitle\n" +
 "Footer from the master slide\n" +
 "This is the Master Title\n" +
- "This text comes from the Master Slide\n"
- , text
- );
-
+ "This text comes from the Master Slide\n";
+ assertEquals(wholeText, text);
+
 extractor.close();
+ xml.close();
 }
- public void testTable() throws Exception {
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
+ /**
+ * Checks that the word "TEST" is extracted from present1.pptx
+ * (presumably table content, going by the test name - TODO confirm).
+ */
+ @Test
+ public void testTable() throws Exception {
+ XMLSlideShow xml = openPPTX("present1.pptx");
+ XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
 String text = extractor.getText();
 assertTrue(text.length() > 0);
- // Check comments are there
- assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
+ // Check the expected word is there
+ assertContains(text, "TEST");
-
+
 extractor.close();
+ xml.close();
 }
-
+
/**
* Test that we can get the text from macro enabled,
- * template, theme, slide enabled etc formats, as
+ * template, theme, slide enabled etc formats, as
* well as from the normal file
*/
+ @Test
 public void testDifferentSubformats() throws Exception {
- String[] extensions = new String[] {
- "pptx", "pptm", "ppsm", "ppsx",
- "thmx",
- //"xps" // Doesn't have a core document
- };
- for(String extension : extensions) {
- String filename = "testPPT." + extension;
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
-
- String text = extractor.getText();
- if(extension.equals("thmx")) {
- // Theme file doesn't have any textual content
- assertEquals(0, text.length());
- continue;
- }
-
+ String[] extensions = new String[] {
+ "pptx", "pptm", "ppsm", "ppsx", "thmx",
+ // "xps" - Doesn't have a core document
+ };
+ for(String extension : extensions) {
+ String filename = "testPPT." + extension;
+ XMLSlideShow xml = openPPTX(filename);
+ XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
+
+ String text = extractor.getText();
+ if (extension.equals("thmx")) {
+ // Theme file doesn't have any textual content
+ assertEquals(0, text.length());
+ // Release the resources here too; "continue" skips the close calls at the end of the loop body
+ extractor.close();
+ xml.close();
+ continue;
+ }
+
 assertTrue(text.length() > 0);
 assertTrue(
- "Text missing for " + filename + "\n" + text,
+ "Text missing for " + filename + "\n" + text,
 text.contains("Attachment Test")
 );
 assertTrue(
- "Text missing for " + filename + "\n" + text,
+ "Text missing for " + filename + "\n" + text,
 text.contains("This is a test file data with the same content")
 );
 assertTrue(
- "Text missing for " + filename + "\n" + text,
+ "Text missing for " + filename + "\n" + text,
 text.contains("content parsing")
 );
 assertTrue(
- "Text missing for " + filename + "\n" + text,
+ "Text missing for " + filename + "\n" + text,
 text.contains("Different words to test against")
 );
 assertTrue(
- "Text missing for " + filename + "\n" + text,
+ "Text missing for " + filename + "\n" + text,
 text.contains("Mystery")
 );
-
+
 extractor.close();
+ xml.close();
 }
 }
- public void test45541() throws Exception {
- // extract text from a powerpoint that has a header in the notes-element
- POITextExtractor extr = ExtractorFactory.createExtractor(slTests
- .openResourceAsStream("45541_Header.pptx"));
- String text = extr.getText();
- assertNotNull(text);
- assertFalse("Had: " + text, text.contains("testdoc"));
-
- text = ((XSLFPowerPointExtractor)extr).getText(false, true);
- assertNotNull(text);
- assertTrue("Had: " + text, text.contains("testdoc"));
- extr.close();
- assertNotNull(text);
+ @Test
+ public void test45541() throws Exception {
+ // Extract text from a PowerPoint file that has a header in the notes element:
+ // the default extraction must not contain "testdoc", the notes-only extraction must
+ POITextExtractor extr = ExtractorFactory.createExtractor(
+ slTests.getFile("45541_Header.pptx"));
+ String text = extr.getText();
+ assertNotNull(text);
+ assertFalse("Had: " + text, text.contains("testdoc"));
- // extract text from a powerpoint that has a footer in the master-slide
- extr = ExtractorFactory.createExtractor(slTests
- .openResourceAsStream("45541_Footer.pptx"));
- text = extr.getText();
- assertNotNull(text);
- assertFalse("Had " + text, text.contains("testdoc"));
-
- text = ((XSLFPowerPointExtractor)extr).getText(false, true);
+ text = ((XSLFPowerPointExtractor)extr).getText(false, true);
+ assertContains(text, "testdoc");
+ extr.close();
 assertNotNull(text);
- assertFalse("Had: " + text, text.contains("testdoc"));
- text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
- assertNotNull(text);
- assertFalse("Had: " + text, text.contains("testdoc"));
+ // Extract text from a PowerPoint file that has a footer in the master slide:
+ // "testdoc" must not appear in any of the three extractions below
+ extr = ExtractorFactory.createExtractor(
+ slTests.getFile("45541_Footer.pptx"));
+ text = extr.getText();
+ assertNotContained(text, "testdoc");
+
+ text = ((XSLFPowerPointExtractor)extr).getText(false, true);
+ assertNotContained(text, "testdoc");
- extr.close();
- }
+ text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
+ assertNotContained(text, "testdoc");
+
+ extr.close();
+ }
+
+
+ /**
+  * Checks that text extraction from bug54570.pptx completes and yields a non-null string.
+  */
+ @Test
+ public void bug54570() throws IOException {
+     XMLSlideShow xml = openPPTX("bug54570.pptx");
+     try {
+         XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
+         try {
+             String text = extractor.getText();
+             assertNotNull(text);
+         } finally {
+             // Close even when the assertion fails, so the sample is not left open
+             extractor.close();
+         }
+     } finally {
+         xml.close();
+     }
+ }
+
+ /**
+  * Loads the named slideshow sample into an {@link XMLSlideShow}.
+  * The resource stream is closed whether or not loading succeeds.
+  *
+  * @param file name of the sample file to open
+  * @return the loaded slideshow
+  * @throws IOException if opening, reading or closing the sample stream fails
+  */
+ private XMLSlideShow openPPTX(String file) throws IOException {
+     final InputStream sample = slTests.openResourceAsStream(file);
+     try {
+         final XMLSlideShow slideShow = new XMLSlideShow(sample);
+         return slideShow;
+     } finally {
+         sample.close();
+     }
+ }
}
package org.apache.poi;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
-import java.util.Collection;
import java.util.Map;
import org.apache.poi.util.SuppressForbidden;
/**
- * Parent class for POI JUnit TestCases, which provide additional
- * features
+ * Utility class for POI JUnit test cases, which provides additional assertion and reflection helpers
*/
-public class POITestCase {
+public final class POITestCase {
+ /**
+ * Fails the test unless {@code haystack} is non-null and contains {@code needle}.
+ *
+ * @param haystack the text to search in
+ * @param needle the text expected to be found
+ */
 public static void assertContains(String haystack, String needle) {
+ assertNotNull(haystack);
 assertTrue(
 "Unable to find expected text '" + needle + "' in text:\n" + haystack,
 haystack.contains(needle)
 );
- }
+ }
+
+ /**
+ * Fails the test if {@code haystack} is null or contains {@code needle}.
+ *
+ * @param haystack the text to search in
+ * @param needle the text that must not be found
+ */
 public static void assertNotContained(String haystack, String needle) {
+ assertNotNull(haystack);
 assertFalse(
 "Unexpectedly found text '" + needle + "' in text:\n" + haystack,
 haystack.contains(needle)
 );
- }
-
- public static <T> void assertContains(T needle, T[] haystack)
- {
- // Check
- for (T thing : haystack) {
- if (thing.equals(needle)) {
- return;
- }
- }
-
- // Failed, try to build a nice error
- StringBuilder sb = new StringBuilder();
- sb.append("Unable to find ").append(needle).append(" in [");
- for (T thing : haystack) {
- sb.append(" ").append(thing.toString()).append(" ,");
- }
- sb.setCharAt(sb.length()-1, ']');
-
- fail(sb.toString());
- }
+ }
- public static <T> void assertContains(T needle, Collection<T> haystack) {
- if (haystack.contains(needle)) {
- return;
- }
- fail("Unable to find " + needle + " in " + haystack);
- }
-
- /**
- * @param map haystack
- * @param key needle
- */
- public static <T> void assertContains(Map<T, ?> map, T key) {
- if (map.containsKey(key)) {
+ /**
+ * Fails the test unless {@code map} contains {@code key} as one of its keys.
+ *
+ * @param map haystack
+ * @param key needle
+ */
+ public static <T> void assertContains(Map<T, ?> map, T key) {
+ if (map.containsKey(key)) {
 return;
- }
- fail("Unable to find " + key + " in " + map);
- }
+ }
+ fail("Unable to find " + key + " in " + map);
+ }
- /** Utility method to get the value of a private/protected field.
- * Only use this method in test cases!!!
- */
- public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) {
- assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
- try {
+ /**
+ * Utility method to get the value of a private/protected field.
+ * Only use this method in test cases!!!
+ */
+ public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) {
+ assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
+ try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
@Override
@SuppressWarnings("unchecked")
return (R) f.get(instance);
}
});
- } catch (PrivilegedActionException pae) {
- throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException());
- }
- }
+ } catch (PrivilegedActionException pae) {
+ throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException());
+ }
+ }
- /** Utility method to call a private/protected method.
- * Only use this method in test cases!!!
- */
- public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName,
- final Class<?>[] parameterTypes, final Object[] parameters) {
- assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
- try {
+ /**
+ * Utility method to call a private/protected method.
+ * Only use this method in test cases!!!
+ */
+ public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName,
+ final Class<?>[] parameterTypes, final Object[] parameters) {
+ assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
+ try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
@Override
@SuppressWarnings("unchecked")
return (R) m.invoke(instance, parameters);
}
});
- } catch (PrivilegedActionException pae) {
- throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException());
- }
- }
+ } catch (PrivilegedActionException pae) {
+ throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException());
+ }
+ }
}