123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
- package org.apache.poi.xslf.extractor;
-
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.XSLFSlideShow;

import junit.framework.TestCase;
-
- /**
- * Tests for HXFPowerPointExtractor
- */
- public class TestXSLFPowerPointExtractor extends TestCase {
- /**
- * A simple file
- */
- private XSLFSlideShow xmlA;
- private OPCPackage pkg;
-
- private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
-
- protected void setUp() throws Exception {
- slTests = POIDataSamples.getSlideShowInstance();
- pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
- xmlA = new XSLFSlideShow(pkg);
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new XSLFPowerPointExtractor(xmlA);
- new XSLFPowerPointExtractor(pkg);
-
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xmlA);
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check Basics
- assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
- assertTrue(text.contains("amet\n\n"));
-
- // Our master text, for tests
- String masterText =
- "Click to edit Master title style\n" +
- "Click to edit Master text styles\n" +
- "Second level\n" +
- "Third level\n" +
- "Fourth level\n" +
- "Fifth level\n";
-
- // Just slides, no notes
- text = extractor.getText(true, false, false);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n"
- , text
- );
-
- // Just notes, no slides
- text = extractor.getText(false, true);
- assertEquals(
- "\n\n\n\n", text
- );
-
- // Both
- text = extractor.getText(true, true, false);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n\n\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n\n\n"
- , text
- );
-
- // With Slides and Master Text
- text = extractor.getText(true, false, true);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n" +
- masterText +
- "\n\n\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n" +
- masterText +
- "\n\n\n"
- , text
- );
-
- // With Slides, Notes and Master Text
- text = extractor.getText(true, true, true);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n" +
- masterText +
- "\n\n\n\n\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n" +
- masterText +
- "\n\n\n\n\n"
- , text
- );
-
- // Via set defaults
- extractor.setSlidesByDefault(false);
- extractor.setNotesByDefault(true);
- text = extractor.getText();
- assertEquals(
- "\n\n\n\n", text
- );
- }
-
- public void testGetComments() throws Exception {
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check comments are there
- assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
- assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
-
- // Check the authors came through too
- assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
- }
-
- public void testGetMasterText() throws Exception {
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check master text is there
- assertTrue("Unable to find expected word in text\n" + text,
- text.contains("Footer from the master slide"));
-
- // Check the whole text
- assertEquals(
- "First page title\n" +
- "First page subtitle\n" +
- // "This text comes from the Master Slide\n" + // TODO
- // "This is the Master Title\n" + // TODO
- "\n" + // TODO Should be the above
- "2nd page subtitle\n" +
- // "This text comes from the Master Slide\n" + // TODO
- "Footer from the master slide\n"
- , text
- );
- }
-
- public void testTable() throws Exception {
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check comments are there
- assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
- }
-
- /**
- * Test that we can get the text from macro enabled,
- * template, theme, slide enabled etc formats, as
- * well as from the normal file
- */
- public void testDifferentSubformats() throws Exception {
- String[] extensions = new String[] {
- "pptx", "pptm", "ppsm", "ppsx",
- "thmx",
- //"xps" // Doesn't have a core document
- };
- for(String extension : extensions) {
- String filename = "testPPT." + extension;
- XSLFSlideShow xml =
- new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
- XSLFPowerPointExtractor extractor =
- new XSLFPowerPointExtractor(xml);
-
- String text = extractor.getText();
- if(extension.equals("thmx")) {
- // Theme file doesn't have any textual content
- assertEquals(0, text.length());
- continue;
- }
-
- assertTrue(text.length() > 0);
- assertTrue(
- "Text missing for " + filename + "\n" + text,
- text.contains("Attachment Test")
- );
- assertTrue(
- "Text missing for " + filename + "\n" + text,
- text.contains("This is a test file data with the same content")
- );
- assertTrue(
- "Text missing for " + filename + "\n" + text,
- text.contains("content parsing")
- );
- assertTrue(
- "Text missing for " + filename + "\n" + text,
- text.contains("Different words to test against")
- );
- assertTrue(
- "Text missing for " + filename + "\n" + text,
- text.contains("Mystery")
- );
- }
- }
- }
|