From: Nick Burch Date: Tue, 29 Jun 2010 11:07:27 +0000 (+0000) Subject: More XSLF tests for the less common extensions, and initial support for .thmx (theme... X-Git-Tag: REL_3_7_BETA2~50 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b75c47e1e5917ec141544d3de00546ed09997d29;p=poi.git More XSLF tests for the less common extensions, and initial support for .thmx (theme) files - currently just stretches to not breaking... git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@958923 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index d7096d8cdc..12b8a3830e 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though 49432 - Lazy caching of XSSFComment CTComment objects by reference, to make repeated comment searching faster diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 50d89918ac..d8fb78f1c2 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -16,23 +16,30 @@ ==================================================================== */ package org.apache.poi; -import java.io.*; -import java.util.*; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.PushbackInputStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; -import org.apache.poi.poifs.common.POIFSConstants; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.PackageHelper; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; -import org.apache.poi.openxml4j.opc.*; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.util.IOUtils; public abstract class POIXMLDocument extends POIXMLDocumentPart{ public static final String DOCUMENT_CREATOR = "Apache POI"; - public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"; - public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"; - public static final String CUSTOM_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"; - // OLE embeddings relation name public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java index c129b0a793..45c2d932bf 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java @@ -23,6 +23,7 @@ import java.net.URI; import org.apache.xmlbeans.XmlOptions; import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogFactory; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.*; @@ -85,6 +86,24 @@ public class POIXMLDocumentPart { this.packagePart = part; this.packageRel = rel; } + + /** + * When you open something like a theme, call this to + * re-base the XML Document onto the core child of the + * current core document + */ + protected final void rebase(OPCPackage pkg) throws InvalidFormatException { + PackageRelationshipCollection cores = + packagePart.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT); + if(cores.size() != 1) { + throw new IllegalStateException( + "Tried to rebase using " + PackageRelationshipTypes.CORE_DOCUMENT + + " but found " + cores.size() + " parts of the right type" + ); + } + packageRel = cores.getRelationship(0); + packagePart = POIXMLDocument.getTargetPart(pkg, packageRel); + } /** * Provides access to the underlying PackagePart diff --git a/src/ooxml/java/org/apache/poi/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/POIXMLProperties.java index 3fa35341e5..88015a0caf 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLProperties.java +++ b/src/ooxml/java/org/apache/poi/POIXMLProperties.java @@ -28,6 +28,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePartName; import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.TargetMode; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; @@ -68,7 +69,7 @@ public class POIXMLProperties { // Extended properties PackageRelationshipCollection extRel = - pkg.getRelationshipsByType(POIXMLDocument.EXTENDED_PROPERTIES_REL_TYPE); + pkg.getRelationshipsByType(PackageRelationshipTypes.EXTENDED_PROPERTIES); if(extRel.size() == 1) { extPart = pkg.getPart( extRel.getRelationship(0)); org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse( @@ -82,7 +83,7 @@ public class POIXMLProperties { // Custom properties PackageRelationshipCollection custRel = - pkg.getRelationshipsByType(POIXMLDocument.CUSTOM_PROPERTIES_REL_TYPE); + pkg.getRelationshipsByType(PackageRelationshipTypes.CUSTOM_PROPERTIES); if(custRel.size() == 1) { custPart = pkg.getPart( custRel.getRelationship(0)); org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse( diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java index e36c025191..337dacb4e1 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java @@ -55,6 +55,11 @@ public interface PackageRelationshipTypes { */ String EXTENDED_PROPERTIES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"; + /** + * Custom properties relationship type. + */ + String CUSTOM_PROPERTIES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"; + /** * Core properties relationship type. */ diff --git a/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java index 67fb27fafc..3ac2dc67bc 100644 --- a/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java +++ b/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java @@ -29,6 +29,7 @@ import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; import org.apache.xmlbeans.XmlException; +import org.openxmlformats.schemas.drawingml.x2006.main.ThemeDocument; import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList; import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation; @@ -57,11 +58,12 @@ import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument; */ public class XSLFSlideShow extends POIXMLDocument { public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"; - public static final String MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml"; - public static final String MACRO_TEMPLATE_CONTENT_TYPE = "application/vnd.ms-powerpoint.template.macroEnabled.main+xml"; - public static final String PRESENTATIONML_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml"; - public static final String PRESENTATIONML_TEMPLATE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml"; - public static final String PRESENTATION_MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml"; + public static final String MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml"; + public static final String MACRO_TEMPLATE_CONTENT_TYPE = "application/vnd.ms-powerpoint.template.macroEnabled.main+xml"; + public static final String PRESENTATIONML_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml"; + public static final String PRESENTATIONML_TEMPLATE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml"; + public static final String PRESENTATION_MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml"; + public static final String THEME_MANAGER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.themeManager+xml"; public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"; public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"; public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"; @@ -77,11 +79,15 @@ public class XSLFSlideShow extends POIXMLDocument { public XSLFSlideShow(OPCPackage container) throws OpenXML4JException, IOException, XmlException { super(container); + if(getCorePart().getContentType().equals(THEME_MANAGER_CONTENT_TYPE)) { + rebase(getPackage()); + } + presentationDoc = PresentationDocument.Factory.parse(getCorePart().getInputStream()); - embedds = new LinkedList(); - for (CTSlideIdListEntry ctSlide : getSlideReferences().getSldIdList()) { + embedds = new LinkedList(); + for (CTSlideIdListEntry ctSlide : getSlideReferences().getSldIdList()) { PackagePart slidePart = getTargetPart(getCorePart().getRelationship(ctSlide.getId2())); @@ -112,7 +118,12 @@ public class XSLFSlideShow extends POIXMLDocument { */ @Internal public CTSlideIdList getSlideReferences() { - return getPresentation().getSldIdLst(); + if(! getPresentation().isSetSldIdLst()) { + getPresentation().setSldIdLst( + CTSlideIdList.Factory.newInstance() + ); + } + return getPresentation().getSldIdLst(); } /** * Returns the references from the presentation to its diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java index cfb4e65189..12db514099 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -126,4 +126,53 @@ public class TestXSLFPowerPointExtractor extends TestCase { // Check comments are there assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); } + + /** + * Test that we can get the text from macro enabled, + * template, theme, slide enabled etc formats, as + * well as from the normal file + */ + public void testDifferentSubformats() throws Exception { + POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); + String[] extensions = new String[] { + "pptx", "pptm", "ppsm", "ppsx", + "thmx", + //"xps" // Doesn't have a core document + }; + for(String extension : extensions) { + String filename = "testPPT." + extension; + xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename))); + XSLFPowerPointExtractor extractor = + new XSLFPowerPointExtractor(xmlA); + + String text = extractor.getText(); + if(extension.equals("thmx")) { + // Theme file doesn't have any textual content + assertEquals(0, text.length()); + continue; + } + + assertTrue(text.length() > 0); + assertTrue( + "Text missing for " + filename + "\n" + text, + text.contains("Attachment Test") + ); + assertTrue( + "Text missing for " + filename + "\n" + text, + text.contains("This is a test file data with the same content") + ); + assertTrue( + "Text missing for " + filename + "\n" + text, + text.contains("content parsing") + ); + assertTrue( + "Text missing for " + filename + "\n" + text, + text.contains("Different words to test against") + ); + assertTrue( + "Text missing for " + filename + "\n" + text, + text.contains("Mystery") + ); + } + } } diff --git a/test-data/slideshow/testPPT.ppsm b/test-data/slideshow/testPPT.ppsm new file mode 100644 index 0000000000..6ba1432c8e Binary files /dev/null and b/test-data/slideshow/testPPT.ppsm differ diff --git a/test-data/slideshow/testPPT.ppsx b/test-data/slideshow/testPPT.ppsx new file mode 100644 index 0000000000..814ec0e31f Binary files /dev/null and b/test-data/slideshow/testPPT.ppsx differ diff --git a/test-data/slideshow/testPPT.pptm b/test-data/slideshow/testPPT.pptm new file mode 100644 index 0000000000..e8fe1626de Binary files /dev/null and b/test-data/slideshow/testPPT.pptm differ diff --git a/test-data/slideshow/testPPT.pptx b/test-data/slideshow/testPPT.pptx new file mode 100644 index 0000000000..38bc6a233b Binary files /dev/null and b/test-data/slideshow/testPPT.pptx differ diff --git a/test-data/slideshow/testPPT.thmx b/test-data/slideshow/testPPT.thmx new file mode 100644 index 0000000000..9144088189 Binary files /dev/null and b/test-data/slideshow/testPPT.thmx differ diff --git a/test-data/slideshow/testPPT.xps b/test-data/slideshow/testPPT.xps new file mode 100644 index 0000000000..678033a617 Binary files /dev/null and b/test-data/slideshow/testPPT.xps differ