]> source.dussan.org Git - poi.git/commitdiff
Get the powerpoint ooxml stuff converted over, and fix up a few tests
authorNick Burch <nick@apache.org>
Sun, 9 Mar 2008 14:21:34 +0000 (14:21 +0000)
committerNick Burch <nick@apache.org>
Sun, 9 Mar 2008 14:21:34 +0000 (14:21 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635243 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/POIXMLDocument.java
src/ooxml/java/org/apache/poi/xslf/HSLFXML.java [deleted file]
src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xslf/extractor/HXFPowerPointExtractor.java [deleted file]
src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xslf/usermodel/HSLFXMLSlideShow.java [deleted file]
src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java
src/ooxml/testcases/org/apache/poi/xslf/TestHSLFXML.java [deleted file]
src/ooxml/testcases/org/apache/poi/xslf/TestXSLFSlideShow.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java

index 36f195eeb1f1525e008abc02c0af368d8aeca0f0..4e190d7e77460d03582aa4d58eb9a5a5f10d3412 100644 (file)
@@ -22,15 +22,19 @@ import java.io.PushbackInputStream;
 
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.util.IOUtils;
+import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.InvalidFormatException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxml4j.opc.PackagePart;
 import org.openxml4j.opc.PackagePartName;
 import org.openxml4j.opc.PackageRelationship;
+import org.openxml4j.opc.PackageRelationshipCollection;
 import org.openxml4j.opc.PackageRelationshipTypes;
 import org.openxml4j.opc.PackagingURIHelper;
-
+import org.openxml4j.opc.internal.PackagePropertiesPart;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
 
 public abstract class POIXMLDocument {
 
@@ -68,7 +72,7 @@ public abstract class POIXMLDocument {
      *  in the event of a problem.
      * Works around shortcomings in java's this() constructor calls
      */
-    protected static Package openPackage(String path) throws IOException {
+    public static Package openPackage(String path) throws IOException {
         try {
             return Package.open(path);
         } catch (InvalidFormatException e) {
@@ -99,6 +103,27 @@ public abstract class POIXMLDocument {
         }
         return part;
     }
+
+       /**
+        * Resolves the one PackagePart that the core part points at
+        *  through a relationship of the supplied type.
+        * @param relationType The relationship type to search for
+        * @return The target part, or null if the core part has no
+        *  relationship of that type
+        * @throws IllegalArgumentException If more than one relationship of that type exists
+        */
+       protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
+               PackageRelationshipCollection matches =
+                       getCorePart().getRelationshipsByType(relationType);
+               int found = matches.size();
+               if(found > 1) {
+                       throw new IllegalArgumentException("Found " + found + " relations for the type " + relationType + ", should only ever be one!");
+               }
+               if(found == 0) {
+                       return null;
+               }
+               // Exactly one match left, follow it to its target
+               PackageRelationship only = matches.getRelationship(0);
+               return getTargetPart(only);
+       }
+       
     
     /**
      * Checks that the supplied InputStream (which MUST
@@ -132,4 +157,30 @@ public abstract class POIXMLDocument {
                header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
         );                                                         
     }
+
+       /**
+        * Returns the part holding the core ooxml document properties,
+        *  or null if no part is found for the core properties relation type.
+        * TODO: Replace with nice usermodel wrapper
+        * @deprecated To be replaced with a proper user-model style view of the properties
+        */
+       public PackagePropertiesPart getCoreProperties() throws OpenXML4JException, IOException {
+               // Casting null is harmless, so a missing part simply comes back as null
+               return (PackagePropertiesPart)getSinglePartByRelationType(CORE_PROPERTIES_REL_TYPE);
+       }
+       
+       /**
+        * Get the extended document properties (extended ooxml properties),
+        *  or null if the document has no extended properties part.
+        * TODO: Replace with nice usermodel wrapper
+        * @deprecated To be replaced with a proper user-model style view of the properties
+        */
+       public CTProperties getExtendedProperties() throws OpenXML4JException, XmlException, IOException {
+               PackagePart propsPart = getSinglePartByRelationType(EXTENDED_PROPERTIES_REL_TYPE);
+               if(propsPart == null) {
+                       // The lookup hands back null when no such relation exists;
+                       //  report that as null, the same way getCoreProperties() does,
+                       //  instead of failing with a NullPointerException below
+                       return null;
+               }
+               
+               PropertiesDocument props = PropertiesDocument.Factory.parse(
+                               propsPart.getInputStream());
+               return props.getProperties();
+       }
 }
diff --git a/src/ooxml/java/org/apache/poi/xslf/HSLFXML.java b/src/ooxml/java/org/apache/poi/xslf/HSLFXML.java
deleted file mode 100644 (file)
index 568cb80..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf;
-
-import java.io.IOException;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxml4j.opc.PackageRelationshipCollection;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument;
-
-/**
- * Experimental class to do low level processing
- *  of pptx files.
- *  
- * If you are using these low level classes, then you
- *  will almost certainly need to refer to the OOXML
- *  specifications from
- *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
- * 
- * WARNING - APIs expected to change rapidly
- */
-public class HSLFXML extends HXFDocument {
-       public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
-       public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";
-       public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
-       public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout";
-       public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide";
-
-       private PresentationDocument presentationDoc;
-       
-       public HSLFXML(Package container) throws OpenXML4JException, IOException, XmlException {
-               super(container, MAIN_CONTENT_TYPE);
-               
-               presentationDoc =
-                       PresentationDocument.Factory.parse(basePart.getInputStream());
-       }
-       
-       /**
-        * Returns the low level presentation base object
-        */
-       public CTPresentation getPresentation() {
-               return presentationDoc.getPresentation();
-       }
-       
-       /**
-        * Returns the references from the presentation to its
-        *  slides.
-        * You'll need these to figure out the slide ordering,
-        *  and to get at the actual slides themselves
-        */
-       public CTSlideIdList getSlideReferences() {
-               return getPresentation().getSldIdLst();
-       }
-       /**
-        * Returns the references from the presentation to its
-        *  slide masters.
-        * You'll need these to get at the actual slide 
-        *  masters themselves
-        */
-       public CTSlideMasterIdList getSlideMasterReferences() {
-               return getPresentation().getSldMasterIdLst();
-       }
-       
-       /**
-        * Returns the low level slide master object from
-        *  the supplied slide master reference
-        */
-       public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException {
-               PackagePart masterPart =
-                       getRelatedPackagePart(master.getId2());
-               SldMasterDocument masterDoc =
-                       SldMasterDocument.Factory.parse(masterPart.getInputStream());
-               return masterDoc.getSldMaster();
-       }
-       
-       /**
-        * Returns the low level slide object from
-        *  the supplied slide reference
-        */
-       public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException {
-               PackagePart slidePart =
-                       getRelatedPackagePart(slide.getId2());
-               SldDocument slideDoc =
-                       SldDocument.Factory.parse(slidePart.getInputStream());
-               return slideDoc.getSld();
-       }
-       
-       /**
-        * Returns the low level notes object for the given
-        *  slide, as found from the supplied slide reference
-        */
-       public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException {
-               PackagePart slidePart =
-                       getRelatedPackagePart(slide.getId2());
-               
-               PackageRelationshipCollection notes;
-               try {
-                       notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE);
-               } catch(InvalidFormatException e) {
-                       throw new IllegalStateException(e);
-               }
-               
-               if(notes.size() == 0) {
-                       // No notes for this slide
-                       return null;
-               }
-               if(notes.size() > 1) {
-                       throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + notes.size());
-               }
-               
-               PackagePart notesPart =
-                       getPackagePart(notes.getRelationship(0));
-               NotesDocument notesDoc =
-                       NotesDocument.Factory.parse(notesPart.getInputStream());
-               
-               return notesDoc.getNotes();
-       }
-}
diff --git a/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java
new file mode 100644 (file)
index 0000000..fbdf00b
--- /dev/null
@@ -0,0 +1,164 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf;
+
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxml4j.opc.PackageRelationshipCollection;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument;
+import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument;
+import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument;
+import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument;
+
+/**
+ * Low level, experimental access to the parts of a
+ *  pptx file.
+ *  
+ * Anyone working at this level will almost certainly
+ *  need the OOXML specifications to hand, see
+ *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
+ * 
+ * WARNING - APIs expected to change rapidly
+ */
+public class XSLFSlideShow extends POIXMLDocument {
+       public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
+       public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";
+       public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
+       public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout";
+       public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide";
+
+       private PresentationDocument presentationDoc;
+       
+       /**
+        * Wraps an already opened package, parsing the
+        *  presentation document out of its core part
+        */
+       public XSLFSlideShow(Package container) throws OpenXML4JException, IOException, XmlException {
+               super(container);
+               
+               this.presentationDoc = PresentationDocument.Factory.parse(
+                               getCorePart().getInputStream());
+       }
+       /**
+        * Opens the package at the given path, then
+        *  wraps it as per the Package constructor
+        */
+       public XSLFSlideShow(String file) throws OpenXML4JException, IOException, XmlException {
+               this(openPackage(file));
+       }
+       
+       /**
+        * Gives the low level presentation object that
+        *  was parsed from the core part
+        */
+       public CTPresentation getPresentation() {
+               return presentationDoc.getPresentation();
+       }
+       
+       /**
+        * Gives the presentation's list of slide references.
+        * The slide ordering comes from these, and they are
+        *  what you pass in to fetch the slides themselves
+        */
+       public CTSlideIdList getSlideReferences() {
+               CTPresentation presentation = getPresentation();
+               return presentation.getSldIdLst();
+       }
+       /**
+        * Gives the presentation's list of slide master
+        *  references.
+        * These are what you pass in to fetch the slide
+        *  masters themselves
+        */
+       public CTSlideMasterIdList getSlideMasterReferences() {
+               CTPresentation presentation = getPresentation();
+               return presentation.getSldMasterIdLst();
+       }
+       
+       /**
+        * Loads the low level slide master object that the
+        *  supplied slide master reference points to.
+        * @throws XmlException If the xml fails to parse, or wrapping
+        *  an InvalidFormatException hit while locating the part
+        */
+       public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException {
+               PackagePart part;
+               try {
+                       // Only locating the part can trip over the package format
+                       part = getTargetPart(getCorePart().getRelationship(master.getId2()));
+               } catch(InvalidFormatException e) {
+                       throw new XmlException(e);
+               }
+               
+               return SldMasterDocument.Factory.parse(part.getInputStream()).getSldMaster();
+       }
+       
+       /**
+        * Loads the low level slide object that the
+        *  supplied slide reference points to.
+        * @throws XmlException If the xml fails to parse, or wrapping
+        *  an InvalidFormatException hit while locating the part
+        */
+       public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException {
+               PackagePart part;
+               try {
+                       // Only locating the part can trip over the package format
+                       part = getTargetPart(getCorePart().getRelationship(slide.getId2()));
+               } catch(InvalidFormatException e) {
+                       throw new XmlException(e);
+               }
+               
+               return SldDocument.Factory.parse(part.getInputStream()).getSld();
+       }
+       
+       /**
+        * Loads the low level notes object belonging to the
+        *  slide that the supplied slide reference points to.
+        * @return The notes, or null if the slide has none
+        * @throws IllegalStateException If the slide has more than one
+        *  notes relation, or wrapping an InvalidFormatException
+        */
+       public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException {
+               try {
+                       PackagePart owner =
+                               getTargetPart(getCorePart().getRelationship(slide.getId2()));
+                       PackageRelationshipCollection noteRels =
+                               owner.getRelationshipsByType(NOTES_RELATION_TYPE);
+                       
+                       int howMany = noteRels.size();
+                       if(howMany > 1) {
+                               throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + howMany);
+                       }
+                       if(howMany == 0) {
+                               // This slide simply has no notes
+                               return null;
+                       }
+                       
+                       PackagePart notesPart = getTargetPart(noteRels.getRelationship(0));
+                       return NotesDocument.Factory.parse(notesPart.getInputStream()).getNotes();
+               } catch(InvalidFormatException e) {
+                       throw new IllegalStateException(e);
+               }
+       }
+}
diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/HXFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/HXFPowerPointExtractor.java
deleted file mode 100644 (file)
index bfa59e3..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hslf.HSLFXML;
-import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-
-public class HXFPowerPointExtractor extends POIXMLTextExtractor {
-       private HSLFXMLSlideShow slideshow;
-       private boolean slidesByDefault = true;
-       private boolean notesByDefault = false;
-       
-       public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
-               this(new HSLFXMLSlideShow(
-                               new XSLFXML(container)
-               ));
-       }
-       public HXFPowerPointExtractor(HSLFXMLSlideShow slideshow) {
-               super(slideshow);
-               this.slideshow = slideshow;
-       }
-
-       public static void main(String[] args) throws Exception {
-               if(args.length < 1) {
-                       System.err.println("Use:");
-                       System.err.println("  HXFPowerPointExtractor <filename.pptx>");
-                       System.exit(1);
-               }
-               POIXMLTextExtractor extractor = 
-                       new HXFPowerPointExtractor(HXFDocument.openPackage(
-                                       new File(args[0])
-                       ));
-               System.out.println(extractor.getText());
-       }
-
-       /**
-        * Should a call to getText() return slide text?
-        * Default is yes
-        */
-       public void setSlidesByDefault(boolean slidesByDefault) {
-               this.slidesByDefault = slidesByDefault;
-       }
-       /**
-        * Should a call to getText() return notes text?
-        * Default is no
-        */
-       public void setNotesByDefault(boolean notesByDefault) {
-               this.notesByDefault = notesByDefault;
-       }
-       
-       /**
-        * Gets the slide text, but not the notes text
-        */
-       public String getText() {
-               return getText(slidesByDefault, notesByDefault);
-       }
-       
-       /**
-        * Gets the requested text from the file
-        * @param slideText Should we retrieve text from slides?
-        * @param notesText Should we retrieve text from notes?
-        */
-       public String getText(boolean slideText, boolean notesText) {
-               StringBuffer text = new StringBuffer();
-               
-               CTSlideIdListEntry[] slideRefs =
-                       slideshow._getHSLFXML().getSlideReferences().getSldIdArray();
-               for (int i = 0; i < slideRefs.length; i++) {
-                       try {
-                               CTSlide slide =
-                                       slideshow._getHSLFXML().getSlide(slideRefs[i]);
-                               CTNotesSlide notes = 
-                                       slideshow._getHSLFXML().getNotes(slideRefs[i]);
-                               
-                               if(slideText) {
-                                       extractText(slide.getCSld().getSpTree(), text);
-                               }
-                               if(notesText && notes != null) {
-                                       extractText(notes.getCSld().getSpTree(), text);
-                               }
-                       } catch(Exception e) {
-                               throw new RuntimeException(e);
-                       }
-               }
-               
-               return text.toString();
-       }
-       
-       private void extractText(CTGroupShape gs, StringBuffer text) {
-               CTShape[] shapes = gs.getSpArray();
-               for (int i = 0; i < shapes.length; i++) {
-                       CTTextBody textBody =
-                               shapes[i].getTxBody();
-                       if(textBody != null) {
-                               CTTextParagraph[] paras = 
-                                       textBody.getPArray();
-                               for (int j = 0; j < paras.length; j++) {
-                                       CTRegularTextRun[] textRuns =
-                                               paras[j].getRArray();
-                                       for (int k = 0; k < textRuns.length; k++) {
-                                               text.append( textRuns[k].getT() );
-                                       }
-                                       // End each paragraph with a new line
-                                       text.append("\n");
-                               }
-                       }
-               }
-       }
-}
diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
new file mode 100644 (file)
index 0000000..0f28c2f
--- /dev/null
@@ -0,0 +1,135 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf.extractor;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+
+/**
+ * Text extractor for pptx files, pulling the text runs out of
+ *  the shapes on each slide and, optionally, its notes.
+ */
+public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
+       private XSLFSlideShow slideshow;
+       private boolean slidesByDefault = true;
+       private boolean notesByDefault = false;
+       
+       /**
+        * Builds an extractor over an already opened package
+        */
+       public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
+               this(new XSLFSlideShow(container));
+       }
+       /**
+        * Builds an extractor over an already loaded slideshow
+        */
+       public XSLFPowerPointExtractor(XSLFSlideShow slideshow) {
+               super(slideshow);
+               this.slideshow = slideshow;
+       }
+
+       /**
+        * Command line entry point, prints the default text of
+        *  the pptx file named by the first argument
+        */
+       public static void main(String[] args) throws Exception {
+               if(args.length < 1) {
+                       System.err.println("Use:");
+                       // Name this class, not the HXF one it replaced
+                       System.err.println("  XSLFPowerPointExtractor <filename.pptx>");
+                       System.exit(1);
+               }
+               POIXMLTextExtractor extractor = 
+                       new XSLFPowerPointExtractor(
+                                       new XSLFSlideShow(args[0]));
+               System.out.println(extractor.getText());
+       }
+
+       /**
+        * Should a call to getText() return slide text?
+        * Default is yes
+        */
+       public void setSlidesByDefault(boolean slidesByDefault) {
+               this.slidesByDefault = slidesByDefault;
+       }
+       /**
+        * Should a call to getText() return notes text?
+        * Default is no
+        */
+       public void setNotesByDefault(boolean notesByDefault) {
+               this.notesByDefault = notesByDefault;
+       }
+       
+       /**
+        * Gets the text selected by the current defaults, which
+        *  start out as slide text only (see setSlidesByDefault
+        *  and setNotesByDefault)
+        */
+       public String getText() {
+               return getText(slidesByDefault, notesByDefault);
+       }
+       
+       /**
+        * Gets the requested text from the file
+        * @param slideText Should we retrieve text from slides?
+        * @param notesText Should we retrieve text from notes?
+        * @return The text, slide by slide, with each slide's text
+        *  ahead of its notes text
+        * @throws RuntimeException Wrapping any problem hit while
+        *  loading a requested slide or notes part
+        */
+       public String getText(boolean slideText, boolean notesText) {
+               StringBuffer text = new StringBuffer();
+               
+               CTSlideIdListEntry[] slideRefs =
+                       slideshow.getSlideReferences().getSldIdArray();
+               for (int i = 0; i < slideRefs.length; i++) {
+                       try {
+                               // Only load the parts whose text was asked for, so
+                               //  that a problem with an unwanted part can't break
+                               //  the extraction of the wanted one
+                               if(slideText) {
+                                       CTSlide slide =
+                                               slideshow.getSlide(slideRefs[i]);
+                                       extractText(slide.getCSld().getSpTree(), text);
+                               }
+                               if(notesText) {
+                                       CTNotesSlide notes = 
+                                               slideshow.getNotes(slideRefs[i]);
+                                       // Not every slide has notes
+                                       if(notes != null) {
+                                               extractText(notes.getCSld().getSpTree(), text);
+                                       }
+                               }
+                       } catch(Exception e) {
+                               throw new RuntimeException(e);
+                       }
+               }
+               
+               return text.toString();
+       }
+       
+       /**
+        * Appends the text of every run, of every paragraph, of every
+        *  shape held directly in the group's shape array.
+        * Shapes without a text body are skipped.
+        */
+       private void extractText(CTGroupShape gs, StringBuffer text) {
+               CTShape[] shapes = gs.getSpArray();
+               for (int i = 0; i < shapes.length; i++) {
+                       CTTextBody textBody =
+                               shapes[i].getTxBody();
+                       if(textBody != null) {
+                               CTTextParagraph[] paras = 
+                                       textBody.getPArray();
+                               for (int j = 0; j < paras.length; j++) {
+                                       CTRegularTextRun[] textRuns =
+                                               paras[j].getRArray();
+                                       for (int k = 0; k < textRuns.length; k++) {
+                                               text.append( textRuns[k].getT() );
+                                       }
+                                       // End each paragraph with a new line
+                                       text.append("\n");
+                               }
+                       }
+               }
+       }
+}
diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/HSLFXMLSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/HSLFXMLSlideShow.java
deleted file mode 100644 (file)
index f4360bf..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.usermodel;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.hslf.HSLFXML;
-
-/**
- * High level representation of a ooxml slideshow.
- * This is the first object most users will construct whether
- *  they are reading or writing a slideshow. It is also the
- *  top level object for creating new slides/etc.
- */
-public class HSLFXMLSlideShow extends POIXMLDocument {
-       private org.apache.poi.hslf.XSLFXML hslfXML;
-       
-       public HSLFXMLSlideShow(XSLFXML xml) {
-               super(xml);
-               this.hslfXML = xml;
-       }
-       
-       public XSLFXML _getHSLFXML() {
-               return hslfXML;
-       }
-}
diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java
new file mode 100644 (file)
index 0000000..5bc1cf8
--- /dev/null
@@ -0,0 +1,40 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf.usermodel;
+
+import org.apache.poi.xslf.XSLFSlideShow;
+
+/**
+ * High level representation of an OOXML slideshow, wrapping a
+ *  low level {@link XSLFSlideShow}.
+ * This is the first object most users will construct whether
+ *  they are reading or writing a slideshow. It is also the
+ *  top level object for creating new slides/etc.
+ * For now it only holds on to the low level slideshow it was
+ *  given; access to slides and notes is still to be added.
+ */
+public class XMLSlideShow {
+       /** The low level slideshow supplied at construction */
+       private XSLFSlideShow xslfSlideShow;
+
+       /**
+        * Wraps the given low level slideshow.
+        *
+        * @param xml the low level slideshow to wrap
+        */
+       public XMLSlideShow(XSLFSlideShow xml) {
+               xslfSlideShow = xml;
+       }
+
+       /**
+        * @return the low level slideshow passed to the constructor
+        */
+       public XSLFSlideShow _getXSLFSlideShow() {
+               return xslfSlideShow;
+       }
+
+       // TODO: Get slides
+       // TODO: Get notes
+}
index 36adb497cd0e49a6ba1196a8b2fc2e5e6f12c3c7..dca593c3c72ca051ac5dbfe535ddf85789bf5686 100644 (file)
@@ -17,7 +17,7 @@
 ==================================================================== */
         
 
-package org.apache.poi.hxf;
+package org.apache.poi;
 
 import junit.framework.TestCase;
 import java.io.*;
@@ -38,7 +38,7 @@ public class TestDetectAsOOXML extends TestCase
        {
                File f = new File(dirname + "/sample.xlsx");
 
-               HXFDocument.openPackage(f);
+               POIXMLDocument.openPackage(f.toString());
        }
        
        public void testDetectAsPOIFS() throws Exception {
@@ -48,18 +48,18 @@ public class TestDetectAsOOXML extends TestCase
                in = new PushbackInputStream(
                                new FileInputStream(dirname + "/SampleSS.xlsx"), 10
                );
-               assertTrue(HXFDocument.hasOOXMLHeader(in));
+               assertTrue(POIXMLDocument.hasOOXMLHeader(in));
                
                // xls file isn't
                in = new PushbackInputStream(
                                new FileInputStream(dirname + "/SampleSS.xls"), 10
                );
-               assertFalse(HXFDocument.hasOOXMLHeader(in));
+               assertFalse(POIXMLDocument.hasOOXMLHeader(in));
                
                // text file isn't
                in = new PushbackInputStream(
                                new FileInputStream(dirname + "/SampleSS.txt"), 10
                );
-               assertFalse(HXFDocument.hasOOXMLHeader(in));
+               assertFalse(POIXMLDocument.hasOOXMLHeader(in));
        }
 }
diff --git a/src/ooxml/testcases/org/apache/poi/xslf/TestHSLFXML.java b/src/ooxml/testcases/org/apache/poi/xslf/TestHSLFXML.java
deleted file mode 100644 (file)
index fd4653a..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf;
-
-import java.io.File;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
-
-import junit.framework.TestCase;
-
-public class TestHSLFXML extends TestCase {
-       private File sampleFile;
-
-       protected void setUp() throws Exception {
-               super.setUp();
-               
-               sampleFile = new File(
-                               System.getProperty("HSLF.testdata.path") +
-                               File.separator + "sample.pptx"
-               );
-       }
-
-       public void testContainsMainContentType() throws Exception {
-               Package pack = HXFDocument.openPackage(sampleFile);
-               
-               boolean found = false;
-               for(PackagePart part : pack.getParts()) {
-                       if(part.getContentType().equals(HSLFXML.MAIN_CONTENT_TYPE)) {
-                               found = true;
-                       }
-                       System.out.println(part);
-               }
-               assertTrue(found);
-       }
-
-       public void testOpen() throws Exception {
-               HXFDocument.openPackage(sampleFile);
-               
-               HSLFXML xml;
-               
-               // With the finalised uri, should be fine
-               xml = new HSLFXML(
-                               HXFDocument.openPackage(sampleFile)
-               );
-               
-               // Check the core
-               assertNotNull(xml.getPresentation());
-               
-               // Check it has some slides
-               assertTrue(
-                       xml.getSlideReferences().sizeOfSldIdArray() > 0
-               );
-               assertTrue(
-                               xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0
-                       );
-       }
-       
-       public void testSlideBasics() throws Exception {
-               HSLFXML xml = new HSLFXML(
-                               HXFDocument.openPackage(sampleFile)
-               );
-               
-               // Should have 1 master
-               assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray());
-               assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length);
-               
-               // Should have three sheets
-               assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray());
-               assertEquals(2, xml.getSlideReferences().getSldIdArray().length);
-               
-               // Check they're as expected
-               CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray();
-               assertEquals(256, slides[0].getId());
-               assertEquals(257, slides[1].getId());
-               assertEquals("rId2", slides[0].getId2());
-               assertEquals("rId3", slides[1].getId2());
-               
-               // Now get those objects
-               assertNotNull(xml.getSlide(slides[0]));
-               assertNotNull(xml.getSlide(slides[1]));
-               
-               // And check they have notes as expected
-               assertNotNull(xml.getNotes(slides[0]));
-               assertNotNull(xml.getNotes(slides[1]));
-               
-               // And again for the master
-               CTSlideMasterIdListEntry[] masters =
-                       xml.getSlideMasterReferences().getSldMasterIdArray();
-               assertEquals(2147483648l, masters[0].getId());
-               assertEquals("rId1", masters[0].getId2());
-               assertNotNull(xml.getSlideMaster(masters[0]));
-       }
-       
-       public void testMetadataBasics() throws Exception {
-               HSLFXML xml = new HSLFXML(
-                               HXFDocument.openPackage(sampleFile)
-               );
-               
-               assertNotNull(xml.getCoreProperties());
-               assertNotNull(xml.getExtendedProperties());
-               
-               assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication());
-               assertEquals(0, xml.getExtendedProperties().getCharacters());
-               assertEquals(0, xml.getExtendedProperties().getLines());
-               
-               assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
-               assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
-       }
-}
diff --git a/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFSlideShow.java b/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFSlideShow.java
new file mode 100644 (file)
index 0000000..c25d08a
--- /dev/null
@@ -0,0 +1,123 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf;
+
+import java.io.File;
+
+import org.apache.poi.POIXMLDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that {@link XSLFSlideShow} can open the sample.pptx test file
+ *  and exposes its presentation, slides, notes, masters and properties.
+ */
+public class TestXSLFSlideShow extends TestCase {
+       /** Path to sample.pptx, rebuilt in {@link #setUp()} before each test */
+       private String sampleFile;
+
+       /**
+        * Builds the sample file path from the HSLF.testdata.path
+        *  system property.
+        */
+       protected void setUp() throws Exception {
+               super.setUp();
+
+               sampleFile = new File(
+                               System.getProperty("HSLF.testdata.path") +
+                               File.separator + "sample.pptx"
+               ).toString();
+       }
+
+       /**
+        * Checks that at least one part of the sample package has the
+        *  main content type of a slideshow.
+        */
+       public void testContainsMainContentType() throws Exception {
+               Package pack = POIXMLDocument.openPackage(sampleFile);
+
+               boolean found = false;
+               for(PackagePart part : pack.getParts()) {
+                       if(part.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
+                               found = true;
+                       }
+               }
+               assertTrue("No part with the main content type was found", found);
+       }
+
+       /**
+        * Checks that the sample file opens, and that the presentation,
+        *  slide references and slide master references are available.
+        */
+       public void testOpen() throws Exception {
+               // Opening the package on its own should not fail
+               POIXMLDocument.openPackage(sampleFile);
+
+               XSLFSlideShow xml;
+
+               // With the finalised uri, should be fine
+               xml = new XSLFSlideShow(
+                               POIXMLDocument.openPackage(sampleFile)
+               );
+
+               // Check the core
+               assertNotNull(xml.getPresentation());
+
+               // Check it has some slides
+               assertTrue(
+                               xml.getSlideReferences().sizeOfSldIdArray() > 0
+               );
+               assertTrue(
+                               xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0
+               );
+       }
+
+       /**
+        * Checks the slide and slide master references of the sample file,
+        *  and that the slides, notes and master they point to can be fetched.
+        */
+       public void testSlideBasics() throws Exception {
+               XSLFSlideShow xml = new XSLFSlideShow(sampleFile);
+
+               // Should have 1 master
+               assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray());
+               assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length);
+
+               // Should have two slides
+               assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray());
+               assertEquals(2, xml.getSlideReferences().getSldIdArray().length);
+
+               // Check they're as expected
+               CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray();
+               assertEquals(256, slides[0].getId());
+               assertEquals(257, slides[1].getId());
+               assertEquals("rId2", slides[0].getId2());
+               assertEquals("rId3", slides[1].getId2());
+
+               // Now get those objects
+               assertNotNull(xml.getSlide(slides[0]));
+               assertNotNull(xml.getSlide(slides[1]));
+
+               // And check they have notes as expected
+               assertNotNull(xml.getNotes(slides[0]));
+               assertNotNull(xml.getNotes(slides[1]));
+
+               // And again for the master
+               CTSlideMasterIdListEntry[] masters =
+                       xml.getSlideMasterReferences().getSldMasterIdArray();
+               // Upper case L, so the suffix can't be misread as a digit
+               assertEquals(2147483648L, masters[0].getId());
+               assertEquals("rId1", masters[0].getId2());
+               assertNotNull(xml.getSlideMaster(masters[0]));
+       }
+
+       /**
+        * Checks that the core and extended properties of the sample file
+        *  are available and hold the expected values.
+        */
+       public void testMetadataBasics() throws Exception {
+               XSLFSlideShow xml = new XSLFSlideShow(sampleFile);
+
+               assertNotNull(xml.getCoreProperties());
+               assertNotNull(xml.getExtendedProperties());
+
+               assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication());
+               assertEquals(0, xml.getExtendedProperties().getCharacters());
+               assertEquals(0, xml.getExtendedProperties().getLines());
+
+               // The sample file has neither a title nor a subject set
+               assertNull(xml.getCoreProperties().getTitleProperty().getValue());
+               assertNull(xml.getCoreProperties().getSubjectProperty().getValue());
+       }
+}
index 6a006ab5c810db6d332e02c3b11c7a3e0d4af4b7..3b2ba0746ad31a09fd4502b795ed11e26df7cdb8 100644 (file)
@@ -49,7 +49,7 @@ public class TestHXFPowerPointExtractor extends TestCase {
         */
        public void testGetSimpleText() throws Exception {
                new HXFPowerPointExtractor(xmlA.getPackage());
-               new HXFPowerPointExtractor(new HSLFXMLSlideShow(xmlA));
+               new HXFPowerPointExtractor(new XMLSlideShow(xmlA));
                
                HXFPowerPointExtractor extractor = 
                        new HXFPowerPointExtractor(xmlA.getPackage());