import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.IOUtils;
+import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.InvalidFormatException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart;
import org.openxml4j.opc.PackagePartName;
import org.openxml4j.opc.PackageRelationship;
+import org.openxml4j.opc.PackageRelationshipCollection;
import org.openxml4j.opc.PackageRelationshipTypes;
import org.openxml4j.opc.PackagingURIHelper;
-
+import org.openxml4j.opc.internal.PackagePropertiesPart;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
public abstract class POIXMLDocument {
* in the event of a problem.
* Works around shortcomings in java's this() constructor calls
*/
- protected static Package openPackage(String path) throws IOException {
+ public static Package openPackage(String path) throws IOException {
try {
return Package.open(path);
} catch (InvalidFormatException e) {
}
return part;
}
+
+ /**
+  * Looks up the relationships of the given type on the core
+  *  part and resolves the one match to its target part.
+  * @param relationType The relation type to search for
+  * @return The target part of the sole matching relationship,
+  *  or null if there is no relationship of that type
+  * @throws IllegalArgumentException If more than one relationship of that type is found
+  * @throws OpenXML4JException Propagated from the underlying package calls
+  */
+ protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
+     PackageRelationshipCollection matches =
+         getCorePart().getRelationshipsByType(relationType);
+     int count = matches.size();
+
+     // Ambiguous: callers rely on there being at most one part of a type
+     if(count > 1) {
+         throw new IllegalArgumentException("Found " + count + " relations for the type " + relationType + ", should only ever be one!");
+     }
+
+     return (count == 0) ? null : getTargetPart(matches.getRelationship(0));
+ }
+
/**
* Checks that the supplied InputStream (which MUST
header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
);
}
+
+ /**
+  * Returns the part holding the core document properties
+  *  (core ooxml properties), or null if the document has
+  *  no such part.
+  * TODO: Replace with nice usermodel wrapper
+  * @deprecated To be replaced with a proper user-model style view of the properties
+  */
+ public PackagePropertiesPart getCoreProperties() throws OpenXML4JException, IOException {
+     // Casting a null reference is legal and gives null back, so
+     //  the "no properties part" case needs no branch of its own
+     return (PackagePropertiesPart)getSinglePartByRelationType(CORE_PROPERTIES_REL_TYPE);
+ }
+
+ /**
+  * Get the extended document properties (extended ooxml properties),
+  *  parsed from the part related to the core part by the
+  *  extended properties relation type.
+  * TODO: Replace with nice usermodel wrapper
+  * @return The parsed extended properties, or null if the document
+  *  has no extended properties part
+  * @throws IllegalArgumentException If more than one extended properties part is found
+  * @deprecated To be replaced with a proper user-model style view of the properties
+  */
+ public CTProperties getExtendedProperties() throws OpenXML4JException, XmlException, IOException {
+     PackagePart propsPart = getSinglePartByRelationType(EXTENDED_PROPERTIES_REL_TYPE);
+     if(propsPart == null) {
+         // The part is optional; report its absence the same way
+         //  getCoreProperties() does instead of failing with a
+         //  NullPointerException on the stream access below
+         return null;
+     }
+
+     PropertiesDocument props = PropertiesDocument.Factory.parse(
+         propsPart.getInputStream());
+     return props.getProperties();
+ }
}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf;
-
-import java.io.IOException;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxml4j.opc.PackageRelationshipCollection;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument;
-
-/**
- * Experimental class to do low level processing
- * of pptx files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public class HSLFXML extends HXFDocument {
- public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
- public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";
- public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
- public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout";
- public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide";
-
- private PresentationDocument presentationDoc;
-
- public HSLFXML(Package container) throws OpenXML4JException, IOException, XmlException {
- super(container, MAIN_CONTENT_TYPE);
-
- presentationDoc =
- PresentationDocument.Factory.parse(basePart.getInputStream());
- }
-
- /**
- * Returns the low level presentation base object
- */
- public CTPresentation getPresentation() {
- return presentationDoc.getPresentation();
- }
-
- /**
- * Returns the references from the presentation to its
- * slides.
- * You'll need these to figure out the slide ordering,
- * and to get at the actual slides themselves
- */
- public CTSlideIdList getSlideReferences() {
- return getPresentation().getSldIdLst();
- }
- /**
- * Returns the references from the presentation to its
- * slide masters.
- * You'll need these to get at the actual slide
- * masters themselves
- */
- public CTSlideMasterIdList getSlideMasterReferences() {
- return getPresentation().getSldMasterIdLst();
- }
-
- /**
- * Returns the low level slide master object from
- * the supplied slide master reference
- */
- public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException {
- PackagePart masterPart =
- getRelatedPackagePart(master.getId2());
- SldMasterDocument masterDoc =
- SldMasterDocument.Factory.parse(masterPart.getInputStream());
- return masterDoc.getSldMaster();
- }
-
- /**
- * Returns the low level slide object from
- * the supplied slide reference
- */
- public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException {
- PackagePart slidePart =
- getRelatedPackagePart(slide.getId2());
- SldDocument slideDoc =
- SldDocument.Factory.parse(slidePart.getInputStream());
- return slideDoc.getSld();
- }
-
- /**
- * Returns the low level notes object for the given
- * slide, as found from the supplied slide reference
- */
- public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException {
- PackagePart slidePart =
- getRelatedPackagePart(slide.getId2());
-
- PackageRelationshipCollection notes;
- try {
- notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE);
- } catch(InvalidFormatException e) {
- throw new IllegalStateException(e);
- }
-
- if(notes.size() == 0) {
- // No notes for this slide
- return null;
- }
- if(notes.size() > 1) {
- throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + notes.size());
- }
-
- PackagePart notesPart =
- getPackagePart(notes.getRelationship(0));
- NotesDocument notesDoc =
- NotesDocument.Factory.parse(notesPart.getInputStream());
-
- return notesDoc.getNotes();
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf;
+
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxml4j.opc.PackageRelationshipCollection;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument;
+import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument;
+import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument;
+import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument;
+
+/**
+ * Experimental, low level view onto the parts of a pptx file.
+ *
+ * What gets handed back are the raw schema objects, so when
+ *  working with this class you will almost certainly need to
+ *  refer to the OOXML specifications from
+ *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
+ *
+ * WARNING - APIs expected to change rapidly
+ */
+public class XSLFSlideShow extends POIXMLDocument {
+    public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
+    public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";
+    public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
+    public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout";
+    public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide";
+
+    /** The core part of the package, parsed once at construction */
+    private PresentationDocument presentationDoc;
+
+    /**
+     * Builds the slideshow from an already opened package, parsing
+     *  the package's core part as the presentation document.
+     */
+    public XSLFSlideShow(Package container) throws OpenXML4JException, IOException, XmlException {
+        super(container);
+
+        presentationDoc = PresentationDocument.Factory.parse(
+            getCorePart().getInputStream());
+    }
+    /**
+     * Opens the package at the given path, then builds the
+     *  slideshow from it.
+     */
+    public XSLFSlideShow(String file) throws OpenXML4JException, IOException, XmlException {
+        this(openPackage(file));
+    }
+
+    /**
+     * Returns the low level presentation base object
+     */
+    public CTPresentation getPresentation() {
+        return presentationDoc.getPresentation();
+    }
+
+    /**
+     * Returns the presentation's list of slide references.
+     * The slide ordering comes from these, and each entry
+     *  is what {@link #getSlide(CTSlideIdListEntry)} and
+     *  {@link #getNotes(CTSlideIdListEntry)} take as input.
+     */
+    public CTSlideIdList getSlideReferences() {
+        CTPresentation presentation = getPresentation();
+        return presentation.getSldIdLst();
+    }
+    /**
+     * Returns the presentation's list of slide master references.
+     * Each entry is what
+     *  {@link #getSlideMaster(CTSlideMasterIdListEntry)} takes as input.
+     */
+    public CTSlideMasterIdList getSlideMasterReferences() {
+        CTPresentation presentation = getPresentation();
+        return presentation.getSldMasterIdLst();
+    }
+
+    /**
+     * Returns the low level slide master object that the
+     *  supplied slide master reference points at.
+     * @throws XmlException If the master can't be parsed, or wrapping
+     *  the InvalidFormatException raised while locating its part
+     */
+    public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException {
+        String relationId = master.getId2();
+        try {
+            PackagePart part =
+                getTargetPart(getCorePart().getRelationship(relationId));
+            return SldMasterDocument.Factory.parse(part.getInputStream()).getSldMaster();
+        } catch(InvalidFormatException cause) {
+            throw new XmlException(cause);
+        }
+    }
+
+    /**
+     * Returns the low level slide object that the supplied
+     *  slide reference points at.
+     * @throws XmlException If the slide can't be parsed, or wrapping
+     *  the InvalidFormatException raised while locating its part
+     */
+    public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException {
+        String relationId = slide.getId2();
+        try {
+            PackagePart part =
+                getTargetPart(getCorePart().getRelationship(relationId));
+            return SldDocument.Factory.parse(part.getInputStream()).getSld();
+        } catch(InvalidFormatException cause) {
+            throw new XmlException(cause);
+        }
+    }
+
+    /**
+     * Returns the low level notes object belonging to the slide
+     *  that the supplied slide reference points at.
+     * @return The notes, or null if the slide has no notes relationship
+     * @throws IllegalStateException If the slide has more than one notes
+     *  relationship, or wrapping the InvalidFormatException raised
+     *  while locating the slide or notes part
+     */
+    public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException {
+        String relationId = slide.getId2();
+        try {
+            PackagePart slidePart =
+                getTargetPart(getCorePart().getRelationship(relationId));
+            PackageRelationshipCollection noteRels =
+                slidePart.getRelationshipsByType(NOTES_RELATION_TYPE);
+
+            int found = noteRels.size();
+            if(found == 0) {
+                // No notes for this slide
+                return null;
+            }
+            if(found > 1) {
+                throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + found);
+            }
+
+            PackagePart notesPart = getTargetPart(noteRels.getRelationship(0));
+            return NotesDocument.Factory.parse(notesPart.getInputStream()).getNotes();
+        } catch(InvalidFormatException cause) {
+            throw new IllegalStateException(cause);
+        }
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hslf.HSLFXML;
-import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-
-public class HXFPowerPointExtractor extends POIXMLTextExtractor {
- private HSLFXMLSlideShow slideshow;
- private boolean slidesByDefault = true;
- private boolean notesByDefault = false;
-
- public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
- this(new HSLFXMLSlideShow(
- new XSLFXML(container)
- ));
- }
- public HXFPowerPointExtractor(HSLFXMLSlideShow slideshow) {
- super(slideshow);
- this.slideshow = slideshow;
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" HXFPowerPointExtractor <filename.pptx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new HXFPowerPointExtractor(HXFDocument.openPackage(
- new File(args[0])
- ));
- System.out.println(extractor.getText());
- }
-
- /**
- * Should a call to getText() return slide text?
- * Default is yes
- */
- public void setSlidesByDefault(boolean slidesByDefault) {
- this.slidesByDefault = slidesByDefault;
- }
- /**
- * Should a call to getText() return notes text?
- * Default is no
- */
- public void setNotesByDefault(boolean notesByDefault) {
- this.notesByDefault = notesByDefault;
- }
-
- /**
- * Gets the slide text, but not the notes text
- */
- public String getText() {
- return getText(slidesByDefault, notesByDefault);
- }
-
- /**
- * Gets the requested text from the file
- * @param slideText Should we retrieve text from slides?
- * @param notesText Should we retrieve text from notes?
- */
- public String getText(boolean slideText, boolean notesText) {
- StringBuffer text = new StringBuffer();
-
- CTSlideIdListEntry[] slideRefs =
- slideshow._getHSLFXML().getSlideReferences().getSldIdArray();
- for (int i = 0; i < slideRefs.length; i++) {
- try {
- CTSlide slide =
- slideshow._getHSLFXML().getSlide(slideRefs[i]);
- CTNotesSlide notes =
- slideshow._getHSLFXML().getNotes(slideRefs[i]);
-
- if(slideText) {
- extractText(slide.getCSld().getSpTree(), text);
- }
- if(notesText && notes != null) {
- extractText(notes.getCSld().getSpTree(), text);
- }
- } catch(Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- return text.toString();
- }
-
- private void extractText(CTGroupShape gs, StringBuffer text) {
- CTShape[] shapes = gs.getSpArray();
- for (int i = 0; i < shapes.length; i++) {
- CTTextBody textBody =
- shapes[i].getTxBody();
- if(textBody != null) {
- CTTextParagraph[] paras =
- textBody.getPArray();
- for (int j = 0; j < paras.length; j++) {
- CTRegularTextRun[] textRuns =
- paras[j].getRArray();
- for (int k = 0; k < textRuns.length; k++) {
- text.append( textRuns[k].getT() );
- }
- // End each paragraph with a new line
- text.append("\n");
- }
- }
- }
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf.extractor;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+
+/**
+ * Pulls the text of the text runs out of the shapes on the
+ *  slides, and optionally the notes, of a pptx slideshow.
+ */
+public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
+    private XSLFSlideShow slideshow;
+    private boolean slidesByDefault = true;
+    private boolean notesByDefault = false;
+
+    /**
+     * Creates an extractor for the slideshow held in the
+     *  supplied, already opened, package.
+     */
+    public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
+        this(new XSLFSlideShow(container));
+    }
+    /**
+     * Creates an extractor for the supplied slideshow.
+     */
+    public XSLFPowerPointExtractor(XSLFSlideShow slideshow) {
+        super(slideshow);
+        this.slideshow = slideshow;
+    }
+
+    /**
+     * Command line entry point: prints the text of the pptx
+     *  file named by the first argument to standard out.
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Use:");
+            // Name the class the user actually has to run
+            System.err.println(" XSLFPowerPointExtractor <filename.pptx>");
+            System.exit(1);
+        }
+        POIXMLTextExtractor extractor =
+            new XSLFPowerPointExtractor(
+                new XSLFSlideShow(args[0]));
+        System.out.println(extractor.getText());
+    }
+
+    /**
+     * Should a call to getText() return slide text?
+     * Default is yes
+     */
+    public void setSlidesByDefault(boolean slidesByDefault) {
+        this.slidesByDefault = slidesByDefault;
+    }
+    /**
+     * Should a call to getText() return notes text?
+     * Default is no
+     */
+    public void setNotesByDefault(boolean notesByDefault) {
+        this.notesByDefault = notesByDefault;
+    }
+
+    /**
+     * Gets the text selected by the current defaults: slide
+     *  text but no notes text, unless changed through
+     *  {@link #setSlidesByDefault(boolean)} and
+     *  {@link #setNotesByDefault(boolean)}.
+     */
+    public String getText() {
+        return getText(slidesByDefault, notesByDefault);
+    }
+
+    /**
+     * Gets the requested text from the file. For each slide, in
+     *  slide reference order, the slide text (if requested) comes
+     *  first, followed by its notes text (if requested and present).
+     * Each slide and its notes are loaded whatever the flags say.
+     * @param slideText Should we retrieve text from slides?
+     * @param notesText Should we retrieve text from notes?
+     * @return The text, which is empty if the presentation has no slide list
+     * @throws RuntimeException Wrapping any exception hit while reading a slide or its notes
+     */
+    public String getText(boolean slideText, boolean notesText) {
+        StringBuffer text = new StringBuffer();
+
+        // The slide id list is an optional element, so a presentation
+        //  without any slides may not have one at all
+        if(slideshow.getSlideReferences() == null) {
+            return text.toString();
+        }
+
+        CTSlideIdListEntry[] slideRefs =
+            slideshow.getSlideReferences().getSldIdArray();
+        for (int i = 0; i < slideRefs.length; i++) {
+            try {
+                CTSlide slide =
+                    slideshow.getSlide(slideRefs[i]);
+                CTNotesSlide notes =
+                    slideshow.getNotes(slideRefs[i]);
+
+                if(slideText) {
+                    extractText(slide.getCSld().getSpTree(), text);
+                }
+                if(notesText && notes != null) {
+                    extractText(notes.getCSld().getSpTree(), text);
+                }
+            } catch(Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return text.toString();
+    }
+
+    /**
+     * Appends the text of every text run of every paragraph of
+     *  every shape directly inside the group to the buffer.
+     * Shapes without a text body are skipped.
+     */
+    private void extractText(CTGroupShape gs, StringBuffer text) {
+        CTShape[] shapes = gs.getSpArray();
+        for (int i = 0; i < shapes.length; i++) {
+            CTTextBody textBody =
+                shapes[i].getTxBody();
+            if(textBody != null) {
+                CTTextParagraph[] paras =
+                    textBody.getPArray();
+                for (int j = 0; j < paras.length; j++) {
+                    CTRegularTextRun[] textRuns =
+                        paras[j].getRArray();
+                    for (int k = 0; k < textRuns.length; k++) {
+                        text.append( textRuns[k].getT() );
+                    }
+                    // End each paragraph with a new line
+                    text.append("\n");
+                }
+            }
+        }
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.usermodel;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.hslf.HSLFXML;
-
-/**
- * High level representation of a ooxml slideshow.
- * This is the first object most users will construct whether
- * they are reading or writing a slideshow. It is also the
- * top level object for creating new slides/etc.
- */
-public class HSLFXMLSlideShow extends POIXMLDocument {
- private org.apache.poi.hslf.XSLFXML hslfXML;
-
- public HSLFXMLSlideShow(XSLFXML xml) {
- super(xml);
- this.hslfXML = xml;
- }
-
- public XSLFXML _getHSLFXML() {
- return hslfXML;
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf.usermodel;
+
+import org.apache.poi.xslf.XSLFSlideShow;
+
+/**
+ * High level representation of an ooxml slideshow.
+ * Most users will construct this object first, whether they
+ *  are reading or writing a slideshow, and it is also the
+ *  top level object for creating new slides/etc.
+ */
+public class XMLSlideShow {
+    /** The low level slideshow this object wraps */
+    private XSLFSlideShow xslfSlideShow;
+
+    /**
+     * Wraps the supplied low level slideshow.
+     */
+    public XMLSlideShow(XSLFSlideShow xml) {
+        xslfSlideShow = xml;
+    }
+
+    /**
+     * Returns the low level slideshow that was supplied
+     *  at construction.
+     */
+    public XSLFSlideShow _getXSLFSlideShow() {
+        return this.xslfSlideShow;
+    }
+
+    // TODO: Get slides
+    // TODO: Get notes
+}
==================================================================== */
-package org.apache.poi.hxf;
+package org.apache.poi;
import junit.framework.TestCase;
import java.io.*;
{
File f = new File(dirname + "/sample.xlsx");
- HXFDocument.openPackage(f);
+ POIXMLDocument.openPackage(f.toString());
}
public void testDetectAsPOIFS() throws Exception {
in = new PushbackInputStream(
new FileInputStream(dirname + "/SampleSS.xlsx"), 10
);
- assertTrue(HXFDocument.hasOOXMLHeader(in));
+ assertTrue(POIXMLDocument.hasOOXMLHeader(in));
// xls file isn't
in = new PushbackInputStream(
new FileInputStream(dirname + "/SampleSS.xls"), 10
);
- assertFalse(HXFDocument.hasOOXMLHeader(in));
+ assertFalse(POIXMLDocument.hasOOXMLHeader(in));
// text file isn't
in = new PushbackInputStream(
new FileInputStream(dirname + "/SampleSS.txt"), 10
);
- assertFalse(HXFDocument.hasOOXMLHeader(in));
+ assertFalse(POIXMLDocument.hasOOXMLHeader(in));
}
}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf;
-
-import java.io.File;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
-
-import junit.framework.TestCase;
-
-public class TestHSLFXML extends TestCase {
- private File sampleFile;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- sampleFile = new File(
- System.getProperty("HSLF.testdata.path") +
- File.separator + "sample.pptx"
- );
- }
-
- public void testContainsMainContentType() throws Exception {
- Package pack = HXFDocument.openPackage(sampleFile);
-
- boolean found = false;
- for(PackagePart part : pack.getParts()) {
- if(part.getContentType().equals(HSLFXML.MAIN_CONTENT_TYPE)) {
- found = true;
- }
- System.out.println(part);
- }
- assertTrue(found);
- }
-
- public void testOpen() throws Exception {
- HXFDocument.openPackage(sampleFile);
-
- HSLFXML xml;
-
- // With the finalised uri, should be fine
- xml = new HSLFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Check the core
- assertNotNull(xml.getPresentation());
-
- // Check it has some slides
- assertTrue(
- xml.getSlideReferences().sizeOfSldIdArray() > 0
- );
- assertTrue(
- xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0
- );
- }
-
- public void testSlideBasics() throws Exception {
- HSLFXML xml = new HSLFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Should have 1 master
- assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray());
- assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length);
-
- // Should have three sheets
- assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray());
- assertEquals(2, xml.getSlideReferences().getSldIdArray().length);
-
- // Check they're as expected
- CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray();
- assertEquals(256, slides[0].getId());
- assertEquals(257, slides[1].getId());
- assertEquals("rId2", slides[0].getId2());
- assertEquals("rId3", slides[1].getId2());
-
- // Now get those objects
- assertNotNull(xml.getSlide(slides[0]));
- assertNotNull(xml.getSlide(slides[1]));
-
- // And check they have notes as expected
- assertNotNull(xml.getNotes(slides[0]));
- assertNotNull(xml.getNotes(slides[1]));
-
- // And again for the master
- CTSlideMasterIdListEntry[] masters =
- xml.getSlideMasterReferences().getSldMasterIdArray();
- assertEquals(2147483648l, masters[0].getId());
- assertEquals("rId1", masters[0].getId2());
- assertNotNull(xml.getSlideMaster(masters[0]));
- }
-
- public void testMetadataBasics() throws Exception {
- HSLFXML xml = new HSLFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication());
- assertEquals(0, xml.getExtendedProperties().getCharacters());
- assertEquals(0, xml.getExtendedProperties().getLines());
-
- assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf;
+
+import java.io.File;
+
+import org.apache.poi.POIXMLDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link XSLFSlideShow}, run against the file
+ * sample.pptx in the directory named by the
+ * HSLF.testdata.path system property.
+ */
+public class TestXSLFSlideShow extends TestCase {
+ /** Path of the sample presentation, built in setUp() */
+ private String sampleFile;
+
+ /**
+ * Builds the path to sample.pptx from the
+ * HSLF.testdata.path system property.
+ */
+ protected void setUp() throws Exception {
+ super.setUp();
+
+ sampleFile = new File(
+ System.getProperty("HSLF.testdata.path") +
+ File.separator + "sample.pptx"
+ ).toString();
+ }
+
+ /**
+ * Checks that at least one part of the sample package has
+ * the content type XSLFSlideShow.MAIN_CONTENT_TYPE.
+ */
+ public void testContainsMainContentType() throws Exception {
+ Package pack = POIXMLDocument.openPackage(sampleFile);
+
+ boolean found = false;
+ for(PackagePart part : pack.getParts()) {
+ if(part.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
+ found = true;
+ }
+ // Debug output only: lists every part in the package
+ System.out.println(part);
+ }
+ assertTrue(found);
+ }
+
+ /**
+ * Checks that the sample file can be opened as a package and
+ * wrapped in an XSLFSlideShow, and that the result has a
+ * presentation, at least one slide reference and at least
+ * one slide master reference.
+ */
+ public void testOpen() throws Exception {
+ // Open the package on its own first; the result is discarded,
+ // so this only verifies that opening does not throw
+ POIXMLDocument.openPackage(sampleFile);
+
+ XSLFSlideShow xml;
+
+ // With the finalised uri, should be fine
+ xml = new XSLFSlideShow(
+ POIXMLDocument.openPackage(sampleFile)
+ );
+
+ // Check the core
+ assertNotNull(xml.getPresentation());
+
+ // Check it has some slides
+ assertTrue(
+ xml.getSlideReferences().sizeOfSldIdArray() > 0
+ );
+ assertTrue(
+ xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0
+ );
+ }
+
+ /**
+ * Checks the slide and slide master references of the sample
+ * file (counts, ids and relationship ids), and that the
+ * slide, notes and master objects they refer to can be fetched.
+ * Uses the XSLFSlideShow constructor that takes a file path.
+ */
+ public void testSlideBasics() throws Exception {
+ XSLFSlideShow xml = new XSLFSlideShow(sampleFile);
+
+ // Should have 1 master
+ assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray());
+ assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length);
+
+ // Should have two slides
+ assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray());
+ assertEquals(2, xml.getSlideReferences().getSldIdArray().length);
+
+ // Check they're as expected
+ CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray();
+ assertEquals(256, slides[0].getId());
+ assertEquals(257, slides[1].getId());
+ assertEquals("rId2", slides[0].getId2());
+ assertEquals("rId3", slides[1].getId2());
+
+ // Now get those objects
+ assertNotNull(xml.getSlide(slides[0]));
+ assertNotNull(xml.getSlide(slides[1]));
+
+ // And check they have notes as expected
+ assertNotNull(xml.getNotes(slides[0]));
+ assertNotNull(xml.getNotes(slides[1]));
+
+ // And again for the master
+ CTSlideMasterIdListEntry[] masters =
+ xml.getSlideMasterReferences().getSldMasterIdArray();
+ // The trailing character below is a lower case L (long literal
+ // suffix), not a digit: 2147483648 does not fit in an int
+ assertEquals(2147483648l, masters[0].getId());
+ assertEquals("rId1", masters[0].getId2());
+ assertNotNull(xml.getSlideMaster(masters[0]));
+ }
+
+ /**
+ * Checks that the core and extended properties of the sample
+ * file are present, and spot checks a few of their values.
+ */
+ public void testMetadataBasics() throws Exception {
+ XSLFSlideShow xml = new XSLFSlideShow(sampleFile);
+
+ assertNotNull(xml.getCoreProperties());
+ assertNotNull(xml.getExtendedProperties());
+
+ assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication());
+ assertEquals(0, xml.getExtendedProperties().getCharacters());
+ assertEquals(0, xml.getExtendedProperties().getLines());
+
+ // The sample file has no title or subject set
+ assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
+ assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
+ }
+}
*/
public void testGetSimpleText() throws Exception {
new HXFPowerPointExtractor(xmlA.getPackage());
- new HXFPowerPointExtractor(new HSLFXMLSlideShow(xmlA));
+ new HXFPowerPointExtractor(new XMLSlideShow(xmlA));
HXFPowerPointExtractor extractor =
new HXFPowerPointExtractor(xmlA.getPackage());