From: Nick Burch Date: Sun, 9 Mar 2008 14:21:34 +0000 (+0000) Subject: Get the powerpoint ooxml stuff converted over, and fix up a few tests X-Git-Tag: REL_3_5_BETA2~196 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=5b902b4562c845ba3bbbfbdc994299f1a3db9d1f;p=poi.git Get the powerpoint ooxml stuff converted over, and fix up a few tests git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635243 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 36f195eeb1..4e190d7e77 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -22,15 +22,19 @@ import java.io.PushbackInputStream; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.IOUtils; +import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.InvalidFormatException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackagePartName; import org.openxml4j.opc.PackageRelationship; +import org.openxml4j.opc.PackageRelationshipCollection; import org.openxml4j.opc.PackageRelationshipTypes; import org.openxml4j.opc.PackagingURIHelper; - +import org.openxml4j.opc.internal.PackagePropertiesPart; +import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; +import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument; public abstract class POIXMLDocument { @@ -68,7 +72,7 @@ public abstract class POIXMLDocument { * in the event of a problem. 
* Works around shortcomings in java's this() constructor calls */ - protected static Package openPackage(String path) throws IOException { + public static Package openPackage(String path) throws IOException { try { return Package.open(path); } catch (InvalidFormatException e) { @@ -99,6 +103,27 @@ public abstract class POIXMLDocument { } return part; } + + /** + * Fetches the (single) PackagePart which is defined as + * the supplied relation content type of the base + * container, or null if none found. + * @param relationType The relation content type to search for + * @throws IllegalArgumentException If we find more than one part of that type + */ + protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException { + PackageRelationshipCollection rels = + getCorePart().getRelationshipsByType(relationType); + if(rels.size() == 0) { + return null; + } + if(rels.size() > 1) { + throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!"); + } + PackageRelationship rel = rels.getRelationship(0); + return getTargetPart(rel); + } + /** * Checks that the supplied InputStream (which MUST @@ -132,4 +157,30 @@ public abstract class POIXMLDocument { header[3] == POIFSConstants.OOXML_FILE_HEADER[3] ); } + + /** + * Get the core document properties (core ooxml properties). 
+ * TODO: Replace with nice usermodel wrapper + * @deprecated To be replaced with a proper user-model style view of the properties + */ + public PackagePropertiesPart getCoreProperties() throws OpenXML4JException, IOException { + PackagePart propsPart = getSinglePartByRelationType(CORE_PROPERTIES_REL_TYPE); + if(propsPart == null) { + return null; + } + return (PackagePropertiesPart)propsPart; + } + + /** + * Get the extended document properties (extended ooxml properties) + * TODO: Replace with nice usermodel wrapper + * @deprecated To be replaced with a proper user-model style view of the properties + */ + public CTProperties getExtendedProperties() throws OpenXML4JException, XmlException, IOException { + PackagePart propsPart = getSinglePartByRelationType(EXTENDED_PROPERTIES_REL_TYPE); + + PropertiesDocument props = PropertiesDocument.Factory.parse( + propsPart.getInputStream()); + return props.getProperties(); + } } diff --git a/src/ooxml/java/org/apache/poi/xslf/HSLFXML.java b/src/ooxml/java/org/apache/poi/xslf/HSLFXML.java deleted file mode 100644 index 568cb80aa0..0000000000 --- a/src/ooxml/java/org/apache/poi/xslf/HSLFXML.java +++ /dev/null @@ -1,148 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf; - -import java.io.IOException; - -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.InvalidFormatException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxml4j.opc.PackageRelationshipCollection; -import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry; -import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument; -import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument; -import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument; -import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument; - -/** - * Experimental class to do low level processing - * of pptx files. 
- * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public class HSLFXML extends HXFDocument { - public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"; - public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"; - public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"; - public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"; - public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide"; - - private PresentationDocument presentationDoc; - - public HSLFXML(Package container) throws OpenXML4JException, IOException, XmlException { - super(container, MAIN_CONTENT_TYPE); - - presentationDoc = - PresentationDocument.Factory.parse(basePart.getInputStream()); - } - - /** - * Returns the low level presentation base object - */ - public CTPresentation getPresentation() { - return presentationDoc.getPresentation(); - } - - /** - * Returns the references from the presentation to its - * slides. - * You'll need these to figure out the slide ordering, - * and to get at the actual slides themselves - */ - public CTSlideIdList getSlideReferences() { - return getPresentation().getSldIdLst(); - } - /** - * Returns the references from the presentation to its - * slide masters. 
- * You'll need these to get at the actual slide - * masters themselves - */ - public CTSlideMasterIdList getSlideMasterReferences() { - return getPresentation().getSldMasterIdLst(); - } - - /** - * Returns the low level slide master object from - * the supplied slide master reference - */ - public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException { - PackagePart masterPart = - getRelatedPackagePart(master.getId2()); - SldMasterDocument masterDoc = - SldMasterDocument.Factory.parse(masterPart.getInputStream()); - return masterDoc.getSldMaster(); - } - - /** - * Returns the low level slide object from - * the supplied slide reference - */ - public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException { - PackagePart slidePart = - getRelatedPackagePart(slide.getId2()); - SldDocument slideDoc = - SldDocument.Factory.parse(slidePart.getInputStream()); - return slideDoc.getSld(); - } - - /** - * Returns the low level notes object for the given - * slide, as found from the supplied slide reference - */ - public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException { - PackagePart slidePart = - getRelatedPackagePart(slide.getId2()); - - PackageRelationshipCollection notes; - try { - notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE); - } catch(InvalidFormatException e) { - throw new IllegalStateException(e); - } - - if(notes.size() == 0) { - // No notes for this slide - return null; - } - if(notes.size() > 1) { - throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + notes.size()); - } - - PackagePart notesPart = - getPackagePart(notes.getRelationship(0)); - NotesDocument notesDoc = - NotesDocument.Factory.parse(notesPart.getInputStream()); - - return notesDoc.getNotes(); - } -} diff --git a/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java new file mode 100644 index 
0000000000..fbdf00b01b --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java @@ -0,0 +1,164 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xslf; + +import java.io.IOException; + +import org.apache.poi.POIXMLDocument; +import org.apache.xmlbeans.XmlException; +import org.openxml4j.exceptions.InvalidFormatException; +import org.openxml4j.exceptions.OpenXML4JException; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; +import org.openxml4j.opc.PackageRelationshipCollection; +import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; +import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList; +import 
org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry; +import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument; +import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument; +import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument; +import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument; + +/** + * Experimental class to do low level processing + * of pptx files. + * + * If you are using these low level classes, then you + * will almost certainly need to refer to the OOXML + * specifications from + * http://www.ecma-international.org/publications/standards/Ecma-376.htm + * + * WARNING - APIs expected to change rapidly + */ +public class XSLFSlideShow extends POIXMLDocument { + public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"; + public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"; + public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"; + public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"; + public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide"; + + private PresentationDocument presentationDoc; + + public XSLFSlideShow(Package container) throws OpenXML4JException, IOException, XmlException { + super(container); + + presentationDoc = + PresentationDocument.Factory.parse(getCorePart().getInputStream()); + } + public XSLFSlideShow(String file) throws OpenXML4JException, IOException, XmlException { + this(openPackage(file)); + } + + /** + * Returns the low level presentation base object + */ + public CTPresentation getPresentation() { + return presentationDoc.getPresentation(); + } + + /** 
+ * Returns the references from the presentation to its + * slides. + * You'll need these to figure out the slide ordering, + * and to get at the actual slides themselves + */ + public CTSlideIdList getSlideReferences() { + return getPresentation().getSldIdLst(); + } + /** + * Returns the references from the presentation to its + * slide masters. + * You'll need these to get at the actual slide + * masters themselves + */ + public CTSlideMasterIdList getSlideMasterReferences() { + return getPresentation().getSldMasterIdLst(); + } + + /** + * Returns the low level slide master object from + * the supplied slide master reference + */ + public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException { + try { + PackagePart masterPart = + getTargetPart(getCorePart().getRelationship(master.getId2())); + + SldMasterDocument masterDoc = + SldMasterDocument.Factory.parse(masterPart.getInputStream()); + return masterDoc.getSldMaster(); + } catch(InvalidFormatException e) { + throw new XmlException(e); + } + } + + /** + * Returns the low level slide object from + * the supplied slide reference + */ + public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException { + try { + PackagePart slidePart = + getTargetPart(getCorePart().getRelationship(slide.getId2())); + SldDocument slideDoc = + SldDocument.Factory.parse(slidePart.getInputStream()); + return slideDoc.getSld(); + } catch(InvalidFormatException e) { + throw new XmlException(e); + } + } + + /** + * Returns the low level notes object for the given + * slide, as found from the supplied slide reference + */ + public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException { + PackageRelationshipCollection notes; + try { + PackagePart slidePart = + getTargetPart(getCorePart().getRelationship(slide.getId2())); + + notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE); + } catch(InvalidFormatException e) { + throw new 
IllegalStateException(e); + } + + if(notes.size() == 0) { + // No notes for this slide + return null; + } + if(notes.size() > 1) { + throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + notes.size()); + } + + try { + PackagePart notesPart = + getTargetPart(notes.getRelationship(0)); + NotesDocument notesDoc = + NotesDocument.Factory.parse(notesPart.getInputStream()); + + return notesDoc.getNotes(); + } catch(InvalidFormatException e) { + throw new IllegalStateException(e); + } + } +} diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/HXFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/HXFPowerPointExtractor.java deleted file mode 100644 index bfa59e32cd..0000000000 --- a/src/ooxml/java/org/apache/poi/xslf/extractor/HXFPowerPointExtractor.java +++ /dev/null @@ -1,139 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.hslf.extractor; - -import java.io.File; -import java.io.IOException; - -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hslf.HSLFXML; -import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; -import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; -import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; -import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; -import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; - -public class HXFPowerPointExtractor extends POIXMLTextExtractor { - private HSLFXMLSlideShow slideshow; - private boolean slidesByDefault = true; - private boolean notesByDefault = false; - - public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException { - this(new HSLFXMLSlideShow( - new XSLFXML(container) - )); - } - public HXFPowerPointExtractor(HSLFXMLSlideShow slideshow) { - super(slideshow); - this.slideshow = slideshow; - } - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" HXFPowerPointExtractor "); - System.exit(1); - } - POIXMLTextExtractor extractor = - new HXFPowerPointExtractor(HXFDocument.openPackage( - new File(args[0]) - )); - System.out.println(extractor.getText()); - } - - /** - * Should a call to getText() return slide text? 
- * Default is yes - */ - public void setSlidesByDefault(boolean slidesByDefault) { - this.slidesByDefault = slidesByDefault; - } - /** - * Should a call to getText() return notes text? - * Default is no - */ - public void setNotesByDefault(boolean notesByDefault) { - this.notesByDefault = notesByDefault; - } - - /** - * Gets the slide text, but not the notes text - */ - public String getText() { - return getText(slidesByDefault, notesByDefault); - } - - /** - * Gets the requested text from the file - * @param slideText Should we retrieve text from slides? - * @param notesText Should we retrieve text from notes? - */ - public String getText(boolean slideText, boolean notesText) { - StringBuffer text = new StringBuffer(); - - CTSlideIdListEntry[] slideRefs = - slideshow._getHSLFXML().getSlideReferences().getSldIdArray(); - for (int i = 0; i < slideRefs.length; i++) { - try { - CTSlide slide = - slideshow._getHSLFXML().getSlide(slideRefs[i]); - CTNotesSlide notes = - slideshow._getHSLFXML().getNotes(slideRefs[i]); - - if(slideText) { - extractText(slide.getCSld().getSpTree(), text); - } - if(notesText && notes != null) { - extractText(notes.getCSld().getSpTree(), text); - } - } catch(Exception e) { - throw new RuntimeException(e); - } - } - - return text.toString(); - } - - private void extractText(CTGroupShape gs, StringBuffer text) { - CTShape[] shapes = gs.getSpArray(); - for (int i = 0; i < shapes.length; i++) { - CTTextBody textBody = - shapes[i].getTxBody(); - if(textBody != null) { - CTTextParagraph[] paras = - textBody.getPArray(); - for (int j = 0; j < paras.length; j++) { - CTRegularTextRun[] textRuns = - paras[j].getRArray(); - for (int k = 0; k < textRuns.length; k++) { - text.append( textRuns[k].getT() ); - } - // End each paragraph with a new line - text.append("\n"); - } - } - } - } -} diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java new 
file mode 100644 index 0000000000..0f28c2fdc0 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java @@ -0,0 +1,135 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xslf.extractor; + +import java.io.File; +import java.io.IOException; + +import org.apache.poi.POIXMLDocument; +import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.xmlbeans.XmlException; +import org.openxml4j.exceptions.OpenXML4JException; +import org.openxml4j.opc.Package; +import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; +import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; +import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; +import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; +import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; +import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; + +public class XSLFPowerPointExtractor extends POIXMLTextExtractor { + private XSLFSlideShow slideshow; + private boolean slidesByDefault = true; + private boolean notesByDefault = false; + + public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException { + this(new XSLFSlideShow(container)); + } + public XSLFPowerPointExtractor(XSLFSlideShow slideshow) { + super(slideshow); + this.slideshow = slideshow; + } + + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" HXFPowerPointExtractor "); + System.exit(1); + } + POIXMLTextExtractor extractor = + new XSLFPowerPointExtractor( + new XSLFSlideShow(args[0])); + System.out.println(extractor.getText()); + } + + /** + * Should a call to getText() return slide text? 
+ * Default is yes + */ + public void setSlidesByDefault(boolean slidesByDefault) { + this.slidesByDefault = slidesByDefault; + } + /** + * Should a call to getText() return notes text? + * Default is no + */ + public void setNotesByDefault(boolean notesByDefault) { + this.notesByDefault = notesByDefault; + } + + /** + * Gets the slide text, but not the notes text + */ + public String getText() { + return getText(slidesByDefault, notesByDefault); + } + + /** + * Gets the requested text from the file + * @param slideText Should we retrieve text from slides? + * @param notesText Should we retrieve text from notes? + */ + public String getText(boolean slideText, boolean notesText) { + StringBuffer text = new StringBuffer(); + + CTSlideIdListEntry[] slideRefs = + slideshow.getSlideReferences().getSldIdArray(); + for (int i = 0; i < slideRefs.length; i++) { + try { + CTSlide slide = + slideshow.getSlide(slideRefs[i]); + CTNotesSlide notes = + slideshow.getNotes(slideRefs[i]); + + if(slideText) { + extractText(slide.getCSld().getSpTree(), text); + } + if(notesText && notes != null) { + extractText(notes.getCSld().getSpTree(), text); + } + } catch(Exception e) { + throw new RuntimeException(e); + } + } + + return text.toString(); + } + + private void extractText(CTGroupShape gs, StringBuffer text) { + CTShape[] shapes = gs.getSpArray(); + for (int i = 0; i < shapes.length; i++) { + CTTextBody textBody = + shapes[i].getTxBody(); + if(textBody != null) { + CTTextParagraph[] paras = + textBody.getPArray(); + for (int j = 0; j < paras.length; j++) { + CTRegularTextRun[] textRuns = + paras[j].getRArray(); + for (int k = 0; k < textRuns.length; k++) { + text.append( textRuns[k].getT() ); + } + // End each paragraph with a new line + text.append("\n"); + } + } + } + } +} diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/HSLFXMLSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/HSLFXMLSlideShow.java deleted file mode 100644 index f4360bfff5..0000000000 --- 
a/src/ooxml/java/org/apache/poi/xslf/usermodel/HSLFXMLSlideShow.java +++ /dev/null @@ -1,39 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf.usermodel; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.hslf.HSLFXML; - -/** - * High level representation of a ooxml slideshow. - * This is the first object most users will construct whether - * they are reading or writing a slideshow. It is also the - * top level object for creating new slides/etc. 
- */ -public class HSLFXMLSlideShow extends POIXMLDocument { - private org.apache.poi.hslf.XSLFXML hslfXML; - - public HSLFXMLSlideShow(XSLFXML xml) { - super(xml); - this.hslfXML = xml; - } - - public XSLFXML _getHSLFXML() { - return hslfXML; - } -} diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java new file mode 100644 index 0000000000..5bc1cf8737 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java @@ -0,0 +1,40 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xslf.usermodel; + +import org.apache.poi.xslf.XSLFSlideShow; + +/** + * High level representation of a ooxml slideshow. + * This is the first object most users will construct whether + * they are reading or writing a slideshow. It is also the + * top level object for creating new slides/etc. 
+ */ +public class XMLSlideShow { + private XSLFSlideShow slideShow; + + public XMLSlideShow(XSLFSlideShow xml) { + this.slideShow = xml; + } + + public XSLFSlideShow _getXSLFSlideShow() { + return slideShow; + } + + // TODO: Get slides + // TODO: Get notes +} diff --git a/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java index 36adb497cd..dca593c3c7 100644 --- a/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java +++ b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java @@ -17,7 +17,7 @@ ==================================================================== */ -package org.apache.poi.hxf; +package org.apache.poi; import junit.framework.TestCase; import java.io.*; @@ -38,7 +38,7 @@ public class TestDetectAsOOXML extends TestCase { File f = new File(dirname + "/sample.xlsx"); - HXFDocument.openPackage(f); + POIXMLDocument.openPackage(f.toString()); } public void testDetectAsPOIFS() throws Exception { @@ -48,18 +48,18 @@ public class TestDetectAsOOXML extends TestCase in = new PushbackInputStream( new FileInputStream(dirname + "/SampleSS.xlsx"), 10 ); - assertTrue(HXFDocument.hasOOXMLHeader(in)); + assertTrue(POIXMLDocument.hasOOXMLHeader(in)); // xls file isn't in = new PushbackInputStream( new FileInputStream(dirname + "/SampleSS.xls"), 10 ); - assertFalse(HXFDocument.hasOOXMLHeader(in)); + assertFalse(POIXMLDocument.hasOOXMLHeader(in)); // text file isn't in = new PushbackInputStream( new FileInputStream(dirname + "/SampleSS.txt"), 10 ); - assertFalse(HXFDocument.hasOOXMLHeader(in)); + assertFalse(POIXMLDocument.hasOOXMLHeader(in)); } } diff --git a/src/ooxml/testcases/org/apache/poi/xslf/TestHSLFXML.java b/src/ooxml/testcases/org/apache/poi/xslf/TestHSLFXML.java deleted file mode 100644 index fd4653a854..0000000000 --- a/src/ooxml/testcases/org/apache/poi/xslf/TestHSLFXML.java +++ /dev/null @@ -1,127 +0,0 @@ -/* ==================================================================== - 
Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hslf; - -import java.io.File; - -import org.apache.poi.hxf.HXFDocument; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry; - -import junit.framework.TestCase; - -public class TestHSLFXML extends TestCase { - private File sampleFile; - - protected void setUp() throws Exception { - super.setUp(); - - sampleFile = new File( - System.getProperty("HSLF.testdata.path") + - File.separator + "sample.pptx" - ); - } - - public void testContainsMainContentType() throws Exception { - Package pack = HXFDocument.openPackage(sampleFile); - - boolean found = false; - for(PackagePart part : pack.getParts()) { - if(part.getContentType().equals(HSLFXML.MAIN_CONTENT_TYPE)) { - found = true; - } - System.out.println(part); - } - assertTrue(found); - } - - public void testOpen() throws Exception { - HXFDocument.openPackage(sampleFile); - - HSLFXML xml; - - // With the finalised uri, should be fine - xml = new HSLFXML( - 
HXFDocument.openPackage(sampleFile) - ); - - // Check the core - assertNotNull(xml.getPresentation()); - - // Check it has some slides - assertTrue( - xml.getSlideReferences().sizeOfSldIdArray() > 0 - ); - assertTrue( - xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0 - ); - } - - public void testSlideBasics() throws Exception { - HSLFXML xml = new HSLFXML( - HXFDocument.openPackage(sampleFile) - ); - - // Should have 1 master - assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray()); - assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length); - - // Should have three sheets - assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray()); - assertEquals(2, xml.getSlideReferences().getSldIdArray().length); - - // Check they're as expected - CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray(); - assertEquals(256, slides[0].getId()); - assertEquals(257, slides[1].getId()); - assertEquals("rId2", slides[0].getId2()); - assertEquals("rId3", slides[1].getId2()); - - // Now get those objects - assertNotNull(xml.getSlide(slides[0])); - assertNotNull(xml.getSlide(slides[1])); - - // And check they have notes as expected - assertNotNull(xml.getNotes(slides[0])); - assertNotNull(xml.getNotes(slides[1])); - - // And again for the master - CTSlideMasterIdListEntry[] masters = - xml.getSlideMasterReferences().getSldMasterIdArray(); - assertEquals(2147483648l, masters[0].getId()); - assertEquals("rId1", masters[0].getId2()); - assertNotNull(xml.getSlideMaster(masters[0])); - } - - public void testMetadataBasics() throws Exception { - HSLFXML xml = new HSLFXML( - HXFDocument.openPackage(sampleFile) - ); - - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication()); - assertEquals(0, xml.getExtendedProperties().getCharacters()); - assertEquals(0, xml.getExtendedProperties().getLines()); - - 
assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); - } -} diff --git a/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFSlideShow.java b/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFSlideShow.java new file mode 100644 index 0000000000..c25d08a90c --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFSlideShow.java @@ -0,0 +1,123 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xslf; + +import java.io.File; + +import org.apache.poi.POIXMLDocument; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; +import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry; + +import junit.framework.TestCase; + +public class TestXSLFSlideShow extends TestCase { + private String sampleFile; + + protected void setUp() throws Exception { + super.setUp(); + + sampleFile = new File( + System.getProperty("HSLF.testdata.path") + + File.separator + "sample.pptx" + ).toString(); + } + + public void testContainsMainContentType() throws Exception { + Package pack = POIXMLDocument.openPackage(sampleFile); + + boolean found = false; + for(PackagePart part : pack.getParts()) { + if(part.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) { + found = true; + } + System.out.println(part); + } + assertTrue(found); + } + + public void testOpen() throws Exception { + POIXMLDocument.openPackage(sampleFile); + + XSLFSlideShow xml; + + // With the finalised uri, should be fine + xml = new XSLFSlideShow( + POIXMLDocument.openPackage(sampleFile) + ); + + // Check the core + assertNotNull(xml.getPresentation()); + + // Check it has some slides + assertTrue( + xml.getSlideReferences().sizeOfSldIdArray() > 0 + ); + assertTrue( + xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0 + ); + } + + public void testSlideBasics() throws Exception { + XSLFSlideShow xml = new XSLFSlideShow(sampleFile); + + // Should have 1 master + assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray()); + assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length); + + // Should have two slides + assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray()); + assertEquals(2, xml.getSlideReferences().getSldIdArray().length); + + // 
Check they're as expected + CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray(); + assertEquals(256, slides[0].getId()); + assertEquals(257, slides[1].getId()); + assertEquals("rId2", slides[0].getId2()); + assertEquals("rId3", slides[1].getId2()); + + // Now get those objects + assertNotNull(xml.getSlide(slides[0])); + assertNotNull(xml.getSlide(slides[1])); + + // And check they have notes as expected + assertNotNull(xml.getNotes(slides[0])); + assertNotNull(xml.getNotes(slides[1])); + + // And again for the master + CTSlideMasterIdListEntry[] masters = + xml.getSlideMasterReferences().getSldMasterIdArray(); + assertEquals(2147483648l, masters[0].getId()); + assertEquals("rId1", masters[0].getId2()); + assertNotNull(xml.getSlideMaster(masters[0])); + } + + public void testMetadataBasics() throws Exception { + XSLFSlideShow xml = new XSLFSlideShow(sampleFile); + + assertNotNull(xml.getCoreProperties()); + assertNotNull(xml.getExtendedProperties()); + + assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication()); + assertEquals(0, xml.getExtendedProperties().getCharacters()); + assertEquals(0, xml.getExtendedProperties().getLines()); + + assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); + assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java index 6a006ab5c8..3b2ba0746a 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java @@ -49,7 +49,7 @@ public class TestHXFPowerPointExtractor extends TestCase { */ public void testGetSimpleText() throws Exception { new HXFPowerPointExtractor(xmlA.getPackage()); - new HXFPowerPointExtractor(new HSLFXMLSlideShow(xmlA)); + new 
HXFPowerPointExtractor(new XMLSlideShow(xmlA)); HXFPowerPointExtractor extractor = new HXFPowerPointExtractor(xmlA.getPackage());