source.dussan.org Git - poi.git/commitdiff
Fix bug #45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text
author: Nick Burch <nick@apache.org>
Tue, 5 Aug 2008 18:05:29 +0000 (18:05 +0000)
committer: Nick Burch <nick@apache.org>
Tue, 5 Aug 2008 18:05:29 +0000 (18:05 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@682843 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java

index ac5d2f636afc79efd157e1727a269656086483d3..0317cd9d27b875fa8ac4719baf064ec04a9970fa 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
            <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
            <action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
index a9e1cbdc6a8914715895ce4100a5c3580cba9c13..ce1869e2c0908f5b5cb8d3f30763b590eba1668a 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
            <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
            <action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
index 0f28c2fdc07405b3e2567cfdc262b3934d591439..4366f82e7658a3d437bf8dbdc5045b7f500ef87b 100644 (file)
 ==================================================================== */
 package org.apache.poi.xslf.extractor;
 
-import java.io.File;
 import java.io.IOException;
 
-import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
@@ -35,17 +37,20 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
 
 public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
-       private XSLFSlideShow slideshow;
+       private XMLSlideShow slideshow;
        private boolean slidesByDefault = true;
        private boolean notesByDefault = false;
        
+       public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
+               super(slideshow._getXSLFSlideShow());
+               this.slideshow = slideshow;
+       }
+       public XSLFPowerPointExtractor(XSLFSlideShow slideshow) throws XmlException, IOException {
+               this(new XMLSlideShow(slideshow));
+       }
        public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
                this(new XSLFSlideShow(container));
        }
-       public XSLFPowerPointExtractor(XSLFSlideShow slideshow) {
-               super(slideshow);
-               this.slideshow = slideshow;
-       }
 
        public static void main(String[] args) throws Exception {
                if(args.length < 1) {
@@ -88,18 +93,32 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
         */
        public String getText(boolean slideText, boolean notesText) {
                StringBuffer text = new StringBuffer();
-               
-               CTSlideIdListEntry[] slideRefs =
-                       slideshow.getSlideReferences().getSldIdArray();
-               for (int i = 0; i < slideRefs.length; i++) {
+
+               XSLFSlide[] slides = slideshow.getSlides();
+               for(int i = 0; i < slides.length; i++) {
+                       CTSlide rawSlide = slides[i]._getCTSlide();
+                       CTSlideIdListEntry slideId = slides[i]._getCTSlideId();
+                       
                        try {
-                               CTSlide slide =
-                                       slideshow.getSlide(slideRefs[i]);
+                               // For now, still very low level
                                CTNotesSlide notes = 
-                                       slideshow.getNotes(slideRefs[i]);
+                                       slideshow._getXSLFSlideShow().getNotes(slideId);
+                               CTCommentList comments =
+                                       slideshow._getXSLFSlideShow().getSlideComments(slideId);
                                
                                if(slideText) {
-                                       extractText(slide.getCSld().getSpTree(), text);
+                                       extractText(rawSlide.getCSld().getSpTree(), text);
+                                       
+                                       // Comments too for the slide
+                                       if(comments != null) {
+                                               for(CTComment comment : comments.getCmArray()) {
+                                                       // TODO - comment authors too
+                                                       // (They're in another stream)
+                                                       text.append(
+                                                                       comment.getText() + "\n"
+                                                       );
+                                               }
+                                       }
                                }
                                if(notesText && notes != null) {
                                        extractText(notes.getCSld().getSpTree(), text);
index fb10a2421c706fb9adbc4c1eadd36feeddf49629..ac3135637489ccda6655c6fce059ffa9e31b2c74 100644 (file)
@@ -108,4 +108,22 @@ public class TestXSLFPowerPointExtractor extends TestCase {
                                "\n\n\n\n", text
                );
        }
+       
+       public void testGetComments() throws Exception {
+               File file = new File(
+                               System.getProperty("HSLF.testdata.path") +
+                               File.separator + "45545_Comment.pptx"
+               );
+               assertTrue(file.exists());
+               
+               xmlA = new XSLFSlideShow(file.toString());
+               XSLFPowerPointExtractor extractor = 
+                       new XSLFPowerPointExtractor(xmlA);
+               
+               String text = extractor.getText();
+               assertTrue(text.length() > 0);
+               
+               // Check comments are there
+               assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
+       }
 }