]> source.dussan.org Git - poi.git/commitdiff
Fix bug #45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments
authorNick Burch <nick@apache.org>
Mon, 4 Aug 2008 22:48:39 +0000 (22:48 +0000)
committerNick Burch <nick@apache.org>
Mon, 4 Aug 2008 22:48:39 +0000 (22:48 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@682533 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
src/scratchpad/src/org/apache/poi/hslf/model/Comment.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hslf/model/Slide.java
src/scratchpad/src/org/apache/poi/hslf/record/RecordContainer.java
src/scratchpad/testcases/org/apache/poi/hslf/data/45543.ppt [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hslf/data/WithComments.ppt [new file with mode: 0755]
src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java

index c3080c322a26246ec918bcfc7bd0a52d0594c83c..80b3d0406df56f5406e3f58a522b6d73037eebdf 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
            <action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
            <action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
            <action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
index 95d2eb708aa98e1c78117795de55f5a851897bad..71e0ea2b5978317d50cc48e3c69f0861c31f4d43 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
            <action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
            <action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
            <action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
index cd9fa282560b04222b7b0e74dd3150128c65e61e..9dddb700bd467f00562aea6d1a23ea2a1e6ebda4 100644 (file)
@@ -27,6 +27,8 @@ import org.apache.poi.POIOLE2TextExtractor;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.hslf.*;
 import org.apache.poi.hslf.model.*;
+import org.apache.poi.hslf.record.Comment2000;
+import org.apache.poi.hslf.record.Record;
 import org.apache.poi.hslf.usermodel.*;
 
 /**
@@ -44,6 +46,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
        
        private boolean slidesByDefault = true;
        private boolean notesByDefault = false;
+       private boolean commentsByDefault = false;
 
   /**
    * Basic extractor. Returns all the text, and optionally all the notes
@@ -57,16 +60,20 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
        }
 
        boolean notes = false;
+       boolean comments = false;
        String file;
        if(args.length > 1) {
                notes = true;
                file = args[1];
+               if(args.length > 2) {
+                       comments = true;
+               }
        } else {
                file = args[0];
        }
 
        PowerPointExtractor ppe = new PowerPointExtractor(file);
-       System.out.println(ppe.getText(true,notes));
+       System.out.println(ppe.getText(true,notes,comments));
        ppe.close();
   }
 
@@ -127,6 +134,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
        public void setNotesByDefault(boolean notesByDefault) {
                this.notesByDefault = notesByDefault;
        }
+       /**
+        * Should a call to getText() return comments text?
+        * Default is no
+        *
+        * The flag is only stored here; the no-argument getText() and
+        *  the two-argument getText(boolean, boolean) both pass it on
+        *  as the getCommentText argument of the three-argument
+        *  getText.
+        *
+        * @param commentsByDefault true to include comment text by default
+        */
+       public void setCommentsByDefault(boolean commentsByDefault) {
+               this.commentsByDefault = commentsByDefault;
+       }
 
        /**
         * Fetches all the slide text from the slideshow, 
@@ -135,7 +149,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
         *  to change this
         */
        public String getText() {
-               return getText(slidesByDefault,notesByDefault);
+               return getText(slidesByDefault,notesByDefault,commentsByDefault);
        }
 
        /**
@@ -153,6 +167,9 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
    * @param getNoteText fetch note text
    */
   public String getText(boolean getSlideText, boolean getNoteText) {
+         return getText(getSlideText, getNoteText, commentsByDefault);
+  }
+  public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) {
        StringBuffer ret = new StringBuffer(); 
 
        if(getSlideText) {
@@ -169,6 +186,18 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
                                        }
                                }
                        }
+                       
+                       if(getCommentText) {
+                               Comment[] comments = slide.getComments();
+                               for(int j=0; j<comments.length; j++) {
+                                       ret.append(
+                                                       comments[j].getAuthor() + 
+                                                       " - " +
+                                                       comments[j].getText() + 
+                                                       "\n"
+                                       );
+                               }
+                       }
                }
                if(getNoteText) {
                        ret.append("\n");
diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/Comment.java b/src/scratchpad/src/org/apache/poi/hslf/model/Comment.java
new file mode 100644 (file)
index 0000000..86035f1
--- /dev/null
@@ -0,0 +1,54 @@
+package org.apache.poi.hslf.model;
+
+import org.apache.poi.hslf.record.Comment2000;
+
+public class Comment {
+       private Comment2000 comment2000;
+       
+       public Comment(Comment2000 comment2000) {
+               this.comment2000 = comment2000;
+       }
+       
+       protected Comment2000 getComment2000() {
+               return comment2000;
+       }
+       
+       /**
+        * Get the Author of this comment
+        */
+       public String getAuthor() {
+               return comment2000.getAuthor();
+       }
+       /**
+        * Set the Author of this comment
+        */
+       public void setAuthor(String author) {
+               comment2000.setAuthor(author);
+       }
+
+       /**
+        * Get the Author's Initials of this comment
+        */
+       public String getAuthorInitials() {
+               return comment2000.getAuthorInitials();
+       }
+       /**
+        * Set the Author's Initials of this comment
+        */
+       public void setAuthorInitials(String initials) {
+               comment2000.setAuthorInitials(initials);
+       }
+
+       /**
+        * Get the text of this comment
+        */
+       public String getText() {
+               return comment2000.getText();
+       }
+       /**
+        * Set the text of this comment
+        */
+       public void setText(String text) {
+               comment2000.setText(text);
+       }
+}
index 670a86655a4465de55746ea2384b6997561a907d..48f2fdefccdf911a5e34d0ed041c53e525295d4e 100644 (file)
@@ -361,6 +361,59 @@ public class Slide extends Sheet
         }
         return super.getColorScheme();
     }
+    
+    /**
+     * Get the comment(s) for this slide.
+     * Note - for now, only works on PPT 2000 and 
+     *  PPT 2003 files. Doesn't work for PPT 97
+     *  ones, as they do their comments oddly.
+     */
+    public Comment[] getComments() {
+       // If there are any, they're in
+       //  ProgTags -> ProgBinaryTag -> BinaryTagData
+       RecordContainer progTags = (RecordContainer)
+                       getSheetContainer().findFirstOfType(
+                                               RecordTypes.ProgTags.typeID
+       );
+       if(progTags != null) {
+               RecordContainer progBinaryTag = (RecordContainer)
+                       progTags.findFirstOfType(
+                                       RecordTypes.ProgBinaryTag.typeID
+               );
+               if(progBinaryTag != null) {
+                       RecordContainer binaryTags = (RecordContainer)
+                               progBinaryTag.findFirstOfType(
+                                               RecordTypes.BinaryTagData.typeID
+                       );
+                       if(binaryTags != null) {
+                               // This is where they'll be
+                               int count = 0;
+                               for(int i=0; i<binaryTags.getChildRecords().length; i++) {
+                                       if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
+                                               count++;
+                                       }
+                               }
+                               
+                               // Now build
+                               Comment[] comments = new Comment[count];
+                               count = 0;
+                               for(int i=0; i<binaryTags.getChildRecords().length; i++) {
+                                       if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
+                                               comments[i] = new Comment(
+                                                               (Comment2000)binaryTags.getChildRecords()[i]
+                                               );
+                                               count++;
+                                       }
+                               }
+                               
+                               return comments;
+                       }
+               }
+       }
+       
+       // None found
+       return new Comment[0];
+    }
 
     public void draw(Graphics2D graphics){
         MasterSheet master = getMasterSheet();
index 2aaab41b942089006e9236511da066f65b3382e2..f5efedc1d315798a07ed6129cbfbf957e3c72738 100644 (file)
@@ -123,6 +123,20 @@ public abstract class RecordContainer extends Record
        }
        
        
+       /**
+        * Finds the first child record of the given type,
+        *  or null if none of the child records are of the
+        *  given type. Does not descend.
+        */
+       public Record findFirstOfType(long type) {
+               for(int i=0; i<_children.length; i++) {
+                       if(_children[i].getRecordType() == type) {
+                               return _children[i];
+                       }
+               }
+               return null;
+       }
+       
        /* ===============================================================
         *                   External Move Methods
         * ===============================================================
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/data/45543.ppt b/src/scratchpad/testcases/org/apache/poi/hslf/data/45543.ppt
new file mode 100644 (file)
index 0000000..d5b04f8
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hslf/data/45543.ppt differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/data/WithComments.ppt b/src/scratchpad/testcases/org/apache/poi/hslf/data/WithComments.ppt
new file mode 100755 (executable)
index 0000000..2b5f864
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hslf/data/WithComments.ppt differ
index d6197a598936f6e3798b6b7e30700425dd4da966..35113866f22a192eab38e16d477b4d1551f60958 100644 (file)
@@ -216,4 +216,36 @@ public class TextExtractor extends TestCase {
                                ppe.getText(true, false)
                );
     }
+    
+    /**
+     * From bug #45543
+     *
+     * For each of two sample files, checks that the expected
+     *  comment text is absent from getText() by default, and
+     *  present once setCommentsByDefault(true) has been called.
+     */
+    public void testWithComments() throws Exception {
+               String filename;
+               
+               // New file - expected to contain "This is a test comment"
+               filename = dirname + "/WithComments.ppt";
+               ppe = new PowerPointExtractor(filename);
+
+               String text = ppe.getText();
+               assertFalse("Comments not in by default", text.contains("This is a test comment"));
+               
+               ppe.setCommentsByDefault(true);
+               
+               text = ppe.getText();
+               assertTrue("Unable to find expected word in text\n" + text, text.contains("This is a test comment"));
+
+               
+               // And another file - the one from the bug, expected to contain "testdoc"
+               filename = dirname + "/45543.ppt";
+               ppe = new PowerPointExtractor(filename);
+
+               text = ppe.getText();
+               assertFalse("Comments not in by default", text.contains("testdoc"));
+               
+               ppe.setCommentsByDefault(true);
+               
+               text = ppe.getText();
+               assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
+    }
 }