source.dussan.org Git - poi.git/commitdiff
Bugzilla 51803: fixed HSLF TextExtractor to extract content from master slide
author: Yegor Kozlov <yegor@apache.org>
Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)
committer: Yegor Kozlov <yegor@apache.org>
Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1203295 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
test-data/slideshow/master_text.ppt

index ee49e67d78d585c7ac19c910c85c012b1db1cdae..ace9d1164228dfe29a264dc032ee98b87910055b 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta5" date="2011-??-??">
+           <action dev="poi-developers" type="fix">51803 - fixed HSLF TextExtractor to extract content from master slide </action>
            <action dev="poi-developers" type="fix">52190 - null check on XWPF setFontFamily</action>
            <action dev="poi-developers" type="fix">52062 - ensure that temporary files in SXSSF are deleted</action>
            <action dev="poi-developers" type="fix">50936 - Exception parsing MS Word 8.0 file (as duplicate of 47958)</action>
index 8a195ac25881a38f73d660ac2a82226d4b7cc8db..6610cde2cecbf949b3c3e791e66168ca3b7f19d6 100644 (file)
@@ -221,7 +221,22 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
                if (getSlideText) {
             if (getMasterText) {
                 for (SlideMaster master : _show.getSlidesMasters()) {
-                    textRunsToText(ret, master.getTextRuns());
+                    for(Shape sh : master.getShapes()){
+                        if(sh instanceof TextShape){
+                            if(MasterSheet.isPlaceholder(sh)) {
+                                // don't bother about boiler
+                                // plate text on master
+                                // sheets
+                                continue;
+                            }
+                            TextShape tsh = (TextShape)sh;
+                            String text = tsh.getText();
+                            ret.append(text);
+                            if (!text.endsWith("\n")) {
+                                ret.append("\n");
+                            }
+                        }
+                    }
                 }
             }
 
index e5246653958fe1cbf7702029cb027f1a136289b8..7850d774b1f95bae1ae7e1efcf88c3c8d85ed71c 100644 (file)
@@ -24,7 +24,6 @@ import org.apache.poi.hslf.usermodel.SlideShow;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Vector;
 import java.awt.*;
 
 /**
@@ -122,7 +121,7 @@ public abstract class Sheet {
      * For a given PPDrawing, grab all the TextRuns
      */
     public static TextRun[] findTextRuns(PPDrawing ppdrawing) {
-        Vector runsV = new Vector();
+        final List<TextRun> runsV = new ArrayList<TextRun>();
         EscherTextboxWrapper[] wrappers = ppdrawing.getTextboxWrappers();
         for (int i = 0; i < wrappers.length; i++) {
             int s1 = runsV.size();
@@ -132,15 +131,11 @@ public abstract class Sheet {
             findTextRuns(wrappers[i].getChildRecords(), runsV);
             int s2 = runsV.size();
             if (s2 != s1){
-                TextRun t = (TextRun) runsV.get(runsV.size()-1);
+                TextRun t = runsV.get(runsV.size()-1);
                 t.setShapeId(wrappers[i].getShapeId());
             }
         }
-        TextRun[] runs = new TextRun[runsV.size()];
-        for (int i = 0; i < runs.length; i++) {
-            runs[i] = (TextRun) runsV.get(i);
-        }
-        return runs;
+        return runsV.toArray(new TextRun[runsV.size()]);
     }
 
     /**
@@ -151,7 +146,7 @@ public abstract class Sheet {
      * @param records the records to build from
      * @param found   vector to add any found to
      */
-    protected static void findTextRuns(Record[] records, Vector found) {
+    protected static void findTextRuns(Record[] records, List<TextRun> found) {
         // Look for a TextHeaderAtom
         for (int i = 0, slwtIndex=0; i < (records.length - 1); i++) {
             if (records[i] instanceof TextHeaderAtom) {
index c457c9c8c456f90b52c2ec4728c924f58a08d848..56eb0a0d9e597ad4071c2e354645fe707ec68b51 100644 (file)
@@ -292,13 +292,15 @@ public final class TestExtractor extends TestCase {
        
        // Initially not there
        String text = ppe.getText();
-       assertFalse(text.contains("Master Header Text"));
+       assertFalse(text.contains("Text that I added to the master slide"));
        
        // Enable, shows up
        ppe.setMasterByDefault(true);
        text = ppe.getText();
-       assertTrue(text.contains("Master Header Text"));
-       
+       assertTrue(text.contains("Text that I added to the master slide"));
+
+       // Make sure placeholder text does not come out
+       assertFalse(text.contains("Click to edit Master"));
        
        // Now with another file only containing master text
        // Will always show up
index a748e8b2170172d92202d60ffd74743de0b036b2..cdcf4bc005822111c07bcbb5c5a6e69066c61661 100644 (file)
Binary files a/test-data/slideshow/master_text.ppt and b/test-data/slideshow/master_text.ppt differ