Bugzilla 51803: fixed HSLF TextExtractor to extract content from master slide

author Yegor Kozlov <yegor@apache.org>

Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)

committer Yegor Kozlov <yegor@apache.org>

Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)
author Yegor Kozlov <yegor@apache.org>
Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)
committer Yegor Kozlov <yegor@apache.org>
Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml

index ee49e67d78d585c7ac19c910c85c012b1db1cdae..ace9d1164228dfe29a264dc032ee98b87910055b 100644 (file)
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
  
      <changes>
          <release version="3.8-beta5" date="2011-??-??">
+           <action dev="poi-developers" type="fix">51803 - fixed HSLF TextExtractor to extract content from master slide</action>
             <action dev="poi-developers" type="fix">52190 - null check on XWPF setFontFamily</action>
             <action dev="poi-developers" type="fix">52062 - ensure that temporary files in SXSSF are deleted</action>
             <action dev="poi-developers" type="fix">50936 - Exception parsing MS Word 8.0 file (as duplicate of 47958)</action>
diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java

index 8a195ac25881a38f73d660ac2a82226d4b7cc8db..6610cde2cecbf949b3c3e791e66168ca3b7f19d6 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
@@ -221,7 +221,22 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
                 if (getSlideText) {
              if (getMasterText) {
                  for (SlideMaster master : _show.getSlidesMasters()) {
-                    textRunsToText(ret, master.getTextRuns());
+                    for(Shape sh : master.getShapes()){
+                        if(sh instanceof TextShape){
+                            if(MasterSheet.isPlaceholder(sh)) {
+                                // skip placeholder shapes: they
+                                // only carry boilerplate text
+                                // on master sheets
+                                continue;
+                            }
+                            TextShape tsh = (TextShape)sh;
+                            String text = tsh.getText();
+                            ret.append(text);
+                            if (!text.endsWith("\n")) {
+                                ret.append("\n");
+                            }
+                        }
+                    }
                  }
              }
  
diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java

index e5246653958fe1cbf7702029cb027f1a136289b8..7850d774b1f95bae1ae7e1efcf88c3c8d85ed71c 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
@@ -24,7 +24,6 @@ import org.apache.poi.hslf.usermodel.SlideShow;
  import java.util.ArrayList;
  import java.util.Iterator;
  import java.util.List;
-import java.util.Vector;
  import java.awt.*;
  
  /**
@@ -122,7 +121,7 @@ public abstract class Sheet {
       * For a given PPDrawing, grab all the TextRuns
       */
      public static TextRun[] findTextRuns(PPDrawing ppdrawing) {
-        Vector runsV = new Vector();
+        final List<TextRun> runsV = new ArrayList<TextRun>();
          EscherTextboxWrapper[] wrappers = ppdrawing.getTextboxWrappers();
          for (int i = 0; i < wrappers.length; i++) {
              int s1 = runsV.size();
@@ -132,15 +131,11 @@ public abstract class Sheet {
              findTextRuns(wrappers[i].getChildRecords(), runsV);
              int s2 = runsV.size();
              if (s2 != s1){
-                TextRun t = (TextRun) runsV.get(runsV.size()-1);
+                TextRun t = runsV.get(runsV.size()-1);
                  t.setShapeId(wrappers[i].getShapeId());
              }
          }
-        TextRun[] runs = new TextRun[runsV.size()];
-        for (int i = 0; i < runs.length; i++) {
-            runs[i] = (TextRun) runsV.get(i);
-        }
-        return runs;
+        return runsV.toArray(new TextRun[runsV.size()]);
      }
  
      /**
@@ -151,7 +146,7 @@ public abstract class Sheet {
       * @param records the records to build from
       * @param found   vector to add any found to
       */
-    protected static void findTextRuns(Record[] records, Vector found) {
+    protected static void findTextRuns(Record[] records, List<TextRun> found) {
          // Look for a TextHeaderAtom
          for (int i = 0, slwtIndex=0; i < (records.length - 1); i++) {
              if (records[i] instanceof TextHeaderAtom) {
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java

index c457c9c8c456f90b52c2ec4728c924f58a08d848..56eb0a0d9e597ad4071c2e354645fe707ec68b51 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -292,13 +292,15 @@ public final class TestExtractor extends TestCase {
         
         // Initially not there
         String text = ppe.getText();
-       assertFalse(text.contains("Master Header Text"));
+       assertFalse(text.contains("Text that I added to the master slide"));
         
         // Enable, shows up
         ppe.setMasterByDefault(true);
         text = ppe.getText();
-       assertTrue(text.contains("Master Header Text"));
-       
+       assertTrue(text.contains("Text that I added to the master slide"));
+
+       // Make sure placeholder text does not come out
+       assertFalse(text.contains("Click to edit Master"));
         
         // Now with another file only containing master text
         // Will always show up
diff --git a/test-data/slideshow/master_text.ppt b/test-data/slideshow/master_text.ppt

index a748e8b2170172d92202d60ffd74743de0b036b2..cdcf4bc005822111c07bcbb5c5a6e69066c61661 100644 (file)

Binary files a/test-data/slideshow/master_text.ppt and b/test-data/slideshow/master_text.ppt differ
author	Yegor Kozlov <yegor@apache.org>
	Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)
committer	Yegor Kozlov <yegor@apache.org>
	Thu, 17 Nov 2011 17:35:42 +0000 (17:35 +0000)
src/documentation/content/xdocs/status.xml		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java		patch \| blob \| history
test-data/slideshow/master_text.ppt		patch \| blob \| history