summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yegor Kozlov <yegor@apache.org> 2011-11-17 17:35:42 +0000
committer Yegor Kozlov <yegor@apache.org> 2011-11-17 17:35:42 +0000
commit dcc11f4043636395cfefc1b2511ac46aa58abcbf (patch)
tree 3de5a4196772d8292f0bbde9a86c63d16f6dd580
parent 67c8cdac99bc119c074bbacfa1d60108ec7a076d (diff)
download poi-dcc11f4043636395cfefc1b2511ac46aa58abcbf.tar.gz
poi-dcc11f4043636395cfefc1b2511ac46aa58abcbf.zip
Bugzilla 51803: fixed HSLF TextExtractor to extract content from master slide
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1203295 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/documentation/content/xdocs/status.xml 1
-rw-r--r-- src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java 17
-rw-r--r-- src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java 13
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java 8
-rw-r--r-- test-data/slideshow/master_text.ppt bin 71168 -> 117760 bytes
5 files changed, 26 insertions, 13 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index ee49e67d78..ace9d11642 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta5" date="2011-??-??">
+ <action dev="poi-developers" type="fix">51803 - fixed HSLF TextExtractor to extract content from master slide </action>
<action dev="poi-developers" type="fix">52190 - null check on XWPF setFontFamily</action>
<action dev="poi-developers" type="fix">52062 - ensure that temporary files in SXSSF are deleted</action>
<action dev="poi-developers" type="fix">50936 - Exception parsing MS Word 8.0 file (as duplicate of 47958)</action>
diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
index 8a195ac258..6610cde2ce 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
@@ -221,7 +221,22 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
if (getSlideText) {
if (getMasterText) {
for (SlideMaster master : _show.getSlidesMasters()) {
- textRunsToText(ret, master.getTextRuns());
+ for(Shape sh : master.getShapes()){
+ if(sh instanceof TextShape){
+ if(MasterSheet.isPlaceholder(sh)) {
+ // don't bother about boiler
+ // plate text on master
+ // sheets
+ continue;
+ }
+ TextShape tsh = (TextShape)sh;
+ String text = tsh.getText();
+ ret.append(text);
+ if (!text.endsWith("\n")) {
+ ret.append("\n");
+ }
+ }
+ }
}
}
diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
index e524665395..7850d774b1 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
@@ -24,7 +24,6 @@ import org.apache.poi.hslf.usermodel.SlideShow;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-import java.util.Vector;
import java.awt.*;
/**
@@ -122,7 +121,7 @@ public abstract class Sheet {
* For a given PPDrawing, grab all the TextRuns
*/
public static TextRun[] findTextRuns(PPDrawing ppdrawing) {
- Vector runsV = new Vector();
+ final List<TextRun> runsV = new ArrayList<TextRun>();
EscherTextboxWrapper[] wrappers = ppdrawing.getTextboxWrappers();
for (int i = 0; i < wrappers.length; i++) {
int s1 = runsV.size();
@@ -132,15 +131,11 @@ public abstract class Sheet {
findTextRuns(wrappers[i].getChildRecords(), runsV);
int s2 = runsV.size();
if (s2 != s1){
- TextRun t = (TextRun) runsV.get(runsV.size()-1);
+ TextRun t = runsV.get(runsV.size()-1);
t.setShapeId(wrappers[i].getShapeId());
}
}
- TextRun[] runs = new TextRun[runsV.size()];
- for (int i = 0; i < runs.length; i++) {
- runs[i] = (TextRun) runsV.get(i);
- }
- return runs;
+ return runsV.toArray(new TextRun[runsV.size()]);
}
/**
@@ -151,7 +146,7 @@ public abstract class Sheet {
* @param records the records to build from
* @param found vector to add any found to
*/
- protected static void findTextRuns(Record[] records, Vector found) {
+ protected static void findTextRuns(Record[] records, List<TextRun> found) {
// Look for a TextHeaderAtom
for (int i = 0, slwtIndex=0; i < (records.length - 1); i++) {
if (records[i] instanceof TextHeaderAtom) {
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
index c457c9c8c4..56eb0a0d9e 100644
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -292,13 +292,15 @@ public final class TestExtractor extends TestCase {
// Initially not there
String text = ppe.getText();
- assertFalse(text.contains("Master Header Text"));
+ assertFalse(text.contains("Text that I added to the master slide"));
// Enable, shows up
ppe.setMasterByDefault(true);
text = ppe.getText();
- assertTrue(text.contains("Master Header Text"));
-
+ assertTrue(text.contains("Text that I added to the master slide"));
+
+ // Make sure placeholder text does not come out
+ assertFalse(text.contains("Click to edit Master"));
// Now with another file only containing master text
// Will always show up
diff --git a/test-data/slideshow/master_text.ppt b/test-data/slideshow/master_text.ppt
index a748e8b217..cdcf4bc005 100644
--- a/test-data/slideshow/master_text.ppt
+++ b/test-data/slideshow/master_text.ppt
Binary files differ