summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Yegor Kozlov <yegor@apache.org> 2009-11-12 07:07:41 +0000
committer: Yegor Kozlov <yegor@apache.org> 2009-11-12 07:07:41 +0000
commit: a53a2ef1f8ed53f574b7688cf6035dde62cc554d (patch)
tree: 3782665efe0b739453718e57767f31b94de8eb5a
parent: 6233bdeb73c13d465bd6e860eff88b7b114933a7 (diff)
download: poi-a53a2ef1f8ed53f574b7688cf6035dde62cc554d.tar.gz
          poi-a53a2ef1f8ed53f574b7688cf6035dde62cc554d.zip
support for text extraction from PPT master slides, see Bugzilla 48161
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@835271 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--  src/documentation/content/xdocs/status.xml  1
-rw-r--r--  src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java  69
-rw-r--r--  src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java  14
-rw-r--r--  test-data/slideshow/master_text.ppt  bin 0 -> 71168 bytes
4 files changed, 53 insertions, 31 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 7725c909d2..c26a9fb685 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.6-beta1" date="2009-??-??">
+ <action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
<action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
<action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>
<action dev="POI-DEVELOPERS" type="fix">48087 - avoid NPE in XSSFChartSheet when calling methods of the superclass</action>
diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
index 97c7d71f6a..05ade6ab9e 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
@@ -45,6 +45,7 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
private boolean _slidesByDefault = true;
private boolean _notesByDefault = false;
private boolean _commentsByDefault = false;
+ private boolean _masterByDefault = false;
/**
* Basic extractor. Returns all the text, and optionally all the notes
@@ -58,6 +59,8 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
boolean notes = false;
boolean comments = false;
+ boolean master = true;
+
String file;
if (args.length > 1) {
notes = true;
@@ -70,7 +73,7 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
}
PowerPointExtractor ppe = new PowerPointExtractor(file);
- System.out.println(ppe.getText(true, notes, comments));
+ System.out.println(ppe.getText(true, notes, comments, master));
}
/**
@@ -137,12 +140,19 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
this._commentsByDefault = commentsByDefault;
}
+ /**
+ * Should a call to getText() return text from master? Default is no
+ */
+ public void setMasterByDefault(boolean masterByDefault) {
+ this._masterByDefault = masterByDefault;
+ }
+
/**
* Fetches all the slide text from the slideshow, but not the notes, unless
* you've called setSlidesByDefault() and setNotesByDefault() to change this
*/
public String getText() {
- return getText(_slidesByDefault, _notesByDefault, _commentsByDefault);
+ return getText(_slidesByDefault, _notesByDefault, _commentsByDefault, _masterByDefault);
}
/**
@@ -178,14 +188,20 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
* @param getNoteText fetch note text
*/
public String getText(boolean getSlideText, boolean getNoteText) {
- return getText(getSlideText, getNoteText, _commentsByDefault);
+ return getText(getSlideText, getNoteText, _commentsByDefault, _masterByDefault);
}
- public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) {
+ public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText, boolean getMasterText) {
StringBuffer ret = new StringBuffer();
if (getSlideText) {
- for (int i = 0; i < _slides.length; i++) {
+ if (getMasterText) {
+ for (SlideMaster master : _show.getSlidesMasters()) {
+ textRunsToText(ret, master.getTextRuns());
+ }
+ }
+
+ for (int i = 0; i < _slides.length; i++) {
Slide slide = _slides[i];
// Slide header, if set
@@ -195,19 +211,9 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
}
// Slide text
- TextRun[] runs = slide.getTextRuns();
- for (int j = 0; j < runs.length; j++) {
- TextRun run = runs[j];
- if (run != null) {
- String text = run.getText();
- ret.append(text);
- if (!text.endsWith("\n")) {
- ret.append("\n");
- }
- }
- }
+ textRunsToText(ret, slide.getTextRuns());
- // Slide footer, if set
+ // Slide footer, if set
if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n");
}
@@ -249,17 +255,7 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
}
// Notes text
- TextRun[] runs = notes.getTextRuns();
- if (runs != null && runs.length > 0) {
- for (int j = 0; j < runs.length; j++) {
- TextRun run = runs[j];
- String text = run.getText();
- ret.append(text);
- if (!text.endsWith("\n")) {
- ret.append("\n");
- }
- }
- }
+ textRunsToText(ret, notes.getTextRuns());
// Repeat the notes footer, if set
if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
@@ -270,4 +266,21 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
return ret.toString();
}
+
+ private void textRunsToText(StringBuffer ret, TextRun[] runs) {
+ if (runs==null) {
+ return;
+ }
+
+ for (int j = 0; j < runs.length; j++) {
+ TextRun run = runs[j];
+ if (run != null) {
+ String text = run.getText();
+ ret.append(text);
+ if (!text.endsWith("\n")) {
+ ret.append("\n");
+ }
+ }
+ }
+ }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
index 66316be7df..016a7d0f58 100644
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -48,7 +48,6 @@ public final class TestExtractor extends TestCase {
//private String pdirname;
protected void setUp() throws Exception {
-
ppe = new PowerPointExtractor(slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
ppe2 = new PowerPointExtractor(slTests.openResourceAsStream("with_textbox.ppt"));
}
@@ -63,7 +62,7 @@ public final class TestExtractor extends TestCase {
// 1 page example with text boxes
sheetText = ppe2.getText();
- expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
+ expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
ensureTwoStringsTheSame(expectText, sheetText);
}
@@ -112,7 +111,7 @@ public final class TestExtractor extends TestCase {
*/
public void testMissingCoreRecords() throws Exception {
ppe = new PowerPointExtractor(slTests.openResourceAsStream("missing_core_records.ppt"));
-
+
String text = ppe.getText(true, false);
String nText = ppe.getNotes();
@@ -265,4 +264,13 @@ public final class TestExtractor extends TestCase {
private static boolean contains(String text, String searchString) {
return text.indexOf(searchString) >=0;
}
+
+ public void testMasterText() throws Exception {
+ ppe = new PowerPointExtractor(slTests.openResourceAsStream("master_text.ppt"));
+ ppe.setMasterByDefault(true);
+
+ String text = ppe.getText();
+ assertTrue(text.contains("Master Header Text"));
+ }
+
}
diff --git a/test-data/slideshow/master_text.ppt b/test-data/slideshow/master_text.ppt
new file mode 100644
index 0000000000..a748e8b217
--- /dev/null
+++ b/test-data/slideshow/master_text.ppt
Binary files differ