aboutsummaryrefslogtreecommitdiffstats
path: root/src/scratchpad
diff options
context:
space:
mode:
authorYegor Kozlov <yegor@apache.org>2009-07-05 13:15:41 +0000
committerYegor Kozlov <yegor@apache.org>2009-07-05 13:15:41 +0000
commite71804ae01b27d9c64f6e7c477cd137a2cdd798b (patch)
treea606d68cd1d34efd66c04e692e0d6c96f25f90b0 /src/scratchpad
parentbb85e52d3de852f8c706722316e8476fb3dc95a3 (diff)
downloadpoi-e71804ae01b27d9c64f6e7c477cd137a2cdd798b.tar.gz
poi-e71804ae01b27d9c64f6e7c477cd137a2cdd798b.zip
Support for getting OLE object data in PowerPointExtractor, see Bugzilla 47456
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@791241 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
-rw-r--r--src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java27
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java72
2 files changed, 49 insertions, 50 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
index 5e6cad5100..a8dbfa2a93 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
@@ -21,14 +21,12 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hslf.HSLFSlideShow;
-import org.apache.poi.hslf.model.Comment;
-import org.apache.poi.hslf.model.HeadersFooters;
-import org.apache.poi.hslf.model.Notes;
-import org.apache.poi.hslf.model.Slide;
-import org.apache.poi.hslf.model.TextRun;
+import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -151,7 +149,24 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
return getText(false,true);
}
- /**
+ /**
+  * Collects the OLE shapes found on the slides of this slideshow.
+  * Slides are visited in the order they are held by the extractor, and
+  * for each slide only the shapes returned by <code>getShapes()</code>
+  * are examined, so the result keeps slide order, then shape order.
+  *
+  * @return a newly created list of the OLE shapes; empty when none of
+  *  the examined shapes is an <code>OLEShape</code>
+  */
+ public List<OLEShape> getOLEShapes() {
+     List<OLEShape> oleShapes = new ArrayList<OLEShape>();
+
+     for (Slide slide : _slides) {
+         for (Shape candidate : slide.getShapes()) {
+             // Keep only the shapes that are OLE objects
+             if (candidate instanceof OLEShape) {
+                 oleShapes.add((OLEShape) candidate);
+             }
+         }
+     }
+
+     return oleShapes;
+ }
+
+ /**
* Fetches text from the slideshow, be it slide text or note text.
* Because the final block of text in a TextRun normally have their
* last \n stripped, we add it back
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
index 9c213d4773..769638bf02 100644
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -18,11 +18,16 @@
package org.apache.poi.hslf.extractor;
import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.List;
import org.apache.poi.hslf.HSLFSlideShow;
+import org.apache.poi.hslf.model.OLEShape;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hwpf.HWPFDocument;
import junit.framework.TestCase;
@@ -167,51 +172,30 @@ public final class TestExtractor extends TestCase {
/**
* A powerpoint file with embeded powerpoint files
- * TODO - figure out how to handle this, as ppt
- * appears to embed not as ole2 streams
*/
- public void DISABLEDtestExtractFromOwnEmbeded() throws Exception {
- String filename3 = pdirname + "/ppt_with_embeded.ppt";
- POIFSFileSystem fs = new POIFSFileSystem(
- new FileInputStream(filename3)
- );
- HSLFSlideShow ss;
-
- DirectoryNode dirA = (DirectoryNode)
- fs.getRoot().getEntry("MBD0000A3B6");
- DirectoryNode dirB = (DirectoryNode)
- fs.getRoot().getEntry("MBD0000A3B3");
-
- assertNotNull(dirA.getEntry("PowerPoint Document"));
- assertNotNull(dirB.getEntry("PowerPoint Document"));
-
- // Check the first file
- ss = new HSLFSlideShow(dirA, fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
- ppe.getText(true, false)
- );
-
- // And the second
- ss = new HSLFSlideShow(dirB, fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
- ppe.getText(true, false)
- );
-
-
- // Check the master doc two ways
- ss = new HSLFSlideShow(fs.getRoot(), fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("I have embeded files in me\n",
- ppe.getText(true, false)
- );
-
- ss = new HSLFSlideShow(fs);
- ppe = new PowerPointExtractor(ss);
- assertEquals("I have embeded files in me\n",
- ppe.getText(true, false)
- );
+ public void testExtractFromOwnEmbeded() throws Exception {
+     String path = pdirname + "/ppt_with_embeded.ppt";
+     ppe = new PowerPointExtractor(path);
+     List<OLEShape> shapes = ppe.getOLEShapes();
+     assertEquals("Expected 6 ole shapes in " + path, 6, shapes.size());
+
+     // Count the embedded objects by kind, keyed on the OLE instance name
+     int numPpt = 0, numDoc = 0, numXls = 0;
+     for (OLEShape ole : shapes) {
+         String name = ole.getInstanceName();
+         InputStream data = ole.getObjectData().getData();
+         try {
+             // Building the document from the embedded data is the check
+             // itself: any exception thrown here fails the test, so the
+             // created objects do not need to be kept.
+             if ("Worksheet".equals(name)) {
+                 new HSSFWorkbook(data);
+                 numXls++;
+             } else if ("Document".equals(name)) {
+                 new HWPFDocument(data);
+                 numDoc++;
+             } else if ("Presentation".equals(name)) {
+                 new SlideShow(data);
+                 numPpt++;
+             }
+         } finally {
+             // Always release the object data stream, even when parsing
+             // throws; closing an already closed stream is harmless
+             data.close();
+         }
+     }
+
+     assertEquals("Expected 2 embedded Word Documents", 2, numDoc);
+     assertEquals("Expected 2 embedded Excel Spreadsheets", 2, numXls);
+     assertEquals("Expected 2 embedded PowerPoint Presentations", 2, numPpt);
}
/**