diff options
author | Yegor Kozlov <yegor@apache.org> | 2009-07-05 13:15:41 +0000 |
---|---|---|
committer | Yegor Kozlov <yegor@apache.org> | 2009-07-05 13:15:41 +0000 |
commit | e71804ae01b27d9c64f6e7c477cd137a2cdd798b (patch) | |
tree | a606d68cd1d34efd66c04e692e0d6c96f25f90b0 /src/scratchpad | |
parent | bb85e52d3de852f8c706722316e8476fb3dc95a3 (diff) | |
download | poi-e71804ae01b27d9c64f6e7c477cd137a2cdd798b.tar.gz poi-e71804ae01b27d9c64f6e7c477cd137a2cdd798b.zip |
Support for getting OLE object data in PowerPointExtractor, see Bugzilla 47456
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@791241 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
-rw-r--r-- | src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java | 27 | ||||
-rw-r--r-- | src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java | 72 |
2 files changed, 49 insertions, 50 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
index 5e6cad5100..a8dbfa2a93 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
@@ -21,14 +21,12 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
 
 import org.apache.poi.POIOLE2TextExtractor;
 import org.apache.poi.hslf.HSLFSlideShow;
-import org.apache.poi.hslf.model.Comment;
-import org.apache.poi.hslf.model.HeadersFooters;
-import org.apache.poi.hslf.model.Notes;
-import org.apache.poi.hslf.model.Slide;
-import org.apache.poi.hslf.model.TextRun;
+import org.apache.poi.hslf.model.*;
 import org.apache.poi.hslf.usermodel.SlideShow;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -151,7 +149,24 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
 		return getText(false,true);
 	}
 
-	/**
+    public List<OLEShape> getOLEShapes() {
+        List<OLEShape> list = new ArrayList<OLEShape>();
+
+        for (int i = 0; i < _slides.length; i++) {
+            Slide slide = _slides[i];
+
+            Shape[] shapes = slide.getShapes();
+            for (int j = 0; j < shapes.length; j++) {
+                if (shapes[j] instanceof OLEShape) {
+                    list.add((OLEShape) shapes[j]);
+                }
+            }
+        }
+
+        return list;
+    }
+
+    /**
 	 * Fetches text from the slideshow, be it slide text or note text.
 	 * Because the final block of text in a TextRun normally have their
 	 * last \n stripped, we add it back
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
index 9c213d4773..769638bf02 100644
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -18,11 +18,16 @@
 package org.apache.poi.hslf.extractor;
 
 import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.List;
 
 import org.apache.poi.hslf.HSLFSlideShow;
+import org.apache.poi.hslf.model.OLEShape;
 import org.apache.poi.hslf.usermodel.SlideShow;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hwpf.HWPFDocument;
 
 import junit.framework.TestCase;
 
@@ -167,51 +172,30 @@ public final class TestExtractor extends TestCase {
 
 	/**
 	 * A powerpoint file with embeded powerpoint files
-	 * TODO - figure out how to handle this, as ppt
-	 *  appears to embed not as ole2 streams
 	 */
-	public void DISABLEDtestExtractFromOwnEmbeded() throws Exception {
-		String filename3 = pdirname + "/ppt_with_embeded.ppt";
-		POIFSFileSystem fs = new POIFSFileSystem(
-			new FileInputStream(filename3)
-		);
-		HSLFSlideShow ss;
-
-		DirectoryNode dirA = (DirectoryNode)
-			fs.getRoot().getEntry("MBD0000A3B6");
-		DirectoryNode dirB = (DirectoryNode)
-			fs.getRoot().getEntry("MBD0000A3B3");
-
-		assertNotNull(dirA.getEntry("PowerPoint Document"));
-		assertNotNull(dirB.getEntry("PowerPoint Document"));
-
-		// Check the first file
-		ss = new HSLFSlideShow(dirA, fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
-			ppe.getText(true, false)
-		);
-
-		// And the second
-		ss = new HSLFSlideShow(dirB, fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
-			ppe.getText(true, false)
-		);
-
-
-		// Check the master doc two ways
-		ss = new HSLFSlideShow(fs.getRoot(), fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("I have embeded files in me\n",
-			ppe.getText(true, false)
-		);
-
-		ss = new HSLFSlideShow(fs);
-		ppe = new PowerPointExtractor(ss);
-		assertEquals("I have embeded files in me\n",
-			ppe.getText(true, false)
-		);
+    public void testExtractFromOwnEmbeded() throws Exception {
+        String path = pdirname + "/ppt_with_embeded.ppt";
+        ppe = new PowerPointExtractor(path);
+        List<OLEShape> shapes = ppe.getOLEShapes();
+        assertEquals("Expected 6 ole shapes in " + path, 6, shapes.size());
+        int num_ppt = 0, num_doc = 0, num_xls = 0;
+        for(OLEShape ole : shapes) {
+            String name = ole.getInstanceName();
+            InputStream data = ole.getObjectData().getData();
+            if ("Worksheet".equals(name)) {
+                HSSFWorkbook wb = new HSSFWorkbook(data);
+                num_xls++;
+            } else if ("Document".equals(name)) {
+                HWPFDocument doc = new HWPFDocument(data);
+                num_doc++;
+            } else if ("Presentation".equals(name)) {
+                num_ppt++;
+                SlideShow ppt = new SlideShow(data);
+            }
+        }
+        assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
+        assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
+        assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
 	}
 
 	/**
|