From a7a35871dba2af8a15780d31ca799d5448033fef Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sat, 1 Jan 2011 05:42:36 +0000 Subject: [PATCH] Tweak HSLF and HWPF to work well with NPOIFS, and add unit tests for this git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1054191 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/hslf/EncryptedSlideShow.java | 56 +++++++++---------- .../org/apache/poi/hslf/HSLFSlideShow.java | 8 +++ .../poi/hslf/extractor/TestExtractor.java | 45 ++++++++++++--- .../poi/hwpf/extractor/TestWordExtractor.java | 27 +++++++++ 4 files changed, 98 insertions(+), 38 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java index 00152134f6..d8839241cc 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java @@ -25,7 +25,6 @@ import org.apache.poi.hslf.record.DocumentEncryptionAtom; import org.apache.poi.hslf.record.PersistPtrHolder; import org.apache.poi.hslf.record.Record; import org.apache.poi.hslf.record.UserEditAtom; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * This class provides helper functions for determining if a @@ -39,34 +38,33 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; public final class EncryptedSlideShow { - /** - * Check to see if a HSLFSlideShow represents an encrypted - * PowerPoint document, or not - * @param hss The HSLFSlideShow to check - * @return true if encrypted, otherwise false - */ - public static boolean checkIfEncrypted(HSLFSlideShow hss) { - // Easy way to check - contains a stream - // "EncryptedSummary" - POIFSFileSystem fs = hss.getPOIFSFileSystem(); - try { - fs.getRoot().getEntry("EncryptedSummary"); - return true; - } catch(FileNotFoundException fnfe) { - // Doesn't have encrypted properties - } - - // If they encrypted the document but not the properties, - // it's harder. 
- // We need to see what the last record pointed to by the - // first PersistPrtHolder is - if it's a - // DocumentEncryptionAtom, then the file's Encrypted - DocumentEncryptionAtom dea = fetchDocumentEncryptionAtom(hss); - if(dea != null) { - return true; - } - return false; - } + /** + * Check to see if a HSLFSlideShow represents an encrypted + * PowerPoint document, or not + * @param hss The HSLFSlideShow to check + * @return true if encrypted, otherwise false + */ + public static boolean checkIfEncrypted(HSLFSlideShow hss) { + // Easy way to check - contains a stream + // "EncryptedSummary" + try { + hss.getPOIFSDirectory().getEntry("EncryptedSummary"); + return true; + } catch(FileNotFoundException fnfe) { + // Doesn't have encrypted properties + } + + // If they encrypted the document but not the properties, + // it's harder. + // We need to see what the last record pointed to by the + // first PersistPtrHolder is - if it's a + // DocumentEncryptionAtom, then the file's Encrypted + DocumentEncryptionAtom dea = fetchDocumentEncryptionAtom(hss); + if(dea != null) { + return true; + } + return false; + } /** * Return the DocumentEncryptionAtom for a HSLFSlideShow, or diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java index a317b29142..22a3b8abaa 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -84,6 +84,14 @@ public final class HSLFSlideShow extends POIDocument { return directory.getFileSystem(); } + /** + * Returns the directory in the underlying POIFSFileSystem for the + * document that is open. + */ + protected DirectoryNode getPOIFSDirectory() { + return directory; + } + /** * Constructs a Powerpoint document from fileName. Parses the document * and places all the important stuff into data structures. 
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java index be1a3dcb0c..3c7bcafda5 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java @@ -17,20 +17,20 @@ package org.apache.poi.hslf.extractor; -import java.io.FileInputStream; import java.io.InputStream; import java.util.List; +import junit.framework.TestCase; + +import org.apache.poi.POIDataSamples; import org.apache.poi.hslf.HSLFSlideShow; import org.apache.poi.hslf.model.OLEShape; import org.apache.poi.hslf.usermodel.SlideShow; -import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.HWPFDocument; -import org.apache.poi.POIDataSamples; - -import junit.framework.TestCase; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * Tests that the extractor correctly gets the text out of our sample file @@ -40,8 +40,13 @@ import junit.framework.TestCase; public final class TestExtractor extends TestCase { /** Extractor primed on the 2 page basic test data */ private PowerPointExtractor ppe; + private static final String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"; + /** Extractor primed on the 1 page but text-box'd test data */ private PowerPointExtractor ppe2; + private static final String expectText2 = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n"; + + /** Where our embeded files live */ //private String pdirname; private static POIDataSamples slTests = 
POIDataSamples.getSlideShowInstance(); @@ -55,16 +60,14 @@ public final class TestExtractor extends TestCase { public void testReadSheetText() { // Basic 2 page example String sheetText = ppe.getText(); - String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"; ensureTwoStringsTheSame(expectText, sheetText); // 1 page example with text boxes sheetText = ppe2.getText(); - expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n"; - ensureTwoStringsTheSame(expectText, sheetText); + ensureTwoStringsTheSame(expectText2, sheetText); } public void testReadNoteText() { @@ -273,4 +276,28 @@ public final class TestExtractor extends TestCase { assertTrue(text.contains("Master Header Text")); } + + /** + * Tests that we can work with both {@link POIFSFileSystem} + * and {@link NPOIFSFileSystem} + */ + public void testDifferentPOIFS() throws Exception { + // Open the two filesystems + DirectoryNode[] files = new DirectoryNode[2]; + files[0] = (new POIFSFileSystem(slTests.openResourceAsStream("basic_test_ppt_file.ppt"))).getRoot(); + files[1] = (new NPOIFSFileSystem(slTests.getFile("basic_test_ppt_file.ppt"))).getRoot(); + + // Open directly + for(DirectoryNode dir : files) { + PowerPointExtractor extractor = new PowerPointExtractor(dir, null); + assertEquals(expectText, extractor.getText()); + } + + // Open via a HSLFSlideShow + for(DirectoryNode dir : files) { + HSLFSlideShow slideshow = new HSLFSlideShow(dir); + PowerPointExtractor extractor = new PowerPointExtractor(slideshow); + assertEquals(expectText, extractor.getText()); + } + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java index 1ef21db3a5..b1d565d00c 100644 --- 
a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java @@ -24,6 +24,7 @@ import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFTestDataSamples; import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** @@ -314,4 +315,30 @@ public final class TestWordExtractor extends TestCase { assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435")); } + + /** + * Tests that we can work with both {@link POIFSFileSystem} + * and {@link NPOIFSFileSystem} + */ + public void testDifferentPOIFS() throws Exception { + POIDataSamples docTests = POIDataSamples.getDocumentInstance(); + + // Open the two filesystems + DirectoryNode[] files = new DirectoryNode[2]; + files[0] = (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot(); + files[1] = (new NPOIFSFileSystem(docTests.getFile("test2.doc"))).getRoot(); + + // Open directly + for(DirectoryNode dir : files) { + WordExtractor extractor = new WordExtractor(dir, null); + assertEquals(p_text1_block, extractor.getText()); + } + + // Open via a HWPFDocument + for(DirectoryNode dir : files) { + HWPFDocument doc = new HWPFDocument(dir); + WordExtractor extractor = new WordExtractor(doc); + assertEquals(p_text1_block, extractor.getText()); + } + } } -- 2.39.5