Tweak HSLF and HWPF to work well with NPOIFS, and add unit tests for this

author Nick Burch <nick@apache.org>

Sat, 1 Jan 2011 05:42:36 +0000 (05:42 +0000)

committer Nick Burch <nick@apache.org>

Sat, 1 Jan 2011 05:42:36 +0000 (05:42 +0000)
author Nick Burch <nick@apache.org>
Sat, 1 Jan 2011 05:42:36 +0000 (05:42 +0000)
committer Nick Burch <nick@apache.org>
Sat, 1 Jan 2011 05:42:36 +0000 (05:42 +0000)
diff --git a/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java

index 00152134f693a0fb2a0587f0383902648b53d5bf..d8839241ccb94c4a804d61bc36117a4c7fede6cd 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java
@@ -25,7 +25,6 @@ import org.apache.poi.hslf.record.DocumentEncryptionAtom;
  import org.apache.poi.hslf.record.PersistPtrHolder;
  import org.apache.poi.hslf.record.Record;
  import org.apache.poi.hslf.record.UserEditAtom;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  
  /**
   * This class provides helper functions for determining if a
@@ -39,34 +38,33 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  
  public final class EncryptedSlideShow
  {
-       /**
-        * Check to see if a HSLFSlideShow represents an encrypted
-        *  PowerPoint document, or not
-        * @param hss The HSLFSlideShow to check
-        * @return true if encrypted, otherwise false
-        */
-       public static boolean checkIfEncrypted(HSLFSlideShow hss) {
-               // Easy way to check - contains a stream
-               //  "EncryptedSummary"
-               POIFSFileSystem fs = hss.getPOIFSFileSystem();
-               try {
-                       fs.getRoot().getEntry("EncryptedSummary");
-                       return true;
-               } catch(FileNotFoundException fnfe) {
-                       // Doesn't have encrypted properties
-               }
-
-               // If they encrypted the document but not the properties,
-               //  it's harder.
-               // We need to see what the last record pointed to by the
-               //  first PersistPrtHolder is - if it's a
-               //  DocumentEncryptionAtom, then the file's Encrypted
-               DocumentEncryptionAtom dea = fetchDocumentEncryptionAtom(hss);
-               if(dea != null) {
-                       return true;
-               }
-               return false;
-       }
+   /**
+    * Check to see if a HSLFSlideShow represents an encrypted
+    *  PowerPoint document, or not
+    * @param hss The HSLFSlideShow to check
+    * @return true if encrypted, otherwise false
+    */
+   public static boolean checkIfEncrypted(HSLFSlideShow hss) {
+      // Easy way to check - contains a stream
+      //  "EncryptedSummary"
+      try {
+         hss.getPOIFSDirectory().getEntry("EncryptedSummary");
+         return true;
+      } catch(FileNotFoundException fnfe) {
+         // Doesn't have encrypted properties
+      }
+
+      // If they encrypted the document but not the properties,
+      //  it's harder.
+      // We need to see what the last record pointed to by the
+      //  first PersistPtrHolder is - if it's a
+      //  DocumentEncryptionAtom, then the file is encrypted
+      DocumentEncryptionAtom dea = fetchDocumentEncryptionAtom(hss);
+      if(dea != null) {
+         return true;
+      }
+      return false;
+   }
  
         /**
          * Return the DocumentEncryptionAtom for a HSLFSlideShow, or
diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java

index a317b2914222172bf6879ab0e2f5a1cb5835b24f..22a3b8abaa418519d90b4a89718232ba0f7206dc 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java
@@ -84,6 +84,14 @@ public final class HSLFSlideShow extends POIDocument {
                 return directory.getFileSystem();
         }
  
+   /**
+    * Returns the directory in the underlying POIFSFileSystem for the 
+    *  document that is open.
+    */
+   protected DirectoryNode getPOIFSDirectory() {
+      return directory;
+   }
+
         /**
          * Constructs a Powerpoint document from fileName. Parses the document
          * and places all the important stuff into data structures.
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java

index be1a3dcb0cc8b5fddf70e6624ba09a2ab53fadbd..3c7bcafda5806ec3ce73f7566944e37332a42b71 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -17,20 +17,20 @@
  
  package org.apache.poi.hslf.extractor;
  
-import java.io.FileInputStream;
  import java.io.InputStream;
  import java.util.List;
  
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
  import org.apache.poi.hslf.HSLFSlideShow;
  import org.apache.poi.hslf.model.OLEShape;
  import org.apache.poi.hslf.usermodel.SlideShow;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.POIDataSamples;
-
-import junit.framework.TestCase;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  
  /**
   * Tests that the extractor correctly gets the text out of our sample file
@@ -40,8 +40,13 @@ import junit.framework.TestCase;
  public final class TestExtractor extends TestCase {
         /** Extractor primed on the 2 page basic test data */
         private PowerPointExtractor ppe;
+   private static final String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
+
         /** Extractor primed on the 1 page but text-box'd test data */
         private PowerPointExtractor ppe2;
+       private static final String expectText2 = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
+       
+       
         /** Where our embeded files live */
         //private String pdirname;
      private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
@@ -55,16 +60,14 @@ public final class TestExtractor extends TestCase {
      public void testReadSheetText() {
         // Basic 2 page example
                 String sheetText = ppe.getText();
-               String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
  
                 ensureTwoStringsTheSame(expectText, sheetText);
                 
                 
                 // 1 page example with text boxes
                 sheetText = ppe2.getText();
-               expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
  
-               ensureTwoStringsTheSame(expectText, sheetText);
+               ensureTwoStringsTheSame(expectText2, sheetText);
      }
      
         public void testReadNoteText() {
@@ -273,4 +276,28 @@ public final class TestExtractor extends TestCase {
                 assertTrue(text.contains("Master Header Text"));
      }
  
+    
+    /**
+     * Tests that we can work with both {@link POIFSFileSystem}
+     *  and {@link NPOIFSFileSystem}
+     */
+    public void testDifferentPOIFS() throws Exception {
+       // Open the two filesystems
+       DirectoryNode[] files = new DirectoryNode[2];
+       files[0] = (new POIFSFileSystem(slTests.openResourceAsStream("basic_test_ppt_file.ppt"))).getRoot();
+       files[1] = (new NPOIFSFileSystem(slTests.getFile("basic_test_ppt_file.ppt"))).getRoot();
+       
+       // Open directly 
+       for(DirectoryNode dir : files) {
+          PowerPointExtractor extractor = new PowerPointExtractor(dir, null);
+          assertEquals(expectText, extractor.getText());
+       }
+
+       // Open via an HSLFSlideShow
+       for(DirectoryNode dir : files) {
+          HSLFSlideShow slideshow = new HSLFSlideShow(dir);
+          PowerPointExtractor extractor = new PowerPointExtractor(slideshow);
+          assertEquals(expectText, extractor.getText());
+       }
+    }
  }
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java

index 1ef21db3a5a445270eb93b98b7d170da965ed6d8..b1d565d00cbe3c13467642a62d9f111fc2148d07 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
@@ -24,6 +24,7 @@ import org.apache.poi.hwpf.HWPFDocument;
  import org.apache.poi.hwpf.HWPFTestDataSamples;
  import org.apache.poi.hwpf.OldWordFileFormatException;
  import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
  import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  
  /**
@@ -314,4 +315,30 @@ public final class TestWordExtractor extends TestCase {
  
          assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
      }
+    
+    /**
+     * Tests that we can work with both {@link POIFSFileSystem}
+     *  and {@link NPOIFSFileSystem}
+     */
+    public void testDifferentPOIFS() throws Exception {
+       POIDataSamples docTests = POIDataSamples.getDocumentInstance();
+       
+       // Open the two filesystems
+       DirectoryNode[] files = new DirectoryNode[2];
+       files[0] = (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot();
+       files[1] = (new NPOIFSFileSystem(docTests.getFile("test2.doc"))).getRoot();
+       
+       // Open directly 
+       for(DirectoryNode dir : files) {
+          WordExtractor extractor = new WordExtractor(dir, null);
+          assertEquals(p_text1_block, extractor.getText());
+       }
+
+       // Open via a HWPFDocument
+       for(DirectoryNode dir : files) {
+          HWPFDocument doc = new HWPFDocument(dir);
+          WordExtractor extractor = new WordExtractor(doc);
+          assertEquals(p_text1_block, extractor.getText());
+       }
+    }
  }
author	Nick Burch <nick@apache.org>
	Sat, 1 Jan 2011 05:42:36 +0000 (05:42 +0000)
committer	Nick Burch <nick@apache.org>
	Sat, 1 Jan 2011 05:42:36 +0000 (05:42 +0000)
src/scratchpad/src/org/apache/poi/hslf/EncryptedSlideShow.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java		patch \| blob \| history