import org.apache.poi.hslf.record.PersistPtrHolder;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.UserEditAtom;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* This class provides helper functions for determining if a
public final class EncryptedSlideShow
{
- /**
- * Check to see if a HSLFSlideShow represents an encrypted
- * PowerPoint document, or not
- * @param hss The HSLFSlideShow to check
- * @return true if encrypted, otherwise false
- */
- public static boolean checkIfEncrypted(HSLFSlideShow hss) {
- // Easy way to check - contains a stream
- // "EncryptedSummary"
- POIFSFileSystem fs = hss.getPOIFSFileSystem();
- try {
- fs.getRoot().getEntry("EncryptedSummary");
- return true;
- } catch(FileNotFoundException fnfe) {
- // Doesn't have encrypted properties
- }
-
- // If they encrypted the document but not the properties,
- // it's harder.
- // We need to see what the last record pointed to by the
- // first PersistPrtHolder is - if it's a
- // DocumentEncryptionAtom, then the file's Encrypted
- DocumentEncryptionAtom dea = fetchDocumentEncryptionAtom(hss);
- if(dea != null) {
- return true;
- }
- return false;
- }
+    /**
+     * Determines whether the supplied HSLFSlideShow holds an
+     *  encrypted PowerPoint document.
+     * @param hss The HSLFSlideShow to inspect
+     * @return true if the document is encrypted, otherwise false
+     */
+    public static boolean checkIfEncrypted(HSLFSlideShow hss) {
+        // Cheap test first: look for a stream named "EncryptedSummary"
+        //  in the document's directory
+        try {
+            hss.getPOIFSDirectory().getEntry("EncryptedSummary");
+            return true;
+        } catch(FileNotFoundException fnfe) {
+            // No such entry, so the properties are not encrypted;
+            //  carry on with the second check below
+        }
+
+        // The document itself may be encrypted even though its
+        //  properties are not. In that case we rely on
+        //  fetchDocumentEncryptionAtom (which, per the original note,
+        //  inspects the last record pointed to by the first
+        //  PersistPtrHolder): a non-null DocumentEncryptionAtom
+        //  means the file is encrypted.
+        return fetchDocumentEncryptionAtom(hss) != null;
+    }
/**
* Return the DocumentEncryptionAtom for a HSLFSlideShow, or
return directory.getFileSystem();
}
+    /**
+     * Gives access to the directory, within the underlying
+     *  POIFSFileSystem, of the document that is currently open.
+     *
+     * @return the directory node held by this document
+     */
+    protected DirectoryNode getPOIFSDirectory() {
+        return this.directory;
+    }
+
/**
* Constructs a Powerpoint document from fileName. Parses the document
* and places all the important stuff into data structures.
package org.apache.poi.hslf.extractor;
-import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.OLEShape;
import org.apache.poi.hslf.usermodel.SlideShow;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.POIDataSamples;
-
-import junit.framework.TestCase;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* Tests that the extractor correctly gets the text out of our sample file
public final class TestExtractor extends TestCase {
/** Extractor primed on the 2 page basic test data */
private PowerPointExtractor ppe;
+ private static final String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
+
/** Extractor primed on the 1 page but text-box'd test data */
private PowerPointExtractor ppe2;
+ private static final String expectText2 = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
+
+
/** Where our embeded files live */
//private String pdirname;
private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
public void testReadSheetText() {
// Basic 2 page example
String sheetText = ppe.getText();
- String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
ensureTwoStringsTheSame(expectText, sheetText);
// 1 page example with text boxes
sheetText = ppe2.getText();
- expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
- ensureTwoStringsTheSame(expectText, sheetText);
+ ensureTwoStringsTheSame(expectText2, sheetText);
}
public void testReadNoteText() {
assertTrue(text.contains("Master Header Text"));
}
+
+    /**
+     * Checks that text extraction behaves the same whether the file
+     *  is opened with {@link POIFSFileSystem} or with
+     *  {@link NPOIFSFileSystem}
+     */
+    public void testDifferentPOIFS() throws Exception {
+        // Root directory of the same sample file, once per filesystem implementation
+        DirectoryNode[] roots = {
+            (new POIFSFileSystem(slTests.openResourceAsStream("basic_test_ppt_file.ppt"))).getRoot(),
+            (new NPOIFSFileSystem(slTests.getFile("basic_test_ppt_file.ppt"))).getRoot()
+        };
+
+        // Hand each directory straight to the extractor
+        for(int i = 0; i < roots.length; i++) {
+            PowerPointExtractor direct = new PowerPointExtractor(roots[i], null);
+            assertEquals(expectText, direct.getText());
+        }
+
+        // Go through a HSLFSlideShow built on each directory
+        for(int i = 0; i < roots.length; i++) {
+            HSLFSlideShow show = new HSLFSlideShow(roots[i]);
+            PowerPointExtractor viaShow = new PowerPointExtractor(show);
+            assertEquals(expectText, viaShow.getText());
+        }
+    }
}
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
}
+
+    /**
+     * Checks that text extraction behaves the same whether the file
+     *  is opened with {@link POIFSFileSystem} or with
+     *  {@link NPOIFSFileSystem}
+     */
+    public void testDifferentPOIFS() throws Exception {
+        POIDataSamples docTests = POIDataSamples.getDocumentInstance();
+
+        // Root directory of the same sample file, once per filesystem implementation
+        DirectoryNode[] roots = {
+            (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot(),
+            (new NPOIFSFileSystem(docTests.getFile("test2.doc"))).getRoot()
+        };
+
+        // Hand each directory straight to the extractor
+        for(int i = 0; i < roots.length; i++) {
+            WordExtractor direct = new WordExtractor(roots[i], null);
+            assertEquals(p_text1_block, direct.getText());
+        }
+
+        // Go through a HWPFDocument built on each directory
+        for(int i = 0; i < roots.length; i++) {
+            HWPFDocument document = new HWPFDocument(roots[i]);
+            WordExtractor viaDocument = new WordExtractor(document);
+            assertEquals(p_text1_block, viaDocument.getText());
+        }
+    }
}