]> source.dussan.org Git - poi.git/commitdiff
Improve how POIFS works with directory entries, and update HWPFDocument to support...
authorNick Burch <nick@apache.org>
Thu, 10 Apr 2008 16:59:10 +0000 (16:59 +0000)
committerNick Burch <nick@apache.org>
Thu, 10 Apr 2008 16:59:10 +0000 (16:59 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@646870 13f79535-47bb-0310-9956-ffa450edef68

build.xml
src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/POIDocument.java
src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java
src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java
src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java
src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java
src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java

index b4e10a0085a912506d0a5fb970f79e290a7c207a..4201e8155eb8a362a42ea9586458f71fac023187 100644 (file)
--- a/build.xml
+++ b/build.xml
@@ -521,6 +521,8 @@ under the License.
         file="${main.src.test}/org/apache/poi/hwpf/data"/> 
       <sysproperty key="HPSF.testdata.path"
         file="${main.src.test}/org/apache/poi/hpsf/data"/> 
+      <sysproperty key="POIFS.testdata.path"
+        file="${main.src.test}/org/apache/poi/poifs/data"/> 
       <sysproperty key="java.awt.headless" value="true"/>
       <formatter type="plain"/>
       <formatter type="xml"/>
@@ -556,6 +558,8 @@ under the License.
         file="${main.src.test}/org/apache/poi/hpsf/data"/>
       <sysproperty key="HWPF.testdata.path"
         file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
+      <sysproperty key="POIFS.testdata.path"
+        file="${main.src.test}/org/apache/poi/poifs/data"/> 
       <sysproperty key="java.awt.headless" value="true"/>
       <formatter type="plain" usefile="no"/>
       <batchtest todir="${main.reports.test}">
@@ -585,6 +589,7 @@ under the License.
             <sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
             <sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
             <sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+            <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> 
                 <sysproperty key="java.awt.headless" value="true"/>
             <formatter type="plain" usefile="no"/>
             <formatter type="xml"/>
@@ -601,6 +606,7 @@ under the License.
             <classpath refid="test.classpath"/>
             <sysproperty key="HSSF.testdata.path" file="${main.src.test}/org/apache/poi/hssf/data"/>
             <sysproperty key="HPSF.testdata.path" file="${main.src.test}/org/apache/poi/hpsf/data"/>
+            <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> 
             <sysproperty key="java.awt.headless" value="true"/>
             <formatter type="plain" usefile="no"/>
             <test name="${testcase}"/>
@@ -639,6 +645,7 @@ under the License.
             <sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
             <sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
             <sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+            <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> 
             <sysproperty key="java.awt.headless" value="true"/>
             <formatter type="plain"/>
             <formatter type="xml"/>
@@ -673,6 +680,7 @@ under the License.
                 <sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
                 <sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
                 <sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+                <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> 
                    <sysproperty key="java.awt.headless" value="true"/>
                        <sysproperty key="java.awt.headless" value="true"/>
                    <formatter type="plain" usefile="no"/>
index c3b1f72e846fb934b6eb1de9e21c0787cb6a4a22..2bfb46d0e2ef36ac8a24e0c283dc28a4ec34a632 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.0.3-beta1" date="2008-04-??">
+           <action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document</action>
            <action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
            <action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
            <action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
index 2ceb4ea9e568a0cdb993f3226b6121b9a714a985..9d50f53edb48ab809f91efa2c6394aeeb394ce7d 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.0.3-beta1" date="2008-04-??">
+           <action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document</action>
            <action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
            <action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
            <action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
index 075fa4538118f8533704fe1ed220ef4a9626e946..01e50231ce0df556b07fb471e7bd508406190449 100644 (file)
@@ -29,6 +29,7 @@ import org.apache.poi.hpsf.PropertySet;
 import org.apache.poi.hpsf.PropertySetFactory;
 import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.filesystem.DocumentInputStream;
 import org.apache.poi.poifs.filesystem.Entry;
@@ -50,12 +51,23 @@ public abstract class POIDocument {
        protected DocumentSummaryInformation dsInf;
        /** The open POIFS FileSystem that contains our document */
        protected POIFSFileSystem filesystem;
+       /**     The directory that our document lives in */
+       protected DirectoryNode directory;
        
        /** For our own logging use */
        protected POILogger logger = POILogFactory.getLogger(this.getClass());
 
     /* Have the property streams been read yet? (Only done on-demand) */
     protected boolean initialized = false;
+    
+
+    protected POIDocument(DirectoryNode dir, POIFSFileSystem fs) {
+       this.filesystem = fs;
+       this.directory = dir;
+    }
+    protected POIDocument(POIFSFileSystem fs) {
+       this(fs.getRoot(), fs);
+    }
 
        /**
         * Fetch the Document Summary Information of the document
@@ -110,7 +122,7 @@ public abstract class POIDocument {
                DocumentInputStream dis;
                try {
                        // Find the entry, and get an input stream for it
-                       dis = filesystem.createDocumentInputStream(setName);
+                       dis = directory.createDocumentInputStream(setName);
                } catch(IOException ie) {
                        // Oh well, doesn't exist
                        logger.log(POILogger.WARN, "Error getting property set with name " + setName + "\n" + ie);
index 3838e634d92ed52088713d741056e30307c079ae..1eb4f7c8cfe116d6a86e61b631129581453665be 100644 (file)
@@ -139,6 +139,7 @@ public class HSSFWorkbook extends POIDocument
 
     protected HSSFWorkbook( Workbook book )
     {
+       super(null, null);
         workbook = book;
         sheets = new ArrayList( INITIAL_CAPACITY );
         names = new ArrayList( INITIAL_CAPACITY );
@@ -164,8 +165,8 @@ public class HSSFWorkbook extends POIDocument
     public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes)
             throws IOException
     {
+       super(fs);
         this.preserveNodes = preserveNodes;
-        this.filesystem = fs;
         
         // If we're not preserving nodes, don't track the
         //  POIFS any more
index cb8039033db2deaae90fcb16cfd8a4d4d7b90159..6805e5197b3a23187f26631396a90dd770cc97f7 100644 (file)
@@ -105,6 +105,31 @@ public class DirectoryNode
     {
         return _path;
     }
+    
+    /**
+     * open a document in this directory's list of entries
+     *
+     * @param documentName the name of the document to be opened
+     *
+     * @return a newly opened DocumentInputStream
+     *
+     * @exception IOException if the document does not exist or the
+     *            name is that of a DirectoryEntry
+     */
+
+    public DocumentInputStream createDocumentInputStream(
+            final String documentName)
+        throws IOException
+    {
+        Entry document = getEntry(documentName);
+
+        if (!document.isDocumentEntry())
+        {
+            throw new IOException("Entry '" + documentName
+                                  + "' is not a DocumentEntry");
+        }
+        return new DocumentInputStream(( DocumentEntry ) document);
+    }
 
     /**
      * create a new DocumentEntry
index 61774dc6768e8a599a20196fa561218104578843..7c693a5de8663fac31d6f3caab3fe0b5532d1ab2 100644 (file)
@@ -287,7 +287,7 @@ public class POIFSFileSystem
     {
         return getRoot().createDirectory(name);
     }
-
+    
     /**
      * Write the filesystem out
      *
@@ -422,7 +422,7 @@ public class POIFSFileSystem
      * @return the root entry
      */
 
-    public DirectoryEntry getRoot()
+    public DirectoryNode getRoot()
     {
         if (_root == null)
         {
@@ -446,14 +446,7 @@ public class POIFSFileSystem
             final String documentName)
         throws IOException
     {
-        Entry document = getRoot().getEntry(documentName);
-
-        if (!document.isDocumentEntry())
-        {
-            throw new IOException("Entry '" + documentName
-                                  + "' is not a DocumentEntry");
-        }
-        return new DocumentInputStream(( DocumentEntry ) document);
+       return getRoot().createDocumentInputStream(documentName);
     }
 
     /**
index 955cbc5ab463db57e6f0078d442519134047f85c..af66163072e673f36f1a925c53d7624cff95f1ee 100644 (file)
@@ -53,7 +53,7 @@ public class HDGFDiagram extends POIDocument {
        private PointerFactory ptrFactory;
        
        public HDGFDiagram(POIFSFileSystem fs) throws IOException {
-               filesystem = fs;
+               super(fs);
                
                DocumentEntry docProps =
                        (DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");
index 12afcc49f622846ce268afb11c228bee2c96f231..2c523c70a6a48ba1bafece0e57361c71530e3fcd 100644 (file)
@@ -124,7 +124,7 @@ public class HSLFSlideShow extends POIDocument
         */
        public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
        {
-               this.filesystem = filesystem;
+               super(filesystem);
                
                // First up, grab the "Current User" stream
                // We need this before we can detect Encrypted Documents
index 557060aa50bb8f511970ef2be8b7f86b047c67b6..a54e50de43c618c5a7d5f82507a0c88488c03d07 100644 (file)
@@ -29,6 +29,7 @@ import java.io.ByteArrayInputStream;
 import java.util.Iterator;
 
 import org.apache.poi.POIDocument;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.common.POIFSConstants;
@@ -95,7 +96,7 @@ public class HWPFDocument extends POIDocument
 
   protected HWPFDocument()
   {
-
+     super(null, null);
   }
 
   /**
@@ -132,7 +133,7 @@ public class HWPFDocument extends POIDocument
     //do Ole stuff
     this( verifyAndBuildPOIFS(istream) );
   }
-
+  
   /**
    * This constructor loads a Word document from a POIFSFileSystem
    *
@@ -141,16 +142,31 @@ public class HWPFDocument extends POIDocument
    *         in POIFSFileSystem.
    */
   public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException
+  {
+       this(pfilesystem.getRoot(), pfilesystem);
+  }
+  
+  /**
+   * This constructor loads a Word document from a specific point
+   *  in a POIFSFileSystem, probably not the default.
+   * Used typically to open embedded documents.
+   *
+   * @param pfilesystem The POIFSFileSystem that contains the Word document.
+   * @throws IOException If there is an unexpected IOException from the passed
+   *         in POIFSFileSystem.
+   */
+  public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException
   {
     // Sort out the hpsf properties
-    filesystem = pfilesystem;
+       super(directory, pfilesystem);
     readProperties();
     
     // read in the main stream.
-    DocumentEntry documentProps =
-       (DocumentEntry)filesystem.getRoot().getEntry("WordDocument");
+    DocumentEntry documentProps = (DocumentEntry)
+       directory.getEntry("WordDocument");
     _mainStream = new byte[documentProps.getSize()];
-    filesystem.createDocumentInputStream("WordDocument").read(_mainStream);
+    
+    directory.createDocumentInputStream("WordDocument").read(_mainStream);
 
     // use the fib to determine the name of the table stream.
     _fib = new FileInformationBlock(_mainStream);
@@ -165,14 +181,14 @@ public class HWPFDocument extends POIDocument
     DocumentEntry tableProps;
        try {
                tableProps =
-                       (DocumentEntry)filesystem.getRoot().getEntry(name);
+                       (DocumentEntry)directory.getEntry(name);
        } catch(FileNotFoundException fnfe) {
                throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
        }
 
     // read in the table stream.
     _tableStream = new byte[tableProps.getSize()];
-    filesystem.createDocumentInputStream(name).read(_tableStream);
+    directory.createDocumentInputStream(name).read(_tableStream);
 
     _fib.fillVariableFields(_mainStream, _tableStream);
 
@@ -180,7 +196,7 @@ public class HWPFDocument extends POIDocument
     try
     {
       DocumentEntry dataProps =
-          (DocumentEntry) filesystem.getRoot().getEntry("Data");
+          (DocumentEntry)directory.getEntry("Data");
       _dataStream = new byte[dataProps.getSize()];
       filesystem.createDocumentInputStream("Data").read(_dataStream);
     }
index cda33675f2627b766203f0d68b04eab289d091a9..c78ccfa32306a9386cf44d1858db57ca272b88dc 100644 (file)
@@ -23,6 +23,8 @@ import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.model.TextPiece;
 import org.apache.poi.hwpf.usermodel.Paragraph;
 import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
 import junit.framework.TestCase;
 
@@ -54,12 +56,16 @@ public class TestWordExtractor extends TestCase {
        private WordExtractor extractor;
        // Corrupted document - can't do paragraph based stuff
        private WordExtractor extractor2;
+       // A word doc embedded in an excel file
+       private String filename3;
        
     protected void setUp() throws Exception {
                String dirname = System.getProperty("HWPF.testdata.path");
+               String pdirname = System.getProperty("POIFS.testdata.path");
                
                String filename = dirname + "/test2.doc";
                String filename2 = dirname + "/test.doc";
+               filename3 = pdirname + "/excel_with_embeded.xls";
                extractor = new WordExtractor(new FileInputStream(filename));
                extractor2 = new WordExtractor(new FileInputStream(filename2));
                
@@ -101,4 +107,25 @@ public class TestWordExtractor extends TestCase {
        String text = extractor.getTextFromPieces();
        assertEquals(p_text1_block, text);
     }
+    
+    
+    /**
+     * Test that we can get data from an
+     *  embedded word document
+     * @throws Exception
+     */
+    public void testExtractFromEmbeded() throws Exception {
+       POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename3));
+       DirectoryNode dir = (DirectoryNode)
+               fs.getRoot().getEntry("MBD03F25D8D");
+       // Should have WordDocument and 1Table
+       assertNotNull(dir.getEntry("1Table"));
+       assertNotNull(dir.getEntry("WordDocument"));
+       
+       HWPFDocument doc = new HWPFDocument(dir, fs);
+       WordExtractor extractor3 = new WordExtractor(doc);
+               
+       assertNotNull(extractor3.getText());
+       assertTrue(extractor3.getText().length() > 20);
+    }
 }