]> source.dussan.org Git - poi.git/commitdiff
Added code for an event model
authorSaid Ryan Ackley <sackley@apache.org>
Fri, 28 Jun 2002 21:13:25 +0000 (21:13 +0000)
committerSaid Ryan Ackley <sackley@apache.org>
Fri, 28 Jun 2002 21:13:25 +0000 (21:13 +0000)
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@352732 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hdf/model/HDFObjectFactory.java
src/scratchpad/src/org/apache/poi/hdf/model/hdftypes/FormattedDiskPage.java
src/scratchpad/src/org/apache/poi/hdf/model/hdftypes/PlexOfCps.java

index 66b51304454fc581ff765568a1064daaf2f17b12..5e51ce8ac0b2d1ab41f0f953180e318b2be99201 100644 (file)
@@ -64,12 +64,16 @@ package org.apache.poi.hdf.model;
 
 import java.util.ArrayList;
 import java.io.InputStream;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.List;
 import java.util.TreeSet;
 
 
 import org.apache.poi.hdf.model.hdftypes.*;
+import org.apache.poi.hdf.event.HDFLowLevelParsingListener;
+import org.apache.poi.hdf.model.util.BTreeSet;
+import org.apache.poi.hdf.model.util.ParsingState;
 
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.poifs.filesystem.POIFSDocument;
@@ -84,7 +88,7 @@ import org.apache.poi.util.LittleEndian;
  * that represent the data.
  * @author  andy
  */
-public class HDFObjectFactory
+public class HDFObjectFactory implements HDFLowLevelParsingListener
 {
 
     /** OLE stuff*/
@@ -99,16 +103,22 @@ public class HDFObjectFactory
     private ListTables _listTables;
     /** Font info */
     private FontTable _fonts;
+    /** Used to set up the object model*/
+    private HDFLowLevelParsingListener _listener;
+
+    private ParsingState _charParsingState;
+
+    private ParsingState _parParsingState;
 
     /** text pieces */
     //BTreeSet _text = new BTreeSet();
-    TreeSet _text = new TreeSet();
+    BTreeSet _text = new BTreeSet();
     /** document sections */
-    TreeSet _sections = new TreeSet();
+    BTreeSet _sections = new BTreeSet();
     /** document paragraphs */
-    TreeSet _paragraphs = new TreeSet();
+    BTreeSet _paragraphs = new BTreeSet();
     /** document character runs */
-    TreeSet _characterRuns = new TreeSet();
+    BTreeSet _characterRuns = new BTreeSet();
 
     /** main document stream buffer*/
     byte[] _mainDocument;
@@ -116,14 +126,34 @@ public class HDFObjectFactory
     byte[] _tableBuffer;
 
 
-
+    public static void main(String args[])
+    {
+      try
+      {
+        HDFObjectFactory f = new HDFObjectFactory(new FileInputStream("c:\\test.doc"));
+        int k = 0;
+      }
+      catch(Throwable t)
+      {
+        t.printStackTrace();
+      }
+    }
     /** Creates a new instance of HDFObjectFactory
      *
      * @param istream The InputStream that is the Word document
      *
      */
-    public HDFObjectFactory(InputStream istream) throws IOException
+    protected HDFObjectFactory(InputStream istream, HDFLowLevelParsingListener l) throws IOException
     {
+        if (l == null)
+        {
+            _listener = this;
+        }
+        else
+        {
+            _listener = l;
+        }
+
         //do Ole stuff
         _filesystem = new POIFSFileSystem(istream);
 
@@ -139,6 +169,20 @@ public class HDFObjectFactory
         initTextPieces();
         initFormattingProperties();
 
+
+    }
+
+
+
+
+    /** Creates a new instance of HDFObjectFactory
+     *
+     * @param istream The InputStream that is the Word document
+     *
+     */
+    public HDFObjectFactory(InputStream istream) throws IOException
+    {
+        this(istream, null);
     }
 
     public static List getTypes(InputStream istream) throws IOException
@@ -156,14 +200,31 @@ public class HDFObjectFactory
 
         FileInformationBlock fib = new FileInformationBlock(mainDocument);
 
-       // initTableStream();
-       // initTextPieces();
-       // initFormattingProperties();
 
         results.add(fib);
         return results;
     }
 
+    public void document(DocumentProperties dop)
+    {
+      _dop = dop;
+    }
+    public void section(SepxNode sepx)
+    {
+      _sections.add(sepx);
+    }
+    public void paragraph(PapxNode papx)
+    {
+      _paragraphs.add(papx);
+    }
+    public void characterRun(ChpxNode chpx)
+    {
+      _characterRuns.add(chpx);
+    }
+    public void text(TextPiece t)
+    {
+      _text.add(t);
+    }
     /**
      * Initializes the table stream
      *
@@ -234,7 +295,7 @@ public class HDFObjectFactory
                                 LittleEndian.getInt(_tableBuffer, pos + (x * 4));
 
                 TextPiece piece = new TextPiece(filePos, totLength, unicode);
-                _text.add(piece);
+                _listener.text(piece);
 
             }
 
@@ -252,18 +313,64 @@ public class HDFObjectFactory
 
         initDocumentProperties();
         initSectionProperties();
-        initCharacterProperties();
+        //initCharacterProperties();
         initParagraphProperties();
     }
+    private void initCharacterProperties(int charOffset, PlexOfCps charPlcf, int start, int end)
+    {
+    }
+    private void initParagraphProperties(int parOffset, PlexOfCps parPlcf, int end)
+    {
+        //Initialize character property stuff
+        int currentParPage = _parParsingState.getCurrentPage();
+        int parPlcfLen = parPlcf.length();
+        int currentPageIndex = _parParsingState.getCurrentPageIndex();
+        //byte[] fkp = new byte[512];
+        //System.arraycopy(_mainDocument, (currentCharPage * 512), fkp, 0, 512);
+        FormattedDiskPage fkp = _parParsingState.getFkp();
+        int currentPapxIndex = _parParsingState.getCurrentPropIndex();
+        int currentArraySize = fkp.size();
+
+        do
+        {
+          if (currentPapxIndex < currentArraySize)
+          {
+            int parStart = fkp.getStart(currentPapxIndex);
+            int parEnd = fkp.getEnd(currentPapxIndex);
+            byte[] papx = fkp.getGrpprl(currentPapxIndex);
+            _listener.paragraph(new PapxNode(parStart, parEnd, papx));
+            if (parEnd < end)
+            {
+              currentPapxIndex++;
+            }
+            else
+            {
+              //save the state
+              _parParsingState.setState(currentParPage, currentPageIndex, fkp, currentPapxIndex);
+              break;
+            }
+          }
+          else
+          {
+            currentParPage = LittleEndian.getInt(_tableBuffer, parOffset + parPlcf.getStructOffset(++currentPageIndex));
+            byte byteFkp[] = new byte[512];
+            System.arraycopy(_mainDocument, (currentParPage * 512), fkp, 0, 512);
+            fkp = new PAPFormattedDiskPage(byteFkp);
+            currentPapxIndex = 0;
+            currentArraySize = fkp.size();
+          }
+        }
+        while(currentParPage <= parPlcfLen + 1);
+    }
     /**
      * initializes the CharacterProperties BTree
      */
-    private void initCharacterProperties()
+    /*private void initCharacterProperties()
     {
         int charOffset = _fib.getFcPlcfbteChpx();
         int charPlcSize = _fib.getLcbPlcfbteChpx();
 
-        int arraySize = (charPlcSize - 4)/8;
+        //int arraySize = (charPlcSize - 4)/8;
 
         //first we must go through the bin table and find the fkps
         for(int x = 0; x < arraySize; x++)
@@ -302,46 +409,90 @@ public class HDFObjectFactory
             }
 
         }
-    }
+    }*/
     /**
      * intializes the Paragraph Properties BTree
      */
     private void initParagraphProperties()
     {
-        //find paragraphs
+        //paragraphs
         int parOffset = _fib.getFcPlcfbtePapx();
         int parPlcSize = _fib.getLcbPlcfbtePapx();
 
-        int arraySize = (parPlcSize - 4)/8;
+        //characters
+        int charOffset = _fib.getFcPlcfbteChpx();
+        int charPlcSize = _fib.getLcbPlcfbteChpx();
+
+        PlexOfCps charPlcf = new PlexOfCps(charPlcSize, 4);
+        PlexOfCps parPlcf = new PlexOfCps(parPlcSize, 4);
+
+        //Initialize character property stuff
+        int currentCharPage = LittleEndian.getInt(_tableBuffer, charOffset + charPlcf.getStructOffset(0));
+        int charPlcfLen = charPlcf.length();
+        int currentPageIndex = 0;
+        byte[] fkp = new byte[512];
+        System.arraycopy(_mainDocument, (currentCharPage * 512), fkp, 0, 512);
+        CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(fkp);
+        int currentChpxIndex = 0;
+        int currentArraySize = cfkp.size();
+
+
+        int arraySize = parPlcf.length();
+
         //first we must go through the bin table and find the fkps
         for(int x = 0; x < arraySize; x++)
         {
-            int PN = LittleEndian.getInt(_tableBuffer, parOffset + (4 * (arraySize + 1) + (4 * x)));
+            int PN = LittleEndian.getInt(_tableBuffer, parOffset + parPlcf.getStructOffset(x));
 
-            byte[] fkp = new byte[512];
+            fkp = new byte[512];
             System.arraycopy(_mainDocument, (PN * 512), fkp, 0, 512);
+
+            PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(fkp);
             //take each fkp and get the paps
-            int crun = LittleEndian.getUnsignedByte(fkp, 511);
+            int crun = pfkp.size();
             for(int y = 0; y < crun; y++)
             {
                 //get the beginning fc of each paragraph text run
-                int fcStart = LittleEndian.getInt(fkp, y * 4);
-                int fcEnd = LittleEndian.getInt(fkp, (y+1) * 4);
-                //get the offset in fkp of the papx for this paragraph
-                int papxOffset = 2 * LittleEndian.getUnsignedByte(fkp, ((crun + 1) * 4) + (y * 13));
-                int size = 2 * LittleEndian.getUnsignedByte(fkp, papxOffset);
-                if(size == 0)
-                {
-                    size = 2 * LittleEndian.getUnsignedByte(fkp, ++papxOffset);
-                }
-                else
+                int fcStart = pfkp.getStart(y);
+                int fcEnd = pfkp.getEnd(y);
+
+                //get the papx for this paragraph
+                byte[] papx = pfkp.getGrpprl(y);
+
+                _listener.paragraph(new PapxNode(fcStart, fcEnd, papx));
+
+                //get the character runs for this paragraph
+                int charStart = 0;
+                int charEnd = 0;
+                //add the character runs
+                do
                 {
-                    size--;
+                  if (currentChpxIndex < currentArraySize)
+                  {
+                    charStart = cfkp.getStart(currentChpxIndex);
+                    charEnd = cfkp.getEnd(currentChpxIndex);
+                    byte[] chpx = cfkp.getGrpprl(currentChpxIndex);
+                    _listener.characterRun(new ChpxNode(charStart, charEnd, chpx));
+                    if (charEnd < fcEnd)
+                    {
+                      currentChpxIndex++;
+                    }
+                    else
+                    {
+                      break;
+                    }
+                  }
+                  else
+                  {
+                    currentCharPage = LittleEndian.getInt(_tableBuffer, charOffset + charPlcf.getStructOffset(++currentPageIndex));
+                    fkp = new byte[512];
+                    System.arraycopy(_mainDocument, (currentCharPage * 512), fkp, 0, 512);
+                    cfkp = new CHPFormattedDiskPage(fkp);
+                    currentChpxIndex = 0;
+                    currentArraySize = cfkp.size();
+                  }
                 }
-
-                byte[] papx = new byte[size];
-                System.arraycopy(fkp, ++papxOffset, papx, 0, size);
-                _paragraphs.add(new PapxNode(fcStart, fcEnd, papx));
+                while(currentCharPage <= charPlcfLen + 1);
 
             }
 
@@ -353,22 +504,38 @@ public class HDFObjectFactory
      */
     private void initSectionProperties()
     {
-      //find sections
+
+
+      //sections
       int fcMin = _fib.getFcMin();
       int plcfsedFC = _fib.getFcPlcfsed();
       int plcfsedSize = _fib.getLcbPlcfsed();
-      byte[] plcfsed = new byte[plcfsedSize];
-      System.arraycopy(_tableBuffer, plcfsedFC, plcfsed, 0, plcfsedSize);
 
-      int arraySize = (plcfsedSize - 4)/16;
+      //paragraphs
+      int parOffset = _fib.getFcPlcfbtePapx();
+      int parPlcSize = _fib.getLcbPlcfbtePapx();
+
+      //characters
+      int charOffset = _fib.getFcPlcfbteChpx();
+      int charPlcSize = _fib.getLcbPlcfbteChpx();
+
+      PlexOfCps charPlcf = new PlexOfCps(charPlcSize, 4);
+      PlexOfCps parPlcf = new PlexOfCps(parPlcSize, 4);
+
+      //byte[] plcfsed = new byte[plcfsedSize];
+      //System.arraycopy(_tableBuffer, plcfsedFC, plcfsed, 0, plcfsedSize);
+
+      PlexOfCps plcfsed = new PlexOfCps(plcfsedSize, 12);
+      int arraySize = plcfsed.length();
 
 
       for(int x = 0; x < arraySize; x++)
       {
-          int sectionStart = LittleEndian.getInt(plcfsed, x * 4) + fcMin;
-          int sectionEnd = LittleEndian.getInt(plcfsed, (x+1) * 4) + fcMin;
-          int sepxStart = LittleEndian.getInt(plcfsed, 4 * (arraySize + 1) + (x * 12) + 2);
+          int sectionStart = LittleEndian.getInt(_tableBuffer, plcfsed.getIntOffset(x)) + fcMin;
+          int sectionEnd = LittleEndian.getInt(_tableBuffer, plcfsed.getIntOffset(x + 1)) + fcMin;
+          int sepxStart = LittleEndian.getInt(_tableBuffer, plcfsed.getStructOffset(x));
           int sepxSize = LittleEndian.getShort(_mainDocument, sepxStart);
+
           byte[] sepx = new byte[sepxSize];
           System.arraycopy(_mainDocument, sepxStart + 2, sepx, 0, sepxSize);
           SepxNode node = new SepxNode(x + 1, sectionStart, sectionEnd, sepx);
@@ -385,7 +552,7 @@ public class HDFObjectFactory
         byte[] dopArray = new byte[size];
 
         System.arraycopy(_tableBuffer, pos, dopArray, 0, size);
-        _dop = new DocumentProperties(dopArray);
+        _listener.document(new DocumentProperties(dopArray));
     }
     /**
      * Uncompresses the StyleSheet from file into memory.
index ca45da42360cafaa48738f26088abec79e8f4c27..ac2a9e036877124aba2916d758574a9b3a03e9ed 100644 (file)
@@ -87,6 +87,7 @@ public abstract class FormattedDiskPage
     public FormattedDiskPage(byte[] fkp)
     {
         _crun = LittleEndian.getUnsignedByte(fkp, 511);
+        _fkp = fkp;
     }
     /**
      * Used to get a text offset corresponding to a grpprl in this fkp.
index ad220c58792d8eeb3212a88db965dd9c6eca56ea..ac48f0b07ca867c9c09126a54d91f45e11e15acf 100644 (file)
@@ -81,9 +81,12 @@ public class PlexOfCps
     public PlexOfCps(int size, int sizeOfStruct)
     {
         _count = (size - 4)/(4 + sizeOfStruct);
-        _offset = offset;
         _sizeOfStruct = sizeOfStruct;
     }
+    public int getIntOffset(int index)
+    {
+      return index * 4;
+    }
     /**
      * returns the number of data structures in this PlexOfCps.
      *