source.dussan.org Git - poi.git/commitdiff
add initial support for fast-saved files
author Sergey Vladimirov <sergey@apache.org>
Tue, 12 Jul 2011 00:40:27 +0000 (00:40 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Tue, 12 Jul 2011 00:40:27 +0000 (00:40 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145410 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java
src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java

index b8a9892f9de5d99d32ce1908dc13794713fe0d38..e317b24fc81ed07917f38487c3e1f4faa90a038e 100644 (file)
@@ -216,8 +216,8 @@ public final class HWPFDocument extends HWPFDocumentCore
 
     // Now load the rest of the properties, which need to be adjusted
     //  for where the text really begins
-    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
-    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
+    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true);
+    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true);
 
     // Read FSPA and Escher information
     _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
index 75abad1f0ee906286faf393d5966032afb78396e..ae1b9736ba699288ad2abf4f613810e5db9cf818 100644 (file)
@@ -29,6 +29,8 @@ import java.util.Set;
 import org.apache.poi.hwpf.model.io.HWPFFileSystem;
 import org.apache.poi.hwpf.model.io.HWPFOutputStream;
 import org.apache.poi.hwpf.sprm.SprmBuffer;
+import org.apache.poi.hwpf.sprm.SprmIterator;
+import org.apache.poi.hwpf.sprm.SprmOperation;
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.POILogFactory;
@@ -58,20 +60,21 @@ public class CHPBinTable
      * Constructor used to read a binTable in from a Word document.
      * 
      * @deprecated Use
-     *             {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable,boolean)}
+     *             {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)}
      *             instead
      */
     public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
             int size, int fcMin, TextPieceTable tpt )
     {
-        this( documentStream, tableStream, offset, size, tpt, true );
+        this( documentStream, tableStream, offset, size, null, tpt, true );
     }
 
     /**
      * Constructor used to read a binTable in from a Word document.
      */
     public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
-            int size, TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces )
+            int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
+            boolean ignoreChpxWithoutTextPieces )
     {
         /*
          * Page 35:
@@ -105,6 +108,58 @@ public class CHPBinTable
       }
     }
 
+        if ( complexFileTable != null )
+        {
+            SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
+
+            // adding CHPX from fast-saved SPRMs
+            for ( TextPiece textPiece : tpt.getTextPieces() )
+            {
+                PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
+                if ( !prm.isComplex() )
+                    continue;
+                int igrpprl = prm.getIgrpprl();
+
+                if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
+                {
+                    logger.log( POILogger.WARN, textPiece
+                            + "'s PRM references to unknown grpprl" );
+                    continue;
+                }
+
+                boolean hasChp = false;
+                SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
+                for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
+                        .hasNext(); )
+                {
+                    SprmOperation sprmOperation = iterator.next();
+                    if ( sprmOperation.getType() == SprmOperation.TYPE_CHP )
+                    {
+                        hasChp = true;
+                        break;
+                    }
+                }
+
+                if ( hasChp )
+                {
+                    SprmBuffer newSprmBuffer;
+                    try
+                    {
+                        newSprmBuffer = (SprmBuffer) sprmBuffer.clone();
+                    }
+                    catch ( CloneNotSupportedException e )
+                    {
+                        // shall not happen
+                        throw new Error( e );
+                    }
+
+                    CHPX chpx = new CHPX( textPiece.getStart(),
+                            textPiece.getEnd(), newSprmBuffer );
+                    _textRuns.add( chpx );
+                }
+            }
+        }
+
         // rebuild document paragraphs structure
         StringBuilder docText = new StringBuilder();
         for ( TextPiece textPiece : tpt.getTextPieces() )
index ffc62d0fa7c8f167e8bac63b6c47f3fc52a5db00..d679c03e95c2f94564ec4576f1124ee8b59dbb96 100644 (file)
 package org.apache.poi.hwpf.model;
 
 import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
 
+import org.apache.poi.hwpf.model.io.HWPFFileSystem;
+import org.apache.poi.hwpf.model.io.HWPFOutputStream;
+import org.apache.poi.hwpf.sprm.SprmBuffer;
 import org.apache.poi.util.LittleEndian;
-import org.apache.poi.hwpf.model.io.*;
 
 public final class ComplexFileTable
 {
@@ -30,6 +34,8 @@ public final class ComplexFileTable
 
   protected TextPieceTable _tpt;
 
+  private SprmBuffer[] _grpprls;
+  
   public ComplexFileTable()
   {
     _tpt = new TextPieceTable();
@@ -39,12 +45,20 @@ public final class ComplexFileTable
   {
     //skips through the prms before we reach the piece table. These contain data
     //for actual fast saved files
-    while (tableStream[offset] == GRPPRL_TYPE)
-    {
-      offset++;
-      int skip = LittleEndian.getShort(tableStream, offset);
-      offset += LittleEndian.SHORT_SIZE + skip;
-    }
+        List<SprmBuffer> sprmBuffers = new LinkedList<SprmBuffer>();
+        while ( tableStream[offset] == GRPPRL_TYPE )
+        {
+            offset++;
+            int size = LittleEndian.getShort( tableStream, offset );
+            offset += LittleEndian.SHORT_SIZE;
+            byte[] bs = LittleEndian.getByteArray( tableStream, offset, size );
+            offset += size;
+
+            SprmBuffer sprmBuffer = new SprmBuffer( bs, false, 0 );
+            sprmBuffers.add( sprmBuffer );
+        }
+        this._grpprls = sprmBuffers.toArray( new SprmBuffer[sprmBuffers.size()] );
+
     if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
     {
       throw new IOException("The text piece table is corrupted");
@@ -59,6 +73,11 @@ public final class ComplexFileTable
     return _tpt;
   }
 
+    public SprmBuffer[] getGrpprls()
+    {
+        return _grpprls;
+    }
+
   public void writeTo(HWPFFileSystem sys)
     throws IOException
   {
index 645ea14f6b94eeb6e16dafbd70da5e7df28cb860..7627b2e0b47d6b1e3fbf39ebdc6fdf53147d47db 100644 (file)
@@ -26,6 +26,8 @@ import java.util.List;
 import org.apache.poi.hwpf.model.io.HWPFFileSystem;
 import org.apache.poi.hwpf.model.io.HWPFOutputStream;
 import org.apache.poi.hwpf.sprm.SprmBuffer;
+import org.apache.poi.hwpf.sprm.SprmIterator;
+import org.apache.poi.hwpf.sprm.SprmOperation;
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.POILogFactory;
@@ -62,12 +64,12 @@ public class PAPBinTable
             byte[] dataStream, int offset, int size, int fcMin,
             TextPieceTable tpt )
     {
-        this( documentStream, tableStream, dataStream, offset, size, tpt, true );
+        this( documentStream, tableStream, dataStream, offset, size, null, tpt, true );
     }
 
     public PAPBinTable( byte[] documentStream, byte[] tableStream,
-            byte[] dataStream, int offset, int size, TextPieceTable tpt,
-            boolean ignorePapxWithoutTextPieces )
+            byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
+            TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
     {
     PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
     this.tpt = tpt;
@@ -94,6 +96,50 @@ public class PAPBinTable
       }
     }
 
+        if ( complexFileTable != null )
+        {
+            SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
+
+            // adding PAPX from fast-saved SPRMs
+            for ( TextPiece textPiece : tpt.getTextPieces() )
+            {
+                PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
+                if ( !prm.isComplex() )
+                    continue;
+                int igrpprl = prm.getIgrpprl();
+
+                if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
+                {
+                    logger.log( POILogger.WARN, textPiece
+                            + "'s PRM references to unknown grpprl" );
+                    continue;
+                }
+
+                boolean hasPap = false;
+                SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
+                for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
+                        .hasNext(); )
+                {
+                    SprmOperation sprmOperation = iterator.next();
+                    if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
+                    {
+                        hasPap = true;
+                        break;
+                    }
+                }
+
+                if ( hasPap )
+                {
+                    SprmBuffer newSprmBuffer = new SprmBuffer(2);
+                    newSprmBuffer.append( sprmBuffer.toByteArray() );
+
+                    PAPX papx = new PAPX( textPiece.getStart(),
+                            textPiece.getEnd(), newSprmBuffer, dataStream );
+                    _paragraphs.add( papx );
+                }
+            }
+        }
+
         // rebuild document paragraphs structure
         StringBuilder docText = new StringBuilder();
         for ( TextPiece textPiece : tpt.getTextPieces() )
@@ -144,11 +190,6 @@ public class PAPBinTable
                     if ( fChar == 13 || fChar == 7 || fChar == 12 )
                         break;
                 }
-                // if ( papx.getStart() <= charIndex && charIndex <
-                // papx.getEnd() )
-                // {
-                // papxs.add( papx );
-                // }
             }
 
             if ( papxs.size() == 0 )
@@ -178,10 +219,21 @@ public class PAPBinTable
                 }
             }
 
-            SprmBuffer sprmBuffer = new SprmBuffer( 2 );
+            SprmBuffer sprmBuffer = null;
             for ( PAPX papx : papxs )
             {
-                sprmBuffer.append( papx.getGrpprl(), 2 );
+                if ( sprmBuffer == null )
+                    try
+                    {
+                        sprmBuffer = (SprmBuffer) papx.getSprmBuf().clone();
+                    }
+                    catch ( CloneNotSupportedException e )
+                    {
+                        // can't happen
+                        throw new Error( e );
+                    }
+                else
+                    sprmBuffer.append( papx.getGrpprl(), 2 );
             }
             PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
                     dataStream );
index 4e357b38f40880119ff074375f23701d2eed8655..05fff84f5c558407b2e835520d23f2658e6a170b 100644 (file)
@@ -19,8 +19,10 @@ package org.apache.poi.hwpf.model;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 
+import org.apache.poi.hwpf.sprm.SprmBuffer;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
@@ -90,7 +92,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
                         Integer.valueOf( startAt ), "; ",
                         Integer.valueOf( endAt ),
                         ") (bytes) doesn't have corresponding text pieces "
-                                + "and will be skipped" );
+                                + "and will be skipped\n\tSkipped SPRM: "
+                                + new SprmBuffer( getGrpprl( x ), 2 ) );
                 _papxList.add( null );
                 continue;
             }
@@ -150,6 +153,11 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
       return _papxList.get(index);
     }
 
+    public List<PAPX> getPAPXs()
+    {
+        return Collections.unmodifiableList( _papxList );
+    }
+
     /**
      * Gets the papx grpprl for the paragraph at index in this fkp.
      *
index e2aeeb46cb47ca15a03931883606e915853dd757..38700f7907cda618001a6e19a1c8c1a15dcbc572 100644 (file)
@@ -46,7 +46,7 @@ public final class TestCHPBinTable
     byte[] tableStream = _hWPFDocFixture._tableStream;
     int fcMin = fib.getFcMin();
 
-    _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT, false);
+    _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false);
 
     HWPFFileSystem fileSys = new HWPFFileSystem();
 
@@ -57,7 +57,7 @@ public final class TestCHPBinTable
     byte[] newTableStream = tableOut.toByteArray();
     byte[] newMainStream = mainOut.toByteArray();
 
-    CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT, false);
+    CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false);
 
     ArrayList oldTextRuns = _cHPBinTable._textRuns;
     ArrayList newTextRuns = newBinTable._textRuns;
index 8bd102a16c6e9edd9bb2f6f0d494b9692eae1cb2..e34d563acbea4c6a0116491ac75aad34a3d43245 100644 (file)
@@ -40,7 +40,7 @@ public final class TestPAPBinTable
     byte[] mainStream = _hWPFDocFixture._mainStream;
     byte[] tableStream = _hWPFDocFixture._tableStream;
 
-    _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
+    _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, fakeTPT, false);
 
     HWPFFileSystem fileSys = new HWPFFileSystem();
 
@@ -51,7 +51,7 @@ public final class TestPAPBinTable
     byte[] newTableStream = tableOut.toByteArray();
     byte[] newMainStream = mainOut.toByteArray();
 
-    PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
+    PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, null, fakeTPT, false);
 
     ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
     ArrayList newTextRuns = newBinTable.getParagraphs();