source.dussan.org Git - poi.git/commitdiff
different workarounds for old Word format
authorSergey Vladimirov <sergey@apache.org>
Sun, 30 Oct 2011 08:59:16 +0000 (08:59 +0000)
committerSergey Vladimirov <sergey@apache.org>
Sun, 30 Oct 2011 08:59:16 +0000 (08:59 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1195133 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
src/scratchpad/src/org/apache/poi/hwpf/model/OldPAPBinTable.java
src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java

index 4efa858db6c5387220fd4fc13f62dc7da6ca4c5a..070f0660eedef6c48a5e254c2f91b2749c336830 100644 (file)
@@ -80,7 +80,7 @@ import org.apache.poi.util.Internal;
  */
 public final class HWPFDocument extends HWPFDocumentCore
 {
-    private static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables";
+    static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables";
     private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
 
     private static final String STREAM_DATA = "Data";
index 58fb26b1b3cf73aaa4f502f07b596a69a0ac000a..1ea906a282d9b120dc2003d61c795f3f5bb62494 100644 (file)
@@ -66,9 +66,10 @@ public class HWPFOldDocument extends HWPFDocumentCore {
         
         // We need to get hold of the text that makes up the
         //  document, which might be regular or fast-saved
+        ComplexFileTable cft = null;
         StringBuffer text = new StringBuffer();
         if(_fib.getFibBase().isFComplex()) {
-            ComplexFileTable cft = new ComplexFileTable(
+            cft = new ComplexFileTable(
                     _mainStream, _mainStream,
                     complexTableOffset, _fib.getFibBase().getFcMin()
             );
@@ -113,6 +114,27 @@ public class HWPFOldDocument extends HWPFDocumentCore {
                 _mainStream, sedTableOffset, sedTableSize,
                 _fib.getFibBase().getFcMin(), tpt
         );
+
+        /*
+         * in this mode we preserve the PAPX/CHPX structure from the file, so
+         * text may be missing from the output, and text order may be corrupted
+         */
+        boolean preserveBinTables = false;
+        try
+        {
+            preserveBinTables = Boolean.parseBoolean( System
+                    .getProperty( HWPFDocument.PROPERTY_PRESERVE_BIN_TABLES ) );
+        }
+        catch ( Exception exc )
+        {
+            // ignored: if the property cannot be read, keep the default (false)
+        }
+
+        if ( !preserveBinTables )
+        {
+            _cbt.rebuild( cft );
+            _pbt.rebuild( _text, cft );
+        }
     }
 
     public Range getOverallRange()
index 0a4ae67ae8aba46b2a0d8e88831233c95bb457b5..0f7c90d5291c5be028669963858138ea1a528fdf 100644 (file)
@@ -17,8 +17,6 @@
 
 package org.apache.poi.hwpf.model;
 
-import java.util.Collections;
-
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.util.Internal;
 import org.apache.poi.util.LittleEndian;
@@ -57,7 +55,5 @@ public final class OldPAPBinTable extends PAPBinTable
                     _paragraphs.add( papx );
             }
     }
-    Collections.sort( _paragraphs, PropertyNode.StartComparator.instance );
   }
 }
-
index 7ab0576d6a1f6077127163f7d918306b4a83c312..437ec974390b6740c414af29f13a46d887657209 100644 (file)
@@ -113,6 +113,12 @@ public class PAPBinTable
 
     public void rebuild( final StringBuilder docText,
             ComplexFileTable complexFileTable )
+    {
+        rebuild( docText, complexFileTable, _paragraphs );
+    }
+
+    static void rebuild( final StringBuilder docText,
+            ComplexFileTable complexFileTable, List<PAPX> paragraphs )
     {
         long start = System.currentTimeMillis();
 
@@ -156,19 +162,19 @@ public class PAPBinTable
 
                     PAPX papx = new PAPX( textPiece.getStart(),
                             textPiece.getEnd(), newSprmBuffer );
-                    _paragraphs.add( papx );
+                    paragraphs.add( papx );
                 }
             }
 
             logger.log( POILogger.DEBUG,
                     "Merged (?) with PAPX from complex file table in ",
                     Long.valueOf( System.currentTimeMillis() - start ),
-                    " ms (", Integer.valueOf( _paragraphs.size() ),
+                    " ms (", Integer.valueOf( paragraphs.size() ),
                     " elements in total)" );
             start = System.currentTimeMillis();
         }
 
-        List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( _paragraphs );
+        List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( paragraphs );
         Collections.sort( oldPapxSortedByEndPos,
                 PropertyNode.EndComparator.instance );
 
@@ -179,7 +185,7 @@ public class PAPBinTable
         final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
         {
             int counter = 0;
-            for ( PAPX papx : _paragraphs )
+            for ( PAPX papx : paragraphs )
             {
                 papxToFileOrder.put( papx, Integer.valueOf( counter++ ) );
             }
@@ -270,6 +276,9 @@ public class PAPBinTable
             SprmBuffer sprmBuffer = null;
             for ( PAPX papx : papxs )
             {
+                if ( papx.getGrpprl() == null || papx.getGrpprl().length == 0 )
+                    continue;
+
                 if ( sprmBuffer == null )
                     try
                     {
@@ -281,7 +290,9 @@ public class PAPBinTable
                         throw new Error( e );
                     }
                 else
+                {
                     sprmBuffer.append( papx.getGrpprl(), 2 );
+                }
             }
             PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer );
             newPapxs.add( newPapx );
@@ -289,11 +300,12 @@ public class PAPBinTable
             lastParStart = endExclusive;
             continue;
         }
-        this._paragraphs = new ArrayList<PAPX>( newPapxs );
+        paragraphs.clear();
+        paragraphs.addAll( newPapxs );
 
         logger.log( POILogger.DEBUG, "PAPX rebuilded from document text in ",
                 Long.valueOf( System.currentTimeMillis() - start ), " ms (",
-                Integer.valueOf( _paragraphs.size() ), " elements)" );
+                Integer.valueOf( paragraphs.size() ), " elements)" );
         start = System.currentTimeMillis();
     }
 
index 0f5e847dcce11e4946826395384a6c7e6046a6c5..bfaa8a9e3180bcfcee1aefd5544f93ca0a6ba242 100644 (file)
@@ -112,11 +112,17 @@ public final class PAPX extends BytePropertyNode<PAPX> {
 
   public byte[] getGrpprl()
   {
+      if (_buf == null)
+          return new byte[0];
+
     return ((SprmBuffer)_buf).toByteArray();
   }
 
-  public short getIstd()
-  {
+    public short getIstd()
+    {
+        if ( _buf == null )
+            return 0;
+
     byte[] buf = getGrpprl();
     if (buf.length == 0)
     {
index b690d400d7169b1cd781b08fb3c2407bc4e88add..4999f128a29a5c3e57b0773b80cc0a5963847c86 100644 (file)
@@ -1101,7 +1101,7 @@ public class Range { // TODO -instantiable superclass
         int endIndex = binarySearchEnd( rpl, startIndex, end );
         while ( endIndex < rpl.size() - 1
                 && rpl.get( endIndex + 1 ).getEnd() <= end )
-            endIndex--;
+            endIndex++;
 
         if ( startIndex < 0 || startIndex >= rpl.size()
                 || startIndex > endIndex || endIndex < 0
index 5aa15eeef7985a8b631af536f21c75ecd4da9c9e..7309f7f44b57c7eb17107fd25b6f704cd83af94a 100644 (file)
@@ -28,6 +28,8 @@ import java.util.List;
 
 import junit.framework.TestCase;
 
+import org.apache.poi.hwpf.converter.WordToTextConverter;
+
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hwpf.HWPFDocument;
@@ -736,7 +738,8 @@ public class TestBugs extends TestCase
      */
     public void testBug51944() throws Exception
     {
-        HWPFTestDataSamples.openOldSampleFile( "Bug51944.doc" );
+        HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile( "Bug51944.doc" );
+        WordToTextConverter.getText( doc );
     }
 
     /**