replace ComplexFileTable with single-element-one right after load; replace text piece...

author Sergey Vladimirov <sergey@apache.org>

Mon, 25 Jul 2011 12:58:09 +0000 (12:58 +0000)

committer Sergey Vladimirov <sergey@apache.org>

Mon, 25 Jul 2011 12:58:09 +0000 (12:58 +0000)
author Sergey Vladimirov <sergey@apache.org>
Mon, 25 Jul 2011 12:58:09 +0000 (12:58 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Mon, 25 Jul 2011 12:58:09 +0000 (12:58 +0000)
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java

index 1ce89377b752e38e6c662e5ff57254c48df7adc9..ffa1196f5c9786d9de76e9c8dc45dcba3f3cda77 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
@@ -23,8 +23,6 @@ import java.io.FileNotFoundException;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.OutputStream;
-import java.util.Iterator;
-import java.util.List;
  
  import org.apache.poi.hwpf.model.BookmarksTables;
  import org.apache.poi.hwpf.model.CHPBinTable;
@@ -40,6 +38,7 @@ import org.apache.poi.hwpf.model.NoteType;
  import org.apache.poi.hwpf.model.NotesTables;
  import org.apache.poi.hwpf.model.PAPBinTable;
  import org.apache.poi.hwpf.model.PicturesTable;
+import org.apache.poi.hwpf.model.PieceDescriptor;
  import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
  import org.apache.poi.hwpf.model.SavedByTable;
  import org.apache.poi.hwpf.model.SectionTable;
@@ -92,7 +91,7 @@ public final class HWPFDocument extends HWPFDocumentCore
    * structure*/
    protected ComplexFileTable _cft;
  
-  protected TextPieceTable _tpt;
+  protected final StringBuilder _text;
  
    /** Holds the save history for this document. */
    protected SavedByTable _sbt;
@@ -139,6 +138,7 @@ public final class HWPFDocument extends HWPFDocumentCore
    protected HWPFDocument()
    {
       super();
+     this._text = new StringBuilder("\r");
    }
  
    /**
@@ -246,15 +246,35 @@ public final class HWPFDocument extends HWPFDocumentCore
      // Start to load up our standard structures.
      _dop = new DocumentProperties(_tableStream, _fib.getFcDop());
      _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
-    _tpt = _cft.getTextPieceTable();
+    TextPieceTable _tpt = _cft.getTextPieceTable();
  
      // Now load the rest of the properties, which need to be adjusted
      //  for where text really begin
      _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt);
      _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt);
  
+        _text = _tpt.getText();
          _cbt.rebuild( _cft );
-        _pbt.rebuild( _dataStream, _cft );
+        _pbt.rebuild( _text, _dataStream, _cft );
+
+        boolean preserve = false;
+        try
+        {
+            preserve = Boolean.parseBoolean( System
+                    .getProperty( "org.apache.poi.hwpf.preserveTextTable" ) );
+        }
+        catch ( Exception exc )
+        {
+            // ignore: property unreadable (e.g. SecurityException); keep preserve == false
+        }
+        if ( !preserve )
+        {
+            _cft = new ComplexFileTable();
+            _tpt = _cft.getTextPieceTable();
+            _tpt.add( new TextPiece( 0, _text.length(), _text.toString()
+                    .getBytes( "UTF-16LE" ), new PieceDescriptor( new byte[8],
+                    0 ) ) );
+        }
  
      // Read FSPA and Escher information
      _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
@@ -314,6 +334,12 @@ public final class HWPFDocument extends HWPFDocumentCore
      return _cft.getTextPieceTable();
    }
  
+    @Override
+    public StringBuilder getText()
+    {
+        return _text;
+    }
+
    @Deprecated
    public CPSplitCalculator getCPSplitCalculator()
    {
@@ -326,10 +352,7 @@ public final class HWPFDocument extends HWPFDocumentCore
    }
  
    public Range getOverallRange() {
-         // hack to get the ending cp of the document, Have to revisit this.
-      TextPiece p =  _tpt.getTextPieces().get(_tpt.getTextPieces().size() - 1);
-
-      return new Range(0, p.getEnd(), this);
+      return new Range(0, _text.length(), this);
    }
  
      /**
@@ -445,16 +468,7 @@ public final class HWPFDocument extends HWPFDocumentCore
     */
    public int characterLength()
    {
-    List<TextPiece> textPieces = _tpt.getTextPieces();
-    Iterator<TextPiece> textIt = textPieces.iterator();
-
-    int length = 0;
-    while(textIt.hasNext())
-    {
-      TextPiece tp = textIt.next();
-      length += tp.characterLength();
-    }
-    return length;
+      return _text.length();
    }
  
    /**
@@ -643,7 +657,7 @@ public final class HWPFDocument extends HWPFDocumentCore
  
      // write out the PAPBinTable.
      _fib.setFcPlcfbtePapx(tableOffset);
-    _pbt.writeTo(docSys, fcMin);
+    _pbt.writeTo(docSys, fcMin, _cft.getTextPieceTable());
      _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
      tableOffset = tableStream.getOffset();
  
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java

index e3bd68d8ea8976f04714a6e17383ba7ff880b277..50171c37e77b1018a30552552a18e15add00ecfc 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
@@ -35,6 +35,7 @@ import org.apache.poi.hwpf.usermodel.Range;
  import org.apache.poi.poifs.filesystem.DirectoryNode;
  import org.apache.poi.poifs.filesystem.DocumentEntry;
  import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.Internal;
  
  
  /**
@@ -161,8 +162,20 @@ public abstract class HWPFDocumentCore extends POIDocument
       */
      public abstract Range getOverallRange();
  
-  public abstract TextPieceTable getTextTable();
-  
+    /**
+     * Returns document text, i.e. text information from all text pieces,
+     * including OLE descriptions and field codes
+     */
+    public String getDocumentText() {
+        return getText().toString();
+    }
+
+    /**
+     * Internal method to access document text
+     */
+    @Internal
+    public abstract StringBuilder getText();
+
    public CHPBinTable getCharacterTable()
    {
      return _cbt;
@@ -197,4 +210,6 @@ public abstract class HWPFDocumentCore extends POIDocument
    {
      return _fib;
    }
+
+    public abstract TextPieceTable getTextTable();
  }
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java

index 3bc32a13f6072d77fce3b4692b0bc144e3e3070a..08c60959cf59cdf3a97933574aa622af2455d402 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
@@ -38,6 +38,8 @@ import org.apache.poi.util.LittleEndian;
  public class HWPFOldDocument extends HWPFDocumentCore {
      private TextPieceTable tpt;
      
+    private StringBuilder _text;
+    
      public HWPFOldDocument(POIFSFileSystem fs) throws IOException {
          this(fs.getRoot());
      }
@@ -88,13 +90,15 @@ public class HWPFOldDocument extends HWPFDocumentCore {
              byte[] textData = new byte[_fib.getFcMac()-_fib.getFcMin()];
              System.arraycopy(_mainStream, _fib.getFcMin(), textData, 0, textData.length);
              TextPiece tp = new TextPiece(
-                    0, textData.length, textData, pd, 0
+                    0, textData.length, textData, pd
              );
              tpt.add(tp);
              
              text.append(tp.getStringBuffer());
          }
          
+        _text = tpt.getText();
+
          // Now we can fetch the character and paragraph properties
          _cbt = new OldCHPBinTable(
                  _mainStream, chpTableOffset, chpTableSize,
@@ -126,6 +130,12 @@ public class HWPFOldDocument extends HWPFDocumentCore {
        return tpt;
      }
  
+    @Override
+    public StringBuilder getText()
+    {
+        return _text;
+    }
+
      @Override
      public void write(OutputStream out) throws IOException {
          throw new IllegalStateException("Writing is not available for the older file formats");
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java

index 70b8b38404455c770f186a727aeaf106562b1b10..15917978562326efe447dd69742d52ce92d585a1 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java
@@ -23,9 +23,7 @@ import java.io.File;
  import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStream;
-import java.util.ArrayList;
  import java.util.Arrays;
-import java.util.Collections;
  import java.util.LinkedHashMap;
  import java.util.List;
  import java.util.Map;
@@ -37,10 +35,7 @@ import org.apache.poi.hwpf.OldWordFileFormatException;
  import org.apache.poi.hwpf.model.CHPX;
  import org.apache.poi.hwpf.model.FieldsDocumentPart;
  import org.apache.poi.hwpf.model.FileInformationBlock;
-import org.apache.poi.hwpf.model.GenericPropertyNode;
-import org.apache.poi.hwpf.model.PAPFormattedDiskPage;
  import org.apache.poi.hwpf.model.PAPX;
-import org.apache.poi.hwpf.model.PlexOfCps;
  import org.apache.poi.hwpf.model.StyleSheet;
  import org.apache.poi.hwpf.model.TextPiece;
  import org.apache.poi.hwpf.sprm.SprmIterator;
@@ -51,10 +46,8 @@ import org.apache.poi.hwpf.usermodel.Field;
  import org.apache.poi.hwpf.usermodel.Paragraph;
  import org.apache.poi.hwpf.usermodel.Picture;
  import org.apache.poi.hwpf.usermodel.Range;
-import org.apache.poi.poifs.common.POIFSConstants;
  import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LittleEndian;
  
  /**
   * Used by developers to list out key information on a HWPF file. End users will
@@ -241,13 +234,10 @@ public final class HWPFLister
  
      private LinkedHashMap<Integer, String> paragraphs;
  
-    private String text;
-
      public HWPFLister( HWPFDocumentCore doc )
      {
          _doc = doc;
  
-        buildText();
          buildParagraphs();
      }
  
@@ -256,6 +246,7 @@ public final class HWPFLister
          paragraphs = new LinkedHashMap<Integer, String>();
  
          StringBuilder part = new StringBuilder();
+        String text = _doc.getDocumentText();
          for ( int charIndex = 0; charIndex < text.length(); charIndex++ )
          {
              char c = text.charAt( charIndex );
@@ -268,24 +259,6 @@ public final class HWPFLister
          }
      }
  
-    private void buildText()
-    {
-        StringBuilder builder = new StringBuilder();
-        for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() )
-        {
-            String toAppend = textPiece.getStringBuffer().toString();
-
-            if ( toAppend.length() != ( textPiece.getEnd() - textPiece
-                    .getStart() ) )
-            {
-                throw new AssertionError();
-            }
-
-            builder.replace( textPiece.getStart(), textPiece.getEnd(), toAppend );
-        }
-        this.text = builder.toString();
-    }
-
      private void dumpBookmarks()
      {
          if ( !( _doc instanceof HWPFDocument ) )
@@ -379,69 +352,69 @@ public final class HWPFLister
  
      public void dumpPapx( boolean withProperties ) throws Exception
      {
-        if ( _doc instanceof HWPFDocument )
-        {
-            System.out.println( "binary PAP pages " );
-
-            HWPFDocument doc = (HWPFDocument) _doc;
-
-            java.lang.reflect.Field fMainStream = HWPFDocumentCore.class
-                    .getDeclaredField( "_mainStream" );
-            fMainStream.setAccessible( true );
-            byte[] mainStream = (byte[]) fMainStream.get( _doc );
-
-            PlexOfCps binTable = new PlexOfCps( doc.getTableStream(), doc
-                    .getFileInformationBlock().getFcPlcfbtePapx(), doc
-                    .getFileInformationBlock().getLcbPlcfbtePapx(), 4 );
-
-            List<PAPX> papxs = new ArrayList<PAPX>();
-
-            int length = binTable.length();
-            for ( int x = 0; x < length; x++ )
-            {
-                GenericPropertyNode node = binTable.getProperty( x );
-
-                int pageNum = LittleEndian.getInt( node.getBytes() );
-                int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
-                        * pageNum;
-
-                PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
-                        mainStream, doc.getDataStream(), pageOffset,
-                        doc.getTextTable() );
-
-                System.out.println( "* PFKP: " + pfkp );
-
-                for ( PAPX papx : pfkp.getPAPXs() )
-                {
-                    System.out.println( "** " + papx );
-                    papxs.add( papx );
-                    if ( papx != null && true )
-                    {
-                        SprmIterator sprmIt = new SprmIterator(
-                                papx.getGrpprl(), 2 );
-                        while ( sprmIt.hasNext() )
-                        {
-                            SprmOperation sprm = sprmIt.next();
-                            System.out.println( "*** " + sprm.toString() );
-                        }
-                    }
-
-                }
-            }
-
-            Collections.sort( papxs );
-            System.out.println( "* Sorted by END" );
-            for ( PAPX papx : papxs )
-            {
-                System.out.println( "** " + papx );
-                SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
-                while ( sprmIt.hasNext() )
-                {
-                    SprmOperation sprm = sprmIt.next();
-                    System.out.println( "*** " + sprm.toString() );
-                }
-            }
-        }
+//        if ( _doc instanceof HWPFDocument )
+//        {
+//            System.out.println( "binary PAP pages " );
+//
+//            HWPFDocument doc = (HWPFDocument) _doc;
+//
+//            java.lang.reflect.Field fMainStream = HWPFDocumentCore.class
+//                    .getDeclaredField( "_mainStream" );
+//            fMainStream.setAccessible( true );
+//            byte[] mainStream = (byte[]) fMainStream.get( _doc );
+//
+//            PlexOfCps binTable = new PlexOfCps( doc.getTableStream(), doc
+//                    .getFileInformationBlock().getFcPlcfbtePapx(), doc
+//                    .getFileInformationBlock().getLcbPlcfbtePapx(), 4 );
+//
+//            List<PAPX> papxs = new ArrayList<PAPX>();
+//
+//            int length = binTable.length();
+//            for ( int x = 0; x < length; x++ )
+//            {
+//                GenericPropertyNode node = binTable.getProperty( x );
+//
+//                int pageNum = LittleEndian.getInt( node.getBytes() );
+//                int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
+//                        * pageNum;
+//
+//                PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
+//                        mainStream, doc.getDataStream(), pageOffset,
+//                        doc.getTextTable() );
+//
+//                System.out.println( "* PFKP: " + pfkp );
+//
+//                for ( PAPX papx : pfkp.getPAPXs() )
+//                {
+//                    System.out.println( "** " + papx );
+//                    papxs.add( papx );
+//                    if ( papx != null && true )
+//                    {
+//                        SprmIterator sprmIt = new SprmIterator(
+//                                papx.getGrpprl(), 2 );
+//                        while ( sprmIt.hasNext() )
+//                        {
+//                            SprmOperation sprm = sprmIt.next();
+//                            System.out.println( "*** " + sprm.toString() );
+//                        }
+//                    }
+//
+//                }
+//            }
+//
+//            Collections.sort( papxs );
+//            System.out.println( "* Sorted by END" );
+//            for ( PAPX papx : papxs )
+//            {
+//                System.out.println( "** " + papx );
+//                SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
+//                while ( sprmIt.hasNext() )
+//                {
+//                    SprmOperation sprm = sprmIt.next();
+//                    System.out.println( "*** " + sprm.toString() );
+//                }
+//            }
+//        }
  
          // for ( PAPX papx : _doc.getParagraphTable().getParagraphs() )
          // {
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java

index b5dcc78a80fcfbd0d7b34bb2988d51a63a73790e..464b11a406907aa0883d3584cfadad8d43275fba 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
@@ -20,13 +20,11 @@ package org.apache.poi.hwpf.extractor;
  import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
  import java.util.ArrayList;
  import java.util.Arrays;
  
  import org.apache.poi.POIOLE2TextExtractor;
  import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.TextPiece;
  import org.apache.poi.hwpf.usermodel.HeaderStories;
  import org.apache.poi.hwpf.usermodel.Paragraph;
  import org.apache.poi.hwpf.usermodel.Range;
@@ -218,22 +216,7 @@ public final class WordExtractor extends POIOLE2TextExtractor {
          *  mapping is broken. Fast too.
          */
         public String getTextFromPieces() {
-       StringBuffer textBuf = new StringBuffer();
-
-       for(TextPiece piece : doc.getTextTable().getTextPieces()) {
-               String encoding = "Cp1252";
-               if (piece.isUnicode()) {
-                       encoding = "UTF-16LE";
-               }
-               try {
-                       String text = new String(piece.getRawBytes(), encoding);
-                       textBuf.append(text);
-               } catch(UnsupportedEncodingException e) {
-                       throw new InternalError("Standard Encoding " + encoding + " not found, JVM broken");
-               }
-       }
-
-       String text = textBuf.toString();
+       String text = doc.getDocumentText();
  
         // Fix line endings (Note - won't get all of them
         text = text.replaceAll("\r\r\r", "\r\n\r\n\r\n");
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java

index 976c4a705580724943eb4aada28557d834e0c2d0..551e90b750e3d9e03d6092b4d964f0da6361bf52 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
@@ -179,34 +179,6 @@ public class CHPBinTable
              start = System.currentTimeMillis();
          }
  
-        // rebuild document paragraphs structure
-        StringBuilder docText = new StringBuilder();
-        for ( TextPiece textPiece : tpt.getTextPieces() )
-        {
-            String toAppend = textPiece.getStringBuffer().toString();
-            int toAppendLength = toAppend.length();
-
-            if ( toAppendLength != textPiece.getEnd() - textPiece.getStart() )
-            {
-                logger.log(
-                        POILogger.WARN,
-                        "Text piece has boundaries [",
-                        Integer.valueOf( textPiece.getStart() ),
-                        "; ",
-                        Integer.valueOf( textPiece.getEnd() ),
-                        ") but length ",
-                        Integer.valueOf( textPiece.getEnd()
-                                - textPiece.getStart() ) );
-            }
-
-            docText.replace( textPiece.getStart(), textPiece.getStart()
-                    + toAppendLength, toAppend );
-        }
-        logger.log( POILogger.DEBUG, "Document text rebuilded in ",
-                Long.valueOf( System.currentTimeMillis() - start ), " ms (",
-                Integer.valueOf( docText.length() ), " chars)" );
-        start = System.currentTimeMillis();
-
          List<CHPX> oldChpxSortedByStartPos = new ArrayList<CHPX>( _textRuns );
          Collections.sort( oldChpxSortedByStartPos,
                  PropertyNode.StartComparator.instance );
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java

index 4bb50e023e578c76f079236be90f0654753dc33e..e93fb0774141048b948147942f50bf93c233bfd0 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
@@ -54,9 +54,6 @@ public class PAPBinTable
    protected ArrayList<PAPX> _paragraphs = new ArrayList<PAPX>();
    byte[] _dataStream;
  
-  /** So we can know if things are unicode or not */
-  private TextPieceTable tpt;
-
    public PAPBinTable()
    {
    }
@@ -81,7 +78,6 @@ public class PAPBinTable
  
          {
              PlexOfCps binTable = new PlexOfCps( tableStream, offset, size, 4 );
-            this.tpt = tpt;
  
              int length = binTable.length();
              for ( int x = 0; x < length; x++ )
@@ -112,7 +108,8 @@ public class PAPBinTable
                  Integer.valueOf( _paragraphs.size() ), " elements)" );
      }
  
-    public void rebuild( byte[] dataStream, ComplexFileTable complexFileTable )
+    public void rebuild( final StringBuilder docText, byte[] dataStream,
+            ComplexFileTable complexFileTable )
      {
          long start = System.currentTimeMillis();
  
@@ -121,7 +118,8 @@ public class PAPBinTable
              SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
  
              // adding PAPX from fast-saved SPRMs
-            for ( TextPiece textPiece : tpt.getTextPieces() )
+            for ( TextPiece textPiece : complexFileTable.getTextPieceTable()
+                    .getTextPieces() )
              {
                  PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
                  if ( !prm.isComplex() )
@@ -167,34 +165,6 @@ public class PAPBinTable
              start = System.currentTimeMillis();
          }
  
-        // rebuild document paragraphs structure
-        StringBuilder docText = new StringBuilder();
-        for ( TextPiece textPiece : tpt.getTextPieces() )
-        {
-            String toAppend = textPiece.getStringBuffer().toString();
-            int toAppendLength = toAppend.length();
-
-            if ( toAppendLength != textPiece.getEnd() - textPiece.getStart() )
-            {
-                logger.log(
-                        POILogger.WARN,
-                        "Text piece has boundaries [",
-                        Integer.valueOf( textPiece.getStart() ),
-                        "; ",
-                        Integer.valueOf( textPiece.getEnd() ),
-                        ") but length ",
-                        Integer.valueOf( textPiece.getEnd()
-                                - textPiece.getStart() ) );
-            }
-
-            docText.replace( textPiece.getStart(), textPiece.getStart()
-                    + toAppendLength, toAppend );
-        }
-        logger.log( POILogger.DEBUG, "Document text rebuilded in ",
-                Long.valueOf( System.currentTimeMillis() - start ), " ms (",
-                Integer.valueOf( docText.length() ), " chars)" );
-        start = System.currentTimeMillis();
-
          List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( _paragraphs );
          Collections.sort( oldPapxSortedByEndPos,
                  PropertyNode.EndComparator.instance );
@@ -274,7 +244,8 @@ public class PAPBinTable
              {
                  // can we reuse existing?
                  PAPX existing = papxs.get( 0 );
-                if ( existing.getStart() == startInclusive && existing.getEnd() == endExclusive )
+                if ( existing.getStart() == startInclusive
+                        && existing.getEnd() == endExclusive )
                  {
                      newPapxs.add( existing );
                      lastParStart = endExclusive;
@@ -311,7 +282,8 @@ public class PAPBinTable
          this._paragraphs = new ArrayList<PAPX>( newPapxs );
  
          logger.log( POILogger.DEBUG, "PAPX rebuilded from document text in ",
-                Long.valueOf( System.currentTimeMillis() - start ), " ms" );
+                Long.valueOf( System.currentTimeMillis() - start ), " ms (",
+                Integer.valueOf( _paragraphs.size() ), " elements)" );
          start = System.currentTimeMillis();
  
          _dataStream = dataStream;
@@ -320,7 +292,7 @@ public class PAPBinTable
    public void insert(int listIndex, int cpStart, SprmBuffer buf)
    {
  
-    PAPX forInsert = new PAPX(0, 0, tpt, buf, _dataStream);
+    PAPX forInsert = new PAPX(0, 0, buf, _dataStream);
  
      // Ensure character offsets are really characters
      forInsert.setStart(cpStart);
@@ -350,7 +322,7 @@ public class PAPBinTable
         //  Original, until insert at point
         //  New one
         //  Clone of original, on to the old end
-        PAPX clone = new PAPX(0, 0, tpt, clonedBuf, _dataStream);
+        PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream);
          // Again ensure contains character based offsets no matter what
          clone.setStart(cpStart);
          clone.setEnd(currentPap.getEnd());
@@ -427,9 +399,8 @@ public class PAPBinTable
      return _paragraphs;
    }
  
-  public void writeTo(HWPFFileSystem sys, int fcMin)
-    throws IOException
-  {
+    public void writeTo( HWPFFileSystem sys, int fcMin, CharIndexTranslator translator ) throws IOException
+    {
  
      HWPFOutputStream docStream = sys.getStream("WordDocument");
      OutputStream tableStream = sys.getStream("1Table");
@@ -463,7 +434,7 @@ public class PAPBinTable
        PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream);
        pfkp.fill(overflow);
  
-      byte[] bufFkp = pfkp.toByteArray(tpt, fcMin);
+      byte[] bufFkp = pfkp.toByteArray(translator, fcMin);
        docStream.write(bufFkp);
        overflow = pfkp.getOverflow();
  
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java

index ce448911abb290edb80d414826d8f837c38f4717..a2255e7538d83d79b54bcf2bb3f8d40accf043ab 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
@@ -19,6 +19,9 @@ package org.apache.poi.hwpf.model;
  
  
  import java.io.UnsupportedEncodingException;
+
+import org.apache.poi.util.Internal;
+
  /**
   * Lightweight representation of a text piece.
   * Works in the character domain, not the byte domain, so you
@@ -27,19 +30,39 @@ import java.io.UnsupportedEncodingException;
   *
   * @author Ryan Ackley
   */
-
+@Internal
  public final class TextPiece extends PropertyNode<TextPiece>
  {
    private boolean _usesUnicode;
  
    private PieceDescriptor _pd;
  
-  /**
-   * @param start Beginning offset in main document stream, in characters.
-   * @param end Ending offset in main document stream, in characters.
-   * @param text The raw bytes of our text
-   */
-  public TextPiece(int start, int end, byte[] text, PieceDescriptor pd, int cpStart) {
+    /**
+     * @param start
+     *            Beginning offset in main document stream, in characters.
+     * @param end
+     *            Ending offset in main document stream, in characters.
+     * @param text
+     *            The raw bytes of our text
+     * @deprecated Use {@link #TextPiece(int,int,byte[],PieceDescriptor)}
+     *             instead
+     */
+    public TextPiece( int start, int end, byte[] text, PieceDescriptor pd,
+            int cpStart )
+    {
+        this( start, end, text, pd );
+    }
+
+    /**
+     * @param start
+     *            Beginning offset in main document stream, in characters.
+     * @param end
+     *            Ending offset in main document stream, in characters.
+     * @param text
+     *            The raw bytes of our text
+     */
+    public TextPiece( int start, int end, byte[] text, PieceDescriptor pd )
+    {
           super(start, end, buildInitSB(text, pd));
           _usesUnicode = pd.isUnicode();
           _pd = pd;
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java

index 6c6ca188c4b08dff5ace4343e7b842b37b94473b..453d0285a11658a0b12b6a236e3c4e13b243e42a 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@@ -24,6 +24,8 @@ import java.util.List;
  
  import org.apache.poi.hwpf.model.io.HWPFOutputStream;
  import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
  
  /**
   * The piece table for matching up character positions to bits of text. This
@@ -34,6 +36,9 @@ import org.apache.poi.poifs.common.POIFSConstants;
   */
  public class TextPieceTable implements CharIndexTranslator
  {
+    private static final POILogger logger = POILogFactory
+            .getLogger( TextPieceTable.class );
+
      // int _multiple;
      int _cpMin;
      protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
@@ -101,7 +106,7 @@ public class TextPieceTable implements CharIndexTranslator
  
              // And now build the piece
              _textPieces.add( new TextPiece( nodeStartChars, nodeEndChars, buf,
-                    pieces[x], node.getStart() ) );
+                    pieces[x] ) );
          }
  
          // In the interest of our sanity, now sort the text pieces
@@ -251,6 +256,41 @@ public class TextPieceTable implements CharIndexTranslator
          return _cpMin;
      }
  
+    public StringBuilder getText()
+    {
+        final long start = System.currentTimeMillis();
+
+        // rebuild the document text by placing each text piece at its start offset
+        StringBuilder docText = new StringBuilder();
+        for ( TextPiece textPiece : _textPieces )
+        {
+            String toAppend = textPiece.getStringBuffer().toString();
+            int toAppendLength = toAppend.length();
+
+            if ( toAppendLength != textPiece.getEnd() - textPiece.getStart() )
+            {
+                logger.log(
+                        POILogger.WARN,
+                        "Text piece has boundaries [",
+                        Integer.valueOf( textPiece.getStart() ),
+                        "; ",
+                        Integer.valueOf( textPiece.getEnd() ),
+                        ") but length ",
+                        Integer.valueOf( textPiece.getEnd()
+                                - textPiece.getStart() ) );
+            }
+
+            docText.replace( textPiece.getStart(), textPiece.getStart()
+                    + toAppendLength, toAppend );
+        }
+
+        logger.log( POILogger.DEBUG, "Document text were rebuilded in ",
+                Long.valueOf( System.currentTimeMillis() - start ), " ms (",
+                Integer.valueOf( docText.length() ), " chars)" );
+
+        return docText;
+    }
+
      public List<TextPiece> getTextPieces()
      {
          return _textPieces;
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java

index 51287ef7b6588f15a6aae2cc474d9fa04ac5743f..7bb89b869e505eb88e0689658172ca3ac11d2d5c 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
@@ -31,7 +31,7 @@ import org.apache.poi.hwpf.model.PropertyNode;
  import org.apache.poi.hwpf.model.SEPX;
  import org.apache.poi.hwpf.model.StyleSheet;
  import org.apache.poi.hwpf.model.SubdocumentType;
-import org.apache.poi.hwpf.model.TextPiece;
+import org.apache.poi.hwpf.model.TextPieceTable;
  import org.apache.poi.hwpf.sprm.CharacterSprmCompressor;
  import org.apache.poi.hwpf.sprm.ParagraphSprmCompressor;
  import org.apache.poi.hwpf.sprm.SprmBuffer;
@@ -108,18 +108,8 @@ public class Range { // TODO -instantiable superclass
         /** The end index in the characterRuns list for this Range. */
         protected int _charEnd;
  
-       /** Have we loaded the Text indexes yet */
-       protected boolean _textRangeFound;
-
-       /** All text pieces that belong to the document this Range belongs to. */
-       protected List<TextPiece> _text;
-
-       /** The start index in the text list for this Range. */
-       protected int _textStart;
-
-       /** The end index in the text list for this Range. */
-       protected int _textEnd;
-
+       protected StringBuilder _text;
+       
         // protected Range()
         // {
         //
@@ -144,7 +134,7 @@ public class Range { // TODO -instantiable superclass
                 _sections = _doc.getSectionTable().getSections();
                 _paragraphs = _doc.getParagraphTable().getParagraphs();
                 _characters = _doc.getCharacterTable().getTextRuns();
-               _text = _doc.getTextTable().getTextPieces();
+               _text = _doc.getText();
                 _parent = new WeakReference<Range>(null);
  
                 sanityCheckStartEnd();
@@ -171,6 +161,7 @@ public class Range { // TODO -instantiable superclass
                 _parent = new WeakReference<Range>(parent);
  
                 sanityCheckStartEnd();
+               assert sanityCheck();
         }
  
         /**
@@ -212,23 +203,17 @@ public class Range { // TODO -instantiable superclass
                 }
         }
  
-       /**
-        * Does any <code>TextPiece</code> in this Range use unicode?
-        *
-        * @return true if it does and false if it doesn't
-        */
-       public boolean usesUnicode() {
-
-               initText();
-
-               for (int i = _textStart; i < _textEnd; i++) {
-                       TextPiece piece = _text.get(i);
-                       if (piece.isUnicode())
-                               return true;
-               }
-
-               return false;
-       }
+    /**
+     * @return always {@code true}; no per-piece unicode check is done here
+     * @deprecated Range is not linked to any text piece anymore; to check
+     *             whether unicode is used, access {@link TextPieceTable}
+     *             during document load time
+     */
+    @Deprecated
+    public boolean usesUnicode()
+    {
+        return true;
+    }
  
         /**
          * Gets the text that this Range contains.
@@ -236,29 +221,7 @@ public class Range { // TODO -instantiable superclass
          * @return The text for this range.
          */
         public String text() {
-               initText();
-
-               StringBuffer sb = new StringBuffer();
-
-               for (int x = _textStart; x < _textEnd; x++) {
-                       TextPiece piece = _text.get(x);
-
-                       // Figure out where in this piece the text
-                       // we're after lives
-                       int rStart = 0;
-                       int rEnd = piece.characterLength();
-                       if (_start > piece.getStart()) {
-                               rStart = _start - piece.getStart();
-                       }
-                       if (_end < piece.getEnd()) {
-                               rEnd -= (piece.getEnd() - _end);
-                       }
-
-                       // Luckily TextPieces work in characters, so we don't
-                       // need to worry about unicode here
-                       sb.append(piece.substring(rStart, rEnd));
-               }
-               return sb.toString();
+           return _text.substring( _start, _end );
         }
  
         /**
@@ -346,67 +309,52 @@ public class Range { // TODO -instantiable superclass
                 return _charEnd - _charStart;
         }
  
-       /**
-        * Inserts text into the front of this range.
-        *
-        * @param text
-        *            The text to insert
-        * @return The character run that text was inserted into.
-        */
-       public CharacterRun insertBefore(String text)
-       // throws UnsupportedEncodingException
-       {
-               initAll();
-
-               TextPiece tp = _text.get(_textStart);
-               StringBuffer sb = tp.getStringBuffer();
-
-               // Since this is the first item in our list, it is safe to assume that
-               // _start >= tp.getStart()
-               int insertIndex = _start - tp.getStart();
-               sb.insert(insertIndex, text);
-
-               int adjustedLength = _doc.getTextTable().adjustForInsert(_textStart, text.length());
-               _doc.getCharacterTable().adjustForInsert(_charStart, adjustedLength);
-               _doc.getParagraphTable().adjustForInsert(_parStart, adjustedLength);
-               _doc.getSectionTable().adjustForInsert(_sectionStart, adjustedLength);
-               adjustForInsert(adjustedLength);
+    /**
+     * Inserts text into the front of this range.
+     * 
+     * @param text
+     *            The text to insert
+     * @return The character run that text was inserted into.
+     */
+    public CharacterRun insertBefore( String text )
+    {
+        initAll();
  
-               // update the FIB.CCPText + friends fields
-               adjustFIB(text.length());
+        _text.insert( _start, text );
+        _doc.getCharacterTable().adjustForInsert( _charStart, text.length() );
+        _doc.getParagraphTable().adjustForInsert( _parStart, text.length() );
+        _doc.getSectionTable().adjustForInsert( _sectionStart, text.length() );
+        adjustForInsert( text.length() );
  
-               return getCharacterRun(0);
-       }
+        // update the FIB.CCPText + friends fields
+        adjustFIB( text.length() );
  
-       /**
-        * Inserts text onto the end of this range
-        *
-        * @param text
-        *            The text to insert
-        * @return The character run the text was inserted into.
-        */
-       public CharacterRun insertAfter(String text) {
-               initAll();
+        assert sanityCheck();
  
-               int listIndex = _textEnd - 1;
-               TextPiece tp = _text.get(listIndex);
-               StringBuffer sb = tp.getStringBuffer();
+        return getCharacterRun( 0 );
+    }
  
-               int insertIndex = _end - tp.getStart();
+    /**
+     * Inserts text onto the end of this range
+     * 
+     * @param text
+     *            The text to insert
+     * @return The character run the text was inserted into.
+     */
+    public CharacterRun insertAfter( String text )
+    {
+        initAll();
  
-               if (tp.getStringBuffer().charAt(_end - 1) == '\r' && text.charAt(0) != '\u0007') {
-                       insertIndex--;
-               }
-               sb.insert(insertIndex, text);
-               int adjustedLength = _doc.getTextTable().adjustForInsert(listIndex, text.length());
-               _doc.getCharacterTable().adjustForInsert(_charEnd - 1, adjustedLength);
-               _doc.getParagraphTable().adjustForInsert(_parEnd - 1, adjustedLength);
-               _doc.getSectionTable().adjustForInsert(_sectionEnd - 1, adjustedLength);
-               adjustForInsert(text.length());
+        _text.insert( _end, text );
  
-               return getCharacterRun(numCharacterRuns() - 1);
+        _doc.getCharacterTable().adjustForInsert( _charEnd - 1, text.length() );
+        _doc.getParagraphTable().adjustForInsert( _parEnd - 1, text.length() );
+        _doc.getSectionTable().adjustForInsert( _sectionEnd - 1, text.length() );
+        adjustForInsert( text.length() );
  
-       }
+        assert sanityCheck();
+        return getCharacterRun( numCharacterRuns() - 1 );
+    }
  
         /**
          * Inserts text into the front of this range and it gives that text the
@@ -580,7 +528,6 @@ public class Range { // TODO -instantiable superclass
                 int numSections = _sections.size();
                 int numRuns = _characters.size();
                 int numParagraphs = _paragraphs.size();
-               int numTextPieces = _text.size();
  
                 for (int x = _charStart; x < numRuns; x++) {
                         CHPX chpx = _characters.get(x);
@@ -605,10 +552,12 @@ public class Range { // TODO -instantiable superclass
                         // + " -> " + sepx.getEnd());
                 }
  
-               for (int x = _textStart; x < numTextPieces; x++) {
-                       TextPiece piece = _text.get(x);
-                       piece.adjustForDelete(_start, _end - _start);
-               }
+        _text.delete( _start, _end );
+        Range parent = _parent.get();
+        if ( parent != null )
+        {
+            parent.adjustForInsert( -( _end - _start ) );
+        }
  
                 // update the FIB.CCPText + friends field
                 adjustFIB(-(_end - _start));
@@ -623,7 +572,7 @@ public class Range { // TODO -instantiable superclass
          * @param rows
          *            The number of rows.
          * @return The empty Table that is now part of the document.
-     * @deprecated Use code shall not work with {@link ParagraphProperties}
+     * @deprecated Use code shall not work with {@link TableProperties}
          */
         @Deprecated
         public Table insertBefore(TableProperties props, int rows) {
@@ -631,19 +580,28 @@ public class Range { // TODO -instantiable superclass
                 parProps.setFInTable(true);
                 parProps.setItap( 1 );
  
+               final int oldEnd = this._end;
+               
                 int columns = props.getItcMac();
-               for (int x = 0; x < rows; x++) {
-                       Paragraph cell = this.insertBefore(parProps, StyleSheet.NIL_STYLE);
-                       cell.insertAfter(String.valueOf('\u0007'));
-                       for (int y = 1; y < columns; y++) {
-                               cell = cell.insertAfter(parProps, StyleSheet.NIL_STYLE);
-                               cell.insertAfter(String.valueOf('\u0007'));
-                       }
-                       cell = cell.insertAfter(parProps, StyleSheet.NIL_STYLE, String.valueOf('\u0007'));
-                       cell.setTableRowEnd(props);
-               }
-               return new Table(_start, _start + (rows * (columns + 1)) * 2, this, 1);
-       }
+        for ( int x = 0; x < rows; x++ )
+        {
+            Paragraph cell = this.insertBefore( parProps, StyleSheet.NIL_STYLE );
+            cell.insertAfter( String.valueOf( '\u0007' ) );
+            for ( int y = 1; y < columns; y++ )
+            {
+                cell = cell.insertAfter( parProps, StyleSheet.NIL_STYLE );
+                cell.insertAfter( String.valueOf( '\u0007' ) );
+            }
+            cell = cell.insertAfter( parProps, StyleSheet.NIL_STYLE,
+                    String.valueOf( '\u0007' ) );
+            cell.setTableRowEnd( props );
+        }
+
+        final int newEnd = this._end;
+        final int diff = newEnd - oldEnd;
+
+        return new Table( _start, _start + diff, this, 1 );
+    }
  
         /**
          * Inserts a list into the beginning of this range.
@@ -715,23 +673,14 @@ public class Range { // TODO -instantiable superclass
          */
         public void replaceText(String pPlaceHolder, String pValue, int pOffset) {
                 int absPlaceHolderIndex = getStartOffset() + pOffset;
-               Range subRange = new Range(absPlaceHolderIndex, (absPlaceHolderIndex + pPlaceHolder
-                               .length()), getDocument());
-
-               // this Range isn't a proper parent of the subRange() so we'll have to
-               // keep
-               // track of an updated endOffset on our own
-               int previousEndOffset = subRange.getEndOffset();
  
+               Range subRange = new Range(absPlaceHolderIndex, (absPlaceHolderIndex + pPlaceHolder
+                               .length()), this);
                 subRange.insertBefore(pValue);
  
-               if (subRange.getEndOffset() != previousEndOffset) {
-                       adjustForInsert(subRange.getEndOffset() - previousEndOffset);
-               }
-
                 // re-create the sub-range so we can delete it
                 subRange = new Range((absPlaceHolderIndex + pValue.length()), (absPlaceHolderIndex
-                               + pPlaceHolder.length() + pValue.length()), getDocument());
+                               + pPlaceHolder.length() + pValue.length()), this);
  
                 // deletes are automagically propagated
                 subRange.delete();
@@ -921,7 +870,6 @@ public class Range { // TODO -instantiable superclass
          * loads all of the list indexes.
          */
         protected void initAll() {
-               initText();
                 initCharacterRuns();
                 initParagraphs();
                 initSections();
@@ -951,18 +899,6 @@ public class Range { // TODO -instantiable superclass
                 }
         }
  
-       /**
-        * inits the text piece list indexes.
-        */
-       private void initText() {
-               if (!_textRangeFound) {
-                       int[] point = findRange(_text, _textStart, _start, _end);
-                       _textStart = point[0];
-                       _textEnd = point[1];
-                       _textRangeFound = true;
-               }
-       }
-
         /**
          * inits the section list indexes.
          */
@@ -1038,7 +974,6 @@ public class Range { // TODO -instantiable superclass
          * resets the list indexes.
          */
         protected void reset() {
-               _textRangeFound = false;
                 _charRangeFound = false;
                 _parRangeFound = false;
                 _sectionRangeFound = false;
@@ -1153,8 +1088,19 @@ public class Range { // TODO -instantiable superclass
       * Method for debug purposes. Checks that all resolved elements are inside
       * of current range.
       */
-    public void sanityCheck()
+    public boolean sanityCheck()
      {
+        if ( _start < 0 )
+            throw new AssertionError();
+        if ( _start >= _text.length() )
+            throw new AssertionError();
+        if ( _end < 0 )
+            throw new AssertionError();
+        if ( _end > _text.length() )
+            throw new AssertionError();
+        if ( _start > _end )
+            throw new AssertionError();
+
          if ( _charRangeFound )
          {
              for ( int c = _charStart; c < _charEnd; c++ )
@@ -1181,5 +1127,7 @@ public class Range { // TODO -instantiable superclass
                      throw new AssertionError();
              }
          }
+
+        return true;
      }
  }
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestDifferentRoutes.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestDifferentRoutes.java

index bda0866bb0846007a6255e43ec453b6c9f9ff18d..2ecd482892f5394f0647dddd4ec24013468036b3 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestDifferentRoutes.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestDifferentRoutes.java
@@ -17,16 +17,13 @@
  
  package org.apache.poi.hwpf.extractor;
  
-import java.util.Iterator;
+import junit.framework.TestCase;
  
  import org.apache.poi.hwpf.HWPFDocument;
  import org.apache.poi.hwpf.HWPFTestDataSamples;
-import org.apache.poi.hwpf.model.TextPiece;
  import org.apache.poi.hwpf.usermodel.Paragraph;
  import org.apache.poi.hwpf.usermodel.Range;
  
-import junit.framework.TestCase;
-
  /**
   * Test the different routes to extracting text
   *
@@ -78,24 +75,10 @@ public final class TestDifferentRoutes extends TestCase {
          * Test textPieces based extraction
          */
         public void testExtractFromTextPieces() throws Exception {
-               StringBuffer textBuf = new StringBuffer();
-
-               Iterator textPieces = doc.getTextTable().getTextPieces().iterator();
-               while (textPieces.hasNext()) {
-                       TextPiece piece = (TextPiece) textPieces.next();
-
-                       String encoding = "Cp1252";
-                       if (piece.isUnicode()) {
-                               encoding = "UTF-16LE";
-                       }
-                       String text = new String(piece.getRawBytes(), encoding);
-                       textBuf.append(text);
-               }
-
                 StringBuffer exp = new StringBuffer();
                 for (int i = 0; i < p_text.length; i++) {
                         exp.append(p_text[i]);
                 }
-               assertEquals(exp.toString(), textBuf.toString());
+               assertEquals(exp.toString(), doc.getDocumentText());
         }
  }
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java

index c743d52d86f546e6af5fa29fff7bd358b50b6cef..e21cee3573e05049d8cd77899d8d37af6b30ee9b 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java
@@ -53,7 +53,7 @@ public final class TestPAPBinTable extends TestCase
  
          HWPFFileSystem fileSys = new HWPFFileSystem();
  
-        _pAPBinTable.writeTo( fileSys, 0 );
+        _pAPBinTable.writeTo( fileSys, 0, fakeTPT );
          ByteArrayOutputStream tableOut = fileSys.getStream( "1Table" );
          ByteArrayOutputStream mainOut = fileSys.getStream( "WordDocument" );
  
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestTextPieceTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestTextPieceTable.java

index 7c73022ef59fbc521ba30405dcd0dd1f264678e0..bef1fc32e8b9cabc9b97524d94e1db15f3314b2a 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestTextPieceTable.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestTextPieceTable.java
@@ -169,6 +169,7 @@ public final class TestTextPieceTable extends TestCase {
      throws Exception
    {
      super.setUp();
+    System.setProperty( "org.apache.poi.hwpf.preserveTextTable", Boolean.TRUE.toString() );
  
      _hWPFDocFixture = new HWPFDocFixture(this, HWPFDocFixture.DEFAULT_TEST_FILE);
      _hWPFDocFixture.setUp();
@@ -178,8 +179,9 @@ public final class TestTextPieceTable extends TestCase {
      throws Exception
    {
      _hWPFDocFixture.tearDown();
-
      _hWPFDocFixture = null;
+
+    System.setProperty( "org.apache.poi.hwpf.preserveTextTable", Boolean.FALSE.toString() );
      super.tearDown();
    }
  
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java

index fae45758ac4322925cca9c7d9a60af191f16868b..6fc32c456a485eb21f67ee969958f4e103845137 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java
@@ -103,10 +103,6 @@ public final class TestProblems extends HWPFTestCase {
        assertEquals("One paragraph is ok\7", r.getParagraph(3).text());
        assertEquals("\7", r.getParagraph(4).text());
        assertEquals("\r", r.getParagraph(5).text());
-      for(int i=0; i<=5; i++) {
-         assertFalse(r.getParagraph(i).usesUnicode());
-      }
-
  
        // Get the table
        Table t = r.getTable(p);
@@ -304,9 +300,6 @@ public final class TestProblems extends HWPFTestCase {
        assertEquals("Row 3/Cell 3\u0007", r.getParagraph(10).text());
        assertEquals("\u0007", r.getParagraph(11).text());
        assertEquals("\r", r.getParagraph(12).text());
-      for(int i=0; i<=12; i++) {
-         assertFalse(r.getParagraph(i).usesUnicode());
-      }
  
        Paragraph p;
  
@@ -791,7 +784,9 @@ public final class TestProblems extends HWPFTestCase {
              Paragraph actParagraph = actual.getParagraph( p );
  
              assertEquals( expParagraph.text(), actParagraph.text() );
-            assertEquals( expParagraph.isInTable(), actParagraph.isInTable() );
+            assertEquals( "Diffent isInTable flags for paragraphs #" + p
+                    + " -- " + expParagraph + " -- " + actParagraph + ".",
+                    expParagraph.isInTable(), actParagraph.isInTable() );
              assertEquals( expParagraph.isTableRowEnd(),
                      actParagraph.isTableRowEnd() );
  
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java

index f123a2018c907cf8dbf3d51c0881ed4db8946366..2e55486695e2f1d53ecd19781ac324794bf0e4d5 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
@@ -150,6 +150,8 @@ public final class TestRangeDelete extends TestCase {
                 assertEquals(searchText, subRange.text());
  
                 subRange.delete();
+               daDoc.getOverallRange().sanityCheck();
+               daDoc.getRange().sanityCheck();
  
                 // we need to let the model re-calculate the Range before we evaluate it
                 range = daDoc.getRange();
@@ -166,6 +168,7 @@ public final class TestRangeDelete extends TestCase {
                 // this can lead to a StringBufferOutOfBoundsException, so we will add it
                 // even though we don't have an assertion for it
                 Range daRange = daDoc.getRange();
+               daRange.sanityCheck();
                 daRange.text();
         }
author	Sergey Vladimirov <sergey@apache.org>
	Mon, 25 Jul 2011 12:58:09 +0000 (12:58 +0000)
committer	Sergey Vladimirov <sergey@apache.org>
	Mon, 25 Jul 2011 12:58:09 +0000 (12:58 +0000)
src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestDifferentRoutes.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/model/TestTextPieceTable.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java		patch \| blob \| history