<!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
<action dev="POI-DEVELOPERS" type="add">Include headers and footers in the extracted text from HWPF's WordExtractor</action>
<action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers and footers</action>
<action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode internally. Should avoid some odd behaviour when manipulating unicode text</action>
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
<action dev="POI-DEVELOPERS" type="add">Include headers and footers in the extracted text from HWPF's WordExtractor</action>
<action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers and footers</action>
<action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode internally. Should avoid some odd behaviour when manipulating unicode text</action>
// read in the pictures stream
_pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);
- _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, getTextTable().getTextPieces());
+ _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _cpSplit);
_ss = new StyleSheet(_tableStream, _fib.getFcStshf());
_ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());
generateCp(fcEnd, isUnicode),
buf
);
+ this.isUnicode = isUnicode;
}
private static int generateCp(int val, boolean isUnicode) {
if(isUnicode)
public void insert(int listIndex, int cpStart, SprmBuffer buf)
{
- boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+ boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
for (int x = 0; x < _crun; x++)
{
- boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
+ boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) );
_chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
}
}
public void insert(int listIndex, int cpStart, SprmBuffer buf)
{
- boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+ boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
for (int x = 0; x < _crun; x++) {
int startAt = getStart(x) - fcMin;
int endAt = getEnd(x) - fcMin;
- boolean isUnicode = tpt.isUnicodeAt(startAt);
+ boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt);
+ //System.err.println(startAt + " -> " + endAt + " = " + isUnicode);
_papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
}
_cpStart = fcStart;
_cpEnd = fcEnd;
_buf = buf;
+
+ if(_cpStart < 0) {
+ System.err.println("A property claimed to start before zero, at " + _cpStart + "! Resetting it to zero, and hoping for the best");
+ _cpStart = 0;
+ }
}
/**
import org.apache.poi.hwpf.usermodel.SectionProperties;
/**
- * TODO - figure out if this works in characters, like most
- * things do, or in bytes as PAPX / CHPX does.
*/
-public class SEPX extends PropertyNode
+public class SEPX extends BytePropertyNode
{
SectionDescriptor _sed;
- public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl)
+ public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode)
{
- super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0));
+ super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode);
_sed = sed;
}
protected ArrayList _sections = new ArrayList();
protected List _text;
+ /** So we can know if things are unicode or not */
+ private TextPieceTable tpt;
+
public SectionTable()
{
}
public SectionTable(byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin,
- List tpt)
+ TextPieceTable tpt, CPSplitCalculator cps)
{
PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
- _text = tpt;
+ this.tpt = tpt;
+ this._text = tpt.getTextPieces();
int length = sedPlex.length();
SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
int fileOffset = sed.getFc();
+ int startAt = CPtoFC(node.getStart());
+ int endAt = CPtoFC(node.getEnd());
+
+ boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
+// System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);
// check for the optimization
if (fileOffset == 0xffffffff)
{
- _sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), new byte[0]));
+ _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart));
}
else
{
byte[] buf = new byte[sepxSize];
fileOffset += LittleEndian.SHORT_SIZE;
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
- _sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), buf));
+ _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart));
}
}
+
+ // Some files seem to lie about their unicode status, which
+ // is very very pesky. Try to work around these, but this
+ // is getting on for black magic...
+ int mainEndsAt = cps.getMainDocumentEnd();
+ boolean matchAt = false;
+ boolean matchHalf = false;
+ for(int i=0; i<_sections.size(); i++) {
+ SEPX s = (SEPX)_sections.get(i);
+ if(s.getEnd() == mainEndsAt) {
+ matchAt = true;
+ } else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) {
+ matchHalf = true;
+ }
+ }
+ if(! matchAt && matchHalf) {
+ System.err.println("Your document seemed to be mostly unicode, but the section definition was in bytes! Trying anyway, but things may well go wrong!");
+ for(int i=0; i<_sections.size(); i++) {
+ SEPX s = (SEPX)_sections.get(i);
+ GenericPropertyNode node = sedPlex.getProperty(i);
+
+ s.setStart( CPtoFC(node.getStart()) );
+ s.setEnd( CPtoFC(node.getEnd()) );
+ }
+ }
}
public void adjustForInsert(int listIndex, int length)
// Line using Ryan's FCtoCP() conversion method -
// unable to observe any effect on our testcases when using this code - piers
- GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStart()), FCtoCP(sepx.getEnd()), sed.toByteArray());
+ GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray());
plex.addProperty(property);
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
// And now build the piece
_textPieces.add(new TextPiece(nodeStartChars, nodeEndChars, buf, pieces[x], node.getStart()));
}
+
+ // In the interest of our sanity, now sort the text pieces
+ // into order, if they're not already
+ TextPiece[] tp = (TextPiece[])
+ _textPieces.toArray(new TextPiece[_textPieces.size()]);
+ Arrays.sort(tp);
+ for(int i=0; i<tp.length; i++) {
+ _textPieces.set(i, tp[i]);
+ }
}
public int getCpMin()
* paragraph properties :(
* @param cp The character offset to check about
*/
- public boolean isUnicodeAt(int cp) {
+ public boolean isUnicodeAtCharOffset(int cp) {
boolean lastWas = false;
- int lastAt = 0;
Iterator it = _textPieces.iterator();
while(it.hasNext()) {
return tp.isUnicode();
}
// Otherwise keep track for the last one
- if(tp.getStart() > lastAt) {
- lastWas = tp.isUnicode();
+ lastWas = tp.isUnicode();
+ }
+
+ // If they ask off the end, just go with the last one...
+ return lastWas;
+ }
+ /**
+ * Is the text at the given byte offset
+ * unicode, or plain old ascii?
+ * In a very evil fashion, you have to actually
+ * know this to make sense of character and
+ * paragraph properties :(
+ * @param bytePos The byte offset to check about
+ */
+ public boolean isUnicodeAtByteOffset(int bytePos) {
+ boolean lastWas = false;
+ int curByte = 0;
+
+ Iterator it = _textPieces.iterator();
+ while(it.hasNext()) {
+ TextPiece tp = (TextPiece)it.next();
+ int nextByte = curByte + tp.bytesLength();
+
+ // If the text piece covers the character, all good
+ if(curByte <= bytePos && nextByte >= bytePos) {
+ return tp.isUnicode();
}
+ // Otherwise keep track for the last one
+ lastWas = tp.isUnicode();
+ // Move along
+ curByte = nextByte;
}
// If they ask off the end, just go with the last one...
_characters = _doc.getCharacterTable().getTextRuns();
_text = _doc.getTextTable().getTextPieces();
_parent = new WeakReference(null);
+
+ sanityCheckStartEnd();
}
_characters = parent._characters;
_text = parent._text;
_parent = new WeakReference(parent);
+
+ sanityCheckStartEnd();
}
/**
_textRangeFound = true;
break;
}
+
+ sanityCheckStartEnd();
+ }
+
+ /**
+ * Ensures that the start and end that were given
+ * are actually valid, to avoid issues later on
+ * if they're not
+ */
+ private void sanityCheckStartEnd() {
+ if(_start < 0) {
+ throw new IllegalArgumentException("Range start must not be negative. Given " + _start);
+ }
+ if(_end < _start) {
+ throw new IllegalArgumentException("The end (" + _end + ") must not be before the start ("+_start+")");
+ }
}
/**
for (int x = _parStart; x < numParagraphs; x++)
{
PAPX papx = (PAPX)_paragraphs.get(x);
+ //System.err.println("Paragraph " + x + " was " + papx.getStart() + " -> " + papx.getEnd());
papx.adjustForDelete(_start, _end - _start);
+ //System.err.println("Paragraph " + x + " is now " + papx.getStart() + " -> " + papx.getEnd());
}
for (int x = _sectionStart; x < numSections; x++)
{
SEPX sepx = (SEPX)_sections.get(x);
+ //System.err.println("Section " + x + " was " + sepx.getStart() + " -> " + sepx.getEnd());
sepx.adjustForDelete(_start, _end - _start);
+ //System.err.println("Section " + x + " is now " + sepx.getStart() + " -> " + sepx.getEnd());
}
for (int x = _textStart; x < numTextPieces; x++)
{
throw new ArrayIndexOutOfBoundsException("The table's bounds fall outside of this Range");
}
+ if (tableEnd < 0)
+ {
+ throw new ArrayIndexOutOfBoundsException("The table's end is negative, which isn't allowed!");
+ }
return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel());
}
byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin();
+ CPSplitCalculator cps = new CPSplitCalculator(fib);
+
ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.getFcClx(), fcMin);
TextPieceTable tpt = cft.getTextPieceTable();
SectionTable sectionTable = new SectionTable(mainStream, tableStream,
fib.getFcPlcfsed(),
fib.getLcbPlcfsed(),
- fcMin, tpt.getTextPieces());
+ fcMin, tpt, cps);
HWPFFileSystem fileSys = new HWPFFileSystem();
sectionTable.writeTo(fileSys, 0);
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
- SectionTable newSectionTable = new SectionTable(newMainStream, newTableStream, 0, newTableStream.length, 0, tpt.getTextPieces());
+ SectionTable newSectionTable = new SectionTable(
+ newMainStream, newTableStream, 0,
+ newTableStream.length, 0, tpt, cps);
ArrayList oldSections = sectionTable.getSections();
ArrayList newSections = newSectionTable.getSections();
HWPFDocument doc = new HWPFDocument(new FileInputStream(
new File(dirname, "Bug44292.doc")));
Range r = doc.getRange();
+ assertEquals(6, r.numParagraphs());
+ assertEquals(0, r.getStartOffset());
+ assertEquals(87, r.getEndOffset());
- //get the table
+ // Paragraph with table
Paragraph p = r.getParagraph(0);
+ assertEquals(0, p.getStartOffset());
+ assertEquals(20, p.getEndOffset());
+
+ // Get the table
Table t = r.getTable(p);
//get the only row
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.model.PAPX;
/**
* Test to see if Range.delete() works even if the Range contains a
"${delete} This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
private String originalText =
"It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r";
+ private String lastText =
+ "Thank you, ${organization} ${delete}!\r";
private String searchText = "${delete}";
private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
private String expectedText2 =
Range range;
Section section;
Paragraph para;
+ PAPX paraDef;
// First, check overall
range = daDoc.getOverallRange();
assertEquals(1, range.numSections());
- assertEquals(4, range.numParagraphs());
+ assertEquals(5, range.numParagraphs());
// Now, onto just the doc bit
range = daDoc.getRange();
assertEquals(1, range.numSections());
+ assertEquals(1, daDoc.getSectionTable().getSections().size());
section = range.getSection(0);
-
- assertEquals(4, section.numParagraphs());
+
+ assertEquals(5, section.numParagraphs());
para = section.getParagraph(0);
assertEquals(1, para.numCharacterRuns());
assertEquals(introText, para.text());
para = section.getParagraph(1);
- assertEquals(2, para.numCharacterRuns());
+ assertEquals(5, para.numCharacterRuns());
assertEquals(fillerText, para.text());
+
+ paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(2);
+ assertEquals(132, paraDef.getStart());
+ assertEquals(400, paraDef.getEnd());
+
para = section.getParagraph(2);
- assertEquals(6, para.numCharacterRuns());
+ assertEquals(5, para.numCharacterRuns());
assertEquals(originalText, para.text());
+
+
+ paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(3);
+ assertEquals(400, paraDef.getStart());
+ assertEquals(438, paraDef.getEnd());
+
+ para = section.getParagraph(3);
+ assertEquals(1, para.numCharacterRuns());
+ assertEquals(lastText, para.text());
+
+
+ // Check things match on text length
+ assertEquals(439, range.text().length());
+ assertEquals(439, section.text().length());
+ assertEquals(439,
+ section.getParagraph(0).text().length() +
+ section.getParagraph(1).text().length() +
+ section.getParagraph(2).text().length() +
+ section.getParagraph(3).text().length() +
+ section.getParagraph(4).text().length()
+ );
}
/**
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
Paragraph para = section.getParagraph(2);
assertEquals(1, range.numSections());
section = range.getSection(0);
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
para = section.getParagraph(2);
text = para.text();
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
Paragraph para = section.getParagraph(2);
assertEquals(1, range.numSections());
section = range.getSection(0);
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
para = section.getParagraph(0);
text = para.text();
Paragraph para = section.getParagraph(2);
assertEquals(originalText, para.text());
- assertEquals(6, para.numCharacterRuns());
+ assertEquals(3, para.numCharacterRuns());
String text =
para.getCharacterRun(0).text() +
para.getCharacterRun(1).text() +
- para.getCharacterRun(2).text() +
- para.getCharacterRun(3).text() +
- para.getCharacterRun(4).text() +
- para.getCharacterRun(5).text()
+ para.getCharacterRun(2).text()
;
assertEquals(originalText, text);
Paragraph para = section.getParagraph(2);
assertEquals((textToInsert + originalText), para.text());
- assertEquals(6, para.numCharacterRuns());
+ assertEquals(3, para.numCharacterRuns());
String text =
para.getCharacterRun(0).text() +
para.getCharacterRun(1).text() +
- para.getCharacterRun(2).text() +
- para.getCharacterRun(3).text() +
- para.getCharacterRun(4).text() +
- para.getCharacterRun(5).text()
+ para.getCharacterRun(2).text()
;
// System.out.println(text);
r.text()
);
+ assertEquals(1, r.numSections());
+ assertEquals(1, a.getSectionTable().getSections().size());
+ Section s = r.getSection(0);
+ assertEquals(
+ a_page_1 +
+ page_break + "\r" +
+ a_page_2,
+ s.text()
+ );
+
assertEquals(
7,
r.numParagraphs()
assertEquals(
408, r.text().length()
);
+
+
+ assertEquals(1, r.numSections());
+ assertEquals(1, u.getSectionTable().getSections().size());
+ Section s = r.getSection(0);
+ assertEquals(
+ u_page_1 +
+ page_break + "\r" +
+ u_page_2,
+ s.text()
+ );
+ assertEquals(0, s.getStartOffset());
+ assertEquals(408, s.getEndOffset());
+
List pDefs = r._paragraphs;
assertEquals(35, pDefs.size());
HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
Range range = daDoc.getRange();
+ assertEquals(414, range.text().length());
assertEquals(1, range.numSections());
Section section = range.getSection(0);
+ assertEquals(414, section.text().length());
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
Paragraph para = section.getParagraph(2);
- assertEquals(6, para.numCharacterRuns());
+ assertEquals(5, para.numCharacterRuns());
String text =
para.getCharacterRun(0).text() +
para.getCharacterRun(1).text() +
para.getCharacterRun(2).text() +
para.getCharacterRun(3).text() +
- para.getCharacterRun(4).text() +
- para.getCharacterRun(5).text()
+ para.getCharacterRun(4).text()
;
assertEquals(originalText, text);
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
Paragraph para = section.getParagraph(2);
assertEquals(1, range.numSections());
Section section = range.getSection(0);
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
Paragraph para = section.getParagraph(2);
assertEquals(1, range.numSections());
section = range.getSection(0);
- assertEquals(4, section.numParagraphs());
+ assertEquals(5, section.numParagraphs());
para = section.getParagraph(2);
text = para.text();