CHPXs and PAPXs are apparently cp based, but are really byte based! Work around this

author Nick Burch <nick@apache.org>

Mon, 11 Aug 2008 21:25:17 +0000 (21:25 +0000)

committer Nick Burch <nick@apache.org>

Mon, 11 Aug 2008 21:25:17 +0000 (21:25 +0000)
author Nick Burch <nick@apache.org>
Mon, 11 Aug 2008 21:25:17 +0000 (21:25 +0000)
committer Nick Burch <nick@apache.org>
Mon, 11 Aug 2008 21:25:17 +0000 (21:25 +0000)
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java

index f06786b1fc9a7a210d192e0452e9120fed669d5c..ab32cb05a1b5aa3baa505904c1e8c60823d6072f 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
@@ -219,24 +219,25 @@ public class HWPFDocument extends POIDocument
          _dataStream = new byte[0];
      }
  
-    // get the start of text in the main stream
-    int fcMin = _fib.getFcMin();
+    // Get the cp of the start of text in the main stream
+    // The latest spec doc says this is always zero!
+    int fcMin = 0;
+    //fcMin = _fib.getFcMin() 
  
-    // load up our standard structures.
+    // Start to load up our standard structures.
      _dop = new DocumentProperties(_tableStream, _fib.getFcDop());
      _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
      _tpt = _cft.getTextPieceTable();
-    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin);
-    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin);
-
-    // Word XP puts in a zero filled buffer in front of the text and it screws
-    // up my system for offsets. This is an adjustment.
+    
+    // Word XP and later all put in a zero filled buffer in
+    //  front of the text. This screws up the system for offsets,
+    //  which assume we always start at zero. This is an adjustment.
      int cpMin = _tpt.getCpMin();
-    if (cpMin > 0)
-    {
-      _cbt.adjustForDelete(0, 0, cpMin);
-      _pbt.adjustForDelete(0, 0, cpMin);
-    }
+    
+    // Now load the rest of the properties, which need to be adjusted
+    //  for where the text really begins
+    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), cpMin, _tpt);
+    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt);
      
      // Read FSPA and Escher information
      _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java

new file mode 100644 (file)

index 0000000..c1b5928
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java
@@ -0,0 +1,59 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.model;
+
+/**
+ * Normally PropertyNodes only ever work in characters, but
+ *  a few cases actually store bytes, and this lets everything
+ *  still work despite that.
+ * It handles the conversion as required between bytes
+ *  and characters.
+ */
+public abstract class BytePropertyNode extends PropertyNode {
+       /** Whether the underlying text is stored as 2 bytes per character */
+       private final boolean isUnicode;
+       
+       /**
+        * @param fcStart The start of the text for this property, in _bytes_
+        * @param fcEnd The end of the text for this property, in _bytes_
+        * @param buf The property data, handed straight on to PropertyNode
+        * @param isUnicode Is the text 2 bytes per character, rather than 1?
+        */
+       public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) {
+               super(generateCp(fcStart, isUnicode), generateCp(fcEnd, isUnicode), buf);
+               // Must be remembered, otherwise the byte getters never scale back up
+               this.isUnicode = isUnicode;
+       }
+       
+       /** Converts a byte offset into a character offset */
+       private static int generateCp(int val, boolean isUnicode) {
+               return isUnicode ? val/2 : val;
+       }
+       
+       /** @return true if the offsets are 2 bytes per character */
+       public boolean isUnicode() {
+               return isUnicode;
+       }
+       /** @return the start offset in bytes, for reading in / writing out */
+       public int getStartBytes() {
+               return isUnicode ? getStart()*2 : getStart();
+       }
+       /** @return the end offset in bytes, for reading in / writing out */
+       public int getEndBytes() {
+               return isUnicode ? getEnd()*2 : getEnd();
+       }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java

index 48c5a9d8b4865827f02cc7754e61215605b02086..69e0a67d9e63658e902ee2011c87f9d2ef461b48 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
@@ -37,6 +37,8 @@ public class CHPBinTable
    /** List of character properties.*/
    protected ArrayList _textRuns = new ArrayList();
  
+  /** So we can know if things are unicode or not */
+  private TextPieceTable tpt;
  
    public CHPBinTable()
    {
@@ -52,9 +54,10 @@ public class CHPBinTable
     * @param fcMin
     */
    public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset,
-                     int size, int fcMin)
+                     int size, int fcMin, TextPieceTable tpt)
    {
      PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
+    this.tpt = tpt;
  
      int length = binTable.length();
      for (int x = 0; x < length; x++)
@@ -65,7 +68,7 @@ public class CHPBinTable
        int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
  
        CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
-        pageOffset, fcMin);
+        pageOffset, fcMin, tpt);
  
        int fkpSize = cfkp.size();
  
@@ -116,7 +119,14 @@ public class CHPBinTable
  
    public void insert(int listIndex, int cpStart, SprmBuffer buf)
    {
-    CHPX insertChpx = new CHPX(cpStart, cpStart, buf);
+       boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+         
+    CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
+    
+    // Ensure character offsets are really characters
+    insertChpx.setStart(cpStart);
+    insertChpx.setEnd(cpStart);
+    
      if (listIndex == _textRuns.size())
      {
        _textRuns.add(insertChpx);
@@ -126,7 +136,16 @@ public class CHPBinTable
        CHPX chpx = (CHPX)_textRuns.get(listIndex);
        if (chpx.getStart() < cpStart)
        {
-        CHPX clone = new CHPX(cpStart, chpx.getEnd(), chpx.getSprmBuf());
+       // Copy the properties of the one before to afterwards
+       // Will go:
+       //  Original, until insert at point
+       //  New one
+       //  Clone of original, on to the old end
+        CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode);
+        // Again ensure contains character based offsets no matter what
+        clone.setStart(cpStart);
+        clone.setEnd(chpx.getEnd());
+        
          chpx.setEnd(cpStart);
  
          _textRuns.add(listIndex + 1, insertChpx);
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java

index cd1a8c24f0916dfec641bf17f635fc6cfbca31ed..d5fb602b8e713b2aea9295d68271030d61fae88e 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
@@ -55,13 +55,14 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
       * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array
       * read from a Word file).
       */
-    public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin)
+    public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin, TextPieceTable tpt)
      {
        super(documentStream, offset);
  
        for (int x = 0; x < _crun; x++)
        {
-        _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x)));
+       boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
+        _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
        }
      }
  
@@ -157,7 +158,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
          chpx = (CHPX)_chpxList.get(x);
          byte[] grpprl = chpx.getGrpprl();
  
-        LittleEndian.putInt(buf, fcOffset, chpx.getStart() + fcMin);
+        LittleEndian.putInt(buf, fcOffset, chpx.getStartBytes() + fcMin);
          grpprlOffset -= (1 + grpprl.length);
          grpprlOffset -= (grpprlOffset % 2);
          buf[offsetOffset] = (byte)(grpprlOffset/2);
@@ -168,7 +169,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
          fcOffset += FC_SIZE;
        }
        // put the last chpx's end in
-      LittleEndian.putInt(buf, fcOffset, chpx.getEnd() + fcMin);
+      LittleEndian.putInt(buf, fcOffset, chpx.getEndBytes() + fcMin);
        return buf;
      }
  
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java

index 3e7b5b11fb6506018a883f837d7c6ab2e94f6ced..a89036c73357fe48ffac9df43435954f6da9ecbb 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
@@ -25,22 +25,26 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
  import org.apache.poi.hwpf.sprm.CharacterSprmUncompressor;
  
  /**
- * Comment me
+ * DANGER - works in bytes!
+ * 
+ * Make sure you call getStart() / getEnd() when you want characters
+ *  (normal use), but getStartBytes() / getEndBytes() when you're 
+ *  reading in / writing out!
   *
   * @author Ryan Ackley
   */
  
-public class CHPX extends PropertyNode
+public class CHPX extends BytePropertyNode
  {
  
-  public CHPX(int fcStart, int fcEnd, byte[] grpprl)
+  public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode)
    {
-    super(fcStart, fcEnd, new SprmBuffer(grpprl));
+    super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode);
    }
  
-  public CHPX(int fcStart, int fcEnd, SprmBuffer buf)
+  public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode)
    {
-    super(fcStart, fcEnd, buf);
+    super(fcStart, fcEnd, buf, isUnicode);
    }
  
  
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java

index 6f141d7612729b22389f12a9dd3c6bf283d4b1b9..cde563ec080b3d02860f712ad18f00c0c643b6c5 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
@@ -39,14 +39,18 @@ public class PAPBinTable
    protected ArrayList _paragraphs = new ArrayList();
    byte[] _dataStream;
  
+  /** So we can know if things are unicode or not */
+  private TextPieceTable tpt;
+
    public PAPBinTable()
    {
    }
  
    public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset,
-                     int size, int fcMin)
+                     int size, int fcMin, TextPieceTable tpt)
    {
      PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
+    this.tpt = tpt;
  
      int length = binTable.length();
      for (int x = 0; x < length; x++)
@@ -57,13 +61,14 @@ public class PAPBinTable
        int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
  
        PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
-        dataStream, pageOffset, fcMin);
+        dataStream, pageOffset, fcMin, tpt);
  
        int fkpSize = pfkp.size();
  
        for (int y = 0; y < fkpSize; y++)
        {
-        _paragraphs.add(pfkp.getPAPX(y));
+       PAPX papx = pfkp.getPAPX(y);
+        _paragraphs.add(papx);
        }
      }
      _dataStream = dataStream;
@@ -71,7 +76,14 @@ public class PAPBinTable
  
    public void insert(int listIndex, int cpStart, SprmBuffer buf)
    {
-    PAPX forInsert = new PAPX(cpStart, cpStart, buf, _dataStream);
+    boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+    
+    PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
+    
+    // Ensure character offsets are really characters
+    forInsert.setStart(cpStart);
+    forInsert.setEnd(cpStart);
+    
      if (listIndex == _paragraphs.size())
      {
         _paragraphs.add(forInsert);
@@ -90,10 +102,21 @@ public class PAPBinTable
          {
            exc.printStackTrace();
          }
+        
+       // Copy the properties of the one before to afterwards
+       // Will go:
+       //  Original, until insert at point
+       //  New one
+       //  Clone of original, on to the old end
+        PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode);
+        // Again ensure contains character based offsets no matter what
+        clone.setStart(cpStart);
+        clone.setEnd(currentPap.getEnd());
+        
          currentPap.setEnd(cpStart);
-        PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf, _dataStream);
-        _paragraphs.add(++listIndex, forInsert);
-        _paragraphs.add(++listIndex, splitPap);
+
+        _paragraphs.add(listIndex + 1, forInsert);
+        _paragraphs.add(listIndex + 2, clone);
        }
        else
        {
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java

index 979825bf84aae65637650a00168011fd96e8311f..20f9b63b988d9e707108f049a3c590171644a333 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
@@ -60,13 +60,14 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
      /**
       * Creates a PAPFormattedDiskPage from a 512 byte array
       */
-    public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin)
+    public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
      {
        super(documentStream, offset);
  
        for (int x = 0; x < _crun; x++)
        {
-        _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream));
+       boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
+        _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
        }
        _fkp = null;
        _dataStream = dataStream;
@@ -110,7 +111,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
      }
  
      /**
-     * Gets the papx for the paragraph at index in this fkp.
+     * Gets the papx grpprl for the paragraph at index in this fkp.
       *
       * @param index The index of the papx to get.
       * @return a papx grpprl.
@@ -259,7 +260,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
            grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
            grpprlOffset -= (grpprlOffset % 2);
          }
-        LittleEndian.putInt(buf, fcOffset, papx.getStart() + fcMin);
+        LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin);
          buf[bxOffset] = (byte)(grpprlOffset/2);
          System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);
  
@@ -287,7 +288,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
  
        }
  
-      LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin);
+      LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
        return buf;
      }
  
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java

index a7e259ec73f452fb157ad92c38562759eb21abc0..1e8ae86b526b4399ad12752391b13ebb43ddea78 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
@@ -29,29 +29,32 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
  import org.apache.poi.hwpf.sprm.SprmOperation;
  
  /**
- * Comment me
+ * DANGER - works in bytes!
+ * 
+ * Make sure you call getStart() / getEnd() when you want characters
+ *  (normal use), but getStartBytes() / getEndBytes() when you're 
+ *  reading in / writing out!
   *
   * @author Ryan Ackley
   */
  
-public class PAPX extends PropertyNode
-{
+public class PAPX extends BytePropertyNode {
  
    private ParagraphHeight _phe;
    private int _hugeGrpprlOffset = -1;
  
-  public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream)
+  public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode)
    {
-    super(fcStart, fcEnd, new SprmBuffer(papx));
+    super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode);
      _phe = phe;
      SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
      if(buf != null)
        _buf = buf;
    }
  
-  public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream)
+  public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode)
    {
-    super(fcStart, fcEnd, buf);
+    super(fcStart, fcEnd, buf, isUnicode);
      _phe = new ParagraphHeight();
      buf = findHuge(buf, dataStream);
      if(buf != null)
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java

index 42c5f5c27eb02ade48c9506105365527e7179450..5ae16aa5e311cd1467a9fc886d62f5bee8d345bb 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java
@@ -22,7 +22,10 @@ import java.util.Arrays;
  
  /**
   * Represents a lightweight node in the Trees used to store content
- * properties. Works only in characters.
+ *  properties.
+ * This only ever works in characters. For the few odd cases when
+ *  the start and end aren't in characters (eg PAPX and CHPX), use
+ *  {@link BytePropertyNode} between you and this.
   *
   * @author Ryan Ackley
   */
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java

index 5e903ecb8ae70793dc2f71dd472a20ecdac82c39..7e856f1eeb5b0401309e075e2a8ad395628aad85 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@@ -25,6 +25,7 @@ import org.apache.poi.poifs.common.POIFSConstants;
  import java.io.IOException;
  import java.io.UnsupportedEncodingException;
  import java.util.ArrayList;
+import java.util.Iterator;
  import java.util.List;
  
  /**
@@ -62,8 +63,17 @@ public class TextPieceTable
        pieces[x] = new PieceDescriptor(node.getBytes(), 0);
      }
  
-    int firstPieceFilePosition = pieces[0].getFilePosition();
-    _cpMin = firstPieceFilePosition - fcMin;
+    
+    // Figure out the cp of the earliest text piece
+    // Note that text pieces don't have to be stored in order!
+    _cpMin = pieces[0].getFilePosition() - fcMin;
+    for (int x = 0; x < pieces.length; x++) {
+       int start = pieces[x].getFilePosition() - fcMin;
+       if(start < _cpMin) {
+               _cpMin = start;
+       }
+    }
+
  
      // using the PieceDescriptors, build our list of TextPieces.
      for (int x = 0; x < pieces.length; x++)
@@ -104,6 +114,35 @@ public class TextPieceTable
    {
      return _textPieces;
    }
+  
+  /**
+   * Is the text at the given Character offset
+   *  unicode, or plain old ascii?
+   * In a very evil fashion, you have to actually know this
+   *  to make sense of character and paragraph properties :(
+   * @param cp The character offset to check about
+   * @return true if the text there is unicode
+   */
+  public boolean isUnicodeAt(int cp) {
+         boolean lastWas = false;
+         // Start of the latest piece seen so far, -1 until we've seen one
+         int lastAt = -1;
+         Iterator it = _textPieces.iterator();
+         while(it.hasNext()) {
+                 TextPiece tp = (TextPiece)it.next();
+                 // If the text piece covers the character, all good
+                 if(tp.getStart() <= cp && tp.getEnd() >= cp) {
+                         return tp.isUnicode();
+                 }
+                 // Otherwise remember the piece that starts the furthest on
+                 if(tp.getStart() > lastAt) {
+                         lastAt = tp.getStart();
+                         lastWas = tp.isUnicode();
+                 }
+         }
+         // If they ask off the end, just go with the last one...
+         return lastWas;
+  }
  
    public byte[] writeTo(HWPFOutputStream docStream)
      throws IOException
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java b/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java

index 6615823281e10f71396285ac0f677ae0200797e5..63961b455222c33dc76fe785a8e11745c332ec29 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java
@@ -70,10 +70,10 @@ public abstract class FIBAbstractType
          private static BitField  fFutureSavedUndo = BitFieldFactory.getInstance(0x0008);
          private static BitField  fWord97Saved = BitFieldFactory.getInstance(0x0010);
          private static BitField  fSpare0 = BitFieldFactory.getInstance(0x00FE);
-    protected  int field_11_chs;
-    protected  int field_12_chsTables;
-    protected  int field_13_fcMin;
-    protected  int field_14_fcMac;
+    protected  int field_11_chs;       // Latest docs say this is Reserved3!
+    protected  int field_12_chsTables; // Latest docs say this is Reserved4!
+    protected  int field_13_fcMin;     // Latest docs say this is Reserved5!
+    protected  int field_14_fcMac;     // Latest docs say this is Reserved6!
  
  
      public FIBAbstractType()
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java

index 07e8bfbf910e0b4ec369f3aa1d6b398b0cfdb003..d1f1451acd34a52794cc624ce09d0e1b97996024 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
@@ -32,6 +32,8 @@ public class TestCHPBinTable
  {
    private CHPBinTable _cHPBinTable = null;
    private HWPFDocFixture _hWPFDocFixture;
+  
+  private TextPieceTable fakeTPT = new TextPieceTable();
  
    public TestCHPBinTable(String name)
    {
@@ -46,7 +48,7 @@ public class TestCHPBinTable
      byte[] tableStream = _hWPFDocFixture._tableStream;
      int fcMin = fib.getFcMin();
  
-    _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin);
+    _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin, fakeTPT);
  
      HWPFFileSystem fileSys = new HWPFFileSystem();
  
@@ -57,7 +59,7 @@ public class TestCHPBinTable
      byte[] newTableStream = tableOut.toByteArray();
      byte[] newMainStream = mainOut.toByteArray();
  
-    CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0);
+    CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0, fakeTPT);
  
      ArrayList oldTextRuns = _cHPBinTable._textRuns;
      ArrayList newTextRuns = newBinTable._textRuns;
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java

index 4358cdef0973e35b4bbf901995cf08503851694a..446b5232a5a203fc358a4476db344a09860f84c2 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java
@@ -32,6 +32,8 @@ public class TestPAPBinTable
    private PAPBinTable _pAPBinTable = null;
    private HWPFDocFixture _hWPFDocFixture;
  
+  private TextPieceTable fakeTPT = new TextPieceTable();
+
    public TestPAPBinTable(String name)
    {
      super(name);
@@ -45,7 +47,7 @@ public class TestPAPBinTable
      byte[] tableStream = _hWPFDocFixture._tableStream;
      int fcMin = fib.getFcMin();
  
-    _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin);
+    _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT);
  
      HWPFFileSystem fileSys = new HWPFFileSystem();
  
@@ -56,7 +58,7 @@ public class TestPAPBinTable
      byte[] newTableStream = tableOut.toByteArray();
      byte[] newMainStream = mainOut.toByteArray();
  
-    PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0);
+    PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT);
  
      ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
      ArrayList newTextRuns = newBinTable.getParagraphs();
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java

index 2994b6332d32e513b59667676164a9078e58bff8..7cbd75d6b869544b1ed31cb8c22118852fd79266 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
@@ -18,23 +18,19 @@
  
  package org.apache.poi.hwpf.usermodel;
  
-import java.io.ByteArrayOutputStream;
  import java.io.FileInputStream;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.PicturesTable;
-import org.apache.poi.hwpf.usermodel.Picture;
  
  import junit.framework.TestCase;
  
+import org.apache.poi.hwpf.HWPFDocument;
+
  /**
   *     Test to see if Range.delete() works even if the Range contains a
   *     CharacterRun that uses Unicode characters.
   *
   * TODO - re-enable me when unicode paragraph stuff is fixed!
   */
-public abstract class TestRangeDelete extends TestCase {
+public class TestRangeDelete extends TestCase {
  
         // u201c and u201d are "smart-quotes"
         private String originalText =
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java

index 5f21508c9a3cea952ac02e94e4b587d6ea004c38..b4d7470387bb8f6fa63c8fc20ee6dd76ec399bcb 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java
@@ -18,23 +18,19 @@
  
  package org.apache.poi.hwpf.usermodel;
  
-import java.io.ByteArrayOutputStream;
  import java.io.FileInputStream;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.PicturesTable;
-import org.apache.poi.hwpf.usermodel.Picture;
  
  import junit.framework.TestCase;
  
+import org.apache.poi.hwpf.HWPFDocument;
+
  /**
   *     Test to see if Range.insertBefore() works even if the Range contains a
   *     CharacterRun that uses Unicode characters.
   *
   * TODO - re-enable me when unicode paragraph stuff is fixed!
   */
-public abstract class TestRangeInsertion extends TestCase {
+public class TestRangeInsertion extends TestCase {
  
         // u201c and u201d are "smart-quotes"
         private String originalText =
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java

index 1f0aad5aa5d9afd4ced4f807cfa8f517cc5cb1c7..f8a251b693fc1b83ced2fbff1c09206c4255de7d 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java
@@ -18,8 +18,10 @@ package org.apache.poi.hwpf.usermodel;
  
  import java.io.File;
  import java.io.FileInputStream;
+import java.util.List;
  
  import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.model.PropertyNode;
  
  import junit.framework.TestCase;
  
@@ -30,7 +32,7 @@ import junit.framework.TestCase;
   *
   * TODO - re-enable me when unicode paragraph stuff is fixed!
   */
-public abstract class TestRangeProperties extends TestCase {
+public class TestRangeProperties extends TestCase {
         private static final char page_break = (char)12;
         
         private static final String u_page_1 =
@@ -142,8 +144,88 @@ public abstract class TestRangeProperties extends TestCase {
                 assertEquals(22, c1.getFontSize());
                 assertEquals(32, c7.getFontSize());
         }
-       
  
+       /**
+        * Tests the raw definitions of the paragraphs of
+        *  a unicode document
+        */
+       public void testUnicodeParagraphDefinitions() throws Exception {
+               Range r = u.getRange();
+               String[] p1_parts = u_page_1.split("\r");
+               String[] p2_parts = u_page_2.split("\r");
+               
+               assertEquals(
+                               u_page_1 + page_break + "\r" + u_page_2,
+                               r.text()
+               );
+               assertEquals(
+                               408, r.text().length()
+               );
+               
+               List pDefs = r._paragraphs;
+               assertEquals(35, pDefs.size());
+               
+               // Check that the last paragraph ends where it should do
+               assertEquals(531, u.getOverallRange().text().length());
+               assertEquals(530, u.getCPSplitCalculator().getHeaderTextboxEnd());
+               PropertyNode pLast = (PropertyNode)pDefs.get(34);
+//             assertEquals(530, pLast.getEnd());
+               
+               // Only care about the first few really though
+               PropertyNode p0 = (PropertyNode)pDefs.get(0);
+               PropertyNode p1 = (PropertyNode)pDefs.get(1);
+               PropertyNode p2 = (PropertyNode)pDefs.get(2);
+               PropertyNode p3 = (PropertyNode)pDefs.get(3);
+               PropertyNode p4 = (PropertyNode)pDefs.get(4);
+               
+               // The first 5 paragraphs should all fall inside our main text
+               assertTrue(p0.getStart() < 408);
+               assertTrue(p0.getEnd() < 408);
+               assertTrue(p1.getStart() < 408);
+               assertTrue(p1.getEnd() < 408);
+               assertTrue(p2.getStart() < 408);
+               assertTrue(p2.getEnd() < 408);
+               assertTrue(p3.getStart() < 408);
+               assertTrue(p3.getEnd() < 408);
+               assertTrue(p4.getStart() < 408);
+               assertTrue(p4.getEnd() < 408);
+               
+               // Paragraphs should match with lines
+               assertEquals(
+                               0,
+                               p0.getStart()
+               );
+               assertEquals(
+                               p1_parts[0].length() + 1,
+                               p0.getEnd()
+               );
+               
+               assertEquals(
+                               p1_parts[0].length() + 1,
+                               p1.getStart()
+               );              
+               assertEquals(
+                               p1_parts[0].length() + 1 +
+                               p1_parts[1].length() + 1,
+                               p1.getEnd()
+               );
+               
+               assertEquals(
+                               p1_parts[0].length() + 1 +
+                               p1_parts[1].length() + 1,
+                               p2.getStart()
+               );
+               assertEquals(
+                               p1_parts[0].length() + 1 +
+                               p1_parts[1].length() + 1 +
+                               p1_parts[2].length() + 1,
+                               p2.getEnd()
+               );
+       }
+
+       /**
+        * Tests the paragraph text of a unicode document
+        */
         public void testUnicodeTextParagraphs() throws Exception {
                 Range r = u.getRange();
                 assertEquals(
@@ -154,14 +236,25 @@ public abstract class TestRangeProperties extends TestCase {
                 );
                 
                 assertEquals(
-                               5,
+                               12,
                                 r.numParagraphs()
                 );
                 String[] p1_parts = u_page_1.split("\r");
                 String[] p2_parts = u_page_2.split("\r");
                 
-               System.out.println(r.getParagraph(2).text());
-               // TODO
+               // Check text all matches up properly
+               assertEquals(p1_parts[0] + "\r", r.getParagraph(0).text());
+               assertEquals(p1_parts[1] + "\r", r.getParagraph(1).text());
+               assertEquals(p1_parts[2] + "\r", r.getParagraph(2).text());
+               assertEquals(p1_parts[3] + "\r", r.getParagraph(3).text());
+               assertEquals(p1_parts[4] + "\r", r.getParagraph(4).text());
+               assertEquals(p1_parts[5] + "\r", r.getParagraph(5).text());
+               assertEquals(p1_parts[6] + "\r", r.getParagraph(6).text());
+               assertEquals(p1_parts[7] + "\r", r.getParagraph(7).text());
+               assertEquals(p1_parts[8] + "\r", r.getParagraph(8).text());
+               assertEquals(p1_parts[9] + "\r", r.getParagraph(9).text());
+               assertEquals(page_break + "\r", r.getParagraph(10).text());
+               assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text());
         }
         public void testUnicodeStyling() throws Exception {
                 // TODO
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java

index 67aea65be996f616ed2dad316503a7310b01ac63..a342fdfd7b0f053949002e426bd5cd16012ea6f9 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java
@@ -18,23 +18,19 @@
  
  package org.apache.poi.hwpf.usermodel;
  
-import java.io.ByteArrayOutputStream;
  import java.io.FileInputStream;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.PicturesTable;
-import org.apache.poi.hwpf.usermodel.Picture;
  
  import junit.framework.TestCase;
  
+import org.apache.poi.hwpf.HWPFDocument;
+
  /**
   *     Test to see if Range.replaceText() works even if the Range contains a
   *     CharacterRun that uses Unicode characters.
   *
   * TODO - re-enable me when unicode paragraph stuff is fixed!
   */
-public abstract class TestRangeReplacement extends TestCase {
+public class TestRangeReplacement extends TestCase {
  
         // u201c and u201d are "smart-quotes"
         private String originalText =
author	Nick Burch <nick@apache.org>
	Mon, 11 Aug 2008 21:25:17 +0000 (21:25 +0000)
committer	Nick Burch <nick@apache.org>
	Mon, 11 Aug 2008 21:25:17 +0000 (21:25 +0000)
src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java	[new file with mode: 0644]	patch \| blob
src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java		patch \| blob \| history