More fixes for auto-saved documents

author Maxim Valyanskiy <maxcom@apache.org>

Mon, 26 Jul 2010 14:04:27 +0000 (14:04 +0000)

committer Maxim Valyanskiy <maxcom@apache.org>

Mon, 26 Jul 2010 14:04:27 +0000 (14:04 +0000)
author Maxim Valyanskiy <maxcom@apache.org>
Mon, 26 Jul 2010 14:04:27 +0000 (14:04 +0000)
committer Maxim Valyanskiy <maxcom@apache.org>
Mon, 26 Jul 2010 14:04:27 +0000 (14:04 +0000)
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java

index 628fb75d14746e784eb8d99eb2ae2808f97d675a..10bb77d85126f2be3c8fb911fc2f0055d17400f4 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
@@ -62,7 +62,12 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
        {
         int startAt = getStart(x);
                 int endAt = getEnd(x);
-               _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
+
+        if (!tpt.isIndexInTable(startAt) && !tpt.isIndexInTable(endAt)) {
+            _chpxList.add(null);
+        } else {
+                   _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
+        }
        }
      }
  
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java

index f56621afad5f425e7ef0f876cf7891e25cce3a07..b7c4db7e9976b8a63cc5edc095e53574f8aefb98 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
@@ -36,12 +36,12 @@ public final class CHPX extends BytePropertyNode
  
    public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
    {
-    super(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
+    super(fcStart, translator.lookIndexBackward(fcEnd), translator, new SprmBuffer(grpprl));
    }
  
    public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
    {
-    super(fcStart, fcEnd, translator ,buf);
+    super(fcStart, translator.lookIndexBackward(fcEnd), translator ,buf);
    }
  
  
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java

index d2cc0ebb4dd67b5699539f329ae20e6bbebb4624..cda2fb26a5852a88633cdccf5b4c7880c65cea42 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
@@ -18,9 +18,9 @@
  package org.apache.poi.hwpf.model;\r
  \r
  public interface CharIndexTranslator {\r
-\r
      /**\r
       * Calculates the char index of the given byte index.\r
+     * Looks forward to the next index that is in the table if the given byte index is not in it.\r
       *\r
       * @param bytePos The character offset to check \r
       * @return the char index\r
@@ -28,13 +28,28 @@ public interface CharIndexTranslator {
      int getCharIndex(int bytePos);\r
  \r
      /**\r
-     * Is the text at the given byte offset unicode, or plain old ascii? In a\r
-     * very evil fashion, you have to actually know this to make sense of\r
-     * character and paragraph properties :(\r
+     * Check if index is in table\r
+     *\r
+     * @param bytePos the byte index to check\r
+     * @return true if index in table, false if not\r
+     */\r
+\r
+    boolean isIndexInTable(int bytePos);\r
+\r
+    /**\r
+     * Return first index >= bytePos that is in table\r
+     *\r
+     * @param bytePos the byte index to start looking from\r
+     * @return the first index at or after bytePos that is in the table\r
+     */\r
+    public int lookIndexForward(int bytePos);\r
+\r
+    /**\r
+     * Return last index <= bytePos that is in table\r
       *\r
-     * @param bytePos The character offset to check about\r
-     * @return true if the text at the given byte offset is unicode\r
+     * @param bytePos the byte index to start looking from\r
+     * @return the last index at or before bytePos that is in the table\r
       */\r
-    boolean isUnicodeAtByteOffset(int bytePos);\r
+    public int lookIndexBackward(int bytePos);\r
  \r
  }\r
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java

index 64046eadcb057d99b5f79b53768051f071956151..1ffcaaa6c3086abef1684f6ba9e87af012602ec7 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
@@ -196,7 +196,11 @@ public final class PicturesTable
      Range range = _document.getOverallRange();
      for (int i = 0; i < range.numCharacterRuns(); i++) {
         CharacterRun run = range.getCharacterRun(i);
-       String text = run.text();
+
+        if (run==null) {
+            continue;
+        }
+
         Picture picture = extractPicture(run, false);
         if (picture != null) {
                 pictures.add(picture);
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java

index 11a5b372ee4f0029ea32df756562f1c162c23a3c..cfff0b29378634c6046d5b88e3f899461e08b53a 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@@ -33,7 +33,7 @@ import java.util.List;
   *
   * @author Ryan Ackley
   */
-public final class TextPieceTable implements CharIndexTranslator {
+public class TextPieceTable implements CharIndexTranslator {
         protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
      protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>();
         // int _multiple;
@@ -118,51 +118,6 @@ public final class TextPieceTable implements CharIndexTranslator {
          Collections.sort(_textPiecesFCOrder, new FCComparator());
      }
  
-       /**
-        * Is the text at the given Character offset unicode, or plain old ascii? In
-        * a very evil fashion, you have to actually know this to make sense of
-        * character and paragraph properties :(
-        *
-        * @param cp
-        *            The character offset to check about
-        */
-       public boolean isUnicodeAtCharOffset(int cp) {
-               boolean lastWas = false;
-
-               for(TextPiece tp : _textPieces) {
-                       // If the text piece covers the character, all good
-                       if (tp.getStart() <= cp && tp.getEnd() >= cp) {
-                               return tp.isUnicode();
-                       }
-                       // Otherwise keep track for the last one
-                       lastWas = tp.isUnicode();
-               }
-
-               // If they ask off the end, just go with the last one...
-               return lastWas;
-       }
-
-       public boolean isUnicodeAtByteOffset(int bytePos) {
-               boolean lastWas = false;
-
-        for(TextPiece tp : _textPieces) {
-                       int curByte = tp.getPieceDescriptor().getFilePosition();
-                       int pieceEnd = curByte + tp.bytesLength();
-
-                       // If the text piece covers the character, all good
-                       if (curByte <= bytePos && pieceEnd > bytePos) {
-                               return tp.isUnicode();
-                       }
-                       // Otherwise keep track for the last one
-                       lastWas = tp.isUnicode();
-                       // Move along
-                       curByte = pieceEnd;
-               }
-
-               // If they ask off the end, just go with the last one...
-               return lastWas;
-       }
-
         public byte[] writeTo(HWPFOutputStream docStream) throws IOException {
  
                 PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes());
@@ -245,19 +200,7 @@ public final class TextPieceTable implements CharIndexTranslator {
      public int getCharIndex(int bytePos) {
          int charCount = 0;
  
-        for(TextPiece tp : _textPiecesFCOrder) {
-                       int pieceStart = tp.getPieceDescriptor().getFilePosition();
-
-            if (bytePos > pieceStart + tp.bytesLength()) {
-                continue;
-            }
-
-                       if (pieceStart > bytePos) {
-                               bytePos = pieceStart;
-                       }
-
-            break;
-        }
+        bytePos = lookIndexForward(bytePos);
  
          for(TextPiece tp : _textPieces) {
              int pieceStart = tp.getPieceDescriptor().getFilePosition();
@@ -287,6 +230,62 @@ public final class TextPieceTable implements CharIndexTranslator {
          return charCount;
      }
  
+    public int lookIndexForward(int bytePos) {
+        for(TextPiece tp : _textPiecesFCOrder) {
+                       int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+            if (bytePos > pieceStart + tp.bytesLength()) {
+                continue;
+            }
+
+                       if (pieceStart > bytePos) {
+                               bytePos = pieceStart;
+                       }
+
+            break;
+        }
+        return bytePos;
+    }
+
+    public int lookIndexBackward(int bytePos) {
+        int lastEnd = 0;
+
+        for(TextPiece tp : _textPiecesFCOrder) {
+                       int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+            if (bytePos > pieceStart + tp.bytesLength()) {
+                lastEnd = pieceStart + tp.bytesLength();
+                continue;
+            }
+
+                       if (pieceStart > bytePos) {
+                               bytePos = lastEnd;
+                       }
+
+            break;
+        }
+
+        return bytePos;
+    }
+
+    public boolean isIndexInTable(int bytePos) {
+        for(TextPiece tp : _textPiecesFCOrder) {
+                       int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+            if (bytePos > pieceStart + tp.bytesLength()) {
+                continue;
+            }
+
+                       if (pieceStart > bytePos) {
+                               return false;
+                       }
+
+            return true;
+        }
+
+        return false;
+    }
+
      private static class FCComparator implements Comparator<TextPiece> {
          public int compare(TextPiece textPiece, TextPiece textPiece1) {
              if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) {
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java

index 4d78ca9ba359c9abdefd3b409cedaf192eeb96c7..7c9b541d7ce0ea16d621247c99ee6cc23511f1c8 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
@@ -784,6 +784,10 @@ public class Range { // TODO -instantiable superclass
         public CharacterRun getCharacterRun(int index) {
                 initCharacterRuns();
                 CHPX chpx = _characters.get(index + _charStart);
+        
+        if (chpx == null) {
+            return null;
+        }
  
                 int[] point = findRange(_paragraphs, _parStart, Math.max(chpx.getStart(), _start), chpx
                                 .getEnd());
@@ -963,7 +967,7 @@ public class Range { // TODO -instantiable superclass
                 int x = min;
                 PropertyNode node = (PropertyNode) rpl.get(x);
  
-               while (node.getEnd() <= start && x < rpl.size() - 1) {
+               while (node==null || (node.getEnd() <= start && x < rpl.size() - 1)) {
                         x++;
                         node = (PropertyNode) rpl.get(x);
                 }
@@ -978,7 +982,7 @@ public class Range { // TODO -instantiable superclass
  
                 int y = x;
                 node = (PropertyNode) rpl.get(y);
-               while (node.getEnd() < end && y < rpl.size() - 1) {
+               while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) {
                         y++;
                         node = (PropertyNode) rpl.get(y);
                 }
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java

index 4ad028aac418fdfbe670ceecdda7822468b26800..7179beba68597cfd6a24f28b0ddcd224054a0f6f 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
@@ -31,7 +31,12 @@ public final class TestCHPBinTable
    private CHPBinTable _cHPBinTable = null;
    private HWPFDocFixture _hWPFDocFixture;
  
-  private TextPieceTable fakeTPT = new TextPieceTable();
+  private TextPieceTable fakeTPT = new TextPieceTable() {
+      @Override
+      public boolean isIndexInTable(int bytePos) {
+          return true;
+      }
+  };
  
    public void testReadWrite()
      throws Exception
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java

index 05013b07fb4a801d370cdd79ad8ecc1447d4f151..a5c9c501e95f26a2301095e5e7059b973026457d 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java
@@ -157,4 +157,10 @@ public final class TestPictures extends TestCase {
              doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
      }
  
+    public void testFastSaved2() {
+            HWPFDocument doc = HWPFTestDataSamples.openSampleFile("o_kurs.doc");
+
+            doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
+    }
+
  }
diff --git a/test-data/document/o_kurs.doc b/test-data/document/o_kurs.doc

new file mode 100644 (file)

index 0000000..caab02a

Binary files /dev/null and b/test-data/document/o_kurs.doc differ
author	Maxim Valyanskiy <maxcom@apache.org>
	Mon, 26 Jul 2010 14:04:27 +0000 (14:04 +0000)
committer	Maxim Valyanskiy <maxcom@apache.org>
	Mon, 26 Jul 2010 14:04:27 +0000 (14:04 +0000)
src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java		patch \| blob \| history
test-data/document/o_kurs.doc	[new file with mode: 0644]	patch \| blob