HWPF: Improve reading of auto-saved ("complex") documents

author Maxim Valyanskiy <maxcom@apache.org>

Mon, 5 Jul 2010 12:56:02 +0000 (12:56 +0000)

committer Maxim Valyanskiy <maxcom@apache.org>

Mon, 5 Jul 2010 12:56:02 +0000 (12:56 +0000)
author Maxim Valyanskiy <maxcom@apache.org>
Mon, 5 Jul 2010 12:56:02 +0000 (12:56 +0000)
committer Maxim Valyanskiy <maxcom@apache.org>
Mon, 5 Jul 2010 12:56:02 +0000 (12:56 +0000)
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml

index acea33a3af608f03aa6769eae9b5df2696197d65..8e5199d0ae526a20074d8d642ca2e660a3cf11d5 100644 (file)
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -39,6 +39,7 @@
             <action dev="POI-DEVELOPERS" type="add">49508 - Allow the addition of paragraphs to XWPF Table Cells</action>
             <action dev="POI-DEVELOPERS" type="fix">49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text</action>
             <action dev="POI-DEVELOPERS" type="fix">XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though</action>
+           <action dev="POI-DEVELOPERS" type="fix">HWPF: Improve reading of auto-saved ("complex") documents</action>
          </release>
          <release version="3.7-beta1" date="2010-06-20">
             <action dev="POI-DEVELOPERS" type="fix">49432 - Lazy caching of XSSFComment CTComment objects by reference, to make repeated comment searching faster</action>
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java

index 211dc9a6b7aeb2e613491a67ae0f8ed4d1f427c8..4226c7f492de777cb4884f51c3edcef56444fdf3 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
@@ -86,7 +86,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
              TextPiece tp = new TextPiece(
                      0, textData.length, textData, pd, 0
              );
-            tpt.getTextPieces().add(tp);
+            tpt.add(tp);
              
              text.append(tp.getStringBuffer());
          }
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java

index 81849c6fb46c1744f67dca0371dc24e698abbf58..fae08e03f5e903b0e347d703ecc37cfb1bb78fe5 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@@ -17,14 +17,15 @@
  
  package org.apache.poi.hwpf.model;
  
+import org.apache.poi.hwpf.model.io.HWPFOutputStream;
+import org.apache.poi.poifs.common.POIFSConstants;
+
  import java.io.IOException;
  import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
  import java.util.List;
  
-import org.apache.poi.hwpf.model.io.HWPFOutputStream;
-import org.apache.poi.poifs.common.POIFSConstants;
-
  /**
   * The piece table for matching up character positions to bits of text. This
   * mostly works in bytes, but the TextPieces themselves work in characters. This
@@ -34,6 +35,7 @@ import org.apache.poi.poifs.common.POIFSConstants;
   */
  public final class TextPieceTable implements CharIndexTranslator {
         protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
+    protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>();
         // int _multiple;
         int _cpMin;
  
@@ -96,11 +98,9 @@ public final class TextPieceTable implements CharIndexTranslator {
  
                 // In the interest of our sanity, now sort the text pieces
                 // into order, if they're not already
-               TextPiece[] tp = _textPieces.toArray(new TextPiece[_textPieces.size()]);
-               Arrays.sort(tp);
-               for (int i = 0; i < tp.length; i++) {
-                       _textPieces.set(i, tp[i]);
-               }
+        Collections.sort(_textPieces);
+        _textPiecesFCOrder = new ArrayList<TextPiece>(_textPieces);
+        Collections.sort(_textPiecesFCOrder, new FCComparator());
         }
  
         public int getCpMin() {
@@ -111,6 +111,13 @@ public final class TextPieceTable implements CharIndexTranslator {
                 return _textPieces;
         }
  
+    public void add(TextPiece piece) {
+        _textPieces.add(piece);
+        _textPiecesFCOrder.add(piece);
+        Collections.sort(_textPieces);
+        Collections.sort(_textPiecesFCOrder, new FCComparator());
+    }
+
         /**
          * Is the text at the given Character offset unicode, or plain old ascii? In
          * a very evil fashion, you have to actually know this to make sense of
@@ -238,7 +245,7 @@ public final class TextPieceTable implements CharIndexTranslator {
         public int getCharIndex(int bytePos) {
                 int charCount = 0;
  
-        for(TextPiece tp : _textPieces) {
+        for(TextPiece tp : _textPiecesFCOrder) {
                         int pieceStart = tp.getPieceDescriptor().getFilePosition();
                         if (pieceStart >= bytePos) {
                                 break;
@@ -259,4 +266,15 @@ public final class TextPieceTable implements CharIndexTranslator {
                 return charCount;
         }
  
+    private static class FCComparator implements Comparator<TextPiece> {
+        public int compare(TextPiece textPiece, TextPiece textPiece1) {
+            if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) {
+                return 1;
+            } else if (textPiece.getPieceDescriptor().fc<textPiece1.getPieceDescriptor().fc) {
+                return -1;
+            } else {
+                return 0;
+            }
+        }
+    }
  }
author	Maxim Valyanskiy <maxcom@apache.org>
	Mon, 5 Jul 2010 12:56:02 +0000 (12:56 +0000)
committer	Maxim Valyanskiy <maxcom@apache.org>
	Mon, 5 Jul 2010 12:56:02 +0000 (12:56 +0000)
src/documentation/content/xdocs/status.xml		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java		patch \| blob \| history