source.dussan.org Git - poi.git/commitdiff
Patch from N. Hira from bug #45001 - Further fix for HWPF Range.delete() and unicode characters
author Nick Burch <nick@apache.org>
Sat, 28 Jun 2008 18:54:02 +0000 (18:54 +0000)
committer Nick Burch <nick@apache.org>
Sat, 28 Jun 2008 18:54:02 +0000 (18:54 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@672569 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java [new file with mode: 0644]

index 825c99cc5b9d3535ca8234c1376c8f5c43a4d89d..e353ecb435e0ddd93e676490ffabd713577221c1 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">45001 - Further fix for HWPF Range.delete() and unicode characters</action>
            <action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action>
            <action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action>
            <action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action>
index cafa18cc542d0812f782a90ca9eb3244f2534b40..7e2d951568eb27444927446ad4e8fd41395495e8 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">45001 - Further fix for HWPF Range.delete() and unicode characters</action>
            <action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action>
            <action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action>
            <action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action>
index bc33954dff63e548328e28a20f1ac4915e5f55ca..227200ab5d9b811a89ed8a4257a0fe9547c2a1fb 100644 (file)
@@ -91,15 +91,18 @@ public class TextPiece extends PropertyNode implements Comparable
    public void adjustForDelete(int start, int length)
    {
 
+          // length is expected to be the number of code-points,
+          // not the number of characters
+          int numChars = length;
           if (usesUnicode()) {
 
                   start /= 2;
-                  length /= 2;
+                  numChars = (length / 2);
           }
 
           int myStart = getStart();
           int myEnd = getEnd();
-          int end = start + length;
+          int end = start + numChars;
 
           /* do we have to delete from this text piece? */
           if (start <= myEnd && end >= myStart) {
@@ -108,9 +111,14 @@ public class TextPiece extends PropertyNode implements Comparable
                   int overlapStart = Math.max(myStart, start);
                   int overlapEnd = Math.min(myEnd, end);
                   ((StringBuffer)_buf).delete(overlapStart, overlapEnd);
-                  
-                  super.adjustForDelete(start, length);
           }
+
+          // We need to invoke this even if text from this piece is not being
+          // deleted because the adjustment must propagate to all subsequent
+          // text pieces i.e., if text from tp[n] is being deleted, then
+          // tp[n + 1], tp[n + 2], etc. will need to be adjusted.
+          // The superclass is expected to use a separate sentry for this.
+          super.adjustForDelete(start, length);
    }
 
    public int characterLength()
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc
new file mode 100644 (file)
index 0000000..8961083
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
new file mode 100644 (file)
index 0000000..1becc23
--- /dev/null
@@ -0,0 +1,196 @@
+
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.usermodel;
+
+import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
+import java.util.List;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.model.PicturesTable;
+import org.apache.poi.hwpf.usermodel.Picture;
+
+import junit.framework.TestCase;
+
+/**
+ *     Test to see if Range.delete() works even if the Range contains a
+ *     CharacterRun that uses Unicode characters.
+ */
+public class TestRangeDelete extends TestCase {
+
+       // u201c and u201d are "smart-quotes"
+       private String originalText =
+               "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.  Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r";
+       private String searchText = "${delete}";
+       private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
+       private String expectedText2 =
+               "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.  Everybody should be thankful to the ${organization}  and all the POI contributors for their assistance in this matter.\r";
+       private String expectedText3 = "Thank you, ${organization} !\r";
+
+       private String illustrativeDocFile;
+
+       protected void setUp() throws Exception {
+
+               String dirname = System.getProperty("HWPF.testdata.path");
+
+               illustrativeDocFile = dirname + "/testRangeDelete.doc";
+       }
+
+       /**
+        * Test just opening the files
+        */
+       public void testOpen() throws Exception {
+
+               HWPFDocument docA = new HWPFDocument(new FileInputStream(illustrativeDocFile));
+       }
+
+       /**
+        * Test (more "confirm" than test) that we have the general structure that we expect to have.
+        */
+       public void testDocStructure() throws Exception {
+
+               HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
+
+               Range range = daDoc.getRange();
+
+               assertEquals(1, range.numSections());
+               Section section = range.getSection(0);
+
+               assertEquals(5, section.numParagraphs());
+               Paragraph para = section.getParagraph(2);
+
+               assertEquals(5, para.numCharacterRuns());
+
+               assertEquals(originalText, para.text());
+       }
+
+       /**
+        * Test that we can delete text (one instance) from our Range with Unicode text.
+        */
+       public void testRangeDeleteOne() throws Exception {
+
+               HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
+
+               Range range = daDoc.getRange();
+               assertEquals(1, range.numSections());
+
+               Section section = range.getSection(0);
+               assertEquals(5, section.numParagraphs());
+
+               Paragraph para = section.getParagraph(2);
+
+               String text = para.text();
+               assertEquals(originalText, text);
+
+               int offset = text.indexOf(searchText);
+               assertEquals(192, offset);
+
+               int absOffset = para.getStartOffset() + offset;
+               if (para.usesUnicode())
+                       absOffset = para.getStartOffset() + (offset * 2);
+
+               Range subRange = new Range(absOffset, (absOffset + searchText.length()), para.getDocument());
+               if (subRange.usesUnicode())
+                       subRange = new Range(absOffset, (absOffset + (searchText.length() * 2)), para.getDocument());
+
+               assertEquals(searchText, subRange.text());
+
+               subRange.delete();
+
+               // we need to let the model re-calculate the Range before we evaluate it
+               range = daDoc.getRange();
+
+               assertEquals(1, range.numSections());
+               section = range.getSection(0);
+
+               assertEquals(5, section.numParagraphs());
+               para = section.getParagraph(2);
+
+               text = para.text();
+               assertEquals(expectedText2, text);
+
+               // this can lead to a StringBufferOutOfBoundsException, so we will add it
+               // even though we don't have an assertion for it
+               Range daRange = daDoc.getRange();
+               daRange.text();
+       }
+
+       /**
+        * Test that we can delete text (all instances of) from our Range with Unicode text.
+        */
+       public void testRangeDeleteAll() throws Exception {
+
+               HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
+
+               Range range = daDoc.getRange();
+               assertEquals(1, range.numSections());
+
+               Section section = range.getSection(0);
+               assertEquals(5, section.numParagraphs());
+
+               Paragraph para = section.getParagraph(2);
+
+               String text = para.text();
+               assertEquals(originalText, text);
+
+               boolean keepLooking = true;
+               while (keepLooking) {
+
+                       int offset = range.text().indexOf(searchText);
+                       if (offset >= 0) {
+
+                               int absOffset = range.getStartOffset() + offset;
+                               if (range.usesUnicode())
+                                       absOffset = range.getStartOffset() + (offset * 2);
+
+                               Range subRange = new Range(
+                                       absOffset, (absOffset + searchText.length()), range.getDocument());
+                               if (subRange.usesUnicode())
+                                       subRange = new Range(
+                                               absOffset, (absOffset + (searchText.length() * 2)), range.getDocument());
+
+                               assertEquals(searchText, subRange.text());
+
+                               subRange.delete();
+
+                       } else
+                               keepLooking = false;
+               }
+
+               // we need to let the model re-calculate the Range before we use it
+               range = daDoc.getRange();
+
+               assertEquals(1, range.numSections());
+               section = range.getSection(0);
+
+               assertEquals(5, section.numParagraphs());
+
+               para = section.getParagraph(1);
+               text = para.text();
+               assertEquals(expectedText1, text);
+
+               para = section.getParagraph(2);
+               text = para.text();
+               assertEquals(expectedText2, text);
+
+               para = section.getParagraph(3);
+               text = para.text();
+               assertEquals(expectedText3, text);
+       }
+}