From: Nick Burch Date: Sat, 28 Jun 2008 18:54:02 +0000 (+0000) Subject: Patch from N. Hira from bug #45001 - Further fix for HWPF Range.delete() and unicode... X-Git-Tag: REL_3_2_FINAL~272 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=24053679a8f50f7da40f9be9a4b2442fb5b4dbe1;p=poi.git Patch from N. Hira from bug #45001 - Further fix for HWPF Range.delete() and unicode characters git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@672569 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 825c99cc5b..e353ecb435 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45001 - Further fix for HWPF Range.delete() and unicode characters 45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records. Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index cafa18cc54..7e2d951568 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45001 - Further fix for HWPF Range.delete() and unicode characters 45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records. 
Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java index bc33954dff..227200ab5d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java @@ -91,15 +91,18 @@ public class TextPiece extends PropertyNode implements Comparable public void adjustForDelete(int start, int length) { + // length is expected to be the number of code-points, + // not the number of characters + int numChars = length; if (usesUnicode()) { start /= 2; - length /= 2; + numChars = (length / 2); } int myStart = getStart(); int myEnd = getEnd(); - int end = start + length; + int end = start + numChars; /* do we have to delete from this text piece? */ if (start <= myEnd && end >= myStart) { @@ -108,9 +111,14 @@ public class TextPiece extends PropertyNode implements Comparable int overlapStart = Math.max(myStart, start); int overlapEnd = Math.min(myEnd, end); ((StringBuffer)_buf).delete(overlapStart, overlapEnd); - - super.adjustForDelete(start, length); } + + // We need to invoke this even if text from this piece is not being + // deleted because the adjustment must propagate to all subsequent + // text pieces i.e., if text from tp[n] is being deleted, then + // tp[n + 1], tp[n + 2], etc. will need to be adjusted. + // The superclass is expected to use a separate sentry for this. 
+ super.adjustForDelete(start, length); } public int characterLength() diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc new file mode 100644 index 0000000000..896108397c Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc differ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java new file mode 100644 index 0000000000..1becc234c3 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java @@ -0,0 +1,196 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hwpf.usermodel; + +import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; +import java.util.List; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.model.PicturesTable; +import org.apache.poi.hwpf.usermodel.Picture; + +import junit.framework.TestCase; + +/** + * Test to see if Range.delete() works even if the Range contains a + * CharacterRun that uses Unicode characters. + */ +public class TestRangeDelete extends TestCase { + + // u201c and u201d are "smart-quotes" + private String originalText = + "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r"; + private String searchText = "${delete}"; + private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r"; + private String expectedText2 = + "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. 
Everybody should be thankful to the ${organization} and all the POI contributors for their assistance in this matter.\r"; + private String expectedText3 = "Thank you, ${organization} !\r"; + + private String illustrativeDocFile; + + protected void setUp() throws Exception { + + String dirname = System.getProperty("HWPF.testdata.path"); + + illustrativeDocFile = dirname + "/testRangeDelete.doc"; + } + + /** + * Test just opening the files + */ + public void testOpen() throws Exception { + + HWPFDocument docA = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + } + + /** + * Test (more "confirm" than test) that we have the general structure that we expect to have. + */ + public void testDocStructure() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + Section section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + Paragraph para = section.getParagraph(2); + + assertEquals(5, para.numCharacterRuns()); + + assertEquals(originalText, para.text()); + } + + /** + * Test that we can delete text (one instance) from our Range with Unicode text. 
+ */ + public void testRangeDeleteOne() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + assertEquals(1, range.numSections()); + + Section section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + Paragraph para = section.getParagraph(2); + + String text = para.text(); + assertEquals(originalText, text); + + int offset = text.indexOf(searchText); + assertEquals(192, offset); + + int absOffset = para.getStartOffset() + offset; + if (para.usesUnicode()) + absOffset = para.getStartOffset() + (offset * 2); + + Range subRange = new Range(absOffset, (absOffset + searchText.length()), para.getDocument()); + if (subRange.usesUnicode()) + subRange = new Range(absOffset, (absOffset + (searchText.length() * 2)), para.getDocument()); + + assertEquals(searchText, subRange.text()); + + subRange.delete(); + + // we need to let the model re-calculate the Range before we evaluate it + range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + para = section.getParagraph(2); + + text = para.text(); + assertEquals(expectedText2, text); + + // this can lead to a StringIndexOutOfBoundsException, so we will add it + // even though we don't have an assertion for it + Range daRange = daDoc.getRange(); + daRange.text(); + } + + /** + * Test that we can delete text (all instances of) from our Range with Unicode text. 
+ */ + public void testRangeDeleteAll() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + assertEquals(1, range.numSections()); + + Section section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + Paragraph para = section.getParagraph(2); + + String text = para.text(); + assertEquals(originalText, text); + + boolean keepLooking = true; + while (keepLooking) { + + int offset = range.text().indexOf(searchText); + if (offset >= 0) { + + int absOffset = range.getStartOffset() + offset; + if (range.usesUnicode()) + absOffset = range.getStartOffset() + (offset * 2); + + Range subRange = new Range( + absOffset, (absOffset + searchText.length()), range.getDocument()); + if (subRange.usesUnicode()) + subRange = new Range( + absOffset, (absOffset + (searchText.length() * 2)), range.getDocument()); + + assertEquals(searchText, subRange.text()); + + subRange.delete(); + + } else + keepLooking = false; + } + + // we need to let the model re-calculate the Range before we use it + range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + + para = section.getParagraph(1); + text = para.text(); + assertEquals(expectedText1, text); + + para = section.getParagraph(2); + text = para.text(); + assertEquals(expectedText2, text); + + para = section.getParagraph(3); + text = para.text(); + assertEquals(expectedText3, text); + } +}