From: Sergey Vladimirov Date: Tue, 5 Jul 2011 12:02:02 +0000 (+0000) Subject: fix range detection for documents with section across document parts. X-Git-Tag: REL_3_8_BETA4~313 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=5a5e58e870e0cb6e523ab4f38c21ffd4eaa26283;p=poi.git fix range detection for documents with section across document parts. Bug46817.doc is the example of document where section contains all document parts - including main part and additional from textbox. Previous implementation didn't correctly calculate last paragraph for Section (intersection of SEPX and main document range). Test case is added. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143014 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 03a1733794..6b738d318b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -1001,22 +1001,32 @@ public class Range { // TODO -instantiable superclass node = rpl.get(x); } - if (node.getStart()>end) { - return new int[] {0, 0}; + if ( node.getStart() > end ) + { + return new int[] { 0, 0 }; } - if (node.getEnd() <= start) { - return new int[] { rpl.size(), rpl.size() }; - } + if ( node.getEnd() <= start ) + { + return new int[] { rpl.size(), rpl.size() }; + } - int y = x; - node = rpl.get(y); - while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) { - y++; - node = rpl.get(y); - } - return new int[] { x, y + 1 }; - } + for ( int y = x; y < rpl.size(); y++ ) + { + node = rpl.get( y ); + if ( node == null ) + continue; + + if ( node.getStart() < end && node.getEnd() <= end ) + continue; + + if ( node.getStart() < end ) + return new int[] { x, y +1 }; + + return new int[] { x, y }; + } + return new int[] { x, rpl.size() }; + } /** * resets the list indexes. 
@@ -1109,4 +1119,11 @@ public class Range { // TODO -instantiable superclass protected HWPFDocumentCore getDocument() { return _doc; } + + @Override + public String toString() + { + return "Range from " + getStartOffset() + " to " + getEndOffset() + + " (chars)"; + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index 5d12716590..04032f22e6 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -62,6 +62,12 @@ public class TestWordToHtmlConverter extends TestCase return result; } + public void testAIOOBTap() throws Exception + { + String result = getHtmlText( "AIOOB-Tap.doc" ); + assertTrue( result.substring( 0, 2000 ).contains( "" ) ); + } + public void testBug33519() throws Exception { getHtmlText( "Bug33519.doc" ); @@ -73,6 +79,12 @@ public class TestWordToHtmlConverter extends TestCase assertTrue( result .contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) ); } + + public void testBug46817() throws Exception + { + String result = getHtmlText( "Bug46817.doc" ); + assertTrue( result.contains( "
" ) ); + } public void testEquation() throws Exception { @@ -82,13 +94,6 @@ public class TestWordToHtmlConverter extends TestCase .contains( "" ) ); } - public void testAIOOBTap() throws Exception - { - String result = getHtmlText( "AIOOB-Tap.doc" ); - - assertTrue( result.substring( 0, 2000 ).contains( "
" ) ); - } - public void testHyperlink() throws Exception { String result = getHtmlText( "hyperlink.doc" ); @@ -102,6 +107,13 @@ public class TestWordToHtmlConverter extends TestCase getHtmlText( "innertable.doc" ); } + public void testMBD001D0B89() throws Exception + { + String result = getHtmlText( "MBD001D0B89.doc" ); + + assertTrue( result.contains( "
" ) ); + } + public void testPageref() throws Exception { String result = getHtmlText( "pageref.doc" ); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java index 0ae09c8f4c..cedb303eab 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRange.java @@ -17,38 +17,76 @@ package org.apache.poi.hwpf.usermodel; +import java.util.ArrayList; + +import org.apache.poi.hwpf.model.SEPX; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.hwpf.HWPFDocument; + import junit.framework.TestCase; /** - * Tests for Range which aren't around deletion, insertion, - * text replacement or textual contents + * Tests for Range which aren't around deletion, insertion, text replacement or + * textual contents */ -public final class TestRange extends TestCase { - public void testFieldStripping() { - String exp = "This is some text."; - - String single = "This is some \u0013Blah!\u0015text."; - String with14 = "This is \u0013Blah!\u0014some\u0015 text."; - String withNested = - "This is \u0013Blah!\u0013Blah!\u0015\u0015some text."; - String withNested14 = - "This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text."; - String withNestedIn14 = - "This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text."; - - // Check all comes out right - assertEquals(exp, Range.stripFields(exp)); - assertEquals(exp, Range.stripFields(single)); - assertEquals(exp, Range.stripFields(with14)); - assertEquals(exp, Range.stripFields(withNested)); - assertEquals(exp, Range.stripFields(withNested14)); - assertEquals(exp, Range.stripFields(withNestedIn14)); - - // Ones that are odd and we won't change - String odd1 = "This\u0015 is \u0013 odd"; - String odd2 = "This\u0015 is \u0014 also \u0013 odd"; - - assertEquals(odd1, Range.stripFields(odd1)); - assertEquals(odd2, 
Range.stripFields(odd2)); - } +public final class TestRange extends TestCase +{ + public void testFieldStripping() + { + String exp = "This is some text."; + + String single = "This is some \u0013Blah!\u0015text."; + String with14 = "This is \u0013Blah!\u0014some\u0015 text."; + String withNested = "This is \u0013Blah!\u0013Blah!\u0015\u0015some text."; + String withNested14 = "This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text."; + String withNestedIn14 = "This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text."; + + // Check all comes out right + assertEquals( exp, Range.stripFields( exp ) ); + assertEquals( exp, Range.stripFields( single ) ); + assertEquals( exp, Range.stripFields( with14 ) ); + assertEquals( exp, Range.stripFields( withNested ) ); + assertEquals( exp, Range.stripFields( withNested14 ) ); + assertEquals( exp, Range.stripFields( withNestedIn14 ) ); + + // Ones that are odd and we won't change + String odd1 = "This\u0015 is \u0013 odd"; + String odd2 = "This\u0015 is \u0014 also \u0013 odd"; + + assertEquals( odd1, Range.stripFields( odd1 ) ); + assertEquals( odd2, Range.stripFields( odd2 ) ); + } + + public void testBug46817() throws Exception + { + HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples + .getDocumentInstance().openResourceAsStream( "Bug46817.doc" ) ); + + final ArrayList sections = hwpfDocument.getSectionTable() + .getSections(); + assertEquals( sections.size(), 1 ); + + // whole document, including additional text from shape + SEPX sepx = sections.get( 0 ); + assertEquals( sepx.getStartBytes(), 1024 ); + assertEquals( sepx.getEndBytes(), 3880 ); + assertEquals( sepx.getStart(), 0 ); + assertEquals( sepx.getEnd(), 1428 ); + + // only main range + Range range = hwpfDocument.getRange(); + assertEquals( range.getStartOffset(), 0 ); + assertEquals( range.getEndOffset(), 766 ); + + Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() ); + assertTrue( lastInMainRange.getEndOffset() <= 
766 ); + + Section section = range.getSection( 0 ); + assertTrue( section.getEndOffset() <= 766 ); + + Paragraph lastInMainSection = section.getParagraph( section + .numParagraphs() ); + assertTrue( lastInMainSection.getEndOffset() <= 766 ); + } }