diff options
author | Maxim Valyanskiy <maxcom@apache.org> | 2010-08-04 12:43:58 +0000 |
---|---|---|
committer | Maxim Valyanskiy <maxcom@apache.org> | 2010-08-04 12:43:58 +0000 |
commit | 79c4de256f76c7b372333552975c8701c7f6a798 (patch) | |
tree | 7a771bbfb04f9c806de185f7602351be9bcda11d /src | |
parent | 7c0f62f764f06ce10768c37bd9837de7153b95e6 (diff) | |
download | poi-79c4de256f76c7b372333552975c8701c7f6a798.tar.gz poi-79c4de256f76c7b372333552975c8701c7f6a798.zip |
hwpf: include any character data that precedes the first paragraph in the first paragraph
(workaround for bug #48075)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982238 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
3 files changed, 23 insertions, 1 deletion
```diff
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
index a1462ffe0a..087c6c5c16 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
@@ -101,6 +101,14 @@ public class Paragraph extends Range implements Cloneable {
     _istd = papx.getIstd();
   }
 
+  protected Paragraph(PAPX papx, Range parent, int start)
+  {
+    super(Math.max(parent._start, start), Math.min(parent._end, papx.getEnd()), parent);
+    _props = papx.getParagraphProperties(_doc.getStyleSheet());
+    _papx = papx.getSprmBuf();
+    _istd = papx.getIstd();
+  }
+
   public short getStyleIndex()
   {
     return _istd;
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
index 7c9b541d7c..df9cb0c46d 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
@@ -830,7 +830,11 @@ public class Range { // TODO -instantiable superclass
     if (props.getIlfo() > 0) {
       pap = new ListEntry(papx, this, _doc.getListTables());
     } else {
-      pap = new Paragraph(papx, this);
+      if (((index + _parStart)==0) && papx.getStart()>0) {
+        pap = new Paragraph(papx, this, 0);
+      } else {
+        pap = new Paragraph(papx, this);
+      }
     }
 
     return pap;
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
index 22eaf0ade4..ea69824daa 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
@@ -298,4 +298,14 @@ public final class TestWordExtractor extends TestCase {
         assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
         assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
     }
+
+    public void testFirstParagraphFix() throws Exception {
+        extractor = new WordExtractor(
+                POIDataSamples.getDocumentInstance().openResourceAsStream("MBD001D0B89.doc")
+        );
+
+        String text = extractor.getText();
+
+        assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
+    }
 }
```