about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Maxim Valyanskiy <maxcom@apache.org> 2010-08-04 12:43:58 +0000
committer: Maxim Valyanskiy <maxcom@apache.org> 2010-08-04 12:43:58 +0000
commit: 79c4de256f76c7b372333552975c8701c7f6a798 (patch)
tree: 7a771bbfb04f9c806de185f7602351be9bcda11d
parent: 7c0f62f764f06ce10768c37bd9837de7153b95e6 (diff)
download: poi-79c4de256f76c7b372333552975c8701c7f6a798.tar.gz
poi-79c4de256f76c7b372333552975c8701c7f6a798.zip
hwpf: append any character data before paragraphs to first paragraph (workaround for bug#48075)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982238 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java 8
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java 6
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java 10
-rw-r--r-- test-data/document/MBD001D0B89.doc bin 0 -> 35840 bytes
4 files changed, 23 insertions, 1 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
index a1462ffe0a..087c6c5c16 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
@@ -101,6 +101,14 @@ public class Paragraph extends Range implements Cloneable {
_istd = papx.getIstd();
}
+ protected Paragraph(PAPX papx, Range parent, int start)
+ {
+ super(Math.max(parent._start, start), Math.min(parent._end, papx.getEnd()), parent);
+ _props = papx.getParagraphProperties(_doc.getStyleSheet());
+ _papx = papx.getSprmBuf();
+ _istd = papx.getIstd();
+ }
+
public short getStyleIndex()
{
return _istd;
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
index 7c9b541d7c..df9cb0c46d 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
@@ -830,7 +830,11 @@ public class Range { // TODO -instantiable superclass
if (props.getIlfo() > 0) {
pap = new ListEntry(papx, this, _doc.getListTables());
} else {
- pap = new Paragraph(papx, this);
+ if (((index + _parStart)==0) && papx.getStart()>0) {
+ pap = new Paragraph(papx, this, 0);
+ } else {
+ pap = new Paragraph(papx, this);
+ }
}
return pap;
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
index 22eaf0ade4..ea69824daa 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
@@ -298,4 +298,14 @@ public final class TestWordExtractor extends TestCase {
assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
}
+
+ public void testFirstParagraphFix() throws Exception {
+ extractor = new WordExtractor(
+ POIDataSamples.getDocumentInstance().openResourceAsStream("MBD001D0B89.doc")
+ );
+
+ String text = extractor.getText();
+
+ assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
+ }
}
diff --git a/test-data/document/MBD001D0B89.doc b/test-data/document/MBD001D0B89.doc
new file mode 100644
index 0000000000..386b5305be
--- /dev/null
+++ b/test-data/document/MBD001D0B89.doc
Binary files differ