source.dussan.org Git - poi.git/commitdiff
hwpf: append any character data before paragraphs to first paragraph
author Maxim Valyanskiy <maxcom@apache.org>
Wed, 4 Aug 2010 12:43:58 +0000 (12:43 +0000)
committer Maxim Valyanskiy <maxcom@apache.org>
Wed, 4 Aug 2010 12:43:58 +0000 (12:43 +0000)
(workaround for bug#48075)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982238 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
test-data/document/MBD001D0B89.doc [new file with mode: 0644]

index a1462ffe0afe2a4d27db36626915e60344b786fe..087c6c5c16b83de70ad0a26a1f6c34caff82da09 100644 (file)
@@ -101,6 +101,14 @@ public class Paragraph extends Range implements Cloneable {
     _istd = papx.getIstd();
   }
 
+  protected Paragraph(PAPX papx, Range parent, int start)
+  {
+    super(Math.max(parent._start, start), Math.min(parent._end, papx.getEnd()), parent);
+    _props = papx.getParagraphProperties(_doc.getStyleSheet());
+    _papx = papx.getSprmBuf();
+    _istd = papx.getIstd();
+  }
+
   public short getStyleIndex()
   {
     return _istd;
index 7c9b541d7ce0ea16d621247c99ee6cc23511f1c8..df9cb0c46d9c2fb12189874aa69e26d79e23905b 100644 (file)
@@ -830,7 +830,11 @@ public class Range { // TODO -instantiable superclass
                if (props.getIlfo() > 0) {
                        pap = new ListEntry(papx, this, _doc.getListTables());
                } else {
-                       pap = new Paragraph(papx, this);
+            if (((index + _parStart)==0) && papx.getStart()>0) {
+                pap = new Paragraph(papx, this, 0);
+            } else {
+                       pap = new Paragraph(papx, this);
+            }
                }
 
                return pap;
index 22eaf0ade470a641eeb0d300cff28ff8409914a4..ea69824daad467a495a83be197c57d747d7dbd98 100644 (file)
@@ -298,4 +298,14 @@ public final class TestWordExtractor extends TestCase {
         assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
         assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
     }
+
+    public void testFirstParagraphFix() throws Exception {
+        extractor = new WordExtractor(
+                POIDataSamples.getDocumentInstance().openResourceAsStream("MBD001D0B89.doc")
+        );
+
+        String text = extractor.getText();
+
+        assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
+    }
 }
diff --git a/test-data/document/MBD001D0B89.doc b/test-data/document/MBD001D0B89.doc
new file mode 100644 (file)
index 0000000..386b530
Binary files /dev/null and b/test-data/document/MBD001D0B89.doc differ