From: Yegor Kozlov Date: Sat, 27 Jun 2009 10:39:51 +0000 (+0000) Subject: Support for text extraction of footnotes, endnotes and comments in HWPF, see Bugzilla... X-Git-Tag: REL_3_5-FINAL~90 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=8e1165ea7e33f1829965ba3cd0987f79f532de5d;p=poi.git Support for text extraction of footnotes, endnotes and comments in HWPF, see Bugzilla 47400 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@788949 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 34fe02d10f..35554d1143 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,6 +33,7 @@ + 47400 - Support for text extraction of footnotes, endnotes and comments in HWPF 47415 - Fixed PageSettingsBlock to allow multiple PLS records 47412 - Fixed concurrency issue with EscherProperties.initProps() 47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index a43852be8b..8b6d2fdae7 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -344,6 +344,28 @@ public final class HWPFDocument extends POIDocument ); } + /** + * Returns the range which covers all the Endnotes. + */ + public Range getEndnoteRange() { + return new Range( + _cpSplit.getEndNoteStart(), + _cpSplit.getEndNoteEnd(), + this + ); + } + + /** + * Returns the range which covers all the Comments. + */ + public Range getCommentsRange() { + return new Range( + _cpSplit.getCommentsStart(), + _cpSplit.getCommentsEnd(), + this + ); + } + /** * Returns the range which covers all "Header Stories". 
* A header story contains a header, footer, end note diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index e63ad4f5a4..10ac954263 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.io.FileInputStream; import java.io.UnsupportedEncodingException; import java.util.Iterator; +import java.util.Arrays; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hwpf.HWPFDocument; @@ -95,34 +96,58 @@ public final class WordExtractor extends POIOLE2TextExtractor { * Get the text from the word file, as an array with one String * per paragraph */ - public String[] getParagraphText() { - String[] ret; - - // Extract using the model code - try { - Range r = doc.getRange(); - - ret = new String[r.numParagraphs()]; - for(int i=0; i -1 ); } + + public void testFootnote() throws Exception { + HWPFDocument doc = new HWPFDocument( + new FileInputStream(filename6) + ); + extractor = new WordExtractor(doc); + + String[] text = extractor.getFootnoteText(); + StringBuffer b = new StringBuffer(); + for (int i=0; i