]> source.dussan.org Git - poi.git/commitdiff
Add a quick guide to using the text extractor and friends, since that's a common use
author Nick Burch <nick@apache.org>
Fri, 26 May 2006 10:43:42 +0000 (10:43 +0000)
committer Nick Burch <nick@apache.org>
Fri, 26 May 2006 10:43:42 +0000 (10:43 +0000)
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@409632 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/hwpf/book.xml
src/documentation/content/xdocs/hwpf/quick-guide.xml [new file with mode: 0644]

index 772577a6eed34a67ab83004074cd443f3e555795..d2d95fe9c3a73cf0b3b303e1a653548e88a99599 100644 (file)
@@ -7,6 +7,7 @@
        </menu>
        <menu label="HWPF">
                <menu-item label="Overview" href="index.html"/>
+               <menu-item label="Quick Guide" href="quick-guide.html"/>
                <menu-item label="HWPF Format" href="docoverview.html"/>
                <menu-item label="HWPF Project plan" href="projectplan.html"/>
        </menu>
diff --git a/src/documentation/content/xdocs/hwpf/quick-guide.xml b/src/documentation/content/xdocs/hwpf/quick-guide.xml
new file mode 100644 (file)
index 0000000..2a91b50
--- /dev/null
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (C) 2004 The Apache Software Foundation. All rights reserved. -->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">
+
+<document>
+    <header>
+        <title>POI-HWPF - A Quick Guide</title>
+        <subtitle>Overview</subtitle>
+        <authors>
+            <person name="Nick Burch" email="nick at torchbox dot com"/>
+        </authors>
+    </header>
+
+    <body>
+        <section><title>Basic Text Extraction</title>
+        <p>For basic text extraction, make use of 
+<code>org.apache.poi.hwpf.extractor.WordExtractor</code>. It accepts an input
+stream or a <code>HWPFDocument</code>. The <code>getText()</code> 
+method can be used to 
+get the text from all the paragraphs, or <code>getParagraphText()</code>
+can be used to fetch the text from each paragraph in turn. The other
+option is <code>getTextFromPieces()</code>, which is very fast, but
+tends to return things that aren't text from the page, so results may vary.
+               </p>
+               </section>
+               
+               <section><title>Specific Text Extraction</title>
+               <p>To get specific bits of text, first create a 
+<code>org.apache.poi.hwpf.HWPFDocument</code>. Fetch the range 
+with <code>getRange()</code>, then get paragraphs from that. You
+can then get text and other properties.
+               </p>
+               </section>
+               
+               <section><title>Changing Text</title>
+               <p>It is possible to change the text via 
+               <code>insertBefore()</code> and <code>insertAfter()</code>
+               on a <code>Range</code> object (either a <code>Range</code>,
+               <code>Paragraph</code> or <code>CharacterRun</code>).
+               It is also possible to delete a <code>Range</code>, but this
+               code is known to have bugs in it.
+               </p>
+               </section>
+       </body>
+</document>