aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSergey Vladimirov <sergey@apache.org>2011-08-11 20:50:42 +0000
committerSergey Vladimirov <sergey@apache.org>2011-08-11 20:50:42 +0000
commitead63d1e9cf12436fd534d99a7b8786c0b63e7b1 (patch)
treee5508e1d0b4355adbbd79da1b3744b1a76c8a71c
parent79d02d4e677d30ce34afbb96463eda0e411aac90 (diff)
downloadpoi-ead63d1e9cf12436fd534d99a7b8786c0b63e7b1.tar.gz
poi-ead63d1e9cf12436fd534d99a7b8786c0b63e7b1.zip
Simplify the API for processing parts of a Word file, e.g. including page headers / footers in plain text and HTML output
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1156823 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java28
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java27
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java24
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java64
4 files changed, 83 insertions, 60 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
index cc104b84aa..6f35f1d6df 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
@@ -132,6 +132,16 @@ public abstract class AbstractWordConverter
private PicturesManager picturesManager;
+ /**
+ * Special actions that need to be called after processing is complete, like
+ * updating stylesheets or building document notes list. Usually they are
+ * called once, but it's okay to call them several times.
+ */
+ protected void afterProcess()
+ {
+ // by default no such actions needed
+ }
+
protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
{
Triplet original = new Triplet();
@@ -594,7 +604,17 @@ public abstract class AbstractWordConverter
processDocumentInformation( summaryInformation );
}
- processDocumentPart( wordDocument, wordDocument.getRange() );
+ final Range docRange = wordDocument.getRange();
+
+ if ( docRange.numSections() == 1 )
+ {
+ processSingleSection( wordDocument, docRange.getSection( 0 ) );
+ afterProcess();
+ return;
+ }
+
+ processDocumentPart( wordDocument, docRange );
+ afterProcess();
}
protected abstract void processDocumentInformation(
@@ -603,12 +623,6 @@ public abstract class AbstractWordConverter
protected void processDocumentPart( HWPFDocumentCore wordDocument,
final Range range )
{
- if ( range.numSections() == 1 )
- {
- processSingleSection( wordDocument, range.getSection( 0 ) );
- return;
- }
-
for ( int s = 0; s < range.numSections(); s++ )
{
processSection( wordDocument, range.getSection( s ), s );
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
index e18efbe2af..59e20e1a07 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
@@ -180,6 +180,15 @@ public class WordToHtmlConverter extends AbstractWordConverter
this.htmlDocumentFacade = new HtmlDocumentFacade( document );
}
+ @Override
+ protected void afterProcess()
+ {
+ if ( notes != null )
+ htmlDocumentFacade.getBody().appendChild( notes );
+
+ htmlDocumentFacade.updateStylesheet();
+ }
+
public Document getDocument()
{
return htmlDocumentFacade.getDocument();
@@ -242,17 +251,6 @@ public class WordToHtmlConverter extends AbstractWordConverter
}
@Override
- public void processDocument( HWPFDocumentCore wordDocument )
- {
- super.processDocument( wordDocument );
-
- if ( notes != null )
- htmlDocumentFacade.getBody().appendChild( notes );
-
- htmlDocumentFacade.updateStylesheet();
- }
-
- @Override
protected void processDocumentInformation(
SummaryInformation summaryInformation )
{
@@ -271,6 +269,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
}
@Override
+ public void processDocumentPart( HWPFDocumentCore wordDocument, Range range )
+ {
+ super.processDocumentPart( wordDocument, range );
+ afterProcess();
+ }
+
+ @Override
protected void processDrawnObject( HWPFDocument doc,
CharacterRun characterRun, OfficeDrawing officeDrawing,
String path, Element block )
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java
index 7e60f37f56..f3f921f5b7 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java
@@ -166,6 +166,13 @@ public class WordToTextConverter extends AbstractWordConverter
this.textDocumentFacade = new TextDocumentFacade( document );
}
+ @Override
+ protected void afterProcess()
+ {
+ if ( notes != null )
+ textDocumentFacade.getBody().appendChild( notes );
+ }
+
public Document getDocument()
{
return textDocumentFacade.getDocument();
@@ -209,15 +216,6 @@ public class WordToTextConverter extends AbstractWordConverter
}
@Override
- public void processDocument( HWPFDocumentCore wordDocument )
- {
- super.processDocument( wordDocument );
-
- if ( notes != null )
- textDocumentFacade.getBody().appendChild( notes );
- }
-
- @Override
protected void processDocumentInformation(
SummaryInformation summaryInformation )
{
@@ -242,6 +240,14 @@ public class WordToTextConverter extends AbstractWordConverter
}
@Override
+ public void processDocumentPart( HWPFDocumentCore wordDocument,
+ Range range )
+ {
+ super.processDocumentPart( wordDocument, range );
+ afterProcess();
+ }
+
+ @Override
protected void processDrawnObject( HWPFDocument doc,
CharacterRun characterRun, OfficeDrawing officeDrawing,
String path, Element block )
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
index 8438df4f04..dc06fb9262 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
@@ -20,14 +20,10 @@ package org.apache.poi.hwpf.extractor;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.poi.hwpf.converter.WordToTextConverter;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.converter.WordToTextConverter;
import org.apache.poi.hwpf.usermodel.HeaderStories;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
@@ -72,6 +68,7 @@ public final class WordExtractor extends POIOLE2TextExtractor
* @deprecated Use {@link #WordExtractor(DirectoryNode)} instead
*/
@Deprecated
+ @SuppressWarnings( "unused" )
public WordExtractor( DirectoryNode dir, POIFSFileSystem fs )
throws IOException
{
@@ -290,34 +287,35 @@ public final class WordExtractor extends POIOLE2TextExtractor
{
try
{
- final StringWriter stringWriter = new StringWriter();
- @SuppressWarnings( "unused" )
- WordToTextConverter wordToTextConverter = new WordToTextConverter()
- {
- {
- HeaderStories hs = new HeaderStories( doc );
-
- if ( hs.getFirstHeaderSubrange() != null )
- processDocumentPart( doc, hs.getFirstHeaderSubrange() );
- if ( hs.getEvenHeaderSubrange() != null )
- processDocumentPart( doc, hs.getEvenHeaderSubrange() );
- if ( hs.getOddHeaderSubrange() != null )
- processDocumentPart( doc, hs.getOddHeaderSubrange() );
-
- processDocument( doc );
- processDocumentPart( doc, doc.getMainTextboxRange() );
-
- if ( hs.getFirstFooterSubrange() != null )
- processDocumentPart( doc, hs.getFirstFooterSubrange() );
- if ( hs.getEvenFooterSubrange() != null )
- processDocumentPart( doc, hs.getEvenFooterSubrange() );
- if ( hs.getOddFooterSubrange() != null )
- processDocumentPart( doc, hs.getOddFooterSubrange() );
-
- stringWriter.append( getText() );
- }
- };
- return stringWriter.toString();
+ WordToTextConverter wordToTextConverter = new WordToTextConverter();
+
+ HeaderStories hs = new HeaderStories( doc );
+
+ if ( hs.getFirstHeaderSubrange() != null )
+ wordToTextConverter.processDocumentPart( doc,
+ hs.getFirstHeaderSubrange() );
+ if ( hs.getEvenHeaderSubrange() != null )
+ wordToTextConverter.processDocumentPart( doc,
+ hs.getEvenHeaderSubrange() );
+ if ( hs.getOddHeaderSubrange() != null )
+ wordToTextConverter.processDocumentPart( doc,
+ hs.getOddHeaderSubrange() );
+
+ wordToTextConverter.processDocument( doc );
+ wordToTextConverter.processDocumentPart( doc,
+ doc.getMainTextboxRange() );
+
+ if ( hs.getFirstFooterSubrange() != null )
+ wordToTextConverter.processDocumentPart( doc,
+ hs.getFirstFooterSubrange() );
+ if ( hs.getEvenFooterSubrange() != null )
+ wordToTextConverter.processDocumentPart( doc,
+ hs.getEvenFooterSubrange() );
+ if ( hs.getOddFooterSubrange() != null )
+ wordToTextConverter.processDocumentPart( doc,
+ hs.getOddFooterSubrange() );
+
+ return wordToTextConverter.getText();
}
catch ( Exception exc )
{