private static final byte SPECCHAR_DRAWN_OBJECT = 8;
- private static final char UNICODECHAR_NONBREAKING_HYPHEN = '\u2011';
+ protected static final char UNICODECHAR_NONBREAKING_HYPHEN = '\u2011';
- private static final char UNICODECHAR_ZERO_WIDTH_SPACE = '\u200b';
+ protected static final char UNICODECHAR_ZERO_WIDTH_SPACE = '\u200b';
private static void addToStructures( List<Structure> structures,
Structure structure )
Element currentBlock, Range range, int currentTableLevel,
List<Bookmark> rangeBookmarks );
- protected boolean processCharacters( final HWPFDocumentCore document,
+ protected boolean processCharacters( final HWPFDocumentCore wordDocument,
final int currentTableLevel, final Range range, final Element block )
{
if ( range == null )
* reconstruct the structure of range -- sergey
*/
List<Structure> structures = new LinkedList<Structure>();
- if ( document instanceof HWPFDocument )
+ if ( wordDocument instanceof HWPFDocument )
{
- final HWPFDocument doc = (HWPFDocument) document;
+ final HWPFDocument doc = (HWPFDocument) wordDocument;
Map<Integer, List<Bookmark>> rangeBookmarks = doc.getBookmarks()
.getBookmarksStartedBetween( range.getStartOffset(),
CharacterRun characterRun = range.getCharacterRun( c );
if ( characterRun == null )
throw new AssertionError();
- Field aliveField = ( (HWPFDocument) document ).getFields()
+ Field aliveField = ( (HWPFDocument) wordDocument ).getFields()
.getFieldByStartOffset( FieldsDocumentPart.MAIN,
characterRun.getStartOffset() );
if ( aliveField != null )
return "BetweenStructuresSubrange " + super.toString();
}
};
- processCharacters( document, currentTableLevel, subrange, block );
+ processCharacters( wordDocument, currentTableLevel, subrange,
+ block );
}
if ( structure.structure instanceof Bookmark )
{
// other bookmarks with same bundaries
List<Bookmark> bookmarks = new LinkedList<Bookmark>();
- for ( Bookmark bookmark : ( (HWPFDocument) document )
+ for ( Bookmark bookmark : ( (HWPFDocument) wordDocument )
.getBookmarks()
.getBookmarksStartedBetween( structure.start,
structure.start + 1 ).values().iterator()
}
};
- processBookmarks( document, block, subrange,
+ processBookmarks( wordDocument, block, subrange,
currentTableLevel, bookmarks );
}
finally
else if ( structure.structure instanceof Field )
{
Field field = (Field) structure.structure;
- processField( (HWPFDocument) document, range,
+ processField( (HWPFDocument) wordDocument, range,
currentTableLevel, field, block );
}
else
return "AfterStructureSubrange " + super.toString();
}
};
- processCharacters( document, currentTableLevel, subrange, block );
+ processCharacters( wordDocument, currentTableLevel, subrange,
+ block );
}
return true;
}
if ( characterRun == null )
throw new AssertionError();
- if ( document instanceof HWPFDocument
- && ( (HWPFDocument) document ).getPicturesTable()
+ if ( wordDocument instanceof HWPFDocument
+ && ( (HWPFDocument) wordDocument ).getPicturesTable()
.hasPicture( characterRun ) )
{
- HWPFDocument newFormat = (HWPFDocument) document;
+ HWPFDocument newFormat = (HWPFDocument) wordDocument;
Picture picture = newFormat.getPicturesTable().extractPicture(
characterRun, true );
if ( characterRun.isSpecialCharacter() )
{
if ( text.charAt( 0 ) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
- && ( document instanceof HWPFDocument ) )
+ && ( wordDocument instanceof HWPFDocument ) )
{
- HWPFDocument doc = (HWPFDocument) document;
+ HWPFDocument doc = (HWPFDocument) wordDocument;
processNoteAnchor( doc, characterRun, block );
continue;
}
if ( text.charAt( 0 ) == SPECCHAR_DRAWN_OBJECT
- && ( document instanceof HWPFDocument ) )
+ && ( wordDocument instanceof HWPFDocument ) )
{
- HWPFDocument doc = (HWPFDocument) document;
+ HWPFDocument doc = (HWPFDocument) wordDocument;
processDrawnObject( doc, characterRun, block );
continue;
}
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
{
- if ( document instanceof HWPFDocument )
+ if ( wordDocument instanceof HWPFDocument )
{
- Field aliveField = ( (HWPFDocument) document ).getFields()
- .getFieldByStartOffset( FieldsDocumentPart.MAIN,
+ Field aliveField = ( (HWPFDocument) wordDocument )
+ .getFields().getFieldByStartOffset(
+ FieldsDocumentPart.MAIN,
characterRun.getStartOffset() );
if ( aliveField != null )
{
- processField( ( (HWPFDocument) document ), range,
+ processField( ( (HWPFDocument) wordDocument ), range,
currentTableLevel, aliveField, block );
int continueAfter = aliveField.getFieldEndOffset();
}
}
- int skipTo = tryDeadField( document, range, currentTableLevel,
- c, block );
+ int skipTo = tryDeadField( wordDocument, range,
+ currentTableLevel, c, block );
if ( skipTo != c )
{
CharacterRun characterRun, OfficeDrawing officeDrawing,
String path, Element block );
- protected abstract void processEndnoteAutonumbered( HWPFDocument doc,
+ protected abstract void processEndnoteAutonumbered( HWPFDocument wordDocument,
int noteIndex, Element block, Range endnoteTextRange );
protected void processField( HWPFDocument hwpfDocument, Range parentRange,
field.secondSubrange( parentRange ), currentBlock );
}
- protected abstract void processFootnoteAutonumbered( HWPFDocument doc,
+ protected abstract void processFootnoteAutonumbered( HWPFDocument wordDocument,
int noteIndex, Element block, Range footnoteTextRange );
protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
String pageref );
protected abstract void processParagraph( HWPFDocumentCore wordDocument,
- Element parentFopElement, int currentTableLevel,
- Paragraph paragraph, String bulletText );
+ Element parentElement, int currentTableLevel, Paragraph paragraph,
+ String bulletText );
protected void processParagraphes( HWPFDocumentCore wordDocument,
Element flow, Range range, int currentTableLevel )
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.apache.poi.util.Beta;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+/**
+ * Facade over a W3C DOM {@link Document} that builds an HTML-like element
+ * skeleton (<tt>html</tt>/<tt>head</tt>/<tt>body</tt>) and offers factory
+ * methods for the elements and text nodes placed into it.
+ * <p>
+ * The factory methods only create nodes; attaching them to the tree is left to
+ * the caller.
+ */
+@Beta
+public class TextDocumentFacade
+{
+    protected final Element body;
+    protected final Document document;
+    protected final Element head;
+    protected final Element root;
+
+    /** Title element inside {@link #head}, or <tt>null</tt> if removed */
+    protected Element title;
+    /** Text node of {@link #title}, or <tt>null</tt> if there is no title */
+    protected Text titleText;
+
+    /**
+     * Creates the <tt>html</tt> root with <tt>head</tt> and <tt>body</tt>
+     * children inside the specified document, plus an empty title element.
+     *
+     * @param document
+     *            DOM document that receives the created nodes
+     */
+    public TextDocumentFacade( Document document )
+    {
+        this.document = document;
+
+        root = document.createElement( "html" );
+        document.appendChild( root );
+
+        body = document.createElement( "body" );
+        head = document.createElement( "head" );
+
+        root.appendChild( head );
+        root.appendChild( body );
+
+        title = document.createElement( "title" );
+        titleText = document.createTextNode( "" );
+        // the text node must be attached, otherwise later setTitle() calls
+        // update a node that is not part of the document
+        title.appendChild( titleText );
+        head.appendChild( title );
+    }
+
+    /** Adds an <tt>Author</tt> metadata entry, see {@link #addMeta}. */
+    public void addAuthor( String value )
+    {
+        addMeta( "Author", value );
+    }
+
+    /** Adds a <tt>Description</tt> metadata entry, see {@link #addMeta}. */
+    public void addDescription( String value )
+    {
+        addMeta( "Description", value );
+    }
+
+    /** Adds a <tt>Keywords</tt> metadata entry, see {@link #addMeta}. */
+    public void addKeywords( String value )
+    {
+        addMeta( "Keywords", value );
+    }
+
+    /**
+     * Appends a <tt>meta</tt> element to the head. Name and value are stored
+     * as text nodes ("<i>name</i>: " and "<i>value</i>\n") in nested
+     * <tt>name</tt> and <tt>value</tt> elements rather than as attributes.
+     *
+     * @param name
+     *            metadata name
+     * @param value
+     *            metadata value
+     */
+    public void addMeta( final String name, String value )
+    {
+        Element meta = document.createElement( "meta" );
+
+        Element metaName = document.createElement( "name" );
+        metaName.appendChild( document.createTextNode( name + ": " ) );
+        meta.appendChild( metaName );
+
+        Element metaValue = document.createElement( "value" );
+        metaValue.appendChild( document.createTextNode( value + "\n" ) );
+        meta.appendChild( metaValue );
+
+        head.appendChild( meta );
+    }
+
+    /** @return new detached <tt>div</tt> element */
+    public Element createBlock()
+    {
+        return document.createElement( "div" );
+    }
+
+    /** @return new detached <tt>h1</tt> element holding a single space */
+    public Element createHeader1()
+    {
+        Element result = document.createElement( "h1" );
+        result.appendChild( document.createTextNode( "        " ) );
+        return result;
+    }
+
+    /** @return new detached <tt>h2</tt> element holding a single space */
+    public Element createHeader2()
+    {
+        Element result = document.createElement( "h2" );
+        result.appendChild( document.createTextNode( "    " ) );
+        return result;
+    }
+
+    /** @return new detached <tt>p</tt> element */
+    public Element createParagraph()
+    {
+        return document.createElement( "p" );
+    }
+
+    /** @return new detached <tt>table</tt> element */
+    public Element createTable()
+    {
+        return document.createElement( "table" );
+    }
+
+    /** @return new detached <tt>tbody</tt> element */
+    public Element createTableBody()
+    {
+        return document.createElement( "tbody" );
+    }
+
+    /** @return new detached <tt>td</tt> element */
+    public Element createTableCell()
+    {
+        return document.createElement( "td" );
+    }
+
+    /** @return new detached <tt>tr</tt> element */
+    public Element createTableRow()
+    {
+        return document.createElement( "tr" );
+    }
+
+    /** @return new detached text node with the specified data */
+    public Text createText( String data )
+    {
+        return document.createTextNode( data );
+    }
+
+    /** @return new detached <tt>ul</tt> element */
+    public Element createUnorderedList()
+    {
+        return document.createElement( "ul" );
+    }
+
+    public Element getBody()
+    {
+        return body;
+    }
+
+    public Document getDocument()
+    {
+        return document;
+    }
+
+    public Element getHead()
+    {
+        return head;
+    }
+
+    /**
+     * @return current title text, or <tt>null</tt> if the title element has
+     *         been removed by {@link #setTitle(String)}
+     */
+    public String getTitle()
+    {
+        if ( title == null )
+            return null;
+
+        return titleText.getTextContent();
+    }
+
+    /**
+     * Sets the document title. An empty title removes the title element from
+     * the head; a non-empty title (re)creates the element when necessary and
+     * updates its text.
+     *
+     * @param titleText
+     *            new title; emptiness is decided by
+     *            <tt>WordToHtmlUtils.isEmpty</tt>
+     */
+    public void setTitle( String titleText )
+    {
+        if ( WordToHtmlUtils.isEmpty( titleText ) )
+        {
+            if ( this.title != null )
+            {
+                this.head.removeChild( this.title );
+                this.title = null;
+                this.titleText = null;
+            }
+            // stop here: recreating the element would undo the removal
+            return;
+        }
+
+        if ( this.title == null )
+        {
+            this.title = document.createElement( "title" );
+            this.titleText = document.createTextNode( titleText );
+            this.title.appendChild( this.titleText );
+            this.head.appendChild( title );
+        }
+
+        this.titleText.setData( titleText );
+    }
+}
}
@Override
- protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
+ protected void processEndnoteAutonumbered( HWPFDocument wordDocument, int noteIndex,
Element block, Range endnoteTextRange )
{
final String textIndex = String.valueOf( internalLinkCounter
setId( backwardLink, forwardLinkName );
endnote.appendChild( backwardLink );
- processCharacters( doc, Integer.MIN_VALUE, endnoteTextRange, endnote );
+ processCharacters( wordDocument, Integer.MIN_VALUE, endnoteTextRange, endnote );
WordToFoUtils.compactInlines( endnote );
this.endnotes.add( endnote );
}
@Override
- protected void processFootnoteAutonumbered( HWPFDocument doc,
+ protected void processFootnoteAutonumbered( HWPFDocument wordDocument,
int noteIndex, Element block, Range footnoteTextRange )
{
final String textIndex = String.valueOf( internalLinkCounter
footnoteBody.appendChild( footnoteBlock );
footNote.appendChild( footnoteBody );
- processCharacters( doc, Integer.MIN_VALUE, footnoteTextRange,
+ processCharacters( wordDocument, Integer.MIN_VALUE, footnoteTextRange,
footnoteBlock );
WordToFoUtils.compactInlines( footnoteBlock );
}
@Override
- protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
+ protected void processEndnoteAutonumbered( HWPFDocument wordDocument, int noteIndex,
Element block, Range endnoteTextRange )
{
- processNoteAutonumbered( doc, "end", noteIndex, block, endnoteTextRange );
+ processNoteAutonumbered( wordDocument, "end", noteIndex, block, endnoteTextRange );
}
@Override
- protected void processFootnoteAutonumbered( HWPFDocument doc,
+ protected void processFootnoteAutonumbered( HWPFDocument wordDocument,
int noteIndex, Element block, Range footnoteTextRange )
{
- processNoteAutonumbered( doc, "foot", noteIndex, block,
+ processNoteAutonumbered( wordDocument, "foot", noteIndex, block,
footnoteTextRange );
}
}
protected void processParagraph( HWPFDocumentCore hwpfDocument,
- Element parentFopElement, int currentTableLevel,
- Paragraph paragraph, String bulletText )
+ Element parentElement, int currentTableLevel, Paragraph paragraph,
+ String bulletText )
{
final Element pElement = htmlDocumentFacade.createParagraph();
- parentFopElement.appendChild( pElement );
+ parentElement.appendChild( pElement );
StringBuilder style = new StringBuilder();
WordToHtmlUtils.addParagraphProperties( paragraph, style );
--- /dev/null
+package org.apache.poi.hwpf.converter;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.usermodel.Bookmark;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.OfficeDrawing;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.apache.poi.util.Beta;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+/**
+ * Converter that renders a Word document into a DOM tree containing only
+ * structural elements and text nodes, so that serializing the tree with the
+ * <tt>text</tt> output method yields plain text.
+ */
+@Beta
+public class WordToTextConverter extends AbstractWordConverter
+{
+
+    /**
+     * Java main() interface to interact with {@link WordToTextConverter}
+     *
+     * <p>
+     * Usage: WordToTextConverter infile outfile
+     * </p>
+     * Where infile is an input .doc file (Word 95-2007) which will be rendered
+     * as plain text into outfile
+     */
+    public static void main( String[] args )
+    {
+        if ( args.length < 2 )
+        {
+            System.err
+                    .println( "Usage: WordToTextConverter <inputFile.doc> <saveTo.txt>" );
+            return;
+        }
+
+        System.out.println( "Converting " + args[0] );
+        System.out.println( "Saving output to " + args[1] );
+        try
+        {
+            Document doc = WordToTextConverter.process( new File( args[0] ) );
+
+            TransformerFactory tf = TransformerFactory.newInstance();
+            Transformer serializer = tf.newTransformer();
+            // TODO set encoding from a command argument
+            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
+            serializer.setOutputProperty( OutputKeys.INDENT, "no" );
+            serializer.setOutputProperty( OutputKeys.METHOD, "text" );
+
+            // Serialize to a byte stream: with a Writer the bytes on disk
+            // would be produced by the writer's (platform default) charset
+            // and the ENCODING property above would have no effect.
+            OutputStream out = new FileOutputStream( args[1] );
+            try
+            {
+                serializer.transform( new DOMSource( doc ), new StreamResult(
+                        out ) );
+            }
+            finally
+            {
+                // release the file handle even if serialization fails
+                out.close();
+            }
+        }
+        catch ( Exception e )
+        {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Loads the specified file and converts it with a fresh converter backed
+     * by a new DOM document.
+     *
+     * @param docFile
+     *            Word file to convert
+     * @return DOM document holding the conversion result
+     */
+    static Document process( File docFile ) throws Exception
+    {
+        final HWPFDocumentCore wordDocument = AbstractWordUtils
+                .loadDoc( docFile );
+        WordToTextConverter wordToTextConverter = new WordToTextConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToTextConverter.processDocument( wordDocument );
+        return wordToTextConverter.getDocument();
+    }
+
+    /** Source of note numbers; shared by footnotes and endnotes */
+    private final AtomicInteger noteCounters = new AtomicInteger( 1 );
+
+    /** Block collecting note texts; created when the first note is met */
+    private Element notes = null;
+
+    private final TextDocumentFacade textDocumentFacade;
+
+    /**
+     * Creates new instance of {@link WordToTextConverter}. Can be used for
+     * output several {@link HWPFDocument}s into single text document.
+     *
+     * @param document
+     *            XML DOM Document used as storage for text pieces
+     */
+    public WordToTextConverter( Document document )
+    {
+        this.textDocumentFacade = new TextDocumentFacade( document );
+    }
+
+    public Document getDocument()
+    {
+        return textDocumentFacade.getDocument();
+    }
+
+    /** Appends the run text to the block; run formatting is not used. */
+    @Override
+    protected void outputCharacters( Element block, CharacterRun characterRun,
+            String text )
+    {
+        block.appendChild( textDocumentFacade.createText( text ) );
+    }
+
+    /** Bookmarks produce no output of their own; only the range is processed. */
+    @Override
+    protected void processBookmarks( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range range, int currentTableLevel,
+            List<Bookmark> rangeBookmarks )
+    {
+        processCharacters( wordDocument, currentTableLevel, range, currentBlock );
+    }
+
+    /**
+     * Runs the conversion and afterwards appends the collected notes, if any,
+     * to the end of the body.
+     */
+    @Override
+    public void processDocument( HWPFDocumentCore wordDocument )
+    {
+        super.processDocument( wordDocument );
+
+        if ( notes != null )
+            textDocumentFacade.getBody().appendChild( notes );
+    }
+
+    /** Copies non-empty title, author, comments and keywords into the head. */
+    @Override
+    protected void processDocumentInformation(
+            SummaryInformation summaryInformation )
+    {
+        if ( AbstractWordUtils.isNotEmpty( summaryInformation.getTitle() ) )
+            textDocumentFacade.setTitle( summaryInformation.getTitle() );
+
+        if ( AbstractWordUtils.isNotEmpty( summaryInformation.getAuthor() ) )
+            textDocumentFacade.addAuthor( summaryInformation.getAuthor() );
+
+        if ( AbstractWordUtils.isNotEmpty( summaryInformation.getComments() ) )
+            textDocumentFacade
+                    .addDescription( summaryInformation.getComments() );
+
+        if ( AbstractWordUtils.isNotEmpty( summaryInformation.getKeywords() ) )
+            textDocumentFacade.addKeywords( summaryInformation.getKeywords() );
+    }
+
+    @Override
+    protected void processDrawnObject( HWPFDocument doc,
+            CharacterRun characterRun, OfficeDrawing officeDrawing,
+            String path, Element block )
+    {
+        // ignore
+    }
+
+    @Override
+    protected void processEndnoteAutonumbered( HWPFDocument wordDocument,
+            int noteIndex, Element block, Range endnoteTextRange )
+    {
+        processNote( wordDocument, block, endnoteTextRange );
+    }
+
+    @Override
+    protected void processFootnoteAutonumbered( HWPFDocument wordDocument,
+            int noteIndex, Element block, Range footnoteTextRange )
+    {
+        processNote( wordDocument, block, footnoteTextRange );
+    }
+
+    /**
+     * Outputs the link text followed by the target in parentheses. Zero-width
+     * spaces are placed around the target and around each slash in it.
+     */
+    @Override
+    protected void processHyperlink( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String hyperlink )
+    {
+        processCharacters( wordDocument, currentTableLevel, textRange,
+                currentBlock );
+
+        currentBlock.appendChild( textDocumentFacade.createText( " ("
+                + UNICODECHAR_ZERO_WIDTH_SPACE
+                + hyperlink.replaceAll( "\\/", UNICODECHAR_ZERO_WIDTH_SPACE
+                        + "\\/" + UNICODECHAR_ZERO_WIDTH_SPACE )
+                + UNICODECHAR_ZERO_WIDTH_SPACE + ")" ) );
+    }
+
+    @Override
+    protected void processImage( Element currentBlock, boolean inlined,
+            Picture picture )
+    {
+        // ignore
+    }
+
+    @Override
+    protected void processLineBreak( Element block, CharacterRun characterRun )
+    {
+        block.appendChild( textDocumentFacade.createText( "\n" ) );
+    }
+
+    /**
+     * Outputs a note reference of the form <tt>[n]</tt> into the current block
+     * and adds the note text, prefixed with <tt>^n</tt>, to the notes block.
+     *
+     * @param wordDocument
+     *            document being converted
+     * @param block
+     *            element that receives the note reference
+     * @param noteTextRange
+     *            range holding the note text
+     */
+    protected void processNote( HWPFDocument wordDocument, Element block,
+            Range noteTextRange )
+    {
+        final int noteIndex = noteCounters.getAndIncrement();
+        block.appendChild( textDocumentFacade
+                .createText( UNICODECHAR_ZERO_WIDTH_SPACE + "[" + noteIndex
+                        + "]" + UNICODECHAR_ZERO_WIDTH_SPACE ) );
+
+        if ( notes == null )
+            notes = textDocumentFacade.createBlock();
+
+        Element note = textDocumentFacade.createBlock();
+        notes.appendChild( note );
+
+        note.appendChild( textDocumentFacade.createText( "^" + noteIndex
+                + "\t " ) );
+        processCharacters( wordDocument, Integer.MIN_VALUE, noteTextRange, note );
+        note.appendChild( textDocumentFacade.createText( "\n" ) );
+    }
+
+    /** Outputs only the reference text; the page reference target is unused. */
+    @Override
+    protected void processPageref( HWPFDocumentCore wordDocument,
+            Element currentBlock, Range textRange, int currentTableLevel,
+            String pageref )
+    {
+        processCharacters( wordDocument, currentTableLevel, textRange,
+                currentBlock );
+    }
+
+    /** Outputs bullet text, paragraph characters and a trailing line feed. */
+    @Override
+    protected void processParagraph( HWPFDocumentCore wordDocument,
+            Element parentElement, int currentTableLevel, Paragraph paragraph,
+            String bulletText )
+    {
+        Element pElement = textDocumentFacade.createParagraph();
+        pElement.appendChild( textDocumentFacade.createText( bulletText ) );
+        processCharacters( wordDocument, currentTableLevel, paragraph, pElement );
+        pElement.appendChild( textDocumentFacade.createText( "\n" ) );
+        parentElement.appendChild( pElement );
+    }
+
+    /** Outputs the section paragraphs as one block ending with a line feed. */
+    @Override
+    protected void processSection( HWPFDocumentCore wordDocument,
+            Section section, int s )
+    {
+        Element sectionElement = textDocumentFacade.createBlock();
+        processParagraphes( wordDocument, sectionElement, section,
+                Integer.MIN_VALUE );
+        sectionElement.appendChild( textDocumentFacade.createText( "\n" ) );
+        textDocumentFacade.getBody().appendChild( sectionElement );
+    }
+
+    /**
+     * Outputs the table row by row: cells after the first one are preceded by
+     * a tab and every row is terminated by a line feed.
+     */
+    protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
+            Table table )
+    {
+        final int tableRows = table.numRows();
+        for ( int r = 0; r < tableRows; r++ )
+        {
+            TableRow tableRow = table.getRow( r );
+
+            Element tableRowElement = textDocumentFacade.createTableRow();
+
+            final int rowCells = tableRow.numCells();
+            for ( int c = 0; c < rowCells; c++ )
+            {
+                TableCell tableCell = tableRow.getCell( c );
+
+                Element tableCellElement = textDocumentFacade.createTableCell();
+
+                if ( c != 0 )
+                    tableCellElement.appendChild( textDocumentFacade
+                            .createText( "\t" ) );
+
+                processParagraphes( hwpfDocument, tableCellElement, tableCell,
+                        table.getTableLevel() );
+                tableRowElement.appendChild( tableCellElement );
+            }
+
+            tableRowElement.appendChild( textDocumentFacade.createText( "\n" ) );
+            flow.appendChild( tableRowElement );
+        }
+    }
+
+}
public static Test suite()
{
- TestSuite suite = new TestSuite(TestWordToConverterSuite.class.getName());
+ TestSuite suite = new TestSuite(
+ TestWordToConverterSuite.class.getName() );
File directory = POIDataSamples.getDocumentInstance().getFile(
"../document" );
{
public void runTest() throws Exception
{
- test( child, false );
+ testFo( child );
}
} );
suite.addTest( new TestCase( name + " [HTML]" )
{
public void runTest() throws Exception
{
- test( child, true );
+ testHtml( child );
+ }
+ } );
+ suite.addTest( new TestCase( name + " [TEXT]" )
+ {
+ public void runTest() throws Exception
+ {
+ testText( child );
}
} );
return suite;
}
- protected static void test( File child, boolean html ) throws Exception
+ protected static void testFo( File child ) throws Exception
{
HWPFDocumentCore hwpfDocument;
try
}
catch ( Exception exc )
{
- // unable to parse file -- not WordToFoConverter fault
return;
}
Transformer transformer = TransformerFactory.newInstance()
.newTransformer();
transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
- transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+ transformer.setOutputProperty( OutputKeys.INDENT, "false" );
transformer.transform(
new DOMSource( wordToFoConverter.getDocument() ),
new StreamResult( stringWriter ) );
- if ( html )
- transformer.setOutputProperty( OutputKeys.METHOD, "html" );
+ // no exceptions
+ }
+
+    /**
+     * Smoke test of the HTML conversion: converts the sample file and
+     * serializes the result into memory. Files that cannot be loaded are
+     * skipped; the test passes when no exception is thrown.
+     *
+     * @param child
+     *            sample Word file
+     */
+    protected static void testHtml( File child ) throws Exception
+    {
+        HWPFDocumentCore hwpfDocument;
+        try
+        {
+            hwpfDocument = AbstractWordUtils.loadDoc( child );
+        }
+        catch ( Exception exc )
+        {
+            // unable to load file -- nothing to convert
+            return;
+        }
+
+        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToHtmlConverter.processDocument( hwpfDocument );
+
+        StringWriter stringWriter = new StringWriter();
+
+        Transformer transformer = TransformerFactory.newInstance()
+                .newTransformer();
+        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+        // documented values of INDENT are "yes" and "no"
+        transformer.setOutputProperty( OutputKeys.INDENT, "no" );
+        transformer.setOutputProperty( OutputKeys.METHOD, "html" );
+        transformer.transform(
+                new DOMSource( wordToHtmlConverter.getDocument() ),
+                new StreamResult( stringWriter ) );
+
+        // no exceptions
+    }
+
+    /**
+     * Smoke test of the plain-text conversion: converts the sample file and
+     * serializes the result into memory with the <tt>text</tt> output method.
+     * Files that cannot be loaded are skipped; the test passes when no
+     * exception is thrown.
+     *
+     * @param child
+     *            sample Word file
+     */
+    protected static void testText( File child ) throws Exception
+    {
+        HWPFDocumentCore loadedDocument;
+        try
+        {
+            loadedDocument = AbstractWordUtils.loadDoc( child );
+        }
+        catch ( Exception loadFailure )
+        {
+            // unable to load file -- nothing to convert
+            return;
+        }
+
+        WordToTextConverter converter = new WordToTextConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        converter.processDocument( loadedDocument );
+
+        StringWriter sink = new StringWriter();
+
+        TransformerFactory factory = TransformerFactory.newInstance();
+        Transformer textSerializer = factory.newTransformer();
+        textSerializer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+        textSerializer.setOutputProperty( OutputKeys.INDENT, "yes" );
+        textSerializer.setOutputProperty( OutputKeys.METHOD, "text" );
+
+        DOMSource source = new DOMSource( converter.getDocument() );
+        StreamResult target = new StreamResult( sink );
+        textSerializer.transform( source, target );
+        sink.toString();
         // no exceptions
     }
}