git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1142767 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_8_BETA4
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import java.util.HashMap; | import java.util.HashMap; | ||||
import java.util.List; | import java.util.List; | ||||
import org.w3c.dom.Document; | import org.w3c.dom.Document; | ||||
import org.w3c.dom.Element; | import org.w3c.dom.Element; | ||||
public abstract class AbstractWordExtractor | |||||
public abstract class AbstractWordConverter | |||||
{ | { | ||||
private static final byte BEL_MARK = 7; | private static final byte BEL_MARK = 7; | ||||
private static final byte FIELD_SEPARATOR_MARK = 20; | private static final byte FIELD_SEPARATOR_MARK = 20; | ||||
private static final POILogger logger = POILogFactory | private static final POILogger logger = POILogFactory | ||||
.getLogger( AbstractWordExtractor.class ); | |||||
.getLogger( AbstractWordConverter.class ); | |||||
public abstract Document getDocument(); | public abstract Document getDocument(); | ||||
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import java.io.Closeable; | import java.io.Closeable; | ||||
import java.io.File; | import java.io.File; |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import org.w3c.dom.Document; | import org.w3c.dom.Document; | ||||
import org.w3c.dom.Element; | import org.w3c.dom.Element; |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import org.w3c.dom.Document; | import org.w3c.dom.Document; | ||||
import org.w3c.dom.Element; | import org.w3c.dom.Element; |
* ==================================================================== | * ==================================================================== | ||||
*/ | */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
/** | /** | ||||
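* Formats a list item number as text according to a numbering style (Arabic, Roman, letter). | * Formats a list item number as text according to a numbering style (Arabic, Roman, letter). | ||||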
* | |||||
* | |||||
* @author Ryan Ackley | * @author Ryan Ackley | ||||
*/ | */ | ||||
public final class NumberFormatter { | |||||
public final class NumberFormatter | |||||
{ | |||||
private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", | |||||
"j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "x", "y", "z" }; | |||||
private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e", | |||||
"f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", | |||||
"s", "t", "u", "v", "x", "y", "z" }; | |||||
private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv", "v", "vi", "vii", | |||||
"viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", | |||||
"xx", "xxi", "xxii", "xxiii", "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx", | |||||
"xxxi", "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii", "xxxviii", | |||||
"xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv", "xlvi", "xlvii", "xlviii", | |||||
"xlix", "l" }; | |||||
private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv", | |||||
"v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv", | |||||
"xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", | |||||
"xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx", "xxxi", | |||||
"xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii", | |||||
"xxxviii", "xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv", | |||||
"xlvi", "xlvii", "xlviii", "xlix", "l" }; | |||||
private final static int T_ARABIC = 0; | private final static int T_ARABIC = 0; | ||||
private final static int T_LOWER_LETTER = 4; | private final static int T_LOWER_LETTER = 4; | ||||
private final static int T_UPPER_LETTER = 3; | private final static int T_UPPER_LETTER = 3; | ||||
private final static int T_UPPER_ROMAN = 1; | private final static int T_UPPER_ROMAN = 1; | ||||
public static String getNumber(int num, int style) { | |||||
switch (style) { | |||||
public static String getNumber( int num, int style ) | |||||
{ | |||||
switch ( style ) | |||||
{ | |||||
case T_UPPER_ROMAN: | case T_UPPER_ROMAN: | ||||
return C_ROMAN[num - 1].toUpperCase(); | return C_ROMAN[num - 1].toUpperCase(); | ||||
case T_LOWER_ROMAN: | case T_LOWER_ROMAN: | ||||
case T_ARABIC: | case T_ARABIC: | ||||
case T_ORDINAL: | case T_ORDINAL: | ||||
default: | default: | ||||
return String.valueOf(num); | |||||
return String.valueOf( num ); | |||||
} | } | ||||
} | } | ||||
} | } |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import java.io.File; | import java.io.File; | ||||
import java.io.FileWriter; | import java.io.FileWriter; | ||||
/** | /** | ||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | ||||
*/ | */ | ||||
public class WordToFoExtractor extends AbstractWordExtractor | |||||
public class WordToFoConverter extends AbstractWordConverter | |||||
{ | { | ||||
/** | /** | ||||
} | } | ||||
private static final POILogger logger = POILogFactory | private static final POILogger logger = POILogFactory | ||||
.getLogger( WordToFoExtractor.class ); | |||||
.getLogger( WordToFoConverter.class ); | |||||
public static String getBorderType( BorderCode borderCode ) | public static String getBorderType( BorderCode borderCode ) | ||||
{ | { | ||||
} | } | ||||
/** | /** | ||||
* Java main() interface to interact with WordToFoExtractor | |||||
* Java main() interface to interact with {@link WordToFoConverter} | |||||
* | * | ||||
* <p> | * <p> | ||||
* Usage: WordToFoExtractor infile outfile | |||||
* Usage: WordToFoConverter infile outfile | |||||
* </p> | * </p> | ||||
* Where infile is an input .doc file ( Word 97-2007) which will be rendered | * Where infile is an input .doc file ( Word 97-2007) which will be rendered | ||||
* as XSL-FO into outfile | * as XSL-FO into outfile | ||||
* | |||||
*/ | */ | ||||
public static void main( String[] args ) | public static void main( String[] args ) | ||||
{ | { | ||||
if ( args.length < 2 ) | if ( args.length < 2 ) | ||||
{ | { | ||||
System.err | System.err | ||||
.println( "Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>" ); | |||||
.println( "Usage: WordToFoConverter <inputFile.doc> <saveTo.fo>" ); | |||||
return; | return; | ||||
} | } | ||||
System.out.println( "Saving output to " + args[1] ); | System.out.println( "Saving output to " + args[1] ); | ||||
try | try | ||||
{ | { | ||||
Document doc = WordToFoExtractor.process( new File( args[0] ) ); | |||||
Document doc = WordToFoConverter.process( new File( args[0] ) ); | |||||
FileWriter out = new FileWriter( args[1] ); | FileWriter out = new FileWriter( args[1] ); | ||||
DOMSource domSource = new DOMSource( doc ); | DOMSource domSource = new DOMSource( doc ); | ||||
static Document process( File docFile ) throws Exception | static Document process( File docFile ) throws Exception | ||||
{ | { | ||||
final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile ); | final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile ); | ||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor( | |||||
WordToFoConverter wordToFoConverter = new WordToFoConverter( | |||||
DocumentBuilderFactory.newInstance().newDocumentBuilder() | DocumentBuilderFactory.newInstance().newDocumentBuilder() | ||||
.newDocument() ); | .newDocument() ); | ||||
wordToFoExtractor.processDocument( hwpfDocument ); | |||||
return wordToFoExtractor.getDocument(); | |||||
wordToFoConverter.processDocument( hwpfDocument ); | |||||
return wordToFoConverter.getDocument(); | |||||
} | } | ||||
private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>(); | private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>(); | ||||
protected final FoDocumentFacade foDocumentFacade; | protected final FoDocumentFacade foDocumentFacade; | ||||
/** | /** | ||||
* Creates new instance of {@link WordToFoExtractor}. Can be used for output | |||||
* Creates new instance of {@link WordToFoConverter}. Can be used for output | |||||
* several {@link HWPFDocument}s into single FO document. | * several {@link HWPFDocument}s into single FO document. | ||||
* | * | ||||
* @param document | * @param document | ||||
* XML DOM Document used as XSL FO document. Shall support | * XML DOM Document used as XSL FO document. Shall support | ||||
* namespaces | * namespaces | ||||
*/ | */ | ||||
public WordToFoExtractor( Document document ) | |||||
public WordToFoConverter( Document document ) | |||||
{ | { | ||||
this.foDocumentFacade = new FoDocumentFacade( document ); | this.foDocumentFacade = new FoDocumentFacade( document ); | ||||
} | } |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import org.apache.poi.hwpf.usermodel.BorderCode; | import org.apache.poi.hwpf.usermodel.BorderCode; | ||||
import org.apache.poi.hwpf.usermodel.CharacterProperties; | import org.apache.poi.hwpf.usermodel.CharacterProperties; |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import java.io.File; | import java.io.File; | ||||
import java.io.FileWriter; | import java.io.FileWriter; | ||||
import org.w3c.dom.Element; | import org.w3c.dom.Element; | ||||
import org.w3c.dom.Text; | import org.w3c.dom.Text; | ||||
import static org.apache.poi.hwpf.extractor.AbstractWordUtils.TWIPS_PER_INCH; | |||||
import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH; | |||||
/** | /** | ||||
* Converts Word files (95-2007) into HTML files. | |||||
* <p> | |||||
* This implementation doesn't create images or links to them. This can be | |||||
* changed by overriding {@link #processImage(Element, boolean, Picture)} | |||||
* method. | |||||
* | |||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | ||||
*/ | */ | ||||
public class WordToHtmlExtractor extends AbstractWordExtractor | |||||
public class WordToHtmlConverter extends AbstractWordConverter | |||||
{ | { | ||||
/** | /** | ||||
} | } | ||||
private static final POILogger logger = POILogFactory | private static final POILogger logger = POILogFactory | ||||
.getLogger( WordToHtmlExtractor.class ); | |||||
.getLogger( WordToHtmlConverter.class ); | |||||
private static String getSectionStyle( Section section ) | private static String getSectionStyle( Section section ) | ||||
{ | { | ||||
} | } | ||||
/** | /** | ||||
* Java main() interface to interact with WordToHtmlExtractor | |||||
* Java main() interface to interact with {@link WordToHtmlConverter} | |||||
* | * | ||||
* <p> | * <p> | ||||
* Usage: WordToHtmlExtractor infile outfile | |||||
* Usage: WordToHtmlConverter infile outfile | |||||
* </p> | * </p> | ||||
* Where infile is an input .doc file ( Word 95-2007) which will be rendered | * Where infile is an input .doc file ( Word 95-2007) which will be rendered | ||||
* as HTML into outfile | * as HTML into outfile | ||||
if ( args.length < 2 ) | if ( args.length < 2 ) | ||||
{ | { | ||||
System.err | System.err | ||||
.println( "Usage: WordToHtmlExtractor <inputFile.doc> <saveTo.html>" ); | |||||
.println( "Usage: WordToHtmlConverter <inputFile.doc> <saveTo.html>" ); | |||||
return; | return; | ||||
} | } | ||||
System.out.println( "Saving output to " + args[1] ); | System.out.println( "Saving output to " + args[1] ); | ||||
try | try | ||||
{ | { | ||||
Document doc = WordToHtmlExtractor.process( new File( args[0] ) ); | |||||
Document doc = WordToHtmlConverter.process( new File( args[0] ) ); | |||||
FileWriter out = new FileWriter( args[1] ); | FileWriter out = new FileWriter( args[1] ); | ||||
DOMSource domSource = new DOMSource( doc ); | DOMSource domSource = new DOMSource( doc ); | ||||
static Document process( File docFile ) throws Exception | static Document process( File docFile ) throws Exception | ||||
{ | { | ||||
final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile ); | final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile ); | ||||
WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor( | |||||
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( | |||||
DocumentBuilderFactory.newInstance().newDocumentBuilder() | DocumentBuilderFactory.newInstance().newDocumentBuilder() | ||||
.newDocument() ); | .newDocument() ); | ||||
wordToHtmlExtractor.processDocument( wordDocument ); | |||||
return wordToHtmlExtractor.getDocument(); | |||||
wordToHtmlConverter.processDocument( wordDocument ); | |||||
return wordToHtmlConverter.getDocument(); | |||||
} | } | ||||
private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>(); | private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>(); | ||||
private final HtmlDocumentFacade htmlDocumentFacade; | private final HtmlDocumentFacade htmlDocumentFacade; | ||||
/** | /** | ||||
* Creates new instance of {@link WordToHtmlExtractor}. Can be used for | |||||
* Creates new instance of {@link WordToHtmlConverter}. Can be used for | |||||
* output several {@link HWPFDocument}s into single HTML document. | * output several {@link HWPFDocument}s into single HTML document. | ||||
* | * | ||||
* @param document | * @param document | ||||
* XML DOM Document used as HTML document | * XML DOM Document used as HTML document | ||||
*/ | */ | ||||
public WordToHtmlExtractor( Document document ) | |||||
public WordToHtmlConverter( Document document ) | |||||
{ | { | ||||
this.htmlDocumentFacade = new HtmlDocumentFacade( document ); | this.htmlDocumentFacade = new HtmlDocumentFacade( document ); | ||||
} | } |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import org.apache.poi.hwpf.usermodel.BorderCode; | import org.apache.poi.hwpf.usermodel.BorderCode; | ||||
import org.apache.poi.hwpf.usermodel.CharacterProperties; | import org.apache.poi.hwpf.usermodel.CharacterProperties; |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import java.io.File; | import java.io.File; | ||||
import java.io.FilenameFilter; | import java.io.FilenameFilter; | ||||
import org.apache.poi.POIDataSamples; | import org.apache.poi.POIDataSamples; | ||||
import org.apache.poi.hwpf.HWPFDocumentCore; | import org.apache.poi.hwpf.HWPFDocumentCore; | ||||
public class TestWordToExtractorSuite | |||||
public class TestWordToConverterSuite | |||||
{ | { | ||||
/** | /** | ||||
* YK: a quick hack to exclude failing documents from the suite. | * YK: a quick hack to exclude failing documents from the suite. | ||||
} | } | ||||
catch ( Exception exc ) | catch ( Exception exc ) | ||||
{ | { | ||||
// unable to parse file -- not WordToFoExtractor fault | |||||
// unable to parse file -- not WordToFoConverter fault | |||||
return; | return; | ||||
} | } | ||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor( | |||||
WordToFoConverter wordToFoConverter = new WordToFoConverter( | |||||
DocumentBuilderFactory.newInstance().newDocumentBuilder() | DocumentBuilderFactory.newInstance().newDocumentBuilder() | ||||
.newDocument() ); | .newDocument() ); | ||||
wordToFoExtractor.processDocument( hwpfDocument ); | |||||
wordToFoConverter.processDocument( hwpfDocument ); | |||||
StringWriter stringWriter = new StringWriter(); | StringWriter stringWriter = new StringWriter(); | ||||
transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); | transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); | ||||
transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); | transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); | ||||
transformer.transform( | transformer.transform( | ||||
new DOMSource( wordToFoExtractor.getDocument() ), | |||||
new DOMSource( wordToFoConverter.getDocument() ), | |||||
new StreamResult( stringWriter ) ); | new StreamResult( stringWriter ) ); | ||||
if ( html ) | if ( html ) |
* limitations under the License. | * limitations under the License. | ||||
* ==================================================================== | * ==================================================================== | ||||
*/ | */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import java.io.StringWriter; | import java.io.StringWriter; | ||||
import org.apache.poi.hwpf.HWPFDocument; | import org.apache.poi.hwpf.HWPFDocument; | ||||
/** | /** | ||||
* Test cases for {@link WordToFoExtractor} | |||||
* Test cases for {@link WordToFoConverter} | |||||
* | * | ||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | ||||
*/ | */ | ||||
public class TestWordToFoExtractor extends TestCase | |||||
public class TestWordToFoConverter extends TestCase | |||||
{ | { | ||||
private static String getFoText( final String sampleFileName ) | private static String getFoText( final String sampleFileName ) | ||||
throws Exception | throws Exception | ||||
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples | HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples | ||||
.getDocumentInstance().openResourceAsStream( sampleFileName ) ); | .getDocumentInstance().openResourceAsStream( sampleFileName ) ); | ||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor( | |||||
WordToFoConverter wordToFoConverter = new WordToFoConverter( | |||||
DocumentBuilderFactory.newInstance().newDocumentBuilder() | DocumentBuilderFactory.newInstance().newDocumentBuilder() | ||||
.newDocument() ); | .newDocument() ); | ||||
wordToFoExtractor.processDocument( hwpfDocument ); | |||||
wordToFoConverter.processDocument( hwpfDocument ); | |||||
StringWriter stringWriter = new StringWriter(); | StringWriter stringWriter = new StringWriter(); | ||||
.newTransformer(); | .newTransformer(); | ||||
transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); | transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); | ||||
transformer.transform( | transformer.transform( | ||||
new DOMSource( wordToFoExtractor.getDocument() ), | |||||
new DOMSource( wordToFoConverter.getDocument() ), | |||||
new StreamResult( stringWriter ) ); | new StreamResult( stringWriter ) ); | ||||
String result = stringWriter.toString(); | String result = stringWriter.toString(); | ||||
return result; | return result; | ||||
} | } | ||||
public void testHyperlink() throws Exception | |||||
public void testEquation() throws Exception | |||||
{ | { | ||||
final String sampleFileName = "hyperlink.doc"; | |||||
final String sampleFileName = "equation.doc"; | |||||
String result = getFoText( sampleFileName ); | String result = getFoText( sampleFileName ); | ||||
assertTrue( result | assertTrue( result | ||||
.contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) ); | |||||
assertTrue( result.contains( "Hyperlink text" ) ); | |||||
.contains( "<!--Image link to '0.emf' can be here-->" ) ); | |||||
} | } | ||||
public void testEquation() throws Exception | |||||
public void testHyperlink() throws Exception | |||||
{ | { | ||||
final String sampleFileName = "equation.doc"; | |||||
final String sampleFileName = "hyperlink.doc"; | |||||
String result = getFoText( sampleFileName ); | String result = getFoText( sampleFileName ); | ||||
assertTrue( result | assertTrue( result | ||||
.contains( "<!--Image link to '0.emf' can be here-->" ) ); | |||||
.contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) ); | |||||
assertTrue( result.contains( "Hyperlink text" ) ); | |||||
} | } | ||||
public void testPageref() throws Exception | public void testPageref() throws Exception |
See the License for the specific language governing permissions and | See the License for the specific language governing permissions and | ||||
limitations under the License. | limitations under the License. | ||||
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.hwpf.extractor; | |||||
package org.apache.poi.hwpf.converter; | |||||
import java.io.StringWriter; | import java.io.StringWriter; | ||||
import org.apache.poi.hwpf.HWPFDocument; | import org.apache.poi.hwpf.HWPFDocument; | ||||
/** | /** | ||||
* Test cases for {@link WordToFoExtractor} | |||||
* Test cases for {@link WordToHtmlConverter} | |||||
* | * | ||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) | ||||
*/ | */ | ||||
public class TestWordToHtmlExtractor extends TestCase | |||||
public class TestWordToHtmlConverter extends TestCase | |||||
{ | { | ||||
private static String getHtmlText( final String sampleFileName ) | private static String getHtmlText( final String sampleFileName ) | ||||
throws Exception | throws Exception | ||||
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples | HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples | ||||
.getDocumentInstance().openResourceAsStream( sampleFileName ) ); | .getDocumentInstance().openResourceAsStream( sampleFileName ) ); | ||||
WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor( | |||||
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( | |||||
DocumentBuilderFactory.newInstance().newDocumentBuilder() | DocumentBuilderFactory.newInstance().newDocumentBuilder() | ||||
.newDocument() ); | .newDocument() ); | ||||
wordToHtmlExtractor.processDocument( hwpfDocument ); | |||||
wordToHtmlConverter.processDocument( hwpfDocument ); | |||||
StringWriter stringWriter = new StringWriter(); | StringWriter stringWriter = new StringWriter(); | ||||
transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); | transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); | ||||
transformer.setOutputProperty( OutputKeys.METHOD, "html" ); | transformer.setOutputProperty( OutputKeys.METHOD, "html" ); | ||||
transformer.transform( | transformer.transform( | ||||
new DOMSource( wordToHtmlExtractor.getDocument() ), | |||||
new DOMSource( wordToHtmlConverter.getDocument() ), | |||||
new StreamResult( stringWriter ) ); | new StreamResult( stringWriter ) ); | ||||
String result = stringWriter.toString(); | String result = stringWriter.toString(); |