Browse Source

rename extractor -> converter and move to converter package

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1142767 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_8_BETA4
Sergey Vladimirov 13 years ago
parent
commit
03035ed09a

src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordExtractor.java → src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;


public abstract class AbstractWordExtractor
public abstract class AbstractWordConverter
{ {
private static final byte BEL_MARK = 7; private static final byte BEL_MARK = 7;


private static final byte FIELD_SEPARATOR_MARK = 20; private static final byte FIELD_SEPARATOR_MARK = 20;


private static final POILogger logger = POILogFactory private static final POILogger logger = POILogFactory
.getLogger( AbstractWordExtractor.class );
.getLogger( AbstractWordConverter.class );


public abstract Document getDocument(); public abstract Document getDocument();



src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordUtils.java → src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import java.io.Closeable; import java.io.Closeable;
import java.io.File; import java.io.File;

src/scratchpad/src/org/apache/poi/hwpf/extractor/FoDocumentFacade.java → src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;

src/scratchpad/src/org/apache/poi/hwpf/extractor/HtmlDocumentFacade.java → src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;

src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java → src/scratchpad/src/org/apache/poi/hwpf/converter/NumberFormatter.java View File

* ==================================================================== * ====================================================================
*/ */


package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


/** /**
* Comment me * Comment me
*
*
* @author Ryan Ackley * @author Ryan Ackley
*/ */
public final class NumberFormatter {
public final class NumberFormatter
{


private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e", "f", "g", "h", "i",
"j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "x", "y", "z" };
private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e",
"f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
"s", "t", "u", "v", "x", "y", "z" };


private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv", "v", "vi", "vii",
"viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix",
"xx", "xxi", "xxii", "xxiii", "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx",
"xxxi", "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii", "xxxviii",
"xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv", "xlvi", "xlvii", "xlviii",
"xlix", "l" };
private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv",
"v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv",
"xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii",
"xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx", "xxxi",
"xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii",
"xxxviii", "xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv",
"xlvi", "xlvii", "xlviii", "xlix", "l" };


private final static int T_ARABIC = 0; private final static int T_ARABIC = 0;
private final static int T_LOWER_LETTER = 4; private final static int T_LOWER_LETTER = 4;
private final static int T_UPPER_LETTER = 3; private final static int T_UPPER_LETTER = 3;
private final static int T_UPPER_ROMAN = 1; private final static int T_UPPER_ROMAN = 1;


public static String getNumber(int num, int style) {
switch (style) {
public static String getNumber( int num, int style )
{
switch ( style )
{
case T_UPPER_ROMAN: case T_UPPER_ROMAN:
return C_ROMAN[num - 1].toUpperCase(); return C_ROMAN[num - 1].toUpperCase();
case T_LOWER_ROMAN: case T_LOWER_ROMAN:
case T_ARABIC: case T_ARABIC:
case T_ORDINAL: case T_ORDINAL:
default: default:
return String.valueOf(num);
return String.valueOf( num );
} }
} }
} }

src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java → src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import java.io.File; import java.io.File;
import java.io.FileWriter; import java.io.FileWriter;
/** /**
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/ */
public class WordToFoExtractor extends AbstractWordExtractor
public class WordToFoConverter extends AbstractWordConverter
{ {


/** /**
} }


private static final POILogger logger = POILogFactory private static final POILogger logger = POILogFactory
.getLogger( WordToFoExtractor.class );
.getLogger( WordToFoConverter.class );


public static String getBorderType( BorderCode borderCode ) public static String getBorderType( BorderCode borderCode )
{ {
} }


/** /**
* Java main() interface to interact with WordToFoExtractor
* Java main() interface to interact with {@link WordToFoConverter}
* *
* <p> * <p>
* Usage: WordToFoExtractor infile outfile
* Usage: WordToFoConverter infile outfile
* </p> * </p>
* Where infile is an input .doc file ( Word 97-2007) which will be rendered * Where infile is an input .doc file ( Word 97-2007) which will be rendered
* as XSL-FO into outfile * as XSL-FO into outfile
*
*/ */
public static void main( String[] args ) public static void main( String[] args )
{ {
if ( args.length < 2 ) if ( args.length < 2 )
{ {
System.err System.err
.println( "Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>" );
.println( "Usage: WordToFoConverter <inputFile.doc> <saveTo.fo>" );
return; return;
} }


System.out.println( "Saving output to " + args[1] ); System.out.println( "Saving output to " + args[1] );
try try
{ {
Document doc = WordToFoExtractor.process( new File( args[0] ) );
Document doc = WordToFoConverter.process( new File( args[0] ) );


FileWriter out = new FileWriter( args[1] ); FileWriter out = new FileWriter( args[1] );
DOMSource domSource = new DOMSource( doc ); DOMSource domSource = new DOMSource( doc );
static Document process( File docFile ) throws Exception static Document process( File docFile ) throws Exception
{ {
final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile ); final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
WordToFoConverter wordToFoConverter = new WordToFoConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder() DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument() ); .newDocument() );
wordToFoExtractor.processDocument( hwpfDocument );
return wordToFoExtractor.getDocument();
wordToFoConverter.processDocument( hwpfDocument );
return wordToFoConverter.getDocument();
} }


private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>(); private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
protected final FoDocumentFacade foDocumentFacade; protected final FoDocumentFacade foDocumentFacade;


/** /**
* Creates new instance of {@link WordToFoExtractor}. Can be used for output
* Creates new instance of {@link WordToFoConverter}. Can be used for output
* several {@link HWPFDocument}s into single FO document. * several {@link HWPFDocument}s into single FO document.
* *
* @param document * @param document
* XML DOM Document used as XSL FO document. Shall support * XML DOM Document used as XSL FO document. Shall support
* namespaces * namespaces
*/ */
public WordToFoExtractor( Document document )
public WordToFoConverter( Document document )
{ {
this.foDocumentFacade = new FoDocumentFacade( document ); this.foDocumentFacade = new FoDocumentFacade( document );
} }

src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java → src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import org.apache.poi.hwpf.usermodel.BorderCode; import org.apache.poi.hwpf.usermodel.BorderCode;
import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterProperties;

src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlExtractor.java → src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import java.io.File; import java.io.File;
import java.io.FileWriter; import java.io.FileWriter;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.Text; import org.w3c.dom.Text;


import static org.apache.poi.hwpf.extractor.AbstractWordUtils.TWIPS_PER_INCH;
import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;


/** /**
* Converts Word files (95-2007) into HTML files.
* <p>
* This implementation doesn't create images or links to them. This can be
* changed by overriding {@link #processImage(Element, boolean, Picture)}
* method.
*
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/ */
public class WordToHtmlExtractor extends AbstractWordExtractor
public class WordToHtmlConverter extends AbstractWordConverter
{ {


/** /**
} }


private static final POILogger logger = POILogFactory private static final POILogger logger = POILogFactory
.getLogger( WordToHtmlExtractor.class );
.getLogger( WordToHtmlConverter.class );


private static String getSectionStyle( Section section ) private static String getSectionStyle( Section section )
{ {
} }


/** /**
* Java main() interface to interact with WordToHtmlExtractor
* Java main() interface to interact with {@link WordToHtmlConverter}
* *
* <p> * <p>
* Usage: WordToHtmlExtractor infile outfile
* Usage: WordToHtmlConverter infile outfile
* </p> * </p>
* Where infile is an input .doc file ( Word 95-2007) which will be rendered * Where infile is an input .doc file ( Word 95-2007) which will be rendered
* as HTML into outfile * as HTML into outfile
if ( args.length < 2 ) if ( args.length < 2 )
{ {
System.err System.err
.println( "Usage: WordToHtmlExtractor <inputFile.doc> <saveTo.html>" );
.println( "Usage: WordToHtmlConverter <inputFile.doc> <saveTo.html>" );
return; return;
} }


System.out.println( "Saving output to " + args[1] ); System.out.println( "Saving output to " + args[1] );
try try
{ {
Document doc = WordToHtmlExtractor.process( new File( args[0] ) );
Document doc = WordToHtmlConverter.process( new File( args[0] ) );


FileWriter out = new FileWriter( args[1] ); FileWriter out = new FileWriter( args[1] );
DOMSource domSource = new DOMSource( doc ); DOMSource domSource = new DOMSource( doc );
static Document process( File docFile ) throws Exception static Document process( File docFile ) throws Exception
{ {
final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile ); final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder() DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument() ); .newDocument() );
wordToHtmlExtractor.processDocument( wordDocument );
return wordToHtmlExtractor.getDocument();
wordToHtmlConverter.processDocument( wordDocument );
return wordToHtmlConverter.getDocument();
} }


private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>(); private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
private final HtmlDocumentFacade htmlDocumentFacade; private final HtmlDocumentFacade htmlDocumentFacade;


/** /**
* Creates new instance of {@link WordToHtmlExtractor}. Can be used for
* Creates new instance of {@link WordToHtmlConverter}. Can be used for
* output several {@link HWPFDocument}s into single HTML document. * output several {@link HWPFDocument}s into single HTML document.
* *
* @param document * @param document
* XML DOM Document used as HTML document * XML DOM Document used as HTML document
*/ */
public WordToHtmlExtractor( Document document )
public WordToHtmlConverter( Document document )
{ {
this.htmlDocumentFacade = new HtmlDocumentFacade( document ); this.htmlDocumentFacade = new HtmlDocumentFacade( document );
} }

src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlUtils.java → src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import org.apache.poi.hwpf.usermodel.BorderCode; import org.apache.poi.hwpf.usermodel.BorderCode;
import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterProperties;

src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToExtractorSuite.java → src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import java.io.File; import java.io.File;
import java.io.FilenameFilter; import java.io.FilenameFilter;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.HWPFDocumentCore;


public class TestWordToExtractorSuite
public class TestWordToConverterSuite
{ {
/** /**
* YK: a quick hack to exclude failing documents from the suite. * YK: a quick hack to exclude failing documents from the suite.
} }
catch ( Exception exc ) catch ( Exception exc )
{ {
// unable to parse file -- not WordToFoExtractor fault
// unable to parse file -- not WordToFoConverter's fault
return; return;
} }


WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
WordToFoConverter wordToFoConverter = new WordToFoConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder() DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument() ); .newDocument() );
wordToFoExtractor.processDocument( hwpfDocument );
wordToFoConverter.processDocument( hwpfDocument );


StringWriter stringWriter = new StringWriter(); StringWriter stringWriter = new StringWriter();


transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
transformer.transform( transformer.transform(
new DOMSource( wordToFoExtractor.getDocument() ),
new DOMSource( wordToFoConverter.getDocument() ),
new StreamResult( stringWriter ) ); new StreamResult( stringWriter ) );


if ( html ) if ( html )

src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToFoExtractor.java → src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java View File

* limitations under the License. * limitations under the License.
* ==================================================================== * ====================================================================
*/ */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import java.io.StringWriter; import java.io.StringWriter;


import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;


/** /**
* Test cases for {@link WordToFoExtractor}
* Test cases for {@link WordToFoConverter}
* *
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/ */
public class TestWordToFoExtractor extends TestCase
public class TestWordToFoConverter extends TestCase
{ {
private static String getFoText( final String sampleFileName ) private static String getFoText( final String sampleFileName )
throws Exception throws Exception
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
.getDocumentInstance().openResourceAsStream( sampleFileName ) ); .getDocumentInstance().openResourceAsStream( sampleFileName ) );


WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
WordToFoConverter wordToFoConverter = new WordToFoConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder() DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument() ); .newDocument() );
wordToFoExtractor.processDocument( hwpfDocument );
wordToFoConverter.processDocument( hwpfDocument );


StringWriter stringWriter = new StringWriter(); StringWriter stringWriter = new StringWriter();


.newTransformer(); .newTransformer();
transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
transformer.transform( transformer.transform(
new DOMSource( wordToFoExtractor.getDocument() ),
new DOMSource( wordToFoConverter.getDocument() ),
new StreamResult( stringWriter ) ); new StreamResult( stringWriter ) );


String result = stringWriter.toString(); String result = stringWriter.toString();
return result; return result;
} }


public void testHyperlink() throws Exception
public void testEquation() throws Exception
{ {
final String sampleFileName = "hyperlink.doc";
final String sampleFileName = "equation.doc";
String result = getFoText( sampleFileName ); String result = getFoText( sampleFileName );


assertTrue( result assertTrue( result
.contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
assertTrue( result.contains( "Hyperlink text" ) );
.contains( "<!--Image link to '0.emf' can be here-->" ) );
} }


public void testEquation() throws Exception
public void testHyperlink() throws Exception
{ {
final String sampleFileName = "equation.doc";
final String sampleFileName = "hyperlink.doc";
String result = getFoText( sampleFileName ); String result = getFoText( sampleFileName );


assertTrue( result assertTrue( result
.contains( "<!--Image link to '0.emf' can be here-->" ) );
.contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
assertTrue( result.contains( "Hyperlink text" ) );
} }


public void testPageref() throws Exception public void testPageref() throws Exception

src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToHtmlExtractor.java → src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java View File

See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hwpf.extractor;
package org.apache.poi.hwpf.converter;


import java.io.StringWriter; import java.io.StringWriter;


import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;


/** /**
* Test cases for {@link WordToFoExtractor}
* Test cases for {@link WordToHtmlConverter}
* *
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/ */
public class TestWordToHtmlExtractor extends TestCase
public class TestWordToHtmlConverter extends TestCase
{ {
private static String getHtmlText( final String sampleFileName ) private static String getHtmlText( final String sampleFileName )
throws Exception throws Exception
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
.getDocumentInstance().openResourceAsStream( sampleFileName ) ); .getDocumentInstance().openResourceAsStream( sampleFileName ) );


WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder() DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument() ); .newDocument() );
wordToHtmlExtractor.processDocument( hwpfDocument );
wordToHtmlConverter.processDocument( hwpfDocument );


StringWriter stringWriter = new StringWriter(); StringWriter stringWriter = new StringWriter();


transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" ); transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
transformer.setOutputProperty( OutputKeys.METHOD, "html" ); transformer.setOutputProperty( OutputKeys.METHOD, "html" );
transformer.transform( transformer.transform(
new DOMSource( wordToHtmlExtractor.getDocument() ),
new DOMSource( wordToHtmlConverter.getDocument() ),
new StreamResult( stringWriter ) ); new StreamResult( stringWriter ) );


String result = stringWriter.toString(); String result = stringWriter.toString();

Loading…
Cancel
Save