source.dussan.org Git - poi.git/commitdiff
rename extractor -> converter and move to converter package
author Sergey Vladimirov <sergey@apache.org>
Mon, 4 Jul 2011 19:14:44 +0000 (19:14 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Mon, 4 Jul 2011 19:14:44 +0000 (19:14 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1142767 13f79535-47bb-0310-9956-ffa450edef68

24 files changed:
src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/NumberFormatter.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordExtractor.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordUtils.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/FoDocumentFacade.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/HtmlDocumentFacade.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlExtractor.java [deleted file]
src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlUtils.java [deleted file]
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToExtractorSuite.java [deleted file]
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToFoExtractor.java [deleted file]
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToHtmlExtractor.java [deleted file]

diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
new file mode 100644 (file)
index 0000000..e047d3f
--- /dev/null
@@ -0,0 +1,365 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.model.ListFormatOverride;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+public abstract class AbstractWordConverter
+{
+    private static final byte BEL_MARK = 7;
+
+    private static final byte FIELD_BEGIN_MARK = 19;
+
+    private static final byte FIELD_END_MARK = 21;
+
+    private static final byte FIELD_SEPARATOR_MARK = 20;
+
+    private static final POILogger logger = POILogFactory
+            .getLogger( AbstractWordConverter.class );
+
+    public abstract Document getDocument(); // DOM document of the concrete converter; presumably the conversion result -- confirm in subclasses
+
+    protected abstract void outputCharacters( Element block, // called by processCharacters for every run that is emitted as plain text
+            CharacterRun characterRun, String text ); // text: the run text after a trailing "\r" (or an in-table BEL) has been cut off
+
+    /**
+     * Converts the character runs with indexes {@code [start, end)} of
+     * {@code characterRuns} into output attached to {@code block}.
+     * <p>
+     * Runs that carry a picture are handed to
+     * {@link #processImage(Element, boolean, Picture)}, a field begin mark
+     * starts field detection through
+     * {@link #tryField(HWPFDocumentCore, Paragraph, int, List, int, Element)},
+     * stray field separator / end marks and special, OBJ and OLE2 runs are
+     * skipped, and everything else goes to
+     * {@link #outputCharacters(Element, CharacterRun, String)}.
+     *
+     * @param hwpfDocument
+     *            document being converted; pictures are only looked up when it
+     *            is an {@link HWPFDocument}
+     * @param currentTableLevel
+     *            table nesting level; a trailing BEL character is stripped
+     *            only when this is not 0
+     * @param paragraph
+     *            paragraph passed through to field processing
+     * @param block
+     *            element that receives the output
+     * @param characterRuns
+     *            runs to convert
+     * @param start
+     *            index of the first run to process (inclusive)
+     * @param end
+     *            index after the last run to process (exclusive)
+     * @return {@code true} if at least one run passed to
+     *         {@code outputCharacters} contained non-whitespace text
+     * @throws AssertionError
+     *             if {@code characterRuns} contains a {@code null} element in
+     *             the processed range
+     */
+    protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
+            int currentTableLevel, Paragraph paragraph, final Element block,
+            List<CharacterRun> characterRuns, final int start, final int end )
+    {
+        boolean haveAnyText = false;
+
+        for ( int c = start; c < end; c++ )
+        {
+            CharacterRun characterRun = characterRuns.get( c );
+
+            if ( characterRun == null )
+                throw new AssertionError();
+
+            if ( hwpfDocument instanceof HWPFDocument
+                    && ( (HWPFDocument) hwpfDocument ).getPicturesTable()
+                            .hasPicture( characterRun ) )
+            {
+                HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
+                Picture picture = newFormat.getPicturesTable().extractPicture(
+                        characterRun, true );
+
+                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
+                        picture );
+                continue;
+            }
+
+            String text = characterRun.text();
+            if ( text.length() == 0 )
+                continue;
+
+            // Look at the first character itself: String.getBytes() encodes
+            // with the platform default charset, so its first byte is not
+            // guaranteed to be the control character (e.g. with UTF-16).
+            final char firstChar = text.charAt( 0 );
+
+            if ( firstChar == FIELD_BEGIN_MARK )
+            {
+                // tryField returns the index of the field end mark when the
+                // field was processed and c itself otherwise; in both cases
+                // the loop resumes with the run after the returned index
+                c = tryField( hwpfDocument, paragraph, currentTableLevel,
+                        characterRuns, c, block );
+                continue;
+            }
+            if ( firstChar == FIELD_SEPARATOR_MARK
+                    || firstChar == FIELD_END_MARK )
+            {
+                // shall not appear without FIELD_BEGIN_MARK
+                continue;
+            }
+
+            if ( characterRun.isSpecialCharacter() || characterRun.isObj()
+                    || characterRun.isOle2() )
+            {
+                continue;
+            }
+
+            // text is known to be non-empty here, so charAt is safe
+            if ( text.endsWith( "\r" )
+                    || ( text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0 ) )
+                text = text.substring( 0, text.length() - 1 );
+
+            outputCharacters( block, characterRun, text );
+
+            haveAnyText |= text.trim().length() != 0;
+        }
+
+        return haveAnyText;
+    }
+
+    /**
+     * Converts a whole document by passing every section of its range, in
+     * order, to {@link #processSection(HWPFDocumentCore, Section, int)}
+     * together with the zero-based section index.
+     *
+     * @param wordDocument
+     *            document to convert
+     */
+    public void processDocument( HWPFDocumentCore wordDocument )
+    {
+        final Range documentRange = wordDocument.getRange();
+
+        int sectionIndex = 0;
+        while ( sectionIndex < documentRange.numSections() )
+        {
+            final Section section = documentRange.getSection( sectionIndex );
+            processSection( wordDocument, section, sectionIndex );
+            sectionIndex++;
+        }
+    }
+
+    /**
+     * Matches the instruction text of a HYPERLINK field; group 1 is the
+     * quoted target. Compiled once instead of on every processField call.
+     */
+    private static final Pattern HYPERLINK_PATTERN = Pattern
+            .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
+
+    /**
+     * Matches the instruction text of a PAGEREF field that uses the \h
+     * switch; group 1 is the first argument of the field.
+     */
+    private static final Pattern PAGEREF_PATTERN = Pattern
+            .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
+
+    /**
+     * Processes one field whose begin, separator and end marks were located
+     * by
+     * {@link #tryField(HWPFDocumentCore, Paragraph, int, List, int, Element)}.
+     * <p>
+     * The text of the first non-null run between the begin and the separator
+     * mark is matched against the HYPERLINK and PAGEREF patterns; on a match
+     * the field is delegated to {@code processHyperlink} or
+     * {@code processPageref}. Any other field is logged as unsupported and
+     * the runs between separator and end mark are output as plain
+     * characters.
+     *
+     * @param wordDocument
+     *            document being converted
+     * @param currentBlock
+     *            element that receives the output
+     * @param paragraph
+     *            paragraph the field marks were found in
+     * @param currentTableLevel
+     *            table nesting level, passed through to the callbacks
+     * @param characterRuns
+     *            runs passed through to the callbacks
+     * @param beginMark
+     *            index of the run holding the field begin mark
+     * @param separatorMark
+     *            index of the run holding the field separator mark
+     * @param endMark
+     *            index of the run holding the field end mark
+     */
+    protected void processField( HWPFDocumentCore wordDocument,
+            Element currentBlock, Paragraph paragraph, int currentTableLevel,
+            List<CharacterRun> characterRuns, int beginMark, int separatorMark,
+            int endMark )
+    {
+        if ( separatorMark - beginMark > 1 )
+        {
+            // find the first non-null run after the begin mark; null runs
+            // are reported and skipped
+            int index = beginMark + 1;
+            CharacterRun firstAfterBegin = null;
+            while ( index < separatorMark )
+            {
+                firstAfterBegin = paragraph.getCharacterRun( index );
+                if ( firstAfterBegin == null )
+                {
+                    logger.log( POILogger.WARN,
+                            "Paragraph " + paragraph.getStartOffset() + "--"
+                                    + paragraph.getEndOffset()
+                                    + " contains null CharacterRun #" + index );
+                    index++;
+                    continue;
+                }
+                break;
+            }
+
+            if ( firstAfterBegin != null )
+            {
+                final Matcher hyperlinkMatcher = HYPERLINK_PATTERN
+                        .matcher( firstAfterBegin.text() );
+                if ( hyperlinkMatcher.matches() )
+                {
+                    String hyperlink = hyperlinkMatcher.group( 1 );
+                    processHyperlink( wordDocument, currentBlock, paragraph,
+                            characterRuns, currentTableLevel, hyperlink,
+                            separatorMark + 1, endMark );
+                    return;
+                }
+
+                final Matcher pagerefMatcher = PAGEREF_PATTERN
+                        .matcher( firstAfterBegin.text() );
+                if ( pagerefMatcher.matches() )
+                {
+                    String pageref = pagerefMatcher.group( 1 );
+                    processPageref( wordDocument, currentBlock, paragraph,
+                            characterRuns, currentTableLevel, pageref,
+                            separatorMark + 1, endMark );
+                    return;
+                }
+            }
+        }
+
+        StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
+        for ( int i = beginMark; i <= endMark; i++ )
+        {
+            debug.append( "\t" );
+            debug.append( paragraph.getCharacterRun( i ) );
+            debug.append( "\n" );
+        }
+        logger.log( POILogger.WARN, debug );
+
+        // just output field value
+        if ( separatorMark + 1 < endMark )
+            processCharacters( wordDocument, currentTableLevel, paragraph,
+                    currentBlock, characterRuns, separatorMark + 1, endMark );
+    }
+
+    protected abstract void processHyperlink( HWPFDocumentCore wordDocument, // called by processField for a field whose instruction matches HYPERLINK "..."
+            Element currentBlock, Paragraph paragraph,
+            List<CharacterRun> characterRuns, int currentTableLevel,
+            String hyperlink, int i, int endMark ); // hyperlink: the quoted target; i: separatorMark + 1 (first run after the separator); endMark: index of the field end mark
+
+    protected abstract void processImage( Element currentBlock, // called by processCharacters for a run the pictures table reports a picture for
+            boolean inlined, Picture picture ); // inlined: true when the run text starts with character 0x01; picture: extracted from the pictures table
+
+    protected abstract void processPageref( HWPFDocumentCore wordDocument, // called by processField for a field whose instruction matches PAGEREF <name> \h
+            Element currentBlock, Paragraph paragraph,
+            List<CharacterRun> characterRuns, int currentTableLevel,
+            String pageref, int beginTextInclusive, int endTextExclusive ); // pageref: first PAGEREF argument; text runs are [separatorMark + 1, endMark)
+
+    protected abstract void processParagraph( HWPFDocumentCore wordDocument, // called by processSectionParagraphes for each paragraph it neither skips nor hands to processTable
+            Element parentFopElement, int currentTableLevel,
+            Paragraph paragraph, String bulletText ); // bulletText: label from AbstractWordUtils.getBulletText, or AbstractWordUtils.EMPTY when there is no list label
+
+    protected abstract void processSection( HWPFDocumentCore wordDocument, // called by processDocument once per section of the document range
+            Section section, int s ); // s: zero-based section index (processSingleSection always passes 0)
+
+    /**
+     * Converts the paragraphs of {@code range} that belong to table level
+     * {@code currentTableLevel}.
+     * <p>
+     * Tables nested one level deeper are collected first, keyed by their
+     * start offset. A paragraph starting at such an offset is replaced by a
+     * single {@code processTable} call; paragraphs inside a table of any
+     * other level are skipped. Every remaining paragraph is passed to
+     * {@code processParagraph} with its list label, or with
+     * {@link AbstractWordUtils#EMPTY} when it is not a list item or the
+     * document has no list tables (the latter is logged as a warning).
+     *
+     * @param wordDocument
+     *            document being converted
+     * @param flow
+     *            element that receives the output
+     * @param range
+     *            range whose paragraphs are converted
+     * @param currentTableLevel
+     *            table nesting level of {@code range}
+     */
+    protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
+            Element flow, Range range, int currentTableLevel )
+    {
+        final int nestedTableLevel = currentTableLevel + 1;
+
+        // index the nested tables by the offset of their first paragraph
+        final Map<Integer, Table> tablesByStart = new HashMap<Integer, Table>();
+        final TableIterator nestedTables = AbstractWordUtils.newTableIterator(
+                range, nestedTableLevel );
+        while ( nestedTables.hasNext() )
+        {
+            final Table nestedTable = nestedTables.next();
+            tablesByStart.put( Integer.valueOf( nestedTable.getStartOffset() ),
+                    nestedTable );
+        }
+
+        final ListTables listTables = wordDocument.getListTables();
+
+        final int paragraphCount = range.numParagraphs();
+        for ( int p = 0; p < paragraphCount; p++ )
+        {
+            final Paragraph paragraph = range.getParagraph( p );
+            final Integer startOffset = Integer.valueOf( paragraph
+                    .getStartOffset() );
+
+            if ( tablesByStart.containsKey( startOffset ) )
+            {
+                processTable( wordDocument, flow,
+                        tablesByStart.get( startOffset ), nestedTableLevel );
+                continue;
+            }
+
+            final boolean inOtherTable = paragraph.isInTable()
+                    && paragraph.getTableLevel() != currentTableLevel;
+            if ( inOtherTable )
+                continue;
+
+            final int listInfo = paragraph.getIlfo();
+            String label = AbstractWordUtils.EMPTY;
+
+            if ( listInfo != 0 )
+            {
+                if ( listTables == null )
+                {
+                    logger.log( POILogger.WARN,
+                            "Paragraph #" + paragraph.getStartOffset() + "-"
+                                    + paragraph.getEndOffset()
+                                    + " has reference to list structure #"
+                                    + listInfo
+                                    + ", but listTables not defined in file" );
+                }
+                else
+                {
+                    final ListFormatOverride override = listTables
+                            .getOverride( listInfo );
+                    label = AbstractWordUtils.getBulletText( listTables,
+                            paragraph, override.getLsid() );
+                }
+            }
+
+            processParagraph( wordDocument, flow, currentTableLevel,
+                    paragraph, label );
+        }
+    }
+
+    /**
+     * Converts a lone section by delegating to
+     * {@link #processSection(HWPFDocumentCore, Section, int)} with section
+     * index 0.
+     *
+     * @param wordDocument
+     *            document being converted
+     * @param section
+     *            the section to convert
+     */
+    protected void processSingleSection( HWPFDocumentCore wordDocument,
+            Section section )
+    {
+        final int onlySectionIndex = 0;
+        processSection( wordDocument, section, onlySectionIndex );
+    }
+
+    protected abstract void processTable( HWPFDocumentCore wordDocument, // called by processSectionParagraphes when a paragraph starts at the start offset of a nested table
+            Element flow, Table table, int newTableLevel ); // newTableLevel: the caller's currentTableLevel + 1
+
+    /**
+     * Tries to interpret the runs following a field begin mark as a complete
+     * field and, if that succeeds, processes it.
+     * <p>
+     * The runs of {@code paragraph} after {@code beginMark} are scanned for
+     * one field separator mark followed by a field end mark. When both are
+     * found the field is handed to
+     * {@link #processField(HWPFDocumentCore, Element, Paragraph, int, List, int, int, int)}.
+     *
+     * @param wordDocument
+     *            document being converted
+     * @param paragraph
+     *            paragraph whose runs are scanned
+     * @param currentTableLevel
+     *            table nesting level, passed through to processField
+     * @param characterRuns
+     *            runs passed through to processField
+     * @param beginMark
+     *            index of the run holding the field begin mark
+     * @param currentBlock
+     *            element that receives the output
+     * @return the index of the run holding the field end mark if the field
+     *         was processed; {@code beginMark} if a second separator was met
+     *         or the separator or end mark is missing
+     */
+    protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
+            int currentTableLevel, List<CharacterRun> characterRuns,
+            int beginMark, Element currentBlock )
+    {
+        int separatorMark = -1;
+        int endMark = -1;
+        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
+        {
+            CharacterRun characterRun = paragraph.getCharacterRun( c );
+
+            // processField already tolerates null runs returned by the
+            // paragraph; skip them here as well instead of failing with NPE
+            if ( characterRun == null )
+                continue;
+
+            String text = characterRun.text();
+            if ( text.length() == 0 )
+                continue;
+
+            // compare the character, not text.getBytes()[0]: getBytes()
+            // depends on the platform default charset
+            final char firstChar = text.charAt( 0 );
+
+            if ( firstChar == FIELD_SEPARATOR_MARK )
+            {
+                if ( separatorMark != -1 )
+                {
+                    // double;
+                    return beginMark;
+                }
+
+                separatorMark = c;
+                continue;
+            }
+
+            if ( firstChar == FIELD_END_MARK )
+            {
+                // the scan stops at the first end mark
+                endMark = c;
+                break;
+            }
+
+        }
+
+        if ( separatorMark == -1 || endMark == -1 )
+            return beginMark;
+
+        processField( wordDocument, currentBlock, paragraph, currentTableLevel,
+                characterRuns, beginMark, separatorMark, endMark );
+
+        return endMark;
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java
new file mode 100644 (file)
index 0000000..9bbbf73
--- /dev/null
@@ -0,0 +1,404 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.HWPFOldDocument;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.hwpf.model.CHPX;
+import org.apache.poi.hwpf.model.ListLevel;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+public class AbstractWordUtils
+{
+    static final String EMPTY = "";
+
+    private static final POILogger logger = POILogFactory
+            .getLogger( AbstractWordUtils.class );
+
+    public static final float TWIPS_PER_INCH = 1440.0f;
+    public static final int TWIPS_PER_PT = 20;
+
+    /**
+     * Closes the resource without propagating any failure; a failure is
+     * logged at ERROR level instead.
+     *
+     * @param closeable
+     *            resource to close; {@code null} is ignored
+     */
+    static void closeQuietly( final Closeable closeable )
+    {
+        // nothing to close; avoids logging a NullPointerException as an error
+        if ( closeable == null )
+            return;
+
+        try
+        {
+            closeable.close();
+        }
+        catch ( Exception exc )
+        {
+            logger.log( POILogger.ERROR, "Unable to close resource: " + exc,
+                    exc );
+        }
+    }
+
+    /**
+     * Null-safe string comparison.
+     *
+     * @return {@code true} if both arguments are {@code null} or
+     *         {@code str1.equals( str2 )}
+     */
+    static boolean equals( String str1, String str2 )
+    {
+        if ( str1 == null )
+            return str2 == null;
+
+        return str1.equals( str2 );
+    }
+
+    // XXX incorporate into Range
+    /**
+     * Collects the character runs of {@code range} whose CHPX overlaps (or
+     * touches) the interval between the start and end offset of the range.
+     * Null CHPX entries and CHPX entries that yield no character run are
+     * left out.
+     *
+     * @param range
+     *            range to inspect
+     * @return the matching character runs, in CHPX order
+     */
+    static List<CharacterRun> findCharacterRuns( Range range )
+    {
+        final int rangeStart = range.getStartOffset();
+        final int rangeEnd = range.getEndOffset();
+
+        final List<CharacterRun> runs = new ArrayList<CharacterRun>();
+        for ( CHPX chpx : getCharacters( range ) )
+        {
+            if ( chpx == null )
+                continue;
+
+            final int overlapStart = Math.max( rangeStart, chpx.getStart() );
+            final int overlapEnd = Math.min( rangeEnd, chpx.getEnd() );
+            if ( overlapStart > overlapEnd )
+                continue;
+
+            final CharacterRun run = getCharacterRun( range, chpx );
+            if ( run != null )
+                runs.add( run );
+        }
+
+        return runs;
+    }
+
+    /**
+     * Maps the border type of a Word border code to a CSS / XSL-FO
+     * border-style keyword.
+     *
+     * @param borderCode
+     *            border to describe
+     * @return one of "solid", "double", "dotted", "dashed", "ridge" or
+     *         "groove"; "solid" for every type without a dedicated mapping
+     * @throws IllegalArgumentException
+     *             if {@code borderCode} is {@code null}
+     */
+    public static String getBorderType( BorderCode borderCode )
+    {
+        if ( borderCode == null )
+            throw new IllegalArgumentException( "borderCode is null" );
+
+        switch ( borderCode.getBorderType() )
+        {
+        case 3:
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 15:
+        case 16:
+        case 17:
+        case 18:
+        case 19:
+        case 21:
+            return "double";
+        case 6:
+        case 9:
+            return "dotted";
+        case 7:
+        case 8:
+        case 22:
+        case 23:
+            return "dashed";
+        case 24:
+            return "ridge";
+        case 25:
+            // the border-style keyword is "groove"; "grooved" is not a
+            // valid value in CSS or XSL-FO
+            return "groove";
+        default:
+            // includes types 1, 2, 5 and 20
+            return "solid";
+        }
+    }
+
+    /**
+     * Formats the line width of a border as a point value such as
+     * "1.125pt". The line width is treated as a count of eighths of a
+     * point: the integer part is {@code lineWidth / 8}, the digits after
+     * the dot are the remainder multiplied by 125.
+     *
+     * @param borderCode
+     *            border whose line width is formatted
+     * @return the width followed by "pt"
+     */
+    public static String getBorderWidth( BorderCode borderCode )
+    {
+        final int eighths = borderCode.getLineWidth();
+        final int wholePoints = eighths / 8;
+        final int remainder = eighths % 8;
+
+        return wholePoints + "." + ( 1000 / 8 * remainder ) + "pt";
+    }
+
+    /**
+     * Builds the bullet / number label of a list paragraph.
+     * <p>
+     * Each character of the level's number text with a value below 9 is
+     * treated as the index of a list level and replaced by that level's
+     * current start-at number, formatted with the number format of the
+     * paragraph's own level; all other characters are copied literally. The
+     * label is terminated by a tab (follow character 0), a space (follow
+     * character 1) or nothing (any other value).
+     * <p>
+     * <b>Side effect:</b> when a placeholder refers to the paragraph's own
+     * level, the start-at value of that level is incremented, so successive
+     * calls produce successive numbers.
+     *
+     * @param listTables
+     *            list tables of the document
+     * @param paragraph
+     *            list paragraph; its ilvl selects the list level
+     * @param listId
+     *            identifier of the list the paragraph belongs to
+     * @return the label, or {@link #EMPTY} if the level has no number text
+     */
+    public static String getBulletText( ListTables listTables,
+            Paragraph paragraph, int listId )
+    {
+        final ListLevel listLevel = listTables.getLevel( listId,
+                paragraph.getIlvl() );
+
+        if ( listLevel.getNumberText() == null )
+            return EMPTY;
+
+        // local, single-threaded buffer: StringBuilder is sufficient
+        StringBuilder bulletBuffer = new StringBuilder();
+        char[] xst = listLevel.getNumberText().toCharArray();
+        for ( char element : xst )
+        {
+            if ( element < 9 )
+            {
+                ListLevel numLevel = listTables.getLevel( listId, element );
+
+                int num = numLevel.getStartAt();
+                bulletBuffer.append( NumberFormatter.getNumber( num,
+                        listLevel.getNumberFormat() ) );
+
+                // advance the counter of the paragraph's own level only
+                if ( numLevel == listLevel )
+                {
+                    numLevel.setStartAt( numLevel.getStartAt() + 1 );
+                }
+
+            }
+            else
+            {
+                bulletBuffer.append( element );
+            }
+        }
+
+        byte follow = getIxchFollow( listLevel );
+        switch ( follow )
+        {
+        case 0:
+            bulletBuffer.append( "\t" );
+            break;
+        case 1:
+            bulletBuffer.append( " " );
+            break;
+        default:
+            break;
+        }
+
+        return bulletBuffer.toString();
+    }
+
+    /**
+     * Reflectively invokes the declared {@code getCharacterRun( CHPX )}
+     * method of {@link Range} on {@code range}.
+     *
+     * @return the result of the invoked method
+     * @throws Error
+     *             wrapping any exception raised by the reflective call
+     */
+    private static CharacterRun getCharacterRun( Range range, CHPX chpx )
+    {
+        try
+        {
+            final Method accessor = Range.class.getDeclaredMethod(
+                    "getCharacterRun", CHPX.class );
+            accessor.setAccessible( true );
+
+            final Object run = accessor.invoke( range, chpx );
+            return (CharacterRun) run;
+        }
+        catch ( Exception exc )
+        {
+            throw new Error( exc );
+        }
+    }
+
+    /**
+     * Reflectively reads the declared {@code _characters} field of
+     * {@link Range}.
+     *
+     * @param range
+     *            range to read the field from
+     * @return the value of the field
+     * @throws Error
+     *             wrapping any exception raised by the reflective access
+     */
+    private static List<CHPX> getCharacters( Range range )
+    {
+        try
+        {
+            Field field = Range.class.getDeclaredField( "_characters" );
+            field.setAccessible( true );
+
+            // the element type cannot be checked at runtime; the cast
+            // relies on how Range declares the field
+            @SuppressWarnings( "unchecked" )
+            final List<CHPX> characters = (List<CHPX>) field.get( range );
+            return characters;
+        }
+        catch ( Exception exc )
+        {
+            throw new Error( exc );
+        }
+    }
+
+    /**
+     * Maps a colour index to a colour name.
+     *
+     * @param ico
+     *            colour index; values 1 to 16 have a dedicated name
+     * @return the colour name, "black" for any index outside 1..16
+     */
+    public static String getColor( int ico )
+    {
+        // names for indexes 1..16, stored at position ico - 1
+        final String[] names = { "black", "blue", "cyan", "green", "magenta",
+                "red", "yellow", "white", "darkblue", "darkcyan", "darkgreen",
+                "darkmagenta", "darkred", "darkyellow", "darkgray",
+                "lightgray" };
+
+        if ( ico < 1 || ico > names.length )
+            return "black";
+
+        return names[ico - 1];
+    }
+
+    /**
+     * Reflectively reads the declared {@code _ixchFollow} field of
+     * {@link ListLevel}.
+     *
+     * @param listLevel
+     *            list level to read the field from
+     * @return the value of the field as a {@code byte}
+     * @throws Error
+     *             wrapping any exception raised while reading or unboxing
+     *             the field
+     */
+    public static byte getIxchFollow( ListLevel listLevel )
+    {
+        try
+        {
+            final Field followField = ListLevel.class
+                    .getDeclaredField( "_ixchFollow" );
+            followField.setAccessible( true );
+
+            final Byte follow = (Byte) followField.get( listLevel );
+            return follow.byteValue();
+        }
+        catch ( Exception exc )
+        {
+            throw new Error( exc );
+        }
+    }
+
+    /**
+     * Maps a justification code to a text alignment keyword.
+     *
+     * @param js
+     *            justification code; values 0 to 9 have a mapping
+     * @return "start", "center", "end", "justify" or "left"; the empty
+     *         string for any other code
+     */
+    public static String getJustification( int js )
+    {
+        switch ( js )
+        {
+        case 0:
+        case 7:
+            return "start";
+        case 1:
+        case 5:
+            return "center";
+        case 2:
+        case 8:
+            return "end";
+        case 3:
+        case 4:
+        case 9:
+            return "justify";
+        case 6:
+            return "left";
+        default:
+            return "";
+        }
+    }
+
+    /**
+     * Returns the label of a numbered list item. Only the decimal
+     * representation is implemented: any {@code format} other than 0 is
+     * reported through the class logger and the decimal representation is
+     * returned nevertheless.
+     *
+     * @param number
+     *            item number
+     * @param format
+     *            number format code; only 0 is supported
+     * @return {@code number} as a decimal string
+     */
+    public static String getListItemNumberLabel( int number, int format )
+    {
+        // report through the logger used by the rest of this class instead
+        // of writing to System.err
+        if ( format != 0 )
+            logger.log( POILogger.WARN, "NYI: getListItemNumberLabel(): "
+                    + format );
+
+        return String.valueOf( number );
+    }
+
+    /**
+     * Reflectively reads the declared {@code _props} field of
+     * {@link Section}.
+     *
+     * @param section
+     *            section to read the field from
+     * @return the value of the field
+     * @throws Error
+     *             wrapping any exception raised by the reflective access
+     */
+    public static SectionProperties getSectionProperties( Section section )
+    {
+        try
+        {
+            final Field propsField = Section.class
+                    .getDeclaredField( "_props" );
+            propsField.setAccessible( true );
+
+            final Object props = propsField.get( section );
+            return (SectionProperties) props;
+        }
+        catch ( Exception exc )
+        {
+            throw new Error( exc );
+        }
+    }
+
+    /**
+     * @return {@code true} if {@code str} is {@code null} or has length 0
+     */
+    static boolean isEmpty( String str )
+    {
+        if ( str == null )
+            return true;
+
+        return str.length() == 0;
+    }
+
+    /**
+     * Negation of {@link #isEmpty(String)}.
+     *
+     * @return {@code true} if {@code str} is neither {@code null} nor of
+     *         length 0
+     */
+    static boolean isNotEmpty( String str )
+    {
+        if ( isEmpty( str ) )
+            return false;
+
+        return true;
+    }
+
+    /**
+     * Loads a Word document from a file. The file stream is always closed
+     * through {@code closeQuietly} before this method returns or throws.
+     *
+     * @param docFile
+     *            file to read
+     * @return the document produced by {@code loadDoc( InputStream )}
+     * @throws IOException
+     *             if the file cannot be opened or read
+     */
+    public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
+    {
+        final FileInputStream docStream = new FileInputStream( docFile );
+
+        final HWPFDocumentCore document;
+        try
+        {
+            document = loadDoc( docStream );
+        }
+        finally
+        {
+            closeQuietly( docStream );
+        }
+        return document;
+    }
+
+    /**
+     * Loads a Word document from a stream. An {@link HWPFDocument} is built
+     * first; if that fails with {@link OldWordFileFormatException}, an
+     * {@link HWPFOldDocument} is built from the same file system instead.
+     *
+     * @param inputStream
+     *            stream to read; it is not closed by this method
+     * @return the loaded document
+     * @throws IOException
+     *             if the stream cannot be read
+     */
+    public static HWPFDocumentCore loadDoc( InputStream inputStream )
+            throws IOException
+    {
+        final POIFSFileSystem fileSystem = HWPFDocumentCore
+                .verifyAndBuildPOIFS( inputStream );
+
+        HWPFDocumentCore document;
+        try
+        {
+            document = new HWPFDocument( fileSystem );
+        }
+        catch ( OldWordFileFormatException oldFormat )
+        {
+            // fall back to the reader for old Word formats
+            document = new HWPFOldDocument( fileSystem );
+        }
+        return document;
+    }
+
+    /**
+     * Creates a {@link TableIterator} through its declared
+     * {@code ( Range, int )} constructor, which is invoked reflectively.
+     *
+     * @param range
+     *            range passed to the constructor
+     * @param level
+     *            level passed to the constructor
+     * @return the new iterator
+     * @throws Error
+     *             wrapping any exception raised by the reflective call
+     */
+    public static TableIterator newTableIterator( Range range, int level )
+    {
+        try
+        {
+            final Constructor<TableIterator> hiddenConstructor = TableIterator.class
+                    .getDeclaredConstructor( Range.class, int.class );
+            hiddenConstructor.setAccessible( true );
+
+            final Object[] arguments = { range, Integer.valueOf( level ) };
+            return hiddenConstructor.newInstance( arguments );
+        }
+        catch ( Exception exc )
+        {
+            throw new Error( exc );
+        }
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/FoDocumentFacade.java
new file mode 100644 (file)
index 0000000..8a944a9
--- /dev/null
@@ -0,0 +1,201 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+public class FoDocumentFacade
+{
+    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
+
+    protected final Document document;
+    protected final Element layoutMasterSet;
+    protected final Element root;
+
+    public FoDocumentFacade( Document document )
+    {
+        this.document = document;
+
+        root = document.createElementNS( NS_XSLFO, "fo:root" );
+        document.appendChild( root );
+
+        layoutMasterSet = document.createElementNS( NS_XSLFO,
+                "fo:layout-master-set" );
+        root.appendChild( layoutMasterSet );
+    }
+
+    public Element addFlowToPageSequence( final Element pageSequence,
+            String flowName )
+    {
+        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
+        flow.setAttribute( "flow-name", flowName );
+        pageSequence.appendChild( flow );
+
+        return flow;
+    }
+
+    public Element addListItem( Element listBlock )
+    {
+        Element result = createListItem();
+        listBlock.appendChild( result );
+        return result;
+    }
+
+    public Element addListItemBody( Element listItem )
+    {
+        Element result = createListItemBody();
+        listItem.appendChild( result );
+        return result;
+    }
+
+    public Element addListItemLabel( Element listItem, String text )
+    {
+        Element result = createListItemLabel( text );
+        listItem.appendChild( result );
+        return result;
+    }
+
+    public Element addPageSequence( String pageMaster )
+    {
+        final Element pageSequence = document.createElementNS( NS_XSLFO,
+                "fo:page-sequence" );
+        pageSequence.setAttribute( "master-reference", pageMaster );
+        root.appendChild( pageSequence );
+        return pageSequence;
+    }
+
+    public Element addRegionBody( Element pageMaster )
+    {
+        final Element regionBody = document.createElementNS( NS_XSLFO,
+                "fo:region-body" );
+        pageMaster.appendChild( regionBody );
+
+        return regionBody;
+    }
+
+    public Element addSimplePageMaster( String masterName )
+    {
+        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
+                "fo:simple-page-master" );
+        simplePageMaster.setAttribute( "master-name", masterName );
+        layoutMasterSet.appendChild( simplePageMaster );
+
+        return simplePageMaster;
+    }
+
+    protected Element createBasicLinkExternal( String externalDestination )
+    {
+        final Element basicLink = document.createElementNS( NS_XSLFO,
+                "fo:basic-link" );
+        basicLink.setAttribute( "external-destination", externalDestination );
+        return basicLink;
+    }
+
+    public Element createBasicLinkInternal( String internalDestination )
+    {
+        final Element basicLink = document.createElementNS( NS_XSLFO,
+                "fo:basic-link" );
+        basicLink.setAttribute( "internal-destination", internalDestination );
+        return basicLink;
+    }
+
+    public Element createBlock()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:block" );
+    }
+
+    public Element createExternalGraphic( String source )
+    {
+        Element result = document.createElementNS( NS_XSLFO,
+                "fo:external-graphic" );
+        result.setAttribute( "src", "url('" + source + "')" );
+        return result;
+    }
+
+    public Element createInline()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:inline" );
+    }
+
+    public Element createLeader()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:leader" );
+    }
+
+    public Element createListBlock()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:list-block" );
+    }
+
+    public Element createListItem()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:list-item" );
+    }
+
+    public Element createListItemBody()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
+    }
+
+    public Element createListItemLabel( String text )
+    {
+        Element result = document.createElementNS( NS_XSLFO,
+                "fo:list-item-label" );
+        Element block = createBlock();
+        block.appendChild( document.createTextNode( text ) );
+        result.appendChild( block );
+        return result;
+    }
+
+    protected Element createTable()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:table" );
+    }
+
+    protected Element createTableBody()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:table-body" );
+    }
+
+    protected Element createTableCell()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
+    }
+
+    protected Element createTableHeader()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:table-header" );
+    }
+
+    protected Element createTableRow()
+    {
+        return document.createElementNS( NS_XSLFO, "fo:table-row" );
+    }
+
+    protected Text createText( String data )
+    {
+        return document.createTextNode( data );
+    }
+
+    public Document getDocument()
+    {
+        return document;
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java
new file mode 100644 (file)
index 0000000..506ba6d
--- /dev/null
@@ -0,0 +1,107 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+public class HtmlDocumentFacade
+{
+
+    protected final Element body;
+    protected final Document document;
+    protected final Element head;
+    protected final Element html;
+
+    public HtmlDocumentFacade( Document document )
+    {
+        this.document = document;
+
+        html = document.createElement( "html" );
+        document.appendChild( html );
+
+        body = document.createElement( "body" );
+        head = document.createElement( "head" );
+
+        html.appendChild( head );
+        html.appendChild( body );
+    }
+
+    public Element createHyperlink( String internalDestination )
+    {
+        final Element basicLink = document.createElement( "a" );
+        basicLink.setAttribute( "href", internalDestination );
+        return basicLink;
+    }
+
+    public Element createListItem()
+    {
+        return document.createElement( "li" );
+    }
+
+    public Element createParagraph()
+    {
+        return document.createElement( "p" );
+    }
+
+    public Element createTable()
+    {
+        return document.createElement( "table" );
+    }
+
+    public Element createTableBody()
+    {
+        return document.createElement( "tbody" );
+    }
+
+    public Element createTableCell()
+    {
+        return document.createElement( "td" );
+    }
+
+    public Element createTableHeader()
+    {
+        return document.createElement( "thead" );
+    }
+
+    public Element createTableHeaderCell()
+    {
+        return document.createElement( "th" );
+    }
+
+    public Element createTableRow()
+    {
+        return document.createElement( "tr" );
+    }
+
+    public Text createText( String data )
+    {
+        return document.createTextNode( data );
+    }
+
+    public Element createUnorderedList()
+    {
+        return document.createElement( "ul" );
+    }
+
+    public Document getDocument()
+    {
+        return document;
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/NumberFormatter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/NumberFormatter.java
new file mode 100644 (file)
index 0000000..9897cfb
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ *  ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one or more
+ *    contributor license agreements.  See the NOTICE file distributed with
+ *    this work for additional information regarding copyright ownership.
+ *    The ASF licenses this file to You under the Apache License, Version 2.0
+ *    (the "License"); you may not use this file except in compliance with
+ *    the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.hwpf.converter;
+
+/**
+ * Comment me
+ * 
+ * @author Ryan Ackley
+ */
+public final class NumberFormatter
+{
+
+    private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e",
+            "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
+            "s", "t", "u", "v", "x", "y", "z" };
+
+    private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv",
+            "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv",
+            "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii",
+            "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx", "xxxi",
+            "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii",
+            "xxxviii", "xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv",
+            "xlvi", "xlvii", "xlviii", "xlix", "l" };
+
+    private final static int T_ARABIC = 0;
+    private final static int T_LOWER_LETTER = 4;
+    private final static int T_LOWER_ROMAN = 2;
+    private final static int T_ORDINAL = 5;
+    private final static int T_UPPER_LETTER = 3;
+    private final static int T_UPPER_ROMAN = 1;
+
+    public static String getNumber( int num, int style )
+    {
+        switch ( style )
+        {
+        case T_UPPER_ROMAN:
+            return C_ROMAN[num - 1].toUpperCase();
+        case T_LOWER_ROMAN:
+            return C_ROMAN[num - 1];
+        case T_UPPER_LETTER:
+            return C_LETTERS[num - 1].toUpperCase();
+        case T_LOWER_LETTER:
+            return C_LETTERS[num - 1];
+        case T_ARABIC:
+        case T_ORDINAL:
+        default:
+            return String.valueOf( num );
+        }
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java
new file mode 100644 (file)
index 0000000..5744a7b
--- /dev/null
@@ -0,0 +1,626 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.model.ListFormatOverride;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+/**
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class WordToFoConverter extends AbstractWordConverter
+{
+
+    /**
+     * Holds properties values, applied to current <tt>fo:block</tt> element.
+     * Those properties shall not be doubled in children <tt>fo:inline</tt>
+     * elements.
+     */
+    private static class BlockProperies
+    {
+        final boolean pBold;
+        final String pFontName;
+        final int pFontSize;
+        final boolean pItalic;
+
+        public BlockProperies( String pFontName, int pFontSize, boolean pBold,
+                boolean pItalic )
+        {
+            this.pFontName = pFontName;
+            this.pFontSize = pFontSize;
+            this.pBold = pBold;
+            this.pItalic = pItalic;
+        }
+    }
+
+    private static final POILogger logger = POILogFactory
+            .getLogger( WordToFoConverter.class );
+
+    public static String getBorderType( BorderCode borderCode )
+    {
+        if ( borderCode == null )
+            throw new IllegalArgumentException( "borderCode is null" );
+
+        switch ( borderCode.getBorderType() )
+        {
+        case 1:
+        case 2:
+            return "solid";
+        case 3:
+            return "double";
+        case 5:
+            return "solid";
+        case 6:
+            return "dotted";
+        case 7:
+        case 8:
+            return "dashed";
+        case 9:
+            return "dotted";
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 15:
+        case 16:
+        case 17:
+        case 18:
+        case 19:
+            return "double";
+        case 20:
+            return "solid";
+        case 21:
+            return "double";
+        case 22:
+            return "dashed";
+        case 23:
+            return "dashed";
+        case 24:
+            return "ridge";
+        case 25:
+            return "grooved";
+        default:
+            return "solid";
+        }
+    }
+
+    /**
+     * Java main() interface to interact with {@link WordToFoConverter}
+     * 
+     * <p>
+     * Usage: WordToFoConverter infile outfile
+     * </p>
+     * Where infile is an input .doc file ( Word 97-2007) which will be rendered
+     * as XSL-FO into outfile
+     */
+    public static void main( String[] args )
+    {
+        if ( args.length < 2 )
+        {
+            System.err
+                    .println( "Usage: WordToFoConverter <inputFile.doc> <saveTo.fo>" );
+            return;
+        }
+
+        System.out.println( "Converting " + args[0] );
+        System.out.println( "Saving output to " + args[1] );
+        try
+        {
+            Document doc = WordToFoConverter.process( new File( args[0] ) );
+
+            FileWriter out = new FileWriter( args[1] );
+            DOMSource domSource = new DOMSource( doc );
+            StreamResult streamResult = new StreamResult( out );
+            TransformerFactory tf = TransformerFactory.newInstance();
+            Transformer serializer = tf.newTransformer();
+            // TODO set encoding from a command argument
+            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
+            serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
+            serializer.transform( domSource, streamResult );
+            out.close();
+        }
+        catch ( Exception e )
+        {
+            e.printStackTrace();
+        }
+    }
+
+    static Document process( File docFile ) throws Exception
+    {
+        final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
+        WordToFoConverter wordToFoConverter = new WordToFoConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToFoConverter.processDocument( hwpfDocument );
+        return wordToFoConverter.getDocument();
+    }
+
+    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
+
+    protected final FoDocumentFacade foDocumentFacade;
+
+    /**
+     * Creates new instance of {@link WordToFoConverter}. Can be used for output
+     * several {@link HWPFDocument}s into single FO document.
+     * 
+     * @param document
+     *            XML DOM Document used as XSL FO document. Shall support
+     *            namespaces
+     */
+    public WordToFoConverter( Document document )
+    {
+        this.foDocumentFacade = new FoDocumentFacade( document );
+    }
+
+    protected String createPageMaster( SectionProperties sep, String type,
+            int section )
+    {
+        float height = sep.getYaPage() / WordToFoUtils.TWIPS_PER_INCH;
+        float width = sep.getXaPage() / WordToFoUtils.TWIPS_PER_INCH;
+        float leftMargin = sep.getDxaLeft() / WordToFoUtils.TWIPS_PER_INCH;
+        float rightMargin = sep.getDxaRight() / WordToFoUtils.TWIPS_PER_INCH;
+        float topMargin = sep.getDyaTop() / WordToFoUtils.TWIPS_PER_INCH;
+        float bottomMargin = sep.getDyaBottom() / WordToFoUtils.TWIPS_PER_INCH;
+
+        // add these to the header
+        String pageMasterName = type + "-page" + section;
+
+        Element pageMaster = foDocumentFacade
+                .addSimplePageMaster( pageMasterName );
+        pageMaster.setAttribute( "page-height", height + "in" );
+        pageMaster.setAttribute( "page-width", width + "in" );
+
+        Element regionBody = foDocumentFacade.addRegionBody( pageMaster );
+        regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
+                + "in " + bottomMargin + "in " + leftMargin + "in" );
+
+        /*
+         * 6.4.14 fo:region-body
+         * 
+         * The values of the padding and border-width traits must be "0".
+         */
+        // WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top");
+        // WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom");
+        // WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
+        // WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
+
+        if ( sep.getCcolM1() > 0 )
+        {
+            regionBody.setAttribute( "column-count", ""
+                    + ( sep.getCcolM1() + 1 ) );
+            if ( sep.getFEvenlySpaced() )
+            {
+                regionBody.setAttribute( "column-gap",
+                        ( sep.getDxaColumns() / WordToFoUtils.TWIPS_PER_INCH )
+                                + "in" );
+            }
+            else
+            {
+                regionBody.setAttribute( "column-gap", "0.25in" );
+            }
+        }
+
+        return pageMasterName;
+    }
+
+    public Document getDocument()
+    {
+        return foDocumentFacade.getDocument();
+    }
+
+    @Override
+    protected void outputCharacters( Element block, CharacterRun characterRun,
+            String text )
+    {
+        BlockProperies blockProperies = this.blocksProperies.peek();
+        Element inline = foDocumentFacade.createInline();
+        if ( characterRun.isBold() != blockProperies.pBold )
+        {
+            WordToFoUtils.setBold( inline, characterRun.isBold() );
+        }
+        if ( characterRun.isItalic() != blockProperies.pItalic )
+        {
+            WordToFoUtils.setItalic( inline, characterRun.isItalic() );
+        }
+        if ( characterRun.getFontName() != null
+                && !AbstractWordUtils.equals( characterRun.getFontName(),
+                        blockProperies.pFontName ) )
+        {
+            WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
+        }
+        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
+        {
+            WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 );
+        }
+        WordToFoUtils.setCharactersProperties( characterRun, inline );
+        block.appendChild( inline );
+
+        Text textNode = foDocumentFacade.createText( text );
+        inline.appendChild( textNode );
+    }
+
+    protected void processHyperlink( HWPFDocumentCore hwpfDocument,
+            Element currentBlock, Paragraph paragraph,
+            List<CharacterRun> characterRuns, int currentTableLevel,
+            String hyperlink, int beginTextInclusive, int endTextExclusive )
+    {
+        Element basicLink = foDocumentFacade
+                .createBasicLinkExternal( hyperlink );
+        currentBlock.appendChild( basicLink );
+
+        if ( beginTextInclusive < endTextExclusive )
+            processCharacters( hwpfDocument, currentTableLevel, paragraph,
+                    basicLink, characterRuns, beginTextInclusive,
+                    endTextExclusive );
+    }
+
+    /**
+     * This method shall store image bytes in external file and convert it if
+     * necessary. Images shall be stored using PNG format (for bitmap) or SVG
+     * (for vector). Other formats may be not supported by your XSL FO
+     * processor.
+     * <p>
+     * Please note the
+     * {@link WordToFoUtils#setPictureProperties(Picture, Element)} method.
+     * 
+     * @param currentBlock
+     *            currently processed FO element, like <tt>fo:block</tt>. Shall
+     *            be used as parent of newly created
+     *            <tt>fo:external-graphic</tt> or
+     *            <tt>fo:instream-foreign-object</tt>
+     * @param inlined
+     *            if image is inlined
+     * @param picture
+     *            HWPF object, contained picture data and properties
+     */
+    protected void processImage( Element currentBlock, boolean inlined,
+            Picture picture )
+    {
+        // no default implementation -- skip
+        currentBlock.appendChild( foDocumentFacade.getDocument().createComment(
+                "Image link to '" + picture.suggestFullFileName()
+                        + "' can be here" ) );
+    }
+
+    protected void processPageref( HWPFDocumentCore hwpfDocument,
+            Element currentBlock, Paragraph paragraph,
+            List<CharacterRun> characterRuns, int currentTableLevel,
+            String pageref, int beginTextInclusive, int endTextExclusive )
+    {
+        Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
+        currentBlock.appendChild( basicLink );
+
+        if ( beginTextInclusive < endTextExclusive )
+            processCharacters( hwpfDocument, currentTableLevel, paragraph,
+                    basicLink, characterRuns, beginTextInclusive,
+                    endTextExclusive );
+    }
+
+    protected void processParagraph( HWPFDocumentCore hwpfDocument,
+            Element parentFopElement, int currentTableLevel,
+            Paragraph paragraph, String bulletText )
+    {
+        final Element block = foDocumentFacade.createBlock();
+        parentFopElement.appendChild( block );
+
+        WordToFoUtils.setParagraphProperties( paragraph, block );
+
+        final int charRuns = paragraph.numCharacterRuns();
+
+        if ( charRuns == 0 )
+        {
+            return;
+        }
+
+        {
+            final String pFontName;
+            final int pFontSize;
+            final boolean pBold;
+            final boolean pItalic;
+            {
+                CharacterRun characterRun = paragraph.getCharacterRun( 0 );
+                pFontSize = characterRun.getFontSize() / 2;
+                pFontName = characterRun.getFontName();
+                pBold = characterRun.isBold();
+                pItalic = characterRun.isItalic();
+            }
+            WordToFoUtils.setFontFamily( block, pFontName );
+            WordToFoUtils.setFontSize( block, pFontSize );
+            WordToFoUtils.setBold( block, pBold );
+            WordToFoUtils.setItalic( block, pItalic );
+
+            blocksProperies.push( new BlockProperies( pFontName, pFontSize,
+                    pBold, pItalic ) );
+        }
+        try
+        {
+            boolean haveAnyText = false;
+
+            if ( WordToFoUtils.isNotEmpty( bulletText ) )
+            {
+                Element inline = foDocumentFacade.createInline();
+                block.appendChild( inline );
+
+                Text textNode = foDocumentFacade.createText( bulletText );
+                inline.appendChild( textNode );
+
+                haveAnyText |= bulletText.trim().length() != 0;
+            }
+
+            List<CharacterRun> characterRuns = WordToFoUtils
+                    .findCharacterRuns( paragraph );
+            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
+                    paragraph, block, characterRuns, 0, characterRuns.size() );
+
+            if ( !haveAnyText )
+            {
+                Element leader = foDocumentFacade.createLeader();
+                block.appendChild( leader );
+            }
+        }
+        finally
+        {
+            blocksProperies.pop();
+        }
+
+        return;
+    }
+
+    protected void processSection( HWPFDocumentCore wordDocument,
+            Section section, int sectionCounter )
+    {
+        String regularPage = createPageMaster(
+                WordToFoUtils.getSectionProperties( section ), "page",
+                sectionCounter );
+
+        Element pageSequence = foDocumentFacade.addPageSequence( regularPage );
+        Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
+                "xsl-region-body" );
+
+        processSectionParagraphes( wordDocument, flow, section, 0 );
+    }
+
+    protected void processSectionParagraphes( HWPFDocument wordDocument,
+            Element flow, Range range, int currentTableLevel )
+    {
+        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
+        for ( TableIterator tableIterator = WordToFoUtils.newTableIterator(
+                range, currentTableLevel + 1 ); tableIterator.hasNext(); )
+        {
+            Table next = tableIterator.next();
+            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
+        }
+
+        final ListTables listTables = wordDocument.getListTables();
+        int currentListInfo = 0;
+
+        final int paragraphs = range.numParagraphs();
+        for ( int p = 0; p < paragraphs; p++ )
+        {
+            Paragraph paragraph = range.getParagraph( p );
+
+            if ( allTables.containsKey( Integer.valueOf( paragraph
+                    .getStartOffset() ) ) )
+            {
+                Table table = allTables.get( Integer.valueOf( paragraph
+                        .getStartOffset() ) );
+                processTable( wordDocument, flow, table, currentTableLevel + 1 );
+                continue;
+            }
+
+            if ( paragraph.isInTable()
+                    && paragraph.getTableLevel() != currentTableLevel )
+            {
+                continue;
+            }
+
+            if ( paragraph.getIlfo() != currentListInfo )
+            {
+                currentListInfo = paragraph.getIlfo();
+            }
+
+            if ( currentListInfo != 0 )
+            {
+                if ( listTables != null )
+                {
+                    final ListFormatOverride listFormatOverride = listTables
+                            .getOverride( paragraph.getIlfo() );
+
+                    String label = WordToFoUtils.getBulletText( listTables,
+                            paragraph, listFormatOverride.getLsid() );
+
+                    processParagraph( wordDocument, flow, currentTableLevel,
+                            paragraph, label );
+                }
+                else
+                {
+                    logger.log( POILogger.WARN,
+                            "Paragraph #" + paragraph.getStartOffset() + "-"
+                                    + paragraph.getEndOffset()
+                                    + " has reference to list structure #"
+                                    + currentListInfo
+                                    + ", but listTables not defined in file" );
+
+                    processParagraph( wordDocument, flow, currentTableLevel,
+                            paragraph, WordToFoUtils.EMPTY );
+                }
+            }
+            else
+            {
+                processParagraph( wordDocument, flow, currentTableLevel,
+                        paragraph, WordToFoUtils.EMPTY );
+            }
+        }
+
+    }
+
+    protected void processTable( HWPFDocumentCore wordDocument, Element flow,
+            Table table, int thisTableLevel )
+    {
+        Element tableHeader = foDocumentFacade.createTableHeader();
+        Element tableBody = foDocumentFacade.createTableBody();
+
+        final int tableRows = table.numRows();
+
+        int maxColumns = Integer.MIN_VALUE;
+        for ( int r = 0; r < tableRows; r++ )
+        {
+            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
+        }
+
+        for ( int r = 0; r < tableRows; r++ )
+        {
+            TableRow tableRow = table.getRow( r );
+
+            Element tableRowElement = foDocumentFacade.createTableRow();
+            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
+
+            final int rowCells = tableRow.numCells();
+            for ( int c = 0; c < rowCells; c++ )
+            {
+                TableCell tableCell = tableRow.getCell( c );
+
+                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
+                    continue;
+
+                if ( tableCell.isVerticallyMerged()
+                        && !tableCell.isFirstVerticallyMerged() )
+                    continue;
+
+                Element tableCellElement = foDocumentFacade.createTableCell();
+                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
+                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
+                        c == rowCells - 1 );
+
+                if ( tableCell.isFirstMerged() )
+                {
+                    int count = 0;
+                    for ( int c1 = c; c1 < rowCells; c1++ )
+                    {
+                        TableCell nextCell = tableRow.getCell( c1 );
+                        if ( nextCell.isMerged() )
+                            count++;
+                        if ( !nextCell.isMerged() )
+                            break;
+                    }
+                    tableCellElement.setAttribute( "number-columns-spanned", ""
+                            + count );
+                }
+                else
+                {
+                    if ( c == rowCells - 1 && c != maxColumns - 1 )
+                    {
+                        tableCellElement.setAttribute(
+                                "number-columns-spanned", ""
+                                        + ( maxColumns - c ) );
+                    }
+                }
+
+                if ( tableCell.isFirstVerticallyMerged() )
+                {
+                    int count = 0;
+                    for ( int r1 = r; r1 < tableRows; r1++ )
+                    {
+                        TableRow nextRow = table.getRow( r1 );
+                        if ( nextRow.numCells() < c )
+                            break;
+                        TableCell nextCell = nextRow.getCell( c );
+                        if ( nextCell.isVerticallyMerged() )
+                            count++;
+                        if ( !nextCell.isVerticallyMerged() )
+                            break;
+                    }
+                    tableCellElement.setAttribute( "number-rows-spanned", ""
+                            + count );
+                }
+
+                processSectionParagraphes( wordDocument, tableCellElement,
+                        tableCell, thisTableLevel );
+
+                if ( !tableCellElement.hasChildNodes() )
+                {
+                    tableCellElement.appendChild( foDocumentFacade
+                            .createBlock() );
+                }
+
+                tableRowElement.appendChild( tableCellElement );
+            }
+
+            if ( tableRow.isTableHeader() )
+            {
+                tableHeader.appendChild( tableRowElement );
+            }
+            else
+            {
+                tableBody.appendChild( tableRowElement );
+            }
+        }
+
+        final Element tableElement = foDocumentFacade.createTable();
+        if ( tableHeader.hasChildNodes() )
+        {
+            tableElement.appendChild( tableHeader );
+        }
+        if ( tableBody.hasChildNodes() )
+        {
+            tableElement.appendChild( tableBody );
+            flow.appendChild( tableElement );
+        }
+        else
+        {
+            logger.log(
+                    POILogger.WARN,
+                    "Table without body starting on offset "
+                            + table.getStartOffset() + " -- "
+                            + table.getEndOffset() );
+        }
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java
new file mode 100644 (file)
index 0000000..6595370
--- /dev/null
@@ -0,0 +1,323 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterProperties;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.w3c.dom.Element;
+
+public class WordToFoUtils extends AbstractWordUtils
+{
+    public static void setBold( final Element element, final boolean bold )
+    {
+        element.setAttribute( "font-weight", bold ? "bold" : "normal" );
+    }
+
+    public static void setBorder( Element element, BorderCode borderCode,
+            String where )
+    {
+        if ( element == null )
+            throw new IllegalArgumentException( "element is null" );
+
+        if ( borderCode == null || borderCode.getBorderType() == 0 )
+            return;
+
+        if ( isEmpty( where ) )
+        {
+            element.setAttribute( "border-style", getBorderType( borderCode ) );
+            element.setAttribute( "border-color",
+                    getColor( borderCode.getColor() ) );
+            element.setAttribute( "border-width", getBorderWidth( borderCode ) );
+        }
+        else
+        {
+            element.setAttribute( "border-" + where + "-style",
+                    getBorderType( borderCode ) );
+            element.setAttribute( "border-" + where + "-color",
+                    getColor( borderCode.getColor() ) );
+            element.setAttribute( "border-" + where + "-width",
+                    getBorderWidth( borderCode ) );
+        }
+    }
+
+    public static void setCharactersProperties(
+            final CharacterRun characterRun, final Element inline )
+    {
+        final CharacterProperties clonedProperties = characterRun
+                .cloneProperties();
+        StringBuilder textDecorations = new StringBuilder();
+
+        setBorder( inline, clonedProperties.getBrc(), EMPTY );
+
+        if ( characterRun.isCapitalized() )
+        {
+            inline.setAttribute( "text-transform", "uppercase" );
+        }
+        if ( characterRun.isHighlighted() )
+        {
+            inline.setAttribute( "background-color",
+                    getColor( clonedProperties.getIcoHighlight() ) );
+        }
+        if ( characterRun.isStrikeThrough() )
+        {
+            if ( textDecorations.length() > 0 )
+                textDecorations.append( " " );
+            textDecorations.append( "line-through" );
+        }
+        if ( characterRun.isShadowed() )
+        {
+            inline.setAttribute( "text-shadow", characterRun.getFontSize() / 24
+                    + "pt" );
+        }
+        if ( characterRun.isSmallCaps() )
+        {
+            inline.setAttribute( "font-variant", "small-caps" );
+        }
+        if ( characterRun.getSubSuperScriptIndex() == 1 )
+        {
+            inline.setAttribute( "baseline-shift", "super" );
+            inline.setAttribute( "font-size", "smaller" );
+        }
+        if ( characterRun.getSubSuperScriptIndex() == 2 )
+        {
+            inline.setAttribute( "baseline-shift", "sub" );
+            inline.setAttribute( "font-size", "smaller" );
+        }
+        if ( characterRun.getUnderlineCode() > 0 )
+        {
+            if ( textDecorations.length() > 0 )
+                textDecorations.append( " " );
+            textDecorations.append( "underline" );
+        }
+        if ( characterRun.isVanished() )
+        {
+            inline.setAttribute( "visibility", "hidden" );
+        }
+        if ( textDecorations.length() > 0 )
+        {
+            inline.setAttribute( "text-decoration", textDecorations.toString() );
+        }
+    }
+
+    public static void setFontFamily( final Element element,
+            final String fontFamily )
+    {
+        if ( isEmpty( fontFamily ) )
+            return;
+
+        element.setAttribute( "font-family", fontFamily );
+    }
+
+    public static void setFontSize( final Element element, final int fontSize )
+    {
+        element.setAttribute( "font-size", String.valueOf( fontSize ) );
+    }
+
+    public static void setIndent( Paragraph paragraph, Element block )
+    {
+        if ( paragraph.getFirstLineIndent() != 0 )
+        {
+            block.setAttribute(
+                    "text-indent",
+                    String.valueOf( paragraph.getFirstLineIndent()
+                            / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        if ( paragraph.getIndentFromLeft() != 0 )
+        {
+            block.setAttribute(
+                    "start-indent",
+                    String.valueOf( paragraph.getIndentFromLeft()
+                            / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        if ( paragraph.getIndentFromRight() != 0 )
+        {
+            block.setAttribute(
+                    "end-indent",
+                    String.valueOf( paragraph.getIndentFromRight()
+                            / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        if ( paragraph.getSpacingBefore() != 0 )
+        {
+            block.setAttribute(
+                    "space-before",
+                    String.valueOf( paragraph.getSpacingBefore() / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        if ( paragraph.getSpacingAfter() != 0 )
+        {
+            block.setAttribute( "space-after",
+                    String.valueOf( paragraph.getSpacingAfter() / TWIPS_PER_PT )
+                            + "pt" );
+        }
+    }
+
+    public static void setItalic( final Element element, final boolean italic )
+    {
+        element.setAttribute( "font-style", italic ? "italic" : "normal" );
+    }
+
+    public static void setJustification( Paragraph paragraph,
+            final Element element )
+    {
+        String justification = getJustification( paragraph.getJustification() );
+        if ( isNotEmpty( justification ) )
+            element.setAttribute( "text-align", justification );
+    }
+
+    public static void setParagraphProperties( Paragraph paragraph,
+            Element block )
+    {
+        setIndent( paragraph, block );
+        setJustification( paragraph, block );
+
+        setBorder( block, paragraph.getBottomBorder(), "bottom" );
+        setBorder( block, paragraph.getLeftBorder(), "left" );
+        setBorder( block, paragraph.getRightBorder(), "right" );
+        setBorder( block, paragraph.getTopBorder(), "top" );
+
+        if ( paragraph.pageBreakBefore() )
+        {
+            block.setAttribute( "break-before", "page" );
+        }
+
+        block.setAttribute( "hyphenate",
+                String.valueOf( paragraph.isAutoHyphenated() ) );
+
+        if ( paragraph.keepOnPage() )
+        {
+            block.setAttribute( "keep-together.within-page", "always" );
+        }
+
+        if ( paragraph.keepWithNext() )
+        {
+            block.setAttribute( "keep-with-next.within-page", "always" );
+        }
+
+        block.setAttribute( "linefeed-treatment", "preserve" );
+        block.setAttribute( "white-space-collapse", "false" );
+    }
+
+    public static void setPictureProperties( Picture picture,
+            Element graphicElement )
+    {
+        final int aspectRatioX = picture.getAspectRatioX();
+        final int aspectRatioY = picture.getAspectRatioY();
+
+        if ( aspectRatioX > 0 )
+        {
+            graphicElement
+                    .setAttribute( "content-width", ( ( picture.getDxaGoal()
+                            * aspectRatioX / 100 ) / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        else
+            graphicElement.setAttribute( "content-width",
+                    ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
+
+        if ( aspectRatioY > 0 )
+            graphicElement
+                    .setAttribute( "content-height", ( ( picture.getDyaGoal()
+                            * aspectRatioY / 100 ) / TWIPS_PER_PT )
+                            + "pt" );
+        else
+            graphicElement.setAttribute( "content-height",
+                    ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
+
+        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
+        {
+            graphicElement.setAttribute( "scaling", "uniform" );
+        }
+        else
+        {
+            graphicElement.setAttribute( "scaling", "non-uniform" );
+        }
+
+        graphicElement.setAttribute( "vertical-align", "text-bottom" );
+
+        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
+                || picture.getDyaCropBottom() != 0
+                || picture.getDxaCropLeft() != 0 )
+        {
+            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
+            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
+            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
+            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
+            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
+                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
+                    + "pt)" );
+            graphicElement.setAttribute( "oveerflow", "hidden" );
+        }
+    }
+
+    public static void setTableCellProperties( TableRow tableRow,
+            TableCell tableCell, Element element, boolean toppest,
+            boolean bottomest, boolean leftest, boolean rightest )
+    {
+        element.setAttribute( "width", ( tableCell.getWidth() / TWIPS_PER_INCH )
+                + "in" );
+        element.setAttribute( "padding-start",
+                ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
+        element.setAttribute( "padding-end",
+                ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
+
+        BorderCode top = tableCell.getBrcTop() != null
+                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
+                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
+                .getHorizontalBorder();
+        BorderCode bottom = tableCell.getBrcBottom() != null
+                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
+                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
+                : tableRow.getHorizontalBorder();
+
+        BorderCode left = tableCell.getBrcLeft() != null
+                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
+                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
+                .getVerticalBorder();
+        BorderCode right = tableCell.getBrcRight() != null
+                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
+                .getBrcRight() : rightest ? tableRow.getRightBorder()
+                : tableRow.getVerticalBorder();
+
+        setBorder( element, bottom, "bottom" );
+        setBorder( element, left, "left" );
+        setBorder( element, right, "right" );
+        setBorder( element, top, "top" );
+    }
+
+    public static void setTableRowProperties( TableRow tableRow,
+            Element tableRowElement )
+    {
+        if ( tableRow.getRowHeight() > 0 )
+        {
+            tableRowElement.setAttribute( "height",
+                    ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in" );
+        }
+        if ( !tableRow.cantSplit() )
+        {
+            tableRowElement.setAttribute( "keep-together", "always" );
+        }
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
new file mode 100644 (file)
index 0000000..c931acb
--- /dev/null
@@ -0,0 +1,481 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.util.List;
+import java.util.Stack;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;
+
+/**
+ * Converts Word files (95-2007) into HTML files.
+ * <p>
+ * This implementation doesn't create images or links to them. This can be
+ * changed by overriding {@link #processImage(Element, boolean, Picture)}
+ * method.
+ * 
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class WordToHtmlConverter extends AbstractWordConverter
+{
+
+    /**
+     * Holds properties values, applied to current <tt>p</tt> element. Those
+     * properties shall not be doubled in children <tt>span</tt> elements.
+     */
+    private static class BlockProperies
+    {
+        final String pFontName;
+        final int pFontSize;
+
+        public BlockProperies( String pFontName, int pFontSize )
+        {
+            this.pFontName = pFontName;
+            this.pFontSize = pFontSize;
+        }
+    }
+
+    /** Logger of this class, used to report tables without body rows. */
+    private static final POILogger logger = POILogFactory
+            .getLogger( WordToHtmlConverter.class );
+
+    private static String getSectionStyle( Section section )
+    {
+        SectionProperties sep = WordToHtmlUtils.getSectionProperties( section );
+
+        float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
+        float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
+        float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
+        float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
+
+        String style = "margin: " + topMargin + "in " + rightMargin + "in "
+                + bottomMargin + "in " + leftMargin + "in; ";
+
+        if ( sep.getCcolM1() > 0 )
+        {
+            style += "column-count: " + ( sep.getCcolM1() + 1 ) + "; ";
+            if ( sep.getFEvenlySpaced() )
+            {
+                style += "column-gap: "
+                        + ( sep.getDxaColumns() / TWIPS_PER_INCH ) + "in; ";
+            }
+            else
+            {
+                style += "column-gap: 0.25in; ";
+            }
+        }
+        return style;
+    }
+
+    /**
+     * Java main() interface to interact with {@link WordToHtmlConverter}
+     * 
+     * <p>
+     * Usage: WordToHtmlConverter infile outfile
+     * </p>
+     * Where infile is an input .doc file ( Word 95-2007) which will be rendered
+     * as HTML into outfile
+     */
+    public static void main( String[] args )
+    {
+        if ( args.length < 2 )
+        {
+            System.err
+                    .println( "Usage: WordToHtmlConverter <inputFile.doc> <saveTo.html>" );
+            return;
+        }
+
+        System.out.println( "Converting " + args[0] );
+        System.out.println( "Saving output to " + args[1] );
+        try
+        {
+            Document doc = WordToHtmlConverter.process( new File( args[0] ) );
+
+            FileWriter out = new FileWriter( args[1] );
+            DOMSource domSource = new DOMSource( doc );
+            StreamResult streamResult = new StreamResult( out );
+
+            TransformerFactory tf = TransformerFactory.newInstance();
+            Transformer serializer = tf.newTransformer();
+            // TODO set encoding from a command argument
+            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
+            serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
+            serializer.setOutputProperty( OutputKeys.METHOD, "html" );
+            serializer.transform( domSource, streamResult );
+            out.close();
+        }
+        catch ( Exception e )
+        {
+            e.printStackTrace();
+        }
+    }
+
+    static Document process( File docFile ) throws Exception
+    {
+        final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
+        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToHtmlConverter.processDocument( wordDocument );
+        return wordToHtmlConverter.getDocument();
+    }
+
+    /**
+     * Properties of the paragraphs being processed at the moment; pushed and
+     * popped by {@link #processParagraph}, read by {@link #outputCharacters}.
+     */
+    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
+
+    /** Facade used to create elements of the output HTML document. */
+    private final HtmlDocumentFacade htmlDocumentFacade;
+
+    /**
+     * Creates new instance of {@link WordToHtmlConverter}. Can be used to
+     * output several {@link HWPFDocument}s into a single HTML document.
+     * 
+     * @param document
+     *            XML DOM Document used as HTML document; it is wrapped into a
+     *            new {@link HtmlDocumentFacade}
+     */
+    public WordToHtmlConverter( Document document )
+    {
+        this.htmlDocumentFacade = new HtmlDocumentFacade( document );
+    }
+
+    /**
+     * Returns the DOM document held by the HTML document facade of this
+     * converter.
+     * 
+     * @return DOM document used as HTML document
+     */
+    public Document getDocument()
+    {
+        return htmlDocumentFacade.getDocument();
+    }
+
+    /**
+     * Appends a <tt>span</tt> element with the given text to the paragraph
+     * element. Font name and font size are written into the style of the
+     * span only if they differ from the values on top of the block
+     * properties stack, i.e. from the values of the enclosing paragraph.
+     * 
+     * @param pElement
+     *            parent element of the new span
+     * @param characterRun
+     *            source of the character formatting
+     * @param text
+     *            text to output
+     */
+    @Override
+    protected void outputCharacters( Element pElement,
+            CharacterRun characterRun, String text )
+    {
+        Element span = htmlDocumentFacade.document.createElement( "span" );
+        pElement.appendChild( span );
+
+        StringBuilder style = new StringBuilder();
+        BlockProperies blockProperies = this.blocksProperies.peek();
+        if ( characterRun.getFontName() != null
+                && !WordToHtmlUtils.equals( characterRun.getFontName(),
+                        blockProperies.pFontName ) )
+        {
+            style.append( "font-family: " + characterRun.getFontName() + "; " );
+        }
+        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
+        {
+            // Half of the run font size is the size in points; CSS needs an
+            // explicit unit, a bare number is not a valid font-size
+            style.append( "font-size: " + characterRun.getFontSize() / 2
+                    + "pt; " );
+        }
+
+        WordToHtmlUtils.addCharactersProperties( characterRun, style );
+        if ( style.length() != 0 )
+            span.setAttribute( "style", style.toString() );
+
+        Text textNode = htmlDocumentFacade.createText( text );
+        span.appendChild( textNode );
+    }
+
+    protected void processHyperlink( HWPFDocumentCore wordDocument,
+            Element currentBlock, Paragraph paragraph,
+            List<CharacterRun> characterRuns, int currentTableLevel,
+            String hyperlink, int beginTextInclusive, int endTextExclusive )
+    {
+        Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
+        currentBlock.appendChild( basicLink );
+
+        if ( beginTextInclusive < endTextExclusive )
+            processCharacters( wordDocument, currentTableLevel, paragraph,
+                    basicLink, characterRuns, beginTextInclusive,
+                    endTextExclusive );
+    }
+
+    /**
+     * This method shall store image bytes in external file and convert it if
+     * necessary. Images shall be stored using PNG format. Other formats may be
+     * not supported by user browser.
+     * <p>
+     * Please note the
+     * {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
+     * 
+     * @param currentBlock
+     *            currently processed HTML element, like <tt>p</tt>. Shall be
+     *            used as parent of newly created <tt>img</tt>
+     * @param inlined
+     *            if image is inlined
+     * @param picture
+     *            HWPF object, contained picture data and properties
+     */
+    protected void processImage( Element currentBlock, boolean inlined,
+            Picture picture )
+    {
+        // no default implementation -- skip
+        currentBlock.appendChild( htmlDocumentFacade.document
+                .createComment( "Image link to '"
+                        + picture.suggestFullFileName() + "' can be here" ) );
+    }
+
+    protected void processPageref( HWPFDocumentCore hwpfDocument,
+            Element currentBlock, Paragraph paragraph,
+            List<CharacterRun> characterRuns, int currentTableLevel,
+            String pageref, int beginTextInclusive, int endTextExclusive )
+    {
+        Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
+        currentBlock.appendChild( basicLink );
+
+        if ( beginTextInclusive < endTextExclusive )
+            processCharacters( hwpfDocument, currentTableLevel, paragraph,
+                    basicLink, characterRuns, beginTextInclusive,
+                    endTextExclusive );
+    }
+
+    protected void processParagraph( HWPFDocumentCore hwpfDocument,
+            Element parentFopElement, int currentTableLevel,
+            Paragraph paragraph, String bulletText )
+    {
+        final Element pElement = htmlDocumentFacade.createParagraph();
+        parentFopElement.appendChild( pElement );
+
+        StringBuilder style = new StringBuilder();
+        WordToHtmlUtils.addParagraphProperties( paragraph, style );
+
+        final int charRuns = paragraph.numCharacterRuns();
+
+        if ( charRuns == 0 )
+        {
+            return;
+        }
+
+        {
+            final String pFontName;
+            final int pFontSize;
+            final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
+            if ( characterRun != null )
+            {
+                pFontSize = characterRun.getFontSize() / 2;
+                pFontName = characterRun.getFontName();
+                WordToHtmlUtils.addFontFamily( pFontName, style );
+                WordToHtmlUtils.addFontSize( pFontSize, style );
+            }
+            else
+            {
+                pFontSize = -1;
+                pFontName = WordToHtmlUtils.EMPTY;
+            }
+            blocksProperies.push( new BlockProperies( pFontName, pFontSize ) );
+        }
+        try
+        {
+            if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
+            {
+                Text textNode = htmlDocumentFacade.createText( bulletText );
+                pElement.appendChild( textNode );
+            }
+
+            List<CharacterRun> characterRuns = WordToHtmlUtils
+                    .findCharacterRuns( paragraph );
+            processCharacters( hwpfDocument, currentTableLevel, paragraph,
+                    pElement, characterRuns, 0, characterRuns.size() );
+        }
+        finally
+        {
+            blocksProperies.pop();
+        }
+
+        if ( style.length() > 0 )
+            pElement.setAttribute( "style", style.toString() );
+
+        return;
+    }
+
+    protected void processSection( HWPFDocumentCore wordDocument,
+            Section section, int sectionCounter )
+    {
+        Element div = htmlDocumentFacade.document.createElement( "div" );
+        div.setAttribute( "style", getSectionStyle( section ) );
+        htmlDocumentFacade.body.appendChild( div );
+
+        processSectionParagraphes( wordDocument, div, section, 0 );
+    }
+
+    @Override
+    protected void processSingleSection( HWPFDocumentCore wordDocument,
+            Section section )
+    {
+        htmlDocumentFacade.body.setAttribute( "style",
+                getSectionStyle( section ) );
+
+        processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
+                section, 0 );
+    }
+
+    protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
+            Table table, int thisTableLevel )
+    {
+        Element tableHeader = htmlDocumentFacade.createTableHeader();
+        Element tableBody = htmlDocumentFacade.createTableBody();
+
+        final int tableRows = table.numRows();
+
+        int maxColumns = Integer.MIN_VALUE;
+        for ( int r = 0; r < tableRows; r++ )
+        {
+            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
+        }
+
+        for ( int r = 0; r < tableRows; r++ )
+        {
+            TableRow tableRow = table.getRow( r );
+
+            Element tableRowElement = htmlDocumentFacade.createTableRow();
+            StringBuilder tableRowStyle = new StringBuilder();
+            WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
+
+            final int rowCells = tableRow.numCells();
+            for ( int c = 0; c < rowCells; c++ )
+            {
+                TableCell tableCell = tableRow.getCell( c );
+
+                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
+                    continue;
+
+                if ( tableCell.isVerticallyMerged()
+                        && !tableCell.isFirstVerticallyMerged() )
+                    continue;
+
+                Element tableCellElement;
+                if ( tableRow.isTableHeader() )
+                {
+                    tableCellElement = htmlDocumentFacade
+                            .createTableHeaderCell();
+                }
+                else
+                {
+                    tableCellElement = htmlDocumentFacade.createTableCell();
+                }
+                StringBuilder tableCellStyle = new StringBuilder();
+                WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
+                        r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
+                        tableCellStyle );
+
+                if ( tableCell.isFirstMerged() )
+                {
+                    int count = 0;
+                    for ( int c1 = c; c1 < rowCells; c1++ )
+                    {
+                        TableCell nextCell = tableRow.getCell( c1 );
+                        if ( nextCell.isMerged() )
+                            count++;
+                        if ( !nextCell.isMerged() )
+                            break;
+                    }
+                    tableCellElement.setAttribute( "colspan", "" + count );
+                }
+                else
+                {
+                    if ( c == rowCells - 1 && c != maxColumns - 1 )
+                    {
+                        tableCellElement.setAttribute( "colspan", ""
+                                + ( maxColumns - c ) );
+                    }
+                }
+
+                if ( tableCell.isFirstVerticallyMerged() )
+                {
+                    int count = 0;
+                    for ( int r1 = r; r1 < tableRows; r1++ )
+                    {
+                        TableRow nextRow = table.getRow( r1 );
+                        if ( nextRow.numCells() < c )
+                            break;
+                        TableCell nextCell = nextRow.getCell( c );
+                        if ( nextCell.isVerticallyMerged() )
+                            count++;
+                        if ( !nextCell.isVerticallyMerged() )
+                            break;
+                    }
+                    tableCellElement.setAttribute( "rowspan", "" + count );
+                }
+
+                processSectionParagraphes( hwpfDocument, tableCellElement,
+                        tableCell, thisTableLevel );
+
+                if ( !tableCellElement.hasChildNodes() )
+                {
+                    tableCellElement.appendChild( htmlDocumentFacade
+                            .createParagraph() );
+                }
+                if ( tableCellStyle.length() > 0 )
+                    tableCellElement.setAttribute( "style",
+                            tableCellStyle.toString() );
+
+                tableRowElement.appendChild( tableCellElement );
+            }
+
+            if ( tableRowStyle.length() > 0 )
+                tableRowElement
+                        .setAttribute( "style", tableRowStyle.toString() );
+
+            if ( tableRow.isTableHeader() )
+            {
+                tableHeader.appendChild( tableRowElement );
+            }
+            else
+            {
+                tableBody.appendChild( tableRowElement );
+            }
+
+        }
+
+        final Element tableElement = htmlDocumentFacade.createTable();
+        if ( tableHeader.hasChildNodes() )
+        {
+            tableElement.appendChild( tableHeader );
+        }
+        if ( tableBody.hasChildNodes() )
+        {
+            tableElement.appendChild( tableBody );
+            flow.appendChild( tableElement );
+        }
+        else
+        {
+            logger.log(
+                    POILogger.WARN,
+                    "Table without body starting on offset "
+                            + table.getStartOffset() + " -- "
+                            + table.getEndOffset() );
+        }
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java
new file mode 100644 (file)
index 0000000..598def6
--- /dev/null
@@ -0,0 +1,292 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterProperties;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.w3c.dom.Element;
+
+public class WordToHtmlUtils extends AbstractWordUtils
+{
+    /**
+     * Appends a <code>font-weight</code> declaration to the style buffer.
+     *
+     * @param bold
+     *            <code>true</code> to write <code>bold</code>,
+     *            <code>false</code> to write <code>normal</code>
+     * @param style
+     *            buffer the declaration is appended to
+     */
+    public static void addBold( final boolean bold, StringBuilder style )
+    {
+        final String weight;
+        if ( bold )
+            weight = "bold";
+        else
+            weight = "normal";
+
+        // note: terminated by ";" only, without a trailing space
+        style.append( "font-weight: " ).append( weight ).append( ";" );
+    }
+
+    /**
+     * Appends border style, color and width declarations for one border.
+     * Nothing is appended when the border code is <code>null</code> or its
+     * border type is 0.
+     *
+     * @param borderCode
+     *            border description, may be <code>null</code>
+     * @param where
+     *            side name inserted into the property names (for example
+     *            <code>border-top-style</code>); when <code>isEmpty</code>
+     *            reports it as empty the side-less properties
+     *            (<code>border-style</code>, ...) are written instead
+     * @param style
+     *            buffer the declarations are appended to
+     */
+    public static void addBorder( BorderCode borderCode, String where,
+            StringBuilder style )
+    {
+        if ( borderCode == null || borderCode.getBorderType() == 0 )
+            return;
+
+        // common property prefix: "border-" or "border-<where>-"
+        final String property = isEmpty( where ) ? "border-" : "border-"
+                + where + "-";
+
+        style.append( property + "style: " + getBorderType( borderCode )
+                + "; " );
+        style.append( property + "color: "
+                + getColor( borderCode.getColor() ) + "; " );
+        style.append( property + "width: " + getBorderWidth( borderCode )
+                + "; " );
+    }
+
+    /**
+     * Appends inline style declarations describing the formatting of a
+     * character run: weight, slant, border, capitalisation, highlight,
+     * strike-through/underline, shadow, small caps, sub/superscript and
+     * visibility.
+     * <p>
+     * Strike-through and underline are written as a single
+     * <code>text-decoration</code> declaration when both apply, because two
+     * separate declarations of the same property in one style attribute
+     * would leave only the last one in effect.
+     *
+     * @param characterRun
+     *            run whose formatting is read
+     * @param style
+     *            buffer the declarations are appended to
+     */
+    public static void addCharactersProperties(
+            final CharacterRun characterRun, StringBuilder style )
+    {
+        final CharacterProperties clonedProperties = characterRun
+                .cloneProperties();
+
+        if ( characterRun.isBold() )
+        {
+            style.append( "font-weight: bold; " );
+        }
+        if ( characterRun.isItalic() )
+        {
+            style.append( "font-style: italic; " );
+        }
+
+        addBorder( clonedProperties.getBrc(), EMPTY, style );
+
+        if ( characterRun.isCapitalized() )
+        {
+            style.append( "text-transform: uppercase; " );
+        }
+        if ( characterRun.isHighlighted() )
+        {
+            style.append( "background-color: "
+                    + getColor( clonedProperties.getIcoHighlight() ) + "; " );
+        }
+
+        final boolean struckThrough = characterRun.isStrikeThrough();
+        final boolean underlined = characterRun.getUnderlineCode() > 0;
+        if ( struckThrough )
+        {
+            // combine both decorations so that neither overrides the other
+            style.append( underlined ? "text-decoration: line-through underline; "
+                    : "text-decoration: line-through; " );
+        }
+
+        if ( characterRun.isShadowed() )
+        {
+            style.append( "text-shadow: " + characterRun.getFontSize() / 24
+                    + "pt; " );
+        }
+        if ( characterRun.isSmallCaps() )
+        {
+            style.append( "font-variant: small-caps; " );
+        }
+        // NOTE(review): "baseline-shift" looks like an XSL-FO/SVG property;
+        // HTML renderers may need "vertical-align" instead -- confirm
+        if ( characterRun.getSubSuperScriptIndex() == 1 )
+        {
+            style.append( "baseline-shift: super; " );
+            style.append( "font-size: smaller; " );
+        }
+        if ( characterRun.getSubSuperScriptIndex() == 2 )
+        {
+            style.append( "baseline-shift: sub; " );
+            style.append( "font-size: smaller; " );
+        }
+        if ( underlined && !struckThrough )
+        {
+            // underline alone keeps its original position in the output
+            style.append( "text-decoration: underline; " );
+        }
+        if ( characterRun.isVanished() )
+        {
+            style.append( "visibility: hidden; " );
+        }
+    }
+
+    /**
+     * Appends a <code>font-family</code> declaration to the style buffer.
+     * Nothing is appended when <code>isEmpty</code> reports the font family
+     * as empty.
+     *
+     * @param fontFamily
+     *            font family name to write
+     * @param style
+     *            buffer the declaration is appended to
+     */
+    public static void addFontFamily( final String fontFamily,
+            StringBuilder style )
+    {
+        if ( isEmpty( fontFamily ) )
+            return;
+
+        // terminate the declaration like every other helper in this class,
+        // otherwise the next appended declaration fuses with the font name
+        style.append( "font-family: " + fontFamily + "; " );
+    }
+
+    /**
+     * Appends a <code>font-size</code> declaration to the style buffer.
+     * <p>
+     * The value is written with a <code>pt</code> unit and a terminating
+     * <code>"; "</code>: a unit-less non-zero length is not valid CSS, and
+     * an unterminated declaration would fuse with whatever is appended next.
+     *
+     * @param fontSize
+     *            font size; assumed to be in points -- TODO confirm against
+     *            callers
+     * @param style
+     *            buffer the declaration is appended to
+     */
+    public static void addFontSize( final int fontSize, StringBuilder style )
+    {
+        style.append( "font-size: " + fontSize + "pt; " );
+    }
+
+    /**
+     * Appends indentation and spacing declarations of a paragraph: first
+     * line indent, left and right indent, spacing before and after. Values
+     * equal to 0 are omitted.
+     * <p>
+     * NOTE(review): apart from <code>text-indent</code> the property names
+     * used here look like XSL-FO names rather than CSS ones -- confirm that
+     * the HTML output is meant to carry them.
+     *
+     * @param paragraph
+     *            paragraph whose indents and spacings are read
+     * @param style
+     *            buffer the declarations are appended to
+     */
+    public static void addIndent( Paragraph paragraph, StringBuilder style )
+    {
+        // first line
+        addIndent( style, "text-indent", paragraph.getFirstLineIndent() );
+        // horizontal indents
+        addIndent( style, "start-indent", paragraph.getIndentFromLeft() );
+        addIndent( style, "end-indent", paragraph.getIndentFromRight() );
+        // vertical spacing
+        addIndent( style, "space-before", paragraph.getSpacingBefore() );
+        addIndent( style, "space-after", paragraph.getSpacingAfter() );
+    }
+
+    /**
+     * Appends <code>cssName: &lt;value&gt;pt; </code> where the value is
+     * <code>twipsValue / TWIPS_PER_PT</code>; a value of 0 appends nothing.
+     *
+     * @param style
+     *            buffer the declaration is appended to
+     * @param cssName
+     *            property name to write
+     * @param twipsValue
+     *            measurement divided by <code>TWIPS_PER_PT</code> before
+     *            output
+     */
+    private static void addIndent( StringBuilder style, final String cssName,
+            final int twipsValue )
+    {
+        if ( twipsValue != 0 )
+        {
+            final String points = ( twipsValue / TWIPS_PER_PT ) + "pt; ";
+            style.append( cssName ).append( ": " ).append( points );
+        }
+    }
+
+    /**
+     * Appends a <code>text-align</code> declaration for the paragraph's
+     * justification. Nothing is appended when <code>getJustification</code>
+     * yields a value that <code>isNotEmpty</code> rejects.
+     *
+     * @param paragraph
+     *            paragraph whose justification code is read
+     * @param style
+     *            buffer the declaration is appended to
+     */
+    public static void addJustification( Paragraph paragraph,
+            final StringBuilder style )
+    {
+        final String alignment = getJustification( paragraph
+                .getJustification() );
+        if ( !isNotEmpty( alignment ) )
+            return;
+
+        style.append( "text-align: " ).append( alignment ).append( "; " );
+    }
+
+    /**
+     * Appends all paragraph-level declarations: indents and spacing,
+     * justification, the four borders, page-break and keep hints,
+     * hyphenation and whitespace handling.
+     *
+     * @param paragraph
+     *            paragraph whose properties are read
+     * @param style
+     *            buffer the declarations are appended to
+     */
+    public static void addParagraphProperties( Paragraph paragraph,
+            StringBuilder style )
+    {
+        addIndent( paragraph, style );
+        addJustification( paragraph, style );
+
+        // borders are written in the order bottom, left, right, top
+        addBorder( paragraph.getBottomBorder(), "bottom", style );
+        addBorder( paragraph.getLeftBorder(), "left", style );
+        addBorder( paragraph.getRightBorder(), "right", style );
+        addBorder( paragraph.getTopBorder(), "top", style );
+
+        if ( paragraph.pageBreakBefore() )
+            style.append( "break-before: page; " );
+
+        // always written, with the literal value "true" or "false"
+        final boolean autoHyphenated = paragraph.isAutoHyphenated();
+        style.append( "hyphenate: " ).append( autoHyphenated ).append( "; " );
+
+        if ( paragraph.keepOnPage() )
+            style.append( "keep-together.within-page: always; " );
+
+        if ( paragraph.keepWithNext() )
+            style.append( "keep-with-next.within-page: always; " );
+
+        // unconditional whitespace handling
+        style.append( "linefeed-treatment: preserve; " ).append(
+                "white-space-collapse: false; " );
+    }
+
+    /**
+     * Appends the width, horizontal padding and the four border
+     * declarations of a table cell.
+     * <p>
+     * For each side the cell's own border is used when it is non-null and
+     * its border type is not 0. Otherwise a row-level border is used: the
+     * row's outer border for that side when the matching flag is
+     * <code>true</code>, else the row's horizontal (top/bottom) or vertical
+     * (left/right) border.
+     *
+     * @param tableRow
+     *            row supplying the gap-half padding and the fallback borders
+     * @param tableCell
+     *            cell supplying the width and the cell-level borders
+     * @param toppest
+     *            when <code>true</code> the row's top border is the fallback
+     *            for the top side
+     * @param bottomest
+     *            when <code>true</code> the row's bottom border is the
+     *            fallback for the bottom side
+     * @param leftest
+     *            when <code>true</code> the row's left border is the
+     *            fallback for the left side
+     * @param rightest
+     *            when <code>true</code> the row's right border is the
+     *            fallback for the right side
+     * @param style
+     *            buffer the declarations are appended to
+     */
+    public static void addTableCellProperties( TableRow tableRow,
+            TableCell tableCell, boolean toppest, boolean bottomest,
+            boolean leftest, boolean rightest, StringBuilder style )
+    {
+        // sizes are divided by TWIPS_PER_INCH and written with an "in" unit
+        style.append( "width: " + ( tableCell.getWidth() / TWIPS_PER_INCH )
+                + "in; " );
+        // NOTE(review): "padding-start"/"padding-end" look like XSL-FO
+        // property names rather than CSS ones -- confirm
+        style.append( "padding-start: "
+                + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
+        style.append( "padding-end: "
+                + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
+
+        // cell border if set, else row outer border or row horizontal border
+        BorderCode top = tableCell.getBrcTop() != null
+                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
+                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
+                .getHorizontalBorder();
+        BorderCode bottom = tableCell.getBrcBottom() != null
+                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
+                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
+                : tableRow.getHorizontalBorder();
+
+        // cell border if set, else row outer border or row vertical border
+        BorderCode left = tableCell.getBrcLeft() != null
+                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
+                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
+                .getVerticalBorder();
+        BorderCode right = tableCell.getBrcRight() != null
+                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
+                .getBrcRight() : rightest ? tableRow.getRightBorder()
+                : tableRow.getVerticalBorder();
+
+        // addBorder skips borders that are null or have border type 0
+        addBorder( bottom, "bottom", style );
+        addBorder( left, "left", style );
+        addBorder( right, "right", style );
+        addBorder( top, "top", style );
+    }
+
+    /**
+     * Appends row-level declarations: the row height when it is greater
+     * than 0, and a keep-together hint.
+     *
+     * @param tableRow
+     *            row whose height and split flag are read
+     * @param style
+     *            buffer the declarations are appended to
+     */
+    public static void addTableRowProperties( TableRow tableRow,
+            StringBuilder style )
+    {
+        if ( tableRow.getRowHeight() > 0 )
+        {
+            final String inches = ( tableRow.getRowHeight() / TWIPS_PER_INCH )
+                    + "in; ";
+            style.append( "height: " ).append( inches );
+        }
+
+        // NOTE(review): the hint is written when cantSplit() is false, which
+        // reads as inverted -- confirm the meaning of the flag
+        final boolean splittable = !tableRow.cantSplit();
+        if ( splittable )
+        {
+            style.append( "keep-together: always; " );
+        }
+    }
+
+    /**
+     * Sets size, scaling, alignment and cropping attributes of a picture on
+     * the given element.
+     * <p>
+     * Width and height are the picture's goal sizes divided by
+     * <code>TWIPS_PER_PT</code>; when the matching aspect ratio is greater
+     * than 0 the goal size is first multiplied by it and divided by 100.
+     * <code>scaling</code> is <code>non-uniform</code> only when both aspect
+     * ratios are greater than 0. When any crop value is non-zero a
+     * <code>clip</code> rectangle (top, right, bottom, left) is written
+     * together with <code>overflow="hidden"</code>.
+     *
+     * @param picture
+     *            picture whose goal size, aspect ratios and crop values are
+     *            read
+     * @param graphicElement
+     *            element receiving the attributes
+     */
+    public static void setPictureProperties( Picture picture,
+            Element graphicElement )
+    {
+        final int aspectRatioX = picture.getAspectRatioX();
+        final int aspectRatioY = picture.getAspectRatioY();
+
+        if ( aspectRatioX > 0 )
+        {
+            graphicElement
+                    .setAttribute( "content-width", ( ( picture.getDxaGoal()
+                            * aspectRatioX / 100 ) / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        else
+        {
+            graphicElement.setAttribute( "content-width",
+                    ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
+        }
+
+        if ( aspectRatioY > 0 )
+        {
+            graphicElement
+                    .setAttribute( "content-height", ( ( picture.getDyaGoal()
+                            * aspectRatioY / 100 ) / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        else
+        {
+            graphicElement.setAttribute( "content-height",
+                    ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
+        }
+
+        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
+        {
+            graphicElement.setAttribute( "scaling", "uniform" );
+        }
+        else
+        {
+            graphicElement.setAttribute( "scaling", "non-uniform" );
+        }
+
+        graphicElement.setAttribute( "vertical-align", "text-bottom" );
+
+        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
+                || picture.getDyaCropBottom() != 0
+                || picture.getDxaCropLeft() != 0 )
+        {
+            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
+            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
+            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
+            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
+            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
+                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
+                    + "pt)" );
+            // attribute name was misspelled as "oveerflow", so the clipped
+            // area was never marked as hidden
+            graphicElement.setAttribute( "overflow", "hidden" );
+        }
+    }
+
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordExtractor.java
deleted file mode 100644 (file)
index f13d9a1..0000000
+++ /dev/null
@@ -1,365 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.model.ListFormatOverride;
-import org.apache.poi.hwpf.model.ListTables;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.Range;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.Table;
-import org.apache.poi.hwpf.usermodel.TableIterator;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-
-public abstract class AbstractWordExtractor
-{
-    private static final byte BEL_MARK = 7;
-
-    private static final byte FIELD_BEGIN_MARK = 19;
-
-    private static final byte FIELD_END_MARK = 21;
-
-    private static final byte FIELD_SEPARATOR_MARK = 20;
-
-    private static final POILogger logger = POILogFactory
-            .getLogger( AbstractWordExtractor.class );
-
-    public abstract Document getDocument();
-
-    protected abstract void outputCharacters( Element block,
-            CharacterRun characterRun, String text );
-
-    protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
-            int currentTableLevel, Paragraph paragraph, final Element block,
-            List<CharacterRun> characterRuns, final int start, final int end )
-    {
-        boolean haveAnyText = false;
-
-        for ( int c = start; c < end; c++ )
-        {
-            CharacterRun characterRun = characterRuns.get( c );
-
-            if ( characterRun == null )
-                throw new AssertionError();
-
-            if ( hwpfDocument instanceof HWPFDocument
-                    && ( (HWPFDocument) hwpfDocument ).getPicturesTable()
-                            .hasPicture( characterRun ) )
-            {
-                HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
-                Picture picture = newFormat.getPicturesTable().extractPicture(
-                        characterRun, true );
-
-                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
-                        picture );
-                continue;
-            }
-
-            String text = characterRun.text();
-            if ( text.getBytes().length == 0 )
-                continue;
-
-            if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
-            {
-                int skipTo = tryField( hwpfDocument, paragraph,
-                        currentTableLevel, characterRuns, c, block );
-
-                if ( skipTo != c )
-                {
-                    c = skipTo;
-                    continue;
-                }
-
-                continue;
-            }
-            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
-            {
-                // shall not appear without FIELD_BEGIN_MARK
-                continue;
-            }
-            if ( text.getBytes()[0] == FIELD_END_MARK )
-            {
-                // shall not appear without FIELD_BEGIN_MARK
-                continue;
-            }
-
-            if ( characterRun.isSpecialCharacter() || characterRun.isObj()
-                    || characterRun.isOle2() )
-            {
-                continue;
-            }
-
-            if ( text.endsWith( "\r" )
-                    || ( text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0 ) )
-                text = text.substring( 0, text.length() - 1 );
-
-            outputCharacters( block, characterRun, text );
-
-            haveAnyText |= text.trim().length() != 0;
-        }
-
-        return haveAnyText;
-    }
-
-    public void processDocument( HWPFDocumentCore wordDocument )
-    {
-        final Range range = wordDocument.getRange();
-        for ( int s = 0; s < range.numSections(); s++ )
-        {
-            processSection( wordDocument, range.getSection( s ), s );
-        }
-    }
-
-    protected void processField( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph, int currentTableLevel,
-            List<CharacterRun> characterRuns, int beginMark, int separatorMark,
-            int endMark )
-    {
-
-        Pattern hyperlinkPattern = Pattern
-                .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
-        Pattern pagerefPattern = Pattern
-                .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
-
-        if ( separatorMark - beginMark > 1 )
-        {
-            int index = beginMark + 1;
-            CharacterRun firstAfterBegin = null;
-            while ( index < separatorMark )
-            {
-                firstAfterBegin = paragraph.getCharacterRun( index );
-                if ( firstAfterBegin == null )
-                {
-                    logger.log( POILogger.WARN,
-                            "Paragraph " + paragraph.getStartOffset() + "--"
-                                    + paragraph.getEndOffset()
-                                    + " contains null CharacterRun #" + index );
-                    index++;
-                    continue;
-                }
-                break;
-            }
-
-            if ( firstAfterBegin != null )
-            {
-                final Matcher hyperlinkMatcher = hyperlinkPattern
-                        .matcher( firstAfterBegin.text() );
-                if ( hyperlinkMatcher.matches() )
-                {
-                    String hyperlink = hyperlinkMatcher.group( 1 );
-                    processHyperlink( wordDocument, currentBlock, paragraph,
-                            characterRuns, currentTableLevel, hyperlink,
-                            separatorMark + 1, endMark );
-                    return;
-                }
-
-                final Matcher pagerefMatcher = pagerefPattern
-                        .matcher( firstAfterBegin.text() );
-                if ( pagerefMatcher.matches() )
-                {
-                    String pageref = pagerefMatcher.group( 1 );
-                    processPageref( wordDocument, currentBlock, paragraph,
-                            characterRuns, currentTableLevel, pageref,
-                            separatorMark + 1, endMark );
-                    return;
-                }
-            }
-        }
-
-        StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
-        for ( int i = beginMark; i <= endMark; i++ )
-        {
-            debug.append( "\t" );
-            debug.append( paragraph.getCharacterRun( i ) );
-            debug.append( "\n" );
-        }
-        logger.log( POILogger.WARN, debug );
-
-        // just output field value
-        if ( separatorMark + 1 < endMark )
-            processCharacters( wordDocument, currentTableLevel, paragraph,
-                    currentBlock, characterRuns, separatorMark + 1, endMark );
-
-        return;
-    }
-
-    protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String hyperlink, int i, int endMark );
-
-    protected abstract void processImage( Element currentBlock,
-            boolean inlined, Picture picture );
-
-    protected abstract void processPageref( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String pageref, int beginTextInclusive, int endTextExclusive );
-
-    protected abstract void processParagraph( HWPFDocumentCore wordDocument,
-            Element parentFopElement, int currentTableLevel,
-            Paragraph paragraph, String bulletText );
-
-    protected abstract void processSection( HWPFDocumentCore wordDocument,
-            Section section, int s );
-
-    protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
-            Element flow, Range range, int currentTableLevel )
-    {
-        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
-        for ( TableIterator tableIterator = AbstractWordUtils.newTableIterator(
-                range, currentTableLevel + 1 ); tableIterator.hasNext(); )
-        {
-            Table next = tableIterator.next();
-            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
-        }
-
-        final ListTables listTables = wordDocument.getListTables();
-        int currentListInfo = 0;
-
-        final int paragraphs = range.numParagraphs();
-        for ( int p = 0; p < paragraphs; p++ )
-        {
-            Paragraph paragraph = range.getParagraph( p );
-
-            if ( allTables.containsKey( Integer.valueOf( paragraph
-                    .getStartOffset() ) ) )
-            {
-                Table table = allTables.get( Integer.valueOf( paragraph
-                        .getStartOffset() ) );
-                processTable( wordDocument, flow, table, currentTableLevel + 1 );
-                continue;
-            }
-
-            if ( paragraph.isInTable()
-                    && paragraph.getTableLevel() != currentTableLevel )
-            {
-                continue;
-            }
-
-            if ( paragraph.getIlfo() != currentListInfo )
-            {
-                currentListInfo = paragraph.getIlfo();
-            }
-
-            if ( currentListInfo != 0 )
-            {
-                if ( listTables != null )
-                {
-                    final ListFormatOverride listFormatOverride = listTables
-                            .getOverride( paragraph.getIlfo() );
-
-                    String label = AbstractWordUtils.getBulletText( listTables,
-                            paragraph, listFormatOverride.getLsid() );
-
-                    processParagraph( wordDocument, flow, currentTableLevel,
-                            paragraph, label );
-                }
-                else
-                {
-                    logger.log( POILogger.WARN,
-                            "Paragraph #" + paragraph.getStartOffset() + "-"
-                                    + paragraph.getEndOffset()
-                                    + " has reference to list structure #"
-                                    + currentListInfo
-                                    + ", but listTables not defined in file" );
-
-                    processParagraph( wordDocument, flow, currentTableLevel,
-                            paragraph, AbstractWordUtils.EMPTY );
-                }
-            }
-            else
-            {
-                processParagraph( wordDocument, flow, currentTableLevel,
-                        paragraph, AbstractWordUtils.EMPTY );
-            }
-        }
-
-    }
-
-    protected void processSingleSection( HWPFDocumentCore wordDocument,
-            Section section )
-    {
-        processSection( wordDocument, section, 0 );
-    }
-
-    protected abstract void processTable( HWPFDocumentCore wordDocument,
-            Element flow, Table table, int newTableLevel );
-
-    protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
-            int currentTableLevel, List<CharacterRun> characterRuns,
-            int beginMark, Element currentBlock )
-    {
-        int separatorMark = -1;
-        int endMark = -1;
-        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
-        {
-            CharacterRun characterRun = paragraph.getCharacterRun( c );
-
-            String text = characterRun.text();
-            if ( text.getBytes().length == 0 )
-                continue;
-
-            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
-            {
-                if ( separatorMark != -1 )
-                {
-                    // double;
-                    return beginMark;
-                }
-
-                separatorMark = c;
-                continue;
-            }
-
-            if ( text.getBytes()[0] == FIELD_END_MARK )
-            {
-                if ( endMark != -1 )
-                {
-                    // double;
-                    return beginMark;
-                }
-
-                endMark = c;
-                break;
-            }
-
-        }
-
-        if ( separatorMark == -1 || endMark == -1 )
-            return beginMark;
-
-        processField( wordDocument, currentBlock, paragraph, currentTableLevel,
-                characterRuns, beginMark, separatorMark, endMark );
-
-        return endMark;
-    }
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/AbstractWordUtils.java
deleted file mode 100644 (file)
index 89849c1..0000000
+++ /dev/null
@@ -1,404 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.Closeable;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.HWPFOldDocument;
-import org.apache.poi.hwpf.OldWordFileFormatException;
-import org.apache.poi.hwpf.model.CHPX;
-import org.apache.poi.hwpf.model.ListLevel;
-import org.apache.poi.hwpf.model.ListTables;
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Range;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.SectionProperties;
-import org.apache.poi.hwpf.usermodel.TableIterator;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-
-public class AbstractWordUtils
-{
-    static final String EMPTY = "";
-
-    private static final POILogger logger = POILogFactory
-            .getLogger( AbstractWordUtils.class );
-
-    public static final float TWIPS_PER_INCH = 1440.0f;
-    public static final int TWIPS_PER_PT = 20;
-
-    static void closeQuietly( final Closeable closeable )
-    {
-        try
-        {
-            closeable.close();
-        }
-        catch ( Exception exc )
-        {
-            logger.log( POILogger.ERROR, "Unable to close resource: " + exc,
-                    exc );
-        }
-    }
-
-    static boolean equals( String str1, String str2 )
-    {
-        return str1 == null ? str2 == null : str1.equals( str2 );
-    }
-
-    // XXX incorporate into Range
-    static List<CharacterRun> findCharacterRuns( Range range )
-    {
-        final int min = range.getStartOffset();
-        final int max = range.getEndOffset();
-
-        List<CharacterRun> result = new ArrayList<CharacterRun>();
-        List<CHPX> chpxs = getCharacters( range );
-        for ( int i = 0; i < chpxs.size(); i++ )
-        {
-            CHPX chpx = chpxs.get( i );
-            if ( chpx == null )
-                continue;
-
-            if ( Math.max( min, chpx.getStart() ) <= Math.min( max,
-                    chpx.getEnd() ) )
-            {
-                final CharacterRun characterRun = getCharacterRun( range, chpx );
-
-                if ( characterRun == null )
-                    continue;
-
-                result.add( characterRun );
-            }
-        }
-
-        return result;
-    }
-
-    public static String getBorderType( BorderCode borderCode )
-    {
-        if ( borderCode == null )
-            throw new IllegalArgumentException( "borderCode is null" );
-
-        switch ( borderCode.getBorderType() )
-        {
-        case 1:
-        case 2:
-            return "solid";
-        case 3:
-            return "double";
-        case 5:
-            return "solid";
-        case 6:
-            return "dotted";
-        case 7:
-        case 8:
-            return "dashed";
-        case 9:
-            return "dotted";
-        case 10:
-        case 11:
-        case 12:
-        case 13:
-        case 14:
-        case 15:
-        case 16:
-        case 17:
-        case 18:
-        case 19:
-            return "double";
-        case 20:
-            return "solid";
-        case 21:
-            return "double";
-        case 22:
-            return "dashed";
-        case 23:
-            return "dashed";
-        case 24:
-            return "ridge";
-        case 25:
-            return "grooved";
-        default:
-            return "solid";
-        }
-    }
-
-    public static String getBorderWidth( BorderCode borderCode )
-    {
-        int lineWidth = borderCode.getLineWidth();
-        int pt = lineWidth / 8;
-        int pte = lineWidth - pt * 8;
-
-        StringBuilder stringBuilder = new StringBuilder();
-        stringBuilder.append( pt );
-        stringBuilder.append( "." );
-        stringBuilder.append( 1000 / 8 * pte );
-        stringBuilder.append( "pt" );
-        return stringBuilder.toString();
-    }
-
-    public static String getBulletText( ListTables listTables,
-            Paragraph paragraph, int listId )
-    {
-        final ListLevel listLevel = listTables.getLevel( listId,
-                paragraph.getIlvl() );
-
-        if ( listLevel.getNumberText() == null )
-            return EMPTY;
-
-        StringBuffer bulletBuffer = new StringBuffer();
-        char[] xst = listLevel.getNumberText().toCharArray();
-        for ( char element : xst )
-        {
-            if ( element < 9 )
-            {
-                ListLevel numLevel = listTables.getLevel( listId, element );
-
-                int num = numLevel.getStartAt();
-                bulletBuffer.append( NumberFormatter.getNumber( num,
-                        listLevel.getNumberFormat() ) );
-
-                if ( numLevel == listLevel )
-                {
-                    numLevel.setStartAt( numLevel.getStartAt() + 1 );
-                }
-
-            }
-            else
-            {
-                bulletBuffer.append( element );
-            }
-        }
-
-        byte follow = getIxchFollow( listLevel );
-        switch ( follow )
-        {
-        case 0:
-            bulletBuffer.append( "\t" );
-            break;
-        case 1:
-            bulletBuffer.append( " " );
-            break;
-        default:
-            break;
-        }
-
-        return bulletBuffer.toString();
-    }
-
-    private static CharacterRun getCharacterRun( Range range, CHPX chpx )
-    {
-        try
-        {
-            Method method = Range.class.getDeclaredMethod( "getCharacterRun",
-                    CHPX.class );
-            method.setAccessible( true );
-            return (CharacterRun) method.invoke( range, chpx );
-        }
-        catch ( Exception exc )
-        {
-            throw new Error( exc );
-        }
-    }
-
-    private static List<CHPX> getCharacters( Range range )
-    {
-        try
-        {
-            Field field = Range.class.getDeclaredField( "_characters" );
-            field.setAccessible( true );
-            return (List<CHPX>) field.get( range );
-        }
-        catch ( Exception exc )
-        {
-            throw new Error( exc );
-        }
-    }
-
-    public static String getColor( int ico )
-    {
-        switch ( ico )
-        {
-        case 1:
-            return "black";
-        case 2:
-            return "blue";
-        case 3:
-            return "cyan";
-        case 4:
-            return "green";
-        case 5:
-            return "magenta";
-        case 6:
-            return "red";
-        case 7:
-            return "yellow";
-        case 8:
-            return "white";
-        case 9:
-            return "darkblue";
-        case 10:
-            return "darkcyan";
-        case 11:
-            return "darkgreen";
-        case 12:
-            return "darkmagenta";
-        case 13:
-            return "darkred";
-        case 14:
-            return "darkyellow";
-        case 15:
-            return "darkgray";
-        case 16:
-            return "lightgray";
-        default:
-            return "black";
-        }
-    }
-
-    public static byte getIxchFollow( ListLevel listLevel )
-    {
-        try
-        {
-            Field field = ListLevel.class.getDeclaredField( "_ixchFollow" );
-            field.setAccessible( true );
-            return ( (Byte) field.get( listLevel ) ).byteValue();
-        }
-        catch ( Exception exc )
-        {
-            throw new Error( exc );
-        }
-    }
-
-    public static String getJustification( int js )
-    {
-        switch ( js )
-        {
-        case 0:
-            return "start";
-        case 1:
-            return "center";
-        case 2:
-            return "end";
-        case 3:
-        case 4:
-            return "justify";
-        case 5:
-            return "center";
-        case 6:
-            return "left";
-        case 7:
-            return "start";
-        case 8:
-            return "end";
-        case 9:
-            return "justify";
-        }
-        return "";
-    }
-
-    public static String getListItemNumberLabel( int number, int format )
-    {
-
-        if ( format != 0 )
-            System.err.println( "NYI: toListItemNumberLabel(): " + format );
-
-        return String.valueOf( number );
-    }
-
-    public static SectionProperties getSectionProperties( Section section )
-    {
-        try
-        {
-            Field field = Section.class.getDeclaredField( "_props" );
-            field.setAccessible( true );
-            return (SectionProperties) field.get( section );
-        }
-        catch ( Exception exc )
-        {
-            throw new Error( exc );
-        }
-    }
-
-    static boolean isEmpty( String str )
-    {
-        return str == null || str.length() == 0;
-    }
-
-    static boolean isNotEmpty( String str )
-    {
-        return !isEmpty( str );
-    }
-
-    public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
-    {
-        final FileInputStream istream = new FileInputStream( docFile );
-        try
-        {
-            return loadDoc( istream );
-        }
-        finally
-        {
-            closeQuietly( istream );
-        }
-    }
-
-    public static HWPFDocumentCore loadDoc( InputStream inputStream )
-            throws IOException
-    {
-        final POIFSFileSystem poifsFileSystem = HWPFDocumentCore
-                .verifyAndBuildPOIFS( inputStream );
-        try
-        {
-            return new HWPFDocument( poifsFileSystem );
-        }
-        catch ( OldWordFileFormatException exc )
-        {
-            return new HWPFOldDocument( poifsFileSystem );
-        }
-    }
-
-    public static TableIterator newTableIterator( Range range, int level )
-    {
-        try
-        {
-            Constructor<TableIterator> constructor = TableIterator.class
-                    .getDeclaredConstructor( Range.class, int.class );
-            constructor.setAccessible( true );
-            return constructor.newInstance( range, Integer.valueOf( level ) );
-        }
-        catch ( Exception exc )
-        {
-            throw new Error( exc );
-        }
-    }
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/FoDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/FoDocumentFacade.java
deleted file mode 100644 (file)
index 5e474bf..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-public class FoDocumentFacade
-{
-    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
-
-    protected final Document document;
-    protected final Element layoutMasterSet;
-    protected final Element root;
-
-    public FoDocumentFacade( Document document )
-    {
-        this.document = document;
-
-        root = document.createElementNS( NS_XSLFO, "fo:root" );
-        document.appendChild( root );
-
-        layoutMasterSet = document.createElementNS( NS_XSLFO,
-                "fo:layout-master-set" );
-        root.appendChild( layoutMasterSet );
-    }
-
-    public Element addFlowToPageSequence( final Element pageSequence,
-            String flowName )
-    {
-        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
-        flow.setAttribute( "flow-name", flowName );
-        pageSequence.appendChild( flow );
-
-        return flow;
-    }
-
-    public Element addListItem( Element listBlock )
-    {
-        Element result = createListItem();
-        listBlock.appendChild( result );
-        return result;
-    }
-
-    public Element addListItemBody( Element listItem )
-    {
-        Element result = createListItemBody();
-        listItem.appendChild( result );
-        return result;
-    }
-
-    public Element addListItemLabel( Element listItem, String text )
-    {
-        Element result = createListItemLabel( text );
-        listItem.appendChild( result );
-        return result;
-    }
-
-    public Element addPageSequence( String pageMaster )
-    {
-        final Element pageSequence = document.createElementNS( NS_XSLFO,
-                "fo:page-sequence" );
-        pageSequence.setAttribute( "master-reference", pageMaster );
-        root.appendChild( pageSequence );
-        return pageSequence;
-    }
-
-    public Element addRegionBody( Element pageMaster )
-    {
-        final Element regionBody = document.createElementNS( NS_XSLFO,
-                "fo:region-body" );
-        pageMaster.appendChild( regionBody );
-
-        return regionBody;
-    }
-
-    public Element addSimplePageMaster( String masterName )
-    {
-        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
-                "fo:simple-page-master" );
-        simplePageMaster.setAttribute( "master-name", masterName );
-        layoutMasterSet.appendChild( simplePageMaster );
-
-        return simplePageMaster;
-    }
-
-    protected Element createBasicLinkExternal( String externalDestination )
-    {
-        final Element basicLink = document.createElementNS( NS_XSLFO,
-                "fo:basic-link" );
-        basicLink.setAttribute( "external-destination", externalDestination );
-        return basicLink;
-    }
-
-    public Element createBasicLinkInternal( String internalDestination )
-    {
-        final Element basicLink = document.createElementNS( NS_XSLFO,
-                "fo:basic-link" );
-        basicLink.setAttribute( "internal-destination", internalDestination );
-        return basicLink;
-    }
-
-    public Element createBlock()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:block" );
-    }
-
-    public Element createExternalGraphic( String source )
-    {
-        Element result = document.createElementNS( NS_XSLFO,
-                "fo:external-graphic" );
-        result.setAttribute( "src", "url('" + source + "')" );
-        return result;
-    }
-
-    public Element createInline()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:inline" );
-    }
-
-    public Element createLeader()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:leader" );
-    }
-
-    public Element createListBlock()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:list-block" );
-    }
-
-    public Element createListItem()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:list-item" );
-    }
-
-    public Element createListItemBody()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
-    }
-
-    public Element createListItemLabel( String text )
-    {
-        Element result = document.createElementNS( NS_XSLFO,
-                "fo:list-item-label" );
-        Element block = createBlock();
-        block.appendChild( document.createTextNode( text ) );
-        result.appendChild( block );
-        return result;
-    }
-
-    protected Element createTable()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:table" );
-    }
-
-    protected Element createTableBody()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:table-body" );
-    }
-
-    protected Element createTableCell()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
-    }
-
-    protected Element createTableHeader()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:table-header" );
-    }
-
-    protected Element createTableRow()
-    {
-        return document.createElementNS( NS_XSLFO, "fo:table-row" );
-    }
-
-    protected Text createText( String data )
-    {
-        return document.createTextNode( data );
-    }
-
-    public Document getDocument()
-    {
-        return document;
-    }
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/HtmlDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/HtmlDocumentFacade.java
deleted file mode 100644 (file)
index 5e2b1f0..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-public class HtmlDocumentFacade
-{
-
-    protected final Element body;
-    protected final Document document;
-    protected final Element head;
-    protected final Element html;
-
-    public HtmlDocumentFacade( Document document )
-    {
-        this.document = document;
-
-        html = document.createElement( "html" );
-        document.appendChild( html );
-
-        body = document.createElement( "body" );
-        head = document.createElement( "head" );
-
-        html.appendChild( head );
-        html.appendChild( body );
-    }
-
-    public Element createHyperlink( String internalDestination )
-    {
-        final Element basicLink = document.createElement( "a" );
-        basicLink.setAttribute( "href", internalDestination );
-        return basicLink;
-    }
-
-    public Element createListItem()
-    {
-        return document.createElement( "li" );
-    }
-
-    public Element createParagraph()
-    {
-        return document.createElement( "p" );
-    }
-
-    public Element createTable()
-    {
-        return document.createElement( "table" );
-    }
-
-    public Element createTableBody()
-    {
-        return document.createElement( "tbody" );
-    }
-
-    public Element createTableCell()
-    {
-        return document.createElement( "td" );
-    }
-
-    public Element createTableHeader()
-    {
-        return document.createElement( "thead" );
-    }
-
-    public Element createTableHeaderCell()
-    {
-        return document.createElement( "th" );
-    }
-
-    public Element createTableRow()
-    {
-        return document.createElement( "tr" );
-    }
-
-    public Text createText( String data )
-    {
-        return document.createTextNode( data );
-    }
-
-    public Element createUnorderedList()
-    {
-        return document.createElement( "ul" );
-    }
-
-    public Document getDocument()
-    {
-        return document;
-    }
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java
deleted file mode 100644 (file)
index d4a2cc7..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *  ====================================================================
- *    Licensed to the Apache Software Foundation (ASF) under one or more
- *    contributor license agreements.  See the NOTICE file distributed with
- *    this work for additional information regarding copyright ownership.
- *    The ASF licenses this file to You under the Apache License, Version 2.0
- *    (the "License"); you may not use this file except in compliance with
- *    the License.  You may obtain a copy of the License at
- *
- *        http://www.apache.org/licenses/LICENSE-2.0
- *
- *    Unless required by applicable law or agreed to in writing, software
- *    distributed under the License is distributed on an "AS IS" BASIS,
- *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *    See the License for the specific language governing permissions and
- *    limitations under the License.
- * ====================================================================
- */
-
-package org.apache.poi.hwpf.extractor;
-
-/**
- * Comment me
- *
- * @author Ryan Ackley
- */
-public final class NumberFormatter {
-
-    private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e", "f", "g", "h", "i",
-            "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "x", "y", "z" };
-
-    private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv", "v", "vi", "vii",
-            "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix",
-            "xx", "xxi", "xxii", "xxiii", "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx",
-            "xxxi", "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii", "xxxviii",
-            "xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv", "xlvi", "xlvii", "xlviii",
-            "xlix", "l" };
-
-    private final static int T_ARABIC = 0;
-    private final static int T_LOWER_LETTER = 4;
-    private final static int T_LOWER_ROMAN = 2;
-    private final static int T_ORDINAL = 5;
-    private final static int T_UPPER_LETTER = 3;
-    private final static int T_UPPER_ROMAN = 1;
-
-    public static String getNumber(int num, int style) {
-        switch (style) {
-        case T_UPPER_ROMAN:
-            return C_ROMAN[num - 1].toUpperCase();
-        case T_LOWER_ROMAN:
-            return C_ROMAN[num - 1];
-        case T_UPPER_LETTER:
-            return C_LETTERS[num - 1].toUpperCase();
-        case T_LOWER_LETTER:
-            return C_LETTERS[num - 1];
-        case T_ARABIC:
-        case T_ORDINAL:
-        default:
-            return String.valueOf(num);
-        }
-    }
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java
deleted file mode 100644 (file)
index 67f6bb1..0000000
+++ /dev/null
@@ -1,627 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.model.ListFormatOverride;
-import org.apache.poi.hwpf.model.ListTables;
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.Range;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.SectionProperties;
-import org.apache.poi.hwpf.usermodel.Table;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableIterator;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-/**
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class WordToFoExtractor extends AbstractWordExtractor
-{
-
-    /**
-     * Holds properties values, applied to current <tt>fo:block</tt> element.
-     * Those properties shall not be doubled in children <tt>fo:inline</tt>
-     * elements.
-     */
-    private static class BlockProperies
-    {
-        final boolean pBold;
-        final String pFontName;
-        final int pFontSize;
-        final boolean pItalic;
-
-        public BlockProperies( String pFontName, int pFontSize, boolean pBold,
-                boolean pItalic )
-        {
-            this.pFontName = pFontName;
-            this.pFontSize = pFontSize;
-            this.pBold = pBold;
-            this.pItalic = pItalic;
-        }
-    }
-
-    private static final POILogger logger = POILogFactory
-            .getLogger( WordToFoExtractor.class );
-
-    public static String getBorderType( BorderCode borderCode )
-    {
-        if ( borderCode == null )
-            throw new IllegalArgumentException( "borderCode is null" );
-
-        switch ( borderCode.getBorderType() )
-        {
-        case 1:
-        case 2:
-            return "solid";
-        case 3:
-            return "double";
-        case 5:
-            return "solid";
-        case 6:
-            return "dotted";
-        case 7:
-        case 8:
-            return "dashed";
-        case 9:
-            return "dotted";
-        case 10:
-        case 11:
-        case 12:
-        case 13:
-        case 14:
-        case 15:
-        case 16:
-        case 17:
-        case 18:
-        case 19:
-            return "double";
-        case 20:
-            return "solid";
-        case 21:
-            return "double";
-        case 22:
-            return "dashed";
-        case 23:
-            return "dashed";
-        case 24:
-            return "ridge";
-        case 25:
-            return "grooved";
-        default:
-            return "solid";
-        }
-    }
-
-    /**
-     * Java main() interface to interact with WordToFoExtractor
-     * 
-     * <p>
-     * Usage: WordToFoExtractor infile outfile
-     * </p>
-     * Where infile is an input .doc file ( Word 97-2007) which will be rendered
-     * as XSL-FO into outfile
-     * 
-     */
-    public static void main( String[] args )
-    {
-        if ( args.length < 2 )
-        {
-            System.err
-                    .println( "Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>" );
-            return;
-        }
-
-        System.out.println( "Converting " + args[0] );
-        System.out.println( "Saving output to " + args[1] );
-        try
-        {
-            Document doc = WordToFoExtractor.process( new File( args[0] ) );
-
-            FileWriter out = new FileWriter( args[1] );
-            DOMSource domSource = new DOMSource( doc );
-            StreamResult streamResult = new StreamResult( out );
-            TransformerFactory tf = TransformerFactory.newInstance();
-            Transformer serializer = tf.newTransformer();
-            // TODO set encoding from a command argument
-            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
-            serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
-            serializer.transform( domSource, streamResult );
-            out.close();
-        }
-        catch ( Exception e )
-        {
-            e.printStackTrace();
-        }
-    }
-
-    static Document process( File docFile ) throws Exception
-    {
-        final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
-        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
-                DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                        .newDocument() );
-        wordToFoExtractor.processDocument( hwpfDocument );
-        return wordToFoExtractor.getDocument();
-    }
-
-    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
-
-    protected final FoDocumentFacade foDocumentFacade;
-
-    /**
-     * Creates new instance of {@link WordToFoExtractor}. Can be used for output
-     * several {@link HWPFDocument}s into single FO document.
-     * 
-     * @param document
-     *            XML DOM Document used as XSL FO document. Shall support
-     *            namespaces
-     */
-    public WordToFoExtractor( Document document )
-    {
-        this.foDocumentFacade = new FoDocumentFacade( document );
-    }
-
-    protected String createPageMaster( SectionProperties sep, String type,
-            int section )
-    {
-        float height = sep.getYaPage() / WordToFoUtils.TWIPS_PER_INCH;
-        float width = sep.getXaPage() / WordToFoUtils.TWIPS_PER_INCH;
-        float leftMargin = sep.getDxaLeft() / WordToFoUtils.TWIPS_PER_INCH;
-        float rightMargin = sep.getDxaRight() / WordToFoUtils.TWIPS_PER_INCH;
-        float topMargin = sep.getDyaTop() / WordToFoUtils.TWIPS_PER_INCH;
-        float bottomMargin = sep.getDyaBottom() / WordToFoUtils.TWIPS_PER_INCH;
-
-        // add these to the header
-        String pageMasterName = type + "-page" + section;
-
-        Element pageMaster = foDocumentFacade
-                .addSimplePageMaster( pageMasterName );
-        pageMaster.setAttribute( "page-height", height + "in" );
-        pageMaster.setAttribute( "page-width", width + "in" );
-
-        Element regionBody = foDocumentFacade.addRegionBody( pageMaster );
-        regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
-                + "in " + bottomMargin + "in " + leftMargin + "in" );
-
-        /*
-         * 6.4.14 fo:region-body
-         * 
-         * The values of the padding and border-width traits must be "0".
-         */
-        // WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top");
-        // WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom");
-        // WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
-        // WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
-
-        if ( sep.getCcolM1() > 0 )
-        {
-            regionBody.setAttribute( "column-count", ""
-                    + ( sep.getCcolM1() + 1 ) );
-            if ( sep.getFEvenlySpaced() )
-            {
-                regionBody.setAttribute( "column-gap",
-                        ( sep.getDxaColumns() / WordToFoUtils.TWIPS_PER_INCH )
-                                + "in" );
-            }
-            else
-            {
-                regionBody.setAttribute( "column-gap", "0.25in" );
-            }
-        }
-
-        return pageMasterName;
-    }
-
-    public Document getDocument()
-    {
-        return foDocumentFacade.getDocument();
-    }
-
-    @Override
-    protected void outputCharacters( Element block, CharacterRun characterRun,
-            String text )
-    {
-        BlockProperies blockProperies = this.blocksProperies.peek();
-        Element inline = foDocumentFacade.createInline();
-        if ( characterRun.isBold() != blockProperies.pBold )
-        {
-            WordToFoUtils.setBold( inline, characterRun.isBold() );
-        }
-        if ( characterRun.isItalic() != blockProperies.pItalic )
-        {
-            WordToFoUtils.setItalic( inline, characterRun.isItalic() );
-        }
-        if ( characterRun.getFontName() != null
-                && !AbstractWordUtils.equals( characterRun.getFontName(),
-                        blockProperies.pFontName ) )
-        {
-            WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
-        }
-        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
-        {
-            WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 );
-        }
-        WordToFoUtils.setCharactersProperties( characterRun, inline );
-        block.appendChild( inline );
-
-        Text textNode = foDocumentFacade.createText( text );
-        inline.appendChild( textNode );
-    }
-
-    protected void processHyperlink( HWPFDocumentCore hwpfDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String hyperlink, int beginTextInclusive, int endTextExclusive )
-    {
-        Element basicLink = foDocumentFacade
-                .createBasicLinkExternal( hyperlink );
-        currentBlock.appendChild( basicLink );
-
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
-    }
-
-    /**
-     * This method shall store image bytes in external file and convert it if
-     * necessary. Images shall be stored using PNG format (for bitmap) or SVG
-     * (for vector). Other formats may be not supported by your XSL FO
-     * processor.
-     * <p>
-     * Please note the
-     * {@link WordToFoUtils#setPictureProperties(Picture, Element)} method.
-     * 
-     * @param currentBlock
-     *            currently processed FO element, like <tt>fo:block</tt>. Shall
-     *            be used as parent of newly created
-     *            <tt>fo:external-graphic</tt> or
-     *            <tt>fo:instream-foreign-object</tt>
-     * @param inlined
-     *            if image is inlined
-     * @param picture
-     *            HWPF object, contained picture data and properties
-     */
-    protected void processImage( Element currentBlock, boolean inlined,
-            Picture picture )
-    {
-        // no default implementation -- skip
-        currentBlock.appendChild( foDocumentFacade.getDocument().createComment(
-                "Image link to '" + picture.suggestFullFileName()
-                        + "' can be here" ) );
-    }
-
-    protected void processPageref( HWPFDocumentCore hwpfDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String pageref, int beginTextInclusive, int endTextExclusive )
-    {
-        Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
-        currentBlock.appendChild( basicLink );
-
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
-    }
-
-    protected void processParagraph( HWPFDocumentCore hwpfDocument,
-            Element parentFopElement, int currentTableLevel,
-            Paragraph paragraph, String bulletText )
-    {
-        final Element block = foDocumentFacade.createBlock();
-        parentFopElement.appendChild( block );
-
-        WordToFoUtils.setParagraphProperties( paragraph, block );
-
-        final int charRuns = paragraph.numCharacterRuns();
-
-        if ( charRuns == 0 )
-        {
-            return;
-        }
-
-        {
-            final String pFontName;
-            final int pFontSize;
-            final boolean pBold;
-            final boolean pItalic;
-            {
-                CharacterRun characterRun = paragraph.getCharacterRun( 0 );
-                pFontSize = characterRun.getFontSize() / 2;
-                pFontName = characterRun.getFontName();
-                pBold = characterRun.isBold();
-                pItalic = characterRun.isItalic();
-            }
-            WordToFoUtils.setFontFamily( block, pFontName );
-            WordToFoUtils.setFontSize( block, pFontSize );
-            WordToFoUtils.setBold( block, pBold );
-            WordToFoUtils.setItalic( block, pItalic );
-
-            blocksProperies.push( new BlockProperies( pFontName, pFontSize,
-                    pBold, pItalic ) );
-        }
-        try
-        {
-            boolean haveAnyText = false;
-
-            if ( WordToFoUtils.isNotEmpty( bulletText ) )
-            {
-                Element inline = foDocumentFacade.createInline();
-                block.appendChild( inline );
-
-                Text textNode = foDocumentFacade.createText( bulletText );
-                inline.appendChild( textNode );
-
-                haveAnyText |= bulletText.trim().length() != 0;
-            }
-
-            List<CharacterRun> characterRuns = WordToFoUtils
-                    .findCharacterRuns( paragraph );
-            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
-                    paragraph, block, characterRuns, 0, characterRuns.size() );
-
-            if ( !haveAnyText )
-            {
-                Element leader = foDocumentFacade.createLeader();
-                block.appendChild( leader );
-            }
-        }
-        finally
-        {
-            blocksProperies.pop();
-        }
-
-        return;
-    }
-
-    protected void processSection( HWPFDocumentCore wordDocument,
-            Section section, int sectionCounter )
-    {
-        String regularPage = createPageMaster(
-                WordToFoUtils.getSectionProperties( section ), "page",
-                sectionCounter );
-
-        Element pageSequence = foDocumentFacade.addPageSequence( regularPage );
-        Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
-                "xsl-region-body" );
-
-        processSectionParagraphes( wordDocument, flow, section, 0 );
-    }
-
-    protected void processSectionParagraphes( HWPFDocument wordDocument,
-            Element flow, Range range, int currentTableLevel )
-    {
-        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
-        for ( TableIterator tableIterator = WordToFoUtils.newTableIterator(
-                range, currentTableLevel + 1 ); tableIterator.hasNext(); )
-        {
-            Table next = tableIterator.next();
-            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
-        }
-
-        final ListTables listTables = wordDocument.getListTables();
-        int currentListInfo = 0;
-
-        final int paragraphs = range.numParagraphs();
-        for ( int p = 0; p < paragraphs; p++ )
-        {
-            Paragraph paragraph = range.getParagraph( p );
-
-            if ( allTables.containsKey( Integer.valueOf( paragraph
-                    .getStartOffset() ) ) )
-            {
-                Table table = allTables.get( Integer.valueOf( paragraph
-                        .getStartOffset() ) );
-                processTable( wordDocument, flow, table, currentTableLevel + 1 );
-                continue;
-            }
-
-            if ( paragraph.isInTable()
-                    && paragraph.getTableLevel() != currentTableLevel )
-            {
-                continue;
-            }
-
-            if ( paragraph.getIlfo() != currentListInfo )
-            {
-                currentListInfo = paragraph.getIlfo();
-            }
-
-            if ( currentListInfo != 0 )
-            {
-                if ( listTables != null )
-                {
-                    final ListFormatOverride listFormatOverride = listTables
-                            .getOverride( paragraph.getIlfo() );
-
-                    String label = WordToFoUtils.getBulletText( listTables,
-                            paragraph, listFormatOverride.getLsid() );
-
-                    processParagraph( wordDocument, flow, currentTableLevel,
-                            paragraph, label );
-                }
-                else
-                {
-                    logger.log( POILogger.WARN,
-                            "Paragraph #" + paragraph.getStartOffset() + "-"
-                                    + paragraph.getEndOffset()
-                                    + " has reference to list structure #"
-                                    + currentListInfo
-                                    + ", but listTables not defined in file" );
-
-                    processParagraph( wordDocument, flow, currentTableLevel,
-                            paragraph, WordToFoUtils.EMPTY );
-                }
-            }
-            else
-            {
-                processParagraph( wordDocument, flow, currentTableLevel,
-                        paragraph, WordToFoUtils.EMPTY );
-            }
-        }
-
-    }
-
-    protected void processTable( HWPFDocumentCore wordDocument, Element flow,
-            Table table, int thisTableLevel )
-    {
-        Element tableHeader = foDocumentFacade.createTableHeader();
-        Element tableBody = foDocumentFacade.createTableBody();
-
-        final int tableRows = table.numRows();
-
-        int maxColumns = Integer.MIN_VALUE;
-        for ( int r = 0; r < tableRows; r++ )
-        {
-            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
-        }
-
-        for ( int r = 0; r < tableRows; r++ )
-        {
-            TableRow tableRow = table.getRow( r );
-
-            Element tableRowElement = foDocumentFacade.createTableRow();
-            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
-
-            final int rowCells = tableRow.numCells();
-            for ( int c = 0; c < rowCells; c++ )
-            {
-                TableCell tableCell = tableRow.getCell( c );
-
-                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
-                    continue;
-
-                if ( tableCell.isVerticallyMerged()
-                        && !tableCell.isFirstVerticallyMerged() )
-                    continue;
-
-                Element tableCellElement = foDocumentFacade.createTableCell();
-                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
-                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
-                        c == rowCells - 1 );
-
-                if ( tableCell.isFirstMerged() )
-                {
-                    int count = 0;
-                    for ( int c1 = c; c1 < rowCells; c1++ )
-                    {
-                        TableCell nextCell = tableRow.getCell( c1 );
-                        if ( nextCell.isMerged() )
-                            count++;
-                        if ( !nextCell.isMerged() )
-                            break;
-                    }
-                    tableCellElement.setAttribute( "number-columns-spanned", ""
-                            + count );
-                }
-                else
-                {
-                    if ( c == rowCells - 1 && c != maxColumns - 1 )
-                    {
-                        tableCellElement.setAttribute(
-                                "number-columns-spanned", ""
-                                        + ( maxColumns - c ) );
-                    }
-                }
-
-                if ( tableCell.isFirstVerticallyMerged() )
-                {
-                    int count = 0;
-                    for ( int r1 = r; r1 < tableRows; r1++ )
-                    {
-                        TableRow nextRow = table.getRow( r1 );
-                        if ( nextRow.numCells() < c )
-                            break;
-                        TableCell nextCell = nextRow.getCell( c );
-                        if ( nextCell.isVerticallyMerged() )
-                            count++;
-                        if ( !nextCell.isVerticallyMerged() )
-                            break;
-                    }
-                    tableCellElement.setAttribute( "number-rows-spanned", ""
-                            + count );
-                }
-
-                processSectionParagraphes( wordDocument, tableCellElement,
-                        tableCell, thisTableLevel );
-
-                if ( !tableCellElement.hasChildNodes() )
-                {
-                    tableCellElement.appendChild( foDocumentFacade
-                            .createBlock() );
-                }
-
-                tableRowElement.appendChild( tableCellElement );
-            }
-
-            if ( tableRow.isTableHeader() )
-            {
-                tableHeader.appendChild( tableRowElement );
-            }
-            else
-            {
-                tableBody.appendChild( tableRowElement );
-            }
-        }
-
-        final Element tableElement = foDocumentFacade.createTable();
-        if ( tableHeader.hasChildNodes() )
-        {
-            tableElement.appendChild( tableHeader );
-        }
-        if ( tableBody.hasChildNodes() )
-        {
-            tableElement.appendChild( tableBody );
-            flow.appendChild( tableElement );
-        }
-        else
-        {
-            logger.log(
-                    POILogger.WARN,
-                    "Table without body starting on offset "
-                            + table.getStartOffset() + " -- "
-                            + table.getEndOffset() );
-        }
-    }
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java
deleted file mode 100644 (file)
index 1b3447f..0000000
+++ /dev/null
@@ -1,323 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterProperties;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.w3c.dom.Element;
-
-public class WordToFoUtils extends AbstractWordUtils
-{
-    public static void setBold( final Element element, final boolean bold )
-    {
-        element.setAttribute( "font-weight", bold ? "bold" : "normal" );
-    }
-
-    public static void setBorder( Element element, BorderCode borderCode,
-            String where )
-    {
-        if ( element == null )
-            throw new IllegalArgumentException( "element is null" );
-
-        if ( borderCode == null || borderCode.getBorderType() == 0 )
-            return;
-
-        if ( isEmpty( where ) )
-        {
-            element.setAttribute( "border-style", getBorderType( borderCode ) );
-            element.setAttribute( "border-color",
-                    getColor( borderCode.getColor() ) );
-            element.setAttribute( "border-width", getBorderWidth( borderCode ) );
-        }
-        else
-        {
-            element.setAttribute( "border-" + where + "-style",
-                    getBorderType( borderCode ) );
-            element.setAttribute( "border-" + where + "-color",
-                    getColor( borderCode.getColor() ) );
-            element.setAttribute( "border-" + where + "-width",
-                    getBorderWidth( borderCode ) );
-        }
-    }
-
-    public static void setCharactersProperties(
-            final CharacterRun characterRun, final Element inline )
-    {
-        final CharacterProperties clonedProperties = characterRun
-                .cloneProperties();
-        StringBuilder textDecorations = new StringBuilder();
-
-        setBorder( inline, clonedProperties.getBrc(), EMPTY );
-
-        if ( characterRun.isCapitalized() )
-        {
-            inline.setAttribute( "text-transform", "uppercase" );
-        }
-        if ( characterRun.isHighlighted() )
-        {
-            inline.setAttribute( "background-color",
-                    getColor( clonedProperties.getIcoHighlight() ) );
-        }
-        if ( characterRun.isStrikeThrough() )
-        {
-            if ( textDecorations.length() > 0 )
-                textDecorations.append( " " );
-            textDecorations.append( "line-through" );
-        }
-        if ( characterRun.isShadowed() )
-        {
-            inline.setAttribute( "text-shadow", characterRun.getFontSize() / 24
-                    + "pt" );
-        }
-        if ( characterRun.isSmallCaps() )
-        {
-            inline.setAttribute( "font-variant", "small-caps" );
-        }
-        if ( characterRun.getSubSuperScriptIndex() == 1 )
-        {
-            inline.setAttribute( "baseline-shift", "super" );
-            inline.setAttribute( "font-size", "smaller" );
-        }
-        if ( characterRun.getSubSuperScriptIndex() == 2 )
-        {
-            inline.setAttribute( "baseline-shift", "sub" );
-            inline.setAttribute( "font-size", "smaller" );
-        }
-        if ( characterRun.getUnderlineCode() > 0 )
-        {
-            if ( textDecorations.length() > 0 )
-                textDecorations.append( " " );
-            textDecorations.append( "underline" );
-        }
-        if ( characterRun.isVanished() )
-        {
-            inline.setAttribute( "visibility", "hidden" );
-        }
-        if ( textDecorations.length() > 0 )
-        {
-            inline.setAttribute( "text-decoration", textDecorations.toString() );
-        }
-    }
-
-    public static void setFontFamily( final Element element,
-            final String fontFamily )
-    {
-        if ( isEmpty( fontFamily ) )
-            return;
-
-        element.setAttribute( "font-family", fontFamily );
-    }
-
-    public static void setFontSize( final Element element, final int fontSize )
-    {
-        element.setAttribute( "font-size", String.valueOf( fontSize ) );
-    }
-
-    public static void setIndent( Paragraph paragraph, Element block )
-    {
-        if ( paragraph.getFirstLineIndent() != 0 )
-        {
-            block.setAttribute(
-                    "text-indent",
-                    String.valueOf( paragraph.getFirstLineIndent()
-                            / TWIPS_PER_PT )
-                            + "pt" );
-        }
-        if ( paragraph.getIndentFromLeft() != 0 )
-        {
-            block.setAttribute(
-                    "start-indent",
-                    String.valueOf( paragraph.getIndentFromLeft()
-                            / TWIPS_PER_PT )
-                            + "pt" );
-        }
-        if ( paragraph.getIndentFromRight() != 0 )
-        {
-            block.setAttribute(
-                    "end-indent",
-                    String.valueOf( paragraph.getIndentFromRight()
-                            / TWIPS_PER_PT )
-                            + "pt" );
-        }
-        if ( paragraph.getSpacingBefore() != 0 )
-        {
-            block.setAttribute(
-                    "space-before",
-                    String.valueOf( paragraph.getSpacingBefore() / TWIPS_PER_PT )
-                            + "pt" );
-        }
-        if ( paragraph.getSpacingAfter() != 0 )
-        {
-            block.setAttribute( "space-after",
-                    String.valueOf( paragraph.getSpacingAfter() / TWIPS_PER_PT )
-                            + "pt" );
-        }
-    }
-
-    public static void setItalic( final Element element, final boolean italic )
-    {
-        element.setAttribute( "font-style", italic ? "italic" : "normal" );
-    }
-
-    public static void setJustification( Paragraph paragraph,
-            final Element element )
-    {
-        String justification = getJustification( paragraph.getJustification() );
-        if ( isNotEmpty( justification ) )
-            element.setAttribute( "text-align", justification );
-    }
-
-    public static void setParagraphProperties( Paragraph paragraph,
-            Element block )
-    {
-        setIndent( paragraph, block );
-        setJustification( paragraph, block );
-
-        setBorder( block, paragraph.getBottomBorder(), "bottom" );
-        setBorder( block, paragraph.getLeftBorder(), "left" );
-        setBorder( block, paragraph.getRightBorder(), "right" );
-        setBorder( block, paragraph.getTopBorder(), "top" );
-
-        if ( paragraph.pageBreakBefore() )
-        {
-            block.setAttribute( "break-before", "page" );
-        }
-
-        block.setAttribute( "hyphenate",
-                String.valueOf( paragraph.isAutoHyphenated() ) );
-
-        if ( paragraph.keepOnPage() )
-        {
-            block.setAttribute( "keep-together.within-page", "always" );
-        }
-
-        if ( paragraph.keepWithNext() )
-        {
-            block.setAttribute( "keep-with-next.within-page", "always" );
-        }
-
-        block.setAttribute( "linefeed-treatment", "preserve" );
-        block.setAttribute( "white-space-collapse", "false" );
-    }
-
-    public static void setPictureProperties( Picture picture,
-            Element graphicElement )
-    {
-        final int aspectRatioX = picture.getAspectRatioX();
-        final int aspectRatioY = picture.getAspectRatioY();
-
-        if ( aspectRatioX > 0 )
-        {
-            graphicElement
-                    .setAttribute( "content-width", ( ( picture.getDxaGoal()
-                            * aspectRatioX / 100 ) / TWIPS_PER_PT )
-                            + "pt" );
-        }
-        else
-            graphicElement.setAttribute( "content-width",
-                    ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
-
-        if ( aspectRatioY > 0 )
-            graphicElement
-                    .setAttribute( "content-height", ( ( picture.getDyaGoal()
-                            * aspectRatioY / 100 ) / TWIPS_PER_PT )
-                            + "pt" );
-        else
-            graphicElement.setAttribute( "content-height",
-                    ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
-
-        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
-        {
-            graphicElement.setAttribute( "scaling", "uniform" );
-        }
-        else
-        {
-            graphicElement.setAttribute( "scaling", "non-uniform" );
-        }
-
-        graphicElement.setAttribute( "vertical-align", "text-bottom" );
-
-        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
-                || picture.getDyaCropBottom() != 0
-                || picture.getDxaCropLeft() != 0 )
-        {
-            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
-            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
-            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
-            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
-            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
-                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
-                    + "pt)" );
-            graphicElement.setAttribute( "oveerflow", "hidden" );
-        }
-    }
-
-    public static void setTableCellProperties( TableRow tableRow,
-            TableCell tableCell, Element element, boolean toppest,
-            boolean bottomest, boolean leftest, boolean rightest )
-    {
-        element.setAttribute( "width", ( tableCell.getWidth() / TWIPS_PER_INCH )
-                + "in" );
-        element.setAttribute( "padding-start",
-                ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
-        element.setAttribute( "padding-end",
-                ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
-
-        BorderCode top = tableCell.getBrcTop() != null
-                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
-                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
-                .getHorizontalBorder();
-        BorderCode bottom = tableCell.getBrcBottom() != null
-                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
-                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
-                : tableRow.getHorizontalBorder();
-
-        BorderCode left = tableCell.getBrcLeft() != null
-                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
-                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
-                .getVerticalBorder();
-        BorderCode right = tableCell.getBrcRight() != null
-                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
-                .getBrcRight() : rightest ? tableRow.getRightBorder()
-                : tableRow.getVerticalBorder();
-
-        setBorder( element, bottom, "bottom" );
-        setBorder( element, left, "left" );
-        setBorder( element, right, "right" );
-        setBorder( element, top, "top" );
-    }
-
-    public static void setTableRowProperties( TableRow tableRow,
-            Element tableRowElement )
-    {
-        if ( tableRow.getRowHeight() > 0 )
-        {
-            tableRowElement.setAttribute( "height",
-                    ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in" );
-        }
-        if ( !tableRow.cantSplit() )
-        {
-            tableRowElement.setAttribute( "keep-together", "always" );
-        }
-    }
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlExtractor.java
deleted file mode 100644 (file)
index 6f27e44..0000000
+++ /dev/null
@@ -1,475 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.util.List;
-import java.util.Stack;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.SectionProperties;
-import org.apache.poi.hwpf.usermodel.Table;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-import static org.apache.poi.hwpf.extractor.AbstractWordUtils.TWIPS_PER_INCH;
-
-/**
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class WordToHtmlExtractor extends AbstractWordExtractor
-{
-
-    /**
-     * Holds properties values, applied to current <tt>p</tt> element. Those
-     * properties shall not be doubled in children <tt>span</tt> elements.
-     */
-    private static class BlockProperies
-    {
-        final String pFontName;
-        final int pFontSize;
-
-        public BlockProperies( String pFontName, int pFontSize )
-        {
-            this.pFontName = pFontName;
-            this.pFontSize = pFontSize;
-        }
-    }
-
-    private static final POILogger logger = POILogFactory
-            .getLogger( WordToHtmlExtractor.class );
-
-    private static String getSectionStyle( Section section )
-    {
-        SectionProperties sep = WordToHtmlUtils.getSectionProperties( section );
-
-        float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
-        float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
-        float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
-        float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
-
-        String style = "margin: " + topMargin + "in " + rightMargin + "in "
-                + bottomMargin + "in " + leftMargin + "in; ";
-
-        if ( sep.getCcolM1() > 0 )
-        {
-            style += "column-count: " + ( sep.getCcolM1() + 1 ) + "; ";
-            if ( sep.getFEvenlySpaced() )
-            {
-                style += "column-gap: "
-                        + ( sep.getDxaColumns() / TWIPS_PER_INCH ) + "in; ";
-            }
-            else
-            {
-                style += "column-gap: 0.25in; ";
-            }
-        }
-        return style;
-    }
-
-    /**
-     * Java main() interface to interact with WordToHtmlExtractor
-     * 
-     * <p>
-     * Usage: WordToHtmlExtractor infile outfile
-     * </p>
-     * Where infile is an input .doc file ( Word 95-2007) which will be rendered
-     * as HTML into outfile
-     */
-    public static void main( String[] args )
-    {
-        if ( args.length < 2 )
-        {
-            System.err
-                    .println( "Usage: WordToHtmlExtractor <inputFile.doc> <saveTo.html>" );
-            return;
-        }
-
-        System.out.println( "Converting " + args[0] );
-        System.out.println( "Saving output to " + args[1] );
-        try
-        {
-            Document doc = WordToHtmlExtractor.process( new File( args[0] ) );
-
-            FileWriter out = new FileWriter( args[1] );
-            DOMSource domSource = new DOMSource( doc );
-            StreamResult streamResult = new StreamResult( out );
-
-            TransformerFactory tf = TransformerFactory.newInstance();
-            Transformer serializer = tf.newTransformer();
-            // TODO set encoding from a command argument
-            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
-            serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
-            serializer.setOutputProperty( OutputKeys.METHOD, "html" );
-            serializer.transform( domSource, streamResult );
-            out.close();
-        }
-        catch ( Exception e )
-        {
-            e.printStackTrace();
-        }
-    }
-
-    static Document process( File docFile ) throws Exception
-    {
-        final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
-        WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
-                DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                        .newDocument() );
-        wordToHtmlExtractor.processDocument( wordDocument );
-        return wordToHtmlExtractor.getDocument();
-    }
-
-    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
-
-    private final HtmlDocumentFacade htmlDocumentFacade;
-
-    /**
-     * Creates new instance of {@link WordToHtmlExtractor}. Can be used for
-     * output several {@link HWPFDocument}s into single HTML document.
-     * 
-     * @param document
-     *            XML DOM Document used as HTML document
-     */
-    public WordToHtmlExtractor( Document document )
-    {
-        this.htmlDocumentFacade = new HtmlDocumentFacade( document );
-    }
-
-    public Document getDocument()
-    {
-        return htmlDocumentFacade.getDocument();
-    }
-
-    @Override
-    protected void outputCharacters( Element pElement,
-            CharacterRun characterRun, String text )
-    {
-        Element span = htmlDocumentFacade.document.createElement( "span" );
-        pElement.appendChild( span );
-
-        StringBuilder style = new StringBuilder();
-        BlockProperies blockProperies = this.blocksProperies.peek();
-        if ( characterRun.getFontName() != null
-                && !WordToHtmlUtils.equals( characterRun.getFontName(),
-                        blockProperies.pFontName ) )
-        {
-            style.append( "font-family: " + characterRun.getFontName() + "; " );
-        }
-        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
-        {
-            style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
-        }
-
-        WordToHtmlUtils.addCharactersProperties( characterRun, style );
-        if ( style.length() != 0 )
-            span.setAttribute( "style", style.toString() );
-
-        Text textNode = htmlDocumentFacade.createText( text );
-        span.appendChild( textNode );
-    }
-
-    protected void processHyperlink( HWPFDocumentCore wordDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String hyperlink, int beginTextInclusive, int endTextExclusive )
-    {
-        Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
-        currentBlock.appendChild( basicLink );
-
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( wordDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
-    }
-
-    /**
-     * This method shall store image bytes in external file and convert it if
-     * necessary. Images shall be stored using PNG format. Other formats may be
-     * not supported by user browser.
-     * <p>
-     * Please note the
-     * {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
-     * 
-     * @param currentBlock
-     *            currently processed HTML element, like <tt>p</tt>. Shall be
-     *            used as parent of newly created <tt>img</tt>
-     * @param inlined
-     *            if image is inlined
-     * @param picture
-     *            HWPF object, contained picture data and properties
-     */
-    protected void processImage( Element currentBlock, boolean inlined,
-            Picture picture )
-    {
-        // no default implementation -- skip
-        currentBlock.appendChild( htmlDocumentFacade.document
-                .createComment( "Image link to '"
-                        + picture.suggestFullFileName() + "' can be here" ) );
-    }
-
-    protected void processPageref( HWPFDocumentCore hwpfDocument,
-            Element currentBlock, Paragraph paragraph,
-            List<CharacterRun> characterRuns, int currentTableLevel,
-            String pageref, int beginTextInclusive, int endTextExclusive )
-    {
-        Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
-        currentBlock.appendChild( basicLink );
-
-        if ( beginTextInclusive < endTextExclusive )
-            processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    basicLink, characterRuns, beginTextInclusive,
-                    endTextExclusive );
-    }
-
-    protected void processParagraph( HWPFDocumentCore hwpfDocument,
-            Element parentFopElement, int currentTableLevel,
-            Paragraph paragraph, String bulletText )
-    {
-        final Element pElement = htmlDocumentFacade.createParagraph();
-        parentFopElement.appendChild( pElement );
-
-        StringBuilder style = new StringBuilder();
-        WordToHtmlUtils.addParagraphProperties( paragraph, style );
-
-        final int charRuns = paragraph.numCharacterRuns();
-
-        if ( charRuns == 0 )
-        {
-            return;
-        }
-
-        {
-            final String pFontName;
-            final int pFontSize;
-            final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
-            if ( characterRun != null )
-            {
-                pFontSize = characterRun.getFontSize() / 2;
-                pFontName = characterRun.getFontName();
-                WordToHtmlUtils.addFontFamily( pFontName, style );
-                WordToHtmlUtils.addFontSize( pFontSize, style );
-            }
-            else
-            {
-                pFontSize = -1;
-                pFontName = WordToHtmlUtils.EMPTY;
-            }
-            blocksProperies.push( new BlockProperies( pFontName, pFontSize ) );
-        }
-        try
-        {
-            if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
-            {
-                Text textNode = htmlDocumentFacade.createText( bulletText );
-                pElement.appendChild( textNode );
-            }
-
-            List<CharacterRun> characterRuns = WordToHtmlUtils
-                    .findCharacterRuns( paragraph );
-            processCharacters( hwpfDocument, currentTableLevel, paragraph,
-                    pElement, characterRuns, 0, characterRuns.size() );
-        }
-        finally
-        {
-            blocksProperies.pop();
-        }
-
-        if ( style.length() > 0 )
-            pElement.setAttribute( "style", style.toString() );
-
-        return;
-    }
-
-    protected void processSection( HWPFDocumentCore wordDocument,
-            Section section, int sectionCounter )
-    {
-        Element div = htmlDocumentFacade.document.createElement( "div" );
-        div.setAttribute( "style", getSectionStyle( section ) );
-        htmlDocumentFacade.body.appendChild( div );
-
-        processSectionParagraphes( wordDocument, div, section, 0 );
-    }
-
-    @Override
-    protected void processSingleSection( HWPFDocumentCore wordDocument,
-            Section section )
-    {
-        htmlDocumentFacade.body.setAttribute( "style",
-                getSectionStyle( section ) );
-
-        processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
-                section, 0 );
-    }
-
-    protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
-            Table table, int thisTableLevel )
-    {
-        Element tableHeader = htmlDocumentFacade.createTableHeader();
-        Element tableBody = htmlDocumentFacade.createTableBody();
-
-        final int tableRows = table.numRows();
-
-        int maxColumns = Integer.MIN_VALUE;
-        for ( int r = 0; r < tableRows; r++ )
-        {
-            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
-        }
-
-        for ( int r = 0; r < tableRows; r++ )
-        {
-            TableRow tableRow = table.getRow( r );
-
-            Element tableRowElement = htmlDocumentFacade.createTableRow();
-            StringBuilder tableRowStyle = new StringBuilder();
-            WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
-
-            final int rowCells = tableRow.numCells();
-            for ( int c = 0; c < rowCells; c++ )
-            {
-                TableCell tableCell = tableRow.getCell( c );
-
-                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
-                    continue;
-
-                if ( tableCell.isVerticallyMerged()
-                        && !tableCell.isFirstVerticallyMerged() )
-                    continue;
-
-                Element tableCellElement;
-                if ( tableRow.isTableHeader() )
-                {
-                    tableCellElement = htmlDocumentFacade
-                            .createTableHeaderCell();
-                }
-                else
-                {
-                    tableCellElement = htmlDocumentFacade.createTableCell();
-                }
-                StringBuilder tableCellStyle = new StringBuilder();
-                WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
-                        r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
-                        tableCellStyle );
-
-                if ( tableCell.isFirstMerged() )
-                {
-                    int count = 0;
-                    for ( int c1 = c; c1 < rowCells; c1++ )
-                    {
-                        TableCell nextCell = tableRow.getCell( c1 );
-                        if ( nextCell.isMerged() )
-                            count++;
-                        if ( !nextCell.isMerged() )
-                            break;
-                    }
-                    tableCellElement.setAttribute( "colspan", "" + count );
-                }
-                else
-                {
-                    if ( c == rowCells - 1 && c != maxColumns - 1 )
-                    {
-                        tableCellElement.setAttribute( "colspan", ""
-                                + ( maxColumns - c ) );
-                    }
-                }
-
-                if ( tableCell.isFirstVerticallyMerged() )
-                {
-                    int count = 0;
-                    for ( int r1 = r; r1 < tableRows; r1++ )
-                    {
-                        TableRow nextRow = table.getRow( r1 );
-                        if ( nextRow.numCells() < c )
-                            break;
-                        TableCell nextCell = nextRow.getCell( c );
-                        if ( nextCell.isVerticallyMerged() )
-                            count++;
-                        if ( !nextCell.isVerticallyMerged() )
-                            break;
-                    }
-                    tableCellElement.setAttribute( "rowspan", "" + count );
-                }
-
-                processSectionParagraphes( hwpfDocument, tableCellElement,
-                        tableCell, thisTableLevel );
-
-                if ( !tableCellElement.hasChildNodes() )
-                {
-                    tableCellElement.appendChild( htmlDocumentFacade
-                            .createParagraph() );
-                }
-                if ( tableCellStyle.length() > 0 )
-                    tableCellElement.setAttribute( "style",
-                            tableCellStyle.toString() );
-
-                tableRowElement.appendChild( tableCellElement );
-            }
-
-            if ( tableRowStyle.length() > 0 )
-                tableRowElement
-                        .setAttribute( "style", tableRowStyle.toString() );
-
-            if ( tableRow.isTableHeader() )
-            {
-                tableHeader.appendChild( tableRowElement );
-            }
-            else
-            {
-                tableBody.appendChild( tableRowElement );
-            }
-
-        }
-
-        final Element tableElement = htmlDocumentFacade.createTable();
-        if ( tableHeader.hasChildNodes() )
-        {
-            tableElement.appendChild( tableHeader );
-        }
-        if ( tableBody.hasChildNodes() )
-        {
-            tableElement.appendChild( tableBody );
-            flow.appendChild( tableElement );
-        }
-        else
-        {
-            logger.log(
-                    POILogger.WARN,
-                    "Table without body starting on offset "
-                            + table.getStartOffset() + " -- "
-                            + table.getEndOffset() );
-        }
-    }
-
-}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToHtmlUtils.java
deleted file mode 100644 (file)
index 4417f62..0000000
+++ /dev/null
@@ -1,292 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterProperties;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.w3c.dom.Element;
-
-public class WordToHtmlUtils extends AbstractWordUtils
-{
-    public static void addBold( final boolean bold, StringBuilder style )
-    {
-        style.append( "font-weight: " + ( bold ? "bold" : "normal" ) + ";" );
-    }
-
-    public static void addBorder( BorderCode borderCode, String where,
-            StringBuilder style )
-    {
-        if ( borderCode == null || borderCode.getBorderType() == 0 )
-            return;
-
-        if ( isEmpty( where ) )
-        {
-            style.append( "border-style: " + getBorderType( borderCode ) + "; " );
-            style.append( "border-color: " + getColor( borderCode.getColor() )
-                    + "; " );
-            style.append( "border-width: " + getBorderWidth( borderCode )
-                    + "; " );
-        }
-        else
-        {
-            style.append( "border-" + where + "-style: "
-                    + getBorderType( borderCode ) + "; " );
-            style.append( "border-" + where + "-color: "
-                    + getColor( borderCode.getColor() ) + "; " );
-            style.append( "border-" + where + "-width: "
-                    + getBorderWidth( borderCode ) + "; " );
-        }
-    }
-
-    public static void addCharactersProperties(
-            final CharacterRun characterRun, StringBuilder style )
-    {
-        final CharacterProperties clonedProperties = characterRun
-                .cloneProperties();
-
-        if ( characterRun.isBold() )
-        {
-            style.append( "font-weight: bold; " );
-        }
-        if ( characterRun.isItalic() )
-        {
-            style.append( "font-style: italic; " );
-        }
-
-        addBorder( clonedProperties.getBrc(), EMPTY, style );
-
-        if ( characterRun.isCapitalized() )
-        {
-            style.append( "text-transform: uppercase; " );
-        }
-        if ( characterRun.isHighlighted() )
-        {
-            style.append( "background-color: "
-                    + getColor( clonedProperties.getIcoHighlight() ) + "; " );
-        }
-        if ( characterRun.isStrikeThrough() )
-        {
-            style.append( "text-decoration: line-through; " );
-        }
-        if ( characterRun.isShadowed() )
-        {
-            style.append( "text-shadow: " + characterRun.getFontSize() / 24
-                    + "pt; " );
-        }
-        if ( characterRun.isSmallCaps() )
-        {
-            style.append( "font-variant: small-caps; " );
-        }
-        if ( characterRun.getSubSuperScriptIndex() == 1 )
-        {
-            style.append( "baseline-shift: super; " );
-            style.append( "font-size: smaller; " );
-        }
-        if ( characterRun.getSubSuperScriptIndex() == 2 )
-        {
-            style.append( "baseline-shift: sub; " );
-            style.append( "font-size: smaller; " );
-        }
-        if ( characterRun.getUnderlineCode() > 0 )
-        {
-            style.append( "text-decoration: underline; " );
-        }
-        if ( characterRun.isVanished() )
-        {
-            style.append( "visibility: hidden; " );
-        }
-    }
-
-    public static void addFontFamily( final String fontFamily,
-            StringBuilder style )
-    {
-        if ( isEmpty( fontFamily ) )
-            return;
-
-        style.append( "font-family: " + fontFamily );
-    }
-
-    public static void addFontSize( final int fontSize, StringBuilder style )
-    {
-        style.append( "font-size: " + fontSize );
-    }
-
-    public static void addIndent( Paragraph paragraph, StringBuilder style )
-    {
-        addIndent( style, "text-indent", paragraph.getFirstLineIndent() );
-        addIndent( style, "start-indent", paragraph.getIndentFromLeft() );
-        addIndent( style, "end-indent", paragraph.getIndentFromRight() );
-        addIndent( style, "space-before", paragraph.getSpacingBefore() );
-        addIndent( style, "space-after", paragraph.getSpacingAfter() );
-    }
-
-    private static void addIndent( StringBuilder style, final String cssName,
-            final int twipsValue )
-    {
-        if ( twipsValue == 0 )
-            return;
-
-        style.append( cssName + ": " + ( twipsValue / TWIPS_PER_PT ) + "pt; " );
-    }
-
-    public static void addJustification( Paragraph paragraph,
-            final StringBuilder style )
-    {
-        String justification = getJustification( paragraph.getJustification() );
-        if ( isNotEmpty( justification ) )
-            style.append( "text-align: " + justification + "; " );
-    }
-
-    public static void addParagraphProperties( Paragraph paragraph,
-            StringBuilder style )
-    {
-        addIndent( paragraph, style );
-        addJustification( paragraph, style );
-
-        addBorder( paragraph.getBottomBorder(), "bottom", style );
-        addBorder( paragraph.getLeftBorder(), "left", style );
-        addBorder( paragraph.getRightBorder(), "right", style );
-        addBorder( paragraph.getTopBorder(), "top", style );
-
-        if ( paragraph.pageBreakBefore() )
-        {
-            style.append( "break-before: page; " );
-        }
-
-        style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
-
-        if ( paragraph.keepOnPage() )
-        {
-            style.append( "keep-together.within-page: always; " );
-        }
-
-        if ( paragraph.keepWithNext() )
-        {
-            style.append( "keep-with-next.within-page: always; " );
-        }
-
-        style.append( "linefeed-treatment: preserve; " );
-        style.append( "white-space-collapse: false; " );
-    }
-
-    public static void addTableCellProperties( TableRow tableRow,
-            TableCell tableCell, boolean toppest, boolean bottomest,
-            boolean leftest, boolean rightest, StringBuilder style )
-    {
-        style.append( "width: " + ( tableCell.getWidth() / TWIPS_PER_INCH )
-                + "in; " );
-        style.append( "padding-start: "
-                + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
-        style.append( "padding-end: "
-                + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
-
-        BorderCode top = tableCell.getBrcTop() != null
-                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
-                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
-                .getHorizontalBorder();
-        BorderCode bottom = tableCell.getBrcBottom() != null
-                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
-                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
-                : tableRow.getHorizontalBorder();
-
-        BorderCode left = tableCell.getBrcLeft() != null
-                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
-                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
-                .getVerticalBorder();
-        BorderCode right = tableCell.getBrcRight() != null
-                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
-                .getBrcRight() : rightest ? tableRow.getRightBorder()
-                : tableRow.getVerticalBorder();
-
-        addBorder( bottom, "bottom", style );
-        addBorder( left, "left", style );
-        addBorder( right, "right", style );
-        addBorder( top, "top", style );
-    }
-
-    public static void addTableRowProperties( TableRow tableRow,
-            StringBuilder style )
-    {
-        if ( tableRow.getRowHeight() > 0 )
-        {
-            style.append( "height: "
-                    + ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in; " );
-        }
-        if ( !tableRow.cantSplit() )
-        {
-            style.append( "keep-together: always; " );
-        }
-    }
-
-    public static void setPictureProperties( Picture picture,
-            Element graphicElement )
-    {
-        final int aspectRatioX = picture.getAspectRatioX();
-        final int aspectRatioY = picture.getAspectRatioY();
-
-        if ( aspectRatioX > 0 )
-        {
-            graphicElement
-                    .setAttribute( "content-width", ( ( picture.getDxaGoal()
-                            * aspectRatioX / 100 ) / TWIPS_PER_PT )
-                            + "pt" );
-        }
-        else
-            graphicElement.setAttribute( "content-width",
-                    ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
-
-        if ( aspectRatioY > 0 )
-            graphicElement
-                    .setAttribute( "content-height", ( ( picture.getDyaGoal()
-                            * aspectRatioY / 100 ) / TWIPS_PER_PT )
-                            + "pt" );
-        else
-            graphicElement.setAttribute( "content-height",
-                    ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
-
-        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
-        {
-            graphicElement.setAttribute( "scaling", "uniform" );
-        }
-        else
-        {
-            graphicElement.setAttribute( "scaling", "non-uniform" );
-        }
-
-        graphicElement.setAttribute( "vertical-align", "text-bottom" );
-
-        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
-                || picture.getDyaCropBottom() != 0
-                || picture.getDxaCropLeft() != 0 )
-        {
-            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
-            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
-            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
-            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
-            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
-                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
-                    + "pt)" );
-            graphicElement.setAttribute( "oveerflow", "hidden" );
-        }
-    }
-
-}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java
new file mode 100644 (file)
index 0000000..570c8d1
--- /dev/null
@@ -0,0 +1,114 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.List;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+
+/**
+ * Smoke-test suite: every sample <tt>.doc</tt> file is run through both the
+ * XSL-FO and the HTML converter; a test passes when no exception escapes.
+ */
+public class TestWordToConverterSuite
+{
+    /**
+     * YK: a quick hack to exclude failing documents from the suite.
+     */
+    private static List<String> failingFiles = Arrays.asList();
+
+    /**
+     * Builds the suite with two test cases per sample document -- one named
+     * <tt>"&lt;file&gt; [FO]"</tt> and one named <tt>"&lt;file&gt; [HTML]"</tt>
+     * -- for every file whose name ends with <tt>.doc</tt> and is not listed in
+     * {@link #failingFiles}.
+     */
+    public static Test suite()
+    {
+        TestSuite suite = new TestSuite();
+
+        File directory = POIDataSamples.getDocumentInstance().getFile(
+                "../document" );
+        for ( final File child : directory.listFiles( new FilenameFilter()
+        {
+            public boolean accept( File dir, String name )
+            {
+                return name.endsWith( ".doc" ) && !failingFiles.contains( name );
+            }
+        } ) )
+        {
+            final String name = child.getName();
+
+            suite.addTest( new TestCase( name + " [FO]" )
+            {
+                public void runTest() throws Exception
+                {
+                    test( child, false );
+                }
+            } );
+            suite.addTest( new TestCase( name + " [HTML]" )
+            {
+                public void runTest() throws Exception
+                {
+                    test( child, true );
+                }
+            } );
+
+        }
+
+        return suite;
+    }
+
+    /**
+     * Converts a single document and serializes the result in memory.
+     *
+     * @param child
+     *            Word file to convert
+     * @param html
+     *            <tt>true</tt> to run {@link WordToHtmlConverter} and serialize
+     *            with the <tt>html</tt> output method, <tt>false</tt> to run
+     *            {@link WordToFoConverter} with the default output method
+     * @throws Exception
+     *             if conversion or serialization fails; a file that cannot be
+     *             loaded at all is skipped without failing
+     */
+    protected static void test( File child, boolean html ) throws Exception
+    {
+        HWPFDocumentCore hwpfDocument;
+        try
+        {
+            hwpfDocument = AbstractWordUtils.loadDoc( child );
+        }
+        catch ( Exception exc )
+        {
+            // unable to parse file -- not a converter fault
+            return;
+        }
+
+        // Honour the flag: previously the FO converter was used for both
+        // variants, so the "[HTML]" cases never touched WordToHtmlConverter.
+        final DOMSource source;
+        if ( html )
+        {
+            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                            .newDocument() );
+            wordToHtmlConverter.processDocument( hwpfDocument );
+            source = new DOMSource( wordToHtmlConverter.getDocument() );
+        }
+        else
+        {
+            WordToFoConverter wordToFoConverter = new WordToFoConverter(
+                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                            .newDocument() );
+            wordToFoConverter.processDocument( hwpfDocument );
+            source = new DOMSource( wordToFoConverter.getDocument() );
+        }
+
+        StringWriter stringWriter = new StringWriter();
+
+        Transformer transformer = TransformerFactory.newInstance()
+                .newTransformer();
+        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+        // the output method only has an effect when set before transform()
+        if ( html )
+            transformer.setOutputProperty( OutputKeys.METHOD, "html" );
+        transformer.transform( source, new StreamResult( stringWriter ) );
+
+        // no exceptions
+    }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToFoConverter.java
new file mode 100644 (file)
index 0000000..3332198
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ *  ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one or more
+ *    contributor license agreements.  See the NOTICE file distributed with
+ *    this work for additional information regarding copyright ownership.
+ *    The ASF licenses this file to You under the Apache License, Version 2.0
+ *    (the "License"); you may not use this file except in compliance with
+ *    the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ * ====================================================================
+ */
+package org.apache.poi.hwpf.converter;
+
+import java.io.StringWriter;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import junit.framework.TestCase;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hwpf.HWPFDocument;
+
+/**
+ * Test cases for {@link WordToFoConverter}
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class TestWordToFoConverter extends TestCase
+{
+    /**
+     * Converts a sample document with {@link WordToFoConverter} and
+     * serializes the resulting DOM with indentation enabled.
+     *
+     * @param sampleFileName
+     *            resource name passed to the document {@link POIDataSamples}
+     * @return the serialized conversion result
+     * @throws Exception
+     *             if the sample cannot be opened, converted or serialized
+     */
+    private static String getFoText( final String sampleFileName )
+            throws Exception
+    {
+        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
+                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
+
+        WordToFoConverter wordToFoConverter = new WordToFoConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToFoConverter.processDocument( hwpfDocument );
+
+        StringWriter stringWriter = new StringWriter();
+
+        Transformer transformer = TransformerFactory.newInstance()
+                .newTransformer();
+        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+        transformer.transform(
+                new DOMSource( wordToFoConverter.getDocument() ),
+                new StreamResult( stringWriter ) );
+
+        return stringWriter.toString();
+    }
+
+    /**
+     * Expects the image placeholder comment for <tt>0.emf</tt> in the output.
+     */
+    public void testEquation() throws Exception
+    {
+        final String sampleFileName = "equation.doc";
+        String result = getFoText( sampleFileName );
+
+        assertTrue( result
+                .contains( "<!--Image link to '0.emf' can be here-->" ) );
+    }
+
+    /**
+     * Expects an external <tt>fo:basic-link</tt> and the link text.
+     */
+    public void testHyperlink() throws Exception
+    {
+        final String sampleFileName = "hyperlink.doc";
+        String result = getFoText( sampleFileName );
+
+        assertTrue( result
+                .contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
+        assertTrue( result.contains( "Hyperlink text" ) );
+    }
+
+    /**
+     * Expects an internal <tt>fo:basic-link</tt> pointing at <tt>userref</tt>.
+     */
+    public void testPageref() throws Exception
+    {
+        final String sampleFileName = "pageref.doc";
+        String result = getFoText( sampleFileName );
+
+        assertTrue( result
+                .contains( "<fo:basic-link internal-destination=\"userref\">" ) );
+        // NOTE(review): with the default XML output method the declaration
+        // (version="1.0") already contains "1", so this check cannot fail;
+        // consider asserting on the text inside the link instead.
+        assertTrue( result.contains( "1" ) );
+    }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java
new file mode 100644 (file)
index 0000000..890bce6
--- /dev/null
@@ -0,0 +1,95 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.StringWriter;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import junit.framework.TestCase;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hwpf.HWPFDocument;
+
+/**
+ * Test cases for {@link WordToHtmlConverter}
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class TestWordToHtmlConverter extends TestCase
+{
+    /**
+     * Converts a sample document with {@link WordToHtmlConverter} and
+     * serializes the resulting DOM using the <tt>html</tt> output method,
+     * <tt>utf-8</tt> encoding and indentation.
+     *
+     * @param sampleFileName
+     *            resource name passed to the document {@link POIDataSamples}
+     * @return the serialized conversion result
+     * @throws Exception
+     *             if the sample cannot be opened, converted or serialized
+     */
+    private static String getHtmlText( final String sampleFileName )
+            throws Exception
+    {
+        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
+                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
+
+        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToHtmlConverter.processDocument( hwpfDocument );
+
+        StringWriter stringWriter = new StringWriter();
+
+        Transformer transformer = TransformerFactory.newInstance()
+                .newTransformer();
+        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+        transformer.setOutputProperty( OutputKeys.METHOD, "html" );
+        transformer.transform(
+                new DOMSource( wordToHtmlConverter.getDocument() ),
+                new StreamResult( stringWriter ) );
+
+        return stringWriter.toString();
+    }
+
+    /**
+     * Expects the long digit sequence from <tt>Bug46610_2.doc</tt> to appear
+     * unbroken in the output.
+     */
+    public void testBug46610_2() throws Exception
+    {
+        String result = getHtmlText( "Bug46610_2.doc" );
+        assertTrue( result
+                .contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
+    }
+
+    /**
+     * Expects the image placeholder comment for <tt>0.emf</tt> in the output.
+     */
+    public void testEquation() throws Exception
+    {
+        String result = getHtmlText( "equation.doc" );
+
+        assertTrue( result
+                .contains( "<!--Image link to '0.emf' can be here-->" ) );
+    }
+
+    /**
+     * Expects an <tt>a</tt> element with the external URL and the link text.
+     */
+    public void testHyperlink() throws Exception
+    {
+        String result = getHtmlText( "hyperlink.doc" );
+
+        assertTrue( result.contains( "<a href=\"http://testuri.org/\">" ) );
+        assertTrue( result.contains( "Hyperlink text" ) );
+    }
+
+    /**
+     * Expects an <tt>a</tt> element pointing at the <tt>#userref</tt> anchor.
+     */
+    public void testPageref() throws Exception
+    {
+        String result = getHtmlText( "pageref.doc" );
+
+        assertTrue( result.contains( "<a href=\"#userref\">" ) );
+        // NOTE(review): a bare "1" is a very loose match -- confirm it can
+        // only come from the page reference text.
+        assertTrue( result.contains( "1" ) );
+    }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToExtractorSuite.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToExtractorSuite.java
deleted file mode 100644 (file)
index 62cfb99..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.StringWriter;
-import java.util.Arrays;
-import java.util.List;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-
-public class TestWordToExtractorSuite
-{
-    /**
-     * YK: a quick hack to exclude failing documents from the suite.
-     */
-    private static List<String> failingFiles = Arrays.asList();
-
-    public static Test suite()
-    {
-        TestSuite suite = new TestSuite();
-
-        File directory = POIDataSamples.getDocumentInstance().getFile(
-                "../document" );
-        for ( final File child : directory.listFiles( new FilenameFilter()
-        {
-            public boolean accept( File dir, String name )
-            {
-                return name.endsWith( ".doc" ) && !failingFiles.contains( name );
-            }
-        } ) )
-        {
-            final String name = child.getName();
-
-            suite.addTest( new TestCase( name + " [FO]" )
-            {
-                public void runTest() throws Exception
-                {
-                    test( child, false );
-                }
-            } );
-            suite.addTest( new TestCase( name + " [HTML]" )
-            {
-                public void runTest() throws Exception
-                {
-                    test( child, true );
-                }
-            } );
-
-        }
-
-        return suite;
-    }
-
-    protected static void test( File child, boolean html ) throws Exception
-    {
-        HWPFDocumentCore hwpfDocument;
-        try
-        {
-            hwpfDocument = AbstractWordUtils.loadDoc( child );
-        }
-        catch ( Exception exc )
-        {
-            // unable to parse file -- not WordToFoExtractor fault
-            return;
-        }
-
-        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
-                DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                        .newDocument() );
-        wordToFoExtractor.processDocument( hwpfDocument );
-
-        StringWriter stringWriter = new StringWriter();
-
-        Transformer transformer = TransformerFactory.newInstance()
-                .newTransformer();
-        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
-        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
-        transformer.transform(
-                new DOMSource( wordToFoExtractor.getDocument() ),
-                new StreamResult( stringWriter ) );
-
-        if ( html )
-            transformer.setOutputProperty( OutputKeys.METHOD, "html" );
-
-        // no exceptions
-    }
-}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToFoExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToFoExtractor.java
deleted file mode 100644 (file)
index 8bcd5bb..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *  ====================================================================
- *    Licensed to the Apache Software Foundation (ASF) under one or more
- *    contributor license agreements.  See the NOTICE file distributed with
- *    this work for additional information regarding copyright ownership.
- *    The ASF licenses this file to You under the Apache License, Version 2.0
- *    (the "License"); you may not use this file except in compliance with
- *    the License.  You may obtain a copy of the License at
- *
- *        http://www.apache.org/licenses/LICENSE-2.0
- *
- *    Unless required by applicable law or agreed to in writing, software
- *    distributed under the License is distributed on an "AS IS" BASIS,
- *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *    See the License for the specific language governing permissions and
- *    limitations under the License.
- * ====================================================================
- */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.StringWriter;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import junit.framework.TestCase;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.hwpf.HWPFDocument;
-
-/**
- * Test cases for {@link WordToFoExtractor}
- * 
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class TestWordToFoExtractor extends TestCase
-{
-    private static String getFoText( final String sampleFileName )
-            throws Exception
-    {
-        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
-                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
-
-        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
-                DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                        .newDocument() );
-        wordToFoExtractor.processDocument( hwpfDocument );
-
-        StringWriter stringWriter = new StringWriter();
-
-        Transformer transformer = TransformerFactory.newInstance()
-                .newTransformer();
-        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
-        transformer.transform(
-                new DOMSource( wordToFoExtractor.getDocument() ),
-                new StreamResult( stringWriter ) );
-
-        String result = stringWriter.toString();
-        return result;
-    }
-
-    public void testHyperlink() throws Exception
-    {
-        final String sampleFileName = "hyperlink.doc";
-        String result = getFoText( sampleFileName );
-
-        assertTrue( result
-                .contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
-        assertTrue( result.contains( "Hyperlink text" ) );
-    }
-
-    public void testEquation() throws Exception
-    {
-        final String sampleFileName = "equation.doc";
-        String result = getFoText( sampleFileName );
-
-        assertTrue( result
-                .contains( "<!--Image link to '0.emf' can be here-->" ) );
-    }
-
-    public void testPageref() throws Exception
-    {
-        final String sampleFileName = "pageref.doc";
-        String result = getFoText( sampleFileName );
-
-        System.out.println( result );
-
-        assertTrue( result
-                .contains( "<fo:basic-link internal-destination=\"userref\">" ) );
-        assertTrue( result.contains( "1" ) );
-    }
-}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToHtmlExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordToHtmlExtractor.java
deleted file mode 100644 (file)
index f758e6f..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.StringWriter;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import junit.framework.TestCase;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.hwpf.HWPFDocument;
-
-/**
- * Test cases for {@link WordToFoExtractor}
- * 
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class TestWordToHtmlExtractor extends TestCase
-{
-    private static String getHtmlText( final String sampleFileName )
-            throws Exception
-    {
-        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
-                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
-
-        WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
-                DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                        .newDocument() );
-        wordToHtmlExtractor.processDocument( hwpfDocument );
-
-        StringWriter stringWriter = new StringWriter();
-
-        Transformer transformer = TransformerFactory.newInstance()
-                .newTransformer();
-        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
-        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
-        transformer.setOutputProperty( OutputKeys.METHOD, "html" );
-        transformer.transform(
-                new DOMSource( wordToHtmlExtractor.getDocument() ),
-                new StreamResult( stringWriter ) );
-
-        String result = stringWriter.toString();
-        return result;
-    }
-
-    public void testBug46610_2() throws Exception
-    {
-        String result = getHtmlText( "Bug46610_2.doc" );
-        assertTrue( result
-                .contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
-    }
-
-    public void testEquation() throws Exception
-    {
-        String result = getHtmlText( "equation.doc" );
-
-        assertTrue( result
-                .contains( "<!--Image link to '0.emf' can be here-->" ) );
-    }
-
-    public void testHyperlink() throws Exception
-    {
-        String result = getHtmlText( "hyperlink.doc" );
-
-        assertTrue( result.contains( "<a href=\"http://testuri.org/\">" ) );
-        assertTrue( result.contains( "Hyperlink text" ) );
-    }
-
-    public void testPageref() throws Exception
-    {
-        String result = getHtmlText( "pageref.doc" );
-
-        assertTrue( result.contains( "<a href=\"#userref\">" ) );
-        assertTrue( result.contains( "1" ) );
-    }
-}