--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.model.ListFormatOverride;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+public abstract class AbstractWordConverter
+{
+ /**
+  * Character code 7. When it is the last character of a run inside a table
+  * (table level != 0) it is stripped before the text is output.
+  */
+ private static final byte BEL_MARK = 7;
+
+ /** Character code 19; a run starting with it is treated as the start of a field. */
+ private static final byte FIELD_BEGIN_MARK = 19;
+
+ /** Character code 21; a run starting with it is treated as the end of a field. */
+ private static final byte FIELD_END_MARK = 21;
+
+ /**
+  * Character code 20; a run starting with it separates the field code (before)
+  * from the field value (after).
+  */
+ private static final byte FIELD_SEPARATOR_MARK = 20;
+
+ /** Logger used for conversion warnings raised by this class. */
+ private static final POILogger logger = POILogFactory
+ .getLogger( AbstractWordConverter.class );
+
+ /**
+  * Returns the DOM document associated with this converter.
+  * NOTE(review): presumably the output document being built - confirm in
+  * the concrete subclasses.
+  */
+ public abstract Document getDocument();
+
+ /**
+  * Emits the text of one character run into the given block element.
+  * <p>
+  * Invoked by {@code processCharacters} for every run that is not a picture,
+  * a field mark or a special/OBJ/OLE2 run. The supplied text already has a
+  * trailing carriage return (or, inside a table, a trailing BEL mark)
+  * removed.
+  *
+  * @param block element that receives the output
+  * @param characterRun run the text originates from
+  * @param text text to output
+  */
+ protected abstract void outputCharacters( Element block,
+ CharacterRun characterRun, String text );
+
+ protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
+ int currentTableLevel, Paragraph paragraph, final Element block,
+ List<CharacterRun> characterRuns, final int start, final int end )
+ {
+ boolean haveAnyText = false;
+
+ for ( int c = start; c < end; c++ )
+ {
+ CharacterRun characterRun = characterRuns.get( c );
+
+ if ( characterRun == null )
+ throw new AssertionError();
+
+ if ( hwpfDocument instanceof HWPFDocument
+ && ( (HWPFDocument) hwpfDocument ).getPicturesTable()
+ .hasPicture( characterRun ) )
+ {
+ HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
+ Picture picture = newFormat.getPicturesTable().extractPicture(
+ characterRun, true );
+
+ processImage( block, characterRun.text().charAt( 0 ) == 0x01,
+ picture );
+ continue;
+ }
+
+ String text = characterRun.text();
+ if ( text.getBytes().length == 0 )
+ continue;
+
+ if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
+ {
+ int skipTo = tryField( hwpfDocument, paragraph,
+ currentTableLevel, characterRuns, c, block );
+
+ if ( skipTo != c )
+ {
+ c = skipTo;
+ continue;
+ }
+
+ continue;
+ }
+ if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
+ {
+ // shall not appear without FIELD_BEGIN_MARK
+ continue;
+ }
+ if ( text.getBytes()[0] == FIELD_END_MARK )
+ {
+ // shall not appear without FIELD_BEGIN_MARK
+ continue;
+ }
+
+ if ( characterRun.isSpecialCharacter() || characterRun.isObj()
+ || characterRun.isOle2() )
+ {
+ continue;
+ }
+
+ if ( text.endsWith( "\r" )
+ || ( text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0 ) )
+ text = text.substring( 0, text.length() - 1 );
+
+ outputCharacters( block, characterRun, text );
+
+ haveAnyText |= text.trim().length() != 0;
+ }
+
+ return haveAnyText;
+ }
+
+ public void processDocument( HWPFDocumentCore wordDocument )
+ {
+ final Range range = wordDocument.getRange();
+ for ( int s = 0; s < range.numSections(); s++ )
+ {
+ processSection( wordDocument, range.getSection( s ), s );
+ }
+ }
+
+ /** Matches the code of a HYPERLINK field; group 1 is the text between the quotes. */
+ private static final Pattern HYPERLINK_PATTERN = Pattern
+         .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
+
+ /** Matches the code of a PAGEREF field carrying the "h" switch; group 1 is the referenced name. */
+ private static final Pattern PAGEREF_PATTERN = Pattern
+         .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
+
+ /**
+  * Processes one field delimited by the given begin, separator and end
+  * mark runs.
+  * <p>
+  * The text of the first non-null run after the begin mark is matched
+  * against the HYPERLINK and PAGEREF patterns; on a match the field is
+  * delegated to {@code processHyperlink} or {@code processPageref}.
+  * Any other field is logged as unsupported and only its value (the runs
+  * between separator and end mark) is output via
+  * {@code processCharacters}.
+  * <p>
+  * The patterns are compiled once per class rather than on every call.
+  *
+  * @param wordDocument document being converted
+  * @param currentBlock element receiving the output
+  * @param paragraph paragraph containing the field
+  * @param currentTableLevel current table nesting level
+  * @param characterRuns runs handed on to the delegate methods
+  * @param beginMark index of the field begin mark run
+  * @param separatorMark index of the field separator mark run
+  * @param endMark index of the field end mark run
+  */
+ protected void processField( HWPFDocumentCore wordDocument,
+         Element currentBlock, Paragraph paragraph, int currentTableLevel,
+         List<CharacterRun> characterRuns, int beginMark, int separatorMark,
+         int endMark )
+ {
+     // find the first non-null run strictly between begin and separator marks
+     CharacterRun firstAfterBegin = null;
+     for ( int index = beginMark + 1; index < separatorMark; index++ )
+     {
+         firstAfterBegin = paragraph.getCharacterRun( index );
+         if ( firstAfterBegin != null )
+             break;
+
+         logger.log( POILogger.WARN,
+                 "Paragraph " + paragraph.getStartOffset() + "--"
+                         + paragraph.getEndOffset()
+                         + " contains null CharacterRun #" + index );
+     }
+
+     if ( firstAfterBegin != null )
+     {
+         final String fieldCode = firstAfterBegin.text();
+
+         final Matcher hyperlinkMatcher = HYPERLINK_PATTERN
+                 .matcher( fieldCode );
+         if ( hyperlinkMatcher.matches() )
+         {
+             String hyperlink = hyperlinkMatcher.group( 1 );
+             processHyperlink( wordDocument, currentBlock, paragraph,
+                     characterRuns, currentTableLevel, hyperlink,
+                     separatorMark + 1, endMark );
+             return;
+         }
+
+         final Matcher pagerefMatcher = PAGEREF_PATTERN.matcher( fieldCode );
+         if ( pagerefMatcher.matches() )
+         {
+             String pageref = pagerefMatcher.group( 1 );
+             processPageref( wordDocument, currentBlock, paragraph,
+                     characterRuns, currentTableLevel, pageref,
+                     separatorMark + 1, endMark );
+             return;
+         }
+     }
+
+     StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
+     for ( int i = beginMark; i <= endMark; i++ )
+     {
+         debug.append( "\t" );
+         debug.append( paragraph.getCharacterRun( i ) );
+         debug.append( "\n" );
+     }
+     logger.log( POILogger.WARN, debug );
+
+     // just output field value
+     if ( separatorMark + 1 < endMark )
+         processCharacters( wordDocument, currentTableLevel, paragraph,
+                 currentBlock, characterRuns, separatorMark + 1, endMark );
+ }
+
+ /**
+  * Handles a HYPERLINK field. Called by {@code processField} with the text
+  * captured between the quotes of the field code as {@code hyperlink},
+  * {@code i} set to the index right after the field separator mark and
+  * {@code endMark} set to the index of the field end mark.
+  */
+ protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
+ Element currentBlock, Paragraph paragraph,
+ List<CharacterRun> characterRuns, int currentTableLevel,
+ String hyperlink, int i, int endMark );
+
+ /**
+  * Handles a picture found in a character run. {@code inlined} is
+  * {@code true} when the text of the run starts with character 0x01.
+  */
+ protected abstract void processImage( Element currentBlock,
+ boolean inlined, Picture picture );
+
+ /**
+  * Handles a PAGEREF field. Called by {@code processField} with the
+  * referenced name as {@code pageref}; the field value spans the runs from
+  * {@code beginTextInclusive} up to, but not including,
+  * {@code endTextExclusive}.
+  */
+ protected abstract void processPageref( HWPFDocumentCore wordDocument,
+ Element currentBlock, Paragraph paragraph,
+ List<CharacterRun> characterRuns, int currentTableLevel,
+ String pageref, int beginTextInclusive, int endTextExclusive );
+
+ /**
+  * Handles a single paragraph. {@code bulletText} is the list label computed
+  * for list paragraphs and the empty string otherwise.
+  */
+ protected abstract void processParagraph( HWPFDocumentCore wordDocument,
+ Element parentFopElement, int currentTableLevel,
+ Paragraph paragraph, String bulletText );
+
+ /**
+  * Handles a single section; {@code s} is the zero-based index of the
+  * section as passed by {@code processDocument}.
+  */
+ protected abstract void processSection( HWPFDocumentCore wordDocument,
+ Section section, int s );
+
+ protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
+ Element flow, Range range, int currentTableLevel )
+ {
+ final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
+ for ( TableIterator tableIterator = AbstractWordUtils.newTableIterator(
+ range, currentTableLevel + 1 ); tableIterator.hasNext(); )
+ {
+ Table next = tableIterator.next();
+ allTables.put( Integer.valueOf( next.getStartOffset() ), next );
+ }
+
+ final ListTables listTables = wordDocument.getListTables();
+ int currentListInfo = 0;
+
+ final int paragraphs = range.numParagraphs();
+ for ( int p = 0; p < paragraphs; p++ )
+ {
+ Paragraph paragraph = range.getParagraph( p );
+
+ if ( allTables.containsKey( Integer.valueOf( paragraph
+ .getStartOffset() ) ) )
+ {
+ Table table = allTables.get( Integer.valueOf( paragraph
+ .getStartOffset() ) );
+ processTable( wordDocument, flow, table, currentTableLevel + 1 );
+ continue;
+ }
+
+ if ( paragraph.isInTable()
+ && paragraph.getTableLevel() != currentTableLevel )
+ {
+ continue;
+ }
+
+ if ( paragraph.getIlfo() != currentListInfo )
+ {
+ currentListInfo = paragraph.getIlfo();
+ }
+
+ if ( currentListInfo != 0 )
+ {
+ if ( listTables != null )
+ {
+ final ListFormatOverride listFormatOverride = listTables
+ .getOverride( paragraph.getIlfo() );
+
+ String label = AbstractWordUtils.getBulletText( listTables,
+ paragraph, listFormatOverride.getLsid() );
+
+ processParagraph( wordDocument, flow, currentTableLevel,
+ paragraph, label );
+ }
+ else
+ {
+ logger.log( POILogger.WARN,
+ "Paragraph #" + paragraph.getStartOffset() + "-"
+ + paragraph.getEndOffset()
+ + " has reference to list structure #"
+ + currentListInfo
+ + ", but listTables not defined in file" );
+
+ processParagraph( wordDocument, flow, currentTableLevel,
+ paragraph, AbstractWordUtils.EMPTY );
+ }
+ }
+ else
+ {
+ processParagraph( wordDocument, flow, currentTableLevel,
+ paragraph, AbstractWordUtils.EMPTY );
+ }
+ }
+
+ }
+
+ protected void processSingleSection( HWPFDocumentCore wordDocument,
+ Section section )
+ {
+ processSection( wordDocument, section, 0 );
+ }
+
+ /**
+  * Handles a whole table. Called by {@code processSectionParagraphes} when
+  * a paragraph starts at the table's start offset; {@code newTableLevel} is
+  * the caller's table level plus one.
+  */
+ protected abstract void processTable( HWPFDocumentCore wordDocument,
+ Element flow, Table table, int newTableLevel );
+
+ protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
+ int currentTableLevel, List<CharacterRun> characterRuns,
+ int beginMark, Element currentBlock )
+ {
+ int separatorMark = -1;
+ int endMark = -1;
+ for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
+ {
+ CharacterRun characterRun = paragraph.getCharacterRun( c );
+
+ String text = characterRun.text();
+ if ( text.getBytes().length == 0 )
+ continue;
+
+ if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
+ {
+ if ( separatorMark != -1 )
+ {
+ // double;
+ return beginMark;
+ }
+
+ separatorMark = c;
+ continue;
+ }
+
+ if ( text.getBytes()[0] == FIELD_END_MARK )
+ {
+ if ( endMark != -1 )
+ {
+ // double;
+ return beginMark;
+ }
+
+ endMark = c;
+ break;
+ }
+
+ }
+
+ if ( separatorMark == -1 || endMark == -1 )
+ return beginMark;
+
+ processField( wordDocument, currentBlock, paragraph, currentTableLevel,
+ characterRuns, beginMark, separatorMark, endMark );
+
+ return endMark;
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.HWPFOldDocument;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.hwpf.model.CHPX;
+import org.apache.poi.hwpf.model.ListLevel;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+public class AbstractWordUtils
+{
+ /** Shared empty string constant. */
+ static final String EMPTY = "";
+
+ /** Logger used by the helper methods of this class. */
+ private static final POILogger logger = POILogFactory
+ .getLogger( AbstractWordUtils.class );
+
+ /** Number of twips in one inch. */
+ public static final float TWIPS_PER_INCH = 1440.0f;
+ /** Number of twips in one point. */
+ public static final int TWIPS_PER_PT = 20;
+
+ /**
+  * Closes the given resource without ever propagating a failure: any
+  * exception thrown by {@code close()} is logged at ERROR level instead.
+  *
+  * @param closeable resource to close; {@code null} is silently ignored
+  */
+ static void closeQuietly( final Closeable closeable )
+ {
+     if ( closeable == null )
+     {
+         // nothing to close; avoids logging a spurious NullPointerException
+         return;
+     }
+
+     try
+     {
+         closeable.close();
+     }
+     catch ( Exception exc )
+     {
+         logger.log( POILogger.ERROR, "Unable to close resource: " + exc,
+                 exc );
+     }
+ }
+
+ /**
+  * Null-safe string comparison.
+  *
+  * @return {@code true} if both arguments are {@code null} or both hold
+  *         equal strings
+  */
+ static boolean equals( String str1, String str2 )
+ {
+     if ( str1 == null )
+     {
+         return str2 == null;
+     }
+     return str1.equals( str2 );
+ }
+
+ // XXX incorporate into Range
+ /**
+  * Collects the character runs of {@code range}.
+  * <p>
+  * Every non-null CHPX obtained from the range whose [start, end] interval
+  * touches or overlaps the range's own [start offset, end offset] interval
+  * is turned into a {@link CharacterRun}; {@code null} runs are left out.
+  *
+  * @param range range to inspect
+  * @return the runs found, in the order of the underlying CHPX list
+  */
+ static List<CharacterRun> findCharacterRuns( Range range )
+ {
+     final int rangeStart = range.getStartOffset();
+     final int rangeEnd = range.getEndOffset();
+
+     final List<CharacterRun> runs = new ArrayList<CharacterRun>();
+     for ( CHPX chpx : getCharacters( range ) )
+     {
+         if ( chpx == null )
+             continue;
+
+         final int overlapStart = Math.max( rangeStart, chpx.getStart() );
+         final int overlapEnd = Math.min( rangeEnd, chpx.getEnd() );
+         if ( overlapStart > overlapEnd )
+             continue;
+
+         final CharacterRun run = getCharacterRun( range, chpx );
+         if ( run != null )
+             runs.add( run );
+     }
+
+     return runs;
+ }
+
+ /**
+  * Maps the border type code of a Word border to a border-style keyword.
+  * <p>
+  * Code 25 is mapped to {@code "groove"}; the previously returned
+  * {@code "grooved"} is not a valid border-style value. Codes without an
+  * explicit mapping yield {@code "solid"}.
+  *
+  * @param borderCode border to inspect, must not be {@code null}
+  * @return one of {@code "solid"}, {@code "double"}, {@code "dotted"},
+  *         {@code "dashed"}, {@code "ridge"} or {@code "groove"}
+  * @throws IllegalArgumentException if {@code borderCode} is {@code null}
+  */
+ public static String getBorderType( BorderCode borderCode )
+ {
+     if ( borderCode == null )
+         throw new IllegalArgumentException( "borderCode is null" );
+
+     switch ( borderCode.getBorderType() )
+     {
+     case 3:
+     case 10:
+     case 11:
+     case 12:
+     case 13:
+     case 14:
+     case 15:
+     case 16:
+     case 17:
+     case 18:
+     case 19:
+     case 21:
+         return "double";
+     case 6:
+     case 9:
+         return "dotted";
+     case 7:
+     case 8:
+     case 22:
+     case 23:
+         return "dashed";
+     case 24:
+         return "ridge";
+     case 25:
+         return "groove";
+     default:
+         // includes 1, 2, 5 and 20, which were mapped to "solid" explicitly
+         return "solid";
+     }
+ }
+
+ /**
+  * Formats the line width of a border as a point value such as
+  * {@code "1.500pt"}.
+  * <p>
+  * The line width is split into whole multiples of 8 and a remainder; the
+  * remainder is multiplied by 125 (1000 / 8) and written after the dot.
+  *
+  * @param borderCode border whose line width is formatted
+  * @return the width followed by the unit {@code pt}
+  */
+ public static String getBorderWidth( BorderCode borderCode )
+ {
+     final int eighths = borderCode.getLineWidth();
+     final int wholePoints = eighths / 8;
+     final int remainder = eighths - wholePoints * 8;
+
+     return wholePoints + "." + ( 1000 / 8 * remainder ) + "pt";
+ }
+
+ /**
+  * Builds the label (bullet or number) of a list paragraph.
+  * <p>
+  * The number text of the paragraph's list level serves as a template:
+  * characters below 9 are placeholders naming a list level and are replaced
+  * by that level's current "start at" value, all other characters are
+  * copied. When the placeholder refers to the paragraph's own level, that
+  * level's "start at" value is incremented, so this method mutates the list
+  * tables. Finally a tab or a space is appended depending on the level's
+  * "follow" character (0 = tab, 1 = space, anything else = nothing).
+  * <p>
+  * NOTE(review): the number is always formatted with the number format of
+  * the paragraph's own level, even for placeholders of other levels -
+  * confirm this is intended.
+  *
+  * @param listTables list definitions of the document
+  * @param paragraph list paragraph
+  * @param listId id of the list the paragraph belongs to
+  * @return the label, or the empty string if the level has no number text
+  */
+ public static String getBulletText( ListTables listTables,
+         Paragraph paragraph, int listId )
+ {
+     final ListLevel listLevel = listTables.getLevel( listId,
+             paragraph.getIlvl() );
+
+     final String numberText = listLevel.getNumberText();
+     if ( numberText == null )
+         return EMPTY;
+
+     final StringBuilder bullet = new StringBuilder();
+     for ( int i = 0; i < numberText.length(); i++ )
+     {
+         final char element = numberText.charAt( i );
+         if ( element >= 9 )
+         {
+             bullet.append( element );
+             continue;
+         }
+
+         final ListLevel numLevel = listTables.getLevel( listId, element );
+         final int num = numLevel.getStartAt();
+         bullet.append( NumberFormatter.getNumber( num,
+                 listLevel.getNumberFormat() ) );
+
+         if ( numLevel == listLevel )
+         {
+             numLevel.setStartAt( num + 1 );
+         }
+     }
+
+     final byte follow = getIxchFollow( listLevel );
+     if ( follow == 0 )
+     {
+         bullet.append( "\t" );
+     }
+     else if ( follow == 1 )
+     {
+         bullet.append( " " );
+     }
+
+     return bullet.toString();
+ }
+
+ private static CharacterRun getCharacterRun( Range range, CHPX chpx )
+ {
+ try
+ {
+ Method method = Range.class.getDeclaredMethod( "getCharacterRun",
+ CHPX.class );
+ method.setAccessible( true );
+ return (CharacterRun) method.invoke( range, chpx );
+ }
+ catch ( Exception exc )
+ {
+ throw new Error( exc );
+ }
+ }
+
+ private static List<CHPX> getCharacters( Range range )
+ {
+ try
+ {
+ Field field = Range.class.getDeclaredField( "_characters" );
+ field.setAccessible( true );
+ return (List<CHPX>) field.get( range );
+ }
+ catch ( Exception exc )
+ {
+ throw new Error( exc );
+ }
+ }
+
+ /**
+  * Translates a color index into a color name.
+  *
+  * @param ico color index; 1 to 16 select a named color
+  * @return the color name, {@code "black"} for any index outside 1..16
+  */
+ public static String getColor( int ico )
+ {
+     // entry i holds the name for index i + 1
+     final String[] names = { "black", "blue", "cyan", "green", "magenta",
+             "red", "yellow", "white", "darkblue", "darkcyan", "darkgreen",
+             "darkmagenta", "darkred", "darkyellow", "darkgray",
+             "lightgray" };
+
+     if ( ico < 1 || ico > names.length )
+     {
+         return "black";
+     }
+     return names[ico - 1];
+ }
+
+ /**
+  * Reflectively reads the non-public {@code _ixchFollow} field of a
+  * {@link ListLevel}.
+  *
+  * @throws Error wrapping any exception raised by the reflective access
+  */
+ public static byte getIxchFollow( ListLevel listLevel )
+ {
+     final Byte follow;
+     try
+     {
+         final Field followField = ListLevel.class
+                 .getDeclaredField( "_ixchFollow" );
+         followField.setAccessible( true );
+         follow = (Byte) followField.get( listLevel );
+         return follow.byteValue();
+     }
+     catch ( Exception exc )
+     {
+         throw new Error( exc );
+     }
+ }
+
+ /**
+  * Translates a justification code into a text alignment keyword.
+  *
+  * @param js justification code; 0 to 9 are known
+  * @return the keyword, or the empty string for any other code
+  */
+ public static String getJustification( int js )
+ {
+     // entry i holds the keyword for code i
+     final String[] alignments = { "start", "center", "end", "justify",
+             "justify", "center", "left", "start", "end", "justify" };
+
+     final boolean known = js >= 0 && js < alignments.length;
+     return known ? alignments[js] : "";
+ }
+
+ /**
+  * Returns the label of a numbered list item. Only format 0 is implemented;
+  * for any other format a warning is written to the class logger (instead
+  * of {@code System.err}) and the plain decimal number is returned as well.
+  *
+  * @param number item number
+  * @param format requested number format
+  * @return the decimal representation of {@code number}
+  */
+ public static String getListItemNumberLabel( int number, int format )
+ {
+     if ( format != 0 )
+     {
+         logger.log( POILogger.WARN, "NYI: toListItemNumberLabel(): "
+                 + format );
+     }
+
+     return String.valueOf( number );
+ }
+
+ /**
+  * Reflectively reads the non-public {@code _props} field of a
+  * {@link Section}.
+  *
+  * @throws Error wrapping any exception raised by the reflective access
+  */
+ public static SectionProperties getSectionProperties( Section section )
+ {
+     final Object props;
+     try
+     {
+         final Field propsField = Section.class.getDeclaredField( "_props" );
+         propsField.setAccessible( true );
+         props = propsField.get( section );
+     }
+     catch ( Exception exc )
+     {
+         throw new Error( exc );
+     }
+     return (SectionProperties) props;
+ }
+
+ /**
+  * @return {@code true} if {@code str} is {@code null} or has length 0
+  */
+ static boolean isEmpty( String str )
+ {
+     if ( str == null )
+     {
+         return true;
+     }
+     return str.length() == 0;
+ }
+
+ /**
+  * @return {@code true} if {@code str} is neither {@code null} nor of
+  *         length 0
+  */
+ static boolean isNotEmpty( String str )
+ {
+     return str != null && str.length() != 0;
+ }
+
+ /**
+  * Loads a Word document from a file. The file stream is opened here and
+  * always closed again, whether loading succeeds or fails.
+  *
+  * @param docFile file to read
+  * @return the loaded document
+  * @throws IOException if the file cannot be opened or read
+  */
+ public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
+ {
+     final FileInputStream docStream = new FileInputStream( docFile );
+     final HWPFDocumentCore loaded;
+     try
+     {
+         loaded = loadDoc( docStream );
+     }
+     finally
+     {
+         closeQuietly( docStream );
+     }
+     return loaded;
+ }
+
+ /**
+  * Loads a Word document from a stream. The stream is first verified and
+  * turned into a POIFS file system; the document is then opened as
+  * {@link HWPFDocument}, falling back to {@link HWPFOldDocument} when the
+  * former reports an old file format.
+  *
+  * @param inputStream stream to read; not closed by this method
+  * @return the loaded document
+  * @throws IOException if reading fails
+  */
+ public static HWPFDocumentCore loadDoc( InputStream inputStream )
+         throws IOException
+ {
+     final POIFSFileSystem fileSystem = HWPFDocumentCore
+             .verifyAndBuildPOIFS( inputStream );
+
+     HWPFDocumentCore loaded;
+     try
+     {
+         loaded = new HWPFDocument( fileSystem );
+     }
+     catch ( OldWordFileFormatException oldFormat )
+     {
+         loaded = new HWPFOldDocument( fileSystem );
+     }
+     return loaded;
+ }
+
+ /**
+  * Reflectively creates a {@link TableIterator} through its non-public
+  * {@code (Range, int)} constructor.
+  *
+  * @param range range to iterate over
+  * @param level value passed as the constructor's {@code int} argument
+  * @throws Error wrapping any exception raised by the reflective call
+  */
+ public static TableIterator newTableIterator( Range range, int level )
+ {
+     final Class<?>[] signature = { Range.class, int.class };
+     final Object[] arguments = { range, Integer.valueOf( level ) };
+     try
+     {
+         final Constructor<TableIterator> hidden = TableIterator.class
+                 .getDeclaredConstructor( signature );
+         hidden.setAccessible( true );
+         return hidden.newInstance( arguments );
+     }
+     catch ( Exception exc )
+     {
+         throw new Error( exc );
+     }
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+/**
+ * Thin helper around a DOM {@link Document} for building XSL-FO trees.
+ * <p>
+ * On construction an {@code fo:root} element with an
+ * {@code fo:layout-master-set} child is added to the document. The
+ * {@code add*} methods create an element and attach it to a parent, the
+ * {@code create*} methods only create a detached node. All elements are
+ * created in the XSL-FO namespace.
+ */
+public class FoDocumentFacade
+{
+    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
+
+    protected final Document document;
+    protected final Element layoutMasterSet;
+    protected final Element root;
+
+    /**
+     * Wraps the given document and appends the {@code fo:root} /
+     * {@code fo:layout-master-set} skeleton to it.
+     */
+    public FoDocumentFacade( Document document )
+    {
+        this.document = document;
+
+        this.root = fo( "fo:root" );
+        document.appendChild( this.root );
+
+        this.layoutMasterSet = fo( "fo:layout-master-set" );
+        this.root.appendChild( this.layoutMasterSet );
+    }
+
+    /** Creates a detached element with the given qualified name in the XSL-FO namespace. */
+    private Element fo( String qualifiedName )
+    {
+        return document.createElementNS( NS_XSLFO, qualifiedName );
+    }
+
+    /** Appends a new {@code fo:flow} with the given flow name to a page sequence. */
+    public Element addFlowToPageSequence( final Element pageSequence,
+            String flowName )
+    {
+        final Element created = fo( "fo:flow" );
+        created.setAttribute( "flow-name", flowName );
+        pageSequence.appendChild( created );
+        return created;
+    }
+
+    /** Appends a new {@code fo:list-item} to a list block. */
+    public Element addListItem( Element listBlock )
+    {
+        final Element item = createListItem();
+        listBlock.appendChild( item );
+        return item;
+    }
+
+    /** Appends a new {@code fo:list-item-body} to a list item. */
+    public Element addListItemBody( Element listItem )
+    {
+        final Element itemBody = createListItemBody();
+        listItem.appendChild( itemBody );
+        return itemBody;
+    }
+
+    /** Appends a new {@code fo:list-item-label} holding the given text to a list item. */
+    public Element addListItemLabel( Element listItem, String text )
+    {
+        final Element itemLabel = createListItemLabel( text );
+        listItem.appendChild( itemLabel );
+        return itemLabel;
+    }
+
+    /** Appends a new {@code fo:page-sequence} referencing the given page master to the root. */
+    public Element addPageSequence( String pageMaster )
+    {
+        final Element created = fo( "fo:page-sequence" );
+        created.setAttribute( "master-reference", pageMaster );
+        root.appendChild( created );
+        return created;
+    }
+
+    /** Appends a new {@code fo:region-body} to a page master. */
+    public Element addRegionBody( Element pageMaster )
+    {
+        final Element created = fo( "fo:region-body" );
+        pageMaster.appendChild( created );
+        return created;
+    }
+
+    /** Appends a new {@code fo:simple-page-master} with the given name to the layout master set. */
+    public Element addSimplePageMaster( String masterName )
+    {
+        final Element created = fo( "fo:simple-page-master" );
+        created.setAttribute( "master-name", masterName );
+        layoutMasterSet.appendChild( created );
+        return created;
+    }
+
+    /** Creates a detached {@code fo:basic-link} pointing to an external destination. */
+    protected Element createBasicLinkExternal( String externalDestination )
+    {
+        final Element link = fo( "fo:basic-link" );
+        link.setAttribute( "external-destination", externalDestination );
+        return link;
+    }
+
+    /** Creates a detached {@code fo:basic-link} pointing to an internal destination. */
+    public Element createBasicLinkInternal( String internalDestination )
+    {
+        final Element link = fo( "fo:basic-link" );
+        link.setAttribute( "internal-destination", internalDestination );
+        return link;
+    }
+
+    /** Creates a detached {@code fo:block}. */
+    public Element createBlock()
+    {
+        return fo( "fo:block" );
+    }
+
+    /** Creates a detached {@code fo:external-graphic} whose {@code src} is {@code url('source')}. */
+    public Element createExternalGraphic( String source )
+    {
+        final Element graphic = fo( "fo:external-graphic" );
+        graphic.setAttribute( "src", "url('" + source + "')" );
+        return graphic;
+    }
+
+    /** Creates a detached {@code fo:inline}. */
+    public Element createInline()
+    {
+        return fo( "fo:inline" );
+    }
+
+    /** Creates a detached {@code fo:leader}. */
+    public Element createLeader()
+    {
+        return fo( "fo:leader" );
+    }
+
+    /** Creates a detached {@code fo:list-block}. */
+    public Element createListBlock()
+    {
+        return fo( "fo:list-block" );
+    }
+
+    /** Creates a detached {@code fo:list-item}. */
+    public Element createListItem()
+    {
+        return fo( "fo:list-item" );
+    }
+
+    /** Creates a detached {@code fo:list-item-body}. */
+    public Element createListItemBody()
+    {
+        return fo( "fo:list-item-body" );
+    }
+
+    /** Creates a detached {@code fo:list-item-label} containing one {@code fo:block} with the text. */
+    public Element createListItemLabel( String text )
+    {
+        final Element itemLabel = fo( "fo:list-item-label" );
+        final Element textBlock = createBlock();
+        textBlock.appendChild( document.createTextNode( text ) );
+        itemLabel.appendChild( textBlock );
+        return itemLabel;
+    }
+
+    /** Creates a detached {@code fo:table}. */
+    protected Element createTable()
+    {
+        return fo( "fo:table" );
+    }
+
+    /** Creates a detached {@code fo:table-body}. */
+    protected Element createTableBody()
+    {
+        return fo( "fo:table-body" );
+    }
+
+    /** Creates a detached {@code fo:table-cell}. */
+    protected Element createTableCell()
+    {
+        return fo( "fo:table-cell" );
+    }
+
+    /** Creates a detached {@code fo:table-header}. */
+    protected Element createTableHeader()
+    {
+        return fo( "fo:table-header" );
+    }
+
+    /** Creates a detached {@code fo:table-row}. */
+    protected Element createTableRow()
+    {
+        return fo( "fo:table-row" );
+    }
+
+    /** Creates a detached text node. */
+    protected Text createText( String data )
+    {
+        return document.createTextNode( data );
+    }
+
+    /** Returns the wrapped document. */
+    public Document getDocument()
+    {
+        return document;
+    }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+/**
+ * Thin helper around a DOM {@link Document} for building HTML trees.
+ * <p>
+ * On construction an {@code html} element with a {@code head} and a
+ * {@code body} child (in that order) is added to the document. The
+ * {@code create*} methods return detached nodes.
+ */
+public class HtmlDocumentFacade
+{
+
+    protected final Element body;
+    protected final Document document;
+    protected final Element head;
+    protected final Element html;
+
+    /** Wraps the given document and appends the html/head/body skeleton to it. */
+    public HtmlDocumentFacade( Document document )
+    {
+        this.document = document;
+
+        this.html = tag( "html" );
+        document.appendChild( this.html );
+
+        this.head = tag( "head" );
+        this.html.appendChild( this.head );
+
+        this.body = tag( "body" );
+        this.html.appendChild( this.body );
+    }
+
+    /** Creates a detached element with the given tag name. */
+    private Element tag( String tagName )
+    {
+        return document.createElement( tagName );
+    }
+
+    /** Creates a detached {@code a} element whose {@code href} is the given destination. */
+    public Element createHyperlink( String internalDestination )
+    {
+        final Element anchor = tag( "a" );
+        anchor.setAttribute( "href", internalDestination );
+        return anchor;
+    }
+
+    /** Creates a detached {@code li} element. */
+    public Element createListItem()
+    {
+        return tag( "li" );
+    }
+
+    /** Creates a detached {@code p} element. */
+    public Element createParagraph()
+    {
+        return tag( "p" );
+    }
+
+    /** Creates a detached {@code table} element. */
+    public Element createTable()
+    {
+        return tag( "table" );
+    }
+
+    /** Creates a detached {@code tbody} element. */
+    public Element createTableBody()
+    {
+        return tag( "tbody" );
+    }
+
+    /** Creates a detached {@code td} element. */
+    public Element createTableCell()
+    {
+        return tag( "td" );
+    }
+
+    /** Creates a detached {@code thead} element. */
+    public Element createTableHeader()
+    {
+        return tag( "thead" );
+    }
+
+    /** Creates a detached {@code th} element. */
+    public Element createTableHeaderCell()
+    {
+        return tag( "th" );
+    }
+
+    /** Creates a detached {@code tr} element. */
+    public Element createTableRow()
+    {
+        return tag( "tr" );
+    }
+
+    /** Creates a detached text node. */
+    public Text createText( String data )
+    {
+        return document.createTextNode( data );
+    }
+
+    /** Creates a detached {@code ul} element. */
+    public Element createUnorderedList()
+    {
+        return tag( "ul" );
+    }
+
+    /** Returns the wrapped document. */
+    public Document getDocument()
+    {
+        return document;
+    }
+
+}
--- /dev/null
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.hwpf.converter;
+
+/**
+ * Comment me
+ *
+ * @author Ryan Ackley
+ */
+public final class NumberFormatter
+{
+
+ private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e",
+ "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
+ "s", "t", "u", "v", "x", "y", "z" };
+
+ private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv",
+ "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv",
+ "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii",
+ "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx", "xxxi",
+ "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii",
+ "xxxviii", "xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv",
+ "xlvi", "xlvii", "xlviii", "xlix", "l" };
+
+ private final static int T_ARABIC = 0;
+ private final static int T_LOWER_LETTER = 4;
+ private final static int T_LOWER_ROMAN = 2;
+ private final static int T_ORDINAL = 5;
+ private final static int T_UPPER_LETTER = 3;
+ private final static int T_UPPER_ROMAN = 1;
+
+ public static String getNumber( int num, int style )
+ {
+ switch ( style )
+ {
+ case T_UPPER_ROMAN:
+ return C_ROMAN[num - 1].toUpperCase();
+ case T_LOWER_ROMAN:
+ return C_ROMAN[num - 1];
+ case T_UPPER_LETTER:
+ return C_LETTERS[num - 1].toUpperCase();
+ case T_LOWER_LETTER:
+ return C_LETTERS[num - 1];
+ case T_ARABIC:
+ case T_ORDINAL:
+ default:
+ return String.valueOf( num );
+ }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.model.ListFormatOverride;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+/**
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class WordToFoConverter extends AbstractWordConverter
+{
+
+    /**
+     * Immutable snapshot of the character formatting that was written onto the
+     * <tt>fo:block</tt> currently being filled. Child <tt>fo:inline</tt>
+     * elements are compared against it so that they only repeat attributes
+     * whose values differ from the enclosing block.
+     */
+    private static class BlockProperies
+    {
+        /** Font family applied to the block. */
+        final String pFontName;
+        /** Font size applied to the block. */
+        final int pFontSize;
+        /** Whether the block is rendered bold. */
+        final boolean pBold;
+        /** Whether the block is rendered italic. */
+        final boolean pItalic;
+
+        public BlockProperies( String pFontName, int pFontSize, boolean pBold,
+                boolean pItalic )
+        {
+            this.pBold = pBold;
+            this.pItalic = pItalic;
+            this.pFontSize = pFontSize;
+            this.pFontName = pFontName;
+        }
+    }
+
+ // Class-wide logger; used below to warn about paragraphs that reference
+ // missing list tables and about tables that end up without a body.
+ private static final POILogger logger = POILogFactory
+ .getLogger( WordToFoConverter.class );
+
+    /**
+     * Maps the numeric border type of a Word border to an XSL-FO
+     * <tt>border-style</tt> keyword.
+     *
+     * @param borderCode
+     *            border description to translate, must not be <tt>null</tt>
+     * @return one of <tt>solid</tt>, <tt>double</tt>, <tt>dotted</tt>,
+     *         <tt>dashed</tt>, <tt>ridge</tt> or <tt>groove</tt>; unknown
+     *         types fall back to <tt>solid</tt>
+     * @throws IllegalArgumentException
+     *             if <tt>borderCode</tt> is <tt>null</tt>
+     */
+    public static String getBorderType( BorderCode borderCode )
+    {
+        if ( borderCode == null )
+            throw new IllegalArgumentException( "borderCode is null" );
+
+        switch ( borderCode.getBorderType() )
+        {
+        case 3:
+        case 10:
+        case 11:
+        case 12:
+        case 13:
+        case 14:
+        case 15:
+        case 16:
+        case 17:
+        case 18:
+        case 19:
+        case 21:
+            return "double";
+        case 6:
+        case 9:
+            return "dotted";
+        case 7:
+        case 8:
+        case 22:
+        case 23:
+            return "dashed";
+        case 24:
+            return "ridge";
+        case 25:
+            // the border-style keyword is "groove"; "grooved" is not a valid
+            // value and would be rejected or ignored by FO processors
+            return "groove";
+        default:
+            // includes types 1, 2, 5 and 20
+            return "solid";
+        }
+    }
+
+    /**
+     * Java main() interface to interact with {@link WordToFoConverter}
+     *
+     * <p>
+     * Usage: WordToFoConverter infile outfile
+     * </p>
+     * Where infile is an input .doc file ( Word 97-2007) which will be rendered
+     * as XSL-FO into outfile. Any failure is reported by printing the stack
+     * trace; the method does not rethrow.
+     *
+     * @param args
+     *            input file name followed by output file name
+     */
+    public static void main( String[] args )
+    {
+        if ( args.length < 2 )
+        {
+            System.err
+                    .println( "Usage: WordToFoConverter <inputFile.doc> <saveTo.fo>" );
+            return;
+        }
+
+        System.out.println( "Converting " + args[0] );
+        System.out.println( "Saving output to " + args[1] );
+        try
+        {
+            Document doc = WordToFoConverter.process( new File( args[0] ) );
+
+            // Hand the serializer a byte stream so that it performs the
+            // encoding itself. A FileWriter would re-encode the characters
+            // with the platform default charset and could contradict the
+            // UTF-8 declaration written into the XML prolog.
+            java.io.OutputStream out = new java.io.FileOutputStream( args[1] );
+            try
+            {
+                DOMSource domSource = new DOMSource( doc );
+                StreamResult streamResult = new StreamResult( out );
+                TransformerFactory tf = TransformerFactory.newInstance();
+                Transformer serializer = tf.newTransformer();
+                // TODO set encoding from a command argument
+                serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
+                serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
+                serializer.transform( domSource, streamResult );
+            }
+            finally
+            {
+                // release the file handle even when serialization fails
+                out.close();
+            }
+        }
+        catch ( Exception e )
+        {
+            e.printStackTrace();
+        }
+    }
+
+ static Document process( File docFile ) throws Exception
+ {
+ final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
+ WordToFoConverter wordToFoConverter = new WordToFoConverter(
+ DocumentBuilderFactory.newInstance().newDocumentBuilder()
+ .newDocument() );
+ wordToFoConverter.processDocument( hwpfDocument );
+ return wordToFoConverter.getDocument();
+ }
+
+ // Formatting snapshots of the fo:block elements currently being filled:
+ // pushed and popped by processParagraph, peeked by outputCharacters.
+ private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
+
+ // Facade through which every FO element is created; also owns the DOM
+ // document returned by getDocument().
+ protected final FoDocumentFacade foDocumentFacade;
+
+    /**
+     * Creates a new {@link WordToFoConverter} that writes into the given DOM
+     * document. One instance can be used to output several
+     * {@link HWPFDocument}s into a single FO document.
+     *
+     * @param document
+     *            XML DOM Document used as XSL FO document. Shall support
+     *            namespaces
+     */
+    public WordToFoConverter( Document document )
+    {
+        foDocumentFacade = new FoDocumentFacade( document );
+    }
+
+ protected String createPageMaster( SectionProperties sep, String type,
+ int section )
+ {
+ float height = sep.getYaPage() / WordToFoUtils.TWIPS_PER_INCH;
+ float width = sep.getXaPage() / WordToFoUtils.TWIPS_PER_INCH;
+ float leftMargin = sep.getDxaLeft() / WordToFoUtils.TWIPS_PER_INCH;
+ float rightMargin = sep.getDxaRight() / WordToFoUtils.TWIPS_PER_INCH;
+ float topMargin = sep.getDyaTop() / WordToFoUtils.TWIPS_PER_INCH;
+ float bottomMargin = sep.getDyaBottom() / WordToFoUtils.TWIPS_PER_INCH;
+
+ // add these to the header
+ String pageMasterName = type + "-page" + section;
+
+ Element pageMaster = foDocumentFacade
+ .addSimplePageMaster( pageMasterName );
+ pageMaster.setAttribute( "page-height", height + "in" );
+ pageMaster.setAttribute( "page-width", width + "in" );
+
+ Element regionBody = foDocumentFacade.addRegionBody( pageMaster );
+ regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
+ + "in " + bottomMargin + "in " + leftMargin + "in" );
+
+ /*
+ * 6.4.14 fo:region-body
+ *
+ * The values of the padding and border-width traits must be "0".
+ */
+ // WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top");
+ // WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom");
+ // WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
+ // WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
+
+ if ( sep.getCcolM1() > 0 )
+ {
+ regionBody.setAttribute( "column-count", ""
+ + ( sep.getCcolM1() + 1 ) );
+ if ( sep.getFEvenlySpaced() )
+ {
+ regionBody.setAttribute( "column-gap",
+ ( sep.getDxaColumns() / WordToFoUtils.TWIPS_PER_INCH )
+ + "in" );
+ }
+ else
+ {
+ regionBody.setAttribute( "column-gap", "0.25in" );
+ }
+ }
+
+ return pageMasterName;
+ }
+
+    /**
+     * @return the DOM document held by the FO facade of this converter
+     */
+    public Document getDocument()
+    {
+        final Document foDocument = foDocumentFacade.getDocument();
+        return foDocument;
+    }
+
+ @Override
+ protected void outputCharacters( Element block, CharacterRun characterRun,
+ String text )
+ {
+ BlockProperies blockProperies = this.blocksProperies.peek();
+ Element inline = foDocumentFacade.createInline();
+ if ( characterRun.isBold() != blockProperies.pBold )
+ {
+ WordToFoUtils.setBold( inline, characterRun.isBold() );
+ }
+ if ( characterRun.isItalic() != blockProperies.pItalic )
+ {
+ WordToFoUtils.setItalic( inline, characterRun.isItalic() );
+ }
+ if ( characterRun.getFontName() != null
+ && !AbstractWordUtils.equals( characterRun.getFontName(),
+ blockProperies.pFontName ) )
+ {
+ WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
+ }
+ if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
+ {
+ WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 );
+ }
+ WordToFoUtils.setCharactersProperties( characterRun, inline );
+ block.appendChild( inline );
+
+ Text textNode = foDocumentFacade.createText( text );
+ inline.appendChild( textNode );
+ }
+
+ protected void processHyperlink( HWPFDocumentCore hwpfDocument,
+ Element currentBlock, Paragraph paragraph,
+ List<CharacterRun> characterRuns, int currentTableLevel,
+ String hyperlink, int beginTextInclusive, int endTextExclusive )
+ {
+ Element basicLink = foDocumentFacade
+ .createBasicLinkExternal( hyperlink );
+ currentBlock.appendChild( basicLink );
+
+ if ( beginTextInclusive < endTextExclusive )
+ processCharacters( hwpfDocument, currentTableLevel, paragraph,
+ basicLink, characterRuns, beginTextInclusive,
+ endTextExclusive );
+ }
+
+ /**
+ * This method shall store image bytes in external file and convert it if
+ * necessary. Images shall be stored using PNG format (for bitmap) or SVG
+ * (for vector). Other formats may be not supported by your XSL FO
+ * processor.
+ * <p>
+ * Please note the
+ * {@link WordToFoUtils#setPictureProperties(Picture, Element)} method.
+ *
+ * @param currentBlock
+ * currently processed FO element, like <tt>fo:block</tt>. Shall
+ * be used as parent of newly created
+ * <tt>fo:external-graphic</tt> or
+ * <tt>fo:instream-foreign-object</tt>
+ * @param inlined
+ * if image is inlined
+ * @param picture
+ * HWPF object, contained picture data and properties
+ */
+ protected void processImage( Element currentBlock, boolean inlined,
+ Picture picture )
+ {
+ // no default implementation -- skip
+ currentBlock.appendChild( foDocumentFacade.getDocument().createComment(
+ "Image link to '" + picture.suggestFullFileName()
+ + "' can be here" ) );
+ }
+
+ protected void processPageref( HWPFDocumentCore hwpfDocument,
+ Element currentBlock, Paragraph paragraph,
+ List<CharacterRun> characterRuns, int currentTableLevel,
+ String pageref, int beginTextInclusive, int endTextExclusive )
+ {
+ Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
+ currentBlock.appendChild( basicLink );
+
+ if ( beginTextInclusive < endTextExclusive )
+ processCharacters( hwpfDocument, currentTableLevel, paragraph,
+ basicLink, characterRuns, beginTextInclusive,
+ endTextExclusive );
+ }
+
+    /**
+     * Converts one paragraph into an <tt>fo:block</tt> appended to
+     * <tt>parentFopElement</tt>.
+     * <p>
+     * The formatting of the first character run is written onto the block and
+     * recorded on the block-properties stack, so that
+     * {@link #outputCharacters(Element, CharacterRun, String)} only emits the
+     * differences. A paragraph without character runs yields an empty block.
+     * When neither the bullet text nor the character runs produce any text, an
+     * <tt>fo:leader</tt> is appended to the block.
+     *
+     * @param hwpfDocument
+     *            document being converted
+     * @param parentFopElement
+     *            element that receives the new block
+     * @param currentTableLevel
+     *            table nesting level of the paragraph
+     * @param paragraph
+     *            paragraph to convert
+     * @param bulletText
+     *            list label to put in front of the text, may be empty
+     */
+    protected void processParagraph( HWPFDocumentCore hwpfDocument,
+            Element parentFopElement, int currentTableLevel,
+            Paragraph paragraph, String bulletText )
+    {
+        final Element block = foDocumentFacade.createBlock();
+        parentFopElement.appendChild( block );
+
+        WordToFoUtils.setParagraphProperties( paragraph, block );
+
+        final int charRuns = paragraph.numCharacterRuns();
+
+        if ( charRuns == 0 )
+        {
+            return;
+        }
+
+        {
+            // the first run defines the defaults of the whole block
+            final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
+            final int pFontSize = characterRun.getFontSize() / 2;
+            final String pFontName = characterRun.getFontName();
+            final boolean pBold = characterRun.isBold();
+            final boolean pItalic = characterRun.isItalic();
+
+            WordToFoUtils.setFontFamily( block, pFontName );
+            WordToFoUtils.setFontSize( block, pFontSize );
+            WordToFoUtils.setBold( block, pBold );
+            WordToFoUtils.setItalic( block, pItalic );
+
+            blocksProperies.push( new BlockProperies( pFontName, pFontSize,
+                    pBold, pItalic ) );
+        }
+        try
+        {
+            boolean haveAnyText = false;
+
+            if ( WordToFoUtils.isNotEmpty( bulletText ) )
+            {
+                Element inline = foDocumentFacade.createInline();
+                block.appendChild( inline );
+
+                Text textNode = foDocumentFacade.createText( bulletText );
+                inline.appendChild( textNode );
+
+                haveAnyText = bulletText.trim().length() != 0;
+            }
+
+            List<CharacterRun> characterRuns = WordToFoUtils
+                    .findCharacterRuns( paragraph );
+            // Combine with the bullet result instead of overwriting it;
+            // otherwise a paragraph consisting only of its list label would
+            // still get a leader. processCharacters is on the left so that it
+            // is always executed.
+            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
+                    paragraph, block, characterRuns, 0, characterRuns.size() )
+                    || haveAnyText;
+
+            if ( !haveAnyText )
+            {
+                Element leader = foDocumentFacade.createLeader();
+                block.appendChild( leader );
+            }
+        }
+        finally
+        {
+            // keep the stack balanced even if character processing fails
+            blocksProperies.pop();
+        }
+    }
+
+ protected void processSection( HWPFDocumentCore wordDocument,
+ Section section, int sectionCounter )
+ {
+ String regularPage = createPageMaster(
+ WordToFoUtils.getSectionProperties( section ), "page",
+ sectionCounter );
+
+ Element pageSequence = foDocumentFacade.addPageSequence( regularPage );
+ Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
+ "xsl-region-body" );
+
+ processSectionParagraphes( wordDocument, flow, section, 0 );
+ }
+
+ protected void processSectionParagraphes( HWPFDocument wordDocument,
+ Element flow, Range range, int currentTableLevel )
+ {
+ final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
+ for ( TableIterator tableIterator = WordToFoUtils.newTableIterator(
+ range, currentTableLevel + 1 ); tableIterator.hasNext(); )
+ {
+ Table next = tableIterator.next();
+ allTables.put( Integer.valueOf( next.getStartOffset() ), next );
+ }
+
+ final ListTables listTables = wordDocument.getListTables();
+ int currentListInfo = 0;
+
+ final int paragraphs = range.numParagraphs();
+ for ( int p = 0; p < paragraphs; p++ )
+ {
+ Paragraph paragraph = range.getParagraph( p );
+
+ if ( allTables.containsKey( Integer.valueOf( paragraph
+ .getStartOffset() ) ) )
+ {
+ Table table = allTables.get( Integer.valueOf( paragraph
+ .getStartOffset() ) );
+ processTable( wordDocument, flow, table, currentTableLevel + 1 );
+ continue;
+ }
+
+ if ( paragraph.isInTable()
+ && paragraph.getTableLevel() != currentTableLevel )
+ {
+ continue;
+ }
+
+ if ( paragraph.getIlfo() != currentListInfo )
+ {
+ currentListInfo = paragraph.getIlfo();
+ }
+
+ if ( currentListInfo != 0 )
+ {
+ if ( listTables != null )
+ {
+ final ListFormatOverride listFormatOverride = listTables
+ .getOverride( paragraph.getIlfo() );
+
+ String label = WordToFoUtils.getBulletText( listTables,
+ paragraph, listFormatOverride.getLsid() );
+
+ processParagraph( wordDocument, flow, currentTableLevel,
+ paragraph, label );
+ }
+ else
+ {
+ logger.log( POILogger.WARN,
+ "Paragraph #" + paragraph.getStartOffset() + "-"
+ + paragraph.getEndOffset()
+ + " has reference to list structure #"
+ + currentListInfo
+ + ", but listTables not defined in file" );
+
+ processParagraph( wordDocument, flow, currentTableLevel,
+ paragraph, WordToFoUtils.EMPTY );
+ }
+ }
+ else
+ {
+ processParagraph( wordDocument, flow, currentTableLevel,
+ paragraph, WordToFoUtils.EMPTY );
+ }
+ }
+
+ }
+
+    /**
+     * Converts a Word table into an <tt>fo:table</tt> appended to
+     * <tt>flow</tt>.
+     * <p>
+     * Rows flagged as table header go into the table header, all others into
+     * the table body. Continuation cells of horizontal and vertical merges
+     * are skipped; the first cell of a merge gets the matching
+     * <tt>number-columns-spanned</tt> / <tt>number-rows-spanned</tt>
+     * attribute. The last cell of a row that is shorter than the widest row
+     * is stretched over the remaining columns. A table that ends up without
+     * any body row is not appended; a warning is logged instead.
+     *
+     * @param wordDocument
+     *            document being converted
+     * @param flow
+     *            element receiving the table
+     * @param table
+     *            table to convert
+     * @param thisTableLevel
+     *            nesting level of <tt>table</tt>, passed on when rendering the
+     *            cell contents
+     */
+    protected void processTable( HWPFDocumentCore wordDocument, Element flow,
+            Table table, int thisTableLevel )
+    {
+        Element tableHeader = foDocumentFacade.createTableHeader();
+        Element tableBody = foDocumentFacade.createTableBody();
+
+        final int tableRows = table.numRows();
+
+        // the widest row decides how far a short row's last cell must stretch
+        int maxColumns = Integer.MIN_VALUE;
+        for ( int r = 0; r < tableRows; r++ )
+        {
+            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
+        }
+
+        for ( int r = 0; r < tableRows; r++ )
+        {
+            TableRow tableRow = table.getRow( r );
+
+            Element tableRowElement = foDocumentFacade.createTableRow();
+            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
+
+            final int rowCells = tableRow.numCells();
+            for ( int c = 0; c < rowCells; c++ )
+            {
+                TableCell tableCell = tableRow.getCell( c );
+
+                // continuation cells are covered by the span of the first one
+                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
+                    continue;
+
+                if ( tableCell.isVerticallyMerged()
+                        && !tableCell.isFirstVerticallyMerged() )
+                    continue;
+
+                Element tableCellElement = foDocumentFacade.createTableCell();
+                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
+                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
+                        c == rowCells - 1 );
+
+                if ( tableCell.isFirstMerged() )
+                {
+                    // count the run of merged cells starting at this one
+                    int count = 0;
+                    for ( int c1 = c; c1 < rowCells; c1++ )
+                    {
+                        TableCell nextCell = tableRow.getCell( c1 );
+                        if ( !nextCell.isMerged() )
+                            break;
+                        count++;
+                    }
+                    tableCellElement.setAttribute( "number-columns-spanned", ""
+                            + count );
+                }
+                else
+                {
+                    if ( c == rowCells - 1 && c != maxColumns - 1 )
+                    {
+                        tableCellElement.setAttribute(
+                                "number-columns-spanned", ""
+                                        + ( maxColumns - c ) );
+                    }
+                }
+
+                if ( tableCell.isFirstVerticallyMerged() )
+                {
+                    int count = 0;
+                    for ( int r1 = r; r1 < tableRows; r1++ )
+                    {
+                        TableRow nextRow = table.getRow( r1 );
+                        // getCell( c ) is only valid for c < numCells(); a
+                        // row with exactly c cells has no column c and ends
+                        // the span (the former '<' let it through)
+                        if ( nextRow.numCells() <= c )
+                            break;
+                        TableCell nextCell = nextRow.getCell( c );
+                        if ( !nextCell.isVerticallyMerged() )
+                            break;
+                        count++;
+                    }
+                    tableCellElement.setAttribute( "number-rows-spanned", ""
+                            + count );
+                }
+
+                processSectionParagraphes( wordDocument, tableCellElement,
+                        tableCell, thisTableLevel );
+
+                // a cell without content still gets an empty block
+                if ( !tableCellElement.hasChildNodes() )
+                {
+                    tableCellElement.appendChild( foDocumentFacade
+                            .createBlock() );
+                }
+
+                tableRowElement.appendChild( tableCellElement );
+            }
+
+            if ( tableRow.isTableHeader() )
+            {
+                tableHeader.appendChild( tableRowElement );
+            }
+            else
+            {
+                tableBody.appendChild( tableRowElement );
+            }
+        }
+
+        final Element tableElement = foDocumentFacade.createTable();
+        if ( tableHeader.hasChildNodes() )
+        {
+            tableElement.appendChild( tableHeader );
+        }
+        if ( tableBody.hasChildNodes() )
+        {
+            tableElement.appendChild( tableBody );
+            flow.appendChild( tableElement );
+        }
+        else
+        {
+            logger.log(
+                    POILogger.WARN,
+                    "Table without body starting on offset "
+                            + table.getStartOffset() + " -- "
+                            + table.getEndOffset() );
+        }
+    }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterProperties;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.w3c.dom.Element;
+
+public class WordToFoUtils extends AbstractWordUtils
+{
+    /**
+     * Sets the <tt>font-weight</tt> attribute of <tt>element</tt> to
+     * <tt>bold</tt> or <tt>normal</tt>.
+     */
+    public static void setBold( final Element element, final boolean bold )
+    {
+        final String weight;
+        if ( bold )
+        {
+            weight = "bold";
+        }
+        else
+        {
+            weight = "normal";
+        }
+        element.setAttribute( "font-weight", weight );
+    }
+
+ public static void setBorder( Element element, BorderCode borderCode,
+ String where )
+ {
+ if ( element == null )
+ throw new IllegalArgumentException( "element is null" );
+
+ if ( borderCode == null || borderCode.getBorderType() == 0 )
+ return;
+
+ if ( isEmpty( where ) )
+ {
+ element.setAttribute( "border-style", getBorderType( borderCode ) );
+ element.setAttribute( "border-color",
+ getColor( borderCode.getColor() ) );
+ element.setAttribute( "border-width", getBorderWidth( borderCode ) );
+ }
+ else
+ {
+ element.setAttribute( "border-" + where + "-style",
+ getBorderType( borderCode ) );
+ element.setAttribute( "border-" + where + "-color",
+ getColor( borderCode.getColor() ) );
+ element.setAttribute( "border-" + where + "-width",
+ getBorderWidth( borderCode ) );
+ }
+ }
+
+    /**
+     * Copies the character-level formatting of <tt>characterRun</tt> that is
+     * not handled per block (border, capitalization, highlight, shadow, small
+     * caps, sub/superscript, strike-through, underline, hidden text) onto the
+     * given inline element.
+     *
+     * @param characterRun
+     *            run supplying the formatting
+     * @param inline
+     *            element receiving the attributes
+     */
+    public static void setCharactersProperties(
+            final CharacterRun characterRun, final Element inline )
+    {
+        final CharacterProperties props = characterRun.cloneProperties();
+
+        setBorder( inline, props.getBrc(), EMPTY );
+
+        if ( characterRun.isCapitalized() )
+            inline.setAttribute( "text-transform", "uppercase" );
+
+        if ( characterRun.isHighlighted() )
+            inline.setAttribute( "background-color",
+                    getColor( props.getIcoHighlight() ) );
+
+        if ( characterRun.isShadowed() )
+            inline.setAttribute( "text-shadow", characterRun.getFontSize() / 24
+                    + "pt" );
+
+        if ( characterRun.isSmallCaps() )
+            inline.setAttribute( "font-variant", "small-caps" );
+
+        // index 1 is rendered as superscript, index 2 as subscript
+        if ( characterRun.getSubSuperScriptIndex() == 1 )
+        {
+            inline.setAttribute( "baseline-shift", "super" );
+            inline.setAttribute( "font-size", "smaller" );
+        }
+        else if ( characterRun.getSubSuperScriptIndex() == 2 )
+        {
+            inline.setAttribute( "baseline-shift", "sub" );
+            inline.setAttribute( "font-size", "smaller" );
+        }
+
+        if ( characterRun.isVanished() )
+            inline.setAttribute( "visibility", "hidden" );
+
+        // strike-through and underline share the text-decoration attribute
+        final boolean struck = characterRun.isStrikeThrough();
+        final boolean underlined = characterRun.getUnderlineCode() > 0;
+        if ( struck && underlined )
+            inline.setAttribute( "text-decoration", "line-through underline" );
+        else if ( struck )
+            inline.setAttribute( "text-decoration", "line-through" );
+        else if ( underlined )
+            inline.setAttribute( "text-decoration", "underline" );
+    }
+
+    /**
+     * Sets the <tt>font-family</tt> attribute of <tt>element</tt>; does
+     * nothing when <tt>fontFamily</tt> is empty.
+     */
+    public static void setFontFamily( final Element element,
+            final String fontFamily )
+    {
+        if ( !isEmpty( fontFamily ) )
+        {
+            element.setAttribute( "font-family", fontFamily );
+        }
+    }
+
+    /**
+     * Sets the <tt>font-size</tt> attribute of <tt>element</tt> to the decimal
+     * representation of <tt>fontSize</tt>; no unit is appended.
+     */
+    public static void setFontSize( final Element element, final int fontSize )
+    {
+        final String size = Integer.toString( fontSize );
+        element.setAttribute( "font-size", size );
+    }
+
+    /**
+     * Writes the paragraph's first-line indent, left and right indents and
+     * the spacing before and after it onto <tt>block</tt>. Each value is
+     * divided by <tt>TWIPS_PER_PT</tt> and written with the <tt>pt</tt> unit;
+     * values equal to 0 are not written.
+     */
+    public static void setIndent( Paragraph paragraph, Element block )
+    {
+        if ( paragraph.getFirstLineIndent() != 0 )
+            block.setAttribute( "text-indent",
+                    ( paragraph.getFirstLineIndent() / TWIPS_PER_PT ) + "pt" );
+
+        if ( paragraph.getIndentFromLeft() != 0 )
+            block.setAttribute( "start-indent",
+                    ( paragraph.getIndentFromLeft() / TWIPS_PER_PT ) + "pt" );
+
+        if ( paragraph.getIndentFromRight() != 0 )
+            block.setAttribute( "end-indent",
+                    ( paragraph.getIndentFromRight() / TWIPS_PER_PT ) + "pt" );
+
+        if ( paragraph.getSpacingBefore() != 0 )
+            block.setAttribute( "space-before",
+                    ( paragraph.getSpacingBefore() / TWIPS_PER_PT ) + "pt" );
+
+        if ( paragraph.getSpacingAfter() != 0 )
+            block.setAttribute( "space-after",
+                    ( paragraph.getSpacingAfter() / TWIPS_PER_PT ) + "pt" );
+    }
+
+    /**
+     * Sets the <tt>font-style</tt> attribute of <tt>element</tt> to
+     * <tt>italic</tt> or <tt>normal</tt>.
+     */
+    public static void setItalic( final Element element, final boolean italic )
+    {
+        final String fontStyle;
+        if ( italic )
+        {
+            fontStyle = "italic";
+        }
+        else
+        {
+            fontStyle = "normal";
+        }
+        element.setAttribute( "font-style", fontStyle );
+    }
+
+ public static void setJustification( Paragraph paragraph,
+ final Element element )
+ {
+ String justification = getJustification( paragraph.getJustification() );
+ if ( isNotEmpty( justification ) )
+ element.setAttribute( "text-align", justification );
+ }
+
+    /**
+     * Copies paragraph-level formatting onto the given <tt>fo:block</tt>:
+     * indents and spacing, justification, the four borders, page break, the
+     * hyphenation flag and the keep constraints. Line feeds are always
+     * preserved and white space is never collapsed.
+     */
+    public static void setParagraphProperties( Paragraph paragraph,
+            Element block )
+    {
+        setIndent( paragraph, block );
+        setJustification( paragraph, block );
+
+        setBorder( block, paragraph.getBottomBorder(), "bottom" );
+        setBorder( block, paragraph.getLeftBorder(), "left" );
+        setBorder( block, paragraph.getRightBorder(), "right" );
+        setBorder( block, paragraph.getTopBorder(), "top" );
+
+        if ( paragraph.pageBreakBefore() )
+            block.setAttribute( "break-before", "page" );
+
+        final String hyphenate = paragraph.isAutoHyphenated() ? "true"
+                : "false";
+        block.setAttribute( "hyphenate", hyphenate );
+
+        if ( paragraph.keepOnPage() )
+            block.setAttribute( "keep-together.within-page", "always" );
+
+        if ( paragraph.keepWithNext() )
+            block.setAttribute( "keep-with-next.within-page", "always" );
+
+        // fixed settings, independent of the paragraph
+        block.setAttribute( "linefeed-treatment", "preserve" );
+        block.setAttribute( "white-space-collapse", "false" );
+    }
+
+    /**
+     * Writes size, scaling, alignment and cropping of <tt>picture</tt> onto
+     * the given graphic element.
+     * <p>
+     * Width and height are taken from the picture's goal size, multiplied by
+     * the aspect ratio (in percent) when that is positive, and converted with
+     * <tt>TWIPS_PER_PT</tt>. Scaling is <tt>non-uniform</tt> only when both
+     * aspect ratios are positive. When any crop value is non-zero a
+     * <tt>clip</tt> rectangle is written and <tt>overflow</tt> is set to
+     * <tt>hidden</tt>.
+     *
+     * @param picture
+     *            picture supplying the properties
+     * @param graphicElement
+     *            element receiving the attributes
+     */
+    public static void setPictureProperties( Picture picture,
+            Element graphicElement )
+    {
+        final int aspectRatioX = picture.getAspectRatioX();
+        final int aspectRatioY = picture.getAspectRatioY();
+
+        if ( aspectRatioX > 0 )
+        {
+            graphicElement
+                    .setAttribute( "content-width", ( ( picture.getDxaGoal()
+                            * aspectRatioX / 100 ) / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        else
+        {
+            graphicElement.setAttribute( "content-width",
+                    ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
+        }
+
+        if ( aspectRatioY > 0 )
+        {
+            graphicElement
+                    .setAttribute( "content-height", ( ( picture.getDyaGoal()
+                            * aspectRatioY / 100 ) / TWIPS_PER_PT )
+                            + "pt" );
+        }
+        else
+        {
+            graphicElement.setAttribute( "content-height",
+                    ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
+        }
+
+        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
+        {
+            graphicElement.setAttribute( "scaling", "uniform" );
+        }
+        else
+        {
+            graphicElement.setAttribute( "scaling", "non-uniform" );
+        }
+
+        graphicElement.setAttribute( "vertical-align", "text-bottom" );
+
+        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
+                || picture.getDyaCropBottom() != 0
+                || picture.getDxaCropLeft() != 0 )
+        {
+            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
+            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
+            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
+            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
+            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
+                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
+                    + "pt)" );
+            // the attribute name used to be misspelled ("oveerflow"), so the
+            // clipped area was never actually hidden
+            graphicElement.setAttribute( "overflow", "hidden" );
+        }
+    }
+
+    /**
+     * Writes width, horizontal padding and the four borders of a table cell
+     * onto <tt>element</tt>.
+     * <p>
+     * For every side the cell's own border is used when it is present and its
+     * border type is not 0. Otherwise the row supplies it: the row's outer
+     * border when the cell lies on that edge of the table, else the row's
+     * inner horizontal or vertical border.
+     *
+     * @param tableRow
+     *            row containing the cell
+     * @param tableCell
+     *            cell to describe
+     * @param element
+     *            element receiving the attributes
+     * @param toppest
+     *            whether the cell is in the first row
+     * @param bottomest
+     *            whether the cell is in the last row
+     * @param leftest
+     *            whether the cell is the first of its row
+     * @param rightest
+     *            whether the cell is the last of its row
+     */
+    public static void setTableCellProperties( TableRow tableRow,
+            TableCell tableCell, Element element, boolean toppest,
+            boolean bottomest, boolean leftest, boolean rightest )
+    {
+        element.setAttribute( "width", ( tableCell.getWidth() / TWIPS_PER_INCH )
+                + "in" );
+
+        // the same half gap is used as padding on both sides
+        final String halfGap = ( tableRow.getGapHalf() / TWIPS_PER_INCH )
+                + "in";
+        element.setAttribute( "padding-start", halfGap );
+        element.setAttribute( "padding-end", halfGap );
+
+        BorderCode top = tableCell.getBrcTop();
+        if ( top == null || top.getBorderType() == 0 )
+        {
+            top = toppest ? tableRow.getTopBorder() : tableRow
+                    .getHorizontalBorder();
+        }
+
+        BorderCode bottom = tableCell.getBrcBottom();
+        if ( bottom == null || bottom.getBorderType() == 0 )
+        {
+            bottom = bottomest ? tableRow.getBottomBorder() : tableRow
+                    .getHorizontalBorder();
+        }
+
+        BorderCode left = tableCell.getBrcLeft();
+        if ( left == null || left.getBorderType() == 0 )
+        {
+            left = leftest ? tableRow.getLeftBorder() : tableRow
+                    .getVerticalBorder();
+        }
+
+        BorderCode right = tableCell.getBrcRight();
+        if ( right == null || right.getBorderType() == 0 )
+        {
+            right = rightest ? tableRow.getRightBorder() : tableRow
+                    .getVerticalBorder();
+        }
+
+        setBorder( element, bottom, "bottom" );
+        setBorder( element, left, "left" );
+        setBorder( element, right, "right" );
+        setBorder( element, top, "top" );
+    }
+
+    /**
+     * Writes the row height (when positive, in inches) and the
+     * <tt>keep-together</tt> constraint onto the given row element.
+     */
+    public static void setTableRowProperties( TableRow tableRow,
+            Element tableRowElement )
+    {
+        if ( tableRow.getRowHeight() > 0 )
+        {
+            final String height = ( tableRow.getRowHeight() / TWIPS_PER_INCH )
+                    + "in";
+            tableRowElement.setAttribute( "height", height );
+        }
+
+        // NOTE(review): keep-together is set when cantSplit() is FALSE. If
+        // cantSplit() means "row must not break across pages" this condition
+        // looks inverted -- confirm against TableRow before changing.
+        final boolean splittable = !tableRow.cantSplit();
+        if ( splittable )
+        {
+            tableRowElement.setAttribute( "keep-together", "always" );
+        }
+    }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.util.List;
+import java.util.Stack;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;
+
+/**
+ * Converts Word files (95-2007) into HTML files.
+ * <p>
+ * This implementation doesn't create images or links to them. This can be
+ * changed by overriding {@link #processImage(Element, boolean, Picture)}
+ * method.
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class WordToHtmlConverter extends AbstractWordConverter
+{
+
+    /**
+     * Immutable record of the font settings associated with the <tt>p</tt>
+     * element currently being filled. Child <tt>span</tt> elements are
+     * compared against it so that they do not repeat identical values.
+     */
+    private static class BlockProperies
+    {
+        /** Font size recorded for the paragraph. */
+        final int pFontSize;
+        /** Font family recorded for the paragraph. */
+        final String pFontName;
+
+        public BlockProperies( String pFontName, int pFontSize )
+        {
+            this.pFontSize = pFontSize;
+            this.pFontName = pFontName;
+        }
+    }
+
+ // Class-wide logger obtained from POILogFactory for this converter.
+ private static final POILogger logger = POILogFactory
+ .getLogger( WordToHtmlConverter.class );
+
+ private static String getSectionStyle( Section section )
+ {
+ SectionProperties sep = WordToHtmlUtils.getSectionProperties( section );
+
+ float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
+ float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
+ float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
+ float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
+
+ String style = "margin: " + topMargin + "in " + rightMargin + "in "
+ + bottomMargin + "in " + leftMargin + "in; ";
+
+ if ( sep.getCcolM1() > 0 )
+ {
+ style += "column-count: " + ( sep.getCcolM1() + 1 ) + "; ";
+ if ( sep.getFEvenlySpaced() )
+ {
+ style += "column-gap: "
+ + ( sep.getDxaColumns() / TWIPS_PER_INCH ) + "in; ";
+ }
+ else
+ {
+ style += "column-gap: 0.25in; ";
+ }
+ }
+ return style;
+ }
+
+    /**
+     * Java main() interface to interact with {@link WordToHtmlConverter}
+     *
+     * <p>
+     * Usage: WordToHtmlConverter infile outfile
+     * </p>
+     * Where infile is an input .doc file ( Word 95-2007) which will be rendered
+     * as HTML into outfile. Any failure is reported by printing the stack
+     * trace; the method does not rethrow.
+     *
+     * @param args
+     *            input file name followed by output file name
+     */
+    public static void main( String[] args )
+    {
+        if ( args.length < 2 )
+        {
+            System.err
+                    .println( "Usage: WordToHtmlConverter <inputFile.doc> <saveTo.html>" );
+            return;
+        }
+
+        System.out.println( "Converting " + args[0] );
+        System.out.println( "Saving output to " + args[1] );
+        try
+        {
+            Document doc = WordToHtmlConverter.process( new File( args[0] ) );
+
+            // Hand the serializer a byte stream so that it performs the
+            // encoding itself. A FileWriter would re-encode the characters
+            // with the platform default charset and could contradict the
+            // UTF-8 encoding requested below.
+            java.io.OutputStream out = new java.io.FileOutputStream( args[1] );
+            try
+            {
+                DOMSource domSource = new DOMSource( doc );
+                StreamResult streamResult = new StreamResult( out );
+
+                TransformerFactory tf = TransformerFactory.newInstance();
+                Transformer serializer = tf.newTransformer();
+                // TODO set encoding from a command argument
+                serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
+                serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
+                serializer.setOutputProperty( OutputKeys.METHOD, "html" );
+                serializer.transform( domSource, streamResult );
+            }
+            finally
+            {
+                // release the file handle even when serialization fails
+                out.close();
+            }
+        }
+        catch ( Exception e )
+        {
+            e.printStackTrace();
+        }
+    }
+
+ static Document process( File docFile ) throws Exception
+ {
+ final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
+ WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+ DocumentBuilderFactory.newInstance().newDocumentBuilder()
+ .newDocument() );
+ wordToHtmlConverter.processDocument( wordDocument );
+ return wordToHtmlConverter.getDocument();
+ }
+
+ // Font settings of the paragraphs currently being filled; the top entry is
+ // peeked by outputCharacters to avoid repeating identical values.
+ private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
+
+ // Facade used to create HTML nodes; also owns the DOM document returned
+ // by getDocument().
+ private final HtmlDocumentFacade htmlDocumentFacade;
+
+    /**
+     * Creates a new {@link WordToHtmlConverter} that writes into the given
+     * DOM document. One instance can be used to output several
+     * {@link HWPFDocument}s into a single HTML document.
+     *
+     * @param document
+     *            XML DOM Document used as HTML document
+     */
+    public WordToHtmlConverter( Document document )
+    {
+        htmlDocumentFacade = new HtmlDocumentFacade( document );
+    }
+
+    /**
+     * @return the DOM document held by the HTML facade of this converter
+     */
+    public Document getDocument()
+    {
+        final Document htmlDocument = htmlDocumentFacade.getDocument();
+        return htmlDocument;
+    }
+
+    /**
+     * Appends the text of one character run to <tt>pElement</tt> as a
+     * <tt>span</tt>. Font family and font size are only written into the
+     * span's <tt>style</tt> attribute when they differ from the values
+     * recorded for the enclosing paragraph; the remaining character
+     * formatting is added by
+     * <tt>WordToHtmlUtils.addCharactersProperties</tt>. The <tt>style</tt>
+     * attribute is omitted when no declaration was produced.
+     *
+     * @param pElement
+     *            parent element receiving the new span
+     * @param characterRun
+     *            run supplying the formatting
+     * @param text
+     *            text to output
+     */
+    @Override
+    protected void outputCharacters( Element pElement,
+            CharacterRun characterRun, String text )
+    {
+        Element span = htmlDocumentFacade.document.createElement( "span" );
+        pElement.appendChild( span );
+
+        StringBuilder style = new StringBuilder();
+        BlockProperies blockProperies = this.blocksProperies.peek();
+        if ( characterRun.getFontName() != null
+                && !WordToHtmlUtils.equals( characterRun.getFontName(),
+                        blockProperies.pFontName ) )
+        {
+            style.append( "font-family: " + characterRun.getFontName() + "; " );
+        }
+        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
+        {
+            // CSS lengths need a unit; a bare number is an invalid font-size
+            // declaration and gets dropped by browsers. The run's size is
+            // stored in half-points, so the halved value is in points.
+            style.append( "font-size: " + characterRun.getFontSize() / 2
+                    + "pt; " );
+        }
+
+        WordToHtmlUtils.addCharactersProperties( characterRun, style );
+        if ( style.length() != 0 )
+            span.setAttribute( "style", style.toString() );
+
+        Text textNode = htmlDocumentFacade.createText( text );
+        span.appendChild( textNode );
+    }
+
+ protected void processHyperlink( HWPFDocumentCore wordDocument,
+ Element currentBlock, Paragraph paragraph,
+ List<CharacterRun> characterRuns, int currentTableLevel,
+ String hyperlink, int beginTextInclusive, int endTextExclusive )
+ {
+ Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
+ currentBlock.appendChild( basicLink );
+
+ if ( beginTextInclusive < endTextExclusive )
+ processCharacters( wordDocument, currentTableLevel, paragraph,
+ basicLink, characterRuns, beginTextInclusive,
+ endTextExclusive );
+ }
+
+ /**
+ * This method shall store image bytes in external file and convert it if
+ * necessary. Images shall be stored using PNG format. Other formats may be
+ * not supported by user browser.
+ * <p>
+ * Please note the
+ * {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
+ *
+ * @param currentBlock
+ * currently processed HTML element, like <tt>p</tt>. Shall be
+ * used as parent of newly created <tt>img</tt>
+ * @param inlined
+ * if image is inlined
+ * @param picture
+ * HWPF object, contained picture data and properties
+ */
+ protected void processImage( Element currentBlock, boolean inlined,
+ Picture picture )
+ {
+ // no default implementation -- skip
+ currentBlock.appendChild( htmlDocumentFacade.document
+ .createComment( "Image link to '"
+ + picture.suggestFullFileName() + "' can be here" ) );
+ }
+
+ protected void processPageref( HWPFDocumentCore hwpfDocument,
+ Element currentBlock, Paragraph paragraph,
+ List<CharacterRun> characterRuns, int currentTableLevel,
+ String pageref, int beginTextInclusive, int endTextExclusive )
+ {
+ Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
+ currentBlock.appendChild( basicLink );
+
+ if ( beginTextInclusive < endTextExclusive )
+ processCharacters( hwpfDocument, currentTableLevel, paragraph,
+ basicLink, characterRuns, beginTextInclusive,
+ endTextExclusive );
+ }
+
+ protected void processParagraph( HWPFDocumentCore hwpfDocument,
+ Element parentFopElement, int currentTableLevel,
+ Paragraph paragraph, String bulletText )
+ {
+ final Element pElement = htmlDocumentFacade.createParagraph();
+ parentFopElement.appendChild( pElement );
+
+ StringBuilder style = new StringBuilder();
+ WordToHtmlUtils.addParagraphProperties( paragraph, style );
+
+ final int charRuns = paragraph.numCharacterRuns();
+
+ if ( charRuns == 0 )
+ {
+ return;
+ }
+
+ {
+ final String pFontName;
+ final int pFontSize;
+ final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
+ if ( characterRun != null )
+ {
+ pFontSize = characterRun.getFontSize() / 2;
+ pFontName = characterRun.getFontName();
+ WordToHtmlUtils.addFontFamily( pFontName, style );
+ WordToHtmlUtils.addFontSize( pFontSize, style );
+ }
+ else
+ {
+ pFontSize = -1;
+ pFontName = WordToHtmlUtils.EMPTY;
+ }
+ blocksProperies.push( new BlockProperies( pFontName, pFontSize ) );
+ }
+ try
+ {
+ if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
+ {
+ Text textNode = htmlDocumentFacade.createText( bulletText );
+ pElement.appendChild( textNode );
+ }
+
+ List<CharacterRun> characterRuns = WordToHtmlUtils
+ .findCharacterRuns( paragraph );
+ processCharacters( hwpfDocument, currentTableLevel, paragraph,
+ pElement, characterRuns, 0, characterRuns.size() );
+ }
+ finally
+ {
+ blocksProperies.pop();
+ }
+
+ if ( style.length() > 0 )
+ pElement.setAttribute( "style", style.toString() );
+
+ return;
+ }
+
+ protected void processSection( HWPFDocumentCore wordDocument,
+ Section section, int sectionCounter )
+ {
+ Element div = htmlDocumentFacade.document.createElement( "div" );
+ div.setAttribute( "style", getSectionStyle( section ) );
+ htmlDocumentFacade.body.appendChild( div );
+
+ processSectionParagraphes( wordDocument, div, section, 0 );
+ }
+
+ @Override
+ protected void processSingleSection( HWPFDocumentCore wordDocument,
+ Section section )
+ {
+ htmlDocumentFacade.body.setAttribute( "style",
+ getSectionStyle( section ) );
+
+ processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
+ section, 0 );
+ }
+
+ /**
+  * Converts a Word table into an HTML table. Rows flagged as table header
+  * go into the table header element, all other rows into the table body.
+  * Cells that are merged into a preceding cell (horizontally or
+  * vertically) are skipped; the first cell of a merged group receives the
+  * matching <tt>colspan</tt> / <tt>rowspan</tt> attribute.
+  * <p>
+  * The table is appended to <tt>flow</tt> only if it has at least one body
+  * row; otherwise a warning is logged and nothing is appended.
+  *
+  * @param hwpfDocument
+  *            document being converted
+  * @param flow
+  *            element that receives the table element
+  * @param table
+  *            table to convert
+  * @param thisTableLevel
+  *            nesting level of this table, passed on when the cell
+  *            content is processed
+  */
+ protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
+         Table table, int thisTableLevel )
+ {
+     Element tableHeader = htmlDocumentFacade.createTableHeader();
+     Element tableBody = htmlDocumentFacade.createTableBody();
+
+     final int tableRows = table.numRows();
+
+     // the widest row decides how far the last cell of a shorter row has
+     // to be stretched
+     int maxColumns = Integer.MIN_VALUE;
+     for ( int r = 0; r < tableRows; r++ )
+     {
+         maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
+     }
+
+     for ( int r = 0; r < tableRows; r++ )
+     {
+         TableRow tableRow = table.getRow( r );
+
+         Element tableRowElement = htmlDocumentFacade.createTableRow();
+         StringBuilder tableRowStyle = new StringBuilder();
+         WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
+
+         final int rowCells = tableRow.numCells();
+         for ( int c = 0; c < rowCells; c++ )
+         {
+             TableCell tableCell = tableRow.getCell( c );
+
+             // continuation cells are covered by the span of the first
+             // cell of their merged group
+             if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
+                 continue;
+
+             if ( tableCell.isVerticallyMerged()
+                     && !tableCell.isFirstVerticallyMerged() )
+                 continue;
+
+             Element tableCellElement;
+             if ( tableRow.isTableHeader() )
+             {
+                 tableCellElement = htmlDocumentFacade
+                         .createTableHeaderCell();
+             }
+             else
+             {
+                 tableCellElement = htmlDocumentFacade.createTableCell();
+             }
+             StringBuilder tableCellStyle = new StringBuilder();
+             WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
+                     r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
+                     tableCellStyle );
+
+             if ( tableCell.isFirstMerged() )
+             {
+                 // count this cell and the merged cells directly after it
+                 int count = 0;
+                 for ( int c1 = c; c1 < rowCells; c1++ )
+                 {
+                     if ( !tableRow.getCell( c1 ).isMerged() )
+                         break;
+                     count++;
+                 }
+                 tableCellElement.setAttribute( "colspan", "" + count );
+             }
+             else
+             {
+                 // last cell of a short row fills the remaining columns
+                 if ( c == rowCells - 1 && c != maxColumns - 1 )
+                 {
+                     tableCellElement.setAttribute( "colspan", ""
+                             + ( maxColumns - c ) );
+                 }
+             }
+
+             if ( tableCell.isFirstVerticallyMerged() )
+             {
+                 int count = 0;
+                 for ( int r1 = r; r1 < tableRows; r1++ )
+                 {
+                     TableRow nextRow = table.getRow( r1 );
+                     // a row with exactly c cells has no cell at index c
+                     // either, so the check must include equality
+                     if ( c >= nextRow.numCells() )
+                         break;
+                     if ( !nextRow.getCell( c ).isVerticallyMerged() )
+                         break;
+                     count++;
+                 }
+                 tableCellElement.setAttribute( "rowspan", "" + count );
+             }
+
+             processSectionParagraphes( hwpfDocument, tableCellElement,
+                     tableCell, thisTableLevel );
+
+             // give an otherwise empty cell an empty paragraph as content
+             if ( !tableCellElement.hasChildNodes() )
+             {
+                 tableCellElement.appendChild( htmlDocumentFacade
+                         .createParagraph() );
+             }
+             if ( tableCellStyle.length() > 0 )
+             {
+                 tableCellElement.setAttribute( "style",
+                         tableCellStyle.toString() );
+             }
+
+             tableRowElement.appendChild( tableCellElement );
+         }
+
+         if ( tableRowStyle.length() > 0 )
+         {
+             tableRowElement
+                     .setAttribute( "style", tableRowStyle.toString() );
+         }
+
+         if ( tableRow.isTableHeader() )
+         {
+             tableHeader.appendChild( tableRowElement );
+         }
+         else
+         {
+             tableBody.appendChild( tableRowElement );
+         }
+     }
+
+     final Element tableElement = htmlDocumentFacade.createTable();
+     if ( tableHeader.hasChildNodes() )
+     {
+         tableElement.appendChild( tableHeader );
+     }
+     if ( tableBody.hasChildNodes() )
+     {
+         tableElement.appendChild( tableBody );
+         flow.appendChild( tableElement );
+     }
+     else
+     {
+         logger.log(
+                 POILogger.WARN,
+                 "Table without body starting on offset "
+                         + table.getStartOffset() + " -- "
+                         + table.getEndOffset() );
+     }
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterProperties;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.w3c.dom.Element;
+
+public class WordToHtmlUtils extends AbstractWordUtils
+{
+ /**
+  * Appends a <tt>font-weight</tt> declaration to the style.
+  *
+  * @param bold
+  *            <tt>true</tt> for <tt>bold</tt>, <tt>false</tt> for
+  *            <tt>normal</tt>
+  * @param style
+  *            buffer that receives the declaration
+  */
+ public static void addBold( final boolean bold, StringBuilder style )
+ {
+     final String weight = bold ? "bold" : "normal";
+     style.append( "font-weight: " ).append( weight ).append( ";" );
+ }
+
+ public static void addBorder( BorderCode borderCode, String where,
+ StringBuilder style )
+ {
+ if ( borderCode == null || borderCode.getBorderType() == 0 )
+ return;
+
+ if ( isEmpty( where ) )
+ {
+ style.append( "border-style: " + getBorderType( borderCode ) + "; " );
+ style.append( "border-color: " + getColor( borderCode.getColor() )
+ + "; " );
+ style.append( "border-width: " + getBorderWidth( borderCode )
+ + "; " );
+ }
+ else
+ {
+ style.append( "border-" + where + "-style: "
+ + getBorderType( borderCode ) + "; " );
+ style.append( "border-" + where + "-color: "
+ + getColor( borderCode.getColor() ) + "; " );
+ style.append( "border-" + where + "-width: "
+ + getBorderWidth( borderCode ) + "; " );
+ }
+ }
+
+ /**
+  * Appends the CSS declarations describing the character-level formatting
+  * of the run: weight, slant, border, capitalization, highlight, text
+  * decoration, shadow, small caps, sub/superscript and visibility.
+  * <p>
+  * Strike-through and underline are written as a single
+  * <tt>text-decoration</tt> declaration when both are set, because a
+  * second declaration of the same property would override the first one.
+  *
+  * @param characterRun
+  *            run whose formatting is described
+  * @param style
+  *            buffer that receives the declarations
+  */
+ public static void addCharactersProperties(
+         final CharacterRun characterRun, StringBuilder style )
+ {
+     final CharacterProperties clonedProperties = characterRun
+             .cloneProperties();
+
+     if ( characterRun.isBold() )
+     {
+         style.append( "font-weight: bold; " );
+     }
+     if ( characterRun.isItalic() )
+     {
+         style.append( "font-style: italic; " );
+     }
+
+     addBorder( clonedProperties.getBrc(), EMPTY, style );
+
+     if ( characterRun.isCapitalized() )
+     {
+         style.append( "text-transform: uppercase; " );
+     }
+     if ( characterRun.isHighlighted() )
+     {
+         style.append( "background-color: "
+                 + getColor( clonedProperties.getIcoHighlight() ) + "; " );
+     }
+
+     final boolean struck = characterRun.isStrikeThrough();
+     final boolean underlined = characterRun.getUnderlineCode() > 0;
+     if ( struck )
+     {
+         style.append( underlined ? "text-decoration: line-through underline; "
+                 : "text-decoration: line-through; " );
+     }
+     if ( characterRun.isShadowed() )
+     {
+         // NOTE(review): CSS text-shadow normally expects at least two
+         // lengths (x and y offset); value kept as it was -- confirm
+         style.append( "text-shadow: " + characterRun.getFontSize() / 24
+                 + "pt; " );
+     }
+     if ( characterRun.isSmallCaps() )
+     {
+         style.append( "font-variant: small-caps; " );
+     }
+     // "baseline-shift" is an XSL-FO property; CSS uses vertical-align
+     if ( characterRun.getSubSuperScriptIndex() == 1 )
+     {
+         style.append( "vertical-align: super; " );
+         style.append( "font-size: smaller; " );
+     }
+     if ( characterRun.getSubSuperScriptIndex() == 2 )
+     {
+         style.append( "vertical-align: sub; " );
+         style.append( "font-size: smaller; " );
+     }
+     if ( underlined && !struck )
+     {
+         // the combined case is already written together with line-through
+         style.append( "text-decoration: underline; " );
+     }
+     if ( characterRun.isVanished() )
+     {
+         style.append( "visibility: hidden; " );
+     }
+ }
+
+ /**
+  * Appends a terminated <tt>font-family</tt> declaration to the style.
+  * Nothing is appended when the font family is empty.
+  *
+  * @param fontFamily
+  *            font family name
+  * @param style
+  *            buffer that receives the declaration
+  */
+ public static void addFontFamily( final String fontFamily,
+         StringBuilder style )
+ {
+     if ( isEmpty( fontFamily ) )
+         return;
+
+     // terminate the declaration, otherwise the next one appended to the
+     // same buffer is glued to the font name
+     style.append( "font-family: " + fontFamily + "; " );
+ }
+
+ /**
+  * Appends a terminated <tt>font-size</tt> declaration, in points, to the
+  * style.
+  *
+  * @param fontSize
+  *            font size in points
+  * @param style
+  *            buffer that receives the declaration
+  */
+ public static void addFontSize( final int fontSize, StringBuilder style )
+ {
+     // a CSS length needs a unit, and the declaration has to be terminated
+     // so that further declarations can follow in the same buffer
+     style.append( "font-size: " + fontSize + "pt; " );
+ }
+
+ /**
+  * Appends the indentation and spacing of the paragraph to the style:
+  * first line indent, left and right indent, and the space before and
+  * after the paragraph. Zero values are left out.
+  * <p>
+  * CSS margin properties are used; the XSL-FO names
+  * (<tt>start-indent</tt>, <tt>space-before</tt> and so on) are not CSS
+  * properties.
+  *
+  * @param paragraph
+  *            paragraph whose indents are described
+  * @param style
+  *            buffer that receives the declarations
+  */
+ public static void addIndent( Paragraph paragraph, StringBuilder style )
+ {
+     addIndent( style, "text-indent", paragraph.getFirstLineIndent() );
+     addIndent( style, "margin-left", paragraph.getIndentFromLeft() );
+     addIndent( style, "margin-right", paragraph.getIndentFromRight() );
+     addIndent( style, "margin-top", paragraph.getSpacingBefore() );
+     addIndent( style, "margin-bottom", paragraph.getSpacingAfter() );
+ }
+
+ /**
+  * Appends one length declaration, converted from twips to points, to the
+  * style. A value of zero appends nothing.
+  *
+  * @param style
+  *            buffer that receives the declaration
+  * @param cssName
+  *            name of the property to write
+  * @param twipsValue
+  *            length in twips
+  */
+ private static void addIndent( StringBuilder style, final String cssName,
+         final int twipsValue )
+ {
+     if ( twipsValue != 0 )
+     {
+         // NOTE(review): if TWIPS_PER_PT is an int the division truncates
+         // fractions of a point -- confirm against its declaration
+         style.append( cssName ).append( ": " )
+                 .append( twipsValue / TWIPS_PER_PT ).append( "pt; " );
+     }
+ }
+
+ public static void addJustification( Paragraph paragraph,
+ final StringBuilder style )
+ {
+ String justification = getJustification( paragraph.getJustification() );
+ if ( isNotEmpty( justification ) )
+ style.append( "text-align: " + justification + "; " );
+ }
+
+ /**
+  * Appends the CSS declarations describing the paragraph-level formatting:
+  * indents, justification, borders, page break and keep rules,
+  * hyphenation and white space handling.
+  * <p>
+  * Only CSS property names are written. The XSL-FO names
+  * (<tt>break-before: page</tt>, <tt>hyphenate</tt>,
+  * <tt>keep-together.within-page</tt>,
+  * <tt>keep-with-next.within-page</tt>, <tt>linefeed-treatment</tt>,
+  * <tt>white-space-collapse</tt>) are replaced by their closest CSS
+  * counterparts.
+  *
+  * @param paragraph
+  *            paragraph whose formatting is described
+  * @param style
+  *            buffer that receives the declarations
+  */
+ public static void addParagraphProperties( Paragraph paragraph,
+         StringBuilder style )
+ {
+     addIndent( paragraph, style );
+     addJustification( paragraph, style );
+
+     addBorder( paragraph.getBottomBorder(), "bottom", style );
+     addBorder( paragraph.getLeftBorder(), "left", style );
+     addBorder( paragraph.getRightBorder(), "right", style );
+     addBorder( paragraph.getTopBorder(), "top", style );
+
+     if ( paragraph.pageBreakBefore() )
+     {
+         style.append( "page-break-before: always; " );
+     }
+
+     // "manual" still honours explicit hyphenation opportunities
+     style.append( "hyphens: "
+             + ( paragraph.isAutoHyphenated() ? "auto" : "manual" ) + "; " );
+
+     if ( paragraph.keepOnPage() )
+     {
+         style.append( "page-break-inside: avoid; " );
+     }
+
+     if ( paragraph.keepWithNext() )
+     {
+         style.append( "page-break-after: avoid; " );
+     }
+
+     // preserve line feeds and do not collapse runs of white space
+     style.append( "white-space: pre-wrap; " );
+ }
+
+ /**
+  * Appends the CSS declarations describing a table cell: its width, its
+  * left and right padding and its four borders.
+  * <p>
+  * For every side the cell's own border is used when it is set (not
+  * <tt>null</tt> and border type other than <tt>0</tt>). Otherwise the
+  * row's outer border is used for cells on the outer edge of the table,
+  * and the row's inner (horizontal or vertical) border for all other
+  * cells.
+  *
+  * @param tableRow
+  *            row containing the cell
+  * @param tableCell
+  *            cell to describe
+  * @param toppest
+  *            <tt>true</tt> if the cell is in the first row
+  * @param bottomest
+  *            <tt>true</tt> if the cell is in the last row
+  * @param leftest
+  *            <tt>true</tt> if the cell is the first one of its row
+  * @param rightest
+  *            <tt>true</tt> if the cell is the last one of its row
+  * @param style
+  *            buffer that receives the declarations
+  */
+ public static void addTableCellProperties( TableRow tableRow,
+         TableCell tableCell, boolean toppest, boolean bottomest,
+         boolean leftest, boolean rightest, StringBuilder style )
+ {
+     style.append( "width: " + ( tableCell.getWidth() / TWIPS_PER_INCH )
+             + "in; " );
+     // "padding-start" / "padding-end" are XSL-FO names, not CSS
+     style.append( "padding-left: "
+             + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
+     style.append( "padding-right: "
+             + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
+
+     BorderCode top = tableCell.getBrcTop();
+     if ( top == null || top.getBorderType() == 0 )
+     {
+         top = toppest ? tableRow.getTopBorder() : tableRow
+                 .getHorizontalBorder();
+     }
+
+     BorderCode bottom = tableCell.getBrcBottom();
+     if ( bottom == null || bottom.getBorderType() == 0 )
+     {
+         bottom = bottomest ? tableRow.getBottomBorder() : tableRow
+                 .getHorizontalBorder();
+     }
+
+     BorderCode left = tableCell.getBrcLeft();
+     if ( left == null || left.getBorderType() == 0 )
+     {
+         left = leftest ? tableRow.getLeftBorder() : tableRow
+                 .getVerticalBorder();
+     }
+
+     BorderCode right = tableCell.getBrcRight();
+     if ( right == null || right.getBorderType() == 0 )
+     {
+         right = rightest ? tableRow.getRightBorder() : tableRow
+                 .getVerticalBorder();
+     }
+
+     addBorder( bottom, "bottom", style );
+     addBorder( left, "left", style );
+     addBorder( right, "right", style );
+     addBorder( top, "top", style );
+ }
+
+ /**
+  * Appends the CSS declarations describing a table row: its height (only
+  * when positive) and, for rows that must not be split, a rule that keeps
+  * the row on one page.
+  *
+  * @param tableRow
+  *            row to describe
+  * @param style
+  *            buffer that receives the declarations
+  */
+ public static void addTableRowProperties( TableRow tableRow,
+         StringBuilder style )
+ {
+     if ( tableRow.getRowHeight() > 0 )
+     {
+         style.append( "height: "
+                 + ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in; " );
+     }
+     // only a row that cannot be split has to be kept together; the
+     // XSL-FO "keep-together" is written as its CSS counterpart
+     if ( tableRow.cantSplit() )
+     {
+         style.append( "page-break-inside: avoid; " );
+     }
+ }
+
+ /**
+  * Sets size, scaling, alignment and cropping attributes of a picture on
+  * the given element.
+  * <p>
+  * The goal width and height of the picture are converted from twips to
+  * points; when the aspect ratio of a dimension is positive it is applied
+  * as a percentage. Scaling is reported as <tt>uniform</tt> unless both
+  * aspect ratios are positive. When any crop value is set, a
+  * <tt>clip</tt> rectangle is written and <tt>overflow</tt> is set to
+  * <tt>hidden</tt>.
+  *
+  * @param picture
+  *            picture whose properties are described
+  * @param graphicElement
+  *            element that receives the attributes
+  */
+ public static void setPictureProperties( Picture picture,
+         Element graphicElement )
+ {
+     final int aspectRatioX = picture.getAspectRatioX();
+     final int aspectRatioY = picture.getAspectRatioY();
+
+     if ( aspectRatioX > 0 )
+     {
+         graphicElement
+                 .setAttribute( "content-width", ( ( picture.getDxaGoal()
+                         * aspectRatioX / 100 ) / TWIPS_PER_PT )
+                         + "pt" );
+     }
+     else
+     {
+         graphicElement.setAttribute( "content-width",
+                 ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
+     }
+
+     if ( aspectRatioY > 0 )
+     {
+         graphicElement
+                 .setAttribute( "content-height", ( ( picture.getDyaGoal()
+                         * aspectRatioY / 100 ) / TWIPS_PER_PT )
+                         + "pt" );
+     }
+     else
+     {
+         graphicElement.setAttribute( "content-height",
+                 ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
+     }
+
+     if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
+     {
+         graphicElement.setAttribute( "scaling", "uniform" );
+     }
+     else
+     {
+         graphicElement.setAttribute( "scaling", "non-uniform" );
+     }
+
+     graphicElement.setAttribute( "vertical-align", "text-bottom" );
+
+     if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
+             || picture.getDyaCropBottom() != 0
+             || picture.getDxaCropLeft() != 0 )
+     {
+         int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
+         int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
+         int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
+         int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
+         graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
+                 + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
+                 + "pt)" );
+         // attribute name was misspelled as "oveerflow"
+         graphicElement.setAttribute( "overflow", "hidden" );
+     }
+ }
+
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.model.ListFormatOverride;
-import org.apache.poi.hwpf.model.ListTables;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.Range;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.Table;
-import org.apache.poi.hwpf.usermodel.TableIterator;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-
-public abstract class AbstractWordExtractor
-{
- private static final byte BEL_MARK = 7;
-
- private static final byte FIELD_BEGIN_MARK = 19;
-
- private static final byte FIELD_END_MARK = 21;
-
- private static final byte FIELD_SEPARATOR_MARK = 20;
-
- private static final POILogger logger = POILogFactory
- .getLogger( AbstractWordExtractor.class );
-
- public abstract Document getDocument();
-
- protected abstract void outputCharacters( Element block,
- CharacterRun characterRun, String text );
-
- protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
- int currentTableLevel, Paragraph paragraph, final Element block,
- List<CharacterRun> characterRuns, final int start, final int end )
- {
- boolean haveAnyText = false;
-
- for ( int c = start; c < end; c++ )
- {
- CharacterRun characterRun = characterRuns.get( c );
-
- if ( characterRun == null )
- throw new AssertionError();
-
- if ( hwpfDocument instanceof HWPFDocument
- && ( (HWPFDocument) hwpfDocument ).getPicturesTable()
- .hasPicture( characterRun ) )
- {
- HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
- Picture picture = newFormat.getPicturesTable().extractPicture(
- characterRun, true );
-
- processImage( block, characterRun.text().charAt( 0 ) == 0x01,
- picture );
- continue;
- }
-
- String text = characterRun.text();
- if ( text.getBytes().length == 0 )
- continue;
-
- if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
- {
- int skipTo = tryField( hwpfDocument, paragraph,
- currentTableLevel, characterRuns, c, block );
-
- if ( skipTo != c )
- {
- c = skipTo;
- continue;
- }
-
- continue;
- }
- if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
- {
- // shall not appear without FIELD_BEGIN_MARK
- continue;
- }
- if ( text.getBytes()[0] == FIELD_END_MARK )
- {
- // shall not appear without FIELD_BEGIN_MARK
- continue;
- }
-
- if ( characterRun.isSpecialCharacter() || characterRun.isObj()
- || characterRun.isOle2() )
- {
- continue;
- }
-
- if ( text.endsWith( "\r" )
- || ( text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0 ) )
- text = text.substring( 0, text.length() - 1 );
-
- outputCharacters( block, characterRun, text );
-
- haveAnyText |= text.trim().length() != 0;
- }
-
- return haveAnyText;
- }
-
- public void processDocument( HWPFDocumentCore wordDocument )
- {
- final Range range = wordDocument.getRange();
- for ( int s = 0; s < range.numSections(); s++ )
- {
- processSection( wordDocument, range.getSection( s ), s );
- }
- }
-
- protected void processField( HWPFDocumentCore wordDocument,
- Element currentBlock, Paragraph paragraph, int currentTableLevel,
- List<CharacterRun> characterRuns, int beginMark, int separatorMark,
- int endMark )
- {
-
- Pattern hyperlinkPattern = Pattern
- .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
- Pattern pagerefPattern = Pattern
- .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
-
- if ( separatorMark - beginMark > 1 )
- {
- int index = beginMark + 1;
- CharacterRun firstAfterBegin = null;
- while ( index < separatorMark )
- {
- firstAfterBegin = paragraph.getCharacterRun( index );
- if ( firstAfterBegin == null )
- {
- logger.log( POILogger.WARN,
- "Paragraph " + paragraph.getStartOffset() + "--"
- + paragraph.getEndOffset()
- + " contains null CharacterRun #" + index );
- index++;
- continue;
- }
- break;
- }
-
- if ( firstAfterBegin != null )
- {
- final Matcher hyperlinkMatcher = hyperlinkPattern
- .matcher( firstAfterBegin.text() );
- if ( hyperlinkMatcher.matches() )
- {
- String hyperlink = hyperlinkMatcher.group( 1 );
- processHyperlink( wordDocument, currentBlock, paragraph,
- characterRuns, currentTableLevel, hyperlink,
- separatorMark + 1, endMark );
- return;
- }
-
- final Matcher pagerefMatcher = pagerefPattern
- .matcher( firstAfterBegin.text() );
- if ( pagerefMatcher.matches() )
- {
- String pageref = pagerefMatcher.group( 1 );
- processPageref( wordDocument, currentBlock, paragraph,
- characterRuns, currentTableLevel, pageref,
- separatorMark + 1, endMark );
- return;
- }
- }
- }
-
- StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
- for ( int i = beginMark; i <= endMark; i++ )
- {
- debug.append( "\t" );
- debug.append( paragraph.getCharacterRun( i ) );
- debug.append( "\n" );
- }
- logger.log( POILogger.WARN, debug );
-
- // just output field value
- if ( separatorMark + 1 < endMark )
- processCharacters( wordDocument, currentTableLevel, paragraph,
- currentBlock, characterRuns, separatorMark + 1, endMark );
-
- return;
- }
-
- protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
- Element currentBlock, Paragraph paragraph,
- List<CharacterRun> characterRuns, int currentTableLevel,
- String hyperlink, int i, int endMark );
-
- protected abstract void processImage( Element currentBlock,
- boolean inlined, Picture picture );
-
- protected abstract void processPageref( HWPFDocumentCore wordDocument,
- Element currentBlock, Paragraph paragraph,
- List<CharacterRun> characterRuns, int currentTableLevel,
- String pageref, int beginTextInclusive, int endTextExclusive );
-
- protected abstract void processParagraph( HWPFDocumentCore wordDocument,
- Element parentFopElement, int currentTableLevel,
- Paragraph paragraph, String bulletText );
-
- protected abstract void processSection( HWPFDocumentCore wordDocument,
- Section section, int s );
-
- protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
- Element flow, Range range, int currentTableLevel )
- {
- final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
- for ( TableIterator tableIterator = AbstractWordUtils.newTableIterator(
- range, currentTableLevel + 1 ); tableIterator.hasNext(); )
- {
- Table next = tableIterator.next();
- allTables.put( Integer.valueOf( next.getStartOffset() ), next );
- }
-
- final ListTables listTables = wordDocument.getListTables();
- int currentListInfo = 0;
-
- final int paragraphs = range.numParagraphs();
- for ( int p = 0; p < paragraphs; p++ )
- {
- Paragraph paragraph = range.getParagraph( p );
-
- if ( allTables.containsKey( Integer.valueOf( paragraph
- .getStartOffset() ) ) )
- {
- Table table = allTables.get( Integer.valueOf( paragraph
- .getStartOffset() ) );
- processTable( wordDocument, flow, table, currentTableLevel + 1 );
- continue;
- }
-
- if ( paragraph.isInTable()
- && paragraph.getTableLevel() != currentTableLevel )
- {
- continue;
- }
-
- if ( paragraph.getIlfo() != currentListInfo )
- {
- currentListInfo = paragraph.getIlfo();
- }
-
- if ( currentListInfo != 0 )
- {
- if ( listTables != null )
- {
- final ListFormatOverride listFormatOverride = listTables
- .getOverride( paragraph.getIlfo() );
-
- String label = AbstractWordUtils.getBulletText( listTables,
- paragraph, listFormatOverride.getLsid() );
-
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, label );
- }
- else
- {
- logger.log( POILogger.WARN,
- "Paragraph #" + paragraph.getStartOffset() + "-"
- + paragraph.getEndOffset()
- + " has reference to list structure #"
- + currentListInfo
- + ", but listTables not defined in file" );
-
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, AbstractWordUtils.EMPTY );
- }
- }
- else
- {
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, AbstractWordUtils.EMPTY );
- }
- }
-
- }
-
- protected void processSingleSection( HWPFDocumentCore wordDocument,
- Section section )
- {
- processSection( wordDocument, section, 0 );
- }
-
- protected abstract void processTable( HWPFDocumentCore wordDocument,
- Element flow, Table table, int newTableLevel );
-
- protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
- int currentTableLevel, List<CharacterRun> characterRuns,
- int beginMark, Element currentBlock )
- {
- int separatorMark = -1;
- int endMark = -1;
- for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
- {
- CharacterRun characterRun = paragraph.getCharacterRun( c );
-
- String text = characterRun.text();
- if ( text.getBytes().length == 0 )
- continue;
-
- if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
- {
- if ( separatorMark != -1 )
- {
- // double;
- return beginMark;
- }
-
- separatorMark = c;
- continue;
- }
-
- if ( text.getBytes()[0] == FIELD_END_MARK )
- {
- if ( endMark != -1 )
- {
- // double;
- return beginMark;
- }
-
- endMark = c;
- break;
- }
-
- }
-
- if ( separatorMark == -1 || endMark == -1 )
- return beginMark;
-
- processField( wordDocument, currentBlock, paragraph, currentTableLevel,
- characterRuns, beginMark, separatorMark, endMark );
-
- return endMark;
- }
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.Closeable;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.HWPFOldDocument;
-import org.apache.poi.hwpf.OldWordFileFormatException;
-import org.apache.poi.hwpf.model.CHPX;
-import org.apache.poi.hwpf.model.ListLevel;
-import org.apache.poi.hwpf.model.ListTables;
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Range;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.SectionProperties;
-import org.apache.poi.hwpf.usermodel.TableIterator;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-
-public class AbstractWordUtils
-{
- static final String EMPTY = "";
-
- private static final POILogger logger = POILogFactory
- .getLogger( AbstractWordUtils.class );
-
- public static final float TWIPS_PER_INCH = 1440.0f;
- public static final int TWIPS_PER_PT = 20;
-
- static void closeQuietly( final Closeable closeable )
- {
- try
- {
- closeable.close();
- }
- catch ( Exception exc )
- {
- logger.log( POILogger.ERROR, "Unable to close resource: " + exc,
- exc );
- }
- }
-
- static boolean equals( String str1, String str2 )
- {
- return str1 == null ? str2 == null : str1.equals( str2 );
- }
-
- // XXX incorporate into Range
- static List<CharacterRun> findCharacterRuns( Range range )
- {
- final int min = range.getStartOffset();
- final int max = range.getEndOffset();
-
- List<CharacterRun> result = new ArrayList<CharacterRun>();
- List<CHPX> chpxs = getCharacters( range );
- for ( int i = 0; i < chpxs.size(); i++ )
- {
- CHPX chpx = chpxs.get( i );
- if ( chpx == null )
- continue;
-
- if ( Math.max( min, chpx.getStart() ) <= Math.min( max,
- chpx.getEnd() ) )
- {
- final CharacterRun characterRun = getCharacterRun( range, chpx );
-
- if ( characterRun == null )
- continue;
-
- result.add( characterRun );
- }
- }
-
- return result;
- }
-
- public static String getBorderType( BorderCode borderCode )
- {
- if ( borderCode == null )
- throw new IllegalArgumentException( "borderCode is null" );
-
- switch ( borderCode.getBorderType() )
- {
- case 1:
- case 2:
- return "solid";
- case 3:
- return "double";
- case 5:
- return "solid";
- case 6:
- return "dotted";
- case 7:
- case 8:
- return "dashed";
- case 9:
- return "dotted";
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- case 16:
- case 17:
- case 18:
- case 19:
- return "double";
- case 20:
- return "solid";
- case 21:
- return "double";
- case 22:
- return "dashed";
- case 23:
- return "dashed";
- case 24:
- return "ridge";
- case 25:
- return "grooved";
- default:
- return "solid";
- }
- }
-
- public static String getBorderWidth( BorderCode borderCode )
- {
- int lineWidth = borderCode.getLineWidth();
- int pt = lineWidth / 8;
- int pte = lineWidth - pt * 8;
-
- StringBuilder stringBuilder = new StringBuilder();
- stringBuilder.append( pt );
- stringBuilder.append( "." );
- stringBuilder.append( 1000 / 8 * pte );
- stringBuilder.append( "pt" );
- return stringBuilder.toString();
- }
-
- public static String getBulletText( ListTables listTables,
- Paragraph paragraph, int listId )
- {
- final ListLevel listLevel = listTables.getLevel( listId,
- paragraph.getIlvl() );
-
- if ( listLevel.getNumberText() == null )
- return EMPTY;
-
- StringBuffer bulletBuffer = new StringBuffer();
- char[] xst = listLevel.getNumberText().toCharArray();
- for ( char element : xst )
- {
- if ( element < 9 )
- {
- ListLevel numLevel = listTables.getLevel( listId, element );
-
- int num = numLevel.getStartAt();
- bulletBuffer.append( NumberFormatter.getNumber( num,
- listLevel.getNumberFormat() ) );
-
- if ( numLevel == listLevel )
- {
- numLevel.setStartAt( numLevel.getStartAt() + 1 );
- }
-
- }
- else
- {
- bulletBuffer.append( element );
- }
- }
-
- byte follow = getIxchFollow( listLevel );
- switch ( follow )
- {
- case 0:
- bulletBuffer.append( "\t" );
- break;
- case 1:
- bulletBuffer.append( " " );
- break;
- default:
- break;
- }
-
- return bulletBuffer.toString();
- }
-
- private static CharacterRun getCharacterRun( Range range, CHPX chpx )
- {
- try
- {
- Method method = Range.class.getDeclaredMethod( "getCharacterRun",
- CHPX.class );
- method.setAccessible( true );
- return (CharacterRun) method.invoke( range, chpx );
- }
- catch ( Exception exc )
- {
- throw new Error( exc );
- }
- }
-
- private static List<CHPX> getCharacters( Range range )
- {
- try
- {
- Field field = Range.class.getDeclaredField( "_characters" );
- field.setAccessible( true );
- return (List<CHPX>) field.get( range );
- }
- catch ( Exception exc )
- {
- throw new Error( exc );
- }
- }
-
- public static String getColor( int ico )
- {
- switch ( ico )
- {
- case 1:
- return "black";
- case 2:
- return "blue";
- case 3:
- return "cyan";
- case 4:
- return "green";
- case 5:
- return "magenta";
- case 6:
- return "red";
- case 7:
- return "yellow";
- case 8:
- return "white";
- case 9:
- return "darkblue";
- case 10:
- return "darkcyan";
- case 11:
- return "darkgreen";
- case 12:
- return "darkmagenta";
- case 13:
- return "darkred";
- case 14:
- return "darkyellow";
- case 15:
- return "darkgray";
- case 16:
- return "lightgray";
- default:
- return "black";
- }
- }
-
- public static byte getIxchFollow( ListLevel listLevel )
- {
- try
- {
- Field field = ListLevel.class.getDeclaredField( "_ixchFollow" );
- field.setAccessible( true );
- return ( (Byte) field.get( listLevel ) ).byteValue();
- }
- catch ( Exception exc )
- {
- throw new Error( exc );
- }
- }
-
- public static String getJustification( int js )
- {
- switch ( js )
- {
- case 0:
- return "start";
- case 1:
- return "center";
- case 2:
- return "end";
- case 3:
- case 4:
- return "justify";
- case 5:
- return "center";
- case 6:
- return "left";
- case 7:
- return "start";
- case 8:
- return "end";
- case 9:
- return "justify";
- }
- return "";
- }
-
- public static String getListItemNumberLabel( int number, int format )
- {
-
- if ( format != 0 )
- System.err.println( "NYI: toListItemNumberLabel(): " + format );
-
- return String.valueOf( number );
- }
-
- public static SectionProperties getSectionProperties( Section section )
- {
- try
- {
- Field field = Section.class.getDeclaredField( "_props" );
- field.setAccessible( true );
- return (SectionProperties) field.get( section );
- }
- catch ( Exception exc )
- {
- throw new Error( exc );
- }
- }
-
- static boolean isEmpty( String str )
- {
- return str == null || str.length() == 0;
- }
-
- static boolean isNotEmpty( String str )
- {
- return !isEmpty( str );
- }
-
- public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
- {
- final FileInputStream istream = new FileInputStream( docFile );
- try
- {
- return loadDoc( istream );
- }
- finally
- {
- closeQuietly( istream );
- }
- }
-
- public static HWPFDocumentCore loadDoc( InputStream inputStream )
- throws IOException
- {
- final POIFSFileSystem poifsFileSystem = HWPFDocumentCore
- .verifyAndBuildPOIFS( inputStream );
- try
- {
- return new HWPFDocument( poifsFileSystem );
- }
- catch ( OldWordFileFormatException exc )
- {
- return new HWPFOldDocument( poifsFileSystem );
- }
- }
-
- public static TableIterator newTableIterator( Range range, int level )
- {
- try
- {
- Constructor<TableIterator> constructor = TableIterator.class
- .getDeclaredConstructor( Range.class, int.class );
- constructor.setAccessible( true );
- return constructor.newInstance( range, Integer.valueOf( level ) );
- }
- catch ( Exception exc )
- {
- throw new Error( exc );
- }
- }
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-public class FoDocumentFacade
-{
- private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
-
- protected final Document document;
- protected final Element layoutMasterSet;
- protected final Element root;
-
- public FoDocumentFacade( Document document )
- {
- this.document = document;
-
- root = document.createElementNS( NS_XSLFO, "fo:root" );
- document.appendChild( root );
-
- layoutMasterSet = document.createElementNS( NS_XSLFO,
- "fo:layout-master-set" );
- root.appendChild( layoutMasterSet );
- }
-
- public Element addFlowToPageSequence( final Element pageSequence,
- String flowName )
- {
- final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
- flow.setAttribute( "flow-name", flowName );
- pageSequence.appendChild( flow );
-
- return flow;
- }
-
- public Element addListItem( Element listBlock )
- {
- Element result = createListItem();
- listBlock.appendChild( result );
- return result;
- }
-
- public Element addListItemBody( Element listItem )
- {
- Element result = createListItemBody();
- listItem.appendChild( result );
- return result;
- }
-
- public Element addListItemLabel( Element listItem, String text )
- {
- Element result = createListItemLabel( text );
- listItem.appendChild( result );
- return result;
- }
-
- public Element addPageSequence( String pageMaster )
- {
- final Element pageSequence = document.createElementNS( NS_XSLFO,
- "fo:page-sequence" );
- pageSequence.setAttribute( "master-reference", pageMaster );
- root.appendChild( pageSequence );
- return pageSequence;
- }
-
- public Element addRegionBody( Element pageMaster )
- {
- final Element regionBody = document.createElementNS( NS_XSLFO,
- "fo:region-body" );
- pageMaster.appendChild( regionBody );
-
- return regionBody;
- }
-
- public Element addSimplePageMaster( String masterName )
- {
- final Element simplePageMaster = document.createElementNS( NS_XSLFO,
- "fo:simple-page-master" );
- simplePageMaster.setAttribute( "master-name", masterName );
- layoutMasterSet.appendChild( simplePageMaster );
-
- return simplePageMaster;
- }
-
- protected Element createBasicLinkExternal( String externalDestination )
- {
- final Element basicLink = document.createElementNS( NS_XSLFO,
- "fo:basic-link" );
- basicLink.setAttribute( "external-destination", externalDestination );
- return basicLink;
- }
-
- public Element createBasicLinkInternal( String internalDestination )
- {
- final Element basicLink = document.createElementNS( NS_XSLFO,
- "fo:basic-link" );
- basicLink.setAttribute( "internal-destination", internalDestination );
- return basicLink;
- }
-
- public Element createBlock()
- {
- return document.createElementNS( NS_XSLFO, "fo:block" );
- }
-
- public Element createExternalGraphic( String source )
- {
- Element result = document.createElementNS( NS_XSLFO,
- "fo:external-graphic" );
- result.setAttribute( "src", "url('" + source + "')" );
- return result;
- }
-
- public Element createInline()
- {
- return document.createElementNS( NS_XSLFO, "fo:inline" );
- }
-
- public Element createLeader()
- {
- return document.createElementNS( NS_XSLFO, "fo:leader" );
- }
-
- public Element createListBlock()
- {
- return document.createElementNS( NS_XSLFO, "fo:list-block" );
- }
-
- public Element createListItem()
- {
- return document.createElementNS( NS_XSLFO, "fo:list-item" );
- }
-
- public Element createListItemBody()
- {
- return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
- }
-
- public Element createListItemLabel( String text )
- {
- Element result = document.createElementNS( NS_XSLFO,
- "fo:list-item-label" );
- Element block = createBlock();
- block.appendChild( document.createTextNode( text ) );
- result.appendChild( block );
- return result;
- }
-
- protected Element createTable()
- {
- return document.createElementNS( NS_XSLFO, "fo:table" );
- }
-
- protected Element createTableBody()
- {
- return document.createElementNS( NS_XSLFO, "fo:table-body" );
- }
-
- protected Element createTableCell()
- {
- return document.createElementNS( NS_XSLFO, "fo:table-cell" );
- }
-
- protected Element createTableHeader()
- {
- return document.createElementNS( NS_XSLFO, "fo:table-header" );
- }
-
- protected Element createTableRow()
- {
- return document.createElementNS( NS_XSLFO, "fo:table-row" );
- }
-
- protected Text createText( String data )
- {
- return document.createTextNode( data );
- }
-
- public Document getDocument()
- {
- return document;
- }
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-public class HtmlDocumentFacade
-{
-
- protected final Element body;
- protected final Document document;
- protected final Element head;
- protected final Element html;
-
- public HtmlDocumentFacade( Document document )
- {
- this.document = document;
-
- html = document.createElement( "html" );
- document.appendChild( html );
-
- body = document.createElement( "body" );
- head = document.createElement( "head" );
-
- html.appendChild( head );
- html.appendChild( body );
- }
-
- public Element createHyperlink( String internalDestination )
- {
- final Element basicLink = document.createElement( "a" );
- basicLink.setAttribute( "href", internalDestination );
- return basicLink;
- }
-
- public Element createListItem()
- {
- return document.createElement( "li" );
- }
-
- public Element createParagraph()
- {
- return document.createElement( "p" );
- }
-
- public Element createTable()
- {
- return document.createElement( "table" );
- }
-
- public Element createTableBody()
- {
- return document.createElement( "tbody" );
- }
-
- public Element createTableCell()
- {
- return document.createElement( "td" );
- }
-
- public Element createTableHeader()
- {
- return document.createElement( "thead" );
- }
-
- public Element createTableHeaderCell()
- {
- return document.createElement( "th" );
- }
-
- public Element createTableRow()
- {
- return document.createElement( "tr" );
- }
-
- public Text createText( String data )
- {
- return document.createTextNode( data );
- }
-
- public Element createUnorderedList()
- {
- return document.createElement( "ul" );
- }
-
- public Document getDocument()
- {
- return document;
- }
-
-}
+++ /dev/null
-/*
- * ====================================================================
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ====================================================================
- */
-
-package org.apache.poi.hwpf.extractor;
-
-/**
- * Comment me
- *
- * @author Ryan Ackley
- */
-public final class NumberFormatter {
-
- private static String[] C_LETTERS = new String[] { "a", "b", "c", "d", "e", "f", "g", "h", "i",
- "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "x", "y", "z" };
-
- private static String[] C_ROMAN = new String[] { "i", "ii", "iii", "iv", "v", "vi", "vii",
- "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix",
- "xx", "xxi", "xxii", "xxiii", "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx",
- "xxxi", "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxvii", "xxxviii",
- "xxxix", "xl", "xli", "xlii", "xliii", "xliv", "xlv", "xlvi", "xlvii", "xlviii",
- "xlix", "l" };
-
- private final static int T_ARABIC = 0;
- private final static int T_LOWER_LETTER = 4;
- private final static int T_LOWER_ROMAN = 2;
- private final static int T_ORDINAL = 5;
- private final static int T_UPPER_LETTER = 3;
- private final static int T_UPPER_ROMAN = 1;
-
- public static String getNumber(int num, int style) {
- switch (style) {
- case T_UPPER_ROMAN:
- return C_ROMAN[num - 1].toUpperCase();
- case T_LOWER_ROMAN:
- return C_ROMAN[num - 1];
- case T_UPPER_LETTER:
- return C_LETTERS[num - 1].toUpperCase();
- case T_LOWER_LETTER:
- return C_LETTERS[num - 1];
- case T_ARABIC:
- case T_ORDINAL:
- default:
- return String.valueOf(num);
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.model.ListFormatOverride;
-import org.apache.poi.hwpf.model.ListTables;
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.Range;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.SectionProperties;
-import org.apache.poi.hwpf.usermodel.Table;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableIterator;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-/**
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class WordToFoExtractor extends AbstractWordExtractor
-{
-
- /**
- * Holds properties values, applied to current <tt>fo:block</tt> element.
- * Those properties shall not be doubled in children <tt>fo:inline</tt>
- * elements.
- */
- private static class BlockProperies
- {
- final boolean pBold;
- final String pFontName;
- final int pFontSize;
- final boolean pItalic;
-
- public BlockProperies( String pFontName, int pFontSize, boolean pBold,
- boolean pItalic )
- {
- this.pFontName = pFontName;
- this.pFontSize = pFontSize;
- this.pBold = pBold;
- this.pItalic = pItalic;
- }
- }
-
- private static final POILogger logger = POILogFactory
- .getLogger( WordToFoExtractor.class );
-
- public static String getBorderType( BorderCode borderCode )
- {
- if ( borderCode == null )
- throw new IllegalArgumentException( "borderCode is null" );
-
- switch ( borderCode.getBorderType() )
- {
- case 1:
- case 2:
- return "solid";
- case 3:
- return "double";
- case 5:
- return "solid";
- case 6:
- return "dotted";
- case 7:
- case 8:
- return "dashed";
- case 9:
- return "dotted";
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- case 16:
- case 17:
- case 18:
- case 19:
- return "double";
- case 20:
- return "solid";
- case 21:
- return "double";
- case 22:
- return "dashed";
- case 23:
- return "dashed";
- case 24:
- return "ridge";
- case 25:
- return "grooved";
- default:
- return "solid";
- }
- }
-
- /**
- * Java main() interface to interact with WordToFoExtractor
- *
- * <p>
- * Usage: WordToFoExtractor infile outfile
- * </p>
- * Where infile is an input .doc file ( Word 97-2007) which will be rendered
- * as XSL-FO into outfile
- *
- */
- public static void main( String[] args )
- {
- if ( args.length < 2 )
- {
- System.err
- .println( "Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>" );
- return;
- }
-
- System.out.println( "Converting " + args[0] );
- System.out.println( "Saving output to " + args[1] );
- try
- {
- Document doc = WordToFoExtractor.process( new File( args[0] ) );
-
- FileWriter out = new FileWriter( args[1] );
- DOMSource domSource = new DOMSource( doc );
- StreamResult streamResult = new StreamResult( out );
- TransformerFactory tf = TransformerFactory.newInstance();
- Transformer serializer = tf.newTransformer();
- // TODO set encoding from a command argument
- serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
- serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
- serializer.transform( domSource, streamResult );
- out.close();
- }
- catch ( Exception e )
- {
- e.printStackTrace();
- }
- }
-
- static Document process( File docFile ) throws Exception
- {
- final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
- WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
- DocumentBuilderFactory.newInstance().newDocumentBuilder()
- .newDocument() );
- wordToFoExtractor.processDocument( hwpfDocument );
- return wordToFoExtractor.getDocument();
- }
-
- private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
-
- protected final FoDocumentFacade foDocumentFacade;
-
- /**
- * Creates new instance of {@link WordToFoExtractor}. Can be used for output
- * several {@link HWPFDocument}s into single FO document.
- *
- * @param document
- * XML DOM Document used as XSL FO document. Shall support
- * namespaces
- */
- public WordToFoExtractor( Document document )
- {
- this.foDocumentFacade = new FoDocumentFacade( document );
- }
-
- protected String createPageMaster( SectionProperties sep, String type,
- int section )
- {
- float height = sep.getYaPage() / WordToFoUtils.TWIPS_PER_INCH;
- float width = sep.getXaPage() / WordToFoUtils.TWIPS_PER_INCH;
- float leftMargin = sep.getDxaLeft() / WordToFoUtils.TWIPS_PER_INCH;
- float rightMargin = sep.getDxaRight() / WordToFoUtils.TWIPS_PER_INCH;
- float topMargin = sep.getDyaTop() / WordToFoUtils.TWIPS_PER_INCH;
- float bottomMargin = sep.getDyaBottom() / WordToFoUtils.TWIPS_PER_INCH;
-
- // add these to the header
- String pageMasterName = type + "-page" + section;
-
- Element pageMaster = foDocumentFacade
- .addSimplePageMaster( pageMasterName );
- pageMaster.setAttribute( "page-height", height + "in" );
- pageMaster.setAttribute( "page-width", width + "in" );
-
- Element regionBody = foDocumentFacade.addRegionBody( pageMaster );
- regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
- + "in " + bottomMargin + "in " + leftMargin + "in" );
-
- /*
- * 6.4.14 fo:region-body
- *
- * The values of the padding and border-width traits must be "0".
- */
- // WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top");
- // WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom");
- // WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
- // WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
-
- if ( sep.getCcolM1() > 0 )
- {
- regionBody.setAttribute( "column-count", ""
- + ( sep.getCcolM1() + 1 ) );
- if ( sep.getFEvenlySpaced() )
- {
- regionBody.setAttribute( "column-gap",
- ( sep.getDxaColumns() / WordToFoUtils.TWIPS_PER_INCH )
- + "in" );
- }
- else
- {
- regionBody.setAttribute( "column-gap", "0.25in" );
- }
- }
-
- return pageMasterName;
- }
-
- public Document getDocument()
- {
- return foDocumentFacade.getDocument();
- }
-
- @Override
- protected void outputCharacters( Element block, CharacterRun characterRun,
- String text )
- {
- BlockProperies blockProperies = this.blocksProperies.peek();
- Element inline = foDocumentFacade.createInline();
- if ( characterRun.isBold() != blockProperies.pBold )
- {
- WordToFoUtils.setBold( inline, characterRun.isBold() );
- }
- if ( characterRun.isItalic() != blockProperies.pItalic )
- {
- WordToFoUtils.setItalic( inline, characterRun.isItalic() );
- }
- if ( characterRun.getFontName() != null
- && !AbstractWordUtils.equals( characterRun.getFontName(),
- blockProperies.pFontName ) )
- {
- WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
- }
- if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
- {
- WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 );
- }
- WordToFoUtils.setCharactersProperties( characterRun, inline );
- block.appendChild( inline );
-
- Text textNode = foDocumentFacade.createText( text );
- inline.appendChild( textNode );
- }
-
- protected void processHyperlink( HWPFDocumentCore hwpfDocument,
- Element currentBlock, Paragraph paragraph,
- List<CharacterRun> characterRuns, int currentTableLevel,
- String hyperlink, int beginTextInclusive, int endTextExclusive )
- {
- Element basicLink = foDocumentFacade
- .createBasicLinkExternal( hyperlink );
- currentBlock.appendChild( basicLink );
-
- if ( beginTextInclusive < endTextExclusive )
- processCharacters( hwpfDocument, currentTableLevel, paragraph,
- basicLink, characterRuns, beginTextInclusive,
- endTextExclusive );
- }
-
- /**
- * This method shall store image bytes in external file and convert it if
- * necessary. Images shall be stored using PNG format (for bitmap) or SVG
- * (for vector). Other formats may be not supported by your XSL FO
- * processor.
- * <p>
- * Please note the
- * {@link WordToFoUtils#setPictureProperties(Picture, Element)} method.
- *
- * @param currentBlock
- * currently processed FO element, like <tt>fo:block</tt>. Shall
- * be used as parent of newly created
- * <tt>fo:external-graphic</tt> or
- * <tt>fo:instream-foreign-object</tt>
- * @param inlined
- * if image is inlined
- * @param picture
- * HWPF object, contained picture data and properties
- */
- protected void processImage( Element currentBlock, boolean inlined,
- Picture picture )
- {
- // no default implementation -- skip
- currentBlock.appendChild( foDocumentFacade.getDocument().createComment(
- "Image link to '" + picture.suggestFullFileName()
- + "' can be here" ) );
- }
-
- protected void processPageref( HWPFDocumentCore hwpfDocument,
- Element currentBlock, Paragraph paragraph,
- List<CharacterRun> characterRuns, int currentTableLevel,
- String pageref, int beginTextInclusive, int endTextExclusive )
- {
- Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
- currentBlock.appendChild( basicLink );
-
- if ( beginTextInclusive < endTextExclusive )
- processCharacters( hwpfDocument, currentTableLevel, paragraph,
- basicLink, characterRuns, beginTextInclusive,
- endTextExclusive );
- }
-
- protected void processParagraph( HWPFDocumentCore hwpfDocument,
- Element parentFopElement, int currentTableLevel,
- Paragraph paragraph, String bulletText )
- {
- final Element block = foDocumentFacade.createBlock();
- parentFopElement.appendChild( block );
-
- WordToFoUtils.setParagraphProperties( paragraph, block );
-
- final int charRuns = paragraph.numCharacterRuns();
-
- if ( charRuns == 0 )
- {
- return;
- }
-
- {
- final String pFontName;
- final int pFontSize;
- final boolean pBold;
- final boolean pItalic;
- {
- CharacterRun characterRun = paragraph.getCharacterRun( 0 );
- pFontSize = characterRun.getFontSize() / 2;
- pFontName = characterRun.getFontName();
- pBold = characterRun.isBold();
- pItalic = characterRun.isItalic();
- }
- WordToFoUtils.setFontFamily( block, pFontName );
- WordToFoUtils.setFontSize( block, pFontSize );
- WordToFoUtils.setBold( block, pBold );
- WordToFoUtils.setItalic( block, pItalic );
-
- blocksProperies.push( new BlockProperies( pFontName, pFontSize,
- pBold, pItalic ) );
- }
- try
- {
- boolean haveAnyText = false;
-
- if ( WordToFoUtils.isNotEmpty( bulletText ) )
- {
- Element inline = foDocumentFacade.createInline();
- block.appendChild( inline );
-
- Text textNode = foDocumentFacade.createText( bulletText );
- inline.appendChild( textNode );
-
- haveAnyText |= bulletText.trim().length() != 0;
- }
-
- List<CharacterRun> characterRuns = WordToFoUtils
- .findCharacterRuns( paragraph );
- haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
- paragraph, block, characterRuns, 0, characterRuns.size() );
-
- if ( !haveAnyText )
- {
- Element leader = foDocumentFacade.createLeader();
- block.appendChild( leader );
- }
- }
- finally
- {
- blocksProperies.pop();
- }
-
- return;
- }
-
- protected void processSection( HWPFDocumentCore wordDocument,
- Section section, int sectionCounter )
- {
- String regularPage = createPageMaster(
- WordToFoUtils.getSectionProperties( section ), "page",
- sectionCounter );
-
- Element pageSequence = foDocumentFacade.addPageSequence( regularPage );
- Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
- "xsl-region-body" );
-
- processSectionParagraphes( wordDocument, flow, section, 0 );
- }
-
- protected void processSectionParagraphes( HWPFDocument wordDocument,
- Element flow, Range range, int currentTableLevel )
- {
- final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
- for ( TableIterator tableIterator = WordToFoUtils.newTableIterator(
- range, currentTableLevel + 1 ); tableIterator.hasNext(); )
- {
- Table next = tableIterator.next();
- allTables.put( Integer.valueOf( next.getStartOffset() ), next );
- }
-
- final ListTables listTables = wordDocument.getListTables();
- int currentListInfo = 0;
-
- final int paragraphs = range.numParagraphs();
- for ( int p = 0; p < paragraphs; p++ )
- {
- Paragraph paragraph = range.getParagraph( p );
-
- if ( allTables.containsKey( Integer.valueOf( paragraph
- .getStartOffset() ) ) )
- {
- Table table = allTables.get( Integer.valueOf( paragraph
- .getStartOffset() ) );
- processTable( wordDocument, flow, table, currentTableLevel + 1 );
- continue;
- }
-
- if ( paragraph.isInTable()
- && paragraph.getTableLevel() != currentTableLevel )
- {
- continue;
- }
-
- if ( paragraph.getIlfo() != currentListInfo )
- {
- currentListInfo = paragraph.getIlfo();
- }
-
- if ( currentListInfo != 0 )
- {
- if ( listTables != null )
- {
- final ListFormatOverride listFormatOverride = listTables
- .getOverride( paragraph.getIlfo() );
-
- String label = WordToFoUtils.getBulletText( listTables,
- paragraph, listFormatOverride.getLsid() );
-
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, label );
- }
- else
- {
- logger.log( POILogger.WARN,
- "Paragraph #" + paragraph.getStartOffset() + "-"
- + paragraph.getEndOffset()
- + " has reference to list structure #"
- + currentListInfo
- + ", but listTables not defined in file" );
-
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, WordToFoUtils.EMPTY );
- }
- }
- else
- {
- processParagraph( wordDocument, flow, currentTableLevel,
- paragraph, WordToFoUtils.EMPTY );
- }
- }
-
- }
-
- protected void processTable( HWPFDocumentCore wordDocument, Element flow,
- Table table, int thisTableLevel )
- {
- Element tableHeader = foDocumentFacade.createTableHeader();
- Element tableBody = foDocumentFacade.createTableBody();
-
- final int tableRows = table.numRows();
-
- int maxColumns = Integer.MIN_VALUE;
- for ( int r = 0; r < tableRows; r++ )
- {
- maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
- }
-
- for ( int r = 0; r < tableRows; r++ )
- {
- TableRow tableRow = table.getRow( r );
-
- Element tableRowElement = foDocumentFacade.createTableRow();
- WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
-
- final int rowCells = tableRow.numCells();
- for ( int c = 0; c < rowCells; c++ )
- {
- TableCell tableCell = tableRow.getCell( c );
-
- if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
- continue;
-
- if ( tableCell.isVerticallyMerged()
- && !tableCell.isFirstVerticallyMerged() )
- continue;
-
- Element tableCellElement = foDocumentFacade.createTableCell();
- WordToFoUtils.setTableCellProperties( tableRow, tableCell,
- tableCellElement, r == 0, r == tableRows - 1, c == 0,
- c == rowCells - 1 );
-
- if ( tableCell.isFirstMerged() )
- {
- int count = 0;
- for ( int c1 = c; c1 < rowCells; c1++ )
- {
- TableCell nextCell = tableRow.getCell( c1 );
- if ( nextCell.isMerged() )
- count++;
- if ( !nextCell.isMerged() )
- break;
- }
- tableCellElement.setAttribute( "number-columns-spanned", ""
- + count );
- }
- else
- {
- if ( c == rowCells - 1 && c != maxColumns - 1 )
- {
- tableCellElement.setAttribute(
- "number-columns-spanned", ""
- + ( maxColumns - c ) );
- }
- }
-
- if ( tableCell.isFirstVerticallyMerged() )
- {
- int count = 0;
- for ( int r1 = r; r1 < tableRows; r1++ )
- {
- TableRow nextRow = table.getRow( r1 );
- if ( nextRow.numCells() < c )
- break;
- TableCell nextCell = nextRow.getCell( c );
- if ( nextCell.isVerticallyMerged() )
- count++;
- if ( !nextCell.isVerticallyMerged() )
- break;
- }
- tableCellElement.setAttribute( "number-rows-spanned", ""
- + count );
- }
-
- processSectionParagraphes( wordDocument, tableCellElement,
- tableCell, thisTableLevel );
-
- if ( !tableCellElement.hasChildNodes() )
- {
- tableCellElement.appendChild( foDocumentFacade
- .createBlock() );
- }
-
- tableRowElement.appendChild( tableCellElement );
- }
-
- if ( tableRow.isTableHeader() )
- {
- tableHeader.appendChild( tableRowElement );
- }
- else
- {
- tableBody.appendChild( tableRowElement );
- }
- }
-
- final Element tableElement = foDocumentFacade.createTable();
- if ( tableHeader.hasChildNodes() )
- {
- tableElement.appendChild( tableHeader );
- }
- if ( tableBody.hasChildNodes() )
- {
- tableElement.appendChild( tableBody );
- flow.appendChild( tableElement );
- }
- else
- {
- logger.log(
- POILogger.WARN,
- "Table without body starting on offset "
- + table.getStartOffset() + " -- "
- + table.getEndOffset() );
- }
- }
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterProperties;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.w3c.dom.Element;
-
-public class WordToFoUtils extends AbstractWordUtils
-{
- public static void setBold( final Element element, final boolean bold )
- {
- element.setAttribute( "font-weight", bold ? "bold" : "normal" );
- }
-
- public static void setBorder( Element element, BorderCode borderCode,
- String where )
- {
- if ( element == null )
- throw new IllegalArgumentException( "element is null" );
-
- if ( borderCode == null || borderCode.getBorderType() == 0 )
- return;
-
- if ( isEmpty( where ) )
- {
- element.setAttribute( "border-style", getBorderType( borderCode ) );
- element.setAttribute( "border-color",
- getColor( borderCode.getColor() ) );
- element.setAttribute( "border-width", getBorderWidth( borderCode ) );
- }
- else
- {
- element.setAttribute( "border-" + where + "-style",
- getBorderType( borderCode ) );
- element.setAttribute( "border-" + where + "-color",
- getColor( borderCode.getColor() ) );
- element.setAttribute( "border-" + where + "-width",
- getBorderWidth( borderCode ) );
- }
- }
-
- public static void setCharactersProperties(
- final CharacterRun characterRun, final Element inline )
- {
- final CharacterProperties clonedProperties = characterRun
- .cloneProperties();
- StringBuilder textDecorations = new StringBuilder();
-
- setBorder( inline, clonedProperties.getBrc(), EMPTY );
-
- if ( characterRun.isCapitalized() )
- {
- inline.setAttribute( "text-transform", "uppercase" );
- }
- if ( characterRun.isHighlighted() )
- {
- inline.setAttribute( "background-color",
- getColor( clonedProperties.getIcoHighlight() ) );
- }
- if ( characterRun.isStrikeThrough() )
- {
- if ( textDecorations.length() > 0 )
- textDecorations.append( " " );
- textDecorations.append( "line-through" );
- }
- if ( characterRun.isShadowed() )
- {
- inline.setAttribute( "text-shadow", characterRun.getFontSize() / 24
- + "pt" );
- }
- if ( characterRun.isSmallCaps() )
- {
- inline.setAttribute( "font-variant", "small-caps" );
- }
- if ( characterRun.getSubSuperScriptIndex() == 1 )
- {
- inline.setAttribute( "baseline-shift", "super" );
- inline.setAttribute( "font-size", "smaller" );
- }
- if ( characterRun.getSubSuperScriptIndex() == 2 )
- {
- inline.setAttribute( "baseline-shift", "sub" );
- inline.setAttribute( "font-size", "smaller" );
- }
- if ( characterRun.getUnderlineCode() > 0 )
- {
- if ( textDecorations.length() > 0 )
- textDecorations.append( " " );
- textDecorations.append( "underline" );
- }
- if ( characterRun.isVanished() )
- {
- inline.setAttribute( "visibility", "hidden" );
- }
- if ( textDecorations.length() > 0 )
- {
- inline.setAttribute( "text-decoration", textDecorations.toString() );
- }
- }
-
- public static void setFontFamily( final Element element,
- final String fontFamily )
- {
- if ( isEmpty( fontFamily ) )
- return;
-
- element.setAttribute( "font-family", fontFamily );
- }
-
- public static void setFontSize( final Element element, final int fontSize )
- {
- element.setAttribute( "font-size", String.valueOf( fontSize ) );
- }
-
- public static void setIndent( Paragraph paragraph, Element block )
- {
- if ( paragraph.getFirstLineIndent() != 0 )
- {
- block.setAttribute(
- "text-indent",
- String.valueOf( paragraph.getFirstLineIndent()
- / TWIPS_PER_PT )
- + "pt" );
- }
- if ( paragraph.getIndentFromLeft() != 0 )
- {
- block.setAttribute(
- "start-indent",
- String.valueOf( paragraph.getIndentFromLeft()
- / TWIPS_PER_PT )
- + "pt" );
- }
- if ( paragraph.getIndentFromRight() != 0 )
- {
- block.setAttribute(
- "end-indent",
- String.valueOf( paragraph.getIndentFromRight()
- / TWIPS_PER_PT )
- + "pt" );
- }
- if ( paragraph.getSpacingBefore() != 0 )
- {
- block.setAttribute(
- "space-before",
- String.valueOf( paragraph.getSpacingBefore() / TWIPS_PER_PT )
- + "pt" );
- }
- if ( paragraph.getSpacingAfter() != 0 )
- {
- block.setAttribute( "space-after",
- String.valueOf( paragraph.getSpacingAfter() / TWIPS_PER_PT )
- + "pt" );
- }
- }
-
- public static void setItalic( final Element element, final boolean italic )
- {
- element.setAttribute( "font-style", italic ? "italic" : "normal" );
- }
-
- public static void setJustification( Paragraph paragraph,
- final Element element )
- {
- String justification = getJustification( paragraph.getJustification() );
- if ( isNotEmpty( justification ) )
- element.setAttribute( "text-align", justification );
- }
-
- public static void setParagraphProperties( Paragraph paragraph,
- Element block )
- {
- setIndent( paragraph, block );
- setJustification( paragraph, block );
-
- setBorder( block, paragraph.getBottomBorder(), "bottom" );
- setBorder( block, paragraph.getLeftBorder(), "left" );
- setBorder( block, paragraph.getRightBorder(), "right" );
- setBorder( block, paragraph.getTopBorder(), "top" );
-
- if ( paragraph.pageBreakBefore() )
- {
- block.setAttribute( "break-before", "page" );
- }
-
- block.setAttribute( "hyphenate",
- String.valueOf( paragraph.isAutoHyphenated() ) );
-
- if ( paragraph.keepOnPage() )
- {
- block.setAttribute( "keep-together.within-page", "always" );
- }
-
- if ( paragraph.keepWithNext() )
- {
- block.setAttribute( "keep-with-next.within-page", "always" );
- }
-
- block.setAttribute( "linefeed-treatment", "preserve" );
- block.setAttribute( "white-space-collapse", "false" );
- }
-
- public static void setPictureProperties( Picture picture,
- Element graphicElement )
- {
- final int aspectRatioX = picture.getAspectRatioX();
- final int aspectRatioY = picture.getAspectRatioY();
-
- if ( aspectRatioX > 0 )
- {
- graphicElement
- .setAttribute( "content-width", ( ( picture.getDxaGoal()
- * aspectRatioX / 100 ) / TWIPS_PER_PT )
- + "pt" );
- }
- else
- graphicElement.setAttribute( "content-width",
- ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
-
- if ( aspectRatioY > 0 )
- graphicElement
- .setAttribute( "content-height", ( ( picture.getDyaGoal()
- * aspectRatioY / 100 ) / TWIPS_PER_PT )
- + "pt" );
- else
- graphicElement.setAttribute( "content-height",
- ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
-
- if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
- {
- graphicElement.setAttribute( "scaling", "uniform" );
- }
- else
- {
- graphicElement.setAttribute( "scaling", "non-uniform" );
- }
-
- graphicElement.setAttribute( "vertical-align", "text-bottom" );
-
- if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
- || picture.getDyaCropBottom() != 0
- || picture.getDxaCropLeft() != 0 )
- {
- int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
- int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
- int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
- int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
- graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
- + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
- + "pt)" );
- graphicElement.setAttribute( "oveerflow", "hidden" );
- }
- }
-
- public static void setTableCellProperties( TableRow tableRow,
- TableCell tableCell, Element element, boolean toppest,
- boolean bottomest, boolean leftest, boolean rightest )
- {
- element.setAttribute( "width", ( tableCell.getWidth() / TWIPS_PER_INCH )
- + "in" );
- element.setAttribute( "padding-start",
- ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
- element.setAttribute( "padding-end",
- ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
-
- BorderCode top = tableCell.getBrcTop() != null
- && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
- .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
- .getHorizontalBorder();
- BorderCode bottom = tableCell.getBrcBottom() != null
- && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
- .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
- : tableRow.getHorizontalBorder();
-
- BorderCode left = tableCell.getBrcLeft() != null
- && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
- .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
- .getVerticalBorder();
- BorderCode right = tableCell.getBrcRight() != null
- && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
- .getBrcRight() : rightest ? tableRow.getRightBorder()
- : tableRow.getVerticalBorder();
-
- setBorder( element, bottom, "bottom" );
- setBorder( element, left, "left" );
- setBorder( element, right, "right" );
- setBorder( element, top, "top" );
- }
-
- public static void setTableRowProperties( TableRow tableRow,
- Element tableRowElement )
- {
- if ( tableRow.getRowHeight() > 0 )
- {
- tableRowElement.setAttribute( "height",
- ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in" );
- }
- if ( !tableRow.cantSplit() )
- {
- tableRowElement.setAttribute( "keep-together", "always" );
- }
- }
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.util.List;
-import java.util.Stack;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.Section;
-import org.apache.poi.hwpf.usermodel.SectionProperties;
-import org.apache.poi.hwpf.usermodel.Table;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Text;
-
-import static org.apache.poi.hwpf.extractor.AbstractWordUtils.TWIPS_PER_INCH;
-
-/**
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class WordToHtmlExtractor extends AbstractWordExtractor
-{
-
- /**
- * Holds properties values, applied to current <tt>p</tt> element. Those
- * properties shall not be doubled in children <tt>span</tt> elements.
- */
- private static class BlockProperies
- {
- final String pFontName;
- final int pFontSize;
-
- public BlockProperies( String pFontName, int pFontSize )
- {
- this.pFontName = pFontName;
- this.pFontSize = pFontSize;
- }
- }
-
- private static final POILogger logger = POILogFactory
- .getLogger( WordToHtmlExtractor.class );
-
- private static String getSectionStyle( Section section )
- {
- SectionProperties sep = WordToHtmlUtils.getSectionProperties( section );
-
- float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
- float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
- float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
- float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
-
- String style = "margin: " + topMargin + "in " + rightMargin + "in "
- + bottomMargin + "in " + leftMargin + "in; ";
-
- if ( sep.getCcolM1() > 0 )
- {
- style += "column-count: " + ( sep.getCcolM1() + 1 ) + "; ";
- if ( sep.getFEvenlySpaced() )
- {
- style += "column-gap: "
- + ( sep.getDxaColumns() / TWIPS_PER_INCH ) + "in; ";
- }
- else
- {
- style += "column-gap: 0.25in; ";
- }
- }
- return style;
- }
-
- /**
- * Java main() interface to interact with WordToHtmlExtractor
- *
- * <p>
- * Usage: WordToHtmlExtractor infile outfile
- * </p>
- * Where infile is an input .doc file ( Word 95-2007) which will be rendered
- * as HTML into outfile
- */
- public static void main( String[] args )
- {
- if ( args.length < 2 )
- {
- System.err
- .println( "Usage: WordToHtmlExtractor <inputFile.doc> <saveTo.html>" );
- return;
- }
-
- System.out.println( "Converting " + args[0] );
- System.out.println( "Saving output to " + args[1] );
- try
- {
- Document doc = WordToHtmlExtractor.process( new File( args[0] ) );
-
- FileWriter out = new FileWriter( args[1] );
- DOMSource domSource = new DOMSource( doc );
- StreamResult streamResult = new StreamResult( out );
-
- TransformerFactory tf = TransformerFactory.newInstance();
- Transformer serializer = tf.newTransformer();
- // TODO set encoding from a command argument
- serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
- serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
- serializer.setOutputProperty( OutputKeys.METHOD, "html" );
- serializer.transform( domSource, streamResult );
- out.close();
- }
- catch ( Exception e )
- {
- e.printStackTrace();
- }
- }
-
- static Document process( File docFile ) throws Exception
- {
- final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
- WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
- DocumentBuilderFactory.newInstance().newDocumentBuilder()
- .newDocument() );
- wordToHtmlExtractor.processDocument( wordDocument );
- return wordToHtmlExtractor.getDocument();
- }
-
- private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
-
- private final HtmlDocumentFacade htmlDocumentFacade;
-
- /**
- * Creates new instance of {@link WordToHtmlExtractor}. Can be used for
- * output several {@link HWPFDocument}s into single HTML document.
- *
- * @param document
- * XML DOM Document used as HTML document
- */
- public WordToHtmlExtractor( Document document )
- {
- this.htmlDocumentFacade = new HtmlDocumentFacade( document );
- }
-
- public Document getDocument()
- {
- return htmlDocumentFacade.getDocument();
- }
-
- @Override
- protected void outputCharacters( Element pElement,
- CharacterRun characterRun, String text )
- {
- Element span = htmlDocumentFacade.document.createElement( "span" );
- pElement.appendChild( span );
-
- StringBuilder style = new StringBuilder();
- BlockProperies blockProperies = this.blocksProperies.peek();
- if ( characterRun.getFontName() != null
- && !WordToHtmlUtils.equals( characterRun.getFontName(),
- blockProperies.pFontName ) )
- {
- style.append( "font-family: " + characterRun.getFontName() + "; " );
- }
- if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
- {
- style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
- }
-
- WordToHtmlUtils.addCharactersProperties( characterRun, style );
- if ( style.length() != 0 )
- span.setAttribute( "style", style.toString() );
-
- Text textNode = htmlDocumentFacade.createText( text );
- span.appendChild( textNode );
- }
-
- protected void processHyperlink( HWPFDocumentCore wordDocument,
- Element currentBlock, Paragraph paragraph,
- List<CharacterRun> characterRuns, int currentTableLevel,
- String hyperlink, int beginTextInclusive, int endTextExclusive )
- {
- Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
- currentBlock.appendChild( basicLink );
-
- if ( beginTextInclusive < endTextExclusive )
- processCharacters( wordDocument, currentTableLevel, paragraph,
- basicLink, characterRuns, beginTextInclusive,
- endTextExclusive );
- }
-
- /**
- * This method shall store image bytes in external file and convert it if
- * necessary. Images shall be stored using PNG format. Other formats may be
- * not supported by user browser.
- * <p>
- * Please note the
- * {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
- *
- * @param currentBlock
- * currently processed HTML element, like <tt>p</tt>. Shall be
- * used as parent of newly created <tt>img</tt>
- * @param inlined
- * if image is inlined
- * @param picture
- * HWPF object, contained picture data and properties
- */
- protected void processImage( Element currentBlock, boolean inlined,
- Picture picture )
- {
- // no default implementation -- skip
- currentBlock.appendChild( htmlDocumentFacade.document
- .createComment( "Image link to '"
- + picture.suggestFullFileName() + "' can be here" ) );
- }
-
- protected void processPageref( HWPFDocumentCore hwpfDocument,
- Element currentBlock, Paragraph paragraph,
- List<CharacterRun> characterRuns, int currentTableLevel,
- String pageref, int beginTextInclusive, int endTextExclusive )
- {
- Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
- currentBlock.appendChild( basicLink );
-
- if ( beginTextInclusive < endTextExclusive )
- processCharacters( hwpfDocument, currentTableLevel, paragraph,
- basicLink, characterRuns, beginTextInclusive,
- endTextExclusive );
- }
-
- protected void processParagraph( HWPFDocumentCore hwpfDocument,
- Element parentFopElement, int currentTableLevel,
- Paragraph paragraph, String bulletText )
- {
- final Element pElement = htmlDocumentFacade.createParagraph();
- parentFopElement.appendChild( pElement );
-
- StringBuilder style = new StringBuilder();
- WordToHtmlUtils.addParagraphProperties( paragraph, style );
-
- final int charRuns = paragraph.numCharacterRuns();
-
- if ( charRuns == 0 )
- {
- return;
- }
-
- {
- final String pFontName;
- final int pFontSize;
- final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
- if ( characterRun != null )
- {
- pFontSize = characterRun.getFontSize() / 2;
- pFontName = characterRun.getFontName();
- WordToHtmlUtils.addFontFamily( pFontName, style );
- WordToHtmlUtils.addFontSize( pFontSize, style );
- }
- else
- {
- pFontSize = -1;
- pFontName = WordToHtmlUtils.EMPTY;
- }
- blocksProperies.push( new BlockProperies( pFontName, pFontSize ) );
- }
- try
- {
- if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
- {
- Text textNode = htmlDocumentFacade.createText( bulletText );
- pElement.appendChild( textNode );
- }
-
- List<CharacterRun> characterRuns = WordToHtmlUtils
- .findCharacterRuns( paragraph );
- processCharacters( hwpfDocument, currentTableLevel, paragraph,
- pElement, characterRuns, 0, characterRuns.size() );
- }
- finally
- {
- blocksProperies.pop();
- }
-
- if ( style.length() > 0 )
- pElement.setAttribute( "style", style.toString() );
-
- return;
- }
-
- protected void processSection( HWPFDocumentCore wordDocument,
- Section section, int sectionCounter )
- {
- Element div = htmlDocumentFacade.document.createElement( "div" );
- div.setAttribute( "style", getSectionStyle( section ) );
- htmlDocumentFacade.body.appendChild( div );
-
- processSectionParagraphes( wordDocument, div, section, 0 );
- }
-
- @Override
- protected void processSingleSection( HWPFDocumentCore wordDocument,
- Section section )
- {
- htmlDocumentFacade.body.setAttribute( "style",
- getSectionStyle( section ) );
-
- processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
- section, 0 );
- }
-
- protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
- Table table, int thisTableLevel )
- {
- Element tableHeader = htmlDocumentFacade.createTableHeader();
- Element tableBody = htmlDocumentFacade.createTableBody();
-
- final int tableRows = table.numRows();
-
- int maxColumns = Integer.MIN_VALUE;
- for ( int r = 0; r < tableRows; r++ )
- {
- maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
- }
-
- for ( int r = 0; r < tableRows; r++ )
- {
- TableRow tableRow = table.getRow( r );
-
- Element tableRowElement = htmlDocumentFacade.createTableRow();
- StringBuilder tableRowStyle = new StringBuilder();
- WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
-
- final int rowCells = tableRow.numCells();
- for ( int c = 0; c < rowCells; c++ )
- {
- TableCell tableCell = tableRow.getCell( c );
-
- if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
- continue;
-
- if ( tableCell.isVerticallyMerged()
- && !tableCell.isFirstVerticallyMerged() )
- continue;
-
- Element tableCellElement;
- if ( tableRow.isTableHeader() )
- {
- tableCellElement = htmlDocumentFacade
- .createTableHeaderCell();
- }
- else
- {
- tableCellElement = htmlDocumentFacade.createTableCell();
- }
- StringBuilder tableCellStyle = new StringBuilder();
- WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
- r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
- tableCellStyle );
-
- if ( tableCell.isFirstMerged() )
- {
- int count = 0;
- for ( int c1 = c; c1 < rowCells; c1++ )
- {
- TableCell nextCell = tableRow.getCell( c1 );
- if ( nextCell.isMerged() )
- count++;
- if ( !nextCell.isMerged() )
- break;
- }
- tableCellElement.setAttribute( "colspan", "" + count );
- }
- else
- {
- if ( c == rowCells - 1 && c != maxColumns - 1 )
- {
- tableCellElement.setAttribute( "colspan", ""
- + ( maxColumns - c ) );
- }
- }
-
- if ( tableCell.isFirstVerticallyMerged() )
- {
- int count = 0;
- for ( int r1 = r; r1 < tableRows; r1++ )
- {
- TableRow nextRow = table.getRow( r1 );
- if ( nextRow.numCells() < c )
- break;
- TableCell nextCell = nextRow.getCell( c );
- if ( nextCell.isVerticallyMerged() )
- count++;
- if ( !nextCell.isVerticallyMerged() )
- break;
- }
- tableCellElement.setAttribute( "rowspan", "" + count );
- }
-
- processSectionParagraphes( hwpfDocument, tableCellElement,
- tableCell, thisTableLevel );
-
- if ( !tableCellElement.hasChildNodes() )
- {
- tableCellElement.appendChild( htmlDocumentFacade
- .createParagraph() );
- }
- if ( tableCellStyle.length() > 0 )
- tableCellElement.setAttribute( "style",
- tableCellStyle.toString() );
-
- tableRowElement.appendChild( tableCellElement );
- }
-
- if ( tableRowStyle.length() > 0 )
- tableRowElement
- .setAttribute( "style", tableRowStyle.toString() );
-
- if ( tableRow.isTableHeader() )
- {
- tableHeader.appendChild( tableRowElement );
- }
- else
- {
- tableBody.appendChild( tableRowElement );
- }
-
- }
-
- final Element tableElement = htmlDocumentFacade.createTable();
- if ( tableHeader.hasChildNodes() )
- {
- tableElement.appendChild( tableHeader );
- }
- if ( tableBody.hasChildNodes() )
- {
- tableElement.appendChild( tableBody );
- flow.appendChild( tableElement );
- }
- else
- {
- logger.log(
- POILogger.WARN,
- "Table without body starting on offset "
- + table.getStartOffset() + " -- "
- + table.getEndOffset() );
- }
- }
-
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import org.apache.poi.hwpf.usermodel.BorderCode;
-import org.apache.poi.hwpf.usermodel.CharacterProperties;
-import org.apache.poi.hwpf.usermodel.CharacterRun;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.hwpf.usermodel.TableCell;
-import org.apache.poi.hwpf.usermodel.TableRow;
-import org.w3c.dom.Element;
-
-public class WordToHtmlUtils extends AbstractWordUtils
-{
- public static void addBold( final boolean bold, StringBuilder style )
- {
- style.append( "font-weight: " + ( bold ? "bold" : "normal" ) + ";" );
- }
-
- public static void addBorder( BorderCode borderCode, String where,
- StringBuilder style )
- {
- if ( borderCode == null || borderCode.getBorderType() == 0 )
- return;
-
- if ( isEmpty( where ) )
- {
- style.append( "border-style: " + getBorderType( borderCode ) + "; " );
- style.append( "border-color: " + getColor( borderCode.getColor() )
- + "; " );
- style.append( "border-width: " + getBorderWidth( borderCode )
- + "; " );
- }
- else
- {
- style.append( "border-" + where + "-style: "
- + getBorderType( borderCode ) + "; " );
- style.append( "border-" + where + "-color: "
- + getColor( borderCode.getColor() ) + "; " );
- style.append( "border-" + where + "-width: "
- + getBorderWidth( borderCode ) + "; " );
- }
- }
-
- public static void addCharactersProperties(
- final CharacterRun characterRun, StringBuilder style )
- {
- final CharacterProperties clonedProperties = characterRun
- .cloneProperties();
-
- if ( characterRun.isBold() )
- {
- style.append( "font-weight: bold; " );
- }
- if ( characterRun.isItalic() )
- {
- style.append( "font-style: italic; " );
- }
-
- addBorder( clonedProperties.getBrc(), EMPTY, style );
-
- if ( characterRun.isCapitalized() )
- {
- style.append( "text-transform: uppercase; " );
- }
- if ( characterRun.isHighlighted() )
- {
- style.append( "background-color: "
- + getColor( clonedProperties.getIcoHighlight() ) + "; " );
- }
- if ( characterRun.isStrikeThrough() )
- {
- style.append( "text-decoration: line-through; " );
- }
- if ( characterRun.isShadowed() )
- {
- style.append( "text-shadow: " + characterRun.getFontSize() / 24
- + "pt; " );
- }
- if ( characterRun.isSmallCaps() )
- {
- style.append( "font-variant: small-caps; " );
- }
- if ( characterRun.getSubSuperScriptIndex() == 1 )
- {
- style.append( "baseline-shift: super; " );
- style.append( "font-size: smaller; " );
- }
- if ( characterRun.getSubSuperScriptIndex() == 2 )
- {
- style.append( "baseline-shift: sub; " );
- style.append( "font-size: smaller; " );
- }
- if ( characterRun.getUnderlineCode() > 0 )
- {
- style.append( "text-decoration: underline; " );
- }
- if ( characterRun.isVanished() )
- {
- style.append( "visibility: hidden; " );
- }
- }
-
- public static void addFontFamily( final String fontFamily,
- StringBuilder style )
- {
- if ( isEmpty( fontFamily ) )
- return;
-
- style.append( "font-family: " + fontFamily );
- }
-
- public static void addFontSize( final int fontSize, StringBuilder style )
- {
- style.append( "font-size: " + fontSize );
- }
-
- public static void addIndent( Paragraph paragraph, StringBuilder style )
- {
- addIndent( style, "text-indent", paragraph.getFirstLineIndent() );
- addIndent( style, "start-indent", paragraph.getIndentFromLeft() );
- addIndent( style, "end-indent", paragraph.getIndentFromRight() );
- addIndent( style, "space-before", paragraph.getSpacingBefore() );
- addIndent( style, "space-after", paragraph.getSpacingAfter() );
- }
-
- private static void addIndent( StringBuilder style, final String cssName,
- final int twipsValue )
- {
- if ( twipsValue == 0 )
- return;
-
- style.append( cssName + ": " + ( twipsValue / TWIPS_PER_PT ) + "pt; " );
- }
-
- public static void addJustification( Paragraph paragraph,
- final StringBuilder style )
- {
- String justification = getJustification( paragraph.getJustification() );
- if ( isNotEmpty( justification ) )
- style.append( "text-align: " + justification + "; " );
- }
-
- public static void addParagraphProperties( Paragraph paragraph,
- StringBuilder style )
- {
- addIndent( paragraph, style );
- addJustification( paragraph, style );
-
- addBorder( paragraph.getBottomBorder(), "bottom", style );
- addBorder( paragraph.getLeftBorder(), "left", style );
- addBorder( paragraph.getRightBorder(), "right", style );
- addBorder( paragraph.getTopBorder(), "top", style );
-
- if ( paragraph.pageBreakBefore() )
- {
- style.append( "break-before: page; " );
- }
-
- style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
-
- if ( paragraph.keepOnPage() )
- {
- style.append( "keep-together.within-page: always; " );
- }
-
- if ( paragraph.keepWithNext() )
- {
- style.append( "keep-with-next.within-page: always; " );
- }
-
- style.append( "linefeed-treatment: preserve; " );
- style.append( "white-space-collapse: false; " );
- }
-
- public static void addTableCellProperties( TableRow tableRow,
- TableCell tableCell, boolean toppest, boolean bottomest,
- boolean leftest, boolean rightest, StringBuilder style )
- {
- style.append( "width: " + ( tableCell.getWidth() / TWIPS_PER_INCH )
- + "in; " );
- style.append( "padding-start: "
- + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
- style.append( "padding-end: "
- + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
-
- BorderCode top = tableCell.getBrcTop() != null
- && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
- .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
- .getHorizontalBorder();
- BorderCode bottom = tableCell.getBrcBottom() != null
- && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
- .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
- : tableRow.getHorizontalBorder();
-
- BorderCode left = tableCell.getBrcLeft() != null
- && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
- .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
- .getVerticalBorder();
- BorderCode right = tableCell.getBrcRight() != null
- && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
- .getBrcRight() : rightest ? tableRow.getRightBorder()
- : tableRow.getVerticalBorder();
-
- addBorder( bottom, "bottom", style );
- addBorder( left, "left", style );
- addBorder( right, "right", style );
- addBorder( top, "top", style );
- }
-
- public static void addTableRowProperties( TableRow tableRow,
- StringBuilder style )
- {
- if ( tableRow.getRowHeight() > 0 )
- {
- style.append( "height: "
- + ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in; " );
- }
- if ( !tableRow.cantSplit() )
- {
- style.append( "keep-together: always; " );
- }
- }
-
- public static void setPictureProperties( Picture picture,
- Element graphicElement )
- {
- final int aspectRatioX = picture.getAspectRatioX();
- final int aspectRatioY = picture.getAspectRatioY();
-
- if ( aspectRatioX > 0 )
- {
- graphicElement
- .setAttribute( "content-width", ( ( picture.getDxaGoal()
- * aspectRatioX / 100 ) / TWIPS_PER_PT )
- + "pt" );
- }
- else
- graphicElement.setAttribute( "content-width",
- ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
-
- if ( aspectRatioY > 0 )
- graphicElement
- .setAttribute( "content-height", ( ( picture.getDyaGoal()
- * aspectRatioY / 100 ) / TWIPS_PER_PT )
- + "pt" );
- else
- graphicElement.setAttribute( "content-height",
- ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
-
- if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
- {
- graphicElement.setAttribute( "scaling", "uniform" );
- }
- else
- {
- graphicElement.setAttribute( "scaling", "non-uniform" );
- }
-
- graphicElement.setAttribute( "vertical-align", "text-bottom" );
-
- if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
- || picture.getDyaCropBottom() != 0
- || picture.getDxaCropLeft() != 0 )
- {
- int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
- int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
- int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
- int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
- graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
- + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
- + "pt)" );
- graphicElement.setAttribute( "oveerflow", "hidden" );
- }
- }
-
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.List;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hwpf.HWPFDocumentCore;
+
+/**
+ * Builds a JUnit suite with two test cases ("[FO]" and "[HTML]") for every
+ * <tt>.doc</tt> sample document. Each case converts the document and
+ * serializes the resulting DOM; it passes as long as no exception is thrown.
+ */
+public class TestWordToConverterSuite
+{
+    /**
+     * YK: a quick hack to exclude failing documents from the suite.
+     */
+    private static List<String> failingFiles = Arrays.asList();
+
+    /**
+     * Creates the suite by scanning the sample-document directory.
+     *
+     * @return suite holding one "[FO]" and one "[HTML]" case per
+     *         <tt>.doc</tt> file that is not listed in {@link #failingFiles}
+     */
+    public static Test suite()
+    {
+        TestSuite suite = new TestSuite();
+
+        File directory = POIDataSamples.getDocumentInstance().getFile(
+                "../document" );
+        for ( final File child : directory.listFiles( new FilenameFilter()
+        {
+            public boolean accept( File dir, String name )
+            {
+                return name.endsWith( ".doc" ) && !failingFiles.contains( name );
+            }
+        } ) )
+        {
+            final String name = child.getName();
+
+            suite.addTest( new TestCase( name + " [FO]" )
+            {
+                public void runTest() throws Exception
+                {
+                    test( child, false );
+                }
+            } );
+            suite.addTest( new TestCase( name + " [HTML]" )
+            {
+                public void runTest() throws Exception
+                {
+                    test( child, true );
+                }
+            } );
+
+        }
+
+        return suite;
+    }
+
+    /**
+     * Converts a single document and serializes the result into memory.
+     * Files that cannot be loaded at all are silently skipped.
+     *
+     * @param child
+     *            the <tt>.doc</tt> file to convert
+     * @param html
+     *            if <tt>true</tt>, the serializer uses the "html" output
+     *            method instead of its default
+     * @throws Exception
+     *             if conversion or serialization fails
+     */
+    protected static void test( File child, boolean html ) throws Exception
+    {
+        HWPFDocumentCore hwpfDocument;
+        try
+        {
+            hwpfDocument = AbstractWordUtils.loadDoc( child );
+        }
+        catch ( Exception exc )
+        {
+            // unable to parse file -- not WordToFoConverter fault
+            return;
+        }
+
+        // NOTE(review): both variants run WordToFoConverter; the html flag
+        // only switches the serializer's output method -- confirm whether
+        // the [HTML] case was meant to use the HTML converter instead.
+        WordToFoConverter wordToFoConverter = new WordToFoConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToFoConverter.processDocument( hwpfDocument );
+
+        StringWriter stringWriter = new StringWriter();
+
+        Transformer transformer = TransformerFactory.newInstance()
+                .newTransformer();
+        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+        // The output method has to be chosen before transform() runs;
+        // setting it afterwards cannot influence what was already written.
+        if ( html )
+        {
+            transformer.setOutputProperty( OutputKeys.METHOD, "html" );
+        }
+        transformer.transform(
+                new DOMSource( wordToFoConverter.getDocument() ),
+                new StreamResult( stringWriter ) );
+
+        // no exceptions
+    }
+}
--- /dev/null
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+package org.apache.poi.hwpf.converter;
+
+import java.io.StringWriter;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import junit.framework.TestCase;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hwpf.HWPFDocument;
+
+/**
+ * Test cases for {@link WordToFoConverter}
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class TestWordToFoConverter extends TestCase
+{
+    /**
+     * Converts a sample document with {@link WordToFoConverter} and returns
+     * the indented XML serialization of the resulting DOM.
+     *
+     * @param sampleFileName
+     *            name of the sample document to open through
+     *            {@link POIDataSamples}
+     * @return serialized conversion result
+     * @throws Exception
+     *             if loading, conversion or serialization fails
+     */
+    private static String getFoText( final String sampleFileName )
+            throws Exception
+    {
+        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
+                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
+
+        WordToFoConverter wordToFoConverter = new WordToFoConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        wordToFoConverter.processDocument( hwpfDocument );
+
+        StringWriter stringWriter = new StringWriter();
+
+        Transformer transformer = TransformerFactory.newInstance()
+                .newTransformer();
+        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+        transformer.transform(
+                new DOMSource( wordToFoConverter.getDocument() ),
+                new StreamResult( stringWriter ) );
+
+        String result = stringWriter.toString();
+        return result;
+    }
+
+    /**
+     * The output for <tt>equation.doc</tt> must contain the image-link
+     * placeholder comment for <tt>0.emf</tt>.
+     */
+    public void testEquation() throws Exception
+    {
+        final String sampleFileName = "equation.doc";
+        String result = getFoText( sampleFileName );
+
+        assertTrue( result
+                .contains( "<!--Image link to '0.emf' can be here-->" ) );
+    }
+
+    /**
+     * The output for <tt>hyperlink.doc</tt> must contain an external
+     * <tt>fo:basic-link</tt> and the link text.
+     */
+    public void testHyperlink() throws Exception
+    {
+        final String sampleFileName = "hyperlink.doc";
+        String result = getFoText( sampleFileName );
+
+        assertTrue( result
+                .contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
+        assertTrue( result.contains( "Hyperlink text" ) );
+    }
+
+    /**
+     * The output for <tt>pageref.doc</tt> must contain an internal
+     * <tt>fo:basic-link</tt> pointing at <tt>userref</tt>.
+     */
+    public void testPageref() throws Exception
+    {
+        final String sampleFileName = "pageref.doc";
+        String result = getFoText( sampleFileName );
+
+        // (leftover debug dump of the whole document to stdout removed)
+        assertTrue( result
+                .contains( "<fo:basic-link internal-destination=\"userref\">" ) );
+        assertTrue( result.contains( "1" ) );
+    }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import java.io.StringWriter;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import junit.framework.TestCase;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hwpf.HWPFDocument;
+
+/**
+ * Test cases for {@link WordToHtmlConverter}
+ *
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class TestWordToHtmlConverter extends TestCase
+{
+    /**
+     * Runs {@link WordToHtmlConverter} over a sample document and serializes
+     * the produced DOM as indented UTF-8 HTML.
+     *
+     * @param sampleFileName
+     *            name of the sample document to open through
+     *            {@link POIDataSamples}
+     * @return the serialized HTML text
+     * @throws Exception
+     *             if loading, conversion or serialization fails
+     */
+    private static String getHtmlText( final String sampleFileName )
+            throws Exception
+    {
+        final HWPFDocument source = new HWPFDocument( POIDataSamples
+                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
+
+        final WordToHtmlConverter converter = new WordToHtmlConverter(
+                DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                        .newDocument() );
+        converter.processDocument( source );
+
+        final Transformer serializer = TransformerFactory.newInstance()
+                .newTransformer();
+        serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
+        serializer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+        serializer.setOutputProperty( OutputKeys.METHOD, "html" );
+
+        final StringWriter buffer = new StringWriter();
+        serializer.transform( new DOMSource( converter.getDocument() ),
+                new StreamResult( buffer ) );
+
+        return buffer.toString();
+    }
+
+    /**
+     * The long digit run of <tt>Bug46610_2.doc</tt> must appear in the
+     * output in one piece.
+     */
+    public void testBug46610_2() throws Exception
+    {
+        final String html = getHtmlText( "Bug46610_2.doc" );
+        final String expected = "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678";
+
+        assertTrue( html.contains( expected ) );
+    }
+
+    /**
+     * The output for <tt>equation.doc</tt> must contain the image-link
+     * placeholder comment for <tt>0.emf</tt>.
+     */
+    public void testEquation() throws Exception
+    {
+        final String html = getHtmlText( "equation.doc" );
+        final String expected = "<!--Image link to '0.emf' can be here-->";
+
+        assertTrue( html.contains( expected ) );
+    }
+
+    /**
+     * The output for <tt>hyperlink.doc</tt> must contain the external
+     * anchor and its text.
+     */
+    public void testHyperlink() throws Exception
+    {
+        final String html = getHtmlText( "hyperlink.doc" );
+
+        assertTrue( html.contains( "<a href=\"http://testuri.org/\">" ) );
+        assertTrue( html.contains( "Hyperlink text" ) );
+    }
+
+    /**
+     * The output for <tt>pageref.doc</tt> must contain an in-document
+     * anchor to <tt>#userref</tt>.
+     */
+    public void testPageref() throws Exception
+    {
+        final String html = getHtmlText( "pageref.doc" );
+
+        assertTrue( html.contains( "<a href=\"#userref\">" ) );
+        assertTrue( html.contains( "1" ) );
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.StringWriter;
-import java.util.Arrays;
-import java.util.List;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.hwpf.HWPFDocumentCore;
-
-public class TestWordToExtractorSuite
-{
- /**
- * YK: a quick hack to exclude failing documents from the suite.
- */
- private static List<String> failingFiles = Arrays.asList();
-
- public static Test suite()
- {
- TestSuite suite = new TestSuite();
-
- File directory = POIDataSamples.getDocumentInstance().getFile(
- "../document" );
- for ( final File child : directory.listFiles( new FilenameFilter()
- {
- public boolean accept( File dir, String name )
- {
- return name.endsWith( ".doc" ) && !failingFiles.contains( name );
- }
- } ) )
- {
- final String name = child.getName();
-
- suite.addTest( new TestCase( name + " [FO]" )
- {
- public void runTest() throws Exception
- {
- test( child, false );
- }
- } );
- suite.addTest( new TestCase( name + " [HTML]" )
- {
- public void runTest() throws Exception
- {
- test( child, true );
- }
- } );
-
- }
-
- return suite;
- }
-
- protected static void test( File child, boolean html ) throws Exception
- {
- HWPFDocumentCore hwpfDocument;
- try
- {
- hwpfDocument = AbstractWordUtils.loadDoc( child );
- }
- catch ( Exception exc )
- {
- // unable to parse file -- not WordToFoExtractor fault
- return;
- }
-
- WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
- DocumentBuilderFactory.newInstance().newDocumentBuilder()
- .newDocument() );
- wordToFoExtractor.processDocument( hwpfDocument );
-
- StringWriter stringWriter = new StringWriter();
-
- Transformer transformer = TransformerFactory.newInstance()
- .newTransformer();
- transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
- transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
- transformer.transform(
- new DOMSource( wordToFoExtractor.getDocument() ),
- new StreamResult( stringWriter ) );
-
- if ( html )
- transformer.setOutputProperty( OutputKeys.METHOD, "html" );
-
- // no exceptions
- }
-}
+++ /dev/null
-/*
- * ====================================================================
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ====================================================================
- */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.StringWriter;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import junit.framework.TestCase;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.hwpf.HWPFDocument;
-
-/**
- * Test cases for {@link WordToFoExtractor}
- *
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class TestWordToFoExtractor extends TestCase
-{
- private static String getFoText( final String sampleFileName )
- throws Exception
- {
- HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
- .getDocumentInstance().openResourceAsStream( sampleFileName ) );
-
- WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
- DocumentBuilderFactory.newInstance().newDocumentBuilder()
- .newDocument() );
- wordToFoExtractor.processDocument( hwpfDocument );
-
- StringWriter stringWriter = new StringWriter();
-
- Transformer transformer = TransformerFactory.newInstance()
- .newTransformer();
- transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
- transformer.transform(
- new DOMSource( wordToFoExtractor.getDocument() ),
- new StreamResult( stringWriter ) );
-
- String result = stringWriter.toString();
- return result;
- }
-
- public void testHyperlink() throws Exception
- {
- final String sampleFileName = "hyperlink.doc";
- String result = getFoText( sampleFileName );
-
- assertTrue( result
- .contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
- assertTrue( result.contains( "Hyperlink text" ) );
- }
-
- public void testEquation() throws Exception
- {
- final String sampleFileName = "equation.doc";
- String result = getFoText( sampleFileName );
-
- assertTrue( result
- .contains( "<!--Image link to '0.emf' can be here-->" ) );
- }
-
- public void testPageref() throws Exception
- {
- final String sampleFileName = "pageref.doc";
- String result = getFoText( sampleFileName );
-
- System.out.println( result );
-
- assertTrue( result
- .contains( "<fo:basic-link internal-destination=\"userref\">" ) );
- assertTrue( result.contains( "1" ) );
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.StringWriter;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import junit.framework.TestCase;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.hwpf.HWPFDocument;
-
-/**
- * Test cases for {@link WordToFoExtractor}
- *
- * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
- */
-public class TestWordToHtmlExtractor extends TestCase
-{
- private static String getHtmlText( final String sampleFileName )
- throws Exception
- {
- HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
- .getDocumentInstance().openResourceAsStream( sampleFileName ) );
-
- WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
- DocumentBuilderFactory.newInstance().newDocumentBuilder()
- .newDocument() );
- wordToHtmlExtractor.processDocument( hwpfDocument );
-
- StringWriter stringWriter = new StringWriter();
-
- Transformer transformer = TransformerFactory.newInstance()
- .newTransformer();
- transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
- transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
- transformer.setOutputProperty( OutputKeys.METHOD, "html" );
- transformer.transform(
- new DOMSource( wordToHtmlExtractor.getDocument() ),
- new StreamResult( stringWriter ) );
-
- String result = stringWriter.toString();
- return result;
- }
-
- public void testBug46610_2() throws Exception
- {
- String result = getHtmlText( "Bug46610_2.doc" );
- assertTrue( result
- .contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
- }
-
- public void testEquation() throws Exception
- {
- String result = getHtmlText( "equation.doc" );
-
- assertTrue( result
- .contains( "<!--Image link to '0.emf' can be here-->" ) );
- }
-
- public void testHyperlink() throws Exception
- {
- String result = getHtmlText( "hyperlink.doc" );
-
- assertTrue( result.contains( "<a href=\"http://testuri.org/\">" ) );
- assertTrue( result.contains( "Hyperlink text" ) );
- }
-
- public void testPageref() throws Exception
- {
- String result = getHtmlText( "pageref.doc" );
-
- assertTrue( result.contains( "<a href=\"#userref\">" ) );
- assertTrue( result.contains( "1" ) );
- }
-}