import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
private static final POILogger logger = POILogFactory
.getLogger( AbstractWordConverter.class );
+ private FontReplacer fontReplacer = new DefaultFontReplacer();
+
+ protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
+ {
+ Triplet original = new Triplet();
+ original.bold = characterRun.isBold();
+ original.italic = characterRun.isItalic();
+ original.fontName = characterRun.getFontName();
+ Triplet updated = getFontReplacer().update( original );
+ return updated;
+ }
+
public abstract Document getDocument();
+ /**
+  * @return the font replacer this converter applies to character run fonts
+  */
+ public FontReplacer getFontReplacer()
+ {
+     return this.fontReplacer;
+ }
+
protected abstract void outputCharacters( Element block,
CharacterRun characterRun, String text );
return haveAnyText;
}
- public void processDocument( HWPFDocumentCore wordDocument )
- {
- final SummaryInformation summaryInformation = wordDocument
- .getSummaryInformation();
- if ( summaryInformation != null )
- {
- processDocumentInformation( summaryInformation );
- }
-
- final Range range = wordDocument.getRange();
- for ( int s = 0; s < range.numSections(); s++ )
- {
- processSection( wordDocument, range.getSection( s ), s );
- }
- }
-
- protected abstract void processDocumentInformation(
- SummaryInformation summaryInformation );
-
protected void processDeadField( HWPFDocumentCore wordDocument,
Element currentBlock, Range range, int currentTableLevel,
int beginMark, int separatorMark, int endMark )
return;
}
+ public void processDocument( HWPFDocumentCore wordDocument )
+ {
+ final SummaryInformation summaryInformation = wordDocument
+ .getSummaryInformation();
+ if ( summaryInformation != null )
+ {
+ processDocumentInformation( summaryInformation );
+ }
+
+ final Range range = wordDocument.getRange();
+ for ( int s = 0; s < range.numSections(); s++ )
+ {
+ processSection( wordDocument, range.getSection( s ), s );
+ }
+ }
+
+ protected abstract void processDocumentInformation(
+ SummaryInformation summaryInformation );
+
+ protected void processField( HWPFDocument hwpfDocument, Range parentRange,
+ int currentTableLevel, Field field, Element currentBlock )
+ {
+ switch ( field.getType() )
+ {
+ case 37: // page reference
+ {
+ final Range firstSubrange = field.firstSubrange( parentRange );
+ if ( firstSubrange != null )
+ {
+ String formula = firstSubrange.text();
+ Pattern pagerefPattern = Pattern
+ .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
+ Matcher matcher = pagerefPattern.matcher( formula );
+ if ( matcher.find() )
+ {
+ String pageref = matcher.group( 1 );
+ processPageref( hwpfDocument, currentBlock,
+ field.secondSubrange( parentRange ),
+ currentTableLevel, pageref );
+ return;
+ }
+ }
+ break;
+ }
+ case 88: // hyperlink
+ {
+ final Range firstSubrange = field.firstSubrange( parentRange );
+ if ( firstSubrange != null )
+ {
+ String formula = firstSubrange.text();
+ Pattern hyperlinkPattern = Pattern
+ .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
+ Matcher matcher = hyperlinkPattern.matcher( formula );
+ if ( matcher.find() )
+ {
+ String hyperlink = matcher.group( 1 );
+ processHyperlink( hwpfDocument, currentBlock,
+ field.secondSubrange( parentRange ),
+ currentTableLevel, hyperlink );
+ return;
+ }
+ }
+ break;
+ }
+ }
+
+ logger.log( POILogger.WARN, parentRange + " contains " + field
+ + " with unsupported type or format" );
+ processCharacters( hwpfDocument, currentTableLevel,
+ field.secondSubrange( parentRange ), currentBlock );
+ }
+
+ protected Field processField( HWPFDocumentCore wordDocument,
+ Range charactersRange, int currentTableLevel, int startOffset,
+ Element currentBlock )
+ {
+ if ( !( wordDocument instanceof HWPFDocument ) )
+ return null;
+
+ HWPFDocument hwpfDocument = (HWPFDocument) wordDocument;
+ Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset(
+ FieldsTables.PLCFFLDMOM, startOffset );
+ if ( field == null )
+ return null;
+
+ processField( hwpfDocument, charactersRange, currentTableLevel, field,
+ currentBlock );
+
+ return field;
+ }
+
protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String hyperlink );
protected abstract void processTable( HWPFDocumentCore wordDocument,
Element flow, Table table );
- protected Field processField( HWPFDocumentCore wordDocument,
- Range charactersRange, int currentTableLevel, int startOffset,
- Element currentBlock )
+ public void setFontReplacer( FontReplacer fontReplacer )
{
- if ( !( wordDocument instanceof HWPFDocument ) )
- return null;
-
- HWPFDocument hwpfDocument = (HWPFDocument) wordDocument;
- Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset(
- FieldsTables.PLCFFLDMOM, startOffset );
- if ( field == null )
- return null;
-
- processField( hwpfDocument, charactersRange, currentTableLevel, field,
- currentBlock );
-
- return field;
- }
-
- protected void processField( HWPFDocument hwpfDocument, Range parentRange,
- int currentTableLevel, Field field, Element currentBlock )
- {
- switch ( field.getType() )
- {
- case 37: // page reference
- {
- final Range firstSubrange = field.firstSubrange( parentRange );
- if ( firstSubrange != null )
- {
- String formula = firstSubrange.text();
- Pattern pagerefPattern = Pattern
- .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
- Matcher matcher = pagerefPattern.matcher( formula );
- if ( matcher.find() )
- {
- String pageref = matcher.group( 1 );
- processPageref( hwpfDocument, currentBlock,
- field.secondSubrange( parentRange ),
- currentTableLevel, pageref );
- return;
- }
- }
- break;
- }
- case 88: // hyperlink
- {
- final Range firstSubrange = field.firstSubrange( parentRange );
- if ( firstSubrange != null )
- {
- String formula = firstSubrange.text();
- Pattern hyperlinkPattern = Pattern
- .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
- Matcher matcher = hyperlinkPattern.matcher( formula );
- if ( matcher.find() )
- {
- String hyperlink = matcher.group( 1 );
- processHyperlink( hwpfDocument, currentBlock,
- field.secondSubrange( parentRange ),
- currentTableLevel, hyperlink );
- return;
- }
- }
- break;
- }
- }
-
- logger.log( POILogger.WARN, parentRange + " contains " + field
- + " with unsupported type or format" );
- processCharacters( hwpfDocument, currentTableLevel,
- field.secondSubrange( parentRange ), currentBlock );
+ this.fontReplacer = fontReplacer;
}
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
--- /dev/null
+package org.apache.poi.hwpf.converter;
+
+/**
+ * Default {@link FontReplacer}. It normalizes font names that carry a style
+ * suffix and maps the known "Times" style variants onto the plain "Times"
+ * family with explicit bold/italic flags.
+ * <p>
+ * The triplet passed to {@link #update(Triplet)} is modified in place and
+ * returned; a triplet whose font name is <tt>null</tt> or empty is returned
+ * unchanged.
+ */
+public class DefaultFontReplacer implements FontReplacer
+{
+    /** English suffix that is simply dropped from the font name. */
+    private static final String SUFFIX_REGULAR = " Regular";
+
+    /** Cyrillic (Russian) suffix that is rewritten to " Bold". */
+    private static final String SUFFIX_RU_BOLD = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439";
+
+    /** Cyrillic (Russian) suffix that is rewritten to " Bold Italic". */
+    private static final String SUFFIX_RU_BOLD_ITALIC = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432";
+
+    /** Cyrillic (Russian) suffix that is rewritten to " Italic". */
+    private static final String SUFFIX_RU_ITALIC = " \u041A\u0443\u0440\u0441\u0438\u0432";
+
+    /**
+     * Normalizes the font name of the given triplet and, for the known
+     * "Times" variants, also sets its bold and italic flags.
+     *
+     * @param original
+     *            triplet to normalize; it is modified in place
+     * @return the same <tt>original</tt> instance
+     */
+    public Triplet update( Triplet original )
+    {
+        // Guard against null/empty names. The guard used to be negated, which
+        // dereferenced a null font name and skipped every real font name.
+        if ( AbstractWordUtils.isNotEmpty( original.fontName ) )
+        {
+            String fontName = original.fontName;
+
+            // The suffix checks are deliberately sequential (not else-if):
+            // each one sees the result of the previous rewrite.
+            if ( fontName.endsWith( SUFFIX_REGULAR ) )
+            {
+                fontName = AbstractWordUtils.substringBeforeLast( fontName,
+                        SUFFIX_REGULAR );
+            }
+
+            if ( fontName.endsWith( SUFFIX_RU_BOLD ) )
+            {
+                fontName = AbstractWordUtils.substringBeforeLast( fontName,
+                        SUFFIX_RU_BOLD ) + " Bold";
+            }
+
+            if ( fontName.endsWith( SUFFIX_RU_BOLD_ITALIC ) )
+            {
+                fontName = AbstractWordUtils.substringBeforeLast( fontName,
+                        SUFFIX_RU_BOLD_ITALIC ) + " Bold Italic";
+            }
+
+            if ( fontName.endsWith( SUFFIX_RU_ITALIC ) )
+            {
+                fontName = AbstractWordUtils.substringBeforeLast( fontName,
+                        SUFFIX_RU_ITALIC ) + " Italic";
+            }
+
+            original.fontName = fontName;
+
+            // Map the "Times" style variants to the plain family plus flags.
+            // A name left empty by the suffix stripping matches none of them.
+            // NOTE(review): "Times Regular" can no longer occur here because
+            // the " Regular" suffix was stripped above; kept for parity with
+            // the hyphenated form.
+            if ( "Times Regular".equals( fontName )
+                    || "Times-Regular".equals( fontName ) )
+            {
+                original.fontName = "Times";
+                original.bold = false;
+                original.italic = false;
+            }
+            else if ( "Times Bold".equals( fontName )
+                    || "Times-Bold".equals( fontName ) )
+            {
+                original.fontName = "Times";
+                original.bold = true;
+                original.italic = false;
+            }
+            else if ( "Times Italic".equals( fontName )
+                    || "Times-Italic".equals( fontName ) )
+            {
+                original.fontName = "Times";
+                original.bold = false;
+                original.italic = true;
+            }
+            else if ( "Times Bold Italic".equals( fontName )
+                    || "Times-BoldItalic".equals( fontName ) )
+            {
+                original.fontName = "Times";
+                original.bold = true;
+                original.italic = true;
+            }
+        }
+
+        return original;
+    }
+}
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
+import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
{
BlockProperies blockProperies = this.blocksProperies.peek();
Element inline = foDocumentFacade.createInline();
- if ( characterRun.isBold() != blockProperies.pBold )
+
+ Triplet triplet = getCharacterRunTriplet( characterRun );
+
+ if ( triplet.bold != blockProperies.pBold )
{
- WordToFoUtils.setBold( inline, characterRun.isBold() );
+ WordToFoUtils.setBold( inline, triplet.bold );
}
- if ( characterRun.isItalic() != blockProperies.pItalic )
+ if ( triplet.italic != blockProperies.pItalic )
{
- WordToFoUtils.setItalic( inline, characterRun.isItalic() );
+ WordToFoUtils.setItalic( inline, triplet.italic );
}
- if ( characterRun.getFontName() != null
- && !AbstractWordUtils.equals( characterRun.getFontName(),
+ if ( WordToFoUtils.isNotEmpty( triplet.fontName )
+ && !WordToFoUtils.equals( triplet.fontName,
blockProperies.pFontName ) )
{
WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
}
{
- final String pFontName;
- final int pFontSize;
- final boolean pBold;
- final boolean pItalic;
- {
- CharacterRun characterRun = paragraph.getCharacterRun( 0 );
- pFontSize = characterRun.getFontSize() / 2;
- pFontName = characterRun.getFontName();
- pBold = characterRun.isBold();
- pItalic = characterRun.isItalic();
- }
- WordToFoUtils.setFontFamily( block, pFontName );
+ CharacterRun characterRun = paragraph.getCharacterRun( 0 );
+ int pFontSize = characterRun.getFontSize() / 2;
+ Triplet triplet = getCharacterRunTriplet( characterRun );
+
+ WordToFoUtils.setFontFamily( block, triplet.fontName );
WordToFoUtils.setFontSize( block, pFontSize );
- WordToFoUtils.setBold( block, pBold );
- WordToFoUtils.setItalic( block, pItalic );
+ WordToFoUtils.setBold( block, triplet.bold );
+ WordToFoUtils.setItalic( block, triplet.italic );
- blocksProperies.push( new BlockProperies( pFontName, pFontSize,
- pBold, pItalic ) );
+ blocksProperies.push( new BlockProperies( triplet.fontName,
+ pFontSize, triplet.bold, triplet.italic ) );
}
try
{
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
+import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
StringBuilder style = new StringBuilder();
BlockProperies blockProperies = this.blocksProperies.peek();
- if ( characterRun.getFontName() != null
- && !WordToHtmlUtils.equals( characterRun.getFontName(),
+ Triplet triplet = getCharacterRunTriplet( characterRun );
+
+ if ( WordToHtmlUtils.isNotEmpty( triplet.fontName )
+ && !WordToHtmlUtils.equals( triplet.fontName,
blockProperies.pFontName ) )
{
- style.append( "font-family: " + characterRun.getFontName() + "; " );
+ style.append( "font-family: " + triplet.fontName + "; " );
}
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
{
style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
}
+ if ( triplet.bold )
+ {
+ style.append( "font-weight: bold; " );
+ }
+ if ( triplet.italic )
+ {
+ style.append( "font-style: italic; " );
+ }
WordToHtmlUtils.addCharactersProperties( characterRun, style );
if ( style.length() != 0 )
final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
if ( characterRun != null )
{
+ Triplet triplet = getCharacterRunTriplet(characterRun);
pFontSize = characterRun.getFontSize() / 2;
- pFontName = characterRun.getFontName();
+ pFontName = triplet.fontName;
WordToHtmlUtils.addFontFamily( pFontName, style );
WordToHtmlUtils.addFontSize( pFontSize, style );
}