git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145604 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_8_BETA4
@@ -19,6 +19,8 @@ package org.apache.poi.hwpf.converter; | |||
import java.util.regex.Matcher; | |||
import java.util.regex.Pattern; | |||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet; | |||
import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.hwpf.HWPFDocumentCore; | |||
@@ -50,8 +52,25 @@ public abstract class AbstractWordConverter | |||
private static final POILogger logger = POILogFactory | |||
.getLogger( AbstractWordConverter.class ); | |||
private FontReplacer fontReplacer = new DefaultFontReplacer(); | |||
protected Triplet getCharacterRunTriplet( CharacterRun characterRun ) | |||
{ | |||
Triplet original = new Triplet(); | |||
original.bold = characterRun.isBold(); | |||
original.italic = characterRun.isItalic(); | |||
original.fontName = characterRun.getFontName(); | |||
Triplet updated = getFontReplacer().update( original ); | |||
return updated; | |||
} | |||
/**
 * Returns the {@link Document} associated with this converter.
 * NOTE(review): presumably the output DOM being built by the subclass -
 * confirm against the concrete converters.
 */
public abstract Document getDocument(); | |||
public FontReplacer getFontReplacer() | |||
{ | |||
return fontReplacer; | |||
} | |||
/**
 * Hook implemented by concrete converters for a run of text.
 * NOTE(review): presumably writes {@code text}, styled according to
 * {@code characterRun}, into {@code block} - the callers are not visible
 * in this excerpt, confirm against the implementations.
 */
protected abstract void outputCharacters( Element block, | |||
CharacterRun characterRun, String text ); | |||
@@ -144,25 +163,6 @@ public abstract class AbstractWordConverter | |||
return haveAnyText; | |||
} | |||
/**
 * Converts a whole Word document: first the summary information (when the
 * document has one), then every section of the document range in order.
 *
 * @param wordDocument
 *            document to process
 */
public void processDocument( HWPFDocumentCore wordDocument ) | |||
{ | |||
final SummaryInformation summaryInformation = wordDocument | |||
.getSummaryInformation(); | |||
// the summary information may be absent; only forward it when present
if ( summaryInformation != null ) | |||
{ | |||
processDocumentInformation( summaryInformation ); | |||
} | |||
final Range range = wordDocument.getRange(); | |||
// sections are handed over together with their zero-based index
for ( int s = 0; s < range.numSections(); s++ ) | |||
{ | |||
processSection( wordDocument, range.getSection( s ), s ); | |||
} | |||
} | |||
/**
 * Hook invoked by {@code processDocument} with the document's summary
 * information; it is only called when that information is non-null.
 *
 * @param summaryInformation
 *            summary information of the document being processed
 */
protected abstract void processDocumentInformation( | |||
SummaryInformation summaryInformation ); | |||
protected void processDeadField( HWPFDocumentCore wordDocument, | |||
Element currentBlock, Range range, int currentTableLevel, | |||
int beginMark, int separatorMark, int endMark ) | |||
@@ -195,6 +195,97 @@ public abstract class AbstractWordConverter | |||
return; | |||
} | |||
public void processDocument( HWPFDocumentCore wordDocument ) | |||
{ | |||
final SummaryInformation summaryInformation = wordDocument | |||
.getSummaryInformation(); | |||
if ( summaryInformation != null ) | |||
{ | |||
processDocumentInformation( summaryInformation ); | |||
} | |||
final Range range = wordDocument.getRange(); | |||
for ( int s = 0; s < range.numSections(); s++ ) | |||
{ | |||
processSection( wordDocument, range.getSection( s ), s ); | |||
} | |||
} | |||
/**
 * Hook invoked by {@code processDocument} with the document's summary
 * information; it is only called when that information is non-null.
 *
 * @param summaryInformation
 *            summary information of the document being processed
 */
protected abstract void processDocumentInformation( | |||
SummaryInformation summaryInformation ); | |||
protected void processField( HWPFDocument hwpfDocument, Range parentRange, | |||
int currentTableLevel, Field field, Element currentBlock ) | |||
{ | |||
switch ( field.getType() ) | |||
{ | |||
case 37: // page reference | |||
{ | |||
final Range firstSubrange = field.firstSubrange( parentRange ); | |||
if ( firstSubrange != null ) | |||
{ | |||
String formula = firstSubrange.text(); | |||
Pattern pagerefPattern = Pattern | |||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" ); | |||
Matcher matcher = pagerefPattern.matcher( formula ); | |||
if ( matcher.find() ) | |||
{ | |||
String pageref = matcher.group( 1 ); | |||
processPageref( hwpfDocument, currentBlock, | |||
field.secondSubrange( parentRange ), | |||
currentTableLevel, pageref ); | |||
return; | |||
} | |||
} | |||
break; | |||
} | |||
case 88: // hyperlink | |||
{ | |||
final Range firstSubrange = field.firstSubrange( parentRange ); | |||
if ( firstSubrange != null ) | |||
{ | |||
String formula = firstSubrange.text(); | |||
Pattern hyperlinkPattern = Pattern | |||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" ); | |||
Matcher matcher = hyperlinkPattern.matcher( formula ); | |||
if ( matcher.find() ) | |||
{ | |||
String hyperlink = matcher.group( 1 ); | |||
processHyperlink( hwpfDocument, currentBlock, | |||
field.secondSubrange( parentRange ), | |||
currentTableLevel, hyperlink ); | |||
return; | |||
} | |||
} | |||
break; | |||
} | |||
} | |||
logger.log( POILogger.WARN, parentRange + " contains " + field | |||
+ " with unsupported type or format" ); | |||
processCharacters( hwpfDocument, currentTableLevel, | |||
field.secondSubrange( parentRange ), currentBlock ); | |||
} | |||
protected Field processField( HWPFDocumentCore wordDocument, | |||
Range charactersRange, int currentTableLevel, int startOffset, | |||
Element currentBlock ) | |||
{ | |||
if ( !( wordDocument instanceof HWPFDocument ) ) | |||
return null; | |||
HWPFDocument hwpfDocument = (HWPFDocument) wordDocument; | |||
Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset( | |||
FieldsTables.PLCFFLDMOM, startOffset ); | |||
if ( field == null ) | |||
return null; | |||
processField( hwpfDocument, charactersRange, currentTableLevel, field, | |||
currentBlock ); | |||
return field; | |||
} | |||
/**
 * Hook invoked by {@code processField} for a hyperlink field (type 88)
 * whose formula matched the HYPERLINK pattern.
 *
 * @param wordDocument
 *            document that owns the field
 * @param currentBlock
 *            element that receives the output
 * @param textRange
 *            second subrange of the field
 * @param currentTableLevel
 *            table nesting level passed through from the caller
 * @param hyperlink
 *            text captured between the quotes of the HYPERLINK formula
 */
protected abstract void processHyperlink( HWPFDocumentCore wordDocument, | |||
Element currentBlock, Range textRange, int currentTableLevel, | |||
String hyperlink ); | |||
@@ -292,76 +383,9 @@ public abstract class AbstractWordConverter | |||
/**
 * Hook implemented by concrete converters for a table.
 * NOTE(review): presumably renders {@code table} into {@code flow} - the
 * callers are not visible in this excerpt, confirm against the
 * implementations.
 */
protected abstract void processTable( HWPFDocumentCore wordDocument, | |||
Element flow, Table table ); | |||
protected Field processField( HWPFDocumentCore wordDocument, | |||
Range charactersRange, int currentTableLevel, int startOffset, | |||
Element currentBlock ) | |||
public void setFontReplacer( FontReplacer fontReplacer ) | |||
{ | |||
if ( !( wordDocument instanceof HWPFDocument ) ) | |||
return null; | |||
HWPFDocument hwpfDocument = (HWPFDocument) wordDocument; | |||
Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset( | |||
FieldsTables.PLCFFLDMOM, startOffset ); | |||
if ( field == null ) | |||
return null; | |||
processField( hwpfDocument, charactersRange, currentTableLevel, field, | |||
currentBlock ); | |||
return field; | |||
} | |||
protected void processField( HWPFDocument hwpfDocument, Range parentRange, | |||
int currentTableLevel, Field field, Element currentBlock ) | |||
{ | |||
switch ( field.getType() ) | |||
{ | |||
case 37: // page reference | |||
{ | |||
final Range firstSubrange = field.firstSubrange( parentRange ); | |||
if ( firstSubrange != null ) | |||
{ | |||
String formula = firstSubrange.text(); | |||
Pattern pagerefPattern = Pattern | |||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" ); | |||
Matcher matcher = pagerefPattern.matcher( formula ); | |||
if ( matcher.find() ) | |||
{ | |||
String pageref = matcher.group( 1 ); | |||
processPageref( hwpfDocument, currentBlock, | |||
field.secondSubrange( parentRange ), | |||
currentTableLevel, pageref ); | |||
return; | |||
} | |||
} | |||
break; | |||
} | |||
case 88: // hyperlink | |||
{ | |||
final Range firstSubrange = field.firstSubrange( parentRange ); | |||
if ( firstSubrange != null ) | |||
{ | |||
String formula = firstSubrange.text(); | |||
Pattern hyperlinkPattern = Pattern | |||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" ); | |||
Matcher matcher = hyperlinkPattern.matcher( formula ); | |||
if ( matcher.find() ) | |||
{ | |||
String hyperlink = matcher.group( 1 ); | |||
processHyperlink( hwpfDocument, currentBlock, | |||
field.secondSubrange( parentRange ), | |||
currentTableLevel, hyperlink ); | |||
return; | |||
} | |||
} | |||
break; | |||
} | |||
} | |||
logger.log( POILogger.WARN, parentRange + " contains " + field | |||
+ " with unsupported type or format" ); | |||
processCharacters( hwpfDocument, currentTableLevel, | |||
field.secondSubrange( parentRange ), currentBlock ); | |||
this.fontReplacer = fontReplacer; | |||
} | |||
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range, |
@@ -98,7 +98,7 @@ public class AbstractWordUtils | |||
return "solid"; | |||
} | |||
} | |||
public static String getBorderWidth( BorderCode borderCode ) | |||
{ | |||
int lineWidth = borderCode.getLineWidth(); | |||
@@ -320,4 +320,18 @@ public class AbstractWordUtils | |||
} | |||
} | |||
static String substringBeforeLast( String str, String separator ) | |||
{ | |||
if ( isEmpty( str ) || isEmpty( separator ) ) | |||
{ | |||
return str; | |||
} | |||
int pos = str.lastIndexOf( separator ); | |||
if ( pos == -1 ) | |||
{ | |||
return str; | |||
} | |||
return str.substring( 0, pos ); | |||
} | |||
} |
@@ -0,0 +1,71 @@ | |||
package org.apache.poi.hwpf.converter; | |||
public class DefaultFontReplacer implements FontReplacer | |||
{ | |||
public Triplet update( Triplet original ) | |||
{ | |||
if ( !AbstractWordUtils.isNotEmpty( original.fontName ) ) | |||
{ | |||
String fontName = original.fontName; | |||
if ( fontName.endsWith( " Regular" ) ) | |||
fontName = AbstractWordUtils.substringBeforeLast( fontName, | |||
" Regular" ); | |||
if ( fontName | |||
.endsWith( " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439" ) ) | |||
fontName = AbstractWordUtils | |||
.substringBeforeLast( fontName, | |||
" \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439" ) | |||
+ " Bold"; | |||
if ( fontName | |||
.endsWith( " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432" ) ) | |||
fontName = AbstractWordUtils | |||
.substringBeforeLast( | |||
fontName, | |||
" \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432" ) | |||
+ " Bold Italic"; | |||
if ( fontName.endsWith( " \u041A\u0443\u0440\u0441\u0438\u0432" ) ) | |||
fontName = AbstractWordUtils.substringBeforeLast( fontName, | |||
" \u041A\u0443\u0440\u0441\u0438\u0432" ) + " Italic"; | |||
original.fontName = fontName; | |||
} | |||
if ( !AbstractWordUtils.isNotEmpty( original.fontName ) ) | |||
{ | |||
if ( "Times Regular".equals( original.fontName ) | |||
|| "Times-Regular".equals( original.fontName ) ) | |||
{ | |||
original.fontName = "Times"; | |||
original.bold = false; | |||
original.italic = false; | |||
} | |||
if ( "Times Bold".equals( original.fontName ) | |||
|| "Times-Bold".equals( original.fontName ) ) | |||
{ | |||
original.fontName = "Times"; | |||
original.bold = true; | |||
original.italic = false; | |||
} | |||
if ( "Times Italic".equals( original.fontName ) | |||
|| "Times-Italic".equals( original.fontName ) ) | |||
{ | |||
original.fontName = "Times"; | |||
original.bold = false; | |||
original.italic = true; | |||
} | |||
if ( "Times Bold Italic".equals( original.fontName ) | |||
|| "Times-BoldItalic".equals( original.fontName ) ) | |||
{ | |||
original.fontName = "Times"; | |||
original.bold = true; | |||
original.italic = true; | |||
} | |||
} | |||
return original; | |||
} | |||
} |
@@ -0,0 +1,13 @@ | |||
package org.apache.poi.hwpf.converter; | |||
/**
 * Strategy that may rewrite the font description of a character run before
 * it is written to the output.
 */
public interface FontReplacer
{
    /**
     * Plain mutable holder for a font name together with its bold and
     * italic flags.
     */
    class Triplet
    {
        /** Font name; {@code null} until assigned. */
        public String fontName;

        /** Whether the font is bold. */
        public boolean bold;

        /** Whether the font is italic. */
        public boolean italic;
    }

    /**
     * Produces the font description to use in place of {@code original}.
     *
     * @param original
     *            font description to examine
     * @return the font description to use
     */
    Triplet update( Triplet original );
}
@@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory; | |||
import javax.xml.transform.dom.DOMSource; | |||
import javax.xml.transform.stream.StreamResult; | |||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet; | |||
import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.hwpf.HWPFDocumentCore; | |||
@@ -206,16 +208,19 @@ public class WordToFoConverter extends AbstractWordConverter | |||
{ | |||
BlockProperies blockProperies = this.blocksProperies.peek(); | |||
Element inline = foDocumentFacade.createInline(); | |||
if ( characterRun.isBold() != blockProperies.pBold ) | |||
Triplet triplet = getCharacterRunTriplet( characterRun ); | |||
if ( triplet.bold != blockProperies.pBold ) | |||
{ | |||
WordToFoUtils.setBold( inline, characterRun.isBold() ); | |||
WordToFoUtils.setBold( inline, triplet.bold ); | |||
} | |||
if ( characterRun.isItalic() != blockProperies.pItalic ) | |||
if ( triplet.italic != blockProperies.pItalic ) | |||
{ | |||
WordToFoUtils.setItalic( inline, characterRun.isItalic() ); | |||
WordToFoUtils.setItalic( inline, triplet.italic ); | |||
} | |||
if ( characterRun.getFontName() != null | |||
&& !AbstractWordUtils.equals( characterRun.getFontName(), | |||
if ( WordToFoUtils.isNotEmpty( triplet.fontName ) | |||
&& !WordToFoUtils.equals( triplet.fontName, | |||
blockProperies.pFontName ) ) | |||
{ | |||
WordToFoUtils.setFontFamily( inline, characterRun.getFontName() ); | |||
@@ -318,24 +323,17 @@ public class WordToFoConverter extends AbstractWordConverter | |||
} | |||
{ | |||
final String pFontName; | |||
final int pFontSize; | |||
final boolean pBold; | |||
final boolean pItalic; | |||
{ | |||
CharacterRun characterRun = paragraph.getCharacterRun( 0 ); | |||
pFontSize = characterRun.getFontSize() / 2; | |||
pFontName = characterRun.getFontName(); | |||
pBold = characterRun.isBold(); | |||
pItalic = characterRun.isItalic(); | |||
} | |||
WordToFoUtils.setFontFamily( block, pFontName ); | |||
CharacterRun characterRun = paragraph.getCharacterRun( 0 ); | |||
int pFontSize = characterRun.getFontSize() / 2; | |||
Triplet triplet = getCharacterRunTriplet( characterRun ); | |||
WordToFoUtils.setFontFamily( block, triplet.fontName ); | |||
WordToFoUtils.setFontSize( block, pFontSize ); | |||
WordToFoUtils.setBold( block, pBold ); | |||
WordToFoUtils.setItalic( block, pItalic ); | |||
WordToFoUtils.setBold( block, triplet.bold ); | |||
WordToFoUtils.setItalic( block, triplet.italic ); | |||
blocksProperies.push( new BlockProperies( pFontName, pFontSize, | |||
pBold, pItalic ) ); | |||
blocksProperies.push( new BlockProperies( triplet.fontName, | |||
pFontSize, triplet.bold, triplet.italic ) ); | |||
} | |||
try | |||
{ |
@@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory; | |||
import javax.xml.transform.dom.DOMSource; | |||
import javax.xml.transform.stream.StreamResult; | |||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet; | |||
import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.hwpf.HWPFDocumentCore; | |||
@@ -189,16 +191,26 @@ public class WordToHtmlConverter extends AbstractWordConverter | |||
StringBuilder style = new StringBuilder(); | |||
BlockProperies blockProperies = this.blocksProperies.peek(); | |||
if ( characterRun.getFontName() != null | |||
&& !WordToHtmlUtils.equals( characterRun.getFontName(), | |||
Triplet triplet = getCharacterRunTriplet( characterRun ); | |||
if ( WordToHtmlUtils.isNotEmpty( triplet.fontName ) | |||
&& !WordToHtmlUtils.equals( triplet.fontName, | |||
blockProperies.pFontName ) ) | |||
{ | |||
style.append( "font-family: " + characterRun.getFontName() + "; " ); | |||
style.append( "font-family: " + triplet.fontName + "; " ); | |||
} | |||
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize ) | |||
{ | |||
style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " ); | |||
} | |||
if ( triplet.bold ) | |||
{ | |||
style.append( "font-weight: bold; " ); | |||
} | |||
if ( triplet.italic ) | |||
{ | |||
style.append( "font-style: italic; " ); | |||
} | |||
WordToHtmlUtils.addCharactersProperties( characterRun, style ); | |||
if ( style.length() != 0 ) | |||
@@ -299,8 +311,9 @@ public class WordToHtmlConverter extends AbstractWordConverter | |||
final CharacterRun characterRun = paragraph.getCharacterRun( 0 ); | |||
if ( characterRun != null ) | |||
{ | |||
Triplet triplet = getCharacterRunTriplet(characterRun); | |||
pFontSize = characterRun.getFontSize() / 2; | |||
pFontName = characterRun.getFontName(); | |||
pFontName = triplet.fontName; | |||
WordToHtmlUtils.addFontFamily( pFontName, style ); | |||
WordToHtmlUtils.addFontSize( pFontSize, style ); | |||
} |
@@ -63,15 +63,6 @@ public class WordToHtmlUtils extends AbstractWordUtils | |||
final CharacterProperties clonedProperties = characterRun | |||
.cloneProperties(); | |||
if ( characterRun.isBold() ) | |||
{ | |||
style.append( "font-weight: bold; " ); | |||
} | |||
if ( characterRun.isItalic() ) | |||
{ | |||
style.append( "font-style: italic; " ); | |||
} | |||
addBorder( clonedProperties.getBrc(), EMPTY, style ); | |||
if ( characterRun.isCapitalized() ) |