]> source.dussan.org Git - poi.git/commitdiff
add font replacer interface and default implementation
author Sergey Vladimirov <sergey@apache.org>
Tue, 12 Jul 2011 14:17:57 +0000 (14:17 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Tue, 12 Jul 2011 14:17:57 +0000 (14:17 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145604 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java
src/scratchpad/src/org/apache/poi/hwpf/converter/DefaultFontReplacer.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/FontReplacer.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java

index 9cf29f1937b774fbf013bd6970f919fa6c5e5eb4..2f740f5131118f1ef978e419f1825aed2e95b484 100644 (file)
@@ -19,6 +19,8 @@ package org.apache.poi.hwpf.converter;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+
 import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
@@ -50,8 +52,25 @@ public abstract class AbstractWordConverter
     private static final POILogger logger = POILogFactory
             .getLogger( AbstractWordConverter.class );
 
+    private FontReplacer fontReplacer = new DefaultFontReplacer();
+
+    /**
+     * Describes the font of a character run as a {@link Triplet} and lets the
+     * configured {@link FontReplacer} adjust it.
+     *
+     * @param characterRun
+     *            run whose bold flag, italic flag and font name are read
+     * @return the triplet returned by the font replacer
+     */
+    protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
+    {
+        final Triplet runFont = new Triplet();
+        runFont.bold = characterRun.isBold();
+        runFont.italic = characterRun.isItalic();
+        runFont.fontName = characterRun.getFontName();
+        return getFontReplacer().update( runFont );
+    }
+
     public abstract Document getDocument();
 
+    /**
+     * @return the font replacer currently held by this converter
+     */
+    public FontReplacer getFontReplacer()
+    {
+        return this.fontReplacer;
+    }
+
     protected abstract void outputCharacters( Element block,
             CharacterRun characterRun, String text );
 
@@ -144,25 +163,6 @@ public abstract class AbstractWordConverter
         return haveAnyText;
     }
 
-    public void processDocument( HWPFDocumentCore wordDocument )
-    {
-        final SummaryInformation summaryInformation = wordDocument
-                .getSummaryInformation();
-        if ( summaryInformation != null )
-        {
-            processDocumentInformation( summaryInformation );
-        }
-
-        final Range range = wordDocument.getRange();
-        for ( int s = 0; s < range.numSections(); s++ )
-        {
-            processSection( wordDocument, range.getSection( s ), s );
-        }
-    }
-
-    protected abstract void processDocumentInformation(
-            SummaryInformation summaryInformation );
-
     protected void processDeadField( HWPFDocumentCore wordDocument,
             Element currentBlock, Range range, int currentTableLevel,
             int beginMark, int separatorMark, int endMark )
@@ -195,6 +195,97 @@ public abstract class AbstractWordConverter
         return;
     }
 
+    /**
+     * Converts a whole document: hands the summary information, when the
+     * document has one, to processDocumentInformation and then processes every
+     * section of the document range in index order.
+     *
+     * @param wordDocument
+     *            document to convert
+     */
+    public void processDocument( HWPFDocumentCore wordDocument )
+    {
+        final SummaryInformation info = wordDocument.getSummaryInformation();
+        if ( info != null )
+        {
+            processDocumentInformation( info );
+        }
+
+        final Range documentRange = wordDocument.getRange();
+        int sectionIndex = 0;
+        while ( sectionIndex < documentRange.numSections() )
+        {
+            processSection( wordDocument,
+                    documentRange.getSection( sectionIndex ), sectionIndex );
+            sectionIndex++;
+        }
+    }
+
+    protected abstract void processDocumentInformation(
+            SummaryInformation summaryInformation );
+
+    /**
+     * Renders a field according to its type. Page references (type 37) and
+     * hyperlinks (type 88) whose formula matches the expected syntax are
+     * passed to processPageref / processHyperlink. Any other field is logged
+     * as a warning and its second subrange is handed to processCharacters.
+     *
+     * @param hwpfDocument
+     *            document that contains the field
+     * @param parentRange
+     *            range the subranges of the field are taken from
+     * @param currentTableLevel
+     *            table level, passed through to the called methods
+     * @param field
+     *            field to process
+     * @param currentBlock
+     *            element passed on as the output target
+     */
+    protected void processField( HWPFDocument hwpfDocument, Range parentRange,
+            int currentTableLevel, Field field, Element currentBlock )
+    {
+        final int fieldType = field.getType();
+        if ( fieldType == 37 )
+        {
+            // page reference
+            final Range formulaRange = field.firstSubrange( parentRange );
+            if ( formulaRange != null )
+            {
+                final String formulaText = formulaRange.text();
+                final Matcher pagerefMatcher = Pattern
+                        .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" )
+                        .matcher( formulaText );
+                if ( pagerefMatcher.find() )
+                {
+                    final String target = pagerefMatcher.group( 1 );
+                    processPageref( hwpfDocument, currentBlock,
+                            field.secondSubrange( parentRange ),
+                            currentTableLevel, target );
+                    return;
+                }
+            }
+        }
+        else if ( fieldType == 88 )
+        {
+            // hyperlink
+            final Range formulaRange = field.firstSubrange( parentRange );
+            if ( formulaRange != null )
+            {
+                final String formulaText = formulaRange.text();
+                final Matcher hyperlinkMatcher = Pattern
+                        .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" )
+                        .matcher( formulaText );
+                if ( hyperlinkMatcher.find() )
+                {
+                    final String target = hyperlinkMatcher.group( 1 );
+                    processHyperlink( hwpfDocument, currentBlock,
+                            field.secondSubrange( parentRange ),
+                            currentTableLevel, target );
+                    return;
+                }
+            }
+        }
+
+        // Reached for unknown types and for formulas that did not match.
+        logger.log( POILogger.WARN, parentRange + " contains " + field
+                + " with unsupported type or format" );
+        processCharacters( hwpfDocument, currentTableLevel,
+                field.secondSubrange( parentRange ), currentBlock );
+    }
+
+    /**
+     * Looks up the field that starts at {@code startOffset} and, when one is
+     * found, processes it.
+     *
+     * @param wordDocument
+     *            document to search; only {@link HWPFDocument} instances are
+     *            searched
+     * @param charactersRange
+     *            range passed on as the parent range of the field
+     * @param currentTableLevel
+     *            table level, passed through unchanged
+     * @param startOffset
+     *            offset used for the field lookup
+     * @param currentBlock
+     *            element passed on as the output target
+     * @return the processed field, or {@code null} when the document is not
+     *         an {@link HWPFDocument} or no field was found
+     */
+    protected Field processField( HWPFDocumentCore wordDocument,
+            Range charactersRange, int currentTableLevel, int startOffset,
+            Element currentBlock )
+    {
+        if ( wordDocument instanceof HWPFDocument )
+        {
+            final HWPFDocument document = (HWPFDocument) wordDocument;
+            final Field found = document.getFieldsTables()
+                    .lookupFieldByStartOffset( FieldsTables.PLCFFLDMOM,
+                            startOffset );
+            if ( found != null )
+            {
+                processField( document, charactersRange, currentTableLevel,
+                        found, currentBlock );
+            }
+            return found;
+        }
+        return null;
+    }
+
     protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
             Element currentBlock, Range textRange, int currentTableLevel,
             String hyperlink );
@@ -292,76 +383,9 @@ public abstract class AbstractWordConverter
     protected abstract void processTable( HWPFDocumentCore wordDocument,
             Element flow, Table table );
 
-    protected Field processField( HWPFDocumentCore wordDocument,
-            Range charactersRange, int currentTableLevel, int startOffset,
-            Element currentBlock )
+    public void setFontReplacer( FontReplacer fontReplacer )
     {
-        if ( !( wordDocument instanceof HWPFDocument ) )
-            return null;
-
-        HWPFDocument hwpfDocument = (HWPFDocument) wordDocument;
-        Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset(
-                FieldsTables.PLCFFLDMOM, startOffset );
-        if ( field == null )
-            return null;
-
-        processField( hwpfDocument, charactersRange, currentTableLevel, field,
-                currentBlock );
-
-        return field;
-    }
-
-    protected void processField( HWPFDocument hwpfDocument, Range parentRange,
-            int currentTableLevel, Field field, Element currentBlock )
-    {
-        switch ( field.getType() )
-        {
-        case 37: // page reference
-        {
-            final Range firstSubrange = field.firstSubrange( parentRange );
-            if ( firstSubrange != null )
-            {
-                String formula = firstSubrange.text();
-                Pattern pagerefPattern = Pattern
-                        .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
-                Matcher matcher = pagerefPattern.matcher( formula );
-                if ( matcher.find() )
-                {
-                    String pageref = matcher.group( 1 );
-                    processPageref( hwpfDocument, currentBlock,
-                            field.secondSubrange( parentRange ),
-                            currentTableLevel, pageref );
-                    return;
-                }
-            }
-            break;
-        }
-        case 88: // hyperlink
-        {
-            final Range firstSubrange = field.firstSubrange( parentRange );
-            if ( firstSubrange != null )
-            {
-                String formula = firstSubrange.text();
-                Pattern hyperlinkPattern = Pattern
-                        .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
-                Matcher matcher = hyperlinkPattern.matcher( formula );
-                if ( matcher.find() )
-                {
-                    String hyperlink = matcher.group( 1 );
-                    processHyperlink( hwpfDocument, currentBlock,
-                            field.secondSubrange( parentRange ),
-                            currentTableLevel, hyperlink );
-                    return;
-                }
-            }
-            break;
-        }
-        }
-
-        logger.log( POILogger.WARN, parentRange + " contains " + field
-                + " with unsupported type or format" );
-        processCharacters( hwpfDocument, currentTableLevel,
-                field.secondSubrange( parentRange ), currentBlock );
+        this.fontReplacer = fontReplacer;
     }
 
     protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
index b74ddc043426ff0337924ddc21c106ec7de7bbe0..79c5f7e645e41f4513a0fdc881ce17bc7609507f 100644 (file)
@@ -98,7 +98,7 @@ public class AbstractWordUtils
             return "solid";
         }
     }
-
+    
     public static String getBorderWidth( BorderCode borderCode )
     {
         int lineWidth = borderCode.getLineWidth();
@@ -320,4 +320,18 @@ public class AbstractWordUtils
         }
     }
 
+    /**
+     * Returns the part of {@code str} that precedes the last occurrence of
+     * {@code separator}. The input is returned unchanged when either argument
+     * is empty or the separator does not occur.
+     */
+    static String substringBeforeLast( String str, String separator )
+    {
+        if ( !isEmpty( str ) && !isEmpty( separator ) )
+        {
+            final int cut = str.lastIndexOf( separator );
+            if ( cut != -1 )
+            {
+                return str.substring( 0, cut );
+            }
+        }
+        return str;
+    }
+
 }
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/DefaultFontReplacer.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/DefaultFontReplacer.java
new file mode 100644 (file)
index 0000000..b99816e
--- /dev/null
@@ -0,0 +1,71 @@
+package org.apache.poi.hwpf.converter;
+
+/**
+ * Default {@link FontReplacer}. It rewrites font names that end in a style
+ * suffix (" Regular" or one of the Cyrillic bold / bold italic / italic
+ * suffixes) and turns the "Times ..." style variants into the plain "Times"
+ * family with the matching bold and italic flags.
+ */
+public class DefaultFontReplacer implements FontReplacer
+{
+    /** Cyrillic name suffix that is rewritten to " Bold". */
+    private static final String CYRILLIC_BOLD = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439";
+
+    /** Cyrillic name suffix that is rewritten to " Italic". */
+    private static final String CYRILLIC_ITALIC = " \u041A\u0443\u0440\u0441\u0438\u0432";
+
+    /**
+     * Normalizes the font described by {@code original}. The triplet is
+     * modified in place and returned.
+     *
+     * @param original
+     *            font name and style flags to normalize
+     * @return {@code original}, after any adjustments
+     */
+    public Triplet update( Triplet original )
+    {
+        // Null and empty names are left alone; every step below needs a real
+        // name to inspect.
+        if ( !AbstractWordUtils.isNotEmpty( original.fontName ) )
+        {
+            return original;
+        }
+
+        // Suffix rewrites are applied one after another, each on the result
+        // of the previous one.
+        String fontName = replaceSuffix( original.fontName, " Regular", "" );
+        fontName = replaceSuffix( fontName, CYRILLIC_BOLD, " Bold" );
+        fontName = replaceSuffix( fontName, CYRILLIC_BOLD + CYRILLIC_ITALIC,
+                " Bold Italic" );
+        fontName = replaceSuffix( fontName, CYRILLIC_ITALIC, " Italic" );
+        original.fontName = fontName;
+
+        // "Times Regular" cannot reach this point because its suffix was
+        // stripped above; it is kept for symmetry with the other variants.
+        if ( "Times Regular".equals( fontName )
+                || "Times-Regular".equals( fontName ) )
+        {
+            setTimes( original, false, false );
+        }
+        else if ( "Times Bold".equals( fontName )
+                || "Times-Bold".equals( fontName ) )
+        {
+            setTimes( original, true, false );
+        }
+        else if ( "Times Italic".equals( fontName )
+                || "Times-Italic".equals( fontName ) )
+        {
+            setTimes( original, false, true );
+        }
+        else if ( "Times Bold Italic".equals( fontName )
+                || "Times-BoldItalic".equals( fontName ) )
+        {
+            setTimes( original, true, true );
+        }
+
+        return original;
+    }
+
+    /**
+     * Replaces a trailing {@code suffix} of {@code name} with
+     * {@code replacement}; names without that suffix are returned unchanged.
+     */
+    private static String replaceSuffix( String name, String suffix,
+            String replacement )
+    {
+        if ( !name.endsWith( suffix ) )
+        {
+            return name;
+        }
+        return name.substring( 0, name.length() - suffix.length() )
+                + replacement;
+    }
+
+    /** Sets the family to "Times" together with the given style flags. */
+    private static void setTimes( Triplet font, boolean bold, boolean italic )
+    {
+        font.fontName = "Times";
+        font.bold = bold;
+        font.italic = italic;
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/FontReplacer.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/FontReplacer.java
new file mode 100644 (file)
index 0000000..51ec26d
--- /dev/null
@@ -0,0 +1,13 @@
+package org.apache.poi.hwpf.converter;
+
+/**
+ * Strategy for adjusting the font name and the bold and italic flags that
+ * describe the font of a character run.
+ */
+public interface FontReplacer
+{
+    /**
+     * Plain mutable holder for a font name and its bold / italic flags.
+     */
+    class Triplet
+    {
+        public String fontName;
+        public boolean bold;
+        public boolean italic;
+    }
+
+    /**
+     * Produces the triplet to use instead of {@code original}.
+     *
+     * @param original
+     *            font description to adjust
+     * @return the triplet to use
+     */
+    Triplet update( Triplet original );
+}
index 3be52d8d0a0924e37a213c318789a6b92993a76c..e1f47d7385ec4d97f76d6170ce2bdaf283fe0e7d 100644 (file)
@@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+
 import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
@@ -206,16 +208,19 @@ public class WordToFoConverter extends AbstractWordConverter
     {
         BlockProperies blockProperies = this.blocksProperies.peek();
         Element inline = foDocumentFacade.createInline();
-        if ( characterRun.isBold() != blockProperies.pBold )
+
+        Triplet triplet = getCharacterRunTriplet( characterRun );
+
+        if ( triplet.bold != blockProperies.pBold )
         {
-            WordToFoUtils.setBold( inline, characterRun.isBold() );
+            WordToFoUtils.setBold( inline, triplet.bold );
         }
-        if ( characterRun.isItalic() != blockProperies.pItalic )
+        if ( triplet.italic != blockProperies.pItalic )
         {
-            WordToFoUtils.setItalic( inline, characterRun.isItalic() );
+            WordToFoUtils.setItalic( inline, triplet.italic );
         }
-        if ( characterRun.getFontName() != null
-                && !AbstractWordUtils.equals( characterRun.getFontName(),
+        if ( WordToFoUtils.isNotEmpty( triplet.fontName )
+                && !WordToFoUtils.equals( triplet.fontName,
                         blockProperies.pFontName ) )
         {
             WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
@@ -318,24 +323,17 @@ public class WordToFoConverter extends AbstractWordConverter
         }
 
         {
-            final String pFontName;
-            final int pFontSize;
-            final boolean pBold;
-            final boolean pItalic;
-            {
-                CharacterRun characterRun = paragraph.getCharacterRun( 0 );
-                pFontSize = characterRun.getFontSize() / 2;
-                pFontName = characterRun.getFontName();
-                pBold = characterRun.isBold();
-                pItalic = characterRun.isItalic();
-            }
-            WordToFoUtils.setFontFamily( block, pFontName );
+            CharacterRun characterRun = paragraph.getCharacterRun( 0 );
+            int pFontSize = characterRun.getFontSize() / 2;
+            Triplet triplet = getCharacterRunTriplet( characterRun );
+
+            WordToFoUtils.setFontFamily( block, triplet.fontName );
             WordToFoUtils.setFontSize( block, pFontSize );
-            WordToFoUtils.setBold( block, pBold );
-            WordToFoUtils.setItalic( block, pItalic );
+            WordToFoUtils.setBold( block, triplet.bold );
+            WordToFoUtils.setItalic( block, triplet.italic );
 
-            blocksProperies.push( new BlockProperies( pFontName, pFontSize,
-                    pBold, pItalic ) );
+            blocksProperies.push( new BlockProperies( triplet.fontName,
+                    pFontSize, triplet.bold, triplet.italic ) );
         }
         try
         {
index 2355413c8297e8e6ab9a728c174f5565bf19ae75..4c8e03f2914247edffe8eb3822795a015d4a37ff 100644 (file)
@@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
+
 import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
@@ -189,16 +191,26 @@ public class WordToHtmlConverter extends AbstractWordConverter
 
         StringBuilder style = new StringBuilder();
         BlockProperies blockProperies = this.blocksProperies.peek();
-        if ( characterRun.getFontName() != null
-                && !WordToHtmlUtils.equals( characterRun.getFontName(),
+        Triplet triplet = getCharacterRunTriplet( characterRun );
+
+        if ( WordToHtmlUtils.isNotEmpty( triplet.fontName )
+                && !WordToHtmlUtils.equals( triplet.fontName,
                         blockProperies.pFontName ) )
         {
-            style.append( "font-family: " + characterRun.getFontName() + "; " );
+            style.append( "font-family: " + triplet.fontName + "; " );
         }
         if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
         {
             style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
         }
+        if ( triplet.bold )
+        {
+            style.append( "font-weight: bold; " );
+        }
+        if ( triplet.italic )
+        {
+            style.append( "font-style: italic; " );
+        }
 
         WordToHtmlUtils.addCharactersProperties( characterRun, style );
         if ( style.length() != 0 )
@@ -299,8 +311,9 @@ public class WordToHtmlConverter extends AbstractWordConverter
             final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
             if ( characterRun != null )
             {
+                Triplet triplet = getCharacterRunTriplet(characterRun);
                 pFontSize = characterRun.getFontSize() / 2;
-                pFontName = characterRun.getFontName();
+                pFontName = triplet.fontName;
                 WordToHtmlUtils.addFontFamily( pFontName, style );
                 WordToHtmlUtils.addFontSize( pFontSize, style );
             }
index 80e13a453a845f40d71ebf48ef03fcf088d5290b..f257ed3c0d2c3cc9be1ba430d237cdc94aa55cde 100644 (file)
@@ -63,15 +63,6 @@ public class WordToHtmlUtils extends AbstractWordUtils
         final CharacterProperties clonedProperties = characterRun
                 .cloneProperties();
 
-        if ( characterRun.isBold() )
-        {
-            style.append( "font-weight: bold; " );
-        }
-        if ( characterRun.isItalic() )
-        {
-            style.append( "font-style: italic; " );
-        }
-
         addBorder( clonedProperties.getBrc(), EMPTY, style );
 
         if ( characterRun.isCapitalized() )