source.dussan.org Git - poi.git/commitdiff
add processing of hyphen chars in Word-to-HTML and Word-to-FO converters
author Sergey Vladimirov <sergey@apache.org>
Thu, 21 Jul 2011 03:33:29 +0000 (03:33 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Thu, 21 Jul 2011 03:33:29 +0000 (03:33 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149020 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java

index e3bc643dfee4cb3a16bee5114d8da73fdb826c00..41533b00829123d38e686a28e90275c604d6f422 100644 (file)
@@ -61,6 +61,10 @@ public abstract class AbstractWordConverter
 
     private static final byte SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE = 2;
 
+    private static final char UNICODECHAR_NONBREAKING_HYPHEN = '\u2011';
+
+    private static final char UNICODECHAR_ZERO_WIDTH_SPACE = '\u200b';
+
     private final Set<Bookmark> bookmarkStack = new LinkedHashSet<Bookmark>();
 
     private FontReplacer fontReplacer = new DefaultFontReplacer();
@@ -225,6 +229,16 @@ public abstract class AbstractWordConverter
                         }
                         processLineBreak( block, characterRun );
                     }
+                    else if ( charChar == 30 )
+                    {
+                        // Non-breaking hyphens are stored as ASCII 30
+                        stringBuilder.append( UNICODECHAR_NONBREAKING_HYPHEN );
+                    }
+                    else if ( charChar == 31 )
+                    {
+                        // Non-required hyphens to zero-width space
+                        stringBuilder.append( UNICODECHAR_ZERO_WIDTH_SPACE );
+                    }
                     else
                     {
                         stringBuilder.append( charChar );