From 3de5c462382b38dd964567e0a3b604699079f907 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Thu, 21 Jul 2011 09:18:38 +0000 Subject: [PATCH] compact fo:inline's in FO output of Word-to-FO converter git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149087 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hwpf/converter/AbstractWordUtils.java | 60 +++++++++++++++++++ .../poi/hwpf/converter/WordToFoConverter.java | 5 ++ .../poi/hwpf/converter/WordToFoUtils.java | 5 ++ .../hwpf/converter/WordToHtmlConverter.java | 29 +-------- .../poi/hwpf/converter/WordToHtmlUtils.java | 6 ++ 5 files changed, 77 insertions(+), 28 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java index 1e55f36beb..d464afc26b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java @@ -31,6 +31,11 @@ import org.apache.poi.hwpf.usermodel.BorderCode; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.IOUtils; +import org.w3c.dom.Attr; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; public class AbstractWordUtils { @@ -39,6 +44,61 @@ public class AbstractWordUtils public static final float TWIPS_PER_INCH = 1440.0f; public static final int TWIPS_PER_PT = 20; + static boolean canBeMerged( Node node1, Node node2, String requiredTagName ) + { + if ( node1.getNodeType() != Node.ELEMENT_NODE + || node2.getNodeType() != Node.ELEMENT_NODE ) + return false; + + Element element1 = (Element) node1; + Element element2 = (Element) node2; + + if ( !equals( requiredTagName, element1.getTagName() ) + || !equals( requiredTagName, element2.getTagName() ) ) + return false; + + NamedNodeMap attributes1 = element1.getAttributes(); + NamedNodeMap attributes2 = element2.getAttributes(); + + if ( attributes1.getLength() != attributes2.getLength() ) + return false; + + for ( int i = 0; i < attributes1.getLength(); i++ ) + { + final Attr attr1 = (Attr) attributes1.item( i ); + final Attr attr2; + if ( isNotEmpty( attr1.getNamespaceURI() ) ) + attr2 = (Attr) attributes2.getNamedItemNS( + attr1.getNamespaceURI(), attr1.getLocalName() ); + else + attr2 = (Attr) attributes2.getNamedItem( attr1.getName() ); + + if ( attr2 == null + || !equals( attr1.getTextContent(), attr2.getTextContent() ) ) + return false; + } + + return true; + } + + static void compactChildNodes( Element parentElement, String childTagName ) + { + NodeList childNodes = parentElement.getChildNodes(); + for ( int i = 0; i < childNodes.getLength() - 1; i++ ) + { + Node child1 = childNodes.item( i ); + Node child2 = childNodes.item( i + 1 ); + if ( !WordToFoUtils.canBeMerged( child1, child2, childTagName ) ) + continue; + + // merge + while ( child2.getChildNodes().getLength() > 0 ) + child1.appendChild( child2.getFirstChild() ); + child2.getParentNode().removeChild( child2 ); + i--; + } + } + static boolean equals( String str1, String str2 ) { return str1 == null ? str2 == null : str1.equals( str2 ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index c9873d7c7d..ba757248a1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -295,6 +295,8 @@ public class WordToFoConverter extends AbstractWordConverter { blocksProperies.pop(); } + + WordToFoUtils.compactInlines( endnote ); this.endnotes.add( endnote ); } @@ -337,6 +339,8 @@ public class WordToFoConverter extends AbstractWordConverter { blocksProperies.pop(); } + + WordToFoUtils.compactInlines( footnoteBlock ); } protected void processHyperlink( HWPFDocumentCore wordDocument, @@ -456,6 +460,7 @@ public class WordToFoConverter extends AbstractWordConverter blocksProperies.pop(); } + WordToFoUtils.compactInlines( block ); return; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java index 66f8734e11..d543bfc3a4 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java @@ -26,6 +26,11 @@ import org.w3c.dom.Element; public class WordToFoUtils extends AbstractWordUtils { + static void compactInlines( Element blockElement ) + { + compactChildNodes( blockElement, "fo:inline" ); + } + public static void setBold( final Element element, final boolean bold ) { element.setAttribute( "font-weight", bold ? "bold" : "normal" ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index 7d3b3f81d9..aa25963ae9 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -45,8 +45,6 @@ import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.w3c.dom.Document; import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; import org.w3c.dom.Text; import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH; @@ -526,32 +524,7 @@ public class WordToHtmlConverter extends AbstractWordConverter htmlDocumentFacade.getOrCreateCssClass( pElement.getTagName(), "p", style.toString() ) ); - { - // compact spans - NodeList childNodes = pElement.getChildNodes(); - for ( int i = 0; i < childNodes.getLength() - 1; i++ ) - { - Node child1 = childNodes.item( i ); - Node child2 = childNodes.item( i + 1 ); - if ( child1.getNodeType() != Node.ELEMENT_NODE - || child2.getNodeType() != Node.ELEMENT_NODE - || !WordToHtmlUtils.equals( "span", - ( (Element) child1 ).getTagName() ) - || !WordToHtmlUtils.equals( "span", - ( (Element) child2 ).getTagName() ) - || !WordToHtmlUtils.equals( - ( (Element) child1 ).getAttribute( "class" ), - ( (Element) child2 ).getAttribute( "class" ) ) ) - continue; - - // merge - while ( child2.getChildNodes().getLength() > 0 ) - child1.appendChild( child2.getFirstChild() ); - child2.getParentNode().removeChild( child2 ); - i--; - } - } - + WordToHtmlUtils.compactSpans( pElement ); return; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java index 87dbb982bb..2b1848a3ef 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java @@ -21,6 +21,7 @@ import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.TableCell; import org.apache.poi.hwpf.usermodel.TableRow; +import org.w3c.dom.Element; public class WordToHtmlUtils extends AbstractWordUtils { @@ -223,4 +224,9 @@ public class WordToHtmlUtils extends AbstractWordUtils } } + static void compactSpans( Element pElement ) + { + compactChildNodes( pElement, "span" ); + } + } -- 2.39.5