import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
+import org.w3c.dom.Attr;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
public class AbstractWordUtils
{
public static final float TWIPS_PER_INCH = 1440.0f;
public static final int TWIPS_PER_PT = 20;
+ static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
+ {
+ if ( node1.getNodeType() != Node.ELEMENT_NODE
+ || node2.getNodeType() != Node.ELEMENT_NODE )
+ return false;
+
+ Element element1 = (Element) node1;
+ Element element2 = (Element) node2;
+
+ if ( !equals( requiredTagName, element1.getTagName() )
+ || !equals( requiredTagName, element2.getTagName() ) )
+ return false;
+
+ NamedNodeMap attributes1 = element1.getAttributes();
+ NamedNodeMap attributes2 = element2.getAttributes();
+
+ if ( attributes1.getLength() != attributes2.getLength() )
+ return false;
+
+ for ( int i = 0; i < attributes1.getLength(); i++ )
+ {
+ final Attr attr1 = (Attr) attributes1.item( i );
+ final Attr attr2;
+ if ( isNotEmpty( attr1.getNamespaceURI() ) )
+ attr2 = (Attr) attributes2.getNamedItemNS(
+ attr1.getNamespaceURI(), attr1.getLocalName() );
+ else
+ attr2 = (Attr) attributes2.getNamedItem( attr1.getName() );
+
+ if ( attr2 == null
+ || !equals( attr1.getTextContent(), attr2.getTextContent() ) )
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+  * Walks the children of <tt>parentElement</tt> and folds every pair of
+  * adjacent children accepted by
+  * {@link #canBeMerged(Node, Node, String)} into one: the children of the
+  * second node are appended to the first node and the emptied second node
+  * is removed from its parent.
+  *
+  * @param parentElement
+  *            element whose direct children are compacted in place
+  * @param childTagName
+  *            tag name a pair of adjacent children must share to be merged
+  */
+ static void compactChildNodes( Element parentElement, String childTagName )
+ {
+     NodeList childNodes = parentElement.getChildNodes();
+
+     // The length is re-read on every pass because merging removes
+     // children from the parent while we iterate.
+     int index = 0;
+     while ( index < childNodes.getLength() - 1 )
+     {
+         Node current = childNodes.item( index );
+         Node next = childNodes.item( index + 1 );
+
+         // call the local helper directly instead of going through
+         // another class name
+         if ( !canBeMerged( current, next, childTagName ) )
+         {
+             index++;
+             continue;
+         }
+
+         // merge: move everything from the second node into the first
+         while ( next.hasChildNodes() )
+             current.appendChild( next.getFirstChild() );
+         next.getParentNode().removeChild( next );
+
+         // index is deliberately not advanced: the node that now follows
+         // "current" may be mergeable with it as well
+     }
+ }
+
static boolean equals( String str1, String str2 )
{
return str1 == null ? str2 == null : str1.equals( str2 );
import org.apache.poi.util.POILogger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;
htmlDocumentFacade.getOrCreateCssClass(
pElement.getTagName(), "p", style.toString() ) );
- {
- // compact spans
- NodeList childNodes = pElement.getChildNodes();
- for ( int i = 0; i < childNodes.getLength() - 1; i++ )
- {
- Node child1 = childNodes.item( i );
- Node child2 = childNodes.item( i + 1 );
- if ( child1.getNodeType() != Node.ELEMENT_NODE
- || child2.getNodeType() != Node.ELEMENT_NODE
- || !WordToHtmlUtils.equals( "span",
- ( (Element) child1 ).getTagName() )
- || !WordToHtmlUtils.equals( "span",
- ( (Element) child2 ).getTagName() )
- || !WordToHtmlUtils.equals(
- ( (Element) child1 ).getAttribute( "class" ),
- ( (Element) child2 ).getAttribute( "class" ) ) )
- continue;
-
- // merge
- while ( child2.getChildNodes().getLength() > 0 )
- child1.appendChild( child2.getFirstChild() );
- child2.getParentNode().removeChild( child2 );
- i--;
- }
- }
-
+ WordToHtmlUtils.compactSpans( pElement );
return;
}