]> source.dussan.org Git - poi.git/commitdiff
better processing of Word tables in cases where different rows have different cell widths
author Sergey Vladimirov <sergey@apache.org>
Fri, 22 Jul 2011 09:42:32 +0000 (09:42 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Fri, 22 Jul 2011 09:42:32 +0000 (09:42 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149528 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java
src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java
src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java
src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java
test-data/document/table-merges.doc [new file with mode: 0644]

index d464afc26bba3a89900e843f9fe6b5b4fb10d98a..6e974ea6d983da0f6e90f08d51896399a4ee27db 100644 (file)
@@ -20,6 +20,8 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Set;
+import java.util.TreeSet;
 
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFDocumentCore;
@@ -29,6 +31,9 @@ import org.apache.poi.hwpf.model.ListLevel;
 import org.apache.poi.hwpf.model.ListTables;
 import org.apache.poi.hwpf.usermodel.BorderCode;
 import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableRow;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.util.IOUtils;
 import org.w3c.dom.Attr;
@@ -44,6 +49,42 @@ public class AbstractWordUtils
     public static final float TWIPS_PER_INCH = 1440.0f;
     public static final int TWIPS_PER_PT = 20;
 
+    /**
+     * Collects every distinct cell edge (left edge, and left edge plus width)
+     * over all rows. In HTML (and FO) cells of the same column must share one
+     * width, so differing rows are expressed as spans over this edge list.
+     * 
+     * @param table
+     *            table whose rows and cells are scanned for edges
+     * @return ascending array of distinct edges (leftmost included) in twips
+     */
+    static int[] buildTableCellEdgesArray( Table table )
+    {
+        Set<Integer> edges = new TreeSet<Integer>(); // sorted, no duplicates
+
+        for ( int r = 0; r < table.numRows(); r++ )
+        {
+            TableRow tableRow = table.getRow( r );
+            for ( int c = 0; c < tableRow.numCells(); c++ )
+            {
+                TableCell tableCell = tableRow.getCell( c );
+
+                edges.add( Integer.valueOf( tableCell.getLeftEdge() ) );
+                edges.add( Integer.valueOf( tableCell.getLeftEdge()
+                        + tableCell.getWidth() ) ); // right edge of the cell
+            }
+        }
+
+        Integer[] sorted = edges.toArray( new Integer[edges.size()] );
+        int[] result = new int[sorted.length]; // unboxed copy of the set
+        for ( int i = 0; i < sorted.length; i++ )
+        {
+            result[i] = sorted[i].intValue();
+        }
+
+        return result;
+    }
+
     static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
     {
         if ( node1.getNodeType() != Node.ELEMENT_NODE
index ba757248a10a173ee20987b6e3c1f11dd6c21d94..ca5b4a89e549a49a7bc00ba2adac64c80e45c148 100644 (file)
@@ -489,6 +489,8 @@ public class WordToFoConverter extends AbstractWordConverter
         Element tableHeader = foDocumentFacade.createTableHeader();
         Element tableBody = foDocumentFacade.createTableBody();
 
+        final int[] tableCellEdges = WordToHtmlUtils
+                .buildTableCellEdgesArray( table );
         final int tableRows = table.numRows();
 
         int maxColumns = Integer.MIN_VALUE;
@@ -504,6 +506,8 @@ public class WordToFoConverter extends AbstractWordConverter
             Element tableRowElement = foDocumentFacade.createTableRow();
             WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
 
+            // index of current element in tableCellEdges[]
+            int currentEdgeIndex = 0;
             final int rowCells = tableRow.numCells();
             for ( int c = 0; c < rowCells; c++ )
             {
@@ -521,30 +525,22 @@ public class WordToFoConverter extends AbstractWordConverter
                         tableCellElement, r == 0, r == tableRows - 1, c == 0,
                         c == rowCells - 1 );
 
-                if ( tableCell.isFirstMerged() )
+                int colSpan = 0;
+                int cellRightEdge = tableCell.getLeftEdge()
+                        + tableCell.getWidth();
+                while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
                 {
-                    int count = 0;
-                    for ( int c1 = c; c1 < rowCells; c1++ )
-                    {
-                        TableCell nextCell = tableRow.getCell( c1 );
-                        if ( nextCell.isMerged() )
-                            count++;
-                        if ( !nextCell.isMerged() )
-                            break;
-                    }
-                    tableCellElement.setAttribute( "number-columns-spanned", ""
-                            + count );
-                }
-                else
-                {
-                    if ( c == rowCells - 1 && c != maxColumns - 1 )
-                    {
-                        tableCellElement.setAttribute(
-                                "number-columns-spanned", ""
-                                        + ( maxColumns - c ) );
-                    }
+                    colSpan++;
+                    currentEdgeIndex++;
                 }
 
+                if ( colSpan == 0 )
+                    continue;
+
+                if ( colSpan != 1 )
+                    tableCellElement.setAttribute( "number-columns-spanned",
+                            String.valueOf( colSpan ) );
+
                 if ( tableCell.isFirstVerticallyMerged() )
                 {
                     int count = 0;
@@ -559,8 +555,9 @@ public class WordToFoConverter extends AbstractWordConverter
                         if ( !nextCell.isVerticallyMerged() )
                             break;
                     }
-                    tableCellElement.setAttribute( "number-rows-spanned", ""
-                            + count );
+                    if ( count > 1 )
+                        tableCellElement.setAttribute( "number-rows-spanned",
+                                String.valueOf( count ) );
                 }
 
                 processParagraphes( wordDocument, tableCellElement, tableCell,
index aa25963ae93ad16ee2ab1155296ace70a6893d97..72410abc7f10ee9a30feecd90bd002acaee1b031 100644 (file)
@@ -557,6 +557,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
         Element tableHeader = htmlDocumentFacade.createTableHeader();
         Element tableBody = htmlDocumentFacade.createTableBody();
 
+        final int[] tableCellEdges = WordToHtmlUtils
+                .buildTableCellEdgesArray( table );
         final int tableRows = table.numRows();
 
         int maxColumns = Integer.MIN_VALUE;
@@ -573,14 +575,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
             StringBuilder tableRowStyle = new StringBuilder();
             WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
 
+            // index of current element in tableCellEdges[]
+            int currentEdgeIndex = 0;
             final int rowCells = tableRow.numCells();
             for ( int c = 0; c < rowCells; c++ )
             {
                 TableCell tableCell = tableRow.getCell( c );
 
-                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
-                    continue;
-
                 if ( tableCell.isVerticallyMerged()
                         && !tableCell.isFirstVerticallyMerged() )
                     continue;
@@ -600,43 +601,41 @@ public class WordToHtmlConverter extends AbstractWordConverter
                         r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
                         tableCellStyle );
 
-                if ( tableCell.isFirstMerged() )
+                int colSpan = 0;
+                int cellRightEdge = tableCell.getLeftEdge()
+                        + tableCell.getWidth();
+                while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
                 {
-                    int count = 0;
-                    for ( int c1 = c; c1 < rowCells; c1++ )
-                    {
-                        TableCell nextCell = tableRow.getCell( c1 );
-                        if ( nextCell.isMerged() )
-                            count++;
-                        if ( !nextCell.isMerged() )
-                            break;
-                    }
-                    tableCellElement.setAttribute( "colspan", "" + count );
+                    colSpan++;
+                    currentEdgeIndex++;
                 }
-                else
+
+                if ( colSpan == 0 )
+                    continue;
+
+                if ( colSpan != 1 )
                 {
-                    if ( c == rowCells - 1 && c != maxColumns - 1 )
-                    {
-                        tableCellElement.setAttribute( "colspan", ""
-                                + ( maxColumns - c ) );
-                    }
+                    tableCellElement.setAttribute( "colspan",
+                            String.valueOf( colSpan ) );
                 }
 
                 if ( tableCell.isFirstVerticallyMerged() )
                 {
-                    int count = 0;
-                    for ( int r1 = r; r1 < tableRows; r1++ )
+                    int count = 1;
+                    for ( int r1 = r + 1; r1 < tableRows; r1++ )
                     {
                         TableRow nextRow = table.getRow( r1 );
                         if ( nextRow.numCells() < c )
                             break;
                         TableCell nextCell = nextRow.getCell( c );
-                        if ( nextCell.isVerticallyMerged() )
-                            count++;
-                        if ( !nextCell.isVerticallyMerged() )
+                        if ( !nextCell.isVerticallyMerged()
+                                || nextCell.isFirstVerticallyMerged() )
                             break;
+                        count++;
                     }
-                    tableCellElement.setAttribute( "rowspan", "" + count );
+                    if ( count > 1 )
+                        tableCellElement.setAttribute( "rowspan",
+                                String.valueOf( count ) );
                 }
 
                 processParagraphes( hwpfDocument, tableCellElement, tableCell,
index a488fe3391714ed7902d99cd07202b833ca7ccbb..c2215a0c03e05c687ec9667f1dd3e31594c271a6 100644 (file)
@@ -19,7 +19,7 @@ package org.apache.poi.hwpf;
 
 import junit.framework.Test;
 import junit.framework.TestSuite;
-
+import org.apache.poi.hwpf.converter.AbstractWordUtilsTest;
 import org.apache.poi.hwpf.converter.TestWordToFoConverter;
 import org.apache.poi.hwpf.converter.TestWordToHtmlConverter;
 import org.apache.poi.hwpf.extractor.TestDifferentRoutes;
@@ -72,6 +72,7 @@ public final class AllHWPFTests
 
         // org.apache.poi.hwpf.converter
         // suite.addTestSuite( TestWordToConverterSuite.class );
+        suite.addTestSuite( AbstractWordUtilsTest.class );
         suite.addTestSuite( TestWordToFoConverter.class );
         suite.addTestSuite( TestWordToHtmlConverter.class );
 
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java
new file mode 100644 (file)
index 0000000..4cb37b8
--- /dev/null
@@ -0,0 +1,53 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.converter;
+
+import org.apache.poi.hwpf.usermodel.Range;
+
+import junit.framework.TestCase;
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFTestDataSamples;
+import org.apache.poi.hwpf.usermodel.Table;
+
+/**
+ * Test cases for the static helpers of {@link AbstractWordUtils}.
+ * 
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class AbstractWordUtilsTest extends TestCase
+{
+    /**
+     * Test case for {@link AbstractWordUtils#buildTableCellEdgesArray(Table)}
+     */
+    public void testBuildTableCellEdgesArray()
+    {
+        HWPFDocument document = HWPFTestDataSamples
+                .openSampleFile( "table-merges.doc" );
+        final Range range = document.getRange();
+        Table table = range.getTable( range.getParagraph( 0 ) ); // looked up via paragraph 0
+
+        int[] result = AbstractWordUtils.buildTableCellEdgesArray( table );
+        assertEquals( 6, result.length ); // six distinct edges expected for this sample
+
+        assertEquals( 0000, result[0] ); // octal literal, equals 0; padded to align with the rows below
+        assertEquals( 1062, result[1] ); // remaining edges are expected in ascending order
+        assertEquals( 5738, result[2] );
+        assertEquals( 6872, result[3] );
+        assertEquals( 8148, result[4] );
+        assertEquals( 9302, result[5] );
+    }
+}
index 7dbe0d06a00f0bf465812f5425be2822fd576000..b6b70fcc0acad5838b25cd71937aaf1a941f23ab 100644 (file)
@@ -190,6 +190,14 @@ public class TestWordToHtmlConverter extends TestCase
         getHtmlText( "innertable.doc" );
     }
 
+    public void testTableMerges() throws Exception
+    {
+        String result = getHtmlText( "table-merges.doc" );
+        
+        assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
+        assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
+    }
+
     public void testO_kurs_doc() throws Exception
     {
         getHtmlText( "o_kurs.doc" );
diff --git a/test-data/document/table-merges.doc b/test-data/document/table-merges.doc
new file mode 100644 (file)
index 0000000..77822ac
Binary files /dev/null and b/test-data/document/table-merges.doc differ