From: Sergey Vladimirov Date: Fri, 22 Jul 2011 09:42:32 +0000 (+0000) Subject: better processing of word tables in cases different rows have different cell widths X-Git-Tag: REL_3_8_BETA4~101 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=1ed12025dfce6ced53541b135a4a6e7c3d41e424;p=poi.git better processing of word tables in cases different rows have different cell widths git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149528 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java index d464afc26b..6e974ea6d9 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java @@ -20,6 +20,8 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Set; +import java.util.TreeSet; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; @@ -29,6 +31,9 @@ import org.apache.poi.hwpf.model.ListLevel; import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.usermodel.BorderCode; import org.apache.poi.hwpf.usermodel.Paragraph; +import org.apache.poi.hwpf.usermodel.Table; +import org.apache.poi.hwpf.usermodel.TableCell; +import org.apache.poi.hwpf.usermodel.TableRow; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.IOUtils; import org.w3c.dom.Attr; @@ -44,6 +49,42 @@ public class AbstractWordUtils public static final float TWIPS_PER_INCH = 1440.0f; public static final int TWIPS_PER_PT = 20; + /** + * Creates array of all possible cell edges. In HTML (and FO) cells from + * different rows and same column should have same width, otherwise spanning + * shall be used. + * + * @param table + * table to build cell edges array from + * @return array of cell edges (including leftest one) in twips + */ + static int[] buildTableCellEdgesArray( Table table ) + { + Set edges = new TreeSet(); + + for ( int r = 0; r < table.numRows(); r++ ) + { + TableRow tableRow = table.getRow( r ); + for ( int c = 0; c < tableRow.numCells(); c++ ) + { + TableCell tableCell = tableRow.getCell( c ); + + edges.add( Integer.valueOf( tableCell.getLeftEdge() ) ); + edges.add( Integer.valueOf( tableCell.getLeftEdge() + + tableCell.getWidth() ) ); + } + } + + Integer[] sorted = edges.toArray( new Integer[edges.size()] ); + int[] result = new int[sorted.length]; + for ( int i = 0; i < sorted.length; i++ ) + { + result[i] = sorted[i].intValue(); + } + + return result; + } + static boolean canBeMerged( Node node1, Node node2, String requiredTagName ) { if ( node1.getNodeType() != Node.ELEMENT_NODE diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index ba757248a1..ca5b4a89e5 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -489,6 +489,8 @@ public class WordToFoConverter extends AbstractWordConverter Element tableHeader = foDocumentFacade.createTableHeader(); Element tableBody = foDocumentFacade.createTableBody(); + final int[] tableCellEdges = WordToHtmlUtils + .buildTableCellEdgesArray( table ); final int tableRows = table.numRows(); int maxColumns = Integer.MIN_VALUE; @@ -504,6 +506,8 @@ public class WordToFoConverter extends AbstractWordConverter Element tableRowElement = foDocumentFacade.createTableRow(); WordToFoUtils.setTableRowProperties( tableRow, tableRowElement ); + // index of current element in tableCellEdges[] + int currentEdgeIndex = 0; final int rowCells = tableRow.numCells(); for ( int c = 0; c < rowCells; c++ ) { @@ -521,30 +525,22 @@ public class WordToFoConverter extends AbstractWordConverter tableCellElement, r == 0, r == tableRows - 1, c == 0, c == rowCells - 1 ); - if ( tableCell.isFirstMerged() ) + int colSpan = 0; + int cellRightEdge = tableCell.getLeftEdge() + + tableCell.getWidth(); + while ( tableCellEdges[currentEdgeIndex] < cellRightEdge ) { - int count = 0; - for ( int c1 = c; c1 < rowCells; c1++ ) - { - TableCell nextCell = tableRow.getCell( c1 ); - if ( nextCell.isMerged() ) - count++; - if ( !nextCell.isMerged() ) - break; - } - tableCellElement.setAttribute( "number-columns-spanned", "" - + count ); - } - else - { - if ( c == rowCells - 1 && c != maxColumns - 1 ) - { - tableCellElement.setAttribute( - "number-columns-spanned", "" - + ( maxColumns - c ) ); - } + colSpan++; + currentEdgeIndex++; } + if ( colSpan == 0 ) + continue; + + if ( colSpan != 1 ) + tableCellElement.setAttribute( "number-columns-spanned", + String.valueOf( colSpan ) ); + if ( tableCell.isFirstVerticallyMerged() ) { int count = 0; @@ -559,8 +555,9 @@ public class WordToFoConverter extends AbstractWordConverter if ( !nextCell.isVerticallyMerged() ) break; } - tableCellElement.setAttribute( "number-rows-spanned", "" - + count ); + if ( count > 1 ) + tableCellElement.setAttribute( "number-rows-spanned", + String.valueOf( count ) ); } processParagraphes( wordDocument, tableCellElement, tableCell, diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index aa25963ae9..72410abc7f 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -557,6 +557,8 @@ public class WordToHtmlConverter extends AbstractWordConverter Element tableHeader = htmlDocumentFacade.createTableHeader(); Element tableBody = htmlDocumentFacade.createTableBody(); + final int[] tableCellEdges = WordToHtmlUtils + .buildTableCellEdgesArray( table ); final int tableRows = table.numRows(); int maxColumns = Integer.MIN_VALUE; @@ -573,14 +575,13 @@ public class WordToHtmlConverter extends AbstractWordConverter StringBuilder tableRowStyle = new StringBuilder(); WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle ); + // index of current element in tableCellEdges[] + int currentEdgeIndex = 0; final int rowCells = tableRow.numCells(); for ( int c = 0; c < rowCells; c++ ) { TableCell tableCell = tableRow.getCell( c ); - if ( tableCell.isMerged() && !tableCell.isFirstMerged() ) - continue; - if ( tableCell.isVerticallyMerged() && !tableCell.isFirstVerticallyMerged() ) continue; @@ -600,43 +601,41 @@ public class WordToHtmlConverter extends AbstractWordConverter r == 0, r == tableRows - 1, c == 0, c == rowCells - 1, tableCellStyle ); - if ( tableCell.isFirstMerged() ) + int colSpan = 0; + int cellRightEdge = tableCell.getLeftEdge() + + tableCell.getWidth(); + while ( tableCellEdges[currentEdgeIndex] < cellRightEdge ) { - int count = 0; - for ( int c1 = c; c1 < rowCells; c1++ ) - { - TableCell nextCell = tableRow.getCell( c1 ); - if ( nextCell.isMerged() ) - count++; - if ( !nextCell.isMerged() ) - break; - } - tableCellElement.setAttribute( "colspan", "" + count ); + colSpan++; + currentEdgeIndex++; } - else + + if ( colSpan == 0 ) + continue; + + if ( colSpan != 1 ) { - if ( c == rowCells - 1 && c != maxColumns - 1 ) - { - tableCellElement.setAttribute( "colspan", "" - + ( maxColumns - c ) ); - } + tableCellElement.setAttribute( "colspan", + String.valueOf( colSpan ) ); } if ( tableCell.isFirstVerticallyMerged() ) { - int count = 0; - for ( int r1 = r; r1 < tableRows; r1++ ) + int count = 1; + for ( int r1 = r + 1; r1 < tableRows; r1++ ) { TableRow nextRow = table.getRow( r1 ); if ( nextRow.numCells() < c ) break; TableCell nextCell = nextRow.getCell( c ); - if ( nextCell.isVerticallyMerged() ) - count++; - if ( !nextCell.isVerticallyMerged() ) + if ( !nextCell.isVerticallyMerged() + || nextCell.isFirstVerticallyMerged() ) break; + count++; } - tableCellElement.setAttribute( "rowspan", "" + count ); + if ( count > 1 ) + tableCellElement.setAttribute( "rowspan", + String.valueOf( count ) ); } processParagraphes( hwpfDocument, tableCellElement, tableCell, diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java b/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java index a488fe3391..c2215a0c03 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java @@ -19,7 +19,7 @@ package org.apache.poi.hwpf; import junit.framework.Test; import junit.framework.TestSuite; - +import org.apache.poi.hwpf.converter.AbstractWordUtilsTest; import org.apache.poi.hwpf.converter.TestWordToFoConverter; import org.apache.poi.hwpf.converter.TestWordToHtmlConverter; import org.apache.poi.hwpf.extractor.TestDifferentRoutes; @@ -72,6 +72,7 @@ public final class AllHWPFTests // org.apache.poi.hwpf.converter // suite.addTestSuite( TestWordToConverterSuite.class ); + suite.addTestSuite( AbstractWordUtilsTest.class ); suite.addTestSuite( TestWordToFoConverter.class ); suite.addTestSuite( TestWordToHtmlConverter.class ); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java new file mode 100644 index 0000000000..4cb37b8de9 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java @@ -0,0 +1,53 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hwpf.converter; + +import org.apache.poi.hwpf.usermodel.Range; + +import junit.framework.TestCase; +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.HWPFTestDataSamples; +import org.apache.poi.hwpf.usermodel.Table; + +/** + * Test cases for {@link AbstractWordUtils} + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +public class AbstractWordUtilsTest extends TestCase +{ + /** + * Test case for {@link AbstractWordUtils#buildTableCellEdgesArray(Table)} + */ + public void testBuildTableCellEdgesArray() + { + HWPFDocument document = HWPFTestDataSamples + .openSampleFile( "table-merges.doc" ); + final Range range = document.getRange(); + Table table = range.getTable( range.getParagraph( 0 ) ); + + int[] result = AbstractWordUtils.buildTableCellEdgesArray( table ); + assertEquals( 6, result.length ); + + assertEquals( 0000, result[0] ); + assertEquals( 1062, result[1] ); + assertEquals( 5738, result[2] ); + assertEquals( 6872, result[3] ); + assertEquals( 8148, result[4] ); + assertEquals( 9302, result[5] ); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index 7dbe0d06a0..b6b70fcc0a 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -190,6 +190,14 @@ public class TestWordToHtmlConverter extends TestCase getHtmlText( "innertable.doc" ); } + public void testTableMerges() throws Exception + { + String result = getHtmlText( "table-merges.doc" ); + + assertContains( result, "" ); + assertContains( result, "" ); + } + public void testO_kurs_doc() throws Exception { getHtmlText( "o_kurs.doc" ); diff --git a/test-data/document/table-merges.doc b/test-data/document/table-merges.doc new file mode 100644 index 0000000000..77822acd16 Binary files /dev/null and b/test-data/document/table-merges.doc differ