From c5d30dcfea66f09ef04c7c12d05daa4f258535b1 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Tue, 5 Jul 2011 01:44:29 +0000 Subject: [PATCH] simplify table converting; fix bug that prevents table from AIOOB-Tap.doc to be converted git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1142872 13f79535-47bb-0310-9956-ffa450edef68 --- .../hwpf/converter/AbstractWordConverter.java | 27 ++---- .../poi/hwpf/converter/WordToFoConverter.java | 86 +------------------ .../hwpf/converter/WordToHtmlConverter.java | 8 +- .../converter/TestWordToHtmlConverter.java | 7 ++ 4 files changed, 20 insertions(+), 108 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java index e047d3f564..31641cffce 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java @@ -16,9 +16,7 @@ ==================================================================== */ package org.apache.poi.hwpf.converter; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -32,7 +30,6 @@ import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.hwpf.usermodel.Section; import org.apache.poi.hwpf.usermodel.Table; -import org.apache.poi.hwpf.usermodel.TableIterator; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.w3c.dom.Document; @@ -233,14 +230,6 @@ public abstract class AbstractWordConverter protected void processSectionParagraphes( HWPFDocumentCore wordDocument, Element flow, Range range, int currentTableLevel ) { - final Map allTables = new HashMap(); - for ( TableIterator tableIterator = AbstractWordUtils.newTableIterator( - range, currentTableLevel + 1 ); tableIterator.hasNext(); ) - { - Table next = tableIterator.next(); - allTables.put( Integer.valueOf( next.getStartOffset() ), next ); - } - final ListTables listTables = wordDocument.getListTables(); int currentListInfo = 0; @@ -249,18 +238,14 @@ public abstract class AbstractWordConverter { Paragraph paragraph = range.getParagraph( p ); - if ( allTables.containsKey( Integer.valueOf( paragraph - .getStartOffset() ) ) ) - { - Table table = allTables.get( Integer.valueOf( paragraph - .getStartOffset() ) ); - processTable( wordDocument, flow, table, currentTableLevel + 1 ); - continue; - } - if ( paragraph.isInTable() && paragraph.getTableLevel() != currentTableLevel ) { + Table table = range.getTable( paragraph ); + processTable( wordDocument, flow, table ); + + p += table.numParagraphs(); + p--; continue; } @@ -311,7 +296,7 @@ public abstract class AbstractWordConverter } protected abstract void processTable( HWPFDocumentCore wordDocument, - Element flow, Table table, int newTableLevel ); + Element flow, Table table ); protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph, int currentTableLevel, List characterRuns, diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index ba3a1e78c1..baf2d4c6ff 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -18,9 +18,7 @@ package org.apache.poi.hwpf.converter; import java.io.File; import java.io.FileWriter; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Stack; import javax.xml.parsers.DocumentBuilderFactory; @@ -32,17 +30,13 @@ import javax.xml.transform.stream.StreamResult; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; -import org.apache.poi.hwpf.model.ListFormatOverride; -import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Picture; -import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.hwpf.usermodel.Section; import org.apache.poi.hwpf.usermodel.SectionProperties; import org.apache.poi.hwpf.usermodel.Table; import org.apache.poi.hwpf.usermodel.TableCell; -import org.apache.poi.hwpf.usermodel.TableIterator; import org.apache.poi.hwpf.usermodel.TableRow; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; @@ -374,85 +368,11 @@ public class WordToFoConverter extends AbstractWordConverter Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence, "xsl-region-body" ); - processSectionParagraphes( wordDocument, flow, section, 0 ); - } - - protected void processSectionParagraphes( HWPFDocument wordDocument, - Element flow, Range range, int currentTableLevel ) - { - final Map allTables = new HashMap(); - for ( TableIterator tableIterator = WordToFoUtils.newTableIterator( - range, currentTableLevel + 1 ); tableIterator.hasNext(); ) - { - Table next = tableIterator.next(); - allTables.put( Integer.valueOf( next.getStartOffset() ), next ); - } - - final ListTables listTables = wordDocument.getListTables(); - int currentListInfo = 0; - - final int paragraphs = range.numParagraphs(); - for ( int p = 0; p < paragraphs; p++ ) - { - Paragraph paragraph = range.getParagraph( p ); - - if ( allTables.containsKey( Integer.valueOf( paragraph - .getStartOffset() ) ) ) - { - Table table = allTables.get( Integer.valueOf( paragraph - .getStartOffset() ) ); - processTable( wordDocument, flow, table, currentTableLevel + 1 ); - continue; - } - - if ( paragraph.isInTable() - && paragraph.getTableLevel() != currentTableLevel ) - { - continue; - } - - if ( paragraph.getIlfo() != currentListInfo ) - { - currentListInfo = paragraph.getIlfo(); - } - - if ( currentListInfo != 0 ) - { - if ( listTables != null ) - { - final ListFormatOverride listFormatOverride = listTables - .getOverride( paragraph.getIlfo() ); - - String label = WordToFoUtils.getBulletText( listTables, - paragraph, listFormatOverride.getLsid() ); - - processParagraph( wordDocument, flow, currentTableLevel, - paragraph, label ); - } - else - { - logger.log( POILogger.WARN, - "Paragraph #" + paragraph.getStartOffset() + "-" - + paragraph.getEndOffset() - + " has reference to list structure #" - + currentListInfo - + ", but listTables not defined in file" ); - - processParagraph( wordDocument, flow, currentTableLevel, - paragraph, WordToFoUtils.EMPTY ); - } - } - else - { - processParagraph( wordDocument, flow, currentTableLevel, - paragraph, WordToFoUtils.EMPTY ); - } - } - + processSectionParagraphes( wordDocument, flow, section, Integer.MIN_VALUE ); } protected void processTable( HWPFDocumentCore wordDocument, Element flow, - Table table, int thisTableLevel ) + Table table ) { Element tableHeader = foDocumentFacade.createTableHeader(); Element tableBody = foDocumentFacade.createTableBody(); @@ -532,7 +452,7 @@ public class WordToFoConverter extends AbstractWordConverter } processSectionParagraphes( wordDocument, tableCellElement, - tableCell, thisTableLevel ); + tableCell, table.getTableLevel() ); if ( !tableCellElement.hasChildNodes() ) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index c931acb9c7..bbffc9d7d2 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -326,7 +326,7 @@ public class WordToHtmlConverter extends AbstractWordConverter div.setAttribute( "style", getSectionStyle( section ) ); htmlDocumentFacade.body.appendChild( div ); - processSectionParagraphes( wordDocument, div, section, 0 ); + processSectionParagraphes( wordDocument, div, section, Integer.MIN_VALUE ); } @Override @@ -337,11 +337,11 @@ public class WordToHtmlConverter extends AbstractWordConverter getSectionStyle( section ) ); processSectionParagraphes( wordDocument, htmlDocumentFacade.body, - section, 0 ); + section, Integer.MIN_VALUE ); } protected void processTable( HWPFDocumentCore hwpfDocument, Element flow, - Table table, int thisTableLevel ) + Table table ) { Element tableHeader = htmlDocumentFacade.createTableHeader(); Element tableBody = htmlDocumentFacade.createTableBody(); @@ -429,7 +429,7 @@ public class WordToHtmlConverter extends AbstractWordConverter } processSectionParagraphes( hwpfDocument, tableCellElement, - tableCell, thisTableLevel ); + tableCell, table.getTableLevel() ); if ( !tableCellElement.hasChildNodes() ) { diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index 890bce6e6c..272e25b61b 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -77,6 +77,13 @@ public class TestWordToHtmlConverter extends TestCase .contains( "" ) ); } + public void testAIOOBTap() throws Exception + { + String result = getHtmlText( "AIOOB-Tap.doc" ); + + assertTrue( result.substring( 0, 2000 ).contains( "" ) ); + } + public void testHyperlink() throws Exception { String result = getHtmlText( "hyperlink.doc" ); -- 2.39.5