From: Nick Burch Date: Fri, 11 Jun 2010 13:29:44 +0000 (+0000) Subject: Fix for bug #48245 - tweak HWPF table cell detection to work across more files X-Git-Tag: REL_3_7_BETA1~9 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=5977a9db0013b47cc632e3450788baa21502f6a6;p=poi.git Fix for bug #48245 - tweak HWPF table cell detection to work across more files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@953694 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index e0cdba7af7..92474f0aa5 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 48245 - tweak HWPF table cell detection to work across more files 48996 - initial support for External Name References in HSSF formula evaluation 46664 - fix up Tab IDs when adding new sheets, so that print areas don't end up invalid 45269 - improve replaceText on HWPF ranges diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java index 857a92cfbe..a2a8d46760 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java @@ -17,6 +17,7 @@ package org.apache.poi.hwpf.usermodel; +import org.apache.poi.hwpf.model.PropertyNode; import org.apache.poi.hwpf.sprm.TableSprmUncompressor; public final class TableRow @@ -57,10 +58,19 @@ public final class TableRow p = getParagraph(end); s = p.text(); } - _cells[cellIndex] = new TableCell(start, end+1, this, levelNum, + + // Create it for the correct paragraph range + _cells[cellIndex] = new TableCell(start, end, this, levelNum, _tprops.getRgtc()[cellIndex], _tprops.getRgdxaCenter()[cellIndex], _tprops.getRgdxaCenter()[cellIndex+1]-_tprops.getRgdxaCenter()[cellIndex]); + // Now we've decided where everything is, tweak the + // record of the paragraph end so that the + // paragraph level counts work + // This is a bit hacky, we really need a better fix... + _cells[cellIndex]._parEnd++; + + // Next! end++; start = end; } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java index 94b66f8945..7e3857caeb 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java @@ -30,172 +30,356 @@ import org.apache.poi.hwpf.model.StyleSheet; */ public final class TestProblems extends HWPFTestCase { - /** - * ListEntry passed no ListTable - */ - public void testListEntryNoListTable() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc"); - - Range r = doc.getRange(); - StyleSheet styleSheet = doc.getStyleSheet(); - for (int x = 0; x < r.numSections(); x++) { - Section s = r.getSection(x); - for (int y = 0; y < s.numParagraphs(); y++) { - Paragraph paragraph = s.getParagraph(y); - // System.out.println(paragraph.getCharacterRun(0).text()); - } - } - } - - /** - * AIOOB for TableSprmUncompressor.unCompressTAPOperation - */ - public void testSprmAIOOB() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc"); - - Range r = doc.getRange(); - StyleSheet styleSheet = doc.getStyleSheet(); - for (int x = 0; x < r.numSections(); x++) { - Section s = r.getSection(x); - for (int y = 0; y < s.numParagraphs(); y++) { - Paragraph paragraph = s.getParagraph(y); - // System.out.println(paragraph.getCharacterRun(0).text()); - } - } - } - - /** - * Test for TableCell not skipping the last paragraph. Bugs #45062 and - * #44292 - */ - public void testTableCellLastParagraph() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc"); - Range r = doc.getRange(); - assertEquals(6, r.numParagraphs()); - assertEquals(0, r.getStartOffset()); - assertEquals(87, r.getEndOffset()); - - // Paragraph with table - Paragraph p = r.getParagraph(0); - assertEquals(0, p.getStartOffset()); - assertEquals(20, p.getEndOffset()); - - // Get the table - Table t = r.getTable(p); - - // get the only row - assertEquals(1, t.numRows()); - TableRow row = t.getRow(0); - - // get the first cell - TableCell cell = row.getCell(0); - // First cell should have one paragraph - assertEquals(1, cell.numParagraphs()); - assertEquals("One paragraph is ok\7", cell.getParagraph(0).text()); - - // get the second - cell = row.getCell(1); - // Second cell should be detected as having two paragraphs - assertEquals(2, cell.numParagraphs()); - assertEquals("First para is ok\r", cell.getParagraph(0).text()); - assertEquals("Second paragraph is skipped\7", cell.getParagraph(1).text()); - - // get the last cell - cell = row.getCell(2); - // Last cell should have one paragraph - assertEquals(1, cell.numParagraphs()); - assertEquals("One paragraph is ok\7", cell.getParagraph(0).text()); - } - - public void testRangeDelete() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc"); - - Range range = doc.getRange(); - int numParagraphs = range.numParagraphs(); - - int totalLength = 0, deletedLength = 0; - - for (int i = 0; i < numParagraphs; i++) { - Paragraph para = range.getParagraph(i); - String text = para.text(); - - totalLength += text.length(); - if (text.indexOf("{delete me}") > -1) { - para.delete(); - deletedLength = text.length(); - } - } - - // check the text length after deletion - int newLength = 0; - range = doc.getRange(); - numParagraphs = range.numParagraphs(); - - for (int i = 0; i < numParagraphs; i++) { - Paragraph para = range.getParagraph(i); - String text = para.text(); - - newLength += text.length(); - } - - assertEquals(newLength, totalLength - deletedLength); - } - - /** - * With an encrypted file, we should give a suitable exception, and not OOM - */ - public void testEncryptedFile() { - try { - HWPFTestDataSamples.openSampleFile("PasswordProtected.doc"); - fail(); - } catch (EncryptedDocumentException e) { - // Good - } - } - - public void testWriteProperties() { - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc"); - assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor()); - - // Write and read - HWPFDocument doc2 = writeOutAndRead(doc); - assertEquals("Nick Burch", doc2.getSummaryInformation().getAuthor()); - } - - /** - * Test for reading paragraphs from Range after replacing some - * text in this Range. - * Bug #45269 - */ - public void testReadParagraphsAfterReplaceText()throws Exception{ - HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc"); - Range range = doc.getRange(); - - String toFind = "campo1"; - String longer = " foi porraaaaa "; - String shorter = " foi "; - - //check replace with longer text - for (int x = 0; x < range.numParagraphs(); x++) { - Paragraph para = range.getParagraph(x); - int offset = para.text().indexOf(toFind); - if (offset >= 0) { - para.replaceText(toFind, longer, offset); - assertEquals(offset, para.text().indexOf(longer)); + /** + * ListEntry passed no ListTable + */ + public void testListEntryNoListTable() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc"); + + Range r = doc.getRange(); + StyleSheet styleSheet = doc.getStyleSheet(); + for (int x = 0; x < r.numSections(); x++) { + Section s = r.getSection(x); + for (int y = 0; y < s.numParagraphs(); y++) { + Paragraph paragraph = s.getParagraph(y); + // System.out.println(paragraph.getCharacterRun(0).text()); + } + } + } + + /** + * AIOOB for TableSprmUncompressor.unCompressTAPOperation + */ + public void testSprmAIOOB() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc"); + + Range r = doc.getRange(); + StyleSheet styleSheet = doc.getStyleSheet(); + for (int x = 0; x < r.numSections(); x++) { + Section s = r.getSection(x); + for (int y = 0; y < s.numParagraphs(); y++) { + Paragraph paragraph = s.getParagraph(y); + // System.out.println(paragraph.getCharacterRun(0).text()); + } + } + } + + /** + * Test for TableCell not skipping the last paragraph. Bugs #45062 and + * #44292 + */ + public void testTableCellLastParagraph() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc"); + Range r = doc.getRange(); + assertEquals(6, r.numParagraphs()); + assertEquals(0, r.getStartOffset()); + assertEquals(87, r.getEndOffset()); + + // Paragraph with table + Paragraph p = r.getParagraph(0); + assertEquals(0, p.getStartOffset()); + assertEquals(20, p.getEndOffset()); + + // Check a few bits of the table directly + assertEquals("One paragraph is ok\7", r.getParagraph(0).text()); + assertEquals("First para is ok\r", r.getParagraph(1).text()); + assertEquals("Second paragraph is skipped\7", r.getParagraph(2).text()); + assertEquals("One paragraph is ok\7", r.getParagraph(3).text()); + assertEquals("\7", r.getParagraph(4).text()); + assertEquals("\r", r.getParagraph(5).text()); + for(int i=0; i<=5; i++) { + assertFalse(r.getParagraph(i).usesUnicode()); + } + + + // Get the table + Table t = r.getTable(p); + + // get the only row + assertEquals(1, t.numRows()); + TableRow row = t.getRow(0); + + // sanity check our row + assertEquals(5, row.numParagraphs()); + assertEquals(0, row._parStart); + assertEquals(5, row._parEnd); + assertEquals(0, row.getStartOffset()); + assertEquals(87, row.getEndOffset()); + + + // get the first cell + TableCell cell = row.getCell(0); + // First cell should have one paragraph + assertEquals(1, cell.numParagraphs()); + assertEquals("One paragraph is ok\7", cell.getParagraph(0).text()); + assertEquals(0, cell._parStart); + assertEquals(1, cell._parEnd); + assertEquals(0, cell.getStartOffset()); + assertEquals(20, cell.getEndOffset()); + + + // get the second + cell = row.getCell(1); + // Second cell should be detected as having two paragraphs + assertEquals(2, cell.numParagraphs()); + assertEquals("First para is ok\r", cell.getParagraph(0).text()); + assertEquals("Second paragraph is skipped\7", cell.getParagraph(1).text()); + assertEquals(1, cell._parStart); + assertEquals(3, cell._parEnd); + assertEquals(20, cell.getStartOffset()); + assertEquals(65, cell.getEndOffset()); + + + // get the last cell + cell = row.getCell(2); + // Last cell should have one paragraph + assertEquals(1, cell.numParagraphs()); + assertEquals("One paragraph is ok\7", cell.getParagraph(0).text()); + assertEquals(3, cell._parStart); + assertEquals(4, cell._parEnd); + assertEquals(65, cell.getStartOffset()); + assertEquals(85, cell.getEndOffset()); + } + + public void testRangeDelete() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc"); + + Range range = doc.getRange(); + int numParagraphs = range.numParagraphs(); + + int totalLength = 0, deletedLength = 0; + + for (int i = 0; i < numParagraphs; i++) { + Paragraph para = range.getParagraph(i); + String text = para.text(); + + totalLength += text.length(); + if (text.indexOf("{delete me}") > -1) { + para.delete(); + deletedLength = text.length(); + } + } + + // check the text length after deletion + int newLength = 0; + range = doc.getRange(); + numParagraphs = range.numParagraphs(); + + for (int i = 0; i < numParagraphs; i++) { + Paragraph para = range.getParagraph(i); + String text = para.text(); + + newLength += text.length(); + } + + assertEquals(newLength, totalLength - deletedLength); + } + + /** + * With an encrypted file, we should give a suitable exception, and not OOM + */ + public void testEncryptedFile() { + try { + HWPFTestDataSamples.openSampleFile("PasswordProtected.doc"); + fail(); + } catch (EncryptedDocumentException e) { + // Good + } + } + + public void testWriteProperties() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc"); + assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor()); + + // Write and read + HWPFDocument doc2 = writeOutAndRead(doc); + assertEquals("Nick Burch", doc2.getSummaryInformation().getAuthor()); + } + + /** + * Test for reading paragraphs from Range after replacing some + * text in this Range. + * Bug #45269 + */ + public void testReadParagraphsAfterReplaceText()throws Exception{ + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc"); + Range range = doc.getRange(); + + String toFind = "campo1"; + String longer = " foi porraaaaa "; + String shorter = " foi "; + + //check replace with longer text + for (int x = 0; x < range.numParagraphs(); x++) { + Paragraph para = range.getParagraph(x); + int offset = para.text().indexOf(toFind); + if (offset >= 0) { + para.replaceText(toFind, longer, offset); + assertEquals(offset, para.text().indexOf(longer)); + } + } + + doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc"); + range = doc.getRange(); + + //check replace with shorter text + for (int x = 0; x < range.numParagraphs(); x++) { + Paragraph para = range.getParagraph(x); + int offset = para.text().indexOf(toFind); + if (offset >= 0) { + para.replaceText(toFind, shorter, offset); + assertEquals(offset, para.text().indexOf(shorter)); + } } - } - - doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc"); - range = doc.getRange(); - - //check replace with shorter text - for (int x = 0; x < range.numParagraphs(); x++) { - Paragraph para = range.getParagraph(x); - int offset = para.text().indexOf(toFind); - if (offset >= 0) { - para.replaceText(toFind, shorter, offset); - assertEquals(offset, para.text().indexOf(shorter)); + } + + /** + * Bug #48245 - don't include the text from the + * next cell in the current one + */ + public void testTableIterator() throws Exception { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("simple-table2.doc"); + Range r = doc.getRange(); + + // Check the text is as we'd expect + assertEquals(13, r.numParagraphs()); + assertEquals("Row 1/Cell 1\u0007", r.getParagraph(0).text()); + assertEquals("Row 1/Cell 2\u0007", r.getParagraph(1).text()); + assertEquals("Row 1/Cell 3\u0007", r.getParagraph(2).text()); + assertEquals("\u0007", r.getParagraph(3).text()); + assertEquals("Row 2/Cell 1\u0007", r.getParagraph(4).text()); + assertEquals("Row 2/Cell 2\u0007", r.getParagraph(5).text()); + assertEquals("Row 2/Cell 3\u0007", r.getParagraph(6).text()); + assertEquals("\u0007", r.getParagraph(7).text()); + assertEquals("Row 3/Cell 1\u0007", r.getParagraph(8).text()); + assertEquals("Row 3/Cell 2\u0007", r.getParagraph(9).text()); + assertEquals("Row 3/Cell 3\u0007", r.getParagraph(10).text()); + assertEquals("\u0007", r.getParagraph(11).text()); + assertEquals("\r", r.getParagraph(12).text()); + for(int i=0; i<=12; i++) { + assertFalse(r.getParagraph(i).usesUnicode()); } - } - } + + Paragraph p; + + // Take a look in detail at the first couple of + // paragraphs + p = r.getParagraph(0); + assertEquals(1, p.numParagraphs()); + assertEquals(0, p.getStartOffset()); + assertEquals(13, p.getEndOffset()); + assertEquals(0, p._parStart); + assertEquals(1, p._parEnd); + + p = r.getParagraph(1); + assertEquals(1, p.numParagraphs()); + assertEquals(13, p.getStartOffset()); + assertEquals(26, p.getEndOffset()); + assertEquals(1, p._parStart); + assertEquals(2, p._parEnd); + + p = r.getParagraph(2); + assertEquals(1, p.numParagraphs()); + assertEquals(26, p.getStartOffset()); + assertEquals(39, p.getEndOffset()); + assertEquals(2, p._parStart); + assertEquals(3, p._parEnd); + + + // Now look at the table + Table table = r.getTable(r.getParagraph(0)); + assertEquals(3, table.numRows()); + + TableRow row; + TableCell cell; + + + row = table.getRow(0); + assertEquals(0, row._parStart); + assertEquals(4, row._parEnd); + + cell = row.getCell(0); + assertEquals(1, cell.numParagraphs()); + assertEquals(0, cell._parStart); + assertEquals(1, cell._parEnd); + assertEquals(0, cell.getStartOffset()); + assertEquals(13, cell.getEndOffset()); + assertEquals("Row 1/Cell 1\u0007", cell.text()); + + cell = row.getCell(1); + assertEquals(1, cell.numParagraphs()); + assertEquals(1, cell._parStart); + assertEquals(2, cell._parEnd); + assertEquals(13, cell.getStartOffset()); + assertEquals(26, cell.getEndOffset()); + assertEquals("Row 1/Cell 2\u0007", cell.text()); + + cell = row.getCell(2); + assertEquals(1, cell.numParagraphs()); + assertEquals(2, cell._parStart); + assertEquals(3, cell._parEnd); + assertEquals(26, cell.getStartOffset()); + assertEquals(39, cell.getEndOffset()); + assertEquals("Row 1/Cell 3\u0007", cell.text()); + + + // Onto row #2 + row = table.getRow(1); + assertEquals(4, row._parStart); + assertEquals(8, row._parEnd); + + cell = row.getCell(0); + assertEquals(1, cell.numParagraphs()); + assertEquals(4, cell._parStart); + assertEquals(5, cell._parEnd); + assertEquals(40, cell.getStartOffset()); + assertEquals(53, cell.getEndOffset()); + assertEquals("Row 2/Cell 1\u0007", cell.text()); + + cell = row.getCell(1); + assertEquals(1, cell.numParagraphs()); + assertEquals(5, cell._parStart); + assertEquals(6, cell._parEnd); + assertEquals(53, cell.getStartOffset()); + assertEquals(66, cell.getEndOffset()); + assertEquals("Row 2/Cell 2\u0007", cell.text()); + + cell = row.getCell(2); + assertEquals(1, cell.numParagraphs()); + assertEquals(6, cell._parStart); + assertEquals(7, cell._parEnd); + assertEquals(66, cell.getStartOffset()); + assertEquals(79, cell.getEndOffset()); + assertEquals("Row 2/Cell 3\u0007", cell.text()); + + + // Finally row 3 + row = table.getRow(2); + assertEquals(8, row._parStart); + assertEquals(12, row._parEnd); + + cell = row.getCell(0); + assertEquals(1, cell.numParagraphs()); + assertEquals(8, cell._parStart); + assertEquals(9, cell._parEnd); + assertEquals(80, cell.getStartOffset()); + assertEquals(93, cell.getEndOffset()); + assertEquals("Row 3/Cell 1\u0007", cell.text()); + + cell = row.getCell(1); + assertEquals(1, cell.numParagraphs()); + assertEquals(9, cell._parStart); + assertEquals(10, cell._parEnd); + assertEquals(93, cell.getStartOffset()); + assertEquals(106, cell.getEndOffset()); + assertEquals("Row 3/Cell 2\u0007", cell.text()); + + cell = row.getCell(2); + assertEquals(1, cell.numParagraphs()); + assertEquals(10, cell._parStart); + assertEquals(11, cell._parEnd); + assertEquals(106, cell.getStartOffset()); + assertEquals(119, cell.getEndOffset()); + assertEquals("Row 3/Cell 3\u0007", cell.text()); + } } diff --git a/test-data/document/simple-table2.doc b/test-data/document/simple-table2.doc new file mode 100644 index 0000000000..1a5ff5f710 Binary files /dev/null and b/test-data/document/simple-table2.doc differ