aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2010-06-11 13:29:44 +0000
committerNick Burch <nick@apache.org>2010-06-11 13:29:44 +0000
commit5977a9db0013b47cc632e3450788baa21502f6a6 (patch)
tree5c22c51c6588091ca670192cb30488b47e4c7706
parent034e5c4a978f646c7e62b571db56758e6df5fc14 (diff)
downloadpoi-5977a9db0013b47cc632e3450788baa21502f6a6.tar.gz
poi-5977a9db0013b47cc632e3450788baa21502f6a6.zip
Fix for bug #48245 - tweak HWPF table cell detection to work across more files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@953694 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java12
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java516
-rw-r--r--test-data/document/simple-table2.docbin0 -> 27136 bytes
4 files changed, 362 insertions, 167 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index e0cdba7af7..92474f0aa5 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
+ <action dev="POI-DEVELOPERS" type="add">48245 - tweak HWPF table cell detection to work across more files</action>
<action dev="POI-DEVELOPERS" type="add">48996 - initial support for External Name References in HSSF formula evaluation</action>
<action dev="POI-DEVELOPERS" type="fix">46664 - fix up Tab IDs when adding new sheets, so that print areas don't end up invalid</action>
<action dev="POI-DEVELOPERS" type="fix">45269 - improve replaceText on HWPF ranges</action>
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java
index 857a92cfbe..a2a8d46760 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java
@@ -17,6 +17,7 @@
package org.apache.poi.hwpf.usermodel;
+import org.apache.poi.hwpf.model.PropertyNode;
import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
public final class TableRow
@@ -57,10 +58,19 @@ public final class TableRow
p = getParagraph(end);
s = p.text();
}
- _cells[cellIndex] = new TableCell(start, end+1, this, levelNum,
+
+ // Create it for the correct paragraph range
+ _cells[cellIndex] = new TableCell(start, end, this, levelNum,
_tprops.getRgtc()[cellIndex],
_tprops.getRgdxaCenter()[cellIndex],
_tprops.getRgdxaCenter()[cellIndex+1]-_tprops.getRgdxaCenter()[cellIndex]);
+ // Now we've decided where everything is, tweak the
+ // record of the paragraph end so that the
+ // paragraph level counts work
+ // This is a bit hacky, we really need a better fix...
+ _cells[cellIndex]._parEnd++;
+
+ // Next!
end++;
start = end;
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java
index 94b66f8945..7e3857caeb 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java
@@ -30,172 +30,356 @@ import org.apache.poi.hwpf.model.StyleSheet;
*/
public final class TestProblems extends HWPFTestCase {
- /**
- * ListEntry passed no ListTable
- */
- public void testListEntryNoListTable() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc");
-
- Range r = doc.getRange();
- StyleSheet styleSheet = doc.getStyleSheet();
- for (int x = 0; x < r.numSections(); x++) {
- Section s = r.getSection(x);
- for (int y = 0; y < s.numParagraphs(); y++) {
- Paragraph paragraph = s.getParagraph(y);
- // System.out.println(paragraph.getCharacterRun(0).text());
- }
- }
- }
-
- /**
- * AIOOB for TableSprmUncompressor.unCompressTAPOperation
- */
- public void testSprmAIOOB() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc");
-
- Range r = doc.getRange();
- StyleSheet styleSheet = doc.getStyleSheet();
- for (int x = 0; x < r.numSections(); x++) {
- Section s = r.getSection(x);
- for (int y = 0; y < s.numParagraphs(); y++) {
- Paragraph paragraph = s.getParagraph(y);
- // System.out.println(paragraph.getCharacterRun(0).text());
- }
- }
- }
-
- /**
- * Test for TableCell not skipping the last paragraph. Bugs #45062 and
- * #44292
- */
- public void testTableCellLastParagraph() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc");
- Range r = doc.getRange();
- assertEquals(6, r.numParagraphs());
- assertEquals(0, r.getStartOffset());
- assertEquals(87, r.getEndOffset());
-
- // Paragraph with table
- Paragraph p = r.getParagraph(0);
- assertEquals(0, p.getStartOffset());
- assertEquals(20, p.getEndOffset());
-
- // Get the table
- Table t = r.getTable(p);
-
- // get the only row
- assertEquals(1, t.numRows());
- TableRow row = t.getRow(0);
-
- // get the first cell
- TableCell cell = row.getCell(0);
- // First cell should have one paragraph
- assertEquals(1, cell.numParagraphs());
- assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
-
- // get the second
- cell = row.getCell(1);
- // Second cell should be detected as having two paragraphs
- assertEquals(2, cell.numParagraphs());
- assertEquals("First para is ok\r", cell.getParagraph(0).text());
- assertEquals("Second paragraph is skipped\7", cell.getParagraph(1).text());
-
- // get the last cell
- cell = row.getCell(2);
- // Last cell should have one paragraph
- assertEquals(1, cell.numParagraphs());
- assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
- }
-
- public void testRangeDelete() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc");
-
- Range range = doc.getRange();
- int numParagraphs = range.numParagraphs();
-
- int totalLength = 0, deletedLength = 0;
-
- for (int i = 0; i < numParagraphs; i++) {
- Paragraph para = range.getParagraph(i);
- String text = para.text();
-
- totalLength += text.length();
- if (text.indexOf("{delete me}") > -1) {
- para.delete();
- deletedLength = text.length();
- }
- }
-
- // check the text length after deletion
- int newLength = 0;
- range = doc.getRange();
- numParagraphs = range.numParagraphs();
-
- for (int i = 0; i < numParagraphs; i++) {
- Paragraph para = range.getParagraph(i);
- String text = para.text();
-
- newLength += text.length();
- }
-
- assertEquals(newLength, totalLength - deletedLength);
- }
-
- /**
- * With an encrypted file, we should give a suitable exception, and not OOM
- */
- public void testEncryptedFile() {
- try {
- HWPFTestDataSamples.openSampleFile("PasswordProtected.doc");
- fail();
- } catch (EncryptedDocumentException e) {
- // Good
- }
- }
-
- public void testWriteProperties() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
- assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor());
-
- // Write and read
- HWPFDocument doc2 = writeOutAndRead(doc);
- assertEquals("Nick Burch", doc2.getSummaryInformation().getAuthor());
- }
-
- /**
- * Test for reading paragraphs from Range after replacing some
- * text in this Range.
- * Bug #45269
- */
- public void testReadParagraphsAfterReplaceText()throws Exception{
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
- Range range = doc.getRange();
-
- String toFind = "campo1";
- String longer = " foi porraaaaa ";
- String shorter = " foi ";
-
- //check replace with longer text
- for (int x = 0; x < range.numParagraphs(); x++) {
- Paragraph para = range.getParagraph(x);
- int offset = para.text().indexOf(toFind);
- if (offset >= 0) {
- para.replaceText(toFind, longer, offset);
- assertEquals(offset, para.text().indexOf(longer));
+ /**
+ * ListEntry passed no ListTable
+ */
+ public void testListEntryNoListTable() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc");
+
+ Range r = doc.getRange();
+ StyleSheet styleSheet = doc.getStyleSheet();
+ for (int x = 0; x < r.numSections(); x++) {
+ Section s = r.getSection(x);
+ for (int y = 0; y < s.numParagraphs(); y++) {
+ Paragraph paragraph = s.getParagraph(y);
+ // System.out.println(paragraph.getCharacterRun(0).text());
+ }
+ }
+ }
+
+ /**
+ * AIOOB for TableSprmUncompressor.unCompressTAPOperation
+ */
+ public void testSprmAIOOB() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc");
+
+ Range r = doc.getRange();
+ StyleSheet styleSheet = doc.getStyleSheet();
+ for (int x = 0; x < r.numSections(); x++) {
+ Section s = r.getSection(x);
+ for (int y = 0; y < s.numParagraphs(); y++) {
+ Paragraph paragraph = s.getParagraph(y);
+ // System.out.println(paragraph.getCharacterRun(0).text());
+ }
+ }
+ }
+
+ /**
+ * Test for TableCell not skipping the last paragraph. Bugs #45062 and
+ * #44292
+ */
+ public void testTableCellLastParagraph() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc");
+ Range r = doc.getRange();
+ assertEquals(6, r.numParagraphs());
+ assertEquals(0, r.getStartOffset());
+ assertEquals(87, r.getEndOffset());
+
+ // Paragraph with table
+ Paragraph p = r.getParagraph(0);
+ assertEquals(0, p.getStartOffset());
+ assertEquals(20, p.getEndOffset());
+
+ // Check a few bits of the table directly
+ assertEquals("One paragraph is ok\7", r.getParagraph(0).text());
+ assertEquals("First para is ok\r", r.getParagraph(1).text());
+ assertEquals("Second paragraph is skipped\7", r.getParagraph(2).text());
+ assertEquals("One paragraph is ok\7", r.getParagraph(3).text());
+ assertEquals("\7", r.getParagraph(4).text());
+ assertEquals("\r", r.getParagraph(5).text());
+ for(int i=0; i<=5; i++) {
+ assertFalse(r.getParagraph(i).usesUnicode());
+ }
+
+
+ // Get the table
+ Table t = r.getTable(p);
+
+ // get the only row
+ assertEquals(1, t.numRows());
+ TableRow row = t.getRow(0);
+
+ // sanity check our row
+ assertEquals(5, row.numParagraphs());
+ assertEquals(0, row._parStart);
+ assertEquals(5, row._parEnd);
+ assertEquals(0, row.getStartOffset());
+ assertEquals(87, row.getEndOffset());
+
+
+ // get the first cell
+ TableCell cell = row.getCell(0);
+ // First cell should have one paragraph
+ assertEquals(1, cell.numParagraphs());
+ assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
+ assertEquals(0, cell._parStart);
+ assertEquals(1, cell._parEnd);
+ assertEquals(0, cell.getStartOffset());
+ assertEquals(20, cell.getEndOffset());
+
+
+ // get the second
+ cell = row.getCell(1);
+ // Second cell should be detected as having two paragraphs
+ assertEquals(2, cell.numParagraphs());
+ assertEquals("First para is ok\r", cell.getParagraph(0).text());
+ assertEquals("Second paragraph is skipped\7", cell.getParagraph(1).text());
+ assertEquals(1, cell._parStart);
+ assertEquals(3, cell._parEnd);
+ assertEquals(20, cell.getStartOffset());
+ assertEquals(65, cell.getEndOffset());
+
+
+ // get the last cell
+ cell = row.getCell(2);
+ // Last cell should have one paragraph
+ assertEquals(1, cell.numParagraphs());
+ assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
+ assertEquals(3, cell._parStart);
+ assertEquals(4, cell._parEnd);
+ assertEquals(65, cell.getStartOffset());
+ assertEquals(85, cell.getEndOffset());
+ }
+
+ public void testRangeDelete() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc");
+
+ Range range = doc.getRange();
+ int numParagraphs = range.numParagraphs();
+
+ int totalLength = 0, deletedLength = 0;
+
+ for (int i = 0; i < numParagraphs; i++) {
+ Paragraph para = range.getParagraph(i);
+ String text = para.text();
+
+ totalLength += text.length();
+ if (text.indexOf("{delete me}") > -1) {
+ para.delete();
+ deletedLength = text.length();
+ }
+ }
+
+ // check the text length after deletion
+ int newLength = 0;
+ range = doc.getRange();
+ numParagraphs = range.numParagraphs();
+
+ for (int i = 0; i < numParagraphs; i++) {
+ Paragraph para = range.getParagraph(i);
+ String text = para.text();
+
+ newLength += text.length();
+ }
+
+ assertEquals(newLength, totalLength - deletedLength);
+ }
+
+ /**
+ * With an encrypted file, we should give a suitable exception, and not OOM
+ */
+ public void testEncryptedFile() {
+ try {
+ HWPFTestDataSamples.openSampleFile("PasswordProtected.doc");
+ fail();
+ } catch (EncryptedDocumentException e) {
+ // Good
+ }
+ }
+
+ public void testWriteProperties() {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
+ assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor());
+
+ // Write and read
+ HWPFDocument doc2 = writeOutAndRead(doc);
+ assertEquals("Nick Burch", doc2.getSummaryInformation().getAuthor());
+ }
+
+ /**
+ * Test for reading paragraphs from Range after replacing some
+ * text in this Range.
+ * Bug #45269
+ */
+ public void testReadParagraphsAfterReplaceText()throws Exception{
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
+ Range range = doc.getRange();
+
+ String toFind = "campo1";
+ String longer = " foi porraaaaa ";
+ String shorter = " foi ";
+
+ //check replace with longer text
+ for (int x = 0; x < range.numParagraphs(); x++) {
+ Paragraph para = range.getParagraph(x);
+ int offset = para.text().indexOf(toFind);
+ if (offset >= 0) {
+ para.replaceText(toFind, longer, offset);
+ assertEquals(offset, para.text().indexOf(longer));
+ }
+ }
+
+ doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
+ range = doc.getRange();
+
+ //check replace with shorter text
+ for (int x = 0; x < range.numParagraphs(); x++) {
+ Paragraph para = range.getParagraph(x);
+ int offset = para.text().indexOf(toFind);
+ if (offset >= 0) {
+ para.replaceText(toFind, shorter, offset);
+ assertEquals(offset, para.text().indexOf(shorter));
+ }
}
- }
-
- doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
- range = doc.getRange();
-
- //check replace with shorter text
- for (int x = 0; x < range.numParagraphs(); x++) {
- Paragraph para = range.getParagraph(x);
- int offset = para.text().indexOf(toFind);
- if (offset >= 0) {
- para.replaceText(toFind, shorter, offset);
- assertEquals(offset, para.text().indexOf(shorter));
+ }
+
+ /**
+ * Bug #48245 - don't include the text from the
+ * next cell in the current one
+ */
+ public void testTableIterator() throws Exception {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("simple-table2.doc");
+ Range r = doc.getRange();
+
+ // Check the text is as we'd expect
+ assertEquals(13, r.numParagraphs());
+ assertEquals("Row 1/Cell 1\u0007", r.getParagraph(0).text());
+ assertEquals("Row 1/Cell 2\u0007", r.getParagraph(1).text());
+ assertEquals("Row 1/Cell 3\u0007", r.getParagraph(2).text());
+ assertEquals("\u0007", r.getParagraph(3).text());
+ assertEquals("Row 2/Cell 1\u0007", r.getParagraph(4).text());
+ assertEquals("Row 2/Cell 2\u0007", r.getParagraph(5).text());
+ assertEquals("Row 2/Cell 3\u0007", r.getParagraph(6).text());
+ assertEquals("\u0007", r.getParagraph(7).text());
+ assertEquals("Row 3/Cell 1\u0007", r.getParagraph(8).text());
+ assertEquals("Row 3/Cell 2\u0007", r.getParagraph(9).text());
+ assertEquals("Row 3/Cell 3\u0007", r.getParagraph(10).text());
+ assertEquals("\u0007", r.getParagraph(11).text());
+ assertEquals("\r", r.getParagraph(12).text());
+ for(int i=0; i<=12; i++) {
+ assertFalse(r.getParagraph(i).usesUnicode());
}
- }
- }
+
+ Paragraph p;
+
+ // Take a look in detail at the first couple of
+ // paragraphs
+ p = r.getParagraph(0);
+ assertEquals(1, p.numParagraphs());
+ assertEquals(0, p.getStartOffset());
+ assertEquals(13, p.getEndOffset());
+ assertEquals(0, p._parStart);
+ assertEquals(1, p._parEnd);
+
+ p = r.getParagraph(1);
+ assertEquals(1, p.numParagraphs());
+ assertEquals(13, p.getStartOffset());
+ assertEquals(26, p.getEndOffset());
+ assertEquals(1, p._parStart);
+ assertEquals(2, p._parEnd);
+
+ p = r.getParagraph(2);
+ assertEquals(1, p.numParagraphs());
+ assertEquals(26, p.getStartOffset());
+ assertEquals(39, p.getEndOffset());
+ assertEquals(2, p._parStart);
+ assertEquals(3, p._parEnd);
+
+
+ // Now look at the table
+ Table table = r.getTable(r.getParagraph(0));
+ assertEquals(3, table.numRows());
+
+ TableRow row;
+ TableCell cell;
+
+
+ row = table.getRow(0);
+ assertEquals(0, row._parStart);
+ assertEquals(4, row._parEnd);
+
+ cell = row.getCell(0);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(0, cell._parStart);
+ assertEquals(1, cell._parEnd);
+ assertEquals(0, cell.getStartOffset());
+ assertEquals(13, cell.getEndOffset());
+ assertEquals("Row 1/Cell 1\u0007", cell.text());
+
+ cell = row.getCell(1);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(1, cell._parStart);
+ assertEquals(2, cell._parEnd);
+ assertEquals(13, cell.getStartOffset());
+ assertEquals(26, cell.getEndOffset());
+ assertEquals("Row 1/Cell 2\u0007", cell.text());
+
+ cell = row.getCell(2);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(2, cell._parStart);
+ assertEquals(3, cell._parEnd);
+ assertEquals(26, cell.getStartOffset());
+ assertEquals(39, cell.getEndOffset());
+ assertEquals("Row 1/Cell 3\u0007", cell.text());
+
+
+ // Onto row #2
+ row = table.getRow(1);
+ assertEquals(4, row._parStart);
+ assertEquals(8, row._parEnd);
+
+ cell = row.getCell(0);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(4, cell._parStart);
+ assertEquals(5, cell._parEnd);
+ assertEquals(40, cell.getStartOffset());
+ assertEquals(53, cell.getEndOffset());
+ assertEquals("Row 2/Cell 1\u0007", cell.text());
+
+ cell = row.getCell(1);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(5, cell._parStart);
+ assertEquals(6, cell._parEnd);
+ assertEquals(53, cell.getStartOffset());
+ assertEquals(66, cell.getEndOffset());
+ assertEquals("Row 2/Cell 2\u0007", cell.text());
+
+ cell = row.getCell(2);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(6, cell._parStart);
+ assertEquals(7, cell._parEnd);
+ assertEquals(66, cell.getStartOffset());
+ assertEquals(79, cell.getEndOffset());
+ assertEquals("Row 2/Cell 3\u0007", cell.text());
+
+
+ // Finally row 3
+ row = table.getRow(2);
+ assertEquals(8, row._parStart);
+ assertEquals(12, row._parEnd);
+
+ cell = row.getCell(0);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(8, cell._parStart);
+ assertEquals(9, cell._parEnd);
+ assertEquals(80, cell.getStartOffset());
+ assertEquals(93, cell.getEndOffset());
+ assertEquals("Row 3/Cell 1\u0007", cell.text());
+
+ cell = row.getCell(1);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(9, cell._parStart);
+ assertEquals(10, cell._parEnd);
+ assertEquals(93, cell.getStartOffset());
+ assertEquals(106, cell.getEndOffset());
+ assertEquals("Row 3/Cell 2\u0007", cell.text());
+
+ cell = row.getCell(2);
+ assertEquals(1, cell.numParagraphs());
+ assertEquals(10, cell._parStart);
+ assertEquals(11, cell._parEnd);
+ assertEquals(106, cell.getStartOffset());
+ assertEquals(119, cell.getEndOffset());
+ assertEquals("Row 3/Cell 3\u0007", cell.text());
+ }
}
diff --git a/test-data/document/simple-table2.doc b/test-data/document/simple-table2.doc
new file mode 100644
index 0000000000..1a5ff5f710
--- /dev/null
+++ b/test-data/document/simple-table2.doc
Binary files differ