From: Nick Burch Date: Sat, 9 Aug 2008 17:58:35 +0000 (+0000) Subject: More range tests, which show that we do have a bug in the hwpf unicode support X-Git-Tag: REL_3_2_FINAL~182 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=7d80941f462aa9a4e38b7d139bd74bedebd76848;p=poi.git More range tests, which show that we do have a bug in the hwpf unicode support git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@684309 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java b/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java index 9a19344e27..c0aa62569a 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java @@ -29,7 +29,18 @@ import junit.framework.TestCase; * the different ranges */ public class TestHWPFRangeParts extends TestCase { - private static final String page_1 = + private static final char page_break = (char)12; + private static final String headerDef = + "\u0003\r\r" + + "\u0004\r\r" + + "\u0003\r\r" + + "\u0004\r\r" + ; + private static final String footerDef = "\r"; + private static final String endHeaderFooter = "\r\r"; + + + private static final String a_page_1 = "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\r" + "\r" + "HEADING TEXT\r" + @@ -38,37 +49,60 @@ public class TestHWPFRangeParts extends TestCase { "\r\r" + "End of page 1\r" ; - private static final char page_break = (char)12; - private static final String page_2 = + private static final String a_page_2 = "This is page two. It also has a three column heading, and a three column footer.\r" ; - private static final String headerDef = - "\u0003\r\r" + - "\u0004\r\r" + - "\u0003\r\r" + - "\u0004\r\r" - ; - private static final String header = + private static final String a_header = "First header column!\tMid header Right header!\r" ; - private static final String footerDef = - "\r" - ; - private static final String footer = + private static final String a_footer = "Footer Left\tFooter Middle Footer Right\r" ; - private static final String endHeaderFooter = - "\r\r" + + + private static final String u_page_1 = + "This is a fairly simple word document, over two pages, with headers and footers.\r" + + "The trick with this one is that it contains some Unicode based strings in it.\r" + + "Firstly, some currency symbols:\r" + + "\tGBP - \u00a3\r" + + "\tEUR - \u20ac\r" + + "Now, we\u2019ll have some French text, in bold and big:\r" + + "\tMoli\u00e8re\r" + + "And some normal French text:\r" + + "\tL'Avare ou l'\u00c9cole du mensonge\r" + + "That\u2019s it for page one\r" + ; + private static final String u_page_2 = + "This is page two. Les Pr\u00e9cieuses ridicules. The end.\r" + ; + + private static final String u_header = + "This is a simple header, with a \u20ac euro symbol in it.\r" + ; + private static final String u_footer = + "The footer, with Moli\u00e8re, has Unicode in it.\r" ; - private HWPFDocument doc; + /** + * A document made up only of basic ASCII text + */ + private HWPFDocument docAscii; + /** + * A document with some unicode in it too + */ + private HWPFDocument docUnicode; public void setUp() throws Exception { - String filename = System.getProperty("HWPF.testdata.path"); - filename = filename + "/ThreeColHeadFoot.doc"; + String dirname = System.getProperty("HWPF.testdata.path"); - doc = new HWPFDocument( + String filename = dirname + "/HeaderFooterUnicode.doc"; + docUnicode = new HWPFDocument( + new FileInputStream(filename) + ); + + filename = dirname + "/ThreeColHeadFoot.doc"; + docAscii = new HWPFDocument( new FileInputStream(filename) ); } @@ -77,32 +111,32 @@ public class TestHWPFRangeParts extends TestCase { // First check the start and end bits assertEquals( 0, - doc._cpSplit.getMainDocumentStart() + docAscii._cpSplit.getMainDocumentStart() ); assertEquals( - page_1.length() + + a_page_1.length() + 2 + // page break - page_2.length(), - doc._cpSplit.getMainDocumentEnd() + a_page_2.length(), + docAscii._cpSplit.getMainDocumentEnd() ); assertEquals( 238, - doc._cpSplit.getFootnoteStart() + docAscii._cpSplit.getFootnoteStart() ); assertEquals( 238, - doc._cpSplit.getFootnoteEnd() + docAscii._cpSplit.getFootnoteEnd() ); assertEquals( 238, - doc._cpSplit.getHeaderStoryStart() + docAscii._cpSplit.getHeaderStoryStart() ); assertEquals( - 238 + headerDef.length() + header.length() + - footerDef.length() + footer.length() + endHeaderFooter.length(), - doc._cpSplit.getHeaderStoryEnd() + 238 + headerDef.length() + a_header.length() + + footerDef.length() + a_footer.length() + endHeaderFooter.length(), + docAscii._cpSplit.getHeaderStoryEnd() ); } @@ -110,33 +144,104 @@ public class TestHWPFRangeParts extends TestCase { Range r; // Now check the real ranges - r = doc.getRange(); + r = docAscii.getRange(); + assertEquals( + a_page_1 + + page_break + "\r" + + a_page_2, + r.text() + ); + + r = docAscii.getHeaderStoryRange(); + assertEquals( + headerDef + + a_header + + footerDef + + a_footer + + endHeaderFooter, + r.text() + ); + + r = docAscii.getOverallRange(); + assertEquals( + a_page_1 + + page_break + "\r" + + a_page_2 + + headerDef + + a_header + + footerDef + + a_footer + + endHeaderFooter + + "\r", + r.text() + ); + } + + public void testBasicsUnicode() throws Exception { + // First check the start and end bits + assertEquals( + 0, + docUnicode._cpSplit.getMainDocumentStart() + ); + assertEquals( + u_page_1.length() + + 2 + // page break + u_page_2.length(), + docUnicode._cpSplit.getMainDocumentEnd() + ); + + assertEquals( + 408, + docUnicode._cpSplit.getFootnoteStart() + ); + assertEquals( + 408, + docUnicode._cpSplit.getFootnoteEnd() + ); + + assertEquals( + 408, + docUnicode._cpSplit.getHeaderStoryStart() + ); + // TODO - fix this one + assertEquals( + 408 + headerDef.length() + u_header.length() + + footerDef.length() + u_footer.length() + endHeaderFooter.length(), + docUnicode._cpSplit.getHeaderStoryEnd() + ); + } + + public void testContentsUnicode() throws Exception { + Range r; + + // Now check the real ranges + r = docUnicode.getRange(); assertEquals( - page_1 + + u_page_1 + page_break + "\r" + - page_2, + u_page_2, r.text() ); - r = doc.getHeaderStoryRange(); + r = docUnicode.getHeaderStoryRange(); assertEquals( headerDef + - header + + u_header + footerDef + - footer + + u_footer + endHeaderFooter, r.text() ); - r = doc.getOverallRange(); + r = docUnicode.getOverallRange(); assertEquals( - page_1 + + u_page_1 + page_break + "\r" + - page_2 + + u_page_2 + headerDef + - header + + u_header + footerDef + - footer + + u_footer + endHeaderFooter + "\r", r.text()