More range tests, which show that we do have a bug in the HWPF Unicode support

author Nick Burch <nick@apache.org>

Sat, 9 Aug 2008 17:58:35 +0000 (17:58 +0000)

committer Nick Burch <nick@apache.org>

Sat, 9 Aug 2008 17:58:35 +0000 (17:58 +0000)
author Nick Burch <nick@apache.org>
Sat, 9 Aug 2008 17:58:35 +0000 (17:58 +0000)
committer Nick Burch <nick@apache.org>
Sat, 9 Aug 2008 17:58:35 +0000 (17:58 +0000)
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java b/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java

index 9a19344e2770a8289e6cc4cad3edceab8123bc97..c0aa62569a7e53e98fc2a5b9af94e2b6f03c0dee 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFRangeParts.java
@@ -29,7 +29,18 @@ import junit.framework.TestCase;
   *  the different ranges
   */
  public class TestHWPFRangeParts extends TestCase {
-       private static final String page_1 =
+       private static final char page_break = (char)12;
+       private static final String headerDef = 
+               "\u0003\r\r" +
+               "\u0004\r\r" +
+               "\u0003\r\r" +
+               "\u0004\r\r"
+       ;
+       private static final String footerDef = "\r";
+       private static final String endHeaderFooter = "\r\r";
+
+       
+       private static final String a_page_1 =
                 "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\r" +
                 "\r" +
                 "HEADING TEXT\r" + 
@@ -38,37 +49,60 @@ public class TestHWPFRangeParts extends TestCase {
                 "\r\r" +
                 "End of page 1\r"
         ;
-       private static final char page_break = (char)12;
-       private static final String page_2 =
+       private static final String a_page_2 =
                 "This is page two. It also has a three column heading, and a three column footer.\r"
         ;
         
-       private static final String headerDef = 
-               "\u0003\r\r" +
-               "\u0004\r\r" +
-               "\u0003\r\r" +
-               "\u0004\r\r"
-       ;
-       private static final String header =
+       private static final String a_header =
                 "First header column!\tMid header Right header!\r"
         ;
-       private static final String footerDef = 
-               "\r"
-       ;
-       private static final String footer =
+       private static final String a_footer =
                 "Footer Left\tFooter Middle Footer Right\r"
         ;
-       private static final String endHeaderFooter =
-               "\r\r"
+       
+       
+       private static final String u_page_1 =
+               "This is a fairly simple word document, over two pages, with headers and footers.\r" +
+               "The trick with this one is that it contains some Unicode based strings in it.\r" +
+               "Firstly, some currency symbols:\r" +
+               "\tGBP - \u00a3\r" +
+        "\tEUR - \u20ac\r" +
+        "Now, we\u2019ll have some French text, in bold and big:\r" +
+        "\tMoli\u00e8re\r" +
+        "And some normal French text:\r" +
+        "\tL'Avare ou l'\u00c9cole du mensonge\r" +
+        "That\u2019s it for page one\r"
+       ;
+       private static final String u_page_2 =
+               "This is page two. Les Pr\u00e9cieuses ridicules. The end.\r"
+       ;
+       
+       private static final String u_header =
+               "This is a simple header, with a \u20ac euro symbol in it.\r"
+       ;
+       private static final String u_footer =
+               "The footer, with Moli\u00e8re, has Unicode in it.\r"
         ;
         
-       private HWPFDocument doc;
+       /**
+        * A document made up only of basic ASCII text
+        */
+       private HWPFDocument docAscii;
+       /**
+        * A document with some Unicode in it too
+        */
+       private HWPFDocument docUnicode;
         
         public void setUp() throws Exception {
-               String filename = System.getProperty("HWPF.testdata.path");
-               filename = filename + "/ThreeColHeadFoot.doc";
+               String dirname = System.getProperty("HWPF.testdata.path");
                 
-               doc = new HWPFDocument(
+               String filename = dirname + "/HeaderFooterUnicode.doc";
+               docUnicode = new HWPFDocument(
+                               new FileInputStream(filename)
+               );
+               
+               filename = dirname + "/ThreeColHeadFoot.doc";
+               docAscii = new HWPFDocument(
                                 new FileInputStream(filename)
                 );
         }
@@ -77,32 +111,32 @@ public class TestHWPFRangeParts extends TestCase {
                 // First check the start and end bits
                 assertEquals(
                                 0,
-                               doc._cpSplit.getMainDocumentStart()
+                               docAscii._cpSplit.getMainDocumentStart()
                 );
                 assertEquals(
-                               page_1.length() +
+                               a_page_1.length() +
                                 2 + // page break
-                               page_2.length(),
-                               doc._cpSplit.getMainDocumentEnd()
+                               a_page_2.length(),
+                               docAscii._cpSplit.getMainDocumentEnd()
                 );
                 
                 assertEquals(
                                 238,
-                               doc._cpSplit.getFootnoteStart()
+                               docAscii._cpSplit.getFootnoteStart()
                 );
                 assertEquals(
                                 238,
-                               doc._cpSplit.getFootnoteEnd()
+                               docAscii._cpSplit.getFootnoteEnd()
                 );
                 
                 assertEquals(
                                 238,
-                               doc._cpSplit.getHeaderStoryStart()
+                               docAscii._cpSplit.getHeaderStoryStart()
                 );
                 assertEquals(
-                               238 + headerDef.length() + header.length() +
-                               footerDef.length() + footer.length() + endHeaderFooter.length(),
-                               doc._cpSplit.getHeaderStoryEnd()
+                               238 + headerDef.length() + a_header.length() +
+                               footerDef.length() + a_footer.length() + endHeaderFooter.length(),
+                               docAscii._cpSplit.getHeaderStoryEnd()
                 );
         }
         
@@ -110,33 +144,104 @@ public class TestHWPFRangeParts extends TestCase {
                 Range r;
                 
                 // Now check the real ranges
-               r = doc.getRange();
+               r = docAscii.getRange();
+               assertEquals(
+                               a_page_1 +
+                               page_break + "\r" +
+                               a_page_2,
+                               r.text()
+               );
+               
+               r = docAscii.getHeaderStoryRange();
+               assertEquals(
+                               headerDef +
+                               a_header +
+                               footerDef +
+                               a_footer + 
+                               endHeaderFooter,
+                               r.text()
+               );
+               
+               r = docAscii.getOverallRange();
+               assertEquals(
+                               a_page_1 +
+                               page_break + "\r" +
+                               a_page_2 + 
+                               headerDef +
+                               a_header +
+                               footerDef +
+                               a_footer + 
+                               endHeaderFooter +
+                               "\r",
+                               r.text()
+               );
+       }
+       
+       public void testBasicsUnicode() throws Exception {
+               // First check the start and end bits
+               assertEquals(
+                               0,
+                               docUnicode._cpSplit.getMainDocumentStart()
+               );
+               assertEquals(
+                               u_page_1.length() +
+                               2 + // page break
+                               u_page_2.length(),
+                               docUnicode._cpSplit.getMainDocumentEnd()
+               );
+               
+               assertEquals(
+                               408,
+                               docUnicode._cpSplit.getFootnoteStart()
+               );
+               assertEquals(
+                               408,
+                               docUnicode._cpSplit.getFootnoteEnd()
+               );
+               
+               assertEquals(
+                               408,
+                               docUnicode._cpSplit.getHeaderStoryStart()
+               );
+               // TODO - fix this one: the header story end check below (presumably hits the HWPF Unicode bug - confirm)
+               assertEquals(
+                               408 + headerDef.length() + u_header.length() +
+                               footerDef.length() + u_footer.length() + endHeaderFooter.length(),
+                               docUnicode._cpSplit.getHeaderStoryEnd()
+               );
+       }
+       
+       public void testContentsUnicode() throws Exception {
+               Range r;
+               
+               // Now check the real ranges
+               r = docUnicode.getRange();
                 assertEquals(
-                               page_1 +
+                               u_page_1 +
                                 page_break + "\r" +
-                               page_2,
+                               u_page_2,
                                 r.text()
                 );
                 
-               r = doc.getHeaderStoryRange();
+               r = docUnicode.getHeaderStoryRange();
                 assertEquals(
                                 headerDef +
-                               header +
+                               u_header +
                                 footerDef +
-                               footer + 
+                               u_footer + 
                                 endHeaderFooter,
                                 r.text()
                 );
                 
-               r = doc.getOverallRange();
+               r = docUnicode.getOverallRange();
                 assertEquals(
-                               page_1 +
+                               u_page_1 +
                                 page_break + "\r" +
-                               page_2 + 
+                               u_page_2 + 
                                 headerDef +
-                               header +
+                               u_header +
                                 footerDef +
-                               footer + 
+                               u_footer + 
                                 endHeaderFooter +
                                 "\r",
                                 r.text()
author	Nick Burch <nick@apache.org>
	Sat, 9 Aug 2008 17:58:35 +0000 (17:58 +0000)
committer	Nick Burch <nick@apache.org>
	Sat, 9 Aug 2008 17:58:35 +0000 (17:58 +0000)