Unit test for bugs #54880 & #55030 - seems ok so far

author Nick Burch <nick@apache.org>

Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)

committer Nick Burch <nick@apache.org>

Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)
author Nick Burch <nick@apache.org>
Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)
committer Nick Burch <nick@apache.org>
Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java

index cddacf9fed2286560cb71a04ffa4a387f7ac41f3..104637c27b2457802f0d4c8b22dd2f9e0511ae2a 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -329,6 +329,27 @@ public final class TestExtractor extends TestCase {
         assertContains(text, masterText);
      }
  
+    /**
+     * Bug #54880 Chinese text not extracted properly
+     */
+    public void testChineseText() throws Exception {
+       HSLFSlideShow hslf = new HSLFSlideShow(slTests.openResourceAsStream("54880_chinese.ppt"));
+       ppe = new PowerPointExtractor(hslf);
+       
+       String text = ppe.getText();
+       
+       // Check for the english text line
+       assertContains(text, "Single byte");
+       
+       // Check for the english text in the mixed line
+       assertContains(text, "Mix");
+       
+       // Check for the chinese text in the mixed line - 表
+       assertContains(text, "\u8868");
+       
+       // Check for the chinese only text line - ﾊﾝｶｸ
+       assertContains(text, "\uff8a\uff9d\uff76\uff78");
+    }
      
      /**
       * Tests that we can work with both {@link POIFSFileSystem}
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java

index c94f91f2ad4857a6616bfc64cd9a4c516b1298ee..63acbaceb5864be473b91a7ab82802ddd313c9f5 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java
@@ -38,24 +38,25 @@ import org.apache.poi.POIDataSamples;
   * @author Nick Burch (nick at torchbox dot com)
   */
  public final class TestRichTextRun extends TestCase {
-    private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
-
-       // SlideShow primed on the test data
-       private SlideShow ss;
-       private SlideShow ssRichA;
-       private SlideShow ssRichB;
-       private SlideShow ssRichC;
-       private HSLFSlideShow hss;
-       private HSLFSlideShow hssRichA;
-       private HSLFSlideShow hssRichB;
-       private HSLFSlideShow hssRichC;
-       private static String filenameC;
-
-       protected void setUp() throws Exception {
-
-               // Basic (non rich) test file
-        hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
-               ss = new SlideShow(hss);
+   private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
+
+   // SlideShow primed on the test data
+   private SlideShow ss;
+   private SlideShow ssRichA;
+   private SlideShow ssRichB;
+   private SlideShow ssRichC;
+   private SlideShow ssChinese;
+   private HSLFSlideShow hss;
+   private HSLFSlideShow hssRichA;
+   private HSLFSlideShow hssRichB;
+   private HSLFSlideShow hssRichC;
+   private HSLFSlideShow hssChinese;
+   private static String filenameC;
+
+   protected void setUp() throws Exception {
+      // Basic (non rich) test file
+      hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
+      ss = new SlideShow(hss);
  
                 // Rich test file A
                 hssRichA = new HSLFSlideShow(_slTests.openResourceAsStream("Single_Coloured_Page.ppt"));
@@ -70,8 +71,18 @@ public final class TestRichTextRun extends TestCase {
                 filenameC = "ParagraphStylesShorterThanCharStyles.ppt";
          hssRichC = new HSLFSlideShow(_slTests.openResourceAsStream(filenameC));
                 ssRichC = new SlideShow(hssRichC);
+               
+               // Rich test file with Chinese + English text in it
+      hssChinese = new HSLFSlideShow(_slTests.openResourceAsStream("54880_chinese.ppt"));
+      ssChinese = new SlideShow(hssChinese);
         }
  
+   private static void assertContains(String haystack, String needle) {
+      // Fail, quoting both strings, when needle does not occur in haystack
+      boolean found = haystack.contains(needle);
+      String failure = "Unable to find expected text '" + needle + "' in text:\n" + haystack;
+      assertTrue(failure, found);
+   }
         /**
          * Test the stuff about getting/setting bold
          *  on a non rich text run
@@ -623,4 +634,37 @@ if(false) {
  //             FileOutputStream fout = new FileOutputStream("/tmp/foo.ppt");
  //             ppt.write(fout);
         }
+       
+       public void testChineseParagraphs() throws Exception {
+      RichTextRun[] rts;
+      RichTextRun rt;
+      TextRun[] txt;
+      Slide[] slides = ssChinese.getSlides();
+
+      // One slide
+      assertEquals(1, slides.length);
+      
+      // One block of text within that
+      txt = slides[0].getTextRuns();
+      assertEquals(1, txt.length);
+      
+      // One rich block of text in that - text is all the same style
+      // TODO Is this completely correct?
+      rts = txt[0].getRichTextRuns();
+      assertEquals(1, rts.length);
+      rt = rts[0];
+      
+      // Check we can get the english text out of that
+      String text = rt.getText();
+      assertContains(text, "Single byte");
+      // And the chinese - ﾊﾝｶｸ
+      assertContains(text, "\uff8a\uff9d\uff76\uff78");
+      
+      // It isn't bold or italic
+      assertFalse(rt.isBold());
+      assertFalse(rt.isItalic());
+      
+      // Font is Calibri
+      assertEquals("Calibri", rt.getFontName());
+       }
  }
author	Nick Burch <nick@apache.org>
	Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)
committer	Nick Burch <nick@apache.org>
	Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)
src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java		patch \| blob \| history