]> source.dussan.org Git - poi.git/commitdiff
Unit test for bugs #54880 & #55030 - seems ok so far
author Nick Burch <nick@apache.org>
Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)
committer Nick Burch <nick@apache.org>
Fri, 31 May 2013 21:17:55 +0000 (21:17 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1488403 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java

index cddacf9fed2286560cb71a04ffa4a387f7ac41f3..104637c27b2457802f0d4c8b22dd2f9e0511ae2a 100644 (file)
@@ -329,6 +329,27 @@ public final class TestExtractor extends TestCase {
        assertContains(text, masterText);
     }
 
+    /**
+     * Bug #54880 Chinese text not extracted properly
+     */
+    public void testChineseText() throws Exception {
+       HSLFSlideShow hslf = new HSLFSlideShow(slTests.openResourceAsStream("54880_chinese.ppt"));
+       ppe = new PowerPointExtractor(hslf);
+       
+       String text = ppe.getText();
+       
+       // Check for the english text line
+       assertContains(text, "Single byte");
+       
+       // Check for the english text in the mixed line
+       assertContains(text, "Mix");
+       
+       // Check for the chinese text in the mixed line - 表
+       assertContains(text, "\u8868");
+       
+       // Check for the "chinese only" text line - ﾊﾝｶｸ (the escapes below are half-width katakana)
+       assertContains(text, "\uff8a\uff9d\uff76\uff78");
+    }
     
     /**
      * Tests that we can work with both {@link POIFSFileSystem}
index c94f91f2ad4857a6616bfc64cd9a4c516b1298ee..63acbaceb5864be473b91a7ab82802ddd313c9f5 100644 (file)
@@ -38,24 +38,25 @@ import org.apache.poi.POIDataSamples;
  * @author Nick Burch (nick at torchbox dot com)
  */
 public final class TestRichTextRun extends TestCase {
-    private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
-
-       // SlideShow primed on the test data
-       private SlideShow ss;
-       private SlideShow ssRichA;
-       private SlideShow ssRichB;
-       private SlideShow ssRichC;
-       private HSLFSlideShow hss;
-       private HSLFSlideShow hssRichA;
-       private HSLFSlideShow hssRichB;
-       private HSLFSlideShow hssRichC;
-       private static String filenameC;
-
-       protected void setUp() throws Exception {
-
-               // Basic (non rich) test file
-        hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
-               ss = new SlideShow(hss);
+   private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
+
+   // SlideShow primed on the test data
+   private SlideShow ss;
+   private SlideShow ssRichA;
+   private SlideShow ssRichB;
+   private SlideShow ssRichC;
+   private SlideShow ssChinese;
+   private HSLFSlideShow hss;
+   private HSLFSlideShow hssRichA;
+   private HSLFSlideShow hssRichB;
+   private HSLFSlideShow hssRichC;
+   private HSLFSlideShow hssChinese;
+   private static String filenameC;
+
+   protected void setUp() throws Exception {
+      // Basic (non rich) test file
+      hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
+      ss = new SlideShow(hss);
 
                // Rich test file A
                hssRichA = new HSLFSlideShow(_slTests.openResourceAsStream("Single_Coloured_Page.ppt"));
@@ -70,8 +71,18 @@ public final class TestRichTextRun extends TestCase {
                filenameC = "ParagraphStylesShorterThanCharStyles.ppt";
         hssRichC = new HSLFSlideShow(_slTests.openResourceAsStream(filenameC));
                ssRichC = new SlideShow(hssRichC);
+               
+               // Rich test file with Chinese + English text in it
+      hssChinese = new HSLFSlideShow(_slTests.openResourceAsStream("54880_chinese.ppt"));
+      ssChinese = new SlideShow(hssChinese);
        }
 
+   private static void assertContains(String haystack, String needle) {
+      assertTrue(
+            "Unable to find expected text '" + needle + "' in text:\n" + haystack,
+            haystack.contains(needle)
+      );
+   }
        /**
         * Test the stuff about getting/setting bold
         *  on a non rich text run
@@ -623,4 +634,37 @@ if(false) {
 //             FileOutputStream fout = new FileOutputStream("/tmp/foo.ppt");
 //             ppt.write(fout);
        }
+       
+       public void testChineseParagraphs() throws Exception {
+      RichTextRun[] rts;
+      RichTextRun rt;
+      TextRun[] txt;
+      Slide[] slides = ssChinese.getSlides();
+
+      // One slide
+      assertEquals(1, slides.length);
+      
+      // One block of text within that
+      txt = slides[0].getTextRuns();
+      assertEquals(1, txt.length);
+      
+      // One rich block of text in that - text is all the same style
+      // TODO Confirm that a single rich text run (one style for the whole block) is really the correct expectation
+      rts = txt[0].getRichTextRuns();
+      assertEquals(1, rts.length);
+      rt = rts[0];
+      
+      // Check we can get the english text out of that
+      String text = rt.getText();
+      assertContains(text, "Single byte");
+      // And the "chinese" text - ﾊﾝｶｸ (the escapes below are half-width katakana)
+      assertContains(text, "\uff8a\uff9d\uff76\uff78");
+      
+      // It isn't bold or italic
+      assertFalse(rt.isBold());
+      assertFalse(rt.isItalic());
+      
+      // Font is Calibri
+      assertEquals("Calibri", rt.getFontName());
+       }
 }