From: Nick Burch Date: Fri, 31 May 2013 21:17:55 +0000 (+0000) Subject: Unit test for bugs #54880 & #55030 - seems ok so far X-Git-Tag: 3.10-beta1~39 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=c225df5b713c0ce21005a508cb2d85ccc6030b28;p=poi.git Unit test for bugs #54880 & #55030 - seems ok so far git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1488403 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java index cddacf9fed..104637c27b 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java @@ -329,6 +329,27 @@ public final class TestExtractor extends TestCase { assertContains(text, masterText); } + /** + * Bug #54880 Chinese text not extracted properly + */ + public void testChineseText() throws Exception { + HSLFSlideShow hslf = new HSLFSlideShow(slTests.openResourceAsStream("54880_chinese.ppt")); + ppe = new PowerPointExtractor(hslf); + + String text = ppe.getText(); + + // Check for the english text line + assertContains(text, "Single byte"); + + // Check for the english text in the mixed line + assertContains(text, "Mix"); + + // Check for the chinese text in the mixed line - 表 + assertContains(text, "\u8868"); + + // Check for the CJK-only text line - ﾊﾝｶｸ (half-width katakana) + assertContains(text, "\uff8a\uff9d\uff76\uff78"); + } /** * Tests that we can work with both {@link POIFSFileSystem} diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java index c94f91f2ad..63acbaceb5 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java @@ -38,24 +38,25 @@ import 
org.apache.poi.POIDataSamples; * @author Nick Burch (nick at torchbox dot com) */ public final class TestRichTextRun extends TestCase { - private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance(); - - // SlideShow primed on the test data - private SlideShow ss; - private SlideShow ssRichA; - private SlideShow ssRichB; - private SlideShow ssRichC; - private HSLFSlideShow hss; - private HSLFSlideShow hssRichA; - private HSLFSlideShow hssRichB; - private HSLFSlideShow hssRichC; - private static String filenameC; - - protected void setUp() throws Exception { - - // Basic (non rich) test file - hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt")); - ss = new SlideShow(hss); + private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance(); + + // SlideShow primed on the test data + private SlideShow ss; + private SlideShow ssRichA; + private SlideShow ssRichB; + private SlideShow ssRichC; + private SlideShow ssChinese; + private HSLFSlideShow hss; + private HSLFSlideShow hssRichA; + private HSLFSlideShow hssRichB; + private HSLFSlideShow hssRichC; + private HSLFSlideShow hssChinese; + private static String filenameC; + + protected void setUp() throws Exception { + // Basic (non rich) test file + hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt")); + ss = new SlideShow(hss); // Rich test file A hssRichA = new HSLFSlideShow(_slTests.openResourceAsStream("Single_Coloured_Page.ppt")); @@ -70,8 +71,18 @@ public final class TestRichTextRun extends TestCase { filenameC = "ParagraphStylesShorterThanCharStyles.ppt"; hssRichC = new HSLFSlideShow(_slTests.openResourceAsStream(filenameC)); ssRichC = new SlideShow(hssRichC); + + // Rich test file with Chinese + English text in it + hssChinese = new HSLFSlideShow(_slTests.openResourceAsStream("54880_chinese.ppt")); + ssChinese = new SlideShow(hssChinese); } + private static void assertContains(String haystack, String needle) { + assertTrue( 
+ "Unable to find expected text '" + needle + "' in text:\n" + haystack, + haystack.contains(needle) + ); + } /** * Test the stuff about getting/setting bold * on a non rich text run @@ -623,4 +634,37 @@ if(false) { // FileOutputStream fout = new FileOutputStream("/tmp/foo.ppt"); // ppt.write(fout); } + + public void testChineseParagraphs() throws Exception { + RichTextRun[] rts; + RichTextRun rt; + TextRun[] txt; + Slide[] slides = ssChinese.getSlides(); + + // One slide + assertEquals(1, slides.length); + + // One block of text within that + txt = slides[0].getTextRuns(); + assertEquals(1, txt.length); + + // One rich block of text in that - text is all the same style + // TODO Is this completely correct? + rts = txt[0].getRichTextRuns(); + assertEquals(1, rts.length); + rt = rts[0]; + + // Check we can get the english text out of that + String text = rt.getText(); + assertContains(text, "Single byte"); + // And the half-width katakana - ﾊﾝｶｸ + assertContains(text, "\uff8a\uff9d\uff76\uff78"); + + // It isn't bold or italic + assertFalse(rt.isBold()); + assertFalse(rt.isItalic()); + + // Font is Calibri + assertEquals("Calibri", rt.getFontName()); + } }