diff options
author | Andreas Beeker <kiwiwings@apache.org> | 2018-04-18 15:02:02 +0000 |
---|---|---|
committer | Andreas Beeker <kiwiwings@apache.org> | 2018-04-18 15:02:02 +0000 |
commit | f395630abd891b9d4ee09932c309d566d8360860 (patch) | |
tree | 13657f32c2e9f9ee2fc933a11cd47791f328aff9 /src/scratchpad/testcases/org | |
parent | 1079e66ea7cc4a53d4d4a7e54a615bddbfeaaa12 (diff) | |
download | poi-f395630abd891b9d4ee09932c309d566d8360860.tar.gz poi-f395630abd891b9d4ee09932c309d566d8360860.zip |
Bug 62092 - Text not extracted from grouped text shapes in HSLF
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1829453 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad/testcases/org')
-rw-r--r-- | src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java | 62 | ||||
-rw-r--r-- | src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java | 15 |
2 files changed, 48 insertions, 29 deletions
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java index f2700d4fac..6a34d1af3f 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java @@ -33,9 +33,9 @@ import java.io.InputStream; import java.util.List; import org.apache.poi.POIDataSamples; +import org.apache.poi.hslf.usermodel.HSLFObjectShape; import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl; -import org.apache.poi.hslf.usermodel.HSLFObjectShape; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.poifs.filesystem.DirectoryNode; @@ -89,12 +89,12 @@ public final class TestExtractor { public void testReadSheetText() throws IOException { // Basic 2 page example PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt"); - ensureTwoStringsTheSame(expectText, ppe.getText()); + assertEquals(expectText, ppe.getText()); ppe.close(); // 1 page example with text boxes PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt"); - ensureTwoStringsTheSame(expectText2, ppe2.getText()); + assertEquals(expectText2, ppe2.getText()); ppe2.close(); } @@ -103,15 +103,15 @@ public final class TestExtractor { // Basic 2 page example PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt"); String notesText = ppe.getNotes(); - String expText = "These are the notes for page 1\nThese are the notes on page two, again lacking formatting\n"; - ensureTwoStringsTheSame(expText, notesText); + String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n"; + assertEquals(expText, notesText); ppe.close(); // Other one doesn't have notes PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt"); notesText = ppe2.getNotes(); expText = ""; - ensureTwoStringsTheSame(expText, notesText); + assertEquals(expText, notesText); ppe2.close(); } @@ -122,8 +122,8 @@ public final class TestExtractor { "This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n" }; String[] ntText = new String[]{ - "These are the notes for page 1\n", - "These are the notes on page two, again lacking formatting\n" + "\nThese are the notes for page 1\n", + "\nThese are the notes on page two, again lacking formatting\n" }; PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt"); @@ -137,7 +137,7 @@ public final class TestExtractor { ppe.setSlidesByDefault(true); ppe.setNotesByDefault(true); - assertEquals(slText[0] + slText[1] + "\n" + ntText[0] + ntText[1], ppe.getText()); + assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText()); ppe.close(); } @@ -166,16 +166,6 @@ public final class TestExtractor { ppe.close(); } - private void ensureTwoStringsTheSame(String exp, String act) { - assertEquals(exp.length(), act.length()); - char[] expC = exp.toCharArray(); - char[] actC = act.toCharArray(); - for (int i = 0; i < expC.length; i++) { - assertEquals("Char " + i, expC[i], actC[i]); - } - assertEquals(exp, act); - } - @Test public void testExtractFromEmbeded() throws IOException { InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls"); @@ -454,4 +444,38 @@ public final class TestExtractor { assertContains(text, "Prague"); ppe.close(); } + + @Test + public void testExtractGroupedShapeText() throws Exception { + try (final PowerPointExtractor ppe = openExtractor("bug62092.ppt")) { + final String text = ppe.getText(); + + //this tests that we're ignoring text shapes at depth=0 + //i.e. POI has already included them in the slide's getTextParagraphs() + assertContains(text, "Text box1"); + assertEquals(1, countMatches(text,"Text box1")); + + + //the WordArt and text box count tests will fail + //if this content is available via getTextParagraphs() of the slide in POI + //i.e. when POI is fixed, these tests will fail, and + //we'll have to remove the workaround in HSLFExtractor's extractGroupText(...) + assertEquals(1, countMatches(text,"WordArt1")); + assertEquals(1, countMatches(text,"WordArt2")); + assertEquals(1, countMatches(text,"Ungrouped text box"));//should only be 1 + assertContains(text, "Text box2"); + assertContains(text, "Text box3"); + assertContains(text, "Text box4"); + assertContains(text, "Text box5"); + + //see below -- need to extract hyperlinks + assertContains(text, "tika"); + assertContains(text, "MyTitle"); + + } + } + + private static int countMatches(final String base, final String find) { + return base.split(find).length-1; + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java b/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java index 814259ca15..dbf89692bb 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java @@ -18,10 +18,10 @@ package org.apache.poi.hslf.record; -import static org.junit.Assert.assertEquals; - import org.junit.Test; +import static org.junit.Assert.assertEquals; + /** * Tests that RecordTypes returns the right records and classes when asked */ @@ -42,20 +42,15 @@ public final class TestRecordTypes { @Test public void testPPTClassLookups() { - assertEquals(Slide.class, RecordTypes.Slide.handlingClass); - assertEquals(TextCharsAtom.class, RecordTypes.TextCharsAtom.handlingClass); - assertEquals(TextBytesAtom.class, RecordTypes.TextBytesAtom.handlingClass); - assertEquals(SlideListWithText.class, RecordTypes.SlideListWithText.handlingClass); - // If this record is ever implemented, change to one that isn't! // This is checking the "unhandled default" stuff works - assertEquals(UnknownRecordPlaceholder.class, RecordTypes.forTypeID(-10).handlingClass); + assertEquals(RecordTypes.UnknownRecordPlaceholder, RecordTypes.forTypeID(-10)); } @Test public void testEscherClassLookups() { // Should all come back with null, as DDF handles them - assertEquals(null, RecordTypes.EscherDggContainer.handlingClass); - assertEquals(null, RecordTypes.EscherBStoreContainer.handlingClass); + assertEquals(null, RecordTypes.EscherDggContainer.recordConstructor); + assertEquals(null, RecordTypes.EscherBStoreContainer.recordConstructor); } } |