about summary refs log tree commit diff stats
path: root/src/scratchpad/testcases/org
diff options
context:
space:
mode:
author Andreas Beeker <kiwiwings@apache.org> 2018-04-18 15:02:02 +0000
committer Andreas Beeker <kiwiwings@apache.org> 2018-04-18 15:02:02 +0000
commit f395630abd891b9d4ee09932c309d566d8360860 (patch)
tree 13657f32c2e9f9ee2fc933a11cd47791f328aff9 /src/scratchpad/testcases/org
parent 1079e66ea7cc4a53d4d4a7e54a615bddbfeaaa12 (diff)
download poi-f395630abd891b9d4ee09932c309d566d8360860.tar.gz
poi-f395630abd891b9d4ee09932c309d566d8360860.zip
Bug 62092 - Text not extracted from grouped text shapes in HSLF
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1829453 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad/testcases/org')
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java 62
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java 15
2 files changed, 48 insertions, 29 deletions
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
index f2700d4fac..6a34d1af3f 100644
--- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
@@ -33,9 +33,9 @@ import java.io.InputStream;
import java.util.List;
import org.apache.poi.POIDataSamples;
+import org.apache.poi.hslf.usermodel.HSLFObjectShape;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
-import org.apache.poi.hslf.usermodel.HSLFObjectShape;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.poifs.filesystem.DirectoryNode;
@@ -89,12 +89,12 @@ public final class TestExtractor {
public void testReadSheetText() throws IOException {
// Basic 2 page example
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
- ensureTwoStringsTheSame(expectText, ppe.getText());
+ assertEquals(expectText, ppe.getText());
ppe.close();
// 1 page example with text boxes
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt");
- ensureTwoStringsTheSame(expectText2, ppe2.getText());
+ assertEquals(expectText2, ppe2.getText());
ppe2.close();
}
@@ -103,15 +103,15 @@ public final class TestExtractor {
// Basic 2 page example
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
String notesText = ppe.getNotes();
- String expText = "These are the notes for page 1\nThese are the notes on page two, again lacking formatting\n";
- ensureTwoStringsTheSame(expText, notesText);
+ String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n";
+ assertEquals(expText, notesText);
ppe.close();
// Other one doesn't have notes
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt");
notesText = ppe2.getNotes();
expText = "";
- ensureTwoStringsTheSame(expText, notesText);
+ assertEquals(expText, notesText);
ppe2.close();
}
@@ -122,8 +122,8 @@ public final class TestExtractor {
"This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"
};
String[] ntText = new String[]{
- "These are the notes for page 1\n",
- "These are the notes on page two, again lacking formatting\n"
+ "\nThese are the notes for page 1\n",
+ "\nThese are the notes on page two, again lacking formatting\n"
};
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
@@ -137,7 +137,7 @@ public final class TestExtractor {
ppe.setSlidesByDefault(true);
ppe.setNotesByDefault(true);
- assertEquals(slText[0] + slText[1] + "\n" + ntText[0] + ntText[1], ppe.getText());
+ assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText());
ppe.close();
}
@@ -166,16 +166,6 @@ public final class TestExtractor {
ppe.close();
}
- private void ensureTwoStringsTheSame(String exp, String act) {
- assertEquals(exp.length(), act.length());
- char[] expC = exp.toCharArray();
- char[] actC = act.toCharArray();
- for (int i = 0; i < expC.length; i++) {
- assertEquals("Char " + i, expC[i], actC[i]);
- }
- assertEquals(exp, act);
- }
-
@Test
public void testExtractFromEmbeded() throws IOException {
InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls");
@@ -454,4 +444,38 @@ public final class TestExtractor {
assertContains(text, "Prague");
ppe.close();
}
+
+ @Test
+ public void testExtractGroupedShapeText() throws Exception {
+ try (final PowerPointExtractor ppe = openExtractor("bug62092.ppt")) {
+ final String text = ppe.getText();
+
+ //this tests that we're ignoring text shapes at depth=0
+ //i.e. POI has already included them in the slide's getTextParagraphs()
+ assertContains(text, "Text box1");
+ assertEquals(1, countMatches(text,"Text box1"));
+
+
+ //the WordArt and text box count tests will fail
+ //if this content is available via getTextParagraphs() of the slide in POI
+ //i.e. when POI is fixed, these tests will fail, and
+ //we'll have to remove the workaround in HSLFExtractor's extractGroupText(...)
+ assertEquals(1, countMatches(text,"WordArt1"));
+ assertEquals(1, countMatches(text,"WordArt2"));
+ assertEquals(1, countMatches(text,"Ungrouped text box"));//should only be 1
+ assertContains(text, "Text box2");
+ assertContains(text, "Text box3");
+ assertContains(text, "Text box4");
+ assertContains(text, "Text box5");
+
+ //see below -- need to extract hyperlinks
+ assertContains(text, "tika");
+ assertContains(text, "MyTitle");
+
+ }
+ }
+
+ private static int countMatches(final String base, final String find) {
+ return base.split(java.util.regex.Pattern.quote(find), -1).length-1; // quote: match 'find' literally, not as a regex; limit -1: keep trailing empty strings so a match at the very end of 'base' is still counted
+ }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java b/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java
index 814259ca15..dbf89692bb 100644
--- a/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/record/TestRecordTypes.java
@@ -18,10 +18,10 @@
package org.apache.poi.hslf.record;
-import static org.junit.Assert.assertEquals;
-
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
/**
* Tests that RecordTypes returns the right records and classes when asked
*/
@@ -42,20 +42,15 @@ public final class TestRecordTypes {
@Test
public void testPPTClassLookups() {
- assertEquals(Slide.class, RecordTypes.Slide.handlingClass);
- assertEquals(TextCharsAtom.class, RecordTypes.TextCharsAtom.handlingClass);
- assertEquals(TextBytesAtom.class, RecordTypes.TextBytesAtom.handlingClass);
- assertEquals(SlideListWithText.class, RecordTypes.SlideListWithText.handlingClass);
-
// If this record is ever implemented, change to one that isn't!
// This is checking the "unhandled default" stuff works
- assertEquals(UnknownRecordPlaceholder.class, RecordTypes.forTypeID(-10).handlingClass);
+ assertEquals(RecordTypes.UnknownRecordPlaceholder, RecordTypes.forTypeID(-10));
}
@Test
public void testEscherClassLookups() {
// Should all come back with null, as DDF handles them
- assertEquals(null, RecordTypes.EscherDggContainer.handlingClass);
- assertEquals(null, RecordTypes.EscherBStoreContainer.handlingClass);
+ assertEquals(null, RecordTypes.EscherDggContainer.recordConstructor);
+ assertEquals(null, RecordTypes.EscherBStoreContainer.recordConstructor);
}
}