From a770463b350386c2edeca1f45ab3791299a704dd Mon Sep 17 00:00:00 2001 From: Tim Allison Date: Fri, 28 Oct 2016 13:26:49 +0000 Subject: [PATCH] whitespace clean up before fix for BUG-60305 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1767021 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hslf/extractor/TestExtractor.java | 629 +++++++++--------- 1 file changed, 318 insertions(+), 311 deletions(-) diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java index 0cf408a8f4..8ab30ee973 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java @@ -46,181 +46,188 @@ import org.junit.Test; * Tests that the extractor correctly gets the text out of our sample file */ public final class TestExtractor { - /** Extractor primed on the 2 page basic test data */ - private PowerPointExtractor ppe; - private static final String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"; - - /** Extractor primed on the 1 page but text-box'd test data */ - private PowerPointExtractor ppe2; - private static final String expectText2 = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n"; - - /** Where our embeded files live */ - private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); - - @Before - public void setUp() throws Exception { - ppe = new PowerPointExtractor(slTests.getFile("basic_test_ppt_file.ppt").getCanonicalPath()); - ppe2 = new PowerPointExtractor(slTests.getFile("with_textbox.ppt").getCanonicalPath()); - } - - @After - public void closeResources() throws Exception { - ppe2.close(); - ppe.close(); - } - + /** + * Extractor primed on the 2 page basic test data + */ + private PowerPointExtractor ppe; + private static final String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"; + + /** + * Extractor primed on the 1 page but text-box'd test data + */ + private PowerPointExtractor ppe2; + private static final String expectText2 = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n"; + + /** + * Where our embeded files live + */ + private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); + + @Before + public void setUp() throws Exception { + ppe = new PowerPointExtractor(slTests.getFile("basic_test_ppt_file.ppt").getCanonicalPath()); + ppe2 = new PowerPointExtractor(slTests.getFile("with_textbox.ppt").getCanonicalPath()); + } + + @After + public void closeResources() throws Exception { + ppe2.close(); + ppe.close(); + } + @Test public void testReadSheetText() { - // Basic 2 page example - String sheetText = ppe.getText(); + // Basic 2 page example + String sheetText = ppe.getText(); - ensureTwoStringsTheSame(expectText, sheetText); - - - // 1 page example with text boxes - sheetText = ppe2.getText(); + ensureTwoStringsTheSame(expectText, sheetText); - ensureTwoStringsTheSame(expectText2, sheetText); + + // 1 page example with text boxes + sheetText = ppe2.getText(); + + ensureTwoStringsTheSame(expectText2, sheetText); } - + @Test - public void testReadNoteText() { - // Basic 2 page example - String notesText = ppe.getNotes(); - String expText = "These are the notes for page 1\nThese are the notes on page two, again lacking formatting\n"; - - ensureTwoStringsTheSame(expText, notesText); - - // Other one doesn't have notes - notesText = ppe2.getNotes(); - expText = ""; - - ensureTwoStringsTheSame(expText, notesText); - } - + public void testReadNoteText() { + // Basic 2 page example + String notesText = ppe.getNotes(); + String expText = "These are the notes for page 1\nThese are the notes on page two, again lacking formatting\n"; + + ensureTwoStringsTheSame(expText, notesText); + + // Other one doesn't have notes + notesText = ppe2.getNotes(); + expText = ""; + + ensureTwoStringsTheSame(expText, notesText); + } + @Test - public void testReadBoth() { - String[] slText = new String[] { - "This is a test title\nThis is a test subtitle\nThis is on page 1\n", - "This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n" - }; - String[] ntText = new String[] { - "These are the notes for page 1\n", - "These are the notes on page two, again lacking formatting\n" - }; - - ppe.setSlidesByDefault(true); - ppe.setNotesByDefault(false); - assertEquals(slText[0]+slText[1], ppe.getText()); - - ppe.setSlidesByDefault(false); - ppe.setNotesByDefault(true); - assertEquals(ntText[0]+ntText[1], ppe.getText()); - - ppe.setSlidesByDefault(true); - ppe.setNotesByDefault(true); - assertEquals(slText[0]+slText[1]+"\n"+ntText[0]+ntText[1], ppe.getText()); - } - - /** - * Test that when presented with a PPT file missing the odd - * core record, we can still get the rest of the text out - * @throws Exception - */ + public void testReadBoth() { + String[] slText = new String[]{ + "This is a test title\nThis is a test subtitle\nThis is on page 1\n", + "This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n" + }; + String[] ntText = new String[]{ + "These are the notes for page 1\n", + "These are the notes on page two, again lacking formatting\n" + }; + + ppe.setSlidesByDefault(true); + ppe.setNotesByDefault(false); + assertEquals(slText[0] + slText[1], ppe.getText()); + + ppe.setSlidesByDefault(false); + ppe.setNotesByDefault(true); + assertEquals(ntText[0] + ntText[1], ppe.getText()); + + ppe.setSlidesByDefault(true); + ppe.setNotesByDefault(true); + assertEquals(slText[0] + slText[1] + "\n" + ntText[0] + ntText[1], ppe.getText()); + } + + /** + * Test that when presented with a PPT file missing the odd + * core record, we can still get the rest of the text out + * + * @throws Exception + */ @Test - public void testMissingCoreRecords() throws Exception { + public void testMissingCoreRecords() throws Exception { ppe.close(); - ppe = new PowerPointExtractor(slTests.openResourceAsStream("missing_core_records.ppt")); - - String text = ppe.getText(true, false); - String nText = ppe.getNotes(); - - assertNotNull(text); - assertNotNull(nText); - - // Notes record were corrupt, so don't expect any - assertEquals(nText.length(), 0); - - // Slide records were fine - assertTrue(text.startsWith("Using Disease Surveillance and Response")); - } - + ppe = new PowerPointExtractor(slTests.openResourceAsStream("missing_core_records.ppt")); + + String text = ppe.getText(true, false); + String nText = ppe.getNotes(); + + assertNotNull(text); + assertNotNull(nText); + + // Notes record were corrupt, so don't expect any + assertEquals(nText.length(), 0); + + // Slide records were fine + assertTrue(text.startsWith("Using Disease Surveillance and Response")); + } + private void ensureTwoStringsTheSame(String exp, String act) { - assertEquals(exp.length(),act.length()); - char[] expC = exp.toCharArray(); - char[] actC = act.toCharArray(); - for(int i=0; i shapes = ppe.getOLEShapes(); - assertEquals("Expected 6 ole shapes in " + path, 6, shapes.size()); - int num_ppt = 0, num_doc = 0, num_xls = 0; - for(OLEShape ole : shapes) { - String name = ole.getInstanceName(); - InputStream data = ole.getObjectData().getData(); - if ("Worksheet".equals(name)) { - HSSFWorkbook wb = new HSSFWorkbook(data); - num_xls++; - wb.close(); - } else if ("Document".equals(name)) { - HWPFDocument doc = new HWPFDocument(data); - num_doc++; - } else if ("Presentation".equals(name)) { - num_ppt++; - HSLFSlideShow ppt = new HSLFSlideShow(data); - ppt.close(); - } - data.close(); - } - assertEquals("Expected 2 embedded Word Documents", 2, num_doc); - assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls); - assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt); - } + POIFSFileSystem fs = new POIFSFileSystem( + POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls") + ); + HSLFSlideShowImpl ss; + + DirectoryNode dirA = (DirectoryNode) + fs.getRoot().getEntry("MBD0000A3B6"); + DirectoryNode dirB = (DirectoryNode) + fs.getRoot().getEntry("MBD0000A3B3"); + + assertNotNull(dirA.getEntry("PowerPoint Document")); + assertNotNull(dirB.getEntry("PowerPoint Document")); + + // Check the first file + ss = new HSLFSlideShowImpl(dirA); + ppe.close(); + ppe = new PowerPointExtractor(ss); + assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n", + ppe.getText(true, false) + ); + + // And the second + ss = new HSLFSlideShowImpl(dirB); + ppe.close(); + ppe = new PowerPointExtractor(ss); + assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n", + ppe.getText(true, false) + ); + fs.close(); + } + + /** + * A powerpoint file with embeded powerpoint files + */ + @SuppressWarnings("unused") + @Test + public void testExtractFromOwnEmbeded() throws Exception { + String path = "ppt_with_embeded.ppt"; + ppe.close(); + ppe = new PowerPointExtractor(POIDataSamples.getSlideShowInstance().openResourceAsStream(path)); + List shapes = ppe.getOLEShapes(); + assertEquals("Expected 6 ole shapes in " + path, 6, shapes.size()); + int num_ppt = 0, num_doc = 0, num_xls = 0; + for (OLEShape ole : shapes) { + String name = ole.getInstanceName(); + InputStream data = ole.getObjectData().getData(); + if ("Worksheet".equals(name)) { + HSSFWorkbook wb = new HSSFWorkbook(data); + num_xls++; + wb.close(); + } else if ("Document".equals(name)) { + HWPFDocument doc = new HWPFDocument(data); + num_doc++; + } else if ("Presentation".equals(name)) { + num_ppt++; + HSLFSlideShow ppt = new HSLFSlideShow(data); + ppt.close(); + } + data.close(); + } + assertEquals("Expected 2 embedded Word Documents", 2, num_doc); + assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls); + assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt); + } /** * A powerpoint file with embeded powerpoint files @@ -231,7 +238,7 @@ public final class TestExtractor { ppe.close(); ppe = new PowerPointExtractor(POIDataSamples.getSlideShowInstance().openResourceAsStream(path)); List shapes = ppe.getOLEShapes(); - + for (OLEShape shape : shapes) { IOUtils.copy(shape.getObjectData().getData(), new ByteArrayOutputStream()); } @@ -243,120 +250,120 @@ public final class TestExtractor { @Test public void testWithComments() throws Exception { ppe.close(); - ppe = new PowerPointExtractor(slTests.openResourceAsStream("WithComments.ppt")); - - String text = ppe.getText(); - assertFalse("Comments not in by default", text.contains("This is a test comment")); - - ppe.setCommentsByDefault(true); - - text = ppe.getText(); - assertContains(text, "This is a test comment"); - - - // And another file + ppe = new PowerPointExtractor(slTests.openResourceAsStream("WithComments.ppt")); + + String text = ppe.getText(); + assertFalse("Comments not in by default", text.contains("This is a test comment")); + + ppe.setCommentsByDefault(true); + + text = ppe.getText(); + assertContains(text, "This is a test comment"); + + + // And another file ppe.close(); - ppe = new PowerPointExtractor(slTests.openResourceAsStream("45543.ppt")); - - text = ppe.getText(); - assertFalse("Comments not in by default", text.contains("testdoc")); - - ppe.setCommentsByDefault(true); - - text = ppe.getText(); - assertContains(text, "testdoc"); + ppe = new PowerPointExtractor(slTests.openResourceAsStream("45543.ppt")); + + text = ppe.getText(); + assertFalse("Comments not in by default", text.contains("testdoc")); + + ppe.setCommentsByDefault(true); + + text = ppe.getText(); + assertContains(text, "testdoc"); } - + /** * From bug #45537 */ @Test public void testHeaderFooter() throws Exception { - String text; - - // With a header on the notes - HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("45537_Header.ppt")); - HSLFSlideShow ss = new HSLFSlideShow(hslf); - assertNotNull(ss.getNotesHeadersFooters()); - assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getHeaderText()); - ppe.close(); - - ppe = new PowerPointExtractor(hslf); - - text = ppe.getText(); - assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc")); - assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase")); - - ppe.setNotesByDefault(true); - text = ppe.getText(); - assertContains(text, "testdoc"); - assertContains(text, "test phrase"); - ss.close(); - - // And with a footer, also on notes - hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("45537_Footer.ppt")); - ss = new HSLFSlideShow(hslf); - assertNotNull(ss.getNotesHeadersFooters()); - assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getFooterText()); - ppe.close(); - - ppe = new PowerPointExtractor(slTests.openResourceAsStream("45537_Footer.ppt")); - - text = ppe.getText(); - assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc")); - assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase")); - - ppe.setNotesByDefault(true); - text = ppe.getText(); - assertContains(text, "testdoc"); - assertContains(text, "test phrase"); + String text; + + // With a header on the notes + HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("45537_Header.ppt")); + HSLFSlideShow ss = new HSLFSlideShow(hslf); + assertNotNull(ss.getNotesHeadersFooters()); + assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getHeaderText()); + ppe.close(); + + ppe = new PowerPointExtractor(hslf); + + text = ppe.getText(); + assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc")); + assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase")); + + ppe.setNotesByDefault(true); + text = ppe.getText(); + assertContains(text, "testdoc"); + assertContains(text, "test phrase"); + ss.close(); + + // And with a footer, also on notes + hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("45537_Footer.ppt")); + ss = new HSLFSlideShow(hslf); + assertNotNull(ss.getNotesHeadersFooters()); + assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getFooterText()); + ppe.close(); + + ppe = new PowerPointExtractor(slTests.openResourceAsStream("45537_Footer.ppt")); + + text = ppe.getText(); + assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc")); + assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase")); + + ppe.setNotesByDefault(true); + text = ppe.getText(); + assertContains(text, "testdoc"); + assertContains(text, "test phrase"); } - - @SuppressWarnings("unused") - @Test - public void testSlideMasterText() throws Exception { - String masterTitleText = "This is the Master Title"; - String masterRandomText = "This text comes from the Master Slide"; - String masterFooterText = "Footer from the master slide"; - HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("WithMaster.ppt")); - ppe.close(); - - ppe = new PowerPointExtractor(hslf); - - String text = ppe.getText(); - //assertContains(text, masterTitleText); // TODO Is this available in PPT? - //assertContains(text, masterRandomText); // TODO Extract - assertContains(text, masterFooterText); - } - - @Test - public void testMasterText() throws Exception { - ppe.close(); - ppe = new PowerPointExtractor(slTests.openResourceAsStream("master_text.ppt")); - - // Initially not there - String text = ppe.getText(); - assertFalse(text.contains("Text that I added to the master slide")); - - // Enable, shows up - ppe.setMasterByDefault(true); - text = ppe.getText(); - assertTrue(text.contains("Text that I added to the master slide")); - - // Make sure placeholder text does not come out - assertFalse(text.contains("Click to edit Master")); - - // Now with another file only containing master text - // Will always show up - String masterText = "Footer from the master slide"; - HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("WithMaster.ppt")); - ppe.close(); - - ppe = new PowerPointExtractor(hslf); - - text = ppe.getText(); - assertContainsIgnoreCase(text, "master"); - assertContains(text, masterText); + + @SuppressWarnings("unused") + @Test + public void testSlideMasterText() throws Exception { + String masterTitleText = "This is the Master Title"; + String masterRandomText = "This text comes from the Master Slide"; + String masterFooterText = "Footer from the master slide"; + HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("WithMaster.ppt")); + ppe.close(); + + ppe = new PowerPointExtractor(hslf); + + String text = ppe.getText(); + //assertContains(text, masterTitleText); // TODO Is this available in PPT? + //assertContains(text, masterRandomText); // TODO Extract + assertContains(text, masterFooterText); + } + + @Test + public void testMasterText() throws Exception { + ppe.close(); + ppe = new PowerPointExtractor(slTests.openResourceAsStream("master_text.ppt")); + + // Initially not there + String text = ppe.getText(); + assertFalse(text.contains("Text that I added to the master slide")); + + // Enable, shows up + ppe.setMasterByDefault(true); + text = ppe.getText(); + assertTrue(text.contains("Text that I added to the master slide")); + + // Make sure placeholder text does not come out + assertFalse(text.contains("Click to edit Master")); + + // Now with another file only containing master text + // Will always show up + String masterText = "Footer from the master slide"; + HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("WithMaster.ppt")); + ppe.close(); + + ppe = new PowerPointExtractor(hslf); + + text = ppe.getText(); + assertContainsIgnoreCase(text, "master"); + assertContains(text, masterText); } /** @@ -364,56 +371,56 @@ public final class TestExtractor { */ @Test public void testChineseText() throws Exception { - HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("54880_chinese.ppt")); - ppe.close(); - ppe = new PowerPointExtractor(hslf); - - String text = ppe.getText(); - - // Check for the english text line - assertContains(text, "Single byte"); - - // Check for the english text in the mixed line - assertContains(text, "Mix"); - - // Check for the chinese text in the mixed line - assertContains(text, "\u8868"); - - // Check for the chinese only text line - assertContains(text, "\uff8a\uff9d\uff76\uff78"); + HSLFSlideShowImpl hslf = new HSLFSlideShowImpl(slTests.openResourceAsStream("54880_chinese.ppt")); + ppe.close(); + ppe = new PowerPointExtractor(hslf); + + String text = ppe.getText(); + + // Check for the english text line + assertContains(text, "Single byte"); + + // Check for the english text in the mixed line + assertContains(text, "Mix"); + + // Check for the chinese text in the mixed line + assertContains(text, "\u8868"); + + // Check for the chinese only text line + assertContains(text, "\uff8a\uff9d\uff76\uff78"); } - + /** * Tests that we can work with both {@link POIFSFileSystem} - * and {@link NPOIFSFileSystem} + * and {@link NPOIFSFileSystem} */ @SuppressWarnings("resource") @Test public void testDifferentPOIFS() throws Exception { - // Open the two filesystems - DirectoryNode[] files = new DirectoryNode[2]; - files[0] = (new POIFSFileSystem(slTests.openResourceAsStream("basic_test_ppt_file.ppt"))).getRoot(); - NPOIFSFileSystem npoifsFileSystem = new NPOIFSFileSystem(slTests.getFile("basic_test_ppt_file.ppt")); - files[1] = npoifsFileSystem.getRoot(); - - // Open directly - for(DirectoryNode dir : files) { - PowerPointExtractor extractor = new PowerPointExtractor(dir); - assertEquals(expectText, extractor.getText()); - } - - // Open via a HWPFDocument - for(DirectoryNode dir : files) { - HSLFSlideShowImpl slideshow = new HSLFSlideShowImpl(dir); - PowerPointExtractor extractor = new PowerPointExtractor(slideshow); - assertEquals(expectText, extractor.getText()); - } - - npoifsFileSystem.close(); + // Open the two filesystems + DirectoryNode[] files = new DirectoryNode[2]; + files[0] = (new POIFSFileSystem(slTests.openResourceAsStream("basic_test_ppt_file.ppt"))).getRoot(); + NPOIFSFileSystem npoifsFileSystem = new NPOIFSFileSystem(slTests.getFile("basic_test_ppt_file.ppt")); + files[1] = npoifsFileSystem.getRoot(); + + // Open directly + for (DirectoryNode dir : files) { + PowerPointExtractor extractor = new PowerPointExtractor(dir); + assertEquals(expectText, extractor.getText()); + } + + // Open via a HWPFDocument + for (DirectoryNode dir : files) { + HSLFSlideShowImpl slideshow = new HSLFSlideShowImpl(dir); + PowerPointExtractor extractor = new PowerPointExtractor(slideshow); + assertEquals(expectText, extractor.getText()); + } + + npoifsFileSystem.close(); } @Test - public void testTable() throws Exception{ + public void testTable() throws Exception { // ppe = new PowerPointExtractor(slTests.openResourceAsStream("54111.ppt")); // String text = ppe.getText(); // String target = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n"+ @@ -428,7 +435,7 @@ public final class TestExtractor { ppe = new PowerPointExtractor(slTests.openResourceAsStream("54722.ppt")); String text = ppe.getText(); - String target = "this\tText\tis\twithin\ta\n"+ + String target = "this\tText\tis\twithin\ta\n" + "table\t1\t2\t3\t4"; assertTrue(text.contains(target)); } @@ -441,7 +448,7 @@ public final class TestExtractor { ppe = new PowerPointExtractor(hslf); ppe.setMasterByDefault(true); - + String text = ppe.getText(); assertContains(text, "Prague"); hslf.close(); -- 2.39.5