]> source.dussan.org Git - poi.git/commitdiff
#54570 - InvalidFormatException because of Absolute URI forbidden
authorAndreas Beeker <kiwiwings@apache.org>
Mon, 16 May 2016 21:04:00 +0000 (21:04 +0000)
committerAndreas Beeker <kiwiwings@apache.org>
Mon, 16 May 2016 21:04:00 +0000 (21:04 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1744137 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java
src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
src/testcases/org/apache/poi/POITestCase.java
test-data/slideshow/bug54570.pptx [new file with mode: 0644]

index 64177bcb263b6a88b789e8dc874e02b4db79f593..a4e2a670b1c18867eebcdc9c92ce257c0dd9cef9 100644 (file)
@@ -191,7 +191,7 @@ implements SlideShow<XSLFShape,XSLFTextParagraph> {
             List<PackagePart> mediaParts = getPackage().getPartsByName(Pattern.compile("/ppt/media/.*?"));
             _pictures = new ArrayList<XSLFPictureData>(mediaParts.size());
             for(PackagePart part : mediaParts){
-                XSLFPictureData pd = new XSLFPictureData(part, null);
+                XSLFPictureData pd = new XSLFPictureData(part);
                 pd.setIndex(_pictures.size());
                 _pictures.add(pd);
             }
index 346aeab898f482e422b128e015a8174bdecd8aa4..d3c7c97db74af27a36fd226152e9ac47482babe2 100644 (file)
 ==================================================================== */
 package org.apache.poi.xslf.extractor;
 
+import static org.apache.poi.POITestCase.assertContains;
+import static org.apache.poi.POITestCase.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.xslf.usermodel.XSLFSlideShow;
-
-import junit.framework.TestCase;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.xmlbeans.XmlException;
+import org.junit.Test;
 
 /**
- * Tests for HXFPowerPointExtractor
+ * Tests for XSLFPowerPointExtractor
  */
-public class TestXSLFPowerPointExtractor extends TestCase {
-   /**
-    * A simple file
-    */
-   private XSLFSlideShow xmlA;
-   private OPCPackage pkg;
-       
-   private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
-
-   protected void setUp() throws Exception {
-      slTests = POIDataSamples.getSlideShowInstance();
-      pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
-      xmlA = new XSLFSlideShow(pkg);
-   }
+public class TestXSLFPowerPointExtractor {
+    private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
 
        /**
         * Get text out of the simple file
+        * @throws XmlException
+        * @throws OpenXML4JException
         */
-       public void testGetSimpleText() throws Exception {
-               new XSLFPowerPointExtractor(xmlA).close();
+    @Test
+    public void testGetSimpleText()
+    throws IOException, XmlException, OpenXML4JException {
+        XMLSlideShow xmlA = openPPTX("sample.pptx");
+        @SuppressWarnings("resource")
+        OPCPackage pkg = xmlA.getPackage();
+
+           new XSLFPowerPointExtractor(xmlA).close();
                new XSLFPowerPointExtractor(pkg).close();
-               
-               XSLFPowerPointExtractor extractor = 
+
+               XSLFPowerPointExtractor extractor =
                        new XSLFPowerPointExtractor(xmlA);
                extractor.getText();
-               
+
                String text = extractor.getText();
                assertTrue(text.length() > 0);
-               
+
                // Check Basics
                assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
-               assertTrue(text.contains("amet\n\n"));
+               assertContains(text, "amet\n\n");
 
                // Our placeholder master text
                // This shouldn't show up in the output
-               String masterText =
-         "Click to edit Master title style\n" +
-         "Click to edit Master subtitle style\n" +
-         "\n\n\n\n\n\n" +
-         "Click to edit Master title style\n" +
-         "Click to edit Master text styles\n" +
-         "Second level\n" +
-         "Third level\n" +
-         "Fourth level\n" +
-         "Fifth level\n";
-               
+           // String masterText =
+           //     "Click to edit Master title style\n" +
+        //     "Click to edit Master subtitle style\n" +
+        //     "\n\n\n\n\n\n" +
+        //     "Click to edit Master title style\n" +
+        //     "Click to edit Master text styles\n" +
+        //     "Second level\n" +
+        //     "Third level\n" +
+        //     "Fourth level\n" +
+        //     "Fifth level\n";
+
                // Just slides, no notes
                text = extractor.getText(true, false, false);
-               assertEquals(
-                               "Lorem ipsum dolor sit amet\n" +
-                               "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
-                               "\n" +
-                               "Lorem ipsum dolor sit amet\n" +
-                               "Lorem\n" +
-                               "ipsum\n" +
-                               "dolor\n" +
-                               "sit\n" +
-                               "amet\n" +
-                               "\n"
-                               , text
-               );
-               
+               String slideText =
+               "Lorem ipsum dolor sit amet\n" +
+            "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
+            "\n" +
+            "Lorem ipsum dolor sit amet\n" +
+            "Lorem\n" +
+            "ipsum\n" +
+            "dolor\n" +
+            "sit\n" +
+            "amet\n" +
+            "\n";
+               assertEquals(slideText, text);
+
                // Just notes, no slides
                text = extractor.getText(false, true);
-               assertEquals(
-                               "\n\n\n\n", text
-               );
-               
+               assertEquals("\n\n\n\n", text);
+
                // Both
                text = extractor.getText(true, true, false);
-               assertEquals(
-                               "Lorem ipsum dolor sit amet\n" +
-                               "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
+               String bothText =
+               "Lorem ipsum dolor sit amet\n" +
+            "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
             "\n\n\n" +
-                               "Lorem ipsum dolor sit amet\n" +
-                               "Lorem\n" +
-                               "ipsum\n" +
-                               "dolor\n" +
-                               "sit\n" +
-                               "amet\n" +
-                               "\n\n\n"
-                               , text
-               );
-               
+            "Lorem ipsum dolor sit amet\n" +
+            "Lorem\n" +
+            "ipsum\n" +
+            "dolor\n" +
+            "sit\n" +
+            "amet\n" +
+            "\n\n\n";
+        assertEquals(bothText, text);
+
                // With Slides and Master Text
-      text = extractor.getText(true, false, true);
-      assertEquals(
+        text = extractor.getText(true, false, true);
+        String smText =
             "Lorem ipsum dolor sit amet\n" +
             "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
             "\n" +
@@ -123,13 +126,12 @@ public class TestXSLFPowerPointExtractor extends TestCase {
             "dolor\n" +
             "sit\n" +
             "amet\n" +
-            "\n"
-            , text
-      );
-               
+            "\n";
+        assertEquals(smText, text);
+
                // With Slides, Notes and Master Text
-      text = extractor.getText(true, true, true);
-      assertEquals(
+        text = extractor.getText(true, true, true);
+        String snmText =
             "Lorem ipsum dolor sit amet\n" +
             "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
             "\n" +
@@ -140,67 +142,61 @@ public class TestXSLFPowerPointExtractor extends TestCase {
             "dolor\n" +
             "sit\n" +
             "amet\n" +
-            "\n\n\n"
-            , text
-      );
-               
+            "\n\n\n";
+        assertEquals(snmText, text);
+
                // Via set defaults
                extractor.setSlidesByDefault(false);
                extractor.setNotesByDefault(true);
                text = extractor.getText();
-               assertEquals(
-                               "\n\n\n\n", text
-               );
-               
+               assertEquals("\n\n\n\n", text);
+
                extractor.close();
+               xmlA.close();
        }
-       
-   public void testGetComments() throws Exception {
-      XSLFSlideShow xml = 
-         new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
-      XSLFPowerPointExtractor extractor = 
-         new XSLFPowerPointExtractor(xml);
-
-      String text = extractor.getText();
-      assertTrue(text.length() > 0);
-
-      // Check comments are there
-      assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
-      assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
-
-      // Check the authors came through too
-      assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
-               
-               extractor.close();
-   }
-       
+
+    /**
+     * Extracts the text of 45545_Comment.pptx and checks that the comment
+     * text and the comment author's name are part of it.
+     *
+     * The class no longer extends junit.framework.TestCase, so the method has
+     * to carry the JUnit 4 annotation; without it the runner never executes it.
+     */
+    @Test
+    public void testGetComments() throws IOException {
+        XMLSlideShow xml = openPPTX("45545_Comment.pptx");
+        XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Check comments are there
+        assertContains(text, "testdoc");
+        assertContains(text, "test phrase");
+
+        // Check the authors came through too
+        assertContains(text, "XPVMWARE01");
+
+        extractor.close();
+        xml.close();
+    }
+
        public void testGetMasterText() throws Exception {
-      XSLFSlideShow xml = 
-         new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
-      XSLFPowerPointExtractor extractor = 
-         new XSLFPowerPointExtractor(xml);
-      extractor.setSlidesByDefault(true);
-      extractor.setNotesByDefault(false);
-      extractor.setMasterByDefault(true);
-      
-      String text = extractor.getText();
-      assertTrue(text.length() > 0);
-
-      // Check master text is there
-      assertTrue("Unable to find expected word in text\n" + text, 
-            text.contains("Footer from the master slide"));
-
-      // Theme text shouldn't show up
-      String themeText = 
-         "Theme Master Title\n" +
-         "Theme Master first level\n" +
-         "And the 2nd level\n" +
-         "Our 3rd level goes here\n" +
-         "And onto the 4th, such fun....\n" +
-         "Finally is the Fifth level\n";
-      
-      // Check the whole text
-      assertEquals(
+           XMLSlideShow xml = openPPTX("WithMaster.pptx");
+           XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
+           extractor.setSlidesByDefault(true);
+           extractor.setNotesByDefault(false);
+           extractor.setMasterByDefault(true);
+
+           String text = extractor.getText();
+           assertTrue(text.length() > 0);
+
+           // Check master text is there
+           assertContains(text, "Footer from the master slide");
+
+           // Theme text shouldn't show up
+           // String themeText =
+        //     "Theme Master Title\n" +
+        //     "Theme Master first level\n" +
+        //     "And the 2nd level\n" +
+        //     "Our 3rd level goes here\n" +
+        //     "And onto the 4th, such fun....\n" +
+        //     "Finally is the Fifth level\n";
+
+           // Check the whole text
+           String wholeText =
             "First page title\n" +
             "First page subtitle\n" +
             "This is the Master Title\n" +
@@ -210,108 +206,124 @@ public class TestXSLFPowerPointExtractor extends TestCase {
             "2nd page subtitle\n" +
             "Footer from the master slide\n" +
             "This is the Master Title\n" +
-            "This text comes from the Master Slide\n"
-            , text
-      );
-               
+            "This text comes from the Master Slide\n";
+           assertEquals(wholeText, text);
+
                extractor.close();
+               xml.close();
        }
 
-    public void testTable() throws Exception {
-        XSLFSlideShow xml = 
-           new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
-        XSLFPowerPointExtractor extractor =
-            new XSLFPowerPointExtractor(xml);
+       @Test
+       public void testTable() throws Exception {
+        XMLSlideShow xml = openPPTX("present1.pptx");
+        XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
 
         String text = extractor.getText();
         assertTrue(text.length() > 0);
 
         // Check comments are there
         assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
-               
+
                extractor.close();
+               xml.close();
     }
-    
+
     /**
      * Test that we can get the text from macro enabled,
-     *  template, theme, slide enabled etc formats, as 
+     *  template, theme, slide enabled etc formats, as
      *  well as from the normal file
      */
+    @Test
     public void testDifferentSubformats() throws Exception {
-       String[] extensions = new String[] {
-             "pptx", "pptm", "ppsm", "ppsx",
-             "thmx", 
-             //"xps" // Doesn't have a core document
-       };
-       for(String extension : extensions) {
-          String filename = "testPPT." + extension;
-          XSLFSlideShow xml = 
-             new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
-          XSLFPowerPointExtractor extractor =
-             new XSLFPowerPointExtractor(xml);
-
-         String text = extractor.getText();
-         if(extension.equals("thmx")) {
-            // Theme file doesn't have any textual content
-            assertEquals(0, text.length());
-            continue;
-         }
-         
+        String[] extensions = new String[] {
+            "pptx", "pptm", "ppsm", "ppsx", "thmx",
+            // "xps" - Doesn't have a core document
+        };
+        for(String extension : extensions) {
+            String filename = "testPPT." + extension;
+            XMLSlideShow xml = openPPTX(filename);
+            XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
+
+            String text = extractor.getText();
+            if (extension.equals("thmx")) {
+                // Theme file doesn't have any textual content
+                assertEquals(0, text.length());
+                continue;
+            }
+
          assertTrue(text.length() > 0);
          assertTrue(
-               "Text missing for " + filename + "\n" + text, 
+               "Text missing for " + filename + "\n" + text,
                text.contains("Attachment Test")
          );
          assertTrue(
-               "Text missing for " + filename + "\n" + text, 
+               "Text missing for " + filename + "\n" + text,
                text.contains("This is a test file data with the same content")
          );
          assertTrue(
-               "Text missing for " + filename + "\n" + text, 
+               "Text missing for " + filename + "\n" + text,
                text.contains("content parsing")
          );
          assertTrue(
-               "Text missing for " + filename + "\n" + text, 
+               "Text missing for " + filename + "\n" + text,
                text.contains("Different words to test against")
          );
          assertTrue(
-               "Text missing for " + filename + "\n" + text, 
+               "Text missing for " + filename + "\n" + text,
                text.contains("Mystery")
          );
-         
+
                 extractor.close();
+                xml.close();
        }
     }
 
-   public void test45541() throws Exception {
-       // extract text from a powerpoint that has a header in the notes-element
-       POITextExtractor extr = ExtractorFactory.createExtractor(slTests
-               .openResourceAsStream("45541_Header.pptx"));
-       String text = extr.getText();
-       assertNotNull(text);
-       assertFalse("Had: " + text, text.contains("testdoc"));
-       
-       text = ((XSLFPowerPointExtractor)extr).getText(false, true);
-       assertNotNull(text);
-       assertTrue("Had: " + text, text.contains("testdoc"));
-       extr.close();
-       assertNotNull(text);
+    @Test
+    public void test45541() throws Exception {
+        // extract text from a powerpoint that has a header in the notes-element
+        POITextExtractor extr = ExtractorFactory.createExtractor(
+            slTests.getFile("45541_Header.pptx"));
+        String text = extr.getText();
+        assertNotNull(text);
+        assertFalse("Had: " + text, text.contains("testdoc"));
 
-       // extract text from a powerpoint that has a footer in the master-slide
-       extr = ExtractorFactory.createExtractor(slTests
-               .openResourceAsStream("45541_Footer.pptx"));
-       text = extr.getText();
-       assertNotNull(text);
-       assertFalse("Had " + text, text.contains("testdoc"));
-       
-       text = ((XSLFPowerPointExtractor)extr).getText(false, true);
+        text = ((XSLFPowerPointExtractor)extr).getText(false, true);
+        assertContains(text, "testdoc");
+        extr.close();
        assertNotNull(text);
-       assertFalse("Had: " + text, text.contains("testdoc"));
 
-       text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
-       assertNotNull(text);
-       assertFalse("Had: " + text, text.contains("testdoc"));
+        // extract text from a powerpoint that has a footer in the master-slide
+        extr = ExtractorFactory.createExtractor(
+            slTests.getFile("45541_Footer.pptx"));
+        text = extr.getText();
+        assertNotContained(text, "testdoc");
+
+        text = ((XSLFPowerPointExtractor)extr).getText(false, true);
+        assertNotContained(text, "testdoc");
 
-       extr.close();
-   }
+        text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
+        assertNotContained(text, "testdoc");
+
+        extr.close();
+    }
+
+
+    /**
+     * Regression check for bug 54570 (InvalidFormatException because of
+     * "Absolute URI forbidden"): the sample file has to open and its
+     * extracted text must not be null.
+     */
+    @Test
+    public void bug54570() throws IOException {
+        XMLSlideShow slideShow = openPPTX("bug54570.pptx");
+        XSLFPowerPointExtractor textExtractor = new XSLFPowerPointExtractor(slideShow);
+        assertNotNull(textExtractor.getText());
+        textExtractor.close();
+        slideShow.close();
+    }
+
+    /**
+     * Opens a sample presentation from the slideshow test data.
+     *
+     * @param file name of the sample file
+     * @return the slideshow built from the sample's input stream
+     * @throws IOException propagated from opening, reading or closing the stream
+     */
+    private XMLSlideShow openPPTX(String file) throws IOException {
+        final InputStream sample = slTests.openResourceAsStream(file);
+        try {
+            XMLSlideShow slideShow = new XMLSlideShow(sample);
+            return slideShow;
+        } finally {
+            // close the stream whether or not the slideshow could be built
+            sample.close();
+        }
+    }
 }
index ee9ab9ab3b19e54c0668eddc29767d05b02ee0f8..61e45833da0b6647188e3a759f659281a67dab9e 100644 (file)
@@ -18,6 +18,7 @@
 package org.apache.poi;
 
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -26,73 +27,48 @@ import java.lang.reflect.Method;
 import java.security.AccessController;
 import java.security.PrivilegedActionException;
 import java.security.PrivilegedExceptionAction;
-import java.util.Collection;
 import java.util.Map;
 
 import org.apache.poi.util.SuppressForbidden;
 
 /**
- * Parent class for POI JUnit TestCases, which provide additional
- *  features 
+ * Utility class for POI JUnit test cases, which provides additional features
  */
-public class POITestCase {
+public final class POITestCase {
     public static void assertContains(String haystack, String needle) {
+        assertNotNull(haystack);
         assertTrue(
               "Unable to find expected text '" + needle + "' in text:\n" + haystack,
               haystack.contains(needle)
         );
-     }
+    }
+    
     public static void assertNotContained(String haystack, String needle) {
+        assertNotNull(haystack);
         assertFalse(
               "Unexpectedly found text '" + needle + "' in text:\n" + haystack,
               haystack.contains(needle)
         );
-     }
-    
-    public static <T> void assertContains(T needle, T[] haystack)
-    {
-       // Check
-       for (T thing : haystack) {
-          if (thing.equals(needle)) {
-             return;
-          }
-       }
-
-       // Failed, try to build a nice error
-       StringBuilder sb = new StringBuilder();
-       sb.append("Unable to find ").append(needle).append(" in [");
-       for (T thing : haystack) {
-           sb.append(" ").append(thing.toString()).append(" ,");
-        }
-        sb.setCharAt(sb.length()-1, ']');
-
-        fail(sb.toString());
-     }
+    }
     
-     public static  <T> void assertContains(T needle, Collection<T> haystack) {
-        if (haystack.contains(needle)) {
-           return;
-        }
-        fail("Unable to find " + needle + " in " + haystack);
-     }
-     
-     /**
-      * @param map haystack
-      * @param key needle
-      */
-     public static  <T> void assertContains(Map<T, ?> map, T key) {
-         if (map.containsKey(key)) {
+    /**
+     * @param map haystack
+     * @param key needle
+     */
+    public static  <T> void assertContains(Map<T, ?> map, T key) {
+        if (map.containsKey(key)) {
             return;
-         }
-         fail("Unable to find " + key + " in " + map);
-      }
+        }
+        fail("Unable to find " + key + " in " + map);
+    }
      
-     /** Utility method to get the value of a private/protected field.
-      * Only use this method in test cases!!!
-      */
-     public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) {
-         assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
-         try {
+    /**
+     * Utility method to get the value of a private/protected field.
+     * Only use this method in test cases!!!
+     */
+    public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) {
+        assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
+        try {
             return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
                 @Override
                 @SuppressWarnings("unchecked")
@@ -103,18 +79,19 @@ public class POITestCase {
                     return (R) f.get(instance);
                 }
             });
-         } catch (PrivilegedActionException pae) {
-             throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException());
-         }
-     }
+        } catch (PrivilegedActionException pae) {
+            throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException());
+        }
+    }
      
-     /** Utility method to call a private/protected method.
-      * Only use this method in test cases!!!
-      */
-     public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName,
-             final Class<?>[] parameterTypes, final Object[] parameters) {
-         assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
-         try {
+    /**
+     * Utility method to call a private/protected method.
+     * Only use this method in test cases!!!
+     */
+    public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName,
+        final Class<?>[] parameterTypes, final Object[] parameters) {
+        assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
+        try {
             return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
                 @Override
                 @SuppressWarnings("unchecked")
@@ -125,8 +102,8 @@ public class POITestCase {
                     return (R) m.invoke(instance, parameters);
                 }
             });
-         } catch (PrivilegedActionException pae) {
-             throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException());
-         }
-     }
+        } catch (PrivilegedActionException pae) {
+            throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException());
+        }
+    }
 }
diff --git a/test-data/slideshow/bug54570.pptx b/test-data/slideshow/bug54570.pptx
new file mode 100644 (file)
index 0000000..a5547e9
Binary files /dev/null and b/test-data/slideshow/bug54570.pptx differ