source.dussan.org Git - poi.git/commitdiff
Fixed ExtractorFactory to support .xltx and .dotx files, see Bugzilla 47517
author: Yegor Kozlov <yegor@apache.org>
Sat, 18 Jul 2009 09:09:11 +0000 (09:09 +0000)
committer: Yegor Kozlov <yegor@apache.org>
Sat, 18 Jul 2009 09:09:11 +0000 (09:09 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795327 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx [new file with mode: 0755]
src/testcases/org/apache/poi/hssf/data/test.xltx [new file with mode: 0755]

index dc25f63a38d118657612690a6f683f555632c14f..a313f36026baddcc68d21f2a658e3b337c4e320f 100644 (file)
@@ -94,18 +94,27 @@ public class ExtractorFactory {
                if(core.size() != 1) {
                        throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
                }
-               
-               PackagePart corePart = pkg.getPart(core.getRelationship(0));
-               if(corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType())) {
-                       return new XSSFExcelExtractor(pkg);
-               }
-               if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) {
+
+        PackagePart corePart = pkg.getPart(core.getRelationship(0));
+        if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
+            corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
+            corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
+            corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
+            corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
+            return new XSSFExcelExtractor(pkg);
+        }
+
+        if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
+            corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
+            corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
+            corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
                        return new XWPFWordExtractor(pkg);
                }
+
                if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
                        return new XSLFPowerPointExtractor(pkg);
                }
-               throw new IllegalArgumentException("No supported documents found in the OOXML package");
+               throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
        }
        
        public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
index bbbbaa9733eee06964b61011e56e304b385fb6d4..76daeed760b82b63e6baf193959aedbf8cd43fc2 100644 (file)
@@ -50,9 +50,11 @@ public class TestExtractorFactory extends TestCase {
        
        private File xls;
        private File xlsx;
-       
+        private File xltx;
+
        private File doc;
        private File docx;
+        private File dotx;
 
        private File ppt;
        private File pptx;
@@ -77,10 +79,12 @@ public class TestExtractorFactory extends TestCase {
                
                xls = new File(excel_dir, "SampleSS.xls");
                xlsx = new File(excel_dir, "SampleSS.xlsx");
-               
+                xltx = new File(excel_dir, "test.xltx");
+
                doc = new File(word_dir, "SampleDoc.doc");
                docx = new File(word_dir, "SampleDoc.docx");
-               
+        dotx = new File(word_dir, "test.dotx");
+
                ppt = new File(powerpoint_dir, "SampleShow.ppt");
                pptx = new File(powerpoint_dir, "SampleShow.pptx");
                
@@ -104,6 +108,15 @@ public class TestExtractorFactory extends TestCase {
                assertTrue(
                                ExtractorFactory.createExtractor(xlsx).getText().length() > 200
                );
+
+                assertTrue(
+                                ExtractorFactory.createExtractor(xltx)
+                                instanceof XSSFExcelExtractor
+                );
+                assertTrue(
+                                ExtractorFactory.createExtractor(xltx).getText().contains("test")
+                );
+
                
                // Word
                assertTrue(
@@ -121,7 +134,15 @@ public class TestExtractorFactory extends TestCase {
                assertTrue(
                                ExtractorFactory.createExtractor(docx).getText().length() > 120
                );
-               
+
+                assertTrue(
+                                ExtractorFactory.createExtractor(dotx)
+                                instanceof XWPFWordExtractor
+                );
+                assertTrue(
+                                ExtractorFactory.createExtractor(dotx).getText().contains("Test")
+                );
+
                // PowerPoint
                assertTrue(
                                ExtractorFactory.createExtractor(ppt)
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx
new file mode 100755 (executable)
index 0000000..0b74e39
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx differ
diff --git a/src/testcases/org/apache/poi/hssf/data/test.xltx b/src/testcases/org/apache/poi/hssf/data/test.xltx
new file mode 100755 (executable)
index 0000000..3974eb2
Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/test.xltx differ