From fbff3e557bbee0882a9b6492cad0e87ace030477 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Thu, 29 Jul 2010 11:57:08 +0000 Subject: [PATCH] Refactor to make it easier to tell which content types each POIXMLTextExtractor handles git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@980414 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../poi/extractor/ExtractorFactory.java | 69 ++++++----- .../org/apache/poi/xslf/XSLFSlideShow.java | 20 +--- .../extractor/XSLFPowerPointExtractor.java | 7 ++ .../poi/xslf/usermodel/XSLFRelation.java | 111 ++++++++++++++++++ .../xssf/extractor/XSSFExcelExtractor.java | 7 ++ .../poi/xwpf/extractor/XWPFWordExtractor.java | 7 ++ 7 files changed, 171 insertions(+), 51 deletions(-) create mode 100644 src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFRelation.java diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 6f2623971d..085fc30e20 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Make it easier to tell which content types each POIXMLTextExtractor handles 49649 - Added clone support for UserSView* and Feat* families of records 49653 - Support for escaped unicode characters in Shared String Table 49579 - prevent ArrayIndexOutOfBoundException in UnknowEscherRecord diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 7ccb9f9297..4864714f49 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -52,6 +52,7 @@ import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.xslf.XSLFSlideShow; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; +import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; import org.apache.poi.xssf.extractor.XSSFExcelExtractor; import org.apache.poi.xssf.usermodel.XSSFRelation; @@ -155,42 +156,40 @@ public class ExtractorFactory { } public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { - PackageRelationshipCollection core = - pkg.getRelationshipsByType(CORE_DOCUMENT_REL); - if(core.size() != 1) { - throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); - } - - PackagePart corePart = pkg.getPart(core.getRelationship(0)); - if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) || - corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) || - corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) || - corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) || - corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) { - if(getPreferEventExtractor()) { - return new XSSFEventBasedExcelExtractor(pkg); - } else { - return new XSSFExcelExtractor(pkg); - } - } - - if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) || - corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) || - corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) || - corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) { - return new XWPFWordExtractor(pkg); - } - - if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE) || - corePart.getContentType().equals(XSLFSlideShow.MACRO_CONTENT_TYPE) || - corePart.getContentType().equals(XSLFSlideShow.MACRO_TEMPLATE_CONTENT_TYPE) || - corePart.getContentType().equals(XSLFSlideShow.PRESENTATIONML_CONTENT_TYPE) || - corePart.getContentType().equals(XSLFSlideShow.PRESENTATIONML_TEMPLATE_CONTENT_TYPE) || - corePart.getContentType().equals(XSLFSlideShow.PRESENTATION_MACRO_CONTENT_TYPE)) { - return new XSLFPowerPointExtractor(pkg); - } + PackageRelationshipCollection core = + pkg.getRelationshipsByType(CORE_DOCUMENT_REL); + if(core.size() != 1) { + throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); + } - throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")"); + PackagePart corePart = pkg.getPart(core.getRelationship(0)); + + // Is it XSSF? + for(XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { + if(corePart.getContentType().equals(rel.getContentType())) { + if(getPreferEventExtractor()) { + return new XSSFEventBasedExcelExtractor(pkg); + } else { + return new XSSFExcelExtractor(pkg); + } + } + } + + // Is it XWPF? + for(XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) { + if(corePart.getContentType().equals(rel.getContentType())) { + return new XWPFWordExtractor(pkg); + } + } + + // Is it XSLF? + for(XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) { + if(corePart.getContentType().equals(rel.getContentType())) { + return new XSLFPowerPointExtractor(pkg); + } + } + + throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")"); } public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException { diff --git a/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java b/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java index 3ac2dc67bc..c2046afb68 100644 --- a/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java +++ b/src/ooxml/java/org/apache/poi/xslf/XSLFSlideShow.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.poi.POIXMLDocument; import org.apache.poi.util.Internal; +import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; @@ -29,7 +30,6 @@ import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; import org.apache.xmlbeans.XmlException; -import org.openxmlformats.schemas.drawingml.x2006.main.ThemeDocument; import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList; import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation; @@ -57,18 +57,6 @@ import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument; * WARNING - APIs expected to change rapidly */ public class XSLFSlideShow extends POIXMLDocument { - public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"; - public static final String MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml"; - public static final String MACRO_TEMPLATE_CONTENT_TYPE = "application/vnd.ms-powerpoint.template.macroEnabled.main+xml"; - public static final String PRESENTATIONML_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml"; - public static final String PRESENTATIONML_TEMPLATE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml"; - public static final String PRESENTATION_MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml"; - public static final String THEME_MANAGER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.themeManager+xml"; - public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"; - public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"; - public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"; - public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide"; - public static final String COMMENT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"; private PresentationDocument presentationDoc; /** @@ -79,7 +67,7 @@ public class XSLFSlideShow extends POIXMLDocument { public XSLFSlideShow(OPCPackage container) throws OpenXML4JException, IOException, XmlException { super(container); - if(getCorePart().getContentType().equals(THEME_MANAGER_CONTENT_TYPE)) { + if(getCorePart().getContentType().equals(XSLFRelation.THEME_MANAGER.getContentType())) { rebase(getPackage()); } @@ -187,7 +175,7 @@ public class XSLFSlideShow extends POIXMLDocument { PackagePart slidePart = getSlidePart(parentSlide); try { - notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE); + notes = slidePart.getRelationshipsByType(XSLFRelation.NOTES.getRelation()); } catch(InvalidFormatException e) { throw new IllegalStateException(e); } @@ -231,7 +219,7 @@ public class XSLFSlideShow extends POIXMLDocument { PackagePart slidePart = getSlidePart(slide); try { - commentRels = slidePart.getRelationshipsByType(COMMENT_RELATION_TYPE); + commentRels = slidePart.getRelationshipsByType(XSLFRelation.COMMENTS.getRelation()); } catch(InvalidFormatException e) { throw new IllegalStateException(e); } diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java index fa4c77f8c0..d20a357964 100644 --- a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java +++ b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java @@ -23,6 +23,7 @@ import org.apache.poi.xslf.XSLFSlideShow; import org.apache.poi.xslf.usermodel.DrawingParagraph; import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XSLFCommonSlideData; +import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.xmlbeans.XmlException; import org.openxmlformats.schemas.presentationml.x2006.main.*; @@ -30,6 +31,12 @@ import org.openxmlformats.schemas.presentationml.x2006.main.*; import java.io.IOException; public class XSLFPowerPointExtractor extends POIXMLTextExtractor { + public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[] { + XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE, + XSLFRelation.PRESENTATIONML, XSLFRelation.PRESENTATIONML_TEMPLATE, + XSLFRelation.PRESENTATION_MACRO + }; + private XMLSlideShow slideshow; private boolean slidesByDefault = true; private boolean notesByDefault = false; diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFRelation.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFRelation.java new file mode 100644 index 0000000000..b3be77f54b --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFRelation.java @@ -0,0 +1,111 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xslf.usermodel; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.poi.POIXMLDocumentPart; +import org.apache.poi.POIXMLRelation; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +public class XSLFRelation extends POIXMLRelation { + + private static POILogger log = POILogFactory.getLogger(XSLFRelation.class); + + /** + * A map to lookup POIXMLRelation by its relation type + */ + protected static Map _table = new HashMap(); + + public static final XSLFRelation MAIN = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml", + null, null, null + ); + + public static final XSLFRelation MACRO = new XSLFRelation( + "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml", + null, null, null + ); + + public static final XSLFRelation MACRO_TEMPLATE = new XSLFRelation( + "application/vnd.ms-powerpoint.template.macroEnabled.main+xml", + null, null, null + ); + + public static final XSLFRelation PRESENTATIONML = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml", + null, null, null + ); + + public static final XSLFRelation PRESENTATIONML_TEMPLATE = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml", + null, null, null + ); + + public static final XSLFRelation PRESENTATION_MACRO = new XSLFRelation( + "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml", + null, null, null + ); + + public static final XSLFRelation THEME_MANAGER = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.themeManager+xml", + null, null, null + ); + + public static final XSLFRelation NOTES = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide", + null, null + ); + + public static final XSLFRelation SLIDE = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.presentationml.slide+xml", + null, null, null + ); + + public static final XSLFRelation SLIDE_LAYOUT = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout", + null, null + ); + + public static final XSLFRelation COMMENTS = new XSLFRelation( + "application/vnd.openxmlformats-officedocument.presentationml.comments+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments", + null, null + ); + + + private XSLFRelation(String type, String rel, String defaultName, Class cls) { + super(type, rel, defaultName, cls); + + if(cls != null && !_table.containsKey(rel)) _table.put(rel, this); + } + + /** + * Get POIXMLRelation by relation type + * + * @param rel relation type, for example, + * http://schemas.openxmlformats.org/officeDocument/2006/relationships/image + * @return registered POIXMLRelation or null if not found + */ + public static XSLFRelation getInstance(String rel){ + return _table.get(rel); + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java index 0f089e2db1..c606471f86 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -28,6 +28,7 @@ import org.apache.poi.ss.usermodel.Comment; import org.apache.poi.ss.usermodel.HeaderFooter; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.xssf.usermodel.XSSFCell; +import org.apache.poi.xssf.usermodel.XSSFRelation; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.xmlbeans.XmlException; @@ -36,6 +37,12 @@ import org.apache.xmlbeans.XmlException; * Helper class to extract text from an OOXML Excel file */ public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor { + public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { + XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK, + XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK, + XSSFRelation.MACROS_WORKBOOK + }; + private XSSFWorkbook workbook; private boolean includeSheetNames = true; private boolean formulasNotResults = false; diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java index 4047344a6a..9c159956d0 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java +++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java @@ -30,6 +30,7 @@ import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator; import org.apache.poi.xwpf.model.XWPFParagraphDecorator; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; +import org.apache.poi.xwpf.usermodel.XWPFRelation; import org.apache.poi.xwpf.usermodel.XWPFTable; import org.apache.xmlbeans.XmlException; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr; @@ -38,6 +39,12 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr; * Helper class to extract text from an OOXML Word file */ public class XWPFWordExtractor extends POIXMLTextExtractor { + public static final XWPFRelation[] SUPPORTED_TYPES = new XWPFRelation[] { + XWPFRelation.DOCUMENT, XWPFRelation.TEMPLATE, + XWPFRelation.MACRO_DOCUMENT, + XWPFRelation.MACRO_TEMPLATE_DOCUMENT + }; + private XWPFDocument document; private boolean fetchHyperlinks = false; -- 2.39.5