<changes>
<release version="3.7-beta2" date="2010-??-??">
+ <action dev="POI-DEVELOPERS" type="add">Make it easier to tell which content types each POIXMLTextExtractor handles</action>
<action dev="POI-DEVELOPERS" type="fix">49649 - Added clone support for UserSView* and Feat* families of records</action>
<action dev="POI-DEVELOPERS" type="fix">49653 - Support for escaped unicode characters in Shared String Table</action>
<action dev="POI-DEVELOPERS" type="fix">49579 - prevent ArrayIndexOutOfBoundsException in UnknownEscherRecord</action>
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFRelation;
}
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
- PackageRelationshipCollection core =
- pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
- if(core.size() != 1) {
- throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
- }
-
- PackagePart corePart = pkg.getPart(core.getRelationship(0));
- if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
- corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
- corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
- corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
- corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
- if(getPreferEventExtractor()) {
- return new XSSFEventBasedExcelExtractor(pkg);
- } else {
- return new XSSFExcelExtractor(pkg);
- }
- }
-
- if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
- corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
- corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
- corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
- return new XWPFWordExtractor(pkg);
- }
-
- if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE) ||
- corePart.getContentType().equals(XSLFSlideShow.MACRO_CONTENT_TYPE) ||
- corePart.getContentType().equals(XSLFSlideShow.MACRO_TEMPLATE_CONTENT_TYPE) ||
- corePart.getContentType().equals(XSLFSlideShow.PRESENTATIONML_CONTENT_TYPE) ||
- corePart.getContentType().equals(XSLFSlideShow.PRESENTATIONML_TEMPLATE_CONTENT_TYPE) ||
- corePart.getContentType().equals(XSLFSlideShow.PRESENTATION_MACRO_CONTENT_TYPE)) {
- return new XSLFPowerPointExtractor(pkg);
- }
+ PackageRelationshipCollection core =
+ pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
+ if(core.size() != 1) { // anything other than exactly one core document relationship is rejected
+ throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
+ }
- throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
+ PackagePart corePart = pkg.getPart(core.getRelationship(0)); // the single core part; its content type selects the extractor
+
+ // Is it XSSF? Compare the core part's content type with each relation listed in XSSFExcelExtractor.SUPPORTED_TYPES
+ for(XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
+ if(corePart.getContentType().equals(rel.getContentType())) {
+ if(getPreferEventExtractor()) { // this flag chooses the event-based extractor; otherwise XSSFExcelExtractor is returned
+ return new XSSFEventBasedExcelExtractor(pkg);
+ } else {
+ return new XSSFExcelExtractor(pkg);
+ }
+ }
+ }
+
+ // Is it XWPF? Same comparison against the relations listed in XWPFWordExtractor.SUPPORTED_TYPES
+ for(XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
+ if(corePart.getContentType().equals(rel.getContentType())) {
+ return new XWPFWordExtractor(pkg);
+ }
+ }
+
+ // Is it XSLF? Same comparison against the relations listed in XSLFPowerPointExtractor.SUPPORTED_TYPES
+ for(XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
+ if(corePart.getContentType().equals(rel.getContentType())) {
+ return new XSLFPowerPointExtractor(pkg);
+ }
+ }
+
+ throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")"); // reached only when no SUPPORTED_TYPES entry matched
}
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
import org.apache.poi.POIXMLDocument;
import org.apache.poi.util.Internal;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.drawingml.x2006.main.ThemeDocument;
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation;
* WARNING - APIs expected to change rapidly
*/
public class XSLFSlideShow extends POIXMLDocument {
- public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
- public static final String MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml";
- public static final String MACRO_TEMPLATE_CONTENT_TYPE = "application/vnd.ms-powerpoint.template.macroEnabled.main+xml";
- public static final String PRESENTATIONML_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml";
- public static final String PRESENTATIONML_TEMPLATE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml";
- public static final String PRESENTATION_MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml";
- public static final String THEME_MANAGER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.themeManager+xml";
- public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";
- public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
- public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout";
- public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide";
- public static final String COMMENT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
private PresentationDocument presentationDoc;
/**
public XSLFSlideShow(OPCPackage container) throws OpenXML4JException, IOException, XmlException {
super(container);
- if(getCorePart().getContentType().equals(THEME_MANAGER_CONTENT_TYPE)) {
+ if(getCorePart().getContentType().equals(XSLFRelation.THEME_MANAGER.getContentType())) {
rebase(getPackage());
}
PackagePart slidePart = getSlidePart(parentSlide);
try {
- notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE);
+ notes = slidePart.getRelationshipsByType(XSLFRelation.NOTES.getRelation());
} catch(InvalidFormatException e) {
throw new IllegalStateException(e);
}
PackagePart slidePart = getSlidePart(slide);
try {
- commentRels = slidePart.getRelationshipsByType(COMMENT_RELATION_TYPE);
+ commentRels = slidePart.getRelationshipsByType(XSLFRelation.COMMENTS.getRelation());
} catch(InvalidFormatException e) {
throw new IllegalStateException(e);
}
import org.apache.poi.xslf.usermodel.DrawingParagraph;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.presentationml.x2006.main.*;
import java.io.IOException;
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
+ public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[] { // relations whose content types the factory's createExtractor(OPCPackage) maps to XSLFPowerPointExtractor
+ XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
+ XSLFRelation.PRESENTATIONML, XSLFRelation.PRESENTATIONML_TEMPLATE,
+ XSLFRelation.PRESENTATION_MACRO
+ };
+
private XMLSlideShow slideshow;
private boolean slidesByDefault = true;
private boolean notesByDefault = false;
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf.usermodel;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.poi.POIXMLDocumentPart;
+import org.apache.poi.POIXMLRelation;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Catalogue of the content types and relationship types used by the
+ * PresentationML (XSLF) parts that this module refers to.
+ *
+ * <p>Each constant passes a content type, an optional relationship type,
+ * an optional default part name and an optional part class to the
+ * {@link POIXMLRelation} constructor. Every constant defined here currently
+ * passes <code>null</code> for the default name and for the part class.</p>
+ */
+public class XSLFRelation extends POIXMLRelation {
+
+    private static final POILogger log = POILogFactory.getLogger(XSLFRelation.class);
+
+    /**
+     * A map to lookup POIXMLRelation by its relation type.
+     *
+     * Declared ahead of the constants below because their constructor
+     * reads and writes this map while the class is being initialised.
+     */
+    protected static final Map<String, XSLFRelation> _table = new HashMap<String, XSLFRelation>();
+
+    /** Main part of a presentation (content type only, no relationship type). */
+    public static final XSLFRelation MAIN = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml",
+            null, null, null
+    );
+
+    /** Main part of a macro-enabled slideshow. */
+    public static final XSLFRelation MACRO = new XSLFRelation(
+            "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml",
+            null, null, null
+    );
+
+    /** Main part of a macro-enabled template. */
+    public static final XSLFRelation MACRO_TEMPLATE = new XSLFRelation(
+            "application/vnd.ms-powerpoint.template.macroEnabled.main+xml",
+            null, null, null
+    );
+
+    /** Main part of a slideshow. */
+    public static final XSLFRelation PRESENTATIONML = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml",
+            null, null, null
+    );
+
+    /** Main part of a template. */
+    public static final XSLFRelation PRESENTATIONML_TEMPLATE = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml",
+            null, null, null
+    );
+
+    /** Main part of a macro-enabled presentation. */
+    public static final XSLFRelation PRESENTATION_MACRO = new XSLFRelation(
+            "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml",
+            null, null, null
+    );
+
+    /** Theme manager part (content type only). */
+    public static final XSLFRelation THEME_MANAGER = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.themeManager+xml",
+            null, null, null
+    );
+
+    /** Notes slide: content type plus the relationship type that links to it. */
+    public static final XSLFRelation NOTES = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml",
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide",
+            null, null
+    );
+
+    /** Slide part (content type only). */
+    public static final XSLFRelation SLIDE = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.presentationml.slide+xml",
+            null, null, null
+    );
+
+    /** Slide layout: content type plus the relationship type that links to it. */
+    public static final XSLFRelation SLIDE_LAYOUT = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml",
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout",
+            null, null
+    );
+
+    /** Slide comments: content type plus the relationship type that links to them. */
+    public static final XSLFRelation COMMENTS = new XSLFRelation(
+            "application/vnd.openxmlformats-officedocument.presentationml.comments+xml",
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
+            null, null
+    );
+
+
+    /**
+     * Creates a relation and, when it carries a part class, registers it in
+     * the lookup table under its relationship type.
+     *
+     * @param type content type of the part
+     * @param rel relationship type, may be <code>null</code>
+     * @param defaultName default part name, may be <code>null</code>
+     * @param cls class used for the part, may be <code>null</code>
+     */
+    private XSLFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
+        super(type, rel, defaultName, cls);
+
+        // Relations without a part class are never registered, and the first
+        // relation registered for a given relationship type is kept.
+        if (cls != null && !_table.containsKey(rel)) {
+            _table.put(rel, this);
+        }
+    }
+
+    /**
+     * Get POIXMLRelation by relation type.
+     *
+     * Only relations constructed with a non-null part class are registered;
+     * none of the constants in this class supply one, so as written every
+     * lookup returns <code>null</code>.
+     *
+     * @param rel relation type, for example,
+     *    <code>http://schemas.openxmlformats.org/officeDocument/2006/relationships/image</code>
+     * @return registered POIXMLRelation or null if not found
+     */
+    public static XSLFRelation getInstance(String rel){
+        return _table.get(rel);
+    }
+}
import org.apache.poi.ss.usermodel.HeaderFooter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
* Helper class to extract text from an OOXML Excel file
*/
public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
+ public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { // relations whose content types the factory's createExtractor(OPCPackage) maps to an Excel extractor
+ XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
+ XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
+ XSSFRelation.MACROS_WORKBOOK
+ };
+
private XSSFWorkbook workbook;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
* Helper class to extract text from an OOXML Word file
*/
public class XWPFWordExtractor extends POIXMLTextExtractor {
+ public static final XWPFRelation[] SUPPORTED_TYPES = new XWPFRelation[] { // relations whose content types the factory's createExtractor(OPCPackage) maps to XWPFWordExtractor
+ XWPFRelation.DOCUMENT, XWPFRelation.TEMPLATE,
+ XWPFRelation.MACRO_DOCUMENT,
+ XWPFRelation.MACRO_TEMPLATE_DOCUMENT
+ };
+
private XWPFDocument document;
private boolean fetchHyperlinks = false;