123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
- package org.apache.poi.xssf.eventusermodel;
-
- import javax.xml.parsers.ParserConfigurationException;
- import java.io.IOException;
- import java.io.InputStream;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.Locale;
- import java.util.Map;
-
- import org.apache.poi.POIXMLException;
- import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
- import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
- import org.apache.poi.openxml4j.opc.OPCPackage;
- import org.apache.poi.openxml4j.opc.PackagePart;
- import org.apache.poi.openxml4j.opc.PackagePartName;
- import org.apache.poi.openxml4j.opc.PackageRelationship;
- import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
- import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
- import org.apache.poi.openxml4j.opc.PackagingURIHelper;
- import org.apache.poi.util.POILogFactory;
- import org.apache.poi.util.POILogger;
- import org.apache.poi.util.SAXHelper;
- import org.apache.poi.xssf.model.CommentsTable;
- import org.apache.poi.xssf.model.SharedStringsTable;
- import org.apache.poi.xssf.model.StylesTable;
- import org.apache.poi.xssf.model.ThemesTable;
- import org.apache.poi.xssf.usermodel.XSSFDrawing;
- import org.apache.poi.xssf.usermodel.XSSFRelation;
- import org.apache.poi.xssf.usermodel.XSSFShape;
- import org.apache.xmlbeans.XmlException;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
-
- /**
- * This class makes it easy to get at individual parts
- * of an OOXML .xlsx file, suitable for low memory sax
- * parsing or similar.
- * It makes up the core part of the EventUserModel support
- * for XSSF.
- */
- public class XSSFReader {
-
- private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
-
- protected OPCPackage pkg;
- protected PackagePart workbookPart;
-
- /**
- * Creates a new XSSFReader, for the given package
- */
- public XSSFReader(OPCPackage pkg) throws IOException, OpenXML4JException {
- this.pkg = pkg;
-
- PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
- PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
-
- // strict OOXML likely not fully supported, see #57699
- // this code is similar to POIXMLDocumentPart.getPartFromOPCPackage(), but I could not combine it
- // easily due to different return values
- if(coreDocRelationship == null) {
- if (this.pkg.getRelationshipsByType(
- PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0) != null) {
- throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
- }
-
- throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
- }
-
- // Get the part that holds the workbook
- workbookPart = this.pkg.getPart(coreDocRelationship);
- }
-
-
- /**
- * Opens up the Shared Strings Table, parses it, and
- * returns a handy object for working with
- * shared strings.
- */
- public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
- ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.SHARED_STRINGS.getContentType());
- return parts.size() == 0 ? null : new SharedStringsTable(parts.get(0));
- }
-
- /**
- * Opens up the Styles Table, parses it, and
- * returns a handy object for working with cell styles
- */
- public StylesTable getStylesTable() throws IOException, InvalidFormatException {
- ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.STYLES.getContentType());
- if(parts.size() == 0) return null;
-
- // Create the Styles Table, and associate the Themes if present
- StylesTable styles = new StylesTable(parts.get(0));
- parts = pkg.getPartsByContentType( XSSFRelation.THEME.getContentType());
- if(parts.size() != 0) {
- styles.setTheme(new ThemesTable(parts.get(0)));
- }
- return styles;
- }
-
-
-
- /**
- * Returns an InputStream to read the contents of the
- * shared strings table.
- */
- public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
- return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
- }
-
- /**
- * Returns an InputStream to read the contents of the
- * styles table.
- */
- public InputStream getStylesData() throws IOException, InvalidFormatException {
- return XSSFRelation.STYLES.getContents(workbookPart);
- }
-
- /**
- * Returns an InputStream to read the contents of the
- * themes table.
- */
- public InputStream getThemesData() throws IOException, InvalidFormatException {
- return XSSFRelation.THEME.getContents(workbookPart);
- }
-
- /**
- * Returns an InputStream to read the contents of the
- * main Workbook, which contains key overall data for
- * the file, including sheet definitions.
- */
- public InputStream getWorkbookData() throws IOException, InvalidFormatException {
- return workbookPart.getInputStream();
- }
-
- /**
- * Returns an InputStream to read the contents of the
- * specified Sheet.
- * @param relId The relationId of the sheet, from a r:id on the workbook
- */
- public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
- PackageRelationship rel = workbookPart.getRelationship(relId);
- if(rel == null) {
- throw new IllegalArgumentException("No Sheet found with r:id " + relId);
- }
-
- PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
- PackagePart sheet = pkg.getPart(relName);
- if(sheet == null) {
- throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
- }
- return sheet.getInputStream();
- }
-
- /**
- * Returns an Iterator which will let you get at all the
- * different Sheets in turn.
- * Each sheet's InputStream is only opened when fetched
- * from the Iterator. It's up to you to close the
- * InputStreams when done with each one.
- */
- public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
- return new SheetIterator(workbookPart);
- }
-
- /**
- * Iterator over sheet data.
- */
- public static class SheetIterator implements Iterator<InputStream> {
-
- /**
- * Maps relId and the corresponding PackagePart
- */
- private final Map<String, PackagePart> sheetMap;
-
- /**
- * Current sheet reference
- */
- XSSFSheetRef xssfSheetRef;
-
- /**
- * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
- * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
- * i.e. as they are stored in the underlying package
- */
- final Iterator<XSSFSheetRef> sheetIterator;
-
- /**
- * Construct a new SheetIterator
- *
- * @param wb package part holding workbook.xml
- */
- SheetIterator(PackagePart wb) throws IOException {
-
- /**
- * The order of sheets is defined by the order of CTSheet elements in workbook.xml
- */
- try {
- //step 1. Map sheet's relationship Id and the corresponding PackagePart
- sheetMap = new HashMap<String, PackagePart>();
- OPCPackage pkg = wb.getPackage();
- String REL_WORKSHEET = XSSFRelation.WORKSHEET.getRelation();
- String REL_CHARTSHEET = XSSFRelation.CHARTSHEET.getRelation();
- for(PackageRelationship rel : wb.getRelationships()){
- String relType = rel.getRelationshipType();
- if (relType.equals(REL_WORKSHEET) || relType.equals(REL_CHARTSHEET)) {
- PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
- sheetMap.put(rel.getId(), pkg.getPart(relName));
- }
- }
- //step 2. Read array of CTSheet elements, wrap it in a LinkedList
- //and construct an iterator
- sheetIterator = createSheetIteratorFromWB(wb);
- } catch (InvalidFormatException e){
- throw new POIXMLException(e);
- }
- }
-
- Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
-
- XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
- XMLReader xmlReader = null;
- try {
- xmlReader = SAXHelper.newXMLReader();
- } catch (ParserConfigurationException e) {
- throw new POIXMLException(e);
- } catch (SAXException e) {
- throw new POIXMLException(e);
- }
- xmlReader.setContentHandler(xmlSheetRefReader);
- try {
- xmlReader.parse(new InputSource(wb.getInputStream()));
- } catch (SAXException e) {
- throw new POIXMLException(e);
- }
-
- List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
- for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
- //if there's no relationship id, silently skip the sheet
- String sheetId = xssfSheetRef.getId();
- if (sheetId != null && sheetId.length() > 0) {
- validSheets.add(xssfSheetRef);
- }
- }
- return validSheets.iterator();
- }
-
-
- /**
- * Returns <tt>true</tt> if the iteration has more elements.
- *
- * @return <tt>true</tt> if the iterator has more elements.
- */
- @Override
- public boolean hasNext() {
- return sheetIterator.hasNext();
- }
-
- /**
- * Returns input stream of the next sheet in the iteration
- *
- * @return input stream of the next sheet in the iteration
- */
- @Override
- public InputStream next() {
- xssfSheetRef = sheetIterator.next();
-
- String sheetId = xssfSheetRef.getId();
- try {
- PackagePart sheetPkg = sheetMap.get(sheetId);
- return sheetPkg.getInputStream();
- } catch(IOException e) {
- throw new POIXMLException(e);
- }
- }
-
- /**
- * Returns name of the current sheet
- *
- * @return name of the current sheet
- */
- public String getSheetName() {
- return xssfSheetRef.getName();
- }
-
- /**
- * Returns the comments associated with this sheet,
- * or null if there aren't any
- */
- public CommentsTable getSheetComments() {
- PackagePart sheetPkg = getSheetPart();
-
- // Do we have a comments relationship? (Only ever one if so)
- try {
- PackageRelationshipCollection commentsList =
- sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
- if(commentsList.size() > 0) {
- PackageRelationship comments = commentsList.getRelationship(0);
- PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
- PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
- return new CommentsTable(commentsPart);
- }
- } catch (InvalidFormatException e) {
- return null;
- } catch (IOException e) {
- return null;
- }
- return null;
- }
-
- /**
- * Returns the shapes associated with this sheet,
- * an empty list or null if there is an exception
- */
- public List<XSSFShape> getShapes() {
- PackagePart sheetPkg = getSheetPart();
- List<XSSFShape> shapes= new LinkedList<XSSFShape>();
- // Do we have a comments relationship? (Only ever one if so)
- try {
- PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation());
- for (int i = 0; i < drawingsList.size(); i++){
- PackageRelationship drawings = drawingsList.getRelationship(i);
- PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI());
- PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName);
- if (drawingsPart == null) {
- //parts can go missing; Excel ignores them silently -- TIKA-2134
- LOGGER.log(POILogger.WARN, "Missing drawing: "+drawingsName +". Skipping it.");
- continue;
- }
- XSSFDrawing drawing = new XSSFDrawing(drawingsPart);
- for (XSSFShape shape : drawing.getShapes()){
- shapes.add(shape);
- }
- }
- } catch (XmlException e){
- return null;
- } catch (InvalidFormatException e) {
- return null;
- } catch (IOException e) {
- return null;
- }
- return shapes;
- }
-
- public PackagePart getSheetPart() {
- String sheetId = xssfSheetRef.getId();
- return sheetMap.get(sheetId);
- }
-
- /**
- * We're read only, so remove isn't supported
- */
- @Override
- public void remove() {
- throw new IllegalStateException("Not supported");
- }
- }
-
- protected final static class XSSFSheetRef {
- //do we need to store sheetId, too?
- private final String id;
- private final String name;
-
- public XSSFSheetRef(String id, String name) {
- this.id = id;
- this.name = name;
- }
-
- public String getId() {
- return id;
- }
-
- public String getName() {
- return name;
- }
- }
-
- //scrapes sheet reference info and order from workbook.xml
- private static class XMLSheetRefReader extends DefaultHandler {
- private final static String SHEET = "sheet";
- private final static String ID = "id";
- private final static String NAME = "name";
-
- private final List<XSSFSheetRef> sheetRefs = new LinkedList();
-
- @Override
- public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
- if (localName.toLowerCase(Locale.US).equals(SHEET)) {
- String name = null;
- String id = null;
- for (int i = 0; i < attrs.getLength(); i++) {
- if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) {
- name = attrs.getValue(i);
- } else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) {
- id = attrs.getValue(i);
- }
- sheetRefs.add(new XSSFSheetRef(id, name));
- }
- }
- }
-
- List<XSSFSheetRef> getSheetRefs() {
- return Collections.unmodifiableList(sheetRefs);
- }
- }
- }
|