import java.io.IOException;
import java.io.InputStream;
-import java.util.Iterator;
+import java.util.*;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.POIXMLException;
+import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.InvalidFormatException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart;
import org.openxml4j.opc.PackagePartName;
import org.openxml4j.opc.PackageRelationship;
-import org.openxml4j.opc.PackageRelationshipCollection;
import org.openxml4j.opc.PackageRelationshipTypes;
import org.openxml4j.opc.PackagingURIHelper;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
/**
* This class makes it easy to get at individual parts
* for XSSF.
*/
public class XSSFReader {
- private Package pkg;
- private PackagePart workbookPart;
-
- /**
- * Creates a new XSSFReader, for the given package
- */
- public XSSFReader(Package pkg) throws IOException, OpenXML4JException {
- this.pkg = pkg;
-
+ private Package pkg;
+ private PackagePart workbookPart;
+
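+    /**
+     * Creates a new XSSFReader for the given package, locating the
+     * workbook part through the package's core document relationship.
+     *
+     * @param pkg the package to read the workbook from
+     */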
+ public XSSFReader(Package pkg) throws IOException, OpenXML4JException {
+ this.pkg = pkg;
+
PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
-
+
// Get the part that holds the workbook
workbookPart = this.pkg.getPart(coreDocRelationship);
- }
-
-
- /**
- * Opens up the Shared Strings Table, parses it, and
- * returns a handy object for working with
- * shared strings.
- */
- public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
- return new SharedStringsTable(getSharedStringsData());
- }
-
- /**
- * Opens up the Styles Table, parses it, and
- * returns a handy object for working with cell styles
- */
- public StylesTable getStylesTable() throws IOException, InvalidFormatException {
- return new StylesTable(getStylesData());
- }
-
-
-
- /**
- * Returns an InputStream to read the contents of the
- * shared strings table.
- */
- public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
- return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
- }
-
- /**
- * Returns an InputStream to read the contents of the
- * styles table.
- */
- public InputStream getStylesData() throws IOException, InvalidFormatException {
- return XSSFRelation.STYLES.getContents(workbookPart);
- }
-
- /**
- * Returns an InputStream to read the contents of the
- * main Workbook, which contains key overall data for
- * the file, including sheet definitions.
- */
- public InputStream getWorkbookData() throws IOException, InvalidFormatException {
- return workbookPart.getInputStream();
- }
-
- /**
- * Returns an InputStream to read the contents of the
- * specified Sheet.
- * @param relId The relationId of the sheet, from a r:id on the workbook
- */
- public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
+ }
+
+
+    /**
+     * Fetches the Shared Strings data of the workbook and wraps it
+     * in a SharedStringsTable, a handy object for working with
+     * shared strings.
+     */
+    public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
+        InputStream sharedStringsData = getSharedStringsData();
+        return new SharedStringsTable(sharedStringsData);
+    }
+
+    /**
+     * Fetches the Styles data of the workbook and wraps it in a
+     * StylesTable, a handy object for working with cell styles.
+     */
+    public StylesTable getStylesTable() throws IOException, InvalidFormatException {
+        InputStream stylesData = getStylesData();
+        return new StylesTable(stylesData);
+    }
+
+
+
+    /**
+     * Returns an InputStream over the raw contents of the shared
+     * strings table, looked up relative to the workbook part.
+     */
+    public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
+        InputStream sharedStrings = XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
+        return sharedStrings;
+    }
+
+    /**
+     * Returns an InputStream over the raw contents of the styles
+     * table, looked up relative to the workbook part.
+     */
+    public InputStream getStylesData() throws IOException, InvalidFormatException {
+        InputStream styles = XSSFRelation.STYLES.getContents(workbookPart);
+        return styles;
+    }
+
+    /**
+     * Returns an InputStream over the raw contents of the main
+     * Workbook part, which contains key overall data for the
+     * file, including sheet definitions.
+     */
+    public InputStream getWorkbookData() throws IOException, InvalidFormatException {
+        InputStream workbookData = workbookPart.getInputStream();
+        return workbookData;
+    }
+
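+    /**
+     * Returns an InputStream to read the contents of the
+     * specified Sheet.
+     * @param relId The relationship id of the sheet, taken from an r:id on the workbook
+     * @throws IllegalArgumentException if no relationship, or no sheet data, is found for the given r:id
+     */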
+ public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
PackageRelationship rel = workbookPart.getRelationship(relId);
if(rel == null) {
- throw new IllegalArgumentException("No Sheet found with r:id " + relId);
+ throw new IllegalArgumentException("No Sheet found with r:id " + relId);
}
-
+
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
PackagePart sheet = pkg.getPart(relName);
if(sheet == null) {
- throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
+ throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
}
return sheet.getInputStream();
- }
-
- /**
- * Returns an Iterator which will let you get at all the
- * different Sheets in turn.
- * Each sheet's InputStream is only opened when fetched
- * from the Iterator. It's up to you to close the
- * InputStreams when done with each one.
- */
- public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
- return new SheetDataIterator();
- }
-
- private class SheetDataIterator implements Iterator<InputStream> {
- private Iterator<PackageRelationship> sheetRels;
- private SheetDataIterator() throws IOException, InvalidFormatException {
- // Find all the sheets
- PackageRelationshipCollection sheets =
- workbookPart.getRelationshipsByType(
- XSSFRelation.WORKSHEET.getRelation()
- );
- sheetRels = sheets.iterator();
- }
-
- public boolean hasNext() {
- return sheetRels.hasNext();
- }
-
- public InputStream next() {
- PackageRelationship sheet = sheetRels.next();
- try {
- PackagePartName relName = PackagingURIHelper.createPartName(sheet.getTargetURI());
- PackagePart sheetPkg = pkg.getPart(relName);
- return sheetPkg.getInputStream();
- } catch(IOException e) {
- throw new RuntimeException(e);
- } catch(InvalidFormatException ife) {
- throw new RuntimeException(ife);
- }
- }
-
- public void remove() {
- throw new IllegalStateException("Not supported");
- }
- }
+ }
+
+    /**
+     * Returns an Iterator which will let you get at all the
+     * different Sheets in turn.
+     * Each sheet's InputStream is only opened when fetched
+     * from the Iterator. It's up to you to close the
+     * InputStreams when done with each one.
+     * The object handed back is a {@link SheetIterator} built
+     * from the workbook part.
+     */
+    public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
+        Iterator<InputStream> sheets = new SheetIterator(workbookPart);
+        return sheets;
+    }
+
+    /**
+     * Iterator over sheet data. Each call to {@link #next()} opens and
+     * returns the input stream of the next sheet, following the order of
+     * the sheet elements in workbook.xml; closing those streams is the
+     * caller's responsibility.
+     */
+    public static class SheetIterator implements Iterator<InputStream> {
+
+        /**
+         * Maps relId and the corresponding PackagePart
+         */
+        private Map<String, PackagePart> sheetMap;
+
+        /**
+         * Current CTSheet bean, <tt>null</tt> until {@link #next()} has been called
+         */
+        private CTSheet ctSheet;
+
+        /**
+         * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
+         * We can't rely on the OpenXML4J's relationship iterator because it returns objects in physical order,
+         * i.e. as they are stored in the underlying package
+         */
+        private Iterator<CTSheet> sheetIterator;
+
+        /**
+         * Construct a new SheetIterator
+         *
+         * @param wb package part holding workbook.xml
+         * @throws IOException if the workbook part's stream cannot be opened or closed
+         * @throws POIXMLException if a sheet part name is invalid or workbook.xml cannot be parsed
+         */
+        private SheetIterator(PackagePart wb) throws IOException {
+
+            // The order of sheets is defined by the order of CTSheet elements in workbook.xml
+            try {
+                //step 1. Map sheet's relationship Id and the corresponding PackagePart
+                sheetMap = new HashMap<String, PackagePart>();
+                for(PackageRelationship rel : wb.getRelationships()){
+                    if(rel.getRelationshipType().equals(XSSFRelation.WORKSHEET.getRelation())){
+                        PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+                        sheetMap.put(rel.getId(), wb.getPackage().getPart(relName));
+                    }
+                }
+                //step 2. Read array of CTSheet elements, wrap it in a List and construct an iterator
+                //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
+                InputStream wbData = wb.getInputStream();
+                CTWorkbook wbBean;
+                try {
+                    wbBean = WorkbookDocument.Factory.parse(wbData).getWorkbook();
+                } finally {
+                    // the stream is opened here, so it is released here as well
+                    wbData.close();
+                }
+                sheetIterator = Arrays.asList(wbBean.getSheets().getSheetArray()).iterator();
+            } catch (InvalidFormatException e){
+                throw new POIXMLException(e);
+            } catch (XmlException e){
+                throw new POIXMLException(e);
+            }
+        }
+
+        /**
+         * Returns <tt>true</tt> if the iteration has more elements.
+         *
+         * @return <tt>true</tt> if the iterator has more elements.
+         */
+        public boolean hasNext() {
+            return sheetIterator.hasNext();
+        }
+
+        /**
+         * Returns input stream of the next sheet in the iteration
+         *
+         * @return input stream of the next sheet in the iteration
+         * @throws POIXMLException if no package part is mapped to the sheet's r:id,
+         *  or if the part's stream cannot be opened
+         */
+        public InputStream next() {
+            ctSheet = sheetIterator.next();
+
+            String sheetId = ctSheet.getId();
+            PackagePart sheetPkg = sheetMap.get(sheetId);
+            if(sheetPkg == null) {
+                // workbook.xml references a sheet that has no worksheet part behind it
+                throw new POIXMLException("No data found for Sheet with r:id " + sheetId);
+            }
+            try {
+                return sheetPkg.getInputStream();
+            } catch(IOException e) {
+                throw new POIXMLException(e);
+            }
+        }
+
+        /**
+         * Returns name of the current sheet
+         *
+         * @return name of the current sheet
+         * @throws IllegalStateException if {@link #next()} has not been called yet
+         */
+        public String getSheetName() {
+            if(ctSheet == null) {
+                throw new IllegalStateException("No current sheet, call next() first");
+            }
+            return ctSheet.getName();
+        }
+
+        /**
+         * Removal of sheets is not supported by this iterator.
+         *
+         * @throws IllegalStateException always
+         */
+        public void remove() {
+            throw new IllegalStateException("Not supported");
+        }
+    }
}