source.dussan.org Git - poi.git/commitdiff
iterator over sheet data in XSSFReader returns sheets in logical order, i.e. as...
author: Yegor Kozlov <yegor@apache.org>
Wed, 1 Oct 2008 16:14:39 +0000 (16:14 +0000)
committer: Yegor Kozlov <yegor@apache.org>
Wed, 1 Oct 2008 16:14:39 +0000 (16:14 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@700821 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java
src/testcases/org/apache/poi/hssf/data/reordered_sheets.xlsx [new file with mode: 0755]

index 181913f266d561ff058aa42ca1efc5de7cf306a5..f0a84d160385fc68127e6151fbde028d5e3e344d 100644 (file)
@@ -18,20 +18,24 @@ package org.apache.poi.xssf.eventusermodel;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Iterator;
+import java.util.*;
 
 import org.apache.poi.xssf.model.SharedStringsTable;
 import org.apache.poi.xssf.model.StylesTable;
 import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.POIXMLException;
+import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.InvalidFormatException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxml4j.opc.PackagePart;
 import org.openxml4j.opc.PackagePartName;
 import org.openxml4j.opc.PackageRelationship;
-import org.openxml4j.opc.PackageRelationshipCollection;
 import org.openxml4j.opc.PackageRelationshipTypes;
 import org.openxml4j.opc.PackagingURIHelper;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
 
 /**
  * This class makes it easy to get at individual parts
@@ -41,127 +45,186 @@ import org.openxml4j.opc.PackagingURIHelper;
  *  for XSSF.
  */
 public class XSSFReader {
-       private Package pkg;
-       private PackagePart workbookPart;
-       
-       /**
-        * Creates a new XSSFReader, for the given package
-        */
-       public XSSFReader(Package pkg) throws IOException, OpenXML4JException {
-               this.pkg = pkg;
-               
+    private Package pkg;
+    private PackagePart workbookPart;
+
+    /**
+     * Creates a new XSSFReader, for the given package
+     */
+    public XSSFReader(Package pkg) throws IOException, OpenXML4JException {
+        this.pkg = pkg;
+
         PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
                 PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
-    
+
         // Get the part that holds the workbook
         workbookPart = this.pkg.getPart(coreDocRelationship);
-       }
-
-       
-       /**
-        * Opens up the Shared Strings Table, parses it, and
-        *  returns a handy object for working with 
-        *  shared strings.
-        */
-       public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
-               return new SharedStringsTable(getSharedStringsData());
-       }
-       
-       /**
-        * Opens up the Styles Table, parses it, and
-        *  returns a handy object for working with cell styles
-        */
-       public StylesTable getStylesTable() throws IOException, InvalidFormatException {
-               return new StylesTable(getStylesData());
-       }
-
-       
-       
-       /**
-        * Returns an InputStream to read the contents of the
-        *  shared strings table.
-        */
-       public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
-               return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
-       }
-       
-       /**
-        * Returns an InputStream to read the contents of the
-        *  styles table.
-        */
-       public InputStream getStylesData() throws IOException, InvalidFormatException {
-               return XSSFRelation.STYLES.getContents(workbookPart);
-       }
-       
-       /**
-        * Returns an InputStream to read the contents of the 
-        *  main Workbook, which contains key overall data for
-        *  the file, including sheet definitions.
-        */
-       public InputStream getWorkbookData() throws IOException, InvalidFormatException {
-               return workbookPart.getInputStream();
-       }
-       
-       /**
-        * Returns an InputStream to read the contents of the
-        *  specified Sheet.
-        * @param relId The relationId of the sheet, from a r:id on the workbook
-        */
-       public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
+    }
+
+
+    /**
+     * Opens up the Shared Strings Table, parses it, and
+     *  returns a handy object for working with
+     *  shared strings.
+     */
+    public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
+        return new SharedStringsTable(getSharedStringsData());
+    }
+
+    /**
+     * Opens up the Styles Table, parses it, and
+     *  returns a handy object for working with cell styles
+     */
+    public StylesTable getStylesTable() throws IOException, InvalidFormatException {
+        return new StylesTable(getStylesData());
+    }
+
+
+
+    /**
+     * Returns an InputStream to read the contents of the
+     *  shared strings table.
+     */
+    public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
+        return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
+    }
+
+    /**
+     * Returns an InputStream to read the contents of the
+     *  styles table.
+     */
+    public InputStream getStylesData() throws IOException, InvalidFormatException {
+        return XSSFRelation.STYLES.getContents(workbookPart);
+    }
+
+    /**
+     * Returns an InputStream to read the contents of the
+     *  main Workbook, which contains key overall data for
+     *  the file, including sheet definitions.
+     */
+    public InputStream getWorkbookData() throws IOException, InvalidFormatException {
+        return workbookPart.getInputStream();
+    }
+
+    /**
+     * Returns an InputStream to read the contents of the
+     *  specified Sheet.
+     * @param relId The relationId of the sheet, from a r:id on the workbook
+     */
+    public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
         PackageRelationship rel = workbookPart.getRelationship(relId);
         if(rel == null) {
-               throw new IllegalArgumentException("No Sheet found with r:id " + relId);
+            throw new IllegalArgumentException("No Sheet found with r:id " + relId);
         }
-        
+
         PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
         PackagePart sheet = pkg.getPart(relName);
         if(sheet == null) {
-               throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
+            throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
         }
         return sheet.getInputStream();
-       }
-       
-       /**
-        * Returns an Iterator which will let you get at all the
-        *  different Sheets in turn.
-        * Each sheet's InputStream is only opened when fetched
-        *  from the Iterator. It's up to you to close the
-        *  InputStreams when done with each one.
-        */
-       public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
-               return new SheetDataIterator();
-       }
-       
-       private class SheetDataIterator implements Iterator<InputStream> {
-               private Iterator<PackageRelationship> sheetRels;
-               private SheetDataIterator() throws IOException, InvalidFormatException {
-                       // Find all the sheets
-                       PackageRelationshipCollection sheets =
-                               workbookPart.getRelationshipsByType(
-                                               XSSFRelation.WORKSHEET.getRelation()
-                       );
-                       sheetRels = sheets.iterator();
-               }
-
-               public boolean hasNext() {
-                       return sheetRels.hasNext();
-               }
-
-               public InputStream next() {
-                       PackageRelationship sheet = sheetRels.next();
-                       try {
-                       PackagePartName relName = PackagingURIHelper.createPartName(sheet.getTargetURI());
-                               PackagePart sheetPkg = pkg.getPart(relName);
-                               return sheetPkg.getInputStream();
-                       } catch(IOException e) {
-                               throw new RuntimeException(e);
-                       } catch(InvalidFormatException ife) {
-                               throw new RuntimeException(ife);
-                       }
-               }
-
-               public void remove() {
-                       throw new IllegalStateException("Not supported");
-               }
-       }
+    }
+
+    /**
+     * Returns an Iterator which will let you get at all the
+     *  different Sheets in turn.
+     * Each sheet's InputStream is only opened when fetched
+     *  from the Iterator. It's up to you to close the
+     *  InputStreams when done with each one.
+     */
+    public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
+        return new SheetIterator(workbookPart);
+    }
+
+    /**
+     * Iterator over sheet data.
+     */
+    public static class SheetIterator implements Iterator<InputStream> {
+
+        /**
+         *  Maps relId and the corresponding PackagePart
+         */
+        private Map<String, PackagePart> sheetMap;
+
+        /**
+         * Current CTSheet bean
+         */
+        private CTSheet ctSheet;
+
+        /**
+         * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
+         * We can't rely on OpenXML4J's relationship iterator because it returns objects in physical order,
+         * i.e. as they are stored in the underlying package
+         */
+        private Iterator<CTSheet> sheetIterator;
+
+        /**
+         * Construct a new SheetIterator
+         *
+         * @param wb package part holding workbook.xml
+         */
+        private SheetIterator(PackagePart wb) throws IOException {
+
+            /**
+             * The order of sheets is defined by the order of CTSheet elements in workbook.xml
+             */
+            try {
+                //step 1. Map sheet's relationship Id and the corresponding PackagePart
+                sheetMap = new HashMap<String, PackagePart>();
+                for(PackageRelationship rel : wb.getRelationships()){
+                    if(rel.getRelationshipType().equals(XSSFRelation.WORKSHEET.getRelation())){
+                        PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+                        sheetMap.put(rel.getId(), wb.getPackage().getPart(relName));
+                    }
+                }
+                //step 2. Read array of CTSheet elements, wrap it in a List and construct an iterator
+                //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
+                CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream()).getWorkbook();
+                sheetIterator = Arrays.asList(wbBean.getSheets().getSheetArray()).iterator();
+            } catch (InvalidFormatException e){
+                throw new POIXMLException(e);
+            } catch (XmlException e){
+                throw new POIXMLException(e);
+            }
+        }
+
+        /**
+         * Returns <tt>true</tt> if the iteration has more elements.
+         *
+         * @return <tt>true</tt> if the iterator has more elements.
+         */
+        public boolean hasNext() {
+            return sheetIterator.hasNext();
+        }
+
+        /**
+         * Returns input stream of the next sheet in the iteration
+         *
+         * @return input stream of the next sheet in the iteration
+         */
+        public InputStream next() {
+            ctSheet = sheetIterator.next();
+
+            String sheetId = ctSheet.getId();
+            try {
+                PackagePart sheetPkg = sheetMap.get(sheetId);
+                return sheetPkg.getInputStream();
+            } catch(IOException e) {
+                throw new POIXMLException(e);
+            }
+        }
+
+        /**
+         * Returns name of the current sheet
+         *
+         * @return name of the current sheet
+         */
+        public String getSheetName() {
+            return ctSheet.getName();
+        }
+
+        public void remove() {
+            throw new IllegalStateException("Not supported");
+        }
+    }
 }
index 8ba59de4340debe940c41711b4be0e64b333e973..5a552df4f618129a269a756c051e4a9657b1c844 100644 (file)
@@ -109,4 +109,30 @@ public class TestXSSFReader extends TestCase {
        }
        assertEquals(3, count);
     }
+
+    /**
+     * Check that the sheet iterator returns sheets in the logical order
+     * (as they are defined in the workbook.xml)
+     */
+    public void testOrderOfSheets() throws Exception {
+        File f = new File(dirName, "reordered_sheets.xlsx");
+        Package pkg = Package.open(f.toString());
+
+        XSSFReader r = new XSSFReader(pkg);
+
+        String[] sheetNames = {"Sheet4", "Sheet2", "Sheet3", "Sheet1"};
+        XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.getSheetsData();
+
+        int count = 0;
+        while(it.hasNext()) {
+            InputStream inp = it.next();
+            assertNotNull(inp);
+            inp.close();
+
+            assertEquals(sheetNames[count], it.getSheetName());
+            count++;
+        }
+        assertEquals(4, count);
+
+    }
 }
diff --git a/src/testcases/org/apache/poi/hssf/data/reordered_sheets.xlsx b/src/testcases/org/apache/poi/hssf/data/reordered_sheets.xlsx
new file mode 100755 (executable)
index 0000000..f46f59b
Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/reordered_sheets.xlsx differ