]> source.dussan.org Git - poi.git/commitdiff
Some refactoring of XSSFEventBasedExcelExtractor, to make it easier for you to have...
authorNick Burch <nick@apache.org>
Fri, 19 Nov 2010 18:14:06 +0000 (18:14 +0000)
committerNick Burch <nick@apache.org>
Fri, 19 Nov 2010 18:14:06 +0000 (18:14 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1036968 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java

index f401b0308c6b73e487d9d51dd472ba19402a1c7e..1172555e4586e9eba6a9634593130609bcbc47d7 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta1" date="2010-??-??">
+           <action dev="poi-developers" type="add">50076 - Refactor XSSFEventBasedExcelExtractor to make it easier for you to have control over outputting the cell contents</action>
            <action dev="poi-developers" type="fix">50258 - avoid corruption of XSSFWorkbook after applying XSSFRichTextRun#applyFont</action>
            <action dev="poi-developers" type="fix">50154 - Allow white spaces and unicode in OPC relationship targets </action>
            <action dev="poi-developers" type="fix">50113 - Remove cell from Calculation Chain after setting cell type to blank </action>
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
new file mode 100644 (file)
index 0000000..873d84e
--- /dev/null
@@ -0,0 +1,253 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.eventusermodel;
+
+import org.apache.poi.ss.usermodel.BuiltinFormats;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.model.StylesTable;
+import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * This class handles the processing of a sheet#.xml 
+ *  sheet part of an XSSF .xlsx file, and generates
+ *  row and cell events for it.
+ */
+public class XSSFSheetXMLHandler extends DefaultHandler {
+   /**
+    * These are the different kinds of cells we support.
+    * We keep track of the current one between
+    *  the start and end.
+    */
+   enum xssfDataType {
+       BOOLEAN,
+       ERROR,
+       FORMULA,
+       INLINE_STRING,
+       SST_STRING,
+       NUMBER,
+   }
+   
+   /**
+    * Table with the styles used for formatting
+    */
+   private StylesTable stylesTable;
+
+   private ReadOnlySharedStringsTable sharedStringsTable;
+
+   /**
+    * Where our text is going
+    */
+   private final SheetContentsHandler output;
+
+   // Set when V start element is seen
+   private boolean vIsOpen;
+   // Set when F start element is seen
+   private boolean fIsOpen;
+
+   // Set when cell start element is seen;
+   // used when the cell's v (value) close element is seen.
+   private xssfDataType nextDataType;
+
+   // Used to format numeric cell values.
+   private short formatIndex;
+   private String formatString;
+   private final DataFormatter formatter;
+   private String cellRef;
+   private boolean formulasNotResults;
+
+   // Gathers characters as they are seen.
+   private StringBuffer value = new StringBuffer();
+   private StringBuffer formula = new StringBuffer();
+
+   /**
+    * Accepts objects needed while parsing.
+    *
+    * @param styles  Table of styles
+    * @param strings Table of shared strings
+    * @param sheetContentsHandler Sink that receives the row and cell events
+    * @param formulasNotResults   Whether formula cells report the formula text instead of the value
+    */
+   public XSSFSheetXMLHandler(
+           StylesTable styles,
+           ReadOnlySharedStringsTable strings,
+           SheetContentsHandler sheetContentsHandler,
+           boolean formulasNotResults) {
+       this.stylesTable = styles;
+       this.sharedStringsTable = strings;
+       this.output = sheetContentsHandler;
+       this.formulasNotResults = formulasNotResults;
+       this.nextDataType = xssfDataType.NUMBER;
+       this.formatter = new DataFormatter();
+   }
+
+   public void startElement(String uri, String localName, String name,
+                            Attributes attributes) throws SAXException {
+
+       if ("inlineStr".equals(name) || "v".equals(name)) {
+           vIsOpen = true;
+           // Clear contents cache
+           value.setLength(0);
+       } else if ("f".equals(name)) {
+          // Clear contents cache
+          formula.setLength(0);
+          
+          // Mark us as being a formula if not already
+          if(nextDataType == xssfDataType.NUMBER) {
+             nextDataType = xssfDataType.FORMULA;
+          }
+          
+          // Decide where to get the formula string from
+          String type = attributes.getValue("t"); 
+          if(type != null && type.equals("shared")) {
+             System.err.println("Warning - shared formulas not yet supported!");
+          } else {
+             fIsOpen = true;
+          }
+       }
+       else if("row".equals(name)) {
+           int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
+           output.startRow(rowNum);
+       }
+       // c => cell
+       else if ("c".equals(name)) {
+           // Set up defaults.
+           this.nextDataType = xssfDataType.NUMBER;
+           this.formatIndex = -1;
+           this.formatString = null;
+           cellRef = attributes.getValue("r");
+           String cellType = attributes.getValue("t");
+           String cellStyleStr = attributes.getValue("s");
+           if ("b".equals(cellType))
+               nextDataType = xssfDataType.BOOLEAN;
+           else if ("e".equals(cellType))
+               nextDataType = xssfDataType.ERROR;
+           else if ("inlineStr".equals(cellType))
+               nextDataType = xssfDataType.INLINE_STRING;
+           else if ("s".equals(cellType))
+               nextDataType = xssfDataType.SST_STRING;
+           else if ("str".equals(cellType))
+               nextDataType = xssfDataType.FORMULA;
+           else if (cellStyleStr != null) {
+              // Number, but almost certainly with a special style or format
+               int styleIndex = Integer.parseInt(cellStyleStr);
+               XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
+               this.formatIndex = style.getDataFormat();
+               this.formatString = style.getDataFormatString();
+               if (this.formatString == null)
+                   this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
+           }
+       }
+   }
+
+   public void endElement(String uri, String localName, String name)
+           throws SAXException {
+       String thisStr = null;
+
+       // v => contents of a cell
+       if ("v".equals(name)) {
+           vIsOpen = false;
+           
+           // Process the value contents as required, now we have it all
+           switch (nextDataType) {
+               case BOOLEAN:
+                   char first = value.charAt(0);
+                   thisStr = first == '0' ? "FALSE" : "TRUE";
+                   break;
+
+               case ERROR:
+                   thisStr = "ERROR:" + value.toString();
+                   break;
+
+               case FORMULA:
+                   if(formulasNotResults) {
+                      thisStr = formula.toString();
+                   } else {
+                      thisStr = value.toString();
+                   }
+                   break;
+
+               case INLINE_STRING:
+                   // TODO: have not seen an example of this, so it's untested.
+                   XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
+                   thisStr = rtsi.toString();
+                   break;
+
+               case SST_STRING:
+                   String sstIndex = value.toString();
+                   try {
+                       int idx = Integer.parseInt(sstIndex);
+                       XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
+                       thisStr = rtss.toString();
+                   }
+                   catch (NumberFormatException ex) {
+                       System.err.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
+                   }
+                   break;
+
+               case NUMBER:
+                   String n = value.toString();
+                   if (this.formatString != null)
+                       thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
+                   else
+                       thisStr = n;
+                   break;
+
+               default:
+                   thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
+                   break;
+           }
+           
+           // Output
+           output.cell(cellRef, thisStr);
+       } else if ("f".equals(name)) {
+          fIsOpen = false;
+       } else if ("row".equals(name)) {
+          output.endRow();
+       }
+   }
+
+   /**
+    * Captures characters only if a suitable element is open.
+    * Originally was just "v"; extended for inlineStr also.
+    */
+   public void characters(char[] ch, int start, int length)
+           throws SAXException {
+       if (vIsOpen) {
+           value.append(ch, start, length);
+       }
+       if (fIsOpen) {
+          formula.append(ch, start, length);
+       }
+   }
+
+   /**
+    * You need to implement this to handle the results
+    *  of the sheet parsing.
+    */
+   public interface SheetContentsHandler {
+      /** A row with the (zero based) row number has started */
+      public void startRow(int rowNum);
+      /** The row most recently started has ended (no row number is passed) */
+      public void endRow();
+      /** A cell, with the given formatted value, was encountered */
+      public void cell(String cellReference, String formattedValue);
+   }
+}
index 9be971189401beb822364b3f0551d6e3cdf4811c..c765ef79259a4950f1db9d9547273b2515173304 100644 (file)
@@ -26,20 +26,16 @@ import javax.xml.parsers.SAXParserFactory;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.ss.usermodel.BuiltinFormats;
-import org.apache.poi.ss.usermodel.DataFormatter;
 import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
 import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
 import org.apache.poi.xssf.model.StylesTable;
-import org.apache.poi.xssf.usermodel.XSSFCellStyle;
-import org.apache.poi.xssf.usermodel.XSSFRichTextString;
 import org.apache.xmlbeans.XmlException;
-import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
 
 /**
  * Implementation of a text extractor from OOXML Excel
@@ -50,20 +46,6 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
        private boolean includeSheetNames = true;
        private boolean formulasNotResults = false;
 
-   /**
-    * These are the different kinds of cells we support.
-    * We keep track of the current one between
-    *  the start and end.
-    */
-   enum xssfDataType {
-       BOOLEAN,
-       ERROR,
-       FORMULA,
-       INLINE_STRING,
-       SST_STRING,
-       NUMBER,
-   }
-   
        public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
                this(OPCPackage.open(path));
        }
@@ -97,212 +79,11 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
                this.formulasNotResults = formulasNotResults;
        }
        
-       
-   /**
-    * Handler for sheets. Processes each row and cell,
-    *  formatting Cells as best as it can.
-    */
-   class MyXSSFSheetHandler extends DefaultHandler {
-       /**
-        * Table with the styles used for formatting
-        */
-       private StylesTable stylesTable;
-
-       private ReadOnlySharedStringsTable sharedStringsTable;
-
-       /**
-        * Where our text is going
-        */
-       private final StringBuffer output;
-
-       // Set when V start element is seen
-       private boolean vIsOpen;
-       // Set when F start element is seen
-       private boolean fIsOpen;
-
-       // Set when cell start element is seen;
-       // used when cell close element is seen.
-       private xssfDataType nextDataType;
-
-       // Used to format numeric cell values.
-       private short formatIndex;
-       private String formatString;
-       private final DataFormatter formatter;
-
-       // Gathers characters as they are seen.
-       private StringBuffer value = new StringBuffer();
-       private StringBuffer formula = new StringBuffer();
-       private boolean firstCellOfRow = true;
-
-       /**
-        * Accepts objects needed while parsing.
-        *
-        * @param styles  Table of styles
-        * @param strings Table of shared strings
-        * @param cols    Minimum number of columns to show
-        * @param target  Sink for output
-        */
-       public MyXSSFSheetHandler(
-               StylesTable styles,
-               ReadOnlySharedStringsTable strings,
-               StringBuffer output) {
-           this.stylesTable = styles;
-           this.sharedStringsTable = strings;
-           this.output = output;
-           this.nextDataType = xssfDataType.NUMBER;
-           this.formatter = new DataFormatter();
-       }
-
-       public void startElement(String uri, String localName, String name,
-                                Attributes attributes) throws SAXException {
-
-           if ("inlineStr".equals(name) || "v".equals(name)) {
-               vIsOpen = true;
-               // Clear contents cache
-               value.setLength(0);
-           } else if ("f".equals(name)) {
-              // Clear contents cache
-              formula.setLength(0);
-              
-              // Mark us as being a formula if not already
-              if(nextDataType == xssfDataType.NUMBER) {
-                 nextDataType = xssfDataType.FORMULA;
-              }
-              
-              // Decide where to get the formula string from
-              String type = attributes.getValue("t"); 
-              if(type != null && type.equals("shared")) {
-                 System.err.println("Warning - shared formulas not yet supported!");
-              } else {
-                 fIsOpen = true;
-              }
-           }
-           else if("row".equals(name)) {
-               firstCellOfRow = true;
-           }
-           // c => cell
-           else if ("c".equals(name)) {
-               // Set up defaults.
-               this.nextDataType = xssfDataType.NUMBER;
-               this.formatIndex = -1;
-               this.formatString = null;
-               String cellType = attributes.getValue("t");
-               String cellStyleStr = attributes.getValue("s");
-               if ("b".equals(cellType))
-                   nextDataType = xssfDataType.BOOLEAN;
-               else if ("e".equals(cellType))
-                   nextDataType = xssfDataType.ERROR;
-               else if ("inlineStr".equals(cellType))
-                   nextDataType = xssfDataType.INLINE_STRING;
-               else if ("s".equals(cellType))
-                   nextDataType = xssfDataType.SST_STRING;
-               else if ("str".equals(cellType))
-                   nextDataType = xssfDataType.FORMULA;
-               else if (cellStyleStr != null) {
-                  // Number, but almost certainly with a special style or format
-                   int styleIndex = Integer.parseInt(cellStyleStr);
-                   XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
-                   this.formatIndex = style.getDataFormat();
-                   this.formatString = style.getDataFormatString();
-                   if (this.formatString == null)
-                       this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
-               }
-           }
-       }
-
-       public void endElement(String uri, String localName, String name)
-               throws SAXException {
-           String thisStr = null;
-
-           // v => contents of a cell
-           if ("v".equals(name)) {
-               vIsOpen = false;
-               
-               // Process the value contents as required, now we have it all
-               switch (nextDataType) {
-                   case BOOLEAN:
-                       char first = value.charAt(0);
-                       thisStr = first == '0' ? "FALSE" : "TRUE";
-                       break;
-
-                   case ERROR:
-                       thisStr = "ERROR:" + value.toString();
-                       break;
-
-                   case FORMULA:
-                       if(formulasNotResults) {
-                          thisStr = formula.toString();
-                       } else {
-                          thisStr = value.toString();
-                       }
-                       break;
-
-                   case INLINE_STRING:
-                       // TODO: have seen an example of this, so it's untested.
-                       XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
-                       thisStr = rtsi.toString();
-                       break;
-
-                   case SST_STRING:
-                       String sstIndex = value.toString();
-                       try {
-                           int idx = Integer.parseInt(sstIndex);
-                           XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
-                           thisStr = rtss.toString();
-                       }
-                       catch (NumberFormatException ex) {
-                           System.err.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
-                       }
-                       break;
-
-                   case NUMBER:
-                       String n = value.toString();
-                       if (this.formatString != null)
-                           thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
-                       else
-                           thisStr = n;
-                       break;
-
-                   default:
-                       thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
-                       break;
-               }
-               
-               // Output
-               if(!firstCellOfRow) {
-                  output.append('\t');
-               }
-               firstCellOfRow = false;
-               
-               output.append(thisStr);
-           } else if ("f".equals(name)) {
-              fIsOpen = false;
-           } else if ("row".equals(name)) {
-              // Finish the line
-              output.append('\n');
-           }
-       }
-
-       /**
-        * Captures characters only if a suitable element is open.
-        * Originally was just "v"; extended for inlineStr also.
-        */
-       public void characters(char[] ch, int start, int length)
-               throws SAXException {
-           if (vIsOpen) {
-               value.append(ch, start, length);
-           }
-           if (fIsOpen) {
-              formula.append(ch, start, length);
-           }
-       }
-   }
-
    /**
     * Processes the given sheet
     */
    public void processSheet(
-           StringBuffer output,
+           SheetTextExtractor sheetExtractor,
            StylesTable styles,
            ReadOnlySharedStringsTable strings,
            InputStream sheetInputStream)
@@ -313,7 +94,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
        try {
           SAXParser saxParser = saxFactory.newSAXParser();
           XMLReader sheetParser = saxParser.getXMLReader();
-          ContentHandler handler = new MyXSSFSheetHandler(styles, strings, output);
+          ContentHandler handler = new XSSFSheetXMLHandler(styles, strings, sheetExtractor, formulasNotResults);
           sheetParser.setContentHandler(handler);
           sheetParser.parse(sheetSource);
        } catch(ParserConfigurationException e) {
@@ -332,13 +113,15 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
           XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    
           StringBuffer text = new StringBuffer();
+          SheetTextExtractor sheetExtractor = new SheetTextExtractor(text);
+          
           while (iter.hasNext()) {
               InputStream stream = iter.next();
               if(includeSheetNames) {
                  text.append(iter.getSheetName());
                  text.append('\n');
               }
-              processSheet(text, styles, strings, stream);
+              processSheet(sheetExtractor, styles, strings, stream);
               stream.close();
           }
           
@@ -354,4 +137,30 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
           return null;
        }
    }
+   
+   protected class SheetTextExtractor implements SheetContentsHandler {
+      private final StringBuffer output;
+      private boolean firstCellOfRow = true;
+      
+      protected SheetTextExtractor(StringBuffer output) {
+         this.output = output;
+      }
+      
+      public void startRow(int rowNum) {
+         firstCellOfRow = true;
+      }
+      
+      public void endRow() {
+         output.append('\n');
+      }
+
+      public void cell(String cellRef, String formattedValue) {
+         if(firstCellOfRow) {
+            firstCellOfRow = false;
+         } else {
+            output.append('\t');
+         }
+         output.append(formattedValue);
+      }
+   }
 }