]> source.dussan.org Git - poi.git/commitdiff
code improvements to RecordFactoryInputStream
author Josh Micich <josh@apache.org>
Fri, 7 Aug 2009 00:21:00 +0000 (00:21 +0000)
committer Josh Micich <josh@apache.org>
Fri, 7 Aug 2009 00:21:00 +0000 (00:21 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@801850 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
src/java/org/apache/poi/hssf/record/RecordFactory.java
src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java

index 2238b39b82d75998d5cdd6b4dd6ca36246f3a61e..2b392c0e3cbeecb41f9850009c1752668086204f 100644 (file)
@@ -31,7 +31,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  * processWorkbookEvents along with a request.
  *
  * This will cause your file to be processed a record at a time.  Each record with
- * a static id matching one that you have registed in your HSSFRequest will be passed
+ * a static id matching one that you have registered in your HSSFRequest will be passed
  * to your associated HSSFListener.
  *
  * @see org.apache.poi.hssf.dev.EFHSSF
@@ -39,115 +39,98 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  * @author Andrew C. Oliver (acoliver at apache dot org)
  * @author Carey Sublette  (careysub@earthling.net)
  */
+public class HSSFEventFactory {
+       /** Creates a new instance of HSSFEventFactory */
+       public HSSFEventFactory() {
+               // no instance fields
+       }
 
-public class HSSFEventFactory
-{
-    /** Creates a new instance of HSSFEventFactory */
-
-    public HSSFEventFactory()
-    {
-    }
-
-    /**
-     * Processes a file into essentially record events.
-     *
-     * @param req       an Instance of HSSFRequest which has your registered listeners
-     * @param fs        a POIFS filesystem containing your workbook
-     */
-
-    public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
-        throws IOException
-    {
-        InputStream in = fs.createDocumentInputStream("Workbook");
+       /**
+        * Processes a file into essentially record events.
+        *
+        * @param req an Instance of HSSFRequest which has your registered listeners
+        * @param fs  a POIFS filesystem containing your workbook
+        */
+       public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs) throws IOException {
+               InputStream in = fs.createDocumentInputStream("Workbook");
 
-        processEvents(req, in);
-    }
+               processEvents(req, in);
+       }
 
-    /**
+       /**
         * Processes a file into essentially record events.
         *
-        * @param req       an Instance of HSSFRequest which has your registered listeners
-        * @param fs        a POIFS filesystem containing your workbook
-        * @return                      numeric user-specified result code.
+        * @param req an Instance of HSSFRequest which has your registered listeners
+        * @param fs  a POIFS filesystem containing your workbook
+        * @return    numeric user-specified result code.
         */
-
        public short abortableProcessWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
-               throws IOException, HSSFUserException
-       {
+               throws IOException, HSSFUserException {
                InputStream in = fs.createDocumentInputStream("Workbook");
                return abortableProcessEvents(req, in);
-    }
-
-    /**
-     * Processes a DocumentInputStream into essentially Record events.
-     *
-     * If an <code>AbortableHSSFListener</code> causes a halt to processing during this call
-     * the method will return just as with <code>abortableProcessEvents</code>, but no
-     * user code or <code>HSSFUserException</code> will be passed back.
-     *
-     * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
-     * @param req       an Instance of HSSFRequest which has your registered listeners
-     * @param in        a DocumentInputStream obtained from POIFS's POIFSFileSystem object
-     */
-
-    public void processEvents(HSSFRequest req, InputStream in)
-        throws IOException
-       {
-               try
-               {
+       }
+
+       /**
+        * Processes a DocumentInputStream into essentially Record events.
+        *
+        * If an <code>AbortableHSSFListener</code> causes a halt to processing during this call
+        * the method will return just as with <code>abortableProcessEvents</code>, but no
+        * user code or <code>HSSFUserException</code> will be passed back.
+        *
+        * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
+        * @param req an Instance of HSSFRequest which has your registered listeners
+        * @param in  a DocumentInputStream obtained from POIFS's POIFSFileSystem object
+        */
+       public void processEvents(HSSFRequest req, InputStream in) {
+               try {
                        genericProcessEvents(req, new RecordInputStream(in));
+               } catch (HSSFUserException hue) {
+                       /*If an HSSFUserException user exception is thrown, ignore it.*/
                }
-               catch (HSSFUserException hue)
-               {/*If an HSSFUserException user exception is thrown, ignore it.*/ }
        }
 
 
-    /**
-     * Processes a DocumentInputStream into essentially Record events.
-     *
-     * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
-     * @param req       an Instance of HSSFRequest which has your registered listeners
-     * @param in        a DocumentInputStream obtained from POIFS's POIFSFileSystem object
-        * @return                      numeric user-specified result code.
-     */
-
-    public short abortableProcessEvents(HSSFRequest req, InputStream in)
-        throws IOException, HSSFUserException
-    {
+       /**
+        * Processes a DocumentInputStream into essentially Record events.
+        *
+        * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
+        * @param req an Instance of HSSFRequest which has your registered listeners
+        * @param in  a DocumentInputStream obtained from POIFS's POIFSFileSystem object
+        * @return    numeric user-specified result code.
+        */
+       public short abortableProcessEvents(HSSFRequest req, InputStream in)
+               throws HSSFUserException {
                return genericProcessEvents(req, new RecordInputStream(in));
-    }
+       }
 
-     /**
+       /**
         * Processes a DocumentInputStream into essentially Record events.
         *
         * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
-        * @param req       an Instance of HSSFRequest which has your registered listeners
-        * @param in        a DocumentInputStream obtained from POIFS's POIFSFileSystem object
-        * @return                      numeric user-specified result code.
+        * @param req an Instance of HSSFRequest which has your registered listeners
+        * @param in  a DocumentInputStream obtained from POIFS's POIFSFileSystem object
+        * @return    numeric user-specified result code.
         */
-
        protected short genericProcessEvents(HSSFRequest req, RecordInputStream in)
-               throws IOException, HSSFUserException
-       {
-               boolean going = true;
+               throws HSSFUserException {
                short userCode = 0;
-               Record r = null;
-               
+
                // Create a new RecordStream and use that
-               RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in);
-               
+               RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in, false);
+
                // Process each record as they come in
-               while(going) {
-                       r = recordStream.nextRecord();
-                       if(r != null) {
-                               userCode = req.processRecord(r);
-                               if (userCode != 0) break;
-                       } else {
-                               going = false;
+               while(true) {
+                       Record r = recordStream.nextRecord();
+                       if(r == null) {
+                               break;
+                       }
+                       userCode = req.processRecord(r);
+                       if (userCode != 0) {
+                               break;
                        }
                }
-               
+
                // All done, return our last code
                return userCode;
-    }
+       }
 }
index 53213dfc1a5345d12e4c4efa082a02b97276265a..f45860667844b9d045aa9ab9611eb2227f6cb7f7 100644 (file)
@@ -369,12 +369,11 @@ public final class RecordFactory {
        public static List<Record> createRecords(InputStream in) throws RecordFormatException {
                List<Record> records = new ArrayList<Record>(NUM_RECORDS);
 
-               RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in));
-                recStream.setIncludeContinueRecords(true);
+               RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in), true);
 
-        Record record;
+               Record record;
                while ((record = recStream.nextRecord())!=null) {
-                        records.add(record);
+                       records.add(record);
                }
 
                return records;
index 19f2ca45c92e20abf9278df21aa0e7345cdb4c28..541dfd2dc04038074e1115b139061ab2571f7bd0 100755 (executable)
@@ -19,10 +19,6 @@ package org.apache.poi.hssf.record;
 import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
 import org.apache.poi.hssf.eventusermodel.HSSFListener;
 
-import java.util.Arrays;
-import java.util.LinkedList;
-import java.util.List;
-
 /**
  * A stream based way to get at complete records, with
  * as low a memory footprint as possible.
@@ -34,203 +30,209 @@ import java.util.List;
  * them, but this does allow for a "pull" style of coding.
  */
 public class RecordFactoryInputStream {
-    private final RecordInputStream recStream;
-
-    /**
-     * Have we returned all the records there are?
-     */
-    private boolean complete = false;
-
-    /**
-     * Sometimes we end up with a bunch of
-     * records. When we do, these should
-     * be returned before the next normal
-     * record processing occurs (i.e. before
-     * we check for continue records and
-     * return rec)
-     */
-    private final LinkedList bonusRecords = new LinkedList();
-
-    /**
-     * The most recent record that we gave to the user
-     */
-    private Record lastRecord = null;
-    /**
-     * The most recent DrawingRecord seen
-     */
-    private DrawingRecord lastDrawingRecord = new DrawingRecord();
-
-    private int bofDepth = 0;
-
-    private boolean lastRecordWasEOFLevelZero = false;
-
-    private boolean includeContinueRecords = false;
-
-    public RecordFactoryInputStream(RecordInputStream inp) {
-        recStream = inp;
-    }
-
-    /**
-     * Returns the next (complete) record from the
-     * stream, or null if there are no more.
-     */
-    public Record nextRecord() {
-        Record r = null;
-
-        // Loop until we get something
-        while (r == null && !complete) {
-            // Are there any bonus records that we need to
-            //  return?
-            r = getBonusRecord();
-
-            // If not, ask for the next real record
-            if (r == null) {
-                r = getNextRecord();
-            }
-        }
-
-        // All done
-        return r;
-    }
-
-    /**
-     * If there are any "bonus" records, that should
-     * be returned before processing new ones,
-     * grabs the next and returns it.
-     * If not, returns null;
-     */
-    private Record getBonusRecord() {
-        if (!bonusRecords.isEmpty()) {
-            return (Record) bonusRecords.removeFirst();
-        }
-        return null;
-    }
-
-    /**
-     * Returns the next available record, or null if
-     * this pass didn't return a record that's
-     * suitable for returning (eg was a continue record).
-     */
-    private Record getNextRecord() {
-        /*
-        * How to recognise end of stream?
-        * In the best case, the underlying input stream (in) ends just after the last EOF record
-        * Usually however, the stream is padded with an arbitrary byte count.  Excel and most apps
-        * reliably use zeros for padding and if this were always the case, this code could just
-        * skip all the (zero sized) records with sid==0.  However, bug 46987 shows a file with
-        * non-zero padding that is read OK by Excel (Excel also fixes the padding).
-        *
-        * So to properly detect the workbook end of stream, this code has to identify the last
-        * EOF record.  This is not so easy because the worbook bof+eof pair do not bracket the
-        * whole stream.  The worksheets follow the workbook, but it is not easy to tell how many
-        * sheet sub-streams should be present.  Hence we are looking for an EOF record that is not
-        * immediately followed by a BOF record.  One extra complication is that bof+eof sub-
-        * streams can be nested within worksheet streams and it's not clear in these cases what
-        * record might follow any EOF record.  So we also need to keep track of the bof/eof
-        * nesting level.
-        */
-
-        if (recStream.hasNextRecord()) {
-            // Grab our next record
-            recStream.nextRecord();
-
-            if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
-                // Normally InputStream (in) contains only zero padding after this point
-                complete = true;
-                return null;
-            }
-
-            Record record = RecordFactory.createSingleRecord(recStream);
-            lastRecordWasEOFLevelZero = false;
-
-            if (record instanceof BOFRecord) {
-                bofDepth++;
-                return record;
-            }
-
-            if (record instanceof EOFRecord) {
-                bofDepth--;
-                if (bofDepth < 1) {
-                    lastRecordWasEOFLevelZero = true;
-                }
-
-                return record;
-            }
-
-            if (record instanceof DBCellRecord) {
-                // Not needed by POI.  Regenerated from scratch by POI when spreadsheet is written
-                return null;
-            }
-
-            if (record instanceof RKRecord) {
-                return RecordFactory.convertToNumberRecord((RKRecord) record);
-            }
-
-            if (record instanceof MulRKRecord) {
-                NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record);
-
-                List<NumberRecord> list = Arrays.asList(records);
-                bonusRecords.addAll(list.subList(1, list.size()));
-
-                return records[0];
-            }
-
-            if (record.getSid() == DrawingGroupRecord.sid
-                    && lastRecord instanceof DrawingGroupRecord) {
-                DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord;
-                lastDGRecord.join((AbstractEscherHolderRecord) record);
-                return null;
-            } else if (record.getSid() == ContinueRecord.sid) {
-                ContinueRecord contRec = (ContinueRecord) record;
-
-                if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) {
-                    // Drawing records have a very strange continue behaviour.
-                    //There can actually be OBJ records mixed between the continues.
-                    lastDrawingRecord.processContinueRecord(contRec.getData());
-                    //we must remember the position of the continue record.
-                    //in the serialization procedure the original structure of records must be preserved
-                    if (includeContinueRecords) {
-                        return record;
-                    } else {
-                        return null;
-                    }
-                } else if (lastRecord instanceof DrawingGroupRecord) {
-                    ((DrawingGroupRecord) lastRecord).processContinueRecord(contRec.getData());
-                    return null;
-                } else if (lastRecord instanceof DrawingRecord) {
-                    ((DrawingRecord) lastRecord).processContinueRecord(contRec.getData());
-                    return null;
-                } else if (lastRecord instanceof UnknownRecord) {
-                    //Gracefully handle records that we don't know about,
-                    //that happen to be continued
-                    return record;
-                } else if (lastRecord instanceof EOFRecord) {
-                    // This is really odd, but excel still sometimes
-                    //  outputs a file like this all the same
-                    return record;
-                } else {
-                    throw new RecordFormatException("Unhandled Continue Record");
-                }
-            } else {
-                lastRecord = record;
-                if (record instanceof DrawingRecord) {
-                    lastDrawingRecord = (DrawingRecord) record;
-                }
-
-                return record;
-            }
-
-        } else {
-            // No more records
-            complete = true;
-            return null;
-        }
-    }
-
-    /**
-     * Return or not ContinueRecord in nextRecord
-     */
-    public void setIncludeContinueRecords(boolean includeContinueRecords) {
-        this.includeContinueRecords = includeContinueRecords;
-    }
-}
\ No newline at end of file
+
+       private final RecordInputStream _recStream;
+       private final boolean _shouldIncludeContinueRecords;
+
+       /**
+        * Temporarily stores a group of {@link NumberRecord}s.  This is used when the most
+        * recently read underlying record is a {@link MulRKRecord}
+        */
+       private NumberRecord[] _multipleNumberRecords;
+
+       /**
+        * used to help iterating over multiple number records
+        */
+       private int _multipleNumberRecordIndex = -1;
+
+       /**
+        * The most recent record that we gave to the user
+        */
+       private Record _lastRecord = null;
+       /**
+        * The most recent DrawingRecord seen
+        */
+       private DrawingRecord _lastDrawingRecord = new DrawingRecord();
+
+       private int _bofDepth;
+
+       private boolean _lastRecordWasEOFLevelZero;
+
+
+       /**
+        * @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
+        * {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
+        * processing).
+        */
+       public RecordFactoryInputStream(RecordInputStream inp, boolean shouldIncludeContinueRecords) {
+               _recStream = inp;
+               _shouldIncludeContinueRecords = shouldIncludeContinueRecords;
+
+               /*
+               * How to recognise end of stream?
+               * In the best case, the underlying input stream (in) ends just after the last EOF record
+               * Usually however, the stream is padded with an arbitrary byte count.  Excel and most apps
+               * reliably use zeros for padding and if this were always the case, this code could just
+               * skip all the (zero sized) records with sid==0.  However, bug 46987 shows a file with
+               * non-zero padding that is read OK by Excel (Excel also fixes the padding).
+               *
+               * So to properly detect the workbook end of stream, this code has to identify the last
+               * EOF record.  This is not so easy because the workbook bof+eof pair do not bracket the
+               * whole stream.  The worksheets follow the workbook, but it is not easy to tell how many
+               * sheet sub-streams should be present.  Hence we are looking for an EOF record that is not
+               * immediately followed by a BOF record.  One extra complication is that bof+eof sub-
+               * streams can be nested within worksheet streams and it's not clear in these cases what
+               * record might follow any EOF record.  So we also need to keep track of the bof/eof
+               * nesting level.
+               */
+               _bofDepth=0;
+               _lastRecordWasEOFLevelZero = false;
+       }
+
+       /**
+        * Returns the next (complete) record from the
+        * stream, or null if there are no more.
+        */
+       public Record nextRecord() {
+               Record r;
+               r = getNextMultipleNumberRecord();
+               if (r != null) {
+                       // found a NumberRecord (expanded from a recent MULRK record)
+                       return r;
+               }
+               while (true) {
+                       if (!_recStream.hasNextRecord()) {
+                               // recStream is exhausted;
+                       return null;
+                       }
+
+                       // step underlying RecordInputStream to the next record
+                       _recStream.nextRecord();
+
+                       if (_lastRecordWasEOFLevelZero) {
+                               // Potential place for ending the workbook stream
+                               // Check that the next record is not BOFRecord(0x0809)
+                               // Normally the input stream contains only zero padding after the last EOFRecord,
+                               // but bug 46987 suggests that the padding may be garbage.
+                               // This code relies on the padding bytes not starting with BOFRecord.sid
+                               if (_recStream.getSid() != BOFRecord.sid) {
+                                       return null;
+                               }
+                               // else - another sheet substream starting here
+                       }
+
+                       r = readNextRecord();
+                       if (r == null) {
+                               // some record types may get skipped (e.g. DBCellRecord and ContinueRecord)
+                               continue;
+                       }
+                       return r;
+               }
+       }
+
+       /**
+        * @return the next {@link NumberRecord} from the multiple record group as expanded from
+        * a recently read {@link MulRKRecord}. <code>null</code> if not present.
+        */
+       private NumberRecord getNextMultipleNumberRecord() {
+               if (_multipleNumberRecords != null) {
+                       int ix = _multipleNumberRecordIndex;
+                       if (ix < _multipleNumberRecords.length) {
+                               NumberRecord result = _multipleNumberRecords[ix];
+                               _multipleNumberRecordIndex = ix + 1;
+                               return result;
+                       }
+                       _multipleNumberRecordIndex = -1;
+                       _multipleNumberRecords = null;
+               }
+               return null;
+       }
+
+       /**
+        * @return the next available record, or <code>null</code> if
+        * this pass didn't return a record that's
+        * suitable for returning (eg was a continue record).
+        */
+       private Record readNextRecord() {
+
+               Record record = RecordFactory.createSingleRecord(_recStream);
+               _lastRecordWasEOFLevelZero = false;
+
+               if (record instanceof BOFRecord) {
+                       _bofDepth++;
+                       return record;
+               }
+
+               if (record instanceof EOFRecord) {
+                       _bofDepth--;
+                       if (_bofDepth < 1) {
+                               _lastRecordWasEOFLevelZero = true;
+                       }
+
+                       return record;
+               }
+
+               if (record instanceof DBCellRecord) {
+                       // Not needed by POI.  Regenerated from scratch by POI when spreadsheet is written
+                       return null;
+               }
+
+               if (record instanceof RKRecord) {
+                       return RecordFactory.convertToNumberRecord((RKRecord) record);
+               }
+
+               if (record instanceof MulRKRecord) {
+                       NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record);
+
+                       _multipleNumberRecords = records;
+                       _multipleNumberRecordIndex = 1;
+                       return records[0];
+               }
+
+               if (record.getSid() == DrawingGroupRecord.sid
+                               && _lastRecord instanceof DrawingGroupRecord) {
+                       DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) _lastRecord;
+                       lastDGRecord.join((AbstractEscherHolderRecord) record);
+                       return null;
+               }
+               if (record.getSid() == ContinueRecord.sid) {
+                       ContinueRecord contRec = (ContinueRecord) record;
+
+                       if (_lastRecord instanceof ObjRecord || _lastRecord instanceof TextObjectRecord) {
+                               // Drawing records have a very strange continue behaviour.
+                               //There can actually be OBJ records mixed between the continues.
+                               _lastDrawingRecord.processContinueRecord(contRec.getData());
+                               //we must remember the position of the continue record.
+                               //in the serialization procedure the original structure of records must be preserved
+                               if (_shouldIncludeContinueRecords) {
+                                       return record;
+                               }
+                               return null;
+                       }
+                       if (_lastRecord instanceof DrawingGroupRecord) {
+                               ((DrawingGroupRecord) _lastRecord).processContinueRecord(contRec.getData());
+                               return null;
+                       }
+                       if (_lastRecord instanceof DrawingRecord) {
+                               ((DrawingRecord) _lastRecord).processContinueRecord(contRec.getData());
+                               return null;
+                       }
+                       if (_lastRecord instanceof UnknownRecord) {
+                               //Gracefully handle records that we don't know about,
+                               //that happen to be continued
+                               return record;
+                       }
+                       if (_lastRecord instanceof EOFRecord) {
+                               // This is really odd, but excel still sometimes
+                               //  outputs a file like this all the same
+                               return record;
+                       }
+                       throw new RecordFormatException("Unhandled Continue Record");
+               }
+               _lastRecord = record;
+               if (record instanceof DrawingRecord) {
+                       _lastDrawingRecord = (DrawingRecord) record;
+               }
+               return record;
+       }
+}