From b7f2c0df62d6303d967add6e80ecd9fdf264a36a Mon Sep 17 00:00:00 2001
From: Glen Stampoultzis
Date: Thu, 26 Jun 2003 12:33:35 +0000
Subject: [PATCH] A real implementation of the ExtSST record. The old
 implementation just wrote a fake record

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/branches/REL_2_BRANCH@353158 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/poi/hssf/model/Workbook.java   | 114 +++++++++---------
 .../poi/hssf/record/ExtSSTInfoSubRecord.java  |   4 +-
 .../apache/poi/hssf/record/ExtSSTRecord.java  |  26 +++-
 .../poi/hssf/record/RecordProcessor.java      |   5 +
 .../org/apache/poi/hssf/record/SSTRecord.java |  68 +++++++++--
 .../hssf/record/SSTRecordSizeCalculator.java  |   4 +-
 .../apache/poi/hssf/record/SSTSerializer.java |  36 +++++-
 7 files changed, 181 insertions(+), 76 deletions(-)

diff --git a/src/java/org/apache/poi/hssf/model/Workbook.java b/src/java/org/apache/poi/hssf/model/Workbook.java
index cad91171e8..ac5843e211 100644
--- a/src/java/org/apache/poi/hssf/model/Workbook.java
+++ b/src/java/org/apache/poi/hssf/model/Workbook.java
@@ -686,37 +686,27 @@ public class Workbook implements Model {
      *
      * @return byte array containing the HSSF-only portions of the POIFS file.
      */
-
-    public byte [] serialize() {
-        log.log(DEBUG, "Serializing Workbook!");
-        byte[] retval = null;
-
-        // ArrayList bytes = new ArrayList(records.size());
-        int arraysize = getSize();
-        int pos = 0;
-
-        // for (int k = 0; k < records.size(); k++)
-        // {
-        //     bytes.add((( Record ) records.get(k)).serialize());
-        // }
-        // for (int k = 0; k < bytes.size(); k++)
-        // {
-        //     arraysize += (( byte [] ) bytes.get(k)).length;
-        // }
-        retval = new byte[ arraysize ];
-        for (int k = 0; k < records.size(); k++) {
-
-            // byte[] rec = (( byte [] ) bytes.get(k));
-            // System.arraycopy(rec, 0, retval, pos, rec.length);
-            Record record = records.get(k);
-            // Let's skip RECALCID records, as they are only use for optimization
-            if(record.getSid() != RecalcIdRecord.sid || ((RecalcIdRecord)record).isNeeded()) {
-                pos += record.serialize(pos, retval); // rec.length;
-            }
-        }
-        log.log(DEBUG, "Exiting serialize workbook");
-        return retval;
-    }
+    // GJS: Not used so why keep it.
+//    public byte [] serialize() {
+//        log.log(DEBUG, "Serializing Workbook!");
+//        byte[] retval = null;
+//
+////        ArrayList bytes = new ArrayList(records.size());
+//        int arraysize = getSize();
+//        int pos = 0;
+//
+//        retval = new byte[ arraysize ];
+//        for (int k = 0; k < records.size(); k++) {
+//
+//            Record record = records.get(k);
+////            Let's skip RECALCID records, as they are only use for optimization
+//            if(record.getSid() != RecalcIdRecord.sid || ((RecalcIdRecord)record).isNeeded()) {
+//                pos += record.serialize(pos, retval); // rec.length;
+//            }
+//        }
+//        log.log(DEBUG, "Exiting serialize workbook");
+//        return retval;
+//    }
 
     /**
      * Serializes all records int the worksheet section into a big byte array. Use
@@ -725,44 +715,54 @@ public class Workbook implements Model {
      * @param data array of bytes to write this to
      */
 
-    public int serialize(int offset, byte [] data) {
-        log.log(DEBUG, "Serializing Workbook with offsets");
+    public int serialize( int offset, byte[] data )
+    {
+        log.log( DEBUG, "Serializing Workbook with offsets" );
 
-        // ArrayList bytes = new ArrayList(records.size());
-        // int arraysize = getSize(); // 0;
-        int pos = 0;
+        int pos = 0;
 
-        // for (int k = 0; k < records.size(); k++)
-        // {
-        //     bytes.add((( Record ) records.get(k)).serialize());
-        //
-        // }
-        // for (int k = 0; k < bytes.size(); k++)
-        // {
-        //     arraysize += (( byte [] ) bytes.get(k)).length;
-        // }
-        for (int k = 0; k < records.size(); k++) {
+        SSTRecord sst = null;
+        int sstPos = 0;
+        for ( int k = 0; k < records.size(); k++ )
+        {
 
-            // byte[] rec = (( byte [] ) bytes.get(k));
-            // System.arraycopy(rec, 0, data, offset + pos, rec.length);
-            Record record = records.get(k);
+            Record record = records.get( k );
             // Let's skip RECALCID records, as they are only use for optimization
-            if(record.getSid() != RecalcIdRecord.sid || ((RecalcIdRecord)record).isNeeded()) {
-                pos += record.serialize(pos + offset, data); // rec.length;
+            if ( record.getSid() != RecalcIdRecord.sid || ( (RecalcIdRecord) record ).isNeeded() )
+            {
+                if (record instanceof SSTRecord)
+                {
+                    sst = (SSTRecord)record;
+                    sstPos = pos;
+                }
+                if (record.getSid() == ExtSSTRecord.sid && sst != null)
+                {
+                    record = sst.createExtSSTRecord(sstPos + offset);
+                }
+                pos += record.serialize( pos + offset, data ); // rec.length;
             }
         }
-        log.log(DEBUG, "Exiting serialize workbook");
+        log.log( DEBUG, "Exiting serialize workbook" );
         return pos;
     }
 
-    public int getSize() {
+    public int getSize()
+    {
         int retval = 0;
 
-        for (int k = 0; k < records.size(); k++) {
-            Record record = records.get(k);
+        SSTRecord sst = null;
+        for ( int k = 0; k < records.size(); k++ )
+        {
+            Record record = records.get( k );
             // Let's skip RECALCID records, as they are only use for optimization
-            if(record.getSid() != RecalcIdRecord.sid || ((RecalcIdRecord)record).isNeeded()) {
-                retval += record.getRecordSize();
+            if ( record.getSid() != RecalcIdRecord.sid || ( (RecalcIdRecord) record ).isNeeded() )
+            {
+                if (record instanceof SSTRecord)
+                    sst = (SSTRecord)record;
+                if (record.getSid() == ExtSSTRecord.sid && sst != null)
+                    retval += sst.calcExtSSTRecordSize();
+                else
+                    retval += record.getRecordSize();
             }
         }
         return retval;
diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java
index e80c8affdf..6af6ec3b91 100644
--- a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java
+++ b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java
@@ -114,7 +114,7 @@ public class ExtSSTInfoSubRecord
         field_1_stream_pos = pos;
     }
 
-    public void setBucketSSTOffset(short offset)
+    public void setBucketRecordOffset(short offset)
     {
         field_2_bucket_sst_offset = offset;
     }
@@ -159,6 +159,6 @@ public class ExtSSTInfoSubRecord
 
     public short getSid()
     {
-        return this.sid;
+        return sid;
     }
 }
diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java
index 7a8e2391e9..e825987c3f 100644
--- a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java
+++ b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java
@@ -79,6 +79,7 @@ public class ExtSSTRecord
     private short field_1_strings_per_bucket;
     private ArrayList field_2_sst_info;
 
+
     public ExtSSTRecord()
     {
         field_2_sst_info = new ArrayList();
@@ -189,26 +190,39 @@ public class ExtSSTRecord
 
     public int serialize(int offset, byte [] data)
     {
         LittleEndian.putShort(data, 0 + offset, sid);
-
-//        LittleEndian.putShort(data,2,(short)(2 + (getNumInfoRecords() *8)));
-        LittleEndian.putShort(data, 2 + offset, ( short ) (2 + (0x3fa - 2)));
-        int pos = 4;
+        LittleEndian.putShort(data, 2 + offset, (short)(getRecordSize() - 4));
+        LittleEndian.putShort(data, 4 + offset, field_1_strings_per_bucket);
+        int pos = 6;
 
         for (int k = 0; k < getNumInfoRecords(); k++)
         {
             System.arraycopy(getInfoRecordAt(k).serialize(), 0, data, pos + offset, 8);
+            pos += getInfoRecordAt(k).getRecordSize();
         }
         return getRecordSize();
     }
 
     public int getRecordSize()
     {
-        return 6 + 0x3fa - 2;
+        return 4 + 2 + field_2_sst_info.size() * 8;
     }
 
     public short getSid()
     {
-        return this.sid;
+        return sid;
+    }
+
+    public void setBucketOffsets( int[] bucketAbsoluteOffsets, int[] bucketRelativeOffsets )
+    {
+        this.field_2_sst_info = new ArrayList(bucketAbsoluteOffsets.length);
+        for ( int i = 0; i < bucketAbsoluteOffsets.length; i++ )
+        {
+            ExtSSTInfoSubRecord r = new ExtSSTInfoSubRecord();
+            r.setBucketRecordOffset((short)bucketRelativeOffsets[i]);
+            r.setStreamPos(bucketAbsoluteOffsets[i]);
+            field_2_sst_info.add(r);
+        }
     }
+
 }
diff --git a/src/java/org/apache/poi/hssf/record/RecordProcessor.java b/src/java/org/apache/poi/hssf/record/RecordProcessor.java
index c8d659c259..ba86f678f8 100644
--- a/src/java/org/apache/poi/hssf/record/RecordProcessor.java
+++ b/src/java/org/apache/poi/hssf/record/RecordProcessor.java
@@ -156,5 +156,10 @@ class RecordProcessor
         recordOffset += amount;
         available -= amount;
     }
+
+    public int getRecordOffset()
+    {
+        return recordOffset;
+    }
 }
diff --git a/src/java/org/apache/poi/hssf/record/SSTRecord.java b/src/java/org/apache/poi/hssf/record/SSTRecord.java
index a3f0807021..9cd941121d 100644
--- a/src/java/org/apache/poi/hssf/record/SSTRecord.java
+++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java
@@ -60,7 +60,6 @@ import org.apache.poi.util.LittleEndianConsts;
 
 import java.util.Iterator;
 import java.util.List;
-import java.util.ArrayList;
 
 /**
  * Title: Static String Table Record
@@ -73,7 +72,7 @@ import java.util.ArrayList;
  * @author Andrew C. Oliver (acoliver at apache dot org)
  * @author Marc Johnson (mjohnson at apache dot org)
  * @author Glen Stampoultzis (glens at apache.org)
- * @version 2.0-pre
+ *
  * @see org.apache.poi.hssf.record.LabelSSTRecord
 * @see org.apache.poi.hssf.record.ContinueRecord
 */
@@ -112,10 +111,14 @@ public class SSTRecord
     private List _record_lengths = null;
     private SSTDeserializer deserializer;
 
+    /** Offsets from the beginning of the SST record (even across continuations) */
+    int[] bucketAbsoluteOffsets;
+    /** Offsets relative the start of the current SST or continue record */
+    int[] bucketRelativeOffsets;
+
     /**
      * default constructor
      */
-
     public SSTRecord()
     {
         field_1_num_strings = 0;
@@ -220,7 +223,7 @@ public class SSTRecord
         field_1_num_strings++;
         String str = ( string == null ) ? "" : string;
 
-        int rval = -1;
+        int rval;
         UnicodeString ucs = new UnicodeString();
 
         ucs.setString( str );
@@ -334,7 +337,7 @@ public class SSTRecord
         for ( int k = 0; k < field_3_strings.size(); k++ )
         {
             buffer.append( " .string_" + k + " = " )
-                .append( ( (UnicodeString) field_3_strings
+                .append( ( field_3_strings
                     .get( new Integer( k ) ) ).toString() ).append( "\n" );
         }
         buffer.append( "[/SST]\n" );
@@ -394,7 +397,7 @@ public class SSTRecord
      * The data consists of sets of string data. This string data is
      * arranged as follows:
      *
-     *
+     *
      * short  string_length;   // length of string data
      * byte   string_flag;     // flag specifying special string
      *                         // handling
@@ -407,7 +410,7 @@ public class SSTRecord
      *                         // array is run_count)
      * byte[] extension;       // optional extension (length of array
      *                         // is extend_length)
-     *
+     *
      *
      * The string_flag is bit mapped as follows:
      *
@@ -507,14 +510,22 @@ public class SSTRecord
      * Subclasses should implement this so that their data is passed back in a
      * byte array.
      *
-     * @return byte array containing instance data
+     * @return size
      */
 
     public int serialize( int offset, byte[] data )
     {
         SSTSerializer serializer = new SSTSerializer( _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
-        return serializer.serialize( getRecordSize(), offset, data );
+        int bytes = serializer.serialize( getRecordSize(), offset, data );
+        bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
+        bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
+//        for ( int i = 0; i < bucketAbsoluteOffsets.length; i++ )
+//        {
+//            System.out.println( "bucketAbsoluteOffset = " + bucketAbsoluteOffsets[i] );
+//            System.out.println( "bucketRelativeOffset = " + bucketRelativeOffsets[i] );
+//        }
+        return bytes;
     }
 
@@ -538,6 +549,45 @@ public class SSTRecord
     {
         deserializer.processContinueRecord( record );
     }
+
+    /**
+     * Creates an extended string record based on the current contents of
+     * the current SST record. The offset within the stream to the SST record
+     * is required because the extended string record points directly to the
+     * strings in the SST record.
+     *
+     * NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
+     *       SERIALIZED.
+     *
+     * @param sstOffset     The offset in the stream to the start of the
+     *                      SST record.
+     * @return  The new SST record.
+     */
+    public ExtSSTRecord createExtSSTRecord(int sstOffset)
+    {
+        if (bucketAbsoluteOffsets == null || bucketAbsoluteOffsets == null)
+            throw new IllegalStateException("SST record has not yet been serialized.");
+
+        ExtSSTRecord extSST = new ExtSSTRecord();
+        extSST.setNumStringsPerBucket((short)8);
+        int[] absoluteOffsets = (int[]) bucketAbsoluteOffsets.clone();
+        int[] relativeOffsets = (int[]) bucketRelativeOffsets.clone();
+        for ( int i = 0; i < absoluteOffsets.length; i++ )
+            absoluteOffsets[i] += sstOffset;
+        extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
+        return extSST;
+    }
+
+    /**
+     * Calculates the size in bytes of the EXTSST record as it would be if the
+     * record was serialized.
+     *
+     * @return  The size of the ExtSST record in bytes.
+     */
+    public int calcExtSSTRecordSize()
+    {
+        return 4 + 2 + ((field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE) + 1) * 8;
+    }
 }
diff --git a/src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java b/src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java
index fbdfba50f3..15a52f9807 100644
--- a/src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java
+++ b/src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java
@@ -61,7 +61,9 @@ import java.util.List;
 import java.util.Map;
 
 /**
- * Used to calculate the record sizes for a particular record.
+ * Used to calculate the record sizes for a particular record. This kind of
+ * sucks because it's similar to the SST serialization code. In general
+ * the SST serialization code needs to be rewritten.
  *
  * @author Glen Stampoultzis (glens at apache.org)
 */
diff --git a/src/java/org/apache/poi/hssf/record/SSTSerializer.java b/src/java/org/apache/poi/hssf/record/SSTSerializer.java
index 8239eeb8b4..69e7af87d5 100644
--- a/src/java/org/apache/poi/hssf/record/SSTSerializer.java
+++ b/src/java/org/apache/poi/hssf/record/SSTSerializer.java
@@ -77,6 +77,14 @@ class SSTSerializer
     private int numUniqueStrings;
     private SSTRecordHeader sstRecordHeader;
 
+    /** Offsets from the beginning of the SST record (even across continuations) */
+    int[] bucketAbsoluteOffsets;
+    /** Offsets relative the start of the current SST or continue record */
+    int[] bucketRelativeOffsets;
+    int startOfSST, startOfRecord;
+    /** The default bucket size (this is used for ExternSST) */
+    final static int DEFAULT_BUCKET_SIZE = 8;
+
     public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings )
     {
         this.recordLengths = recordLengths;
@@ -84,6 +92,9 @@ class SSTSerializer
         this.numStrings = numStrings;
         this.numUniqueStrings = numUniqueStrings;
         this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
+
+        this.bucketAbsoluteOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1];
+        this.bucketRelativeOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1];
     }
 
     /**
@@ -133,7 +144,6 @@ class SSTSerializer
 
     /**
      * This case is chosen when an SST record does not span over to a continue record.
-     *
     */
     private void serializeSingleSSTRecord( byte[] data, int offset, int record_length_index )
     {
@@ -144,6 +154,11 @@ class SSTSerializer
 
         for ( int k = 0; k < strings.size(); k++ )
         {
+            if (k % DEFAULT_BUCKET_SIZE == 0)
+            {
+                bucketAbsoluteOffsets[k / DEFAULT_BUCKET_SIZE] = pos;
+                bucketRelativeOffsets[k / DEFAULT_BUCKET_SIZE] = pos;
+            }
             System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() );
             pos += getUnicodeString( k ).getRecordSize();
         }
@@ -157,6 +172,8 @@ class SSTSerializer
     private void serializeLargeRecord( int record_size, int record_length_index, byte[] buffer, int offset )
     {
+        startOfSST = offset;
+
         byte[] stringReminant = null;
         int stringIndex = 0;
         boolean lastneedcontinue = false;
@@ -170,6 +187,7 @@ class SSTSerializer
                 recordLength, numStrings, numUniqueStrings );
 
             // write the appropriate header
+            startOfRecord = offset + totalWritten;
             recordProcessor.writeRecordHeader( offset, totalWritten, recordLength, first_record );
             first_record = false;
 
@@ -189,6 +207,12 @@ class SSTSerializer
             {
                 UnicodeString unistr = getUnicodeString( stringIndex );
 
+                if (stringIndex % DEFAULT_BUCKET_SIZE == 0)
+                {
+                    bucketAbsoluteOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfSST;
+                    bucketRelativeOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfRecord;
+                }
+
                 if ( unistr.getRecordSize() <= recordProcessor.getAvailable() )
                 {
                     recordProcessor.writeWholeString( unistr, offset, totalWritten );
@@ -235,4 +259,14 @@ class SSTSerializer
     {
         return recordLengths;
     }
+
+    public int[] getBucketAbsoluteOffsets()
+    {
+        return bucketAbsoluteOffsets;
+    }
+
+    public int[] getBucketRelativeOffsets()
+    {
+        return bucketRelativeOffsets;
+    }
 }
-- 
2.39.5
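
For reference, the EXTSST bookkeeping this patch introduces boils down to two arrays filled while the SST strings are written: every DEFAULT_BUCKET_SIZE-th (eighth) string starts a bucket, whose absolute offset is measured from the beginning of the SST record and whose relative offset is measured from the start of whichever SST or continue record the string lands in, plus the size formula 4 + 2 + (numStrings / 8 + 1) * 8 used by calcExtSSTRecordSize(). The class below is only an illustrative, standalone sketch of that bookkeeping, not POI code: the names ExtSstOffsetSketch, bucketOffsets and SST_HEADER_SIZE are invented here, and it assumes a 12-byte SST header (sid, record length, number of strings, number of unique strings) and the simple single-record case, where absolute and relative offsets coincide.

import java.util.Arrays;

// Standalone sketch of the bucket bookkeeping added to SSTSerializer.
// Names in this class are illustrative only and not part of POI.
public class ExtSstOffsetSketch
{
    // Mirrors SSTSerializer.DEFAULT_BUCKET_SIZE in the patch.
    static final int BUCKET_SIZE = 8;
    // Assumed SST header size: sid (2) + length (2) + numStrings (4) + numUniqueStrings (4).
    static final int SST_HEADER_SIZE = 12;

    /**
     * Bucket offsets for an SST record that fits in a single record (no continue
     * records), given the serialized size of each string. Index 0 holds the
     * absolute offsets, index 1 the relative ones; in this simple case they match.
     */
    static int[][] bucketOffsets( int[] stringSizes )
    {
        int buckets = stringSizes.length / BUCKET_SIZE + 1;
        int[] absolute = new int[buckets];
        int[] relative = new int[buckets];
        int pos = SST_HEADER_SIZE;
        for ( int k = 0; k < stringSizes.length; k++ )
        {
            if ( k % BUCKET_SIZE == 0 )
            {
                absolute[k / BUCKET_SIZE] = pos;
                relative[k / BUCKET_SIZE] = pos;
            }
            pos += stringSizes[k];
        }
        return new int[][]{ absolute, relative };
    }

    /** EXTSST size: 4-byte record header + 2-byte bucket size + 8 bytes per bucket. */
    static int extSstSize( int numStrings )
    {
        return 4 + 2 + ( numStrings / BUCKET_SIZE + 1 ) * 8;
    }

    public static void main( String[] args )
    {
        int[] sizes = new int[20];
        Arrays.fill( sizes, 10 );   // pretend each string serializes to 10 bytes
        System.out.println( Arrays.toString( bucketOffsets( sizes )[0] ) );  // [12, 92, 172]
        System.out.println( extSstSize( sizes.length ) );                    // 30
    }
}

Running main prints bucket offsets of 12, 92 and 172 for twenty 10-byte strings and an EXTSST size of 30 bytes, one 8-byte ExtSSTInfoSubRecord per bucket, which is the same per-bucket layout the patch serializes and then rebases onto the stream via createExtSSTRecord(sstOffset) during Workbook.serialize().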