Refactoring and cleanup work in prep for looking more deeply into SST handling.

author Glen Stampoultzis <glens@apache.org>

Mon, 27 May 2002 11:43:27 +0000 (11:43 +0000)

committer Glen Stampoultzis <glens@apache.org>

Mon, 27 May 2002 11:43:27 +0000 (11:43 +0000)
author Glen Stampoultzis <glens@apache.org>
Mon, 27 May 2002 11:43:27 +0000 (11:43 +0000)
committer Glen Stampoultzis <glens@apache.org>
Mon, 27 May 2002 11:43:27 +0000 (11:43 +0000)
diff --git a/src/java/org/apache/poi/hssf/model/Workbook.java b/src/java/org/apache/poi/hssf/model/Workbook.java

index d45153f9c9c9db729bb9910a2845b95d901cbaa9..55fe659322b901a9681895458b47daa57049a814 100644 (file)
--- a/src/java/org/apache/poi/hssf/model/Workbook.java
+++ b/src/java/org/apache/poi/hssf/model/Workbook.java
@@ -643,26 +643,11 @@ public class Workbook
      {
          log.log(DEBUG, "Serializing Workbook with offsets");
  
-        // ArrayList bytes     = new ArrayList(records.size());
-//        int arraysize = getSize();   // 0;
          int pos       = 0;
  
-//        for (int k = 0; k < records.size(); k++)
-//        {
-//            bytes.add((( Record ) records.get(k)).serialize());
-//
-//        }
-//        for (int k = 0; k < bytes.size(); k++)
-//       {
-//            arraysize += (( byte [] ) bytes.get(k)).length;
-//        }
          for (int k = 0; k < records.size(); k++)
          {
-
-            // byte[] rec = (( byte [] ) bytes.get(k));
-            // System.arraycopy(rec, 0, data, offset + pos, rec.length);
-            pos += (( Record ) records.get(k)).serialize(pos + offset,
-                    data);   // rec.length;
+            pos += (( Record ) records.get(k)).serialize(pos + offset, data);   // rec.length;
          }
          log.log(DEBUG, "Exiting serialize workbook");
          return pos;
diff --git a/src/java/org/apache/poi/hssf/record/ContinueRecord.java b/src/java/org/apache/poi/hssf/record/ContinueRecord.java

index 2b67a62d4005d2afbfcedb7d156a8d467f6f9c5a..5017ade922c80b8e6383e17fdaebe5d403f5f0cb 100644 (file)
--- a/src/java/org/apache/poi/hssf/record/ContinueRecord.java
+++ b/src/java/org/apache/poi/hssf/record/ContinueRecord.java
@@ -161,9 +161,7 @@ public class ContinueRecord
  
          // how many continue records do we need
          // System.out.println("In ProcessContinue");
-        int       records   =
-            (data.length
-             / 8214);   // we've a 1 offset but we're also off by one due to rounding...so it balances out
+        int       records   = (data.length / 8214);   // we've a 1 offset but we're also off by one due to rounding...so it balances out
          int       offset    = 8214;
  
          // System.out.println("we have "+records+" continue records to process");
@@ -174,8 +172,7 @@ public class ContinueRecord
          for (int cr = 0; cr < records; cr++)
          {
              ContinueRecord contrec   = new ContinueRecord();
-            int            arraysize = Math.min((8214 - 4),
-                                                (data.length - offset));
+            int            arraysize = Math.min((8214 - 4), (data.length - offset));
              byte[]         crdata    = new byte[ arraysize ];
  
              System.arraycopy(data, offset, crdata, 0, arraysize);
diff --git a/src/java/org/apache/poi/hssf/record/RecordProcessor.java b/src/java/org/apache/poi/hssf/record/RecordProcessor.java

new file mode 100644 (file)

index 0000000..06eb364
--- /dev/null
+++ b/src/java/org/apache/poi/hssf/record/RecordProcessor.java
@@ -0,0 +1,142 @@
+package org.apache.poi.hssf.record;
+
+import org.apache.poi.util.LittleEndianConsts;
+import org.apache.poi.util.LittleEndian;
+
+class RecordProcessor   // writes record headers and string bytes into a caller-supplied buffer, tracking a running offset and the space left
+{
+    private byte[] data;                        // destination buffer; every write in this class targets this array
+    private int recordOffset;                   // running offset, added to (offset + totalWritten) to compute each write position
+    private int available;                      // space budget supplied by the caller; reduced as bytes are written
+    private SSTRecordHeader sstRecordHeader;    // used by writeRecordHeader to write the header of the first record
+    /** Stores the buffer and the space budget, and builds the SST header writer from the two string counts. */
+    public RecordProcessor( byte[] data, int available, int numStrings, int numUniqueStrings )
+    {
+        this.data = data;
+        this.available = available;
+        this.sstRecordHeader = new SSTRecordHeader(numStrings, numUniqueStrings);
+    }
+    /** Returns the part of the space budget that has not been consumed yet. */
+    public int getAvailable()
+    {
+        return available;
+    }
+    /** Writes an SST header (first_record) or a Continue header at recordOffset + offset + totalWritten, then sets recordOffset to the header's byte count. */
+    public void writeRecordHeader( int offset, int totalWritten, int recordLength, boolean first_record )
+    {
+        if ( first_record )
+        {
+            available -= 8;    // presumably the two int counts written by writeSSTHeader (2 * INT_SIZE) -- TODO confirm
+            recordOffset = sstRecordHeader.writeSSTHeader( data, recordOffset + offset + totalWritten, recordLength );    // assigned, not added -- NOTE(review): looks like this assumes recordOffset was 0 on entry; confirm
+        }
+        else
+        {
+            recordOffset = writeContinueHeader( data, recordOffset + offset + totalWritten, recordLength );
+        }
+    }
+    /** Copies all or part of a carried-over string remnant into the buffer and returns what is still unwritten (the input array itself when all of it was copied). */
+    public byte[] writeStringRemainder( boolean lastStringCompleted, byte[] stringreminant, int offset, int totalWritten )
+    {
+        if ( !lastStringCompleted )    // NOTE(review): the name reads inverted -- the false branch is the one that writes the whole remnant; confirm with the caller
+        {
+            // write remnant -- it'll all fit neatly
+            System.arraycopy( stringreminant, 0, data, recordOffset + offset + totalWritten, stringreminant.length );
+            adjustPointers( stringreminant.length );
+        }
+        else
+        {
+            // write as much of the remnant as possible
+            System.arraycopy( stringreminant, 0, data, recordOffset + offset + totalWritten, available );
+            byte[] leftover = new byte[( stringreminant.length - available ) + LittleEndianConsts.BYTE_SIZE];    // BYTE_SIZE extra at the front for the byte set below
+
+            System.arraycopy( stringreminant, available, leftover, LittleEndianConsts.BYTE_SIZE, stringreminant.length - available );
+            leftover[0] = stringreminant[0];    // carry the remnant's first byte over to the next piece
+            stringreminant = leftover;
+            adjustPointers( available );    // Consume all available remaining space
+        }
+        return stringreminant;
+    }
+    /** Serializes the whole string into the buffer at the current position and advances the pointers by unistr.getRecordSize(). */
+    public void writeWholeString( UnicodeString unistr, int offset, int totalWritten )
+    {
+        unistr.serialize( recordOffset + offset + totalWritten, data );
+        int rsize = unistr.getRecordSize();
+        adjustPointers( rsize );
+    }
+    /** Copies the first {@code available} bytes of the serialized string into the buffer, zeroes available, and returns the unwritten tail prefixed with the byte at ucs[SHORT_SIZE]. */
+    public byte[] writePartString( UnicodeString unistr, int offset, int totalWritten )
+    {
+        byte[] stringReminant;
+        byte[] ucs = unistr.serialize();
+
+        System.arraycopy( ucs, 0, data, recordOffset + offset + totalWritten, available );
+        stringReminant = new byte[( ucs.length - available ) + LittleEndianConsts.BYTE_SIZE];
+        System.arraycopy( ucs, available, stringReminant, LittleEndianConsts.BYTE_SIZE, ucs.length - available );
+        stringReminant[0] = ucs[LittleEndianConsts.SHORT_SIZE];    // presumably the option-flags byte that follows the char-count short -- TODO confirm
+        available = 0;    // recordOffset is left unchanged here
+        return stringReminant;
+    }
+
+    /** Writes ContinueRecord.sid and recsize (cast to short) as two shorts starting at pos; returns the number of bytes written. */
+    private int writeContinueHeader( final byte[] data, final int pos,
+                                     final int recsize )
+    {
+        int offset = pos;
+
+        LittleEndian.putShort( data, offset, ContinueRecord.sid );
+        offset += LittleEndianConsts.SHORT_SIZE;
+        LittleEndian.putShort( data, offset, (short) ( recsize ) );
+        offset += LittleEndianConsts.SHORT_SIZE;
+        return offset - pos;
+    }
+
+    /** Advances recordOffset and shrinks available by the same amount. */
+    private void adjustPointers( int amount )
+    {
+        recordOffset += amount;
+        available -= amount;
+    }
+}
+
+class SSTRecordHeader   // holds the two SST string counts and writes the SST record header into a byte buffer
+{
+    int numStrings;          // written as the first int of the header
+    int numUniqueStrings;    // written as the second int of the header
+
+    /**
+     * Stores the two counts that writeSSTHeader later writes into the header.
+     */
+    public SSTRecordHeader( int numStrings, int numUniqueStrings )
+    {
+        this.numStrings = numStrings;
+        this.numUniqueStrings = numUniqueStrings;
+    }
+
+    /**
+     * Writes out the SST record header.  This consists of the sid, the record size, the number of
+     * strings and the number of unique strings.
+     *
+     * @param data          The data buffer to write the header to.
+     * @param bufferIndex   The index into the data buffer where the header should be written.
+     * @param recSize       The record size value, written as a short immediately after the sid.
+     *
+     * @return The number of bytes written to the buffer.
+     */
+    public int writeSSTHeader( byte[] data, int bufferIndex, int recSize )
+    {
+        int offset = bufferIndex;
+
+        LittleEndian.putShort( data, offset, SSTRecord.sid );
+        offset += LittleEndianConsts.SHORT_SIZE;
+        LittleEndian.putShort( data, offset, (short) ( recSize ) );
+        offset += LittleEndianConsts.SHORT_SIZE;
+//        LittleEndian.putInt( data, offset, getNumStrings() );
+        LittleEndian.putInt( data, offset, numStrings );
+        offset += LittleEndianConsts.INT_SIZE;
+//        LittleEndian.putInt( data, offset, getNumUniqueStrings() );
+        LittleEndian.putInt( data, offset, numUniqueStrings );
+        offset += LittleEndianConsts.INT_SIZE;
+        return offset - bufferIndex;
+    }
+
+}
+\ No newline at end of file
diff --git a/src/java/org/apache/poi/hssf/record/SSTRecord.java b/src/java/org/apache/poi/hssf/record/SSTRecord.java

index d8428148aba4bee437123b3b42b5d5234c7384c7..07054a577766a4dfabf7f00cf9c31ecf5bdfe59d 100644 (file)
--- a/src/java/org/apache/poi/hssf/record/SSTRecord.java
+++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java
@@ -1,4 +1,3 @@
-
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
@@ -59,7 +58,9 @@ import org.apache.poi.util.BinaryTree;
  import org.apache.poi.util.LittleEndian;
  import org.apache.poi.util.LittleEndianConsts;
  
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
  
  /**
   * Title:        Static String Table Record
@@ -71,65 +72,59 @@ import java.util.*;
   * <P>
   * @author Andrew C. Oliver (acoliver at apache dot org)
   * @author Marc Johnson (mjohnson at apache dot org)
+ * @author Glen Stampoultzis (glens at apache.org)
   * @version 2.0-pre
   * @see org.apache.poi.hssf.record.LabelSSTRecord
   * @see org.apache.poi.hssf.record.ContinueRecord
   */
  
  public class SSTRecord
-    extends Record
+        extends Record
  {
  
-    // how big can an SST record be? As big as any record can be: 8228
-    // bytes
-    private static final int  _max                     = 8228;
+    /** how big can an SST record be? As big as any record can be: 8228 bytes */
+    static final int MAX_RECORD_SIZE = 8228;
+
+    /** standard record overhead: two shorts (record id plus data space size)*/
+    static final int STD_RECORD_OVERHEAD =
+            2 * LittleEndianConsts.SHORT_SIZE;
  
-    // standard record overhead: two shorts (record id plus data space
-    // size)
-    private static final int  _std_record_overhead     =
-        2 * LittleEndianConsts.SHORT_SIZE;
+    /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
+    static final int SST_RECORD_OVERHEAD =
+            ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
  
-    // SST overhead: the standard record overhead, plus the number of
-    // strings and the number of unique strings -- two ints
-    private static final int  _sst_record_overhead     =
-        (_std_record_overhead + (2 * LittleEndianConsts.INT_SIZE));
+    /** how much data can we stuff into an SST record? That would be MAX_RECORD_SIZE minus the standard SST record overhead */
+    static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
  
-    // how much data can we stuff into an SST record? That would be
-    // _max minus the standard SST record overhead
-    private static final int  _max_data_space          =
-        _max - _sst_record_overhead;
+    /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
+    static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
  
-    // overhead for each string includes the string's character count
-    // (a short) and the flag describing its characteristics (a byte)
-    private static final int  _string_minimal_overhead =
-        LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
-    public static final short sid                      = 0xfc;
+    public static final short sid = 0xfc;
  
-    // union of strings in the SST and EXTSST
-    private int               field_1_num_strings;
+    /** union of strings in the SST and EXTSST */
+    private int field_1_num_strings;
  
-    // according to docs ONLY SST
-    private int               field_2_num_unique_strings;
-    private BinaryTree        field_3_strings;
+    /** according to docs ONLY SST */
+    private int field_2_num_unique_strings;
+    private BinaryTree field_3_strings;
  
-    // this is the number of characters we expect in the first
-    // sub-record in a subsequent continuation record
-    private int               __expected_chars;
+    /** this is the number of characters we expect in the first sub-record in a subsequent continuation record */
+    private int __expected_chars;
  
-    // this is the string we were working on before hitting the end of
-    // the current record. This string is NOT finished.
-    private String            _unfinished_string;
+    /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
+    private String _unfinished_string;
  
-    // this is the total length of the current string being handled
-    private int               _total_length_bytes;
+    /** this is the total length of the current string being handled */
+    private int _total_length_bytes;
  
-    // this is the offset into a string field of the actual string
-    // data
-    private int               _string_data_offset;
+    /** this is the offset into a string field of the actual string data */
+    private int _string_data_offset;
  
-    // this is true if the string uses wide characters
-    private boolean           _wide_char;
-    private List              _record_lengths = null;
+    /** this is true if the string uses wide characters */
+    private boolean _wide_char;
+
+    /** Record lengths for initial SST record and all continue records */
+    private List _record_lengths = null;
  
      /**
       * default constructor
@@ -137,14 +132,14 @@ public class SSTRecord
  
      public SSTRecord()
      {
-        field_1_num_strings        = 0;
+        field_1_num_strings = 0;
          field_2_num_unique_strings = 0;
-        field_3_strings            = new BinaryTree();
-        setExpectedChars(0);
-        _unfinished_string  = "";
+        field_3_strings = new BinaryTree();
+        setExpectedChars( 0 );
+        _unfinished_string = "";
          _total_length_bytes = 0;
          _string_data_offset = 0;
-        _wide_char          = false;
+        _wide_char = false;
      }
  
      /**
@@ -156,9 +151,9 @@ public class SSTRecord
       * @param data of the record (should not contain sid/len)
       */
  
-    public SSTRecord(final short id, final short size, final byte [] data)
+    public SSTRecord( final short id, final short size, final byte[] data )
      {
-        super(id, size, data);
+        super( id, size, data );
      }
  
      /**
@@ -171,10 +166,10 @@ public class SSTRecord
       * @param offset of the record
       */
  
-    public SSTRecord(final short id, final short size, final byte [] data,
-                     int offset)
+    public SSTRecord( final short id, final short size, final byte[] data,
+                      int offset )
      {
-        super(id, size, data, offset);
+        super( id, size, data, offset );
      }
  
      /**
@@ -192,13 +187,13 @@ public class SSTRecord
       * @return the index of that string in the table
       */
  
-    public int addString(final String string)
+    public int addString( final String string )
      {
          int rval;
  
-        if (string == null)
+        if ( string == null )
          {
-            rval = addString("", false);
+            rval = addString( "", false );
          }
          else
          {
@@ -207,17 +202,17 @@ public class SSTRecord
              // present, we have to use 16-bit encoding. Otherwise, we
              // can use 8-bit encoding
              boolean useUTF16 = false;
-            int     strlen   = string.length();
+            int strlen = string.length();
  
-            for (int j = 0; j < strlen; j++)
+            for ( int j = 0; j < strlen; j++ )
              {
-                if (string.charAt(j) > 255)
+                if ( string.charAt( j ) > 255 )
                  {
                      useUTF16 = true;
                      break;
                  }
              }
-            rval = addString(string, useUTF16);
+            rval = addString( string, useUTF16 );
          }
          return rval;
      }
@@ -238,21 +233,21 @@ public class SSTRecord
       * @return the index of that string in the table
       */
  
-    public int addString(final String string, final boolean useUTF16)
+    public int addString( final String string, final boolean useUTF16 )
      {
          field_1_num_strings++;
-        String        str  = (string == null) ? ""
-                                              : string;
-        int           rval = -1;
-        UnicodeString ucs  = new UnicodeString();
-
-        ucs.setString(str);
-        ucs.setCharCount(( short ) str.length());
-        ucs.setOptionFlags(( byte ) (useUTF16 ? 1
-                                              : 0));
-        Integer integer = ( Integer ) field_3_strings.getKeyForValue(ucs);
-
-        if (integer != null)
+        String str = ( string == null ) ? ""
+                : string;
+        int rval = -1;
+        UnicodeString ucs = new UnicodeString();
+
+        ucs.setString( str );
+        ucs.setCharCount( (short) str.length() );
+        ucs.setOptionFlags( (byte) ( useUTF16 ? 1
+                : 0 ) );
+        Integer integer = (Integer) field_3_strings.getKeyForValue( ucs );
+
+        if ( integer != null )
          {
              rval = integer.intValue();
          }
@@ -263,8 +258,8 @@ public class SSTRecord
              // strings we've already collected
              rval = field_3_strings.size();
              field_2_num_unique_strings++;
-            integer = new Integer(rval);
-            field_3_strings.put(integer, ucs);
+            integer = new Integer( rval );
+            field_3_strings.put( integer, ucs );
          }
          return rval;
      }
@@ -298,7 +293,7 @@ public class SSTRecord
       *
       */
  
-    public void setNumStrings(final int count)
+    public void setNumStrings( final int count )
      {
          field_1_num_strings = count;
      }
@@ -314,7 +309,7 @@ public class SSTRecord
       * @param count  number of strings
       */
  
-    public void getNumUniqueStrings(final int count)
+    public void getNumUniqueStrings( final int count )
      {
          field_2_num_unique_strings = count;
      }
@@ -327,16 +322,16 @@ public class SSTRecord
       * @return the desired string
       */
  
-    public String getString(final int id)
+    public String getString( final int id )
      {
-        return (( UnicodeString ) field_3_strings.get(new Integer(id)))
-            .getString();
+        return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) )
+                .getString();
      }
  
-    public boolean getString16bit(final int id)
+    public boolean getString16bit( final int id )
      {
-        return ((( UnicodeString ) field_3_strings.get(new Integer(id)))
-            .getOptionFlags() == 1);
+        UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
+        return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
      }
  
      /**
@@ -349,216 +344,21 @@ public class SSTRecord
      {
          StringBuffer buffer = new StringBuffer();
  
-        buffer.append("[SST]\n");
-        buffer.append("    .numstrings     = ")
-            .append(Integer.toHexString(getNumStrings())).append("\n");
-        buffer.append("    .uniquestrings  = ")
-            .append(Integer.toHexString(getNumUniqueStrings())).append("\n");
-        for (int k = 0; k < field_3_strings.size(); k++)
+        buffer.append( "[SST]\n" );
+        buffer.append( "    .numstrings     = " )
+                .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
+        buffer.append( "    .uniquestrings  = " )
+                .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
+        for ( int k = 0; k < field_3_strings.size(); k++ )
          {
-            buffer.append("    .string_" + k + "      = ")
-                .append((( UnicodeString ) field_3_strings
-                .get(new Integer(k))).toString()).append("\n");
+            buffer.append( "    .string_" + k + "      = " )
+                    .append( ( (UnicodeString) field_3_strings
+                    .get( new Integer( k ) ) ).toString() ).append( "\n" );
          }
-        buffer.append("[/SST]\n");
+        buffer.append( "[/SST]\n" );
          return buffer.toString();
      }
  
-    /**
-     * Create a byte array consisting of an SST record and any
-     * required Continue records, ready to be written out.
-     * <p>
-     * If an SST record and any subsequent Continue records are read
-     * in to create this instance, this method should produce a byte
-     * array that is identical to the byte array produced by
-     * concatenating the input records' data.
-     *
-     * @return the byte array
-     */
-
-    public int serialize(int offset, byte [] data)
-    {
-        int rval                = getRecordSize();
-        int record_length_index = 0;
-
-        // get the linear size of that array
-        int unicodesize         = calculateUnicodeSize();
-
-        if (unicodesize > _max_data_space)
-        {
-            byte[]  stringreminant     = null;
-            int     unipos             = 0;
-            boolean lastneedcontinue   = false;
-            int     stringbyteswritten = 0;
-            boolean first_record       = true;
-            int     totalWritten       = 0;
-            int     size               = 0;
-
-            while (totalWritten != rval)
-            {
-                int pos = 0;
-
-                // write the appropriate header
-                int available;
-
-                if (first_record)
-                {
-                    size         =
-                        (( Integer ) _record_lengths
-                            .get(record_length_index++)).intValue();
-                    available    = size - 8;
-                    pos          = writeSSTHeader(data,
-                                                  pos + offset
-                                                  + totalWritten, size);
-                    size         += _std_record_overhead;
-                    first_record = false;
-                }
-                else
-                {
-                    pos = 0;
-                    int to_be_written = (unicodesize - stringbyteswritten)
-                                        + (lastneedcontinue ? 1
-                                                            : 0);           // not used?
-
-                    size      =
-                        (( Integer ) _record_lengths
-                            .get(record_length_index++)).intValue();
-                    available = size;
-                    pos       = writeContinueHeader(data,
-                                                    pos + offset
-                                                    + totalWritten, size);
-                    size      = size + _std_record_overhead;
-                }
-
-                // now, write the rest of the data into the current
-                // record space
-                if (lastneedcontinue)
-                {
-
-                    // the last string in the previous record was not
-                    // written out completely
-                    if (stringreminant.length <= available)
-                    {
-
-                        // write reminant -- it'll all fit neatly
-                        System.arraycopy(stringreminant, 0, data,
-                                         pos + offset + totalWritten,
-                                         stringreminant.length);
-                        stringbyteswritten += stringreminant.length - 1;
-                        pos                += stringreminant.length;
-                        lastneedcontinue   = false;
-                        available          -= stringreminant.length;
-                    }
-                    else
-                    {
-
-                        // write as much of the remnant as possible
-                        System.arraycopy(stringreminant, 0, data,
-                                         pos + offset + totalWritten,
-                                         available);
-                        stringbyteswritten += available - 1;
-                        pos                += available;
-                        byte[] leftover =
-                            new byte[ (stringreminant.length - available) + LittleEndianConsts.BYTE_SIZE ];
-
-                        System.arraycopy(stringreminant, available, leftover,
-                                         LittleEndianConsts.BYTE_SIZE,
-                                         stringreminant.length - available);
-                        leftover[ 0 ]    = stringreminant[ 0 ];
-                        stringreminant   = leftover;
-                        available        = 0;
-                        lastneedcontinue = true;
-                    }
-                }
-
-                // last string's remnant, if any, is cleaned up as
-                // best as can be done ... now let's try and write
-                // some more strings
-                for (; unipos < field_3_strings.size(); unipos++)
-                {
-                    Integer       intunipos = new Integer(unipos);
-                    UnicodeString unistr    =
-                        (( UnicodeString ) field_3_strings.get(intunipos));
-
-                    if (unistr.getRecordSize() <= available)
-                    {
-                        unistr.serialize(pos + offset + totalWritten, data);
-                        int rsize = unistr.getRecordSize();
-
-                        stringbyteswritten += rsize;
-                        pos                += rsize;
-                        available          -= rsize;
-                    }
-                    else
-                    {
-
-                        // can't write the entire string out
-                        if (available >= _string_minimal_overhead)
-                        {
-
-                            // we can write some of it
-                            byte[] ucs = unistr.serialize();
-
-                            System.arraycopy(ucs, 0, data,
-                                             pos + offset + totalWritten,
-                                             available);
-                            stringbyteswritten += available;
-                            stringreminant     =
-                                new byte[ (ucs.length - available) + LittleEndianConsts.BYTE_SIZE ];
-                            System.arraycopy(ucs, available, stringreminant,
-                                             LittleEndianConsts.BYTE_SIZE,
-                                             ucs.length - available);
-                            stringreminant[ 0 ] =
-                                ucs[ LittleEndianConsts.SHORT_SIZE ];
-                            available           = 0;
-                            lastneedcontinue    = true;
-                            unipos++;
-                        }
-                        break;
-                    }
-                }
-                totalWritten += size;
-            }
-        }
-        else
-        {
-
-            // short data: write one simple SST record
-            int datasize = _sst_record_overhead + unicodesize;           // not used?
-
-            writeSSTHeader(
-                data, 0 + offset,
-                _sst_record_overhead
-                + (( Integer ) _record_lengths.get(
-                record_length_index++)).intValue() - _std_record_overhead);
-            int pos = _sst_record_overhead;
-
-            for (int k = 0; k < field_3_strings.size(); k++)
-            {
-                UnicodeString unistr =
-                    (( UnicodeString ) field_3_strings.get(new Integer(k)));
-
-                System.arraycopy(unistr.serialize(), 0, data, pos + offset,
-                                 unistr.getRecordSize());
-                pos += unistr.getRecordSize();
-            }
-        }
-        return rval;
-    }
-
-    // not used: remove?
-    private int calculateStringsize()
-    {
-        int retval = 0;
-
-        for (int k = 0; k < field_3_strings.size(); k++)
-        {
-            retval +=
-                (( UnicodeString ) field_3_strings.get(new Integer(k)))
-                    .getRecordSize();
-        }
-        return retval;
-    }
  
      /**
       * Process a Continue record. A Continue record for an SST record
@@ -581,86 +381,86 @@ public class SSTRecord
       * @param record the Continue record's byte data
       */
  
-    public void processContinueRecord(final byte [] record)
+    public void processContinueRecord( final byte[] record )
      {
-        if (getExpectedChars() == 0)
+        if ( getExpectedChars() == 0 )
          {
-            _unfinished_string  = "";
+            _unfinished_string = "";
              _total_length_bytes = 0;
              _string_data_offset = 0;
-            _wide_char          = false;
-            manufactureStrings(record, 0, ( short ) record.length);
+            _wide_char = false;
+            manufactureStrings( record, 0, (short) record.length );
          }
          else
          {
              int data_length = record.length - LittleEndianConsts.BYTE_SIZE;
  
-            if (calculateByteCount(getExpectedChars()) > data_length)
+            if ( calculateByteCount( getExpectedChars() ) > data_length )
              {
  
                  // create artificial data to create a UnicodeString
                  byte[] input =
-                    new byte[ record.length + LittleEndianConsts.SHORT_SIZE ];
-                short  size  = ( short ) (((record[ 0 ] & 1) == 1)
-                                          ? (data_length
-                                             / LittleEndianConsts.SHORT_SIZE)
-                                          : (data_length
-                                             / LittleEndianConsts.BYTE_SIZE));
-
-                LittleEndian.putShort(input, ( byte ) 0, size);
-                System.arraycopy(record, 0, input,
-                                 LittleEndianConsts.SHORT_SIZE,
-                                 record.length);
-                UnicodeString ucs = new UnicodeString(UnicodeString.sid,
-                                                      ( short ) input.length,
-                                                      input);
+                        new byte[record.length + LittleEndianConsts.SHORT_SIZE];
+                short size = (short) ( ( ( record[0] & 1 ) == 1 )
+                        ? ( data_length
+                        / LittleEndianConsts.SHORT_SIZE )
+                        : ( data_length
+                        / LittleEndianConsts.BYTE_SIZE ) );
+
+                LittleEndian.putShort( input, (byte) 0, size );
+                System.arraycopy( record, 0, input,
+                        LittleEndianConsts.SHORT_SIZE,
+                        record.length );
+                UnicodeString ucs = new UnicodeString( UnicodeString.sid,
+                        (short) input.length,
+                        input );
  
                  _unfinished_string = _unfinished_string + ucs.getString();
-                setExpectedChars(getExpectedChars() - size);
+                setExpectedChars( getExpectedChars() - size );
              }
              else
              {
-                setupStringParameters(record, -LittleEndianConsts.SHORT_SIZE,
-                                      getExpectedChars());
-                byte[] str_data = new byte[ _total_length_bytes ];
-                int    length   = _string_minimal_overhead
-                                  + (calculateByteCount(getExpectedChars()));
-                byte[] bstring  = new byte[ length ];
+                setupStringParameters( record, -LittleEndianConsts.SHORT_SIZE,
+                        getExpectedChars() );
+                byte[] str_data = new byte[_total_length_bytes];
+                int length = STRING_MINIMAL_OVERHEAD
+                        + ( calculateByteCount( getExpectedChars() ) );
+                byte[] bstring = new byte[length];
  
                  // Copy data from the record into the string
                  // buffer. Copy skips the length of a short in the
                  // string buffer, to leave room for the string length.
-                System.arraycopy(record, 0, str_data,
-                                 LittleEndianConsts.SHORT_SIZE,
-                                 str_data.length
-                                 - LittleEndianConsts.SHORT_SIZE);
+                System.arraycopy( record, 0, str_data,
+                        LittleEndianConsts.SHORT_SIZE,
+                        str_data.length
+                        - LittleEndianConsts.SHORT_SIZE );
  
                  // write the string length
-                LittleEndian.putShort(bstring, 0,
-                                      ( short ) getExpectedChars());
+                LittleEndian.putShort( bstring, 0,
+                        (short) getExpectedChars() );
  
                  // write the options flag
-                bstring[ LittleEndianConsts.SHORT_SIZE ] =
-                    str_data[ LittleEndianConsts.SHORT_SIZE ];
+                bstring[LittleEndianConsts.SHORT_SIZE] =
+                        str_data[LittleEndianConsts.SHORT_SIZE];
  
                  // copy the bytes/words making up the string; skipping
                  // past all the overhead of the str_data array
-                System.arraycopy(str_data, _string_data_offset, bstring,
-                                 _string_minimal_overhead,
-                                 bstring.length - _string_minimal_overhead);
+                System.arraycopy( str_data, _string_data_offset, bstring,
+                        STRING_MINIMAL_OVERHEAD,
+                        bstring.length - STRING_MINIMAL_OVERHEAD );
  
                  // use special constructor to create the final string
-                UnicodeString string  =
-                    new UnicodeString(UnicodeString.sid,
-                                      ( short ) bstring.length, bstring,
-                                      _unfinished_string);
-                Integer       integer = new Integer(field_3_strings.size());
-
-                field_3_strings.put(integer, string);
-                manufactureStrings(record,
-                                   _total_length_bytes
-                                   - LittleEndianConsts
-                                       .SHORT_SIZE, ( short ) record.length);
+                UnicodeString string =
+                        new UnicodeString( UnicodeString.sid,
+                                (short) bstring.length, bstring,
+                                _unfinished_string );
+                Integer integer = new Integer( field_3_strings.size() );
+
+                field_3_strings.put( integer, string );
+                manufactureStrings( record,
+                        _total_length_bytes
+                        - LittleEndianConsts
+                        .SHORT_SIZE, (short) record.length );
              }
          }
      }
@@ -683,24 +483,18 @@ public class SSTRecord
          return field_2_num_unique_strings;
      }
  
-    /**
-     *
-     * @param o
-     * @return true if equal
-     */
-
-    public boolean equals(Object o)
+    public boolean equals( Object o )
      {
-        if ((o == null) || (o.getClass() != this.getClass()))
+        if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
          {
              return false;
          }
-        SSTRecord other = ( SSTRecord ) o;
+        SSTRecord other = (SSTRecord) o;
  
-        return ((field_1_num_strings == other
-            .field_1_num_strings) && (field_2_num_unique_strings == other
-                .field_2_num_unique_strings) && field_3_strings
-                    .equals(other.field_3_strings));
+        return ( ( field_1_num_strings == other
+                .field_1_num_strings ) && ( field_2_num_unique_strings == other
+                .field_2_num_unique_strings ) && field_3_strings
+                .equals( other.field_3_strings ) );
      }
  
      /**
@@ -711,12 +505,12 @@ public class SSTRecord
       * @exception RecordFormatException if validation fails
       */
  
-    protected void validateSid(final short id)
-        throws RecordFormatException
+    protected void validateSid( final short id )
+            throws RecordFormatException
      {
-        if (id != sid)
+        if ( id != sid )
          {
-            throw new RecordFormatException("NOT An SST RECORD");
+            throw new RecordFormatException( "NOT An SST RECORD" );
          }
      }
  
@@ -800,22 +594,22 @@ public class SSTRecord
       * @param size size of the raw data
       */
  
-    protected void fillFields(final byte [] data, final short size,
-                              int offset)
+    protected void fillFields( final byte[] data, final short size,
+                               int offset )
      {
  
          // this method is ALWAYS called after construction -- using
          // the nontrivial constructor, of course -- so this is where
          // we initialize our fields
-        field_1_num_strings        = LittleEndian.getInt(data, 0 + offset);
-        field_2_num_unique_strings = LittleEndian.getInt(data, 4 + offset);
-        field_3_strings            = new BinaryTree();
-        setExpectedChars(0);
-        _unfinished_string  = "";
+        field_1_num_strings = LittleEndian.getInt( data, 0 + offset );
+        field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
+        field_3_strings = new BinaryTree();
+        setExpectedChars( 0 );
+        _unfinished_string = "";
          _total_length_bytes = 0;
          _string_data_offset = 0;
-        _wide_char          = false;
-        manufactureStrings(data, 8 + offset, size);
+        _wide_char = false;
+        manufactureStrings( data, 8 + offset, size );
      }
  
      /**
@@ -883,337 +677,182 @@ public class SSTRecord
          return _wide_char;
      }
  
-    private int writeSSTHeader(final byte [] data, final int pos,
-                               final int recsize)
-    {
-        int offset = pos;
-
-        LittleEndian.putShort(data, offset, sid);
-        offset += LittleEndianConsts.SHORT_SIZE;
-        LittleEndian.putShort(data, offset, ( short ) (recsize));
-        offset += LittleEndianConsts.SHORT_SIZE;
-        LittleEndian.putInt(data, offset, getNumStrings());
-        offset += LittleEndianConsts.INT_SIZE;
-        LittleEndian.putInt(data, offset, getNumUniqueStrings());
-        offset += LittleEndianConsts.INT_SIZE;
-        return offset - pos;
-    }
  
-    private int writeContinueHeader(final byte [] data, final int pos,
-                                    final int recsize)
-    {
-        int offset = pos;
-
-        LittleEndian.putShort(data, offset, ContinueRecord.sid);
-        offset += LittleEndianConsts.SHORT_SIZE;
-        LittleEndian.putShort(data, offset, ( short ) (recsize));
-        offset += LittleEndianConsts.SHORT_SIZE;
-        return offset - pos;
-    }
-
-    private int calculateUCArrayLength(final byte [][] ucarray)
-    {
-        int retval = 0;
-
-        for (int k = 0; k < ucarray.length; k++)
-        {
-            retval += ucarray[ k ].length;
-        }
-        return retval;
-    }
-
-    private void manufactureStrings(final byte [] data, final int index,
-                                    short size)
+    private void manufactureStrings( final byte[] data, final int index,
+                                     short size )
      {
          int offset = index;
  
-        while (offset < size)
+        while ( offset < size )
          {
              int remaining = size - offset;
  
-            if ((remaining > 0)
-                    && (remaining < LittleEndianConsts.SHORT_SIZE))
+            if ( ( remaining > 0 )
+                    && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
              {
                  throw new RecordFormatException(
-                    "Cannot get length of the last string in SSTRecord");
+                        "Cannot get length of the last string in SSTRecord" );
              }
-            if (remaining == LittleEndianConsts.SHORT_SIZE)
+            if ( remaining == LittleEndianConsts.SHORT_SIZE )
              {
-                setExpectedChars(LittleEndian.getShort(data, offset));
+                setExpectedChars( LittleEndian.getShort( data, offset ) );
                  _unfinished_string = "";
                  break;
              }
-            short char_count = LittleEndian.getShort(data, offset);
+            short char_count = LittleEndian.getShort( data, offset );
  
-            setupStringParameters(data, offset, char_count);
-            if (remaining < _total_length_bytes)
+            setupStringParameters( data, offset, char_count );
+            if ( remaining < _total_length_bytes )
              {
-                setExpectedChars(calculateCharCount(_total_length_bytes
-                                                    - remaining));
-                char_count          -= getExpectedChars();
+                setExpectedChars( calculateCharCount( _total_length_bytes
+                        - remaining ) );
+                char_count -= getExpectedChars();
                  _total_length_bytes = remaining;
              }
              else
              {
-                setExpectedChars(0);
+                setExpectedChars( 0 );
              }
-            processString(data, offset, char_count);
+            processString( data, offset, char_count );
              offset += _total_length_bytes;
-            if (getExpectedChars() != 0)
+            if ( getExpectedChars() != 0 )
              {
                  break;
              }
          }
      }
  
-    private void setupStringParameters(final byte [] data, final int index,
-                                       final int char_count)
+    private void setupStringParameters( final byte[] data, final int index,
+                                        final int char_count )
      {
-        byte flag = data[ index + LittleEndianConsts.SHORT_SIZE ];
+        byte flag = data[index + LittleEndianConsts.SHORT_SIZE];
  
-        _wide_char = (flag & 1) == 1;
-        boolean extended      = (flag & 4) == 4;
-        boolean formatted_run = (flag & 8) == 8;
+        _wide_char = ( flag & 1 ) == 1;
+        boolean extended = ( flag & 4 ) == 4;
+        boolean formatted_run = ( flag & 8 ) == 8;
  
-        _total_length_bytes = _string_minimal_overhead
-                              + calculateByteCount(char_count);
-        _string_data_offset = _string_minimal_overhead;
-        if (formatted_run)
+        _total_length_bytes = STRING_MINIMAL_OVERHEAD
+                + calculateByteCount( char_count );
+        _string_data_offset = STRING_MINIMAL_OVERHEAD;
+        if ( formatted_run )
          {
-            short run_count = LittleEndian.getShort(data,
-                                                    index
-                                                    + _string_data_offset);
+            short run_count = LittleEndian.getShort( data,
+                    index
+                    + _string_data_offset );
  
              _string_data_offset += LittleEndianConsts.SHORT_SIZE;
              _total_length_bytes += LittleEndianConsts.SHORT_SIZE
-                                   + (LittleEndianConsts.INT_SIZE
-                                      * run_count);
+                    + ( LittleEndianConsts.INT_SIZE
+                    * run_count );
          }
-        if (extended)
+        if ( extended )
          {
-            int extension_length = LittleEndian.getInt(data,
-                                                       index
-                                                       + _string_data_offset);
+            int extension_length = LittleEndian.getInt( data,
+                    index
+                    + _string_data_offset );
  
              _string_data_offset += LittleEndianConsts.INT_SIZE;
              _total_length_bytes += LittleEndianConsts.INT_SIZE
-                                   + extension_length;
+                    + extension_length;
          }
      }
  
-    private void processString(final byte [] data, final int index,
-                               final short char_count)
+    private void processString( final byte[] data, final int index,
+                                final short char_count )
      {
-        byte[] str_data = new byte[ _total_length_bytes ];
-        int    length   = _string_minimal_overhead
-                          + calculateByteCount(char_count);
-        byte[] bstring  = new byte[ length ];
+        byte[] str_data = new byte[_total_length_bytes];
+        int length = STRING_MINIMAL_OVERHEAD
+                + calculateByteCount( char_count );
+        byte[] bstring = new byte[length];
  
-        System.arraycopy(data, index, str_data, 0, str_data.length);
+        System.arraycopy( data, index, str_data, 0, str_data.length );
          int offset = 0;
  
-        LittleEndian.putShort(bstring, offset, char_count);
-        offset            += LittleEndianConsts.SHORT_SIZE;
-        bstring[ offset ] = str_data[ offset ];
-        System.arraycopy(str_data, _string_data_offset, bstring,
-                         _string_minimal_overhead,
-                         bstring.length - _string_minimal_overhead);
-        UnicodeString string = new UnicodeString(UnicodeString.sid,
-                                                 ( short ) bstring.length,
-                                                 bstring);
-
-        if (getExpectedChars() != 0)
+        LittleEndian.putShort( bstring, offset, char_count );
+        offset += LittleEndianConsts.SHORT_SIZE;
+        bstring[offset] = str_data[offset];
+        System.arraycopy( str_data, _string_data_offset, bstring,
+                STRING_MINIMAL_OVERHEAD,
+                bstring.length - STRING_MINIMAL_OVERHEAD );
+        UnicodeString string = new UnicodeString( UnicodeString.sid,
+                (short) bstring.length,
+                bstring );
+
+        if ( getExpectedChars() != 0 )
          {
              _unfinished_string = string.getString();
          }
          else
          {
-            Integer integer = new Integer(field_3_strings.size());
-
-            field_3_strings.put(integer, string);
+            Integer integer = new Integer( field_3_strings.size() );
+
+            // This retry loop is a nasty hack that lets us get around the issue of duplicate
+            // strings in the SST record.  There should never be duplicates but because we don't
+            // handle rich text records correctly this may occur.  Also some Excel alternatives
+            // do not seem to add strings to this table correctly.
+            //
+            // The hack bit is that we add spaces to the end of the string until we don't get an
+            // illegal argument exception when adding.  One day we will have to deal with this
+            // more gracefully.
+            boolean added = false;
+            while ( !added )
+            {
+                try
+                {
+                    field_3_strings.put( integer, string );
+                    added = true;
+                }
+                catch ( IllegalArgumentException duplicateValue )
+                {
+                    string.setString( string.getString() + " " );
+                }
+            }
          }
      }
  
-    private void setExpectedChars(final int count)
+    private void setExpectedChars( final int count )
      {
          __expected_chars = count;
      }
  
-    private int calculateByteCount(final int character_count)
+    private int calculateByteCount( final int character_count )
      {
-        return character_count * (_wide_char ? LittleEndianConsts.SHORT_SIZE
-                                             : LittleEndianConsts.BYTE_SIZE);
+        return character_count * ( _wide_char ? LittleEndianConsts.SHORT_SIZE
+                : LittleEndianConsts.BYTE_SIZE );
      }
  
-    private int calculateCharCount(final int byte_count)
+    private int calculateCharCount( final int byte_count )
+    {
+        return byte_count / ( _wide_char ? LittleEndianConsts.SHORT_SIZE
+                : LittleEndianConsts.BYTE_SIZE );
+    }
+
+    /**
+     * Called by the class that is responsible for writing this record.
+     * Serialization is delegated to an SSTSerializer, which is handed the
+     * supplied offset and byte array.
+     *
+     * @return the int returned by SSTSerializer.serialize (not a byte array)
+     */
+
+    public int serialize( int offset, byte[] data )
      {
-        return byte_count / (_wide_char ? LittleEndianConsts.SHORT_SIZE
-                                        : LittleEndianConsts.BYTE_SIZE);
+        SSTSerializer serializer = new SSTSerializer(
+                _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
+        return serializer.serialize( offset, data );
      }
  
+
      // we can probably simplify this later...this calculates the size
      // w/o serializing but still is a bit slow
      public int getRecordSize()
      {
-        _record_lengths = new ArrayList();
-        int retval      = 0;
-        int unicodesize = calculateUnicodeSize();
-
-        if (unicodesize > _max_data_space)
-        {
-            UnicodeString unistr             = null;
-            int           stringreminant     = 0;
-            int           unipos             = 0;
-            boolean       lastneedcontinue   = false;
-            int           stringbyteswritten = 0;
-            boolean       finished           = false;
-            boolean       first_record       = true;
-            int           totalWritten       = 0;
-
-            while (!finished)
-            {
-                int record = 0;
-                int pos    = 0;
-
-                if (first_record)
-                {
-
-                    // writing SST record
-                    record       = _max;
-                    pos          = 12;
-                    first_record = false;
-                    _record_lengths.add(new Integer(record
-                                                    - _std_record_overhead));
-                }
-                else
-                {
-
-                    // writing continue record
-                    pos = 0;
-                    int to_be_written = (unicodesize - stringbyteswritten)
-                                        + (lastneedcontinue ? 1
-                                                            : 0);
-                    int size          = Math.min(_max - _std_record_overhead,
-                                                 to_be_written);
-
-                    if (size == to_be_written)
-                    {
-                        finished = true;
-                    }
-                    record = size + _std_record_overhead;
-                    _record_lengths.add(new Integer(size));
-                    pos = 4;
-                }
-                if (lastneedcontinue)
-                {
-                    int available = _max - pos;
-
-                    if (stringreminant <= available)
-                    {
-
-                        // write reminant
-                        stringbyteswritten += stringreminant - 1;
-                        pos                += stringreminant;
-                        lastneedcontinue   = false;
-                    }
-                    else
-                    {
-
-                        // write as much of the remnant as possible
-                        int toBeWritten = unistr.maxBrokenLength(available);
-
-                        if (available != toBeWritten)
-                        {
-                            int shortrecord = record
-                                              - (available - toBeWritten);
-
-                            _record_lengths.set(
-                                _record_lengths.size() - 1,
-                                new Integer(
-                                    shortrecord - _std_record_overhead));
-                            record = shortrecord;
-                        }
-                        stringbyteswritten += toBeWritten - 1;
-                        pos                += toBeWritten;
-                        stringreminant     -= toBeWritten - 1;
-                        lastneedcontinue   = true;
-                    }
-                }
-                for (; unipos < field_3_strings.size(); unipos++)
-                {
-                    int     available = _max - pos;
-                    Integer intunipos = new Integer(unipos);
-
-                    unistr =
-                        (( UnicodeString ) field_3_strings.get(intunipos));
-                    if (unistr.getRecordSize() <= available)
-                    {
-                        stringbyteswritten += unistr.getRecordSize();
-                        pos                += unistr.getRecordSize();
-                    }
-                    else
-                    {
-                        if (available >= _string_minimal_overhead)
-                        {
-                            int toBeWritten =
-                                unistr.maxBrokenLength(available);
-
-                            stringbyteswritten += toBeWritten;
-                            stringreminant     =
-                                (unistr.getRecordSize() - toBeWritten)
-                                + LittleEndianConsts.BYTE_SIZE;
-                            if (available != toBeWritten)
-                            {
-                                int shortrecord = record
-                                                  - (available - toBeWritten);
-
-                                _record_lengths.set(
-                                    _record_lengths.size() - 1,
-                                    new Integer(
-                                        shortrecord - _std_record_overhead));
-                                record = shortrecord;
-                            }
-                            lastneedcontinue = true;
-                            unipos++;
-                        }
-                        else
-                        {
-                            int shortrecord = record - available;
-
-                            _record_lengths.set(
-                                _record_lengths.size() - 1,
-                                new Integer(
-                                    shortrecord - _std_record_overhead));
-                            record = shortrecord;
-                        }
-                        break;
-                    }
-                }
-                totalWritten += record;
-            }
-            retval = totalWritten;
-        }
-        else
-        {
+        SSTSerializer serializer = new SSTSerializer(
+                _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
  
-            // short data: write one simple SST record
-            retval = _sst_record_overhead + unicodesize;
-            _record_lengths.add(new Integer(unicodesize));
-        }
-        return retval;
+        return serializer.getRecordSize();
      }
  
-    private int calculateUnicodeSize()
-    {
-        int retval = 0;
+}
  
-        for (int k = 0; k < field_3_strings.size(); k++)
-        {
-            UnicodeString string =
-                ( UnicodeString ) field_3_strings.get(new Integer(k));
  
-            retval += string.getRecordSize();
-        }
-        return retval;
-    }
-}
diff --git a/src/java/org/apache/poi/hssf/record/SSTSerializer.java b/src/java/org/apache/poi/hssf/record/SSTSerializer.java

new file mode 100644 (file)

index 0000000..5ee3af0
--- /dev/null
+++ b/src/java/org/apache/poi/hssf/record/SSTSerializer.java
@@ -0,0 +1,296 @@
+package org.apache.poi.hssf.record;
+
+import org.apache.poi.util.BinaryTree;
+import org.apache.poi.util.LittleEndianConsts;
+
+import java.util.List;
+import java.util.ArrayList;
+
+class SSTSerializer
+{
+
+    private List recordLengths;
+    private BinaryTree strings;
+    private int numStrings;
+    private int numUniqueStrings;
+    private SSTRecordHeader sstRecordHeader;
+
+    /**
+     * Builds a serializer over the given string tree.
+     *
+     * @param recordLengths     list holding one Integer length per record to be written
+     * @param strings           tree of UnicodeString values looked up by Integer index
+     * @param numStrings        string count handed to the SST header writer
+     * @param numUniqueStrings  unique-string count handed to the SST header writer
+     */
+    public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings )
+    {
+        sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
+        this.numUniqueStrings = numUniqueStrings;
+        this.numStrings = numStrings;
+        this.strings = strings;
+        this.recordLengths = recordLengths;
+    }
+
+    /**
+     * Create a byte array consisting of an SST record and any
+     * required Continue records, ready to be written out.
+     * <p>
+     * If an SST record and any subsequent Continue records are read
+     * in to create this instance, this method should produce a byte
+     * array that is identical to the byte array produced by
+     * concatenating the input records' data.
+     *
+     * @return the byte array
+     */
+    public int serialize( int offset, byte[] data )
+    {
+        int record_size = getRecordSize();
+        int record_length_index = 0;
+
+        if ( calculateUnicodeSize() > SSTRecord.MAX_DATA_SPACE )
+            serializeLargeRecord( record_size, record_length_index, data, offset );
+        else
+            serializeSingleSSTRecord( data, offset, record_length_index );
+        return record_size;
+    }
+
+    /**
+     * Adds up the record size reported by every string in the tree.
+     *
+     * @return sum of <code>getRecordSize()</code> over all strings
+     */
+    private int calculateUnicodeSize()
+    {
+        int total = 0;
+        int index = 0;
+
+        while ( index < strings.size() )
+        {
+            UnicodeString current = getUnicodeString( index );
+
+            total += current.getRecordSize();
+            index++;
+        }
+        return total;
+    }
+
+    // we can probably simplify this later...this calculates the size
+    // w/o serializing but still is a bit slow
+    public int getRecordSize()
+    {
+        recordLengths = new ArrayList();
+        int retval = 0;
+        int unicodesize = calculateUnicodeSize();
+
+        if ( unicodesize > SSTRecord.MAX_DATA_SPACE )
+        {
+            retval = calcRecordSizesForLongStrings( unicodesize );
+        }
+        else
+        {
+            // short data: write one simple SST record
+            retval = SSTRecord.SST_RECORD_OVERHEAD + unicodesize;
+            recordLengths.add( new Integer( unicodesize ) );
+        }
+        return retval;
+    }
+
+    private int calcRecordSizesForLongStrings( int unicodesize )
+    {
+        int retval;
+        UnicodeString unistr = null;          // string being sized; read again by the remnant branch on the next pass
+        int stringreminant = 0;               // size still owed for a split string (set where the split happens)
+        int unipos = 0;                       // index of the next string to account for
+        boolean lastneedcontinue = false;     // true while a split string still has bytes outstanding
+        int stringbyteswritten = 0;           // string bytes accounted for so far, compared against unicodesize
+        boolean finished = false;
+        boolean first_record = true;
+        int totalWritten = 0;                 // sum of the sizes of all records accounted for so far
+        /*
+         * Sizes the SST record and every CONTINUE record that follows it,
+         * without writing any bytes. Each pass of the loop accounts for one
+         * record: it appends that record's length (record size minus
+         * SSTRecord.STD_RECORD_OVERHEAD) to recordLengths and adds the full
+         * record size to totalWritten, which is the value returned.
+         */
+        while ( !finished )
+        {
+            int record = 0;   // full size of the record handled by this pass
+            int pos = 0;      // position within that record; space left is MAX_RECORD_SIZE - pos
+
+            if ( first_record )
+            {
+
+                // first pass: the SST record, assumed to be full-size until shortened below
+                record = SSTRecord.MAX_RECORD_SIZE;
+                pos = 12;   // NOTE(review): presumably SSTRecord.SST_RECORD_OVERHEAD - confirm
+                first_record = false;
+                recordLengths.add( new Integer( record - SSTRecord.STD_RECORD_OVERHEAD ) );
+            }
+            else
+            {
+
+                /*
+                 * later passes: a CONTINUE record sized from what is still
+                 * outstanding, plus one extra byte when a split string is
+                 * being carried over from the previous record
+                 */
+                pos = 0;   // overwritten with 4 once the record length is known
+                int to_be_written = ( unicodesize - stringbyteswritten ) + ( lastneedcontinue ? 1 : 0 );
+                int size = Math.min( SSTRecord.MAX_RECORD_SIZE - SSTRecord.STD_RECORD_OVERHEAD, to_be_written );
+
+                if ( size == to_be_written )
+                {
+                    finished = true;   // the outstanding bytes fit in this record: stop after this pass
+                }
+                record = size + SSTRecord.STD_RECORD_OVERHEAD;
+                recordLengths.add( new Integer( size ) );
+                pos = 4;   // NOTE(review): presumably SSTRecord.STD_RECORD_OVERHEAD - confirm
+            }
+            if ( lastneedcontinue )
+            {
+                int available = SSTRecord.MAX_RECORD_SIZE - pos;
+
+                if ( stringreminant <= available )
+                {
+
+                    /*
+                     * the rest of the split string fits in this record. The
+                     * "- 1" presumably cancels the LittleEndianConsts.BYTE_SIZE
+                     * added to stringreminant at the split - TODO confirm.
+                     */
+                    stringbyteswritten += stringreminant - 1;
+                    pos += stringreminant;
+                    lastneedcontinue = false;
+                }
+                else
+                {
+
+                    // write as much of the remnant as possible
+                    int toBeWritten = unistr.maxBrokenLength( available );
+                    // if maxBrokenLength() differs from the free space, adjust this record and its recordLengths entry
+                    if ( available != toBeWritten )
+                    {
+                        int shortrecord = record - ( available - toBeWritten );
+                        recordLengths.set( recordLengths.size() - 1,
+                                new Integer( shortrecord - SSTRecord.STD_RECORD_OVERHEAD ) );
+                        record = shortrecord;
+                    }
+                    stringbyteswritten += toBeWritten - 1;
+                    pos += toBeWritten;
+                    stringreminant -= toBeWritten - 1;
+                    lastneedcontinue = true;   // part of the string is still outstanding
+                }
+            }
+            for ( ; unipos < strings.size(); unipos++ )
+            {
+                int available = SSTRecord.MAX_RECORD_SIZE - pos;
+                Integer intunipos = new Integer( unipos );
+                // a string that fits is counted whole; otherwise it is split or left for the next record
+                unistr = ( (UnicodeString) strings.get( intunipos ) );
+                if ( unistr.getRecordSize() <= available )
+                {
+                    stringbyteswritten += unistr.getRecordSize();
+                    pos += unistr.getRecordSize();
+                }
+                else
+                {
+                    if ( available >= SSTRecord.STRING_MINIMAL_OVERHEAD )
+                    {
+                        int toBeWritten =
+                                unistr.maxBrokenLength( available );
+                        // the remnant is the part not placed here plus one LittleEndianConsts.BYTE_SIZE
+                        stringbyteswritten += toBeWritten;
+                        stringreminant =
+                                ( unistr.getRecordSize() - toBeWritten )
+                                + LittleEndianConsts.BYTE_SIZE;
+                        if ( available != toBeWritten )
+                        {
+                            int shortrecord = record
+                                    - ( available - toBeWritten );
+
+                            recordLengths.set(
+                                    recordLengths.size() - 1,
+                                    new Integer(
+                                            shortrecord - SSTRecord.STD_RECORD_OVERHEAD ) );
+                            record = shortrecord;
+                        }
+                        lastneedcontinue = true;
+                        unipos++;   // break skips the loop increment, so step past the split string here
+                    }
+                    else
+                    {
+                        int shortrecord = record - available;
+                        // drop the unused space from this record; unipos is not advanced, so the string is retried next pass
+                        recordLengths.set( recordLengths.size() - 1,
+                                new Integer( shortrecord - SSTRecord.STD_RECORD_OVERHEAD ) );
+                        record = shortrecord;
+                    }
+                    break;   // nothing more is placed in this record
+                }
+            }
+            totalWritten += record;
+        }
+        retval = totalWritten;
+
+        return retval;
+    }
+
+
+    /**
+     * Writes the SST header followed by every string into one SST record;
+     * used when the string data fits without CONTINUE records.
+     *
+     * @param data                 destination buffer
+     * @param offset               index in <code>data</code> where the record starts
+     * @param record_length_index  index into recordLengths of this record's length
+     */
+    private void serializeSingleSSTRecord( byte[] data, int offset, int record_length_index )
+    {
+        int len = ( (Integer) recordLengths.get( record_length_index ) ).intValue();
+        int recordSize = SSTRecord.SST_RECORD_OVERHEAD + len - SSTRecord.STD_RECORD_OVERHEAD;
+        sstRecordHeader.writeSSTHeader( data, offset, recordSize );
+        int pos = SSTRecord.SST_RECORD_OVERHEAD;
+
+        for ( int k = 0; k < strings.size(); k++ )
+        {
+            // look the string up once per pass rather than once per use
+            UnicodeString unistr = getUnicodeString( k );
+            byte[] unistrBytes = unistr.serialize();
+            int unistrSize = unistr.getRecordSize();
+
+            System.arraycopy( unistrBytes, 0, data, pos + offset, unistrSize );
+            pos += unistrSize;
+        }
+    }
+
+    /**
+     * Writes string data that is too big for one record: an SST record first,
+     * then CONTINUE records, one per entry in recordLengths, until
+     * <code>record_size</code> bytes have been produced. A string that does not
+     * fit in the space left is split and its unwritten tail is carried over to
+     * the start of the next record. As noted by the original author, splitting
+     * a string across to a CONTINUE record can change its option field.
+     */
+    private void serializeLargeRecord( int record_size, int record_length_index, byte[] buffer, int offset )
+    {
+        byte[] pendingTail = null;
+        boolean tailPending = false;
+        boolean isFirstRecord = true;
+        int nextString = 0;
+        int written = 0;
+
+        while ( written != record_size )
+        {
+            Integer lengthEntry = (Integer) recordLengths.get( record_length_index++ );
+            int dataLength = lengthEntry.intValue();
+            RecordProcessor processor = new RecordProcessor( buffer, dataLength, numStrings, numUniqueStrings );
+
+            // header goes first; the flag tells the processor whether this is the first record
+            processor.writeRecordHeader( offset, written, dataLength, isFirstRecord );
+            isFirstRecord = false;
+
+            // finish off the string that the previous record had to cut short
+            if ( tailPending )
+            {
+                tailPending = pendingTail.length > processor.getAvailable();
+                pendingTail = processor.writeStringRemainder( tailPending, pendingTail, offset, written );
+            }
+
+            // then pack in further strings until one no longer fits
+            while ( nextString < strings.size() )
+            {
+                UnicodeString candidate = getUnicodeString( nextString );
+
+                if ( candidate.getRecordSize() > processor.getAvailable() )
+                {
+                    if ( processor.getAvailable() >= SSTRecord.STRING_MINIMAL_OVERHEAD )
+                    {
+                        // room for the start of the string: write what fits, keep the tail
+                        pendingTail = processor.writePartString( candidate, offset, written );
+                        tailPending = true;
+                        nextString++;
+                    }
+                    break;
+                }
+                processor.writeWholeString( candidate, offset, written );
+                nextString++;
+            }
+            written += dataLength + SSTRecord.STD_RECORD_OVERHEAD;
+        }
+    }
+
+    /**
+     * Fetches the string stored in the tree under the given index.
+     *
+     * @param index position of the string; boxed into an Integer key for the lookup
+     * @return the value found, cast to UnicodeString
+     */
+    private UnicodeString getUnicodeString( int index )
+    {
+        return (UnicodeString) strings.get( new Integer( index ) );
+    }
+
+}
diff --git a/src/java/org/apache/poi/hssf/record/UnicodeString.java b/src/java/org/apache/poi/hssf/record/UnicodeString.java

index 097be19b1c999da36c004f6c841eeb9653499d28..c303a42e3c56b2f71efd846bc92cd484fb2d7f2a 100644 (file)
--- a/src/java/org/apache/poi/hssf/record/UnicodeString.java
+++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java
@@ -280,7 +280,8 @@ public class UnicodeString
      {
          int charsize = 1;
  
-        if (getOptionFlags() == 1)
+        // Note: I suspect this may not be right
+        if ((getOptionFlags() & 0x01) == 1)
          {
              charsize = 2;
          }
@@ -290,7 +291,7 @@ public class UnicodeString
          data[ 2 + offset ] = getOptionFlags();
  
  //        System.out.println("Unicode: We've got "+retval[2]+" for our option flag");
-        if (getOptionFlags() == 0)
+        if ((getOptionFlags() & 0x01) == 0)
          {
              StringUtil.putCompressedUnicode(getString(), data, 0x3 + offset);
          }
@@ -306,7 +307,7 @@ public class UnicodeString
      {
          int charsize = 1;
  
-        if (getOptionFlags() == 1)
+        if ((getOptionFlags() & 0x01) == 1)
          {
              charsize = 2;
          }
author	Glen Stampoultzis <glens@apache.org>
	Mon, 27 May 2002 11:43:27 +0000 (11:43 +0000)
committer	Glen Stampoultzis <glens@apache.org>
	Mon, 27 May 2002 11:43:27 +0000 (11:43 +0000)
src/java/org/apache/poi/hssf/model/Workbook.java		patch \| blob \| history
src/java/org/apache/poi/hssf/record/ContinueRecord.java		patch \| blob \| history
src/java/org/apache/poi/hssf/record/RecordProcessor.java	[new file with mode: 0644]	patch \| blob
src/java/org/apache/poi/hssf/record/SSTRecord.java		patch \| blob \| history
src/java/org/apache/poi/hssf/record/SSTSerializer.java	[new file with mode: 0644]	patch \| blob
src/java/org/apache/poi/hssf/record/UnicodeString.java		patch \| blob \| history