]> source.dussan.org Git - poi.git/commitdiff
Implement the ExtRst part of a UnicodeString
authorNick Burch <nick@apache.org>
Tue, 19 Jan 2010 12:04:14 +0000 (12:04 +0000)
committerNick Burch <nick@apache.org>
Tue, 19 Jan 2010 12:04:14 +0000 (12:04 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@900746 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/hssf/record/common/UnicodeString.java
src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java
src/testcases/org/apache/poi/hssf/record/common/TestUnicodeString.java
src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java
test-data/spreadsheet/47847.xls [new file with mode: 0644]

index bd1758e819cdd1a12880b80c3a730becba2302fa..b10d2820463f937b68dcf81803a82ebbdc0a9876 100644 (file)
@@ -26,9 +26,9 @@ import org.apache.poi.hssf.record.RecordInputStream;
 import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
 import org.apache.poi.util.BitField;
 import org.apache.poi.util.BitFieldFactory;
-import org.apache.poi.util.HexDump;
 import org.apache.poi.util.LittleEndianInput;
 import org.apache.poi.util.LittleEndianOutput;
+import org.apache.poi.util.StringUtil;
 
 /**
  * Title: Unicode String<p/>
@@ -42,8 +42,8 @@ public final class UnicodeString implements Comparable<UnicodeString> {
     private short             field_1_charCount;
     private byte              field_2_optionflags;
     private String            field_3_string;
-    private List<FormatRun> field_4_format_runs;
-    private byte[] field_5_ext_rst;
+    private List<FormatRun>   field_4_format_runs;
+    private ExtRst            field_5_ext_rst;
     private static final BitField   highByte  = BitFieldFactory.getInstance(0x1);
     // 0x2 is reserved
     private static final BitField   extBit    = BitFieldFactory.getInstance(0x4);
@@ -98,6 +98,225 @@ public final class UnicodeString implements Comparable<UnicodeString> {
             out.writeShort(_fontIndex);
         }
     }
+    
+    // See page 681
+    public static class ExtRst implements Comparable<ExtRst> {
+       private short reserved;
+       
+       // This is a Phs (see page 881)
+       private short formattingFontIndex;
+       private short formattingOptions;
+       
+       // This is a RPHSSub (see page 894)
+       private int numberOfRuns;
+       private String phoneticText;
+       
+       // This is an array of PhRuns (see page 881)
+       private PhRun[] phRuns;
+       // Sometimes there's some cruft at the end
+       private byte[] extraData;
+
+       private void populateEmpty() {
+          reserved = 1;
+          phoneticText = "";
+          phRuns = new PhRun[0];
+          extraData = new byte[0];
+       }
+       
+       protected ExtRst() {
+          populateEmpty();
+       }
+       protected ExtRst(LittleEndianInput in, int expectedLength) {
+          reserved = in.readShort();
+          
+          // Old style detection (Reserved = 0xFF)
+          if(reserved == -1) {
+             populateEmpty();
+             return;
+          }
+          
+          // Spot corrupt records
+          if(reserved != 1) {
+             System.err.println("Warning - ExtRst was has wrong magic marker, expecting 1 but found " + reserved + " - ignoring");
+             // Grab all the remaining data, and ignore it
+             for(int i=0; i<expectedLength-2; i++) {
+                in.readByte();
+             }
+             // And make us be empty
+             populateEmpty();
+             return;
+          }
+          
+          // Carry on reading in as normal
+          short stringDataSize = in.readShort();
+          
+          formattingFontIndex = in.readShort();
+          formattingOptions   = in.readShort();
+          
+          // RPHSSub
+          numberOfRuns = in.readUShort();
+          short length1 = in.readShort();
+          // No really. Someone clearly forgot to read
+          //  the docs on their datastructure...
+          short length2 = in.readShort();
+          // And sometimes they write out garbage :(
+          if(length1 == 0 && length2 > 0) {
+             length2 = 0;
+          }
+          if(length1 != length2) {
+             throw new IllegalStateException(
+                   "The two length fields of the Phonetic Text don't agree! " +
+                   length1 + " vs " + length2
+             );
+          }
+          phoneticText = StringUtil.readUnicodeLE(in, length1);
+          
+          int runData = stringDataSize - 4 - 6 - (2*phoneticText.length());
+          int numRuns = (runData / 6);
+          phRuns = new PhRun[numRuns];
+          for(int i=0; i<phRuns.length; i++) {
+             phRuns[i] = new PhRun(in);
+          }
+
+          int extraDataLength = runData - (numRuns*6);
+          if(extraDataLength < 0) {
+             System.err.println("Warning - ExtRst overran by " + (0-extraDataLength) + " bytes");
+             extraDataLength = 0;
+          }
+          extraData = new byte[extraDataLength];
+          for(int i=0; i<extraData.length; i++) {
+             extraData[i] = in.readByte();
+          }
+       }
+       /**
+        * Returns our size, excluding our 
+        *  4 byte header
+        */
+       protected int getDataSize() {
+          return 4 + 6 + (2*phoneticText.length()) + 
+             (6*phRuns.length) + extraData.length;
+       }
+       protected void serialize(ContinuableRecordOutput out) {
+          int dataSize = getDataSize();
+          
+          out.writeContinueIfRequired(8);
+          out.writeShort(reserved);
+          out.writeShort(dataSize);
+          out.writeShort(formattingFontIndex);
+          out.writeShort(formattingOptions);
+          
+          out.writeContinueIfRequired(6);
+          out.writeShort(numberOfRuns);
+          out.writeShort(phoneticText.length());
+          out.writeShort(phoneticText.length());
+          
+          out.writeContinueIfRequired(phoneticText.length()*2);
+          StringUtil.putUnicodeLE(phoneticText, out);
+          
+          for(int i=0; i<phRuns.length; i++) {
+             phRuns[i].serialize(out);
+          }
+          
+          out.write(extraData);
+       }
+
+       public boolean equals(Object obj) {
+          if(! (obj instanceof ExtRst)) {
+             return false;
+          }
+          ExtRst other = (ExtRst)obj;
+          return (compareTo(other) == 0);
+       }
+       public int compareTo(ExtRst o) {
+          int result;
+          
+          result = reserved - o.reserved;
+          if(result != 0) return result;
+          result = formattingFontIndex - o.formattingFontIndex;
+          if(result != 0) return result;
+          result = formattingOptions - o.formattingOptions;
+          if(result != 0) return result;
+          result = numberOfRuns - o.numberOfRuns;
+          if(result != 0) return result;
+          
+          result = phoneticText.compareTo(o.phoneticText);
+          if(result != 0) return result;
+          
+          result = phRuns.length - o.phRuns.length;
+          if(result != 0) return result;
+          for(int i=0; i<phRuns.length; i++) {
+             result = phRuns[i].phoneticTextFirstCharacterOffset - o.phRuns[i].phoneticTextFirstCharacterOffset;
+             if(result != 0) return result;
+             result = phRuns[i].realTextFirstCharacterOffset - o.phRuns[i].realTextFirstCharacterOffset;
+             if(result != 0) return result;
+             result = phRuns[i].realTextFirstCharacterOffset - o.phRuns[i].realTextLength;
+             if(result != 0) return result;
+          }
+          
+          result = extraData.length - o.extraData.length;
+          if(result != 0) return result;
+          
+          // If we get here, it's the same
+          return 0;
+       }
+       
+       protected ExtRst clone() {
+          ExtRst ext = new ExtRst();
+          ext.reserved = reserved;
+          ext.formattingFontIndex = formattingFontIndex;
+          ext.formattingOptions = formattingOptions;
+          ext.numberOfRuns = numberOfRuns;
+          ext.phoneticText = new String(phoneticText);
+          ext.phRuns = new PhRun[phRuns.length];
+          for(int i=0; i<ext.phRuns.length; i++) {
+             ext.phRuns[i] = new PhRun(
+                   phRuns[i].phoneticTextFirstCharacterOffset,
+                   phRuns[i].realTextFirstCharacterOffset,
+                   phRuns[i].realTextLength
+             );
+          }
+          return ext;
+       }
+       
+       public short getFormattingFontIndex() {
+         return formattingFontIndex;
+       }
+       public short getFormattingOptions() {
+         return formattingOptions;
+       }
+       public int getNumberOfRuns() {
+         return numberOfRuns;
+       }
+       public String getPhoneticText() {
+         return phoneticText;
+       }
+       public PhRun[] getPhRuns() {
+         return phRuns;
+       }
+    }
+    public static class PhRun {
+       private int phoneticTextFirstCharacterOffset;
+       private int realTextFirstCharacterOffset;
+       private int realTextLength;
+       
+       public PhRun(int phoneticTextFirstCharacterOffset,
+            int realTextFirstCharacterOffset, int realTextLength) {
+         this.phoneticTextFirstCharacterOffset = phoneticTextFirstCharacterOffset;
+         this.realTextFirstCharacterOffset = realTextFirstCharacterOffset;
+         this.realTextLength = realTextLength;
+      }
+      private PhRun(LittleEndianInput in) {
+          phoneticTextFirstCharacterOffset = in.readUShort();
+          realTextFirstCharacterOffset = in.readUShort();
+          realTextLength = in.readUShort();
+       }
+       private void serialize(ContinuableRecordOutput out) {
+          out.writeContinueIfRequired(6);
+          out.writeShort(phoneticTextFirstCharacterOffset);
+          out.writeShort(realTextFirstCharacterOffset);
+          out.writeShort(realTextLength);
+       }
+    }
 
     private UnicodeString() {
      //Used for clone method.
@@ -160,22 +379,20 @@ public final class UnicodeString implements Comparable<UnicodeString> {
             return false;
         }
 
-        //Well the format runs are equal as well!, better check the ExtRst data
-        //Which by the way we dont know how to decode!
-        if ((field_5_ext_rst == null) && (other.field_5_ext_rst == null))
-          return true;
-        if (((field_5_ext_rst == null) && (other.field_5_ext_rst != null)) ||
-            ((field_5_ext_rst != null) && (other.field_5_ext_rst == null)))
-          return false;
-        size = field_5_ext_rst.length;
-        if (size != field_5_ext_rst.length)
-          return false;
-
-        //Check individual bytes!
-        for (int i=0;i<size;i++) {
-          if (field_5_ext_rst[i] != other.field_5_ext_rst[i])
-            return false;
+        // Well the format runs are equal as well!, better check the ExtRst data
+        if(field_5_ext_rst == null && other.field_5_ext_rst == null) {
+           // Good
+        } else if(field_5_ext_rst != null && other.field_5_ext_rst != null) {
+           int extCmp = field_5_ext_rst.compareTo(other.field_5_ext_rst);
+           if(extCmp == 0) {
+              // Good
+           } else {
+              return false;
+           }
+        } else {
+           return false;
         }
+
         //Phew!! After all of that we have finally worked out that the strings
         //are identical.
         return true;
@@ -218,10 +435,10 @@ public final class UnicodeString implements Comparable<UnicodeString> {
         }
 
         if (isExtendedText() && (extensionLength > 0)) {
-          field_5_ext_rst = new byte[extensionLength];
-          for (int i=0;i<extensionLength;i++) {
-            field_5_ext_rst[i] = in.readByte();
-            }
+          field_5_ext_rst = new ExtRst(in, extensionLength);
+          if(field_5_ext_rst.getDataSize()+4 != extensionLength) {
+             System.err.println("ExtRst was supposed to be " + extensionLength + " bytes long, but seems to actually be " + (field_5_ext_rst.getDataSize()+4));
+          }
         }
     }
 
@@ -395,10 +612,15 @@ public final class UnicodeString implements Comparable<UnicodeString> {
     }
 
 
-    void setExtendedRst(byte[] ext_rst) {
-      if (ext_rst != null)
-        field_2_optionflags = extBit.setByte(field_2_optionflags);
-      else field_2_optionflags = extBit.clearByte(field_2_optionflags);
+    public ExtRst getExtendedRst() { // may be null, if none was read in or set
+       return this.field_5_ext_rst;
+    }
+    void setExtendedRst(ExtRst ext_rst) {
+      if (ext_rst != null) {
+         field_2_optionflags = extBit.setByte(field_2_optionflags);
+      } else {
+         field_2_optionflags = extBit.clearByte(field_2_optionflags);
+      }
       this.field_5_ext_rst = ext_rst;
     }
 
@@ -452,12 +674,18 @@ public final class UnicodeString implements Comparable<UnicodeString> {
           }
         }
         if (field_5_ext_rst != null) {
-          buffer.append("    .field_5_ext_rst          = ").append("\n").append(HexDump.toHex(field_5_ext_rst)).append("\n");
+          buffer.append("    .field_5_ext_rst          = ").append("\n");
+          buffer.append( field_5_ext_rst.toString() ).append("\n");
         }
         buffer.append("[/UNICODESTRING]\n");
         return buffer.toString();
     }
 
+    /**
+     * Serialises out the String. There are special rules
+     *  about where we can and can't split onto
+     *  Continue records.
+     */
     public void serialize(ContinuableRecordOutput out) {
         int numberOfRichTextRuns = 0;
         int extendedDataSize = 0;
@@ -465,9 +693,11 @@ public final class UnicodeString implements Comparable<UnicodeString> {
             numberOfRichTextRuns = field_4_format_runs.size();
         }
         if (isExtendedText() && field_5_ext_rst != null) {
-            extendedDataSize = field_5_ext_rst.length;
+            extendedDataSize = 4 + field_5_ext_rst.getDataSize();
         }
-
+       
+        // Serialise the bulk of the String
+        // The writeString handles tricky continue stuff for us
         out.writeString(field_3_string, numberOfRichTextRuns, extendedDataSize);
 
         if (numberOfRichTextRuns > 0) {
@@ -477,25 +707,13 @@ public final class UnicodeString implements Comparable<UnicodeString> {
               if (out.getAvailableSpace() < 4) {
                   out.writeContinue();
               }
-                FormatRun r = field_4_format_runs.get(i);
-                r.serialize(out);
+              FormatRun r = field_4_format_runs.get(i);
+              r.serialize(out);
           }
         }
 
         if (extendedDataSize > 0) {
-            // OK ExtRst is actually not documented, so i am going to hope
-            // that we can actually continue on byte boundaries
-
-            int extPos = 0;
-            while (true) {
-                int nBytesToWrite = Math.min(extendedDataSize - extPos, out.getAvailableSpace());
-                out.write(field_5_ext_rst, extPos, nBytesToWrite);
-                extPos += nBytesToWrite;
-                if (extPos >= extendedDataSize) {
-                    break;
-                }
-                out.writeContinue();
-            }
+           field_5_ext_rst.serialize(out);
         }
     }
 
@@ -534,7 +752,6 @@ public final class UnicodeString implements Comparable<UnicodeString> {
         }
 
         //Well the format runs are equal as well!, better check the ExtRst data
-        //Which by the way we don't know how to decode!
         if ((field_5_ext_rst == null) && (str.field_5_ext_rst == null))
           return 0;
         if ((field_5_ext_rst == null) && (str.field_5_ext_rst != null))
@@ -542,15 +759,10 @@ public final class UnicodeString implements Comparable<UnicodeString> {
         if ((field_5_ext_rst != null) && (str.field_5_ext_rst == null))
           return -1;
 
-        size = field_5_ext_rst.length;
-        if (size != field_5_ext_rst.length)
-          return size - field_5_ext_rst.length;
+        result = field_5_ext_rst.compareTo(str.field_5_ext_rst); 
+        if (result != 0)
+           return result;
 
-        //Check individual bytes!
-        for (int i=0;i<size;i++) {
-          if (field_5_ext_rst[i] != str.field_5_ext_rst[i])
-            return field_5_ext_rst[i] - str.field_5_ext_rst[i];
-        }
         //Phew!! After all of that we have finally worked out that the strings
         //are identical.
         return 0;
@@ -575,12 +787,10 @@ public final class UnicodeString implements Comparable<UnicodeString> {
           str.field_4_format_runs = new ArrayList<FormatRun>();
           for (FormatRun r : field_4_format_runs) {
             str.field_4_format_runs.add(new FormatRun(r._character, r._fontIndex));
-            }
+          }
         }
         if (field_5_ext_rst != null) {
-          str.field_5_ext_rst = new byte[field_5_ext_rst.length];
-          System.arraycopy(field_5_ext_rst, 0, str.field_5_ext_rst, 0,
-                           field_5_ext_rst.length);
+           str.field_5_ext_rst = field_5_ext_rst.clone();
         }
 
         return str;
index b171a77a134678a02f7ae3ab70537351bd59a6ad..80380ac5e299f290b34efa623a8c61f28ecdb1fd 100644 (file)
@@ -33,9 +33,8 @@ public final class TestSSTRecordSizeCalculator extends TestCase {
        private static final int COMPRESSED_PLAIN_STRING_OVERHEAD = 3;
        private static final int OPTION_FIELD_SIZE = 1;
        
-       private final IntMapper strings = new IntMapper();
+       private final IntMapper<UnicodeString> strings = new IntMapper<UnicodeString>();
 
-       
        private void confirmSize(int expectedSize) {
                ContinuableRecordOutput cro = ContinuableRecordOutput.createForCountingOnly();
                SSTSerializer ss = new SSTSerializer(strings, 0, 0);
index 6ecab71a592a901f572f398bead1c1a8d03f43a5..591042d7eb35b4e8dd9d99abead81b3ca63c4a4f 100644 (file)
 
 package org.apache.poi.hssf.record.common;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
 import junit.framework.TestCase;
 
 import org.apache.poi.hssf.record.ContinueRecord;
 import org.apache.poi.hssf.record.RecordInputStream;
 import org.apache.poi.hssf.record.SSTRecord;
+import org.apache.poi.hssf.record.common.UnicodeString.ExtRst;
+import org.apache.poi.hssf.record.common.UnicodeString.FormatRun;
 import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
+import org.apache.poi.util.LittleEndianInputStream;
+import org.apache.poi.util.LittleEndianOutputStream;
 
 /**
  * Tests that {@link UnicodeString} record size calculates correctly.  The record size
@@ -85,13 +92,23 @@ public final class TestUnicodeString extends TestCase {
         //Test a compressed small string that has rich text and extended text
         s.setString("Test");
         s.setOptionFlags((byte)0xC);
-        s.setExtendedRst(new byte[]{(byte)0x1,(byte)0x2,(byte)0x3,(byte)0x4,(byte)0x5});
-        confirmSize(26, s);
+        confirmSize(17, s);
+        
+        // Extended phonetics data
+        // Minimum size is 14
+        // Also adds 4 bytes to hold the length
+        s.setExtendedRst(
+              new ExtRst()
+        );
+        confirmSize(35, s);
 
         //Test a uncompressed small string that has rich text and extended text
         s.setString(STR_16_BIT);
         s.setOptionFlags((byte)0xD);
-        confirmSize(30, s);
+        confirmSize(39, s);
+        
+        s.setExtendedRst(null);
+        confirmSize(21, s);
     }
 
     public void testPerfectStringSize() {
@@ -144,6 +161,146 @@ public final class TestUnicodeString extends TestCase {
       UnicodeString s = makeUnicodeString(strSize);
       confirmSize(MAX_DATA_SIZE*2, s);
     }
+    
+    /** Checks a FormatRun's getters, and that it round-trips through 4 bytes */
+    public void testFormatRun() throws Exception {
+       FormatRun fr = new FormatRun((short)4, (short)0x15c);
+       assertEquals(4, fr.getCharacterPos());
+       assertEquals(0x15c, fr.getFontIndex());
+       // Serialise it
+       ByteArrayOutputStream baos = new ByteArrayOutputStream();
+       LittleEndianOutputStream out = new LittleEndianOutputStream(baos);
+       fr.serialize(out);
+       // Should be two little endian shorts, the position and then the font
+       byte[] b = baos.toByteArray();
+       assertEquals(4, b.length);
+       assertEquals(4, b[0]);
+       assertEquals(0, b[1]);
+       assertEquals(0x5c, b[2]);
+       assertEquals(0x01, b[3]);
+       // Read it back in again, and check it's unchanged
+       LittleEndianInputStream inp = new LittleEndianInputStream(
+             new ByteArrayInputStream(b)
+       );
+       fr = new FormatRun(inp);
+       assertEquals(4, fr.getCharacterPos());
+       assertEquals(0x15c, fr.getFontIndex());
+    }
+    
+    /** An empty ExtRst should serialise to 14 bytes, and read back in as empty */
+    public void testExtRstFromEmpty() throws Exception {
+       ExtRst ext = new ExtRst();
+       // A new ExtRst starts out empty
+       assertEquals(0, ext.getNumberOfRuns());
+       assertEquals(0, ext.getFormattingFontIndex());
+       assertEquals(0, ext.getFormattingOptions());
+       assertEquals("", ext.getPhoneticText());
+       assertEquals(0, ext.getPhRuns().length);
+       assertEquals(10, ext.getDataSize()); // Excludes 4 byte header
+       // Serialise it, and see what ends up in the byte stream
+       ByteArrayOutputStream baos = new ByteArrayOutputStream();
+       LittleEndianOutputStream out = new LittleEndianOutputStream(baos);
+       ContinuableRecordOutput cout = new ContinuableRecordOutput(out, 0xffff);
+       
+       ext.serialize(cout);
+       cout.writeContinue();
+       
+       byte[] b = baos.toByteArray();
+       assertEquals(20, b.length);
+       
+       // First 4 bytes from the outputstream
+       assertEquals(-1, b[0]);
+       assertEquals(-1, b[1]);
+       assertEquals(14, b[2]);
+       assertEquals(00, b[3]);
+       
+       // Reserved
+       assertEquals(1, b[4]);
+       assertEquals(0, b[5]);
+       // Data size
+       assertEquals(10, b[6]);
+       assertEquals(00, b[7]);
+       // Font*2
+       assertEquals(0, b[8]);
+       assertEquals(0, b[9]);
+       assertEquals(0, b[10]);
+       assertEquals(0, b[11]);
+       // 0 Runs
+       assertEquals(0, b[12]);
+       assertEquals(0, b[13]);
+       // Size=0, *2
+       assertEquals(0, b[14]);
+       assertEquals(0, b[15]);
+       assertEquals(0, b[16]);
+       assertEquals(0, b[17]);
+       
+       // Last 2 bytes from the outputstream
+       assertEquals(ContinueRecord.sid, b[18]);
+       assertEquals(0, b[19]);
+       
+       // Load in again and re-test
+       byte[] data = new byte[14];
+       System.arraycopy(b, 4, data, 0, data.length);
+       LittleEndianInputStream inp = new LittleEndianInputStream(
+             new ByteArrayInputStream(data)
+       );
+       ext = new ExtRst(inp, data.length);
+       
+       assertEquals(0, ext.getNumberOfRuns());
+       assertEquals(0, ext.getFormattingFontIndex());
+       assertEquals(0, ext.getFormattingOptions());
+       assertEquals("", ext.getPhoneticText());
+       assertEquals(0, ext.getPhRuns().length);
+    }
+    
+    /** Reads an ExtRst with no text and no runs, but 2 extra bytes at the end */
+    public void testExtRstFromData() throws Exception {
+       byte[] data = new byte[] {
+             01, 00, 0x0C, 00, 
+             00, 00, 0x37, 00, 
+             00, 00, 
+             00, 00, 00, 00, 
+             00, 00 // Cruft at the end, as found from real files
+       };
+       assertEquals(16, data.length);
+       // The declared data size (0x0C) covers the 2 bytes of cruft too
+       LittleEndianInputStream inp = new LittleEndianInputStream(
+             new ByteArrayInputStream(data)
+       );
+       ExtRst ext = new ExtRst(inp, data.length);
+       assertEquals(0x0c, ext.getDataSize()); // Excludes 4 byte header
+       assertEquals(0, ext.getNumberOfRuns());
+       assertEquals(0x37, ext.getFormattingOptions());
+       assertEquals(0, ext.getFormattingFontIndex());
+       assertEquals("", ext.getPhoneticText());
+       assertEquals(0, ext.getPhRuns().length);
+    }
+    
+    public void testCorruptExtRstDetection() throws Exception {
+       byte[] data = new byte[] {
+             0x79, 0x79, 0x11, 0x11, 
+             0x22, 0x22, 0x33, 0x33, 
+       };
+       assertEquals(8, data.length);
+       
+       LittleEndianInputStream inp = new LittleEndianInputStream(
+             new ByteArrayInputStream(data)
+       );
+       ExtRst ext = new ExtRst(inp, data.length);
+       
+       // Will be empty
+       assertEquals(ext, new ExtRst());
+
+       // If written, will be the usual size
+       assertEquals(10, ext.getDataSize()); // Excludes 4 byte header
+     
+       // Is empty
+       assertEquals(0, ext.getNumberOfRuns());
+       assertEquals(0, ext.getFormattingOptions());
+       assertEquals(0, ext.getFormattingFontIndex());
+       assertEquals("", ext.getPhoneticText());
+       assertEquals(0, ext.getPhRuns().length);
+    }
 
 
     private static UnicodeString makeUnicodeString(String s) {
index d96ad74662a3d4532ca20f90341b37b715a3b4c2..337499415b3d2ee44687f2bf716afe3ec1002803 100644 (file)
@@ -36,6 +36,7 @@ import org.apache.poi.hssf.record.CellValueRecordInterface;
 import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
 import org.apache.poi.hssf.record.NameRecord;
 import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate;
+import org.apache.poi.hssf.record.common.UnicodeString;
 import org.apache.poi.hssf.record.formula.DeletedArea3DPtg;
 import org.apache.poi.hssf.record.formula.Ptg;
 import org.apache.poi.ss.usermodel.*;
@@ -1538,12 +1539,37 @@ public final class TestBugs extends BaseTestBugzillaIssues {
     }
     
     /**
-     * Round trip a file with an unusual ExtRst record
+     * Round trip a file with an unusual UnicodeString/ExtRst record parts
      */
-    public void test47847() {
-       HSSFWorkbook wb = openSample("47251.xls");
-       assertEquals(1, wb.getNumberOfSheets());
+    public void test47847() throws Exception {
+       HSSFWorkbook wb = openSample("47847.xls");
+       assertEquals(3, wb.getNumberOfSheets());
+       
+       // Find the SST record
+       UnicodeString withExt = wb.getWorkbook().getSSTString(0);
+       UnicodeString withoutExt = wb.getWorkbook().getSSTString(31);
+       
+       assertEquals("O:Alloc:Qty", withExt.getString());
+       assertTrue((withExt.getOptionFlags() & 0x0004) == 0x0004);
+       
+       assertEquals("RT", withoutExt.getString());
+       assertTrue((withoutExt.getOptionFlags() & 0x0004) == 0x0000);
+       
+       // Something about continues...
+
+       
+       // Write out and re-read
        wb = writeOutAndReadBack(wb);
-       assertEquals(1, wb.getNumberOfSheets());
+       assertEquals(3, wb.getNumberOfSheets());
+       
+       // Check it's the same now
+       withExt = wb.getWorkbook().getSSTString(0);
+       withoutExt = wb.getWorkbook().getSSTString(31);
+       
+       assertEquals("O:Alloc:Qty", withExt.getString());
+       assertTrue((withExt.getOptionFlags() & 0x0004) == 0x0004);
+       
+       assertEquals("RT", withoutExt.getString());
+       assertTrue((withoutExt.getOptionFlags() & 0x0004) == 0x0000);
     }
 }
diff --git a/test-data/spreadsheet/47847.xls b/test-data/spreadsheet/47847.xls
new file mode 100644 (file)
index 0000000..4a7a631
Binary files /dev/null and b/test-data/spreadsheet/47847.xls differ