Fix for bug 46545 - ObjRecord should ignore excessive padding written by previous...

author Josh Micich <josh@apache.org>

Thu, 29 Jan 2009 01:57:22 +0000 (01:57 +0000)

committer Josh Micich <josh@apache.org>

Thu, 29 Jan 2009 01:57:22 +0000 (01:57 +0000)
author Josh Micich <josh@apache.org>
Thu, 29 Jan 2009 01:57:22 +0000 (01:57 +0000)
committer Josh Micich <josh@apache.org>
Thu, 29 Jan 2009 01:57:22 +0000 (01:57 +0000)
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml

index 61605b2776879e871e9136a4ae6e23aa29f367b6..8167226ca4cac6cb3f33e58aa29166fe69cdcefb 100644 (file)
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,6 +37,7 @@
  
                 <!-- Don't forget to update status.xml too! -->
          <release version="3.5-beta5" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions</action>
             <action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action>
             <action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
             <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml

index 595dfa169016b02cec38301e351eb69723bde7f6..131c087137d926898a8c940f0eb032fc5c831579 100644 (file)
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
         <!-- Don't forget to update changes.xml too! -->
      <changes>
          <release version="3.5-beta5" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions</action>
             <action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action>
             <action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
             <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>
diff --git a/src/java/org/apache/poi/hssf/record/ObjRecord.java b/src/java/org/apache/poi/hssf/record/ObjRecord.java

index 6f4a651cc84912c73d661ce44b6f1af39b0579f3..32231d9ae5b4b4ccd6aff831c6d97e111b70be6f 100644 (file)
--- a/src/java/org/apache/poi/hssf/record/ObjRecord.java
+++ b/src/java/org/apache/poi/hssf/record/ObjRecord.java
@@ -39,9 +39,9 @@ public final class ObjRecord extends Record {
         private static final int NORMAL_PAD_ALIGNMENT = 2;
         private static int MAX_PAD_ALIGNMENT = 4;
         
-       private List subrecords;
+       private List<SubRecord> subrecords;
         /** used when POI has no idea what is going on */
-       private byte[] _uninterpretedData;
+       private final byte[] _uninterpretedData;
         /**
          * Excel seems to tolerate padding to quad or double byte length
          */
@@ -52,13 +52,14 @@ public final class ObjRecord extends Record {
  
  
         public ObjRecord() {
-               subrecords = new ArrayList(2);
+               subrecords = new ArrayList<SubRecord>(2);
                 // TODO - ensure 2 sub-records (ftCmo 15h, and ftEnd 00h) are always created
+               _uninterpretedData = null;
         }
  
         public ObjRecord(RecordInputStream in) {
                 // TODO - problems with OBJ sub-records stream
-               // MS spec says first sub-records is always CommonObjectDataSubRecord,
+               // MS spec says first sub-record is always CommonObjectDataSubRecord,
                 // and last is
                 // always EndSubRecord. OOO spec does not mention ObjRecord(0x005D).
                 // Existing POI test data seems to violate that rule. Some test data
@@ -74,6 +75,7 @@ public final class ObjRecord extends Record {
                         // Excel tolerates the funny ObjRecord, and replaces it with a corrected version
                         // The exact logic/reasoning is not yet understood
                         _uninterpretedData = subRecordData;
+                       subrecords = null;
                         return;
                 }
                 if (subRecordData.length % 2 != 0) {
@@ -83,7 +85,7 @@ public final class ObjRecord extends Record {
  
  //             System.out.println(HexDump.toHex(subRecordData));
  
-               subrecords = new ArrayList();
+               subrecords = new ArrayList<SubRecord>();
                 ByteArrayInputStream bais = new ByteArrayInputStream(subRecordData);
                 LittleEndianInputStream subRecStream = new LittleEndianInputStream(bais);
                 while (true) {
@@ -98,13 +100,40 @@ public final class ObjRecord extends Record {
                         // At present (Oct-2008), most unit test samples have (subRecordData.length % 2 == 0)
                         _isPaddedToQuadByteMultiple = subRecordData.length % MAX_PAD_ALIGNMENT == 0;
                         if (nRemainingBytes >= (_isPaddedToQuadByteMultiple ? MAX_PAD_ALIGNMENT : NORMAL_PAD_ALIGNMENT)) {
-                               String msg = "Leftover " + nRemainingBytes 
-                               + " bytes in subrecord data " + HexDump.toHex(subRecordData);
-                               throw new RecordFormatException(msg);
+                               if (!canPaddingBeDiscarded(subRecordData, nRemainingBytes)) {
+                                       String msg = "Leftover " + nRemainingBytes 
+                                               + " bytes in subrecord data " + HexDump.toHex(subRecordData);
+                                       throw new RecordFormatException(msg);
+                               }
+                               _isPaddedToQuadByteMultiple = false;
                         }
                 } else {
                         _isPaddedToQuadByteMultiple = false;
                 }
+               _uninterpretedData = null;
+       }
+
+       /**
+        * Some XLS files have ObjRecords with nearly 8Kb of excessive padding. These were probably
+        * written by a version of POI (around 3.1) which incorrectly interpreted the second short of
+        * the ftLbs subrecord (0x1FEE) as a length, and read that many bytes as padding (other bugs
+        * helped allow this to occur).
+        * 
+        * Excel reads files with this excessive padding OK, truncating the over-sized ObjRecord back
+        * to the its proper size.  POI does the same.
+        */
+       private static boolean canPaddingBeDiscarded(byte[] data, int nRemainingBytes) {
+               if (data.length < 0x1FEE) {
+                       // this looks like a different problem
+                       return false;
+               }
+               // make sure none of the padding looks important
+               for(int i=data.length-nRemainingBytes; i<data.length; i++) {
+                       if (data[i] != 0x00) {
+                               return false;
+                       }
+               }
+               return true;
         }
  
         public String toString() {
@@ -112,7 +141,7 @@ public final class ObjRecord extends Record {
  
                 sb.append("[OBJ]\n");
                 for (int i = 0; i < subrecords.size(); i++) {
-                       SubRecord record = (SubRecord) subrecords.get(i);
+                       SubRecord record = subrecords.get(i);
                         sb.append("SUBRECORD: ").append(record.toString());
                 }
                 sb.append("[/OBJ]\n");
@@ -125,7 +154,7 @@ public final class ObjRecord extends Record {
                 }
                 int size = 0;
                 for (int i=subrecords.size()-1; i>=0; i--) {
-                       SubRecord record = (SubRecord) subrecords.get(i);
+                       SubRecord record = subrecords.get(i);
                         size += record.getDataSize()+4;
                 }
                 if (_isPaddedToQuadByteMultiple) {
@@ -151,7 +180,7 @@ public final class ObjRecord extends Record {
                 if (_uninterpretedData == null) {
  
                         for (int i = 0; i < subrecords.size(); i++) {
-                               SubRecord record = (SubRecord) subrecords.get(i);
+                               SubRecord record = subrecords.get(i);
                                 record.serialize(out);
                         }
                         int expectedEndIx = offset+dataSize;
@@ -169,7 +198,7 @@ public final class ObjRecord extends Record {
                 return sid;
         }
  
-       public List getSubRecords() {
+       public List<SubRecord> getSubRecords() {
                 return subrecords;
         }
  
@@ -177,11 +206,11 @@ public final class ObjRecord extends Record {
                 subrecords.clear();
         }
  
-       public void addSubRecord(int index, Object element) {
+       public void addSubRecord(int index, SubRecord element) {
                 subrecords.add(index, element);
         }
  
-       public boolean addSubRecord(Object o) {
+       public boolean addSubRecord(SubRecord o) {
                 return subrecords.add(o);
         }
  
@@ -189,8 +218,8 @@ public final class ObjRecord extends Record {
                 ObjRecord rec = new ObjRecord();
  
                 for (int i = 0; i < subrecords.size(); i++) {
-                       SubRecord record = (SubRecord) subrecords.get(i);
-                       rec.addSubRecord(record.clone());
+                       SubRecord record = subrecords.get(i);
+                       rec.addSubRecord((SubRecord) record.clone());
                 }
                 return rec;
         }
diff --git a/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java b/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java

index 9d0edd1c14c82708d6413f8faec5589b4c5d799e..926f2439755c967de08207f5cd63facf8fb6a17f 100644 (file)
--- a/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java
+++ b/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java
@@ -25,6 +25,7 @@ import junit.framework.AssertionFailedError;
  import junit.framework.TestCase;
  
  import org.apache.poi.util.HexRead;
+import org.apache.poi.util.LittleEndian;
  
  /**
   * Tests Subrecord components of an OBJ record.  Test data taken directly
@@ -85,21 +86,21 @@ public final class TestSubRecord extends TestCase {
         
         public void testReadManualComboWithFormula() {
                 byte[] data = HexRead.readFromString(""
-                       + "5D 00 66 00 "
-                       + "15 00 12 00 14 00 02 00 11 20 00 00 00 00 "
-                       + "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 "
-                       + "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 "
-                       + "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 "
-                       + "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F "
-                       + "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 "
-                       + "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero
-               );
+                       + "5D 00 66 00 "
+                       + "15 00 12 00 14 00 02 00 11 20 00 00 00 00 "
+                       + "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 "
+                       + "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 "
+                       + "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 "
+                       + "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F "
+                       + "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 "
+                       + "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero
+               );
                 
                 RecordInputStream in = TestcaseRecordInputStream.create(data);
                 ObjRecord or = new ObjRecord(in);
                 byte[] data2 = or.serialize();
                 if (data2.length == 8228) {
-                       throw new AssertionFailedError("Identified bug XXXXX");
+                       throw new AssertionFailedError("Identified bug 45778");
                 }
                 assertEquals("Encoded length", data.length, data2.length);
                 for (int i = 0; i < data.length; i++) {
@@ -109,4 +110,52 @@ public final class TestSubRecord extends TestCase {
                 }
                 assertTrue(Arrays.equals(data, data2));
         }
+
+       /**
+        * Some versions of POI (e.g. 3.1 - prior to svn r707450 / bug 45778) interpreted the ftLbs 
+        * subrecord second short (0x1FEE) as a length, and hence read lots of extra padding.  This 
+        * buffer-overrun in {@link RecordInputStream} happened silently due to problems later fixed
+        * in svn 707778. When the ObjRecord is written, the extra padding is written too, making the 
+        * record 8224 bytes long instead of 70.  
+        * (An aside: It seems more than a coincidence that this problem creates a record of exactly
+        * {@link RecordInputStream#MAX_RECORD_DATA_SIZE} but not enough is understood about 
+        * subrecords to explain this.)<br/>
+        * 
+        * Excel reads files with this excessive padding OK.  It also truncates the over-sized
+        * ObjRecord back to the proper size.  POI should do the same.
+        */
+       public void testWayTooMuchPadding_bug46545() {
+               byte[] data = HexRead.readFromString(""
+                       + "15 00 12 00 14 00 13 00 01 21 00 00 00"
+                       + "00 98 0B 5B 09 00 00 00 00 0C 00 14 00 00 00 00 00 00 00 00"
+                       + "00 00 00 01 00 01 00 00 00 10 00 00 00 "
+                       // ftLbs
+                       + "13 00 EE 1F 00 00 "
+                       + "01 00 00 00 01 06 00 00 02 00 08 00 75 00 "
+                       // ftEnd
+                       + "00 00 00 00"
+               );
+               final int LBS_START_POS = 0x002E;
+               final int WRONG_LBS_SIZE = 0x1FEE;
+               assertEquals(0x0013, LittleEndian.getShort(data, LBS_START_POS+0));
+               assertEquals(WRONG_LBS_SIZE, LittleEndian.getShort(data, LBS_START_POS+2));
+               int wrongTotalSize = LBS_START_POS + 4 + WRONG_LBS_SIZE;
+               byte[] wrongData = new byte[wrongTotalSize];
+               System.arraycopy(data, 0, wrongData, 0, data.length);
+               // wrongData has the ObjRecord data as would have been written by v3.1 
+               
+               RecordInputStream in = TestcaseRecordInputStream.create(ObjRecord.sid, wrongData);
+               ObjRecord or;
+               try {
+                       or = new ObjRecord(in);
+               } catch (RecordFormatException e) {
+                       if (e.getMessage().startsWith("Leftover 8154 bytes in subrecord data")) {
+                               throw new AssertionFailedError("Identified bug 46545");
+                       }
+                       throw e;
+               }
+               // make sure POI properly truncates the ObjRecord data
+               byte[] data2 = or.serialize();
+               TestcaseRecordInputStream.confirmRecordEncoding(ObjRecord.sid, data, data2);
+       }
  }
author	Josh Micich <josh@apache.org>
	Thu, 29 Jan 2009 01:57:22 +0000 (01:57 +0000)
committer	Josh Micich <josh@apache.org>
	Thu, 29 Jan 2009 01:57:22 +0000 (01:57 +0000)
src/documentation/content/xdocs/changes.xml		patch \| blob \| history
src/documentation/content/xdocs/status.xml		patch \| blob \| history
src/java/org/apache/poi/hssf/record/ObjRecord.java		patch \| blob \| history
src/testcases/org/apache/poi/hssf/record/TestSubRecord.java		patch \| blob \| history