From: Nick Burch Date: Tue, 25 Jun 2013 23:49:24 +0000 (+0000) Subject: Fix bug #54233 - Some HPSF documents require UnicodeStrings to be 4-byte aligned... X-Git-Tag: REL_3_10_BETA2~72 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=80677e58b386cccfd402d2fa4e97cdf4746204ca;p=poi.git Fix bug #54233 - Some HPSF documents require UnicodeStrings to be 4-byte aligned, spot these from the otherwise invalid length git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1496675 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/hpsf/UnicodeString.java b/src/java/org/apache/poi/hpsf/UnicodeString.java index 38c3ad4945..0d3cdbf292 100644 --- a/src/java/org/apache/poi/hpsf/UnicodeString.java +++ b/src/java/org/apache/poi/hpsf/UnicodeString.java @@ -23,17 +23,36 @@ import org.apache.poi.util.POILogger; import org.apache.poi.util.StringUtil; @Internal -class UnicodeString -{ - - private final static POILogger logger = POILogFactory - .getLogger( UnicodeString.class ); +class UnicodeString { + private final static POILogger logger = + POILogFactory.getLogger( UnicodeString.class ); private byte[] _value; - UnicodeString( byte[] data, int offset ) - { + UnicodeString(byte[] data, int offset) { int length = LittleEndian.getInt( data, offset ); + int dataOffset = offset + LittleEndian.INT_SIZE; + + if (! validLength(length, data, dataOffset)) { + // If the length looks wrong, this might be because the offset is sometimes expected + // to be on a 4 byte boundary. Try checking with that if so, rather than blowing up with + // and ArrayIndexOutOfBoundsException below + boolean valid = false; + int past4byte = offset % 4; + if (past4byte != 0) { + offset = offset + past4byte; + length = LittleEndian.getInt( data, offset ); + dataOffset = offset + LittleEndian.INT_SIZE; + + valid = validLength(length, data, dataOffset); + } + + if (!valid) { + throw new IllegalPropertySetDataException( + "UnicodeString started at offset #" + offset + + " is not NULL-terminated" ); + } + } if ( length == 0 ) { @@ -41,13 +60,30 @@ class UnicodeString return; } - _value = LittleEndian.getByteArray( data, offset - + LittleEndian.INT_SIZE, length * 2 ); + _value = LittleEndian.getByteArray( data, dataOffset, length * 2 ); + } + + /** + * Checks to see if the specified length seems valid, + * given the amount of data available still to read, + * and the requirement that the string be NULL-terminated + */ + boolean validLength(int length, byte[] data, int offset) { + if (length == 0) { + return true; + } + + int endOffset = offset + (length * 2); + if (endOffset <= data.length) { + // Data Length is OK, ensure it's null terminated too + if (data[endOffset-1] == 0 && data[endOffset-2] == 0) { + // Length looks plausible + return true; + } + } - if ( _value[length * 2 - 1] != 0 || _value[length * 2 - 2] != 0 ) - throw new IllegalPropertySetDataException( - "UnicodeString started at offset #" + offset - + " is not NULL-terminated" ); + // Something's up/invalid with that length for the given data+offset + return false; } int getSize() diff --git a/src/testcases/org/apache/poi/TestPOIDocumentMain.java b/src/testcases/org/apache/poi/TestPOIDocumentMain.java index ef4d891745..67552588f1 100644 --- a/src/testcases/org/apache/poi/TestPOIDocumentMain.java +++ b/src/testcases/org/apache/poi/TestPOIDocumentMain.java @@ -46,7 +46,6 @@ public final class TestPOIDocumentMain extends TestCase { * Set things up, two spreadsheets for our testing */ public void setUp() { - doc = HSSFTestDataSamples.openSampleWorkbook("DateFormats.xls"); doc2 = HSSFTestDataSamples.openSampleWorkbook("StringFormulas.xls"); } diff --git a/src/testcases/org/apache/poi/hpsf/basic/TestHPSFBugs.java b/src/testcases/org/apache/poi/hpsf/basic/TestHPSFBugs.java index 5652f45e14..c27b369c80 100644 --- a/src/testcases/org/apache/poi/hpsf/basic/TestHPSFBugs.java +++ b/src/testcases/org/apache/poi/hpsf/basic/TestHPSFBugs.java @@ -23,12 +23,20 @@ import java.util.Date; import junit.framework.TestCase; +import org.apache.poi.POIDataSamples; +import org.apache.poi.hpsf.DocumentSummaryInformation; +import org.apache.poi.hpsf.PropertySetFactory; +import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.poifs.filesystem.DocumentInputStream; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * Tests various bugs have been fixed */ public final class TestHPSFBugs extends TestCase { + private static final POIDataSamples _samples = POIDataSamples.getHPSFInstance(); + /** * Ensure that we can create a new HSSF Workbook, * then add some properties to it, save + @@ -91,4 +99,31 @@ public final class TestHPSFBugs extends TestCase { assertEquals(12345, wb.getSummaryInformation().getCreateDateTime().getTime()); assertEquals("Apache", wb.getDocumentSummaryInformation().getCompany()); } + + /** + * Some files seem to want the length and data to be on a 4-byte boundary, + * and without that you'll hit an ArrayIndexOutOfBoundsException after + * reading junk + */ + public void test54233() throws Exception { + DocumentInputStream dis; + POIFSFileSystem fs = + new POIFSFileSystem(_samples.openResourceAsStream("TestNon4ByteBoundary.doc")); + + dis = fs.createDocumentInputStream(SummaryInformation.DEFAULT_STREAM_NAME); + SummaryInformation si = (SummaryInformation)PropertySetFactory.create(dis); + + dis = fs.createDocumentInputStream(DocumentSummaryInformation.DEFAULT_STREAM_NAME); + DocumentSummaryInformation dsi = (DocumentSummaryInformation)PropertySetFactory.create(dis); + + // Test + assertEquals("Microsoft Word 10.0", si.getApplicationName()); + assertEquals("", si.getTitle()); + assertEquals("", si.getAuthor()); + assertEquals("Cour de Justice", dsi.getCompany()); + + + // Write out and read back, should still be valid + // TODO + } }