From b4265b89039ac2ddd7be69fd2955834cf72f4a33 Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Fri, 24 Oct 2008 00:58:49 +0000 Subject: [PATCH] Fix for unicode string bug in StyleRecord. Improvements to WriteAccessRecord. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@707519 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hssf/model/Workbook.java | 29 +- .../apache/poi/hssf/record/StyleRecord.java | 485 ++++++------------ .../poi/hssf/record/WriteAccessRecord.java | 216 ++++---- .../poi/hssf/usermodel/HSSFCellStyle.java | 4 +- src/java/org/apache/poi/util/StringUtil.java | 97 ++-- .../poi/hssf/record/AllRecordTests.java | 1 + .../poi/hssf/record/TestStyleRecord.java | 33 ++ .../org/apache/poi/util/TestStringUtil.java | 91 ---- 8 files changed, 375 insertions(+), 581 deletions(-) create mode 100644 src/testcases/org/apache/poi/hssf/record/TestStyleRecord.java diff --git a/src/java/org/apache/poi/hssf/model/Workbook.java b/src/java/org/apache/poi/hssf/model/Workbook.java index 54639adb18..222f0a6ab6 100644 --- a/src/java/org/apache/poi/hssf/model/Workbook.java +++ b/src/java/org/apache/poi/hssf/model/Workbook.java @@ -788,7 +788,7 @@ public final class Workbook implements Model { if(r instanceof ExtendedFormatRecord) { } else if(r instanceof StyleRecord) { StyleRecord sr = (StyleRecord)r; - if(sr.getIndex() == xfIndex) { + if(sr.getXFIndex() == xfIndex) { return sr; } } else { @@ -806,7 +806,7 @@ public final class Workbook implements Model { // Style records always follow after // the ExtendedFormat records StyleRecord newSR = new StyleRecord(); - newSR.setIndex((short)xfIndex); + newSR.setXFIndex(xfIndex); // Find the spot int addAt = -1; @@ -1782,45 +1782,44 @@ public final class Workbook implements Model { * @see org.apache.poi.hssf.record.StyleRecord * @see org.apache.poi.hssf.record.Record */ - protected Record createStyle(int id) { // we'll need multiple editions StyleRecord retval = new StyleRecord(); switch (id) { case 0 : - 
retval.setIndex(( short ) 0xffff8010); - retval.setBuiltin(( byte ) 3); + retval.setXFIndex(0x010); + retval.setBuiltinStyle(3); retval.setOutlineStyleLevel(( byte ) 0xffffffff); break; case 1 : - retval.setIndex(( short ) 0xffff8011); - retval.setBuiltin(( byte ) 6); + retval.setXFIndex(0x011); + retval.setBuiltinStyle(6); retval.setOutlineStyleLevel(( byte ) 0xffffffff); break; case 2 : - retval.setIndex(( short ) 0xffff8012); - retval.setBuiltin(( byte ) 4); + retval.setXFIndex(0x012); + retval.setBuiltinStyle(4); retval.setOutlineStyleLevel(( byte ) 0xffffffff); break; case 3 : - retval.setIndex(( short ) 0xffff8013); - retval.setBuiltin(( byte ) 7); + retval.setXFIndex(0x013); + retval.setBuiltinStyle(7); retval.setOutlineStyleLevel(( byte ) 0xffffffff); break; case 4 : - retval.setIndex(( short ) 0xffff8000); - retval.setBuiltin(( byte ) 0); + retval.setXFIndex(0x000); + retval.setBuiltinStyle(0); retval.setOutlineStyleLevel(( byte ) 0xffffffff); break; case 5 : - retval.setIndex(( short ) 0xffff8014); - retval.setBuiltin(( byte ) 5); + retval.setXFIndex(0x014); + retval.setBuiltinStyle(5); retval.setOutlineStyleLevel(( byte ) 0xffffffff); break; } diff --git a/src/java/org/apache/poi/hssf/record/StyleRecord.java b/src/java/org/apache/poi/hssf/record/StyleRecord.java index 5b746e8d4b..ad740e9b2e 100644 --- a/src/java/org/apache/poi/hssf/record/StyleRecord.java +++ b/src/java/org/apache/poi/hssf/record/StyleRecord.java @@ -19,332 +19,181 @@ package org.apache.poi.hssf.record; import org.apache.poi.util.BitField; import org.apache.poi.util.BitFieldFactory; +import org.apache.poi.util.HexDump; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.StringUtil; /** - * Title: Style Record

+ * Title: Style Record (0x0293)

* Description: Describes a builtin to the gui or user defined style

* REFERENCE: PG 390 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)

* @author Andrew C. Oliver (acoliver at apache dot org) * @author aviks : string fixes for UserDefined Style - * @version 2.0-pre */ public final class StyleRecord extends Record { - public final static short sid = 0x0293; - - private static final BitField fHighByte = BitFieldFactory.getInstance(0x01); - - public final static short STYLE_USER_DEFINED = 0; - public final static short STYLE_BUILT_IN = 1; - - // shared by both user defined and builtin styles - private short field_1_xf_index; // TODO: bitfield candidate - - // only for built in styles - private byte field_2_builtin_style; - private byte field_3_outline_style_level; - - // only for user defined styles - private short field_2_name_length; //OO doc says 16 bit length, so we believe - private byte field_3_string_options; - private String field_4_name; - - public StyleRecord() - { - } - - public StyleRecord(RecordInputStream in) - { - field_1_xf_index = in.readShort(); - if (getType() == STYLE_BUILT_IN) - { - field_2_builtin_style = in.readByte(); - field_3_outline_style_level = in.readByte(); - } - else if (getType() == STYLE_USER_DEFINED) - { - field_2_name_length = in.readShort(); - - // Some files from Crystal Reports lack - // the remaining fields, which is naughty - if(in.remaining() > 0) { - field_3_string_options = in.readByte(); - - byte[] string = in.readRemainder(); - if (fHighByte.isSet(field_3_string_options)) { - field_4_name= StringUtil.getFromUnicodeBE(string, 0, field_2_name_length); - } else { - field_4_name=StringUtil.getFromCompressedUnicode(string, 0, field_2_name_length); - } - } - } - - // todo sanity check exception to make sure we're one or the other - } - - /** - * set the entire index field (including the type) (see bit setters that reference this method) - * @param index bitmask - */ - - public void setIndex(short index) - { - field_1_xf_index = index; - } - - // bitfields for field 1 - - /** - * set the type of the style (builtin or user-defined) - * @see #STYLE_USER_DEFINED - * 
@see #STYLE_BUILT_IN - * @param type of style (userdefined/builtin) - * @see #setIndex(short) - */ - - public void setType(short type) - { - field_1_xf_index = setField(field_1_xf_index, type, 0x8000, 15); - } - - /** - * set the actual index of the style extended format record - * @see #setIndex(short) - * @param index of the xf record - */ - - public void setXFIndex(short index) - { - field_1_xf_index = setField(field_1_xf_index, index, 0x1FFF, 0); - } - - // end bitfields - // only for user defined records - - /** - * if this is a user defined record set the length of the style name - * @param length of the style's name - * @see #setName(String) - */ - - public void setNameLength(byte length) - { - field_2_name_length = length; - } - - /** - * set the style's name - * @param name of the style - * @see #setNameLength(byte) - */ - - public void setName(String name) - { - field_4_name = name; - - // Fix up the length - field_2_name_length = (short)name.length(); - //TODO set name string options - } - - // end user defined - // only for buildin records - - /** - * if this is a builtin style set teh number of the built in style - * @param builtin style number (0-7) - * - */ - - public void setBuiltin(byte builtin) - { - field_2_builtin_style = builtin; - } - - /** - * set the row or column level of the style (if builtin 1||2) - */ - - public void setOutlineStyleLevel(byte level) - { - field_3_outline_style_level = level; - } - - // end builtin records - // field 1 - - /** - * get the entire index field (including the type) (see bit getters that reference this method) - * @return bitmask - */ - - public short getIndex() - { - return field_1_xf_index; - } - - // bitfields for field 1 - - /** - * get the type of the style (builtin or user-defined) - * @see #STYLE_USER_DEFINED - * @see #STYLE_BUILT_IN - * @return type of style (userdefined/builtin) - * @see #getIndex() - */ - - public short getType() - { - return ( short ) ((field_1_xf_index & 0x8000) >> 15); - } - - /** 
- * get the actual index of the style extended format record - * @see #getIndex() - * @return index of the xf record - */ - - public short getXFIndex() - { - return ( short ) (field_1_xf_index & 0x1FFF); - } - - // end bitfields - // only for user defined records - - /** - * if this is a user defined record get the length of the style name - * @return length of the style's name - * @see #getName() - */ - - public short getNameLength() - { - return field_2_name_length; - } - - /** - * get the style's name - * @return name of the style - * @see #getNameLength() - */ - - public String getName() - { - return field_4_name; - } - - // end user defined - // only for buildin records - - /** - * if this is a builtin style get the number of the built in style - * @return builtin style number (0-7) - * - */ - - public byte getBuiltin() - { - return field_2_builtin_style; - } - - /** - * get the row or column level of the style (if builtin 1||2) - */ - - public byte getOutlineStyleLevel() - { - return field_3_outline_style_level; - } - - // end builtin records - public String toString() - { - StringBuffer buffer = new StringBuffer(); - - buffer.append("[STYLE]\n"); - buffer.append(" .xf_index_raw = ") - .append(Integer.toHexString(getIndex())).append("\n"); - buffer.append(" .type = ") - .append(Integer.toHexString(getType())).append("\n"); - buffer.append(" .xf_index = ") - .append(Integer.toHexString(getXFIndex())).append("\n"); - if (getType() == STYLE_BUILT_IN) - { - buffer.append(" .builtin_style = ") - .append(Integer.toHexString(getBuiltin())).append("\n"); - buffer.append(" .outline_level = ") - .append(Integer.toHexString(getOutlineStyleLevel())) - .append("\n"); - } - else if (getType() == STYLE_USER_DEFINED) - { - buffer.append(" .name_length = ") - .append(Integer.toHexString(getNameLength())).append("\n"); - buffer.append(" .name = ").append(getName()) - .append("\n"); - } - buffer.append("[/STYLE]\n"); - return buffer.toString(); - } - - private short 
setField(int fieldValue, int new_value, int mask, - int shiftLeft) - { - return ( short ) ((fieldValue & ~mask) - | ((new_value << shiftLeft) & mask)); - } - - public int serialize(int offset, byte [] data) - { - LittleEndian.putShort(data, 0 + offset, sid); - if (getType() == STYLE_BUILT_IN) - { - LittleEndian.putShort(data, 2 + offset, - (( short ) 0x04)); // 4 bytes (8 total) - } - else - { - LittleEndian.putShort(data, 2 + offset, - (( short ) (getRecordSize()-4))); - } - LittleEndian.putShort(data, 4 + offset, getIndex()); - if (getType() == STYLE_BUILT_IN) - { - data[ 6 + offset ] = getBuiltin(); - data[ 7 + offset ] = getOutlineStyleLevel(); - } - else - { - LittleEndian.putShort(data, 6 + offset , getNameLength()); - data[8+offset]=this.field_3_string_options; - StringUtil.putCompressedUnicode(getName(), data, 9 + offset); - } - return getRecordSize(); - } - - public int getRecordSize() - { - int retval; - - if (getType() == STYLE_BUILT_IN) - { - retval = 8; - } - else - { - if (fHighByte.isSet(field_3_string_options)) { - retval= 9+2*getNameLength(); - }else { - retval = 9 + getNameLength(); - } - } - return retval; - } - - public short getSid() - { - return sid; - } + public final static short sid = 0x0293; + + private static final BitField is16BitUnicodeFlag = BitFieldFactory.getInstance(0x01); + + private static final BitField styleIndexMask = BitFieldFactory.getInstance(0x0FFF); + private static final BitField isBuiltinFlag = BitFieldFactory.getInstance(0x8000); + + /** shared by both user defined and built-in styles */ + private int field_1_xf_index; + + // only for built in styles + private int field_2_builtin_style; + private int field_3_outline_style_level; + + // only for user defined styles + private int field_3_string_options; + private String field_4_name; + + /** + * creates a new style record, initially set to 'built-in' + */ + public StyleRecord() { + field_1_xf_index = isBuiltinFlag.set(field_1_xf_index); + } + + public 
StyleRecord(RecordInputStream in) { + field_1_xf_index = in.readShort(); + if (isBuiltin()) { + field_2_builtin_style = in.readByte(); + field_3_outline_style_level = in.readByte(); + } else { + int field_2_name_length = in.readShort(); + + if(in.remaining() < 1) { + // Some files from Crystal Reports lack the is16BitUnicode byte + // the remaining fields, which is naughty + if (field_2_name_length != 0) { + throw new RecordFormatException("Ran out of data reading style record"); + } + // guess this is OK if the string length is zero + field_4_name = ""; + } else { + + int is16BitUnicode = in.readByte(); + if (is16BitUnicodeFlag.isSet(is16BitUnicode)) { + field_4_name = StringUtil.readUnicodeLE(in, field_2_name_length); + } else { + field_4_name = StringUtil.readCompressedUnicode(in, field_2_name_length); + } + } + } + } + + /** + * set the actual index of the style extended format record + * @param xfIndex of the xf record + */ + public void setXFIndex(int xfIndex) { + field_1_xf_index = styleIndexMask.setValue(field_1_xf_index, xfIndex); + } + + /** + * get the actual index of the style extended format record + * @see #getIndex() + * @return index of the xf record + */ + public int getXFIndex() { + return styleIndexMask.getValue(field_1_xf_index); + } + + /** + * set the style's name + * @param name of the style + */ + public void setName(String name) { + field_4_name = name; + field_3_string_options = StringUtil.hasMultibyte(name) ? 
0x01 : 0x00; + field_1_xf_index = isBuiltinFlag.clear(field_1_xf_index); + } + + /** + * if this is a builtin style set the number of the built in style + * @param builtinStyleId style number (0-7) + * + */ + public void setBuiltinStyle(int builtinStyleId) { + field_1_xf_index = isBuiltinFlag.set(field_1_xf_index); + field_2_builtin_style = builtinStyleId; + } + + /** + * set the row or column level of the style (if builtin 1||2) + */ + public void setOutlineStyleLevel(int level) { + field_3_outline_style_level = level & 0x00FF; + } + + public boolean isBuiltin(){ + return isBuiltinFlag.isSet(field_1_xf_index); + } + + /** + * get the style's name + * @return name of the style + */ + public String getName() { + return field_4_name; + } + + public String toString() { + StringBuffer sb = new StringBuffer(); + + sb.append("[STYLE]\n"); + sb.append(" .xf_index_raw =").append(HexDump.shortToHex(field_1_xf_index)).append("\n"); + sb.append(" .type =").append(isBuiltin() ? "built-in" : "user-defined").append("\n"); + sb.append(" .xf_index =").append(HexDump.shortToHex(getXFIndex())).append("\n"); + if (isBuiltin()){ + sb.append(" .builtin_style=").append(HexDump.byteToHex(field_2_builtin_style)).append("\n"); + sb.append(" .outline_level=").append(HexDump.byteToHex(field_3_outline_style_level)).append("\n"); + } else { + sb.append(" .name =").append(getName()).append("\n"); + } + sb.append("[/STYLE]\n"); + return sb.toString(); + } + + + private int getDataSize() { + if (isBuiltin()) { + return 4; // short, byte, byte + } + int size = 2 + 3; // short + if (is16BitUnicodeFlag.isSet(field_3_string_options)) { + size += 2 * field_4_name.length(); + } else { + size += field_4_name.length(); + } + return size; + } + + public int serialize(int offset, byte [] data) { + int dataSize = getDataSize(); + LittleEndian.putShort(data, 0 + offset, sid); + LittleEndian.putUShort(data, 2 + offset, dataSize); + + LittleEndian.putUShort(data, 4 + offset, field_1_xf_index); + if 
(isBuiltin()) { + LittleEndian.putByte(data, 6 + offset, field_2_builtin_style); + LittleEndian.putByte(data, 7 + offset, field_3_outline_style_level); + } else { + LittleEndian.putUShort(data, 6 + offset, field_4_name.length()); + LittleEndian.putByte(data, 8 + offset, field_3_string_options); + StringUtil.putCompressedUnicode(getName(), data, 9 + offset); + } + return 4+dataSize; + } + + public int getRecordSize() { + return 4 + getDataSize(); + } + + public short getSid() + { + return sid; + } } diff --git a/src/java/org/apache/poi/hssf/record/WriteAccessRecord.java b/src/java/org/apache/poi/hssf/record/WriteAccessRecord.java index d73687f186..6b005c7f44 100644 --- a/src/java/org/apache/poi/hssf/record/WriteAccessRecord.java +++ b/src/java/org/apache/poi/hssf/record/WriteAccessRecord.java @@ -1,4 +1,3 @@ - /* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -15,111 +14,130 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - package org.apache.poi.hssf.record; +import java.util.Arrays; + import org.apache.poi.util.LittleEndian; import org.apache.poi.util.StringUtil; /** - * Title: Write Access Record

- * Description: Stores the username of that who owns the spreadsheet generator - * (on unix the user's login, on Windoze its the name you typed when - * you installed the thing)

- * REFERENCE: PG 424 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)

+ * Title: Write Access Record (0x005C)

+ * + * Description: Stores the username of that who owns the spreadsheet generator (on unix the user's + * login, on Windoze its the name you typed when you installed the thing) + *

+ * REFERENCE: PG 424 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2) + *

+ * * @author Andrew C. Oliver (acoliver at apache dot org) - * @version 2.0-pre */ - -public class WriteAccessRecord - extends Record -{ - public final static short sid = 0x5c; - private String field_1_username; - - public WriteAccessRecord() - { - } - - public WriteAccessRecord(RecordInputStream in) - { - byte[] data = in.readRemainder(); - //The string is always 112 characters (padded with spaces), therefore - //this record can not be continued. - - //What a wierd record, it is not really a unicode string because the - //header doesnt provide a correct size indication.??? - //But the header is present, so we need to skip over it. - //Odd, Odd, Odd ;-) - field_1_username = StringUtil.getFromCompressedUnicode(data, 3, data.length - 3); - } - - /** - * set the username for the user that created the report. HSSF uses the logged in user. - * @param username of the user who is logged in (probably "tomcat" or "apache") - */ - - public void setUsername(String username) - { - field_1_username = username; - } - - /** - * get the username for the user that created the report. HSSF uses the logged in user. On - * natively created M$ Excel sheet this would be the name you typed in when you installed it - * in most cases. 
- * @return username of the user who is logged in (probably "tomcat" or "apache") - */ - - public String getUsername() - { - return field_1_username; - } - - public String toString() - { - StringBuffer buffer = new StringBuffer(); - - buffer.append("[WRITEACCESS]\n"); - buffer.append(" .name = ") - .append(field_1_username.toString()).append("\n"); - buffer.append("[/WRITEACCESS]\n"); - return buffer.toString(); - } - - public int serialize(int offset, byte [] data) - { - String username = getUsername(); - StringBuffer temp = new StringBuffer(0x70 - (0x3)); - - temp.append(username); - while (temp.length() < 0x70 - 0x3) - { - temp.append( - " "); // (70 = fixed lenght -3 = the overhead bits of unicode string) - } - username = temp.toString(); - UnicodeString str = new UnicodeString(username); - str.setOptionFlags(( byte ) 0x0); - - LittleEndian.putShort(data, 0 + offset, sid); - LittleEndian.putShort(data, 2 + offset, (short)112); // 112 bytes (115 total) - UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats(); - stats.recordSize += 4; - stats.remainingSize-= 4; - str.serialize(stats, 4 + offset, data); - - return getRecordSize(); - } - - public int getRecordSize() - { - return 116; - } - - public short getSid() - { - return sid; - } +public final class WriteAccessRecord extends Record { + private static final byte PAD_CHAR = (byte) ' '; + public final static short sid = 0x005C; + private static final int DATA_SIZE = 112; + private String field_1_username; + /** this record is always padded to a constant length */ + private byte[] padding; + + public WriteAccessRecord() { + setUsername(""); + padding = new byte[DATA_SIZE - 3]; + } + + public WriteAccessRecord(RecordInputStream in) { + if (in.remaining() > DATA_SIZE) { + throw new RecordFormatException("Expected data size (" + DATA_SIZE + ") but got (" + + in.remaining() + ")"); + } + // The string is always 112 characters (padded with spaces), therefore + // this record can not be 
continued. + + int nChars = in.readUShort(); + int is16BitFlag = in.readUByte(); + int expectedPadSize = DATA_SIZE - 3; + if ((is16BitFlag & 0x01) == 0x00) { + field_1_username = StringUtil.readCompressedUnicode(in, nChars); + expectedPadSize -= nChars; + } else { + field_1_username = StringUtil.readUnicodeLE(in, nChars); + expectedPadSize -= nChars * 2; + } + padding = new byte[expectedPadSize]; + int padSize = in.remaining(); + in.readFully(padding, 0, padSize); + if (padSize < expectedPadSize) { + // this occurs in a couple of test examples: "42564.xls", + // "bug_42794.xls" + Arrays.fill(padding, padSize, expectedPadSize, PAD_CHAR); + } + } + + /** + * set the username for the user that created the report. HSSF uses the + * logged in user. + * + * @param username of the user who is logged in (probably "tomcat" or "apache") + */ + public void setUsername(String username) { + boolean is16bit = StringUtil.hasMultibyte(username); + int encodedByteCount = 3 + username.length() * (is16bit ? 2 : 1); + int paddingSize = DATA_SIZE - encodedByteCount; + if (paddingSize < 0) { + throw new IllegalArgumentException("Name is too long: " + username); + } + padding = new byte[paddingSize]; + Arrays.fill(padding, PAD_CHAR); + + field_1_username = username; + } + + /** + * get the username for the user that created the report. HSSF uses the + * logged in user. On natively created M$ Excel sheet this would be the name + * you typed in when you installed it in most cases. 
+ * + * @return username of the user who is logged in (probably "tomcat" or "apache") + */ + public String getUsername() { + return field_1_username; + } + + public String toString() { + StringBuffer buffer = new StringBuffer(); + + buffer.append("[WRITEACCESS]\n"); + buffer.append(" .name = ").append(field_1_username.toString()).append("\n"); + buffer.append("[/WRITEACCESS]\n"); + return buffer.toString(); + } + + public int serialize(int offset, byte[] data) { + String username = getUsername(); + boolean is16bit = StringUtil.hasMultibyte(username); + + LittleEndian.putUShort(data, 0 + offset, sid); + LittleEndian.putUShort(data, 2 + offset, DATA_SIZE); + LittleEndian.putUShort(data, 4 + offset, username.length()); + LittleEndian.putByte(data, 6 + offset, is16bit ? 0x01 : 0x00); + int pos = offset + 7; + if (is16bit) { + StringUtil.putUnicodeLE(username, data, pos); + pos += username.length() * 2; + } else { + StringUtil.putCompressedUnicode(username, data, pos); + pos += username.length(); + } + System.arraycopy(padding, 0, data, pos, padding.length); + return 4 + DATA_SIZE; + } + + public int getRecordSize() { + return 4 + DATA_SIZE; + } + + public short getSid() { + return sid; + } } diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFCellStyle.java b/src/java/org/apache/poi/hssf/usermodel/HSSFCellStyle.java index d71619aaa8..41046b4b79 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFCellStyle.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFCellStyle.java @@ -962,7 +962,7 @@ public class HSSFCellStyle if(sr == null) { return null; } - if(sr.getType() == StyleRecord.STYLE_BUILT_IN) { + if(sr.isBuiltin()) { return null; } return sr.getName(); @@ -977,7 +977,7 @@ public class HSSFCellStyle if(sr == null) { sr = workbook.createStyleRecord(index); } - if(sr.getType() == StyleRecord.STYLE_BUILT_IN) { + if(sr.isBuiltin()) { throw new IllegalArgumentException("Unable to set user specified style names for built in styles!"); } sr.setName(styleName); 
diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index bb98de3d05..e78e9b40e9 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -20,9 +20,14 @@ package org.apache.poi.util; import java.io.UnsupportedEncodingException; import java.text.FieldPosition; import java.text.NumberFormat; + +import org.apache.poi.hssf.record.RecordInputStream; /** - * Title: String Utility Description: Collection of string handling utilities - * + * Title: String Utility Description: Collection of string handling utilities

+ * + * Note - none of the methods in this class deals with {@link ContinueRecord}s. For such + * functionality, consider using {@link RecordInputStream +} * * *@author Andrew C. Oliver *@author Sergei Kozello (sergeikozello at mail.ru) @@ -84,64 +89,11 @@ public class StringUtil { * @param string the byte array to be converted * @return the converted string */ - public static String getFromUnicodeLE(final byte[] string) { + public static String getFromUnicodeLE(byte[] string) { if(string.length == 0) { return ""; } return getFromUnicodeLE(string, 0, string.length / 2); } - /** - * Given a byte array of 16-bit unicode characters in big endian - * format (most important byte first), return a Java String representation - * of it. - * - * { 0x00, 0x16 } -0x16 - * - * @param string the byte array to be converted - * @param offset the initial offset into the - * byte array. it is assumed that string[ offset ] and string[ offset + - * 1 ] contain the first 16-bit unicode character - * @param len the length of the final string - * @return the converted string - * @exception ArrayIndexOutOfBoundsException if offset is out of bounds for - * the byte array (i.e., is negative or is greater than or equal to - * string.length) - * @exception IllegalArgumentException if len is too large (i.e., - * there is not enough data in string to create a String of that - * length) - */ - public static String getFromUnicodeBE( - final byte[] string, - final int offset, - final int len) - throws ArrayIndexOutOfBoundsException, IllegalArgumentException { - if ((offset < 0) || (offset >= string.length)) { - throw new ArrayIndexOutOfBoundsException("Illegal offset"); - } - if ((len < 0) || (((string.length - offset) / 2) < len)) { - throw new IllegalArgumentException("Illegal length"); - } - try { - return new String(string, offset, len * 2, "UTF-16BE"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - - /** - * Given a byte array of 16-bit unicode 
characters in big endian - * format (most important byte first), return a Java String representation - * of it. - * - * { 0x00, 0x16 } -0x16 - * - * @param string the byte array to be converted - * @return the converted string - */ - public static String getFromUnicodeBE(final byte[] string) { - if(string.length == 0) { return ""; } - return getFromUnicodeBE(string, 0, string.length / 2); - } - /** * Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java * String and return. @@ -163,6 +115,31 @@ public class StringUtil { throw new RuntimeException(e); } } + public static String readCompressedUnicode(LittleEndianInput in, int nChars) { + char[] buf = new char[nChars]; + for (int i = 0; i < buf.length; i++) { + buf[i] = (char) in.readUByte(); + } + return new String(buf); + } + /** + * InputStream in is expected to contain: + *

+ * <ol>
+ * <li>ushort nChars</li>
+ * <li>byte is16BitFlag</li>
+ * <li>byte[]/char[] characterData</li>
+ * </ol>
+ * For this encoding, the is16BitFlag is always present even if nChars==0. + */ + public static String readUnicodeString(LittleEndianInput in) { + + int nChars = in.readUShort(); + byte flag = in.readByte(); + if ((flag & 0x01) == 0) { + return readCompressedUnicode(in, nChars); + } + return readUnicodeLE(in, nChars); + } /** * Takes a unicode (java) string, and returns it as 8 bit data (in ISO-8859-1 @@ -220,6 +197,14 @@ public class StringUtil { } out.write(bytes); } + + public static String readUnicodeLE(LittleEndianInput in, int nChars) { + char[] buf = new char[nChars]; + for (int i = 0; i < buf.length; i++) { + buf[i] = (char) in.readUShort(); + } + return new String(buf); + } /** * Apply printf() like formatting to a string. diff --git a/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java b/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java index 5fb6f4aa03..edf62a9ad1 100755 --- a/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java +++ b/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java @@ -100,6 +100,7 @@ public final class AllRecordTests { result.addTestSuite(TestSheetPropertiesRecord.class); result.addTestSuite(TestSharedFormulaRecord.class); result.addTestSuite(TestStringRecord.class); + result.addTestSuite(TestStyleRecord.class); result.addTestSuite(TestSubRecord.class); result.addTestSuite(TestSupBookRecord.class); result.addTestSuite(TestTableRecord.class); diff --git a/src/testcases/org/apache/poi/hssf/record/TestStyleRecord.java b/src/testcases/org/apache/poi/hssf/record/TestStyleRecord.java new file mode 100644 index 0000000000..bfe11dd3a8 --- /dev/null +++ b/src/testcases/org/apache/poi/hssf/record/TestStyleRecord.java @@ -0,0 +1,33 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.record; + +import junit.framework.TestCase; +/** + * + */ +public final class TestStyleRecord extends TestCase { + public void testUnicodeReadName() { + byte[] data = { + 17, 0, 9, 0, 1, 56, 94, -60, -119, 95, 0, 83, 0, 104, 0, 101, 0, 101, 0, 116, 0, 49, 0, 92, 40, //92, 36 + }; + RecordInputStream in = TestcaseRecordInputStream.create(StyleRecord.sid, data); + StyleRecord sr = new StyleRecord(in); + assertEquals("\u5E38\u89C4_Sheet1", sr.getName()); // "_Sheet1" + } +} diff --git a/src/testcases/org/apache/poi/util/TestStringUtil.java b/src/testcases/org/apache/poi/util/TestStringUtil.java index b22439cd09..f1b5cfd954 100644 --- a/src/testcases/org/apache/poi/util/TestStringUtil.java +++ b/src/testcases/org/apache/poi/util/TestStringUtil.java @@ -42,43 +42,7 @@ public class TestStringUtil super( name ); } - /** - * test simple form of getFromUnicode - */ - public void testSimpleGetFromUnicode() - { - byte[] test_data = new byte[32]; - int index = 0; - - for ( int k = 0; k < 16; k++ ) - { - test_data[index++] = (byte) 0; - test_data[index++] = (byte) ( 'a' + k ); - } - - assertEquals( "abcdefghijklmnop", - StringUtil.getFromUnicodeBE( test_data ) ); - } - - /** - * test simple form of getFromUnicode 
with symbols with code below and more 127 - */ - public void testGetFromUnicodeSymbolsWithCodesMoreThan127() - { - byte[] test_data = new byte[]{0x04, 0x22, - 0x04, 0x35, - 0x04, 0x41, - 0x04, 0x42, - 0x00, 0x20, - 0x00, 0x74, - 0x00, 0x65, - 0x00, 0x73, - 0x00, 0x74, - }; - assertEquals( "\u0422\u0435\u0441\u0442 test", - StringUtil.getFromUnicodeBE( test_data ) ); - } /** * test getFromUnicodeHigh for symbols with code below and more 127 @@ -101,62 +65,7 @@ public class TestStringUtil StringUtil.getFromUnicodeLE( test_data ) ); } - /** - * Test more complex form of getFromUnicode - */ - public void testComplexGetFromUnicode() - { - byte[] test_data = new byte[32]; - int index = 0; - for ( int k = 0; k < 16; k++ ) - { - test_data[index++] = (byte) 0; - test_data[index++] = (byte) ( 'a' + k ); - } - assertEquals( "abcdefghijklmno", - StringUtil.getFromUnicodeBE( test_data, 0, 15 ) ); - assertEquals( "bcdefghijklmnop", - StringUtil.getFromUnicodeBE( test_data, 2, 15 ) ); - try - { - StringUtil.getFromUnicodeBE( test_data, -1, 16 ); - fail( "Should have caught ArrayIndexOutOfBoundsException" ); - } - catch ( ArrayIndexOutOfBoundsException ignored ) - { - // as expected - } - try - { - StringUtil.getFromUnicodeBE( test_data, 32, 16 ); - fail( "Should have caught ArrayIndexOutOfBoundsException" ); - } - catch ( ArrayIndexOutOfBoundsException ignored ) - { - // as expected - } - - try - { - StringUtil.getFromUnicodeBE( test_data, 1, 16 ); - fail( "Should have caught IllegalArgumentException" ); - } - catch ( IllegalArgumentException ignored ) - { - // as expected - } - - try - { - StringUtil.getFromUnicodeBE( test_data, 1, -1 ); - fail( "Should have caught IllegalArgumentException" ); - } - catch ( IllegalArgumentException ignored ) - { - // as expected - } - } /** * Test putCompressedUnicode -- 2.39.5