package org.apache.poi.hpsf;
-import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
@Internal
class CodePageString
{
- byte[] _value;
+ /**
+  * Translates a codepage number into the name of the character encoding
+  * that is passed to the {@link String} encoding and decoding methods.
+  * Codepages without an explicit entry below are mapped to the generic
+  * name <code>"cp"</code> followed by the codepage number.
+  *
+  * @param codepage the codepage number; must be greater than zero
+  * @return the name of the character encoding
+  * @throws UnsupportedEncodingException if the codepage number is zero or
+  *         negative
+  */
+ private static String codepageToEncoding( final int codepage )
+         throws UnsupportedEncodingException
+ {
+     if ( codepage <= 0 )
+     {
+         throw new UnsupportedEncodingException(
+                 "Codepage number may not be " + codepage );
+     }
+
+     switch ( codepage )
+     {
+         /* Unicode encodings */
+         case Constants.CP_UTF16:
+             return "UTF-16";
+         case Constants.CP_UTF16_BE:
+             return "UTF-16BE";
+         case Constants.CP_UTF8:
+             return "UTF-8";
+
+         /* EBCDIC, East Asian and Windows codepages */
+         case Constants.CP_037:
+             return "cp037";
+         case Constants.CP_GBK:
+             return "GBK";
+         case Constants.CP_MS949:
+             return "ms949";
+         case Constants.CP_WINDOWS_1250:
+             return "windows-1250";
+         case Constants.CP_WINDOWS_1251:
+             return "windows-1251";
+         case Constants.CP_WINDOWS_1252:
+             return "windows-1252";
+         case Constants.CP_WINDOWS_1253:
+             return "windows-1253";
+         case Constants.CP_WINDOWS_1254:
+             return "windows-1254";
+         case Constants.CP_WINDOWS_1255:
+             return "windows-1255";
+         case Constants.CP_WINDOWS_1256:
+             return "windows-1256";
+         case Constants.CP_WINDOWS_1257:
+             return "windows-1257";
+         case Constants.CP_WINDOWS_1258:
+             return "windows-1258";
+         case Constants.CP_JOHAB:
+             return "johab";
+
+         /* Macintosh codepages */
+         case Constants.CP_MAC_ROMAN:
+             return "MacRoman";
+         case Constants.CP_MAC_JAPAN:
+             return "SJIS";
+         case Constants.CP_MAC_CHINESE_TRADITIONAL:
+             return "Big5";
+         case Constants.CP_MAC_KOREAN:
+             return "EUC-KR";
+         case Constants.CP_MAC_ARABIC:
+             return "MacArabic";
+         case Constants.CP_MAC_HEBREW:
+             return "MacHebrew";
+         case Constants.CP_MAC_GREEK:
+             return "MacGreek";
+         case Constants.CP_MAC_CYRILLIC:
+             return "MacCyrillic";
+         case Constants.CP_MAC_CHINESE_SIMPLE:
+             return "EUC_CN";
+         case Constants.CP_MAC_ROMANIA:
+             return "MacRomania";
+         case Constants.CP_MAC_UKRAINE:
+             return "MacUkraine";
+         case Constants.CP_MAC_THAI:
+             return "MacThai";
+         case Constants.CP_MAC_CENTRAL_EUROPE:
+             return "MacCentralEurope";
+         case Constants.CP_MAC_ICELAND:
+             return "MacIceland";
+         case Constants.CP_MAC_TURKISH:
+             return "MacTurkish";
+         case Constants.CP_MAC_CROATIAN:
+             return "MacCroatian";
+
+         /* ASCII, KOI8 and ISO encodings */
+         case Constants.CP_US_ACSII:
+         case Constants.CP_US_ASCII2:
+             return "US-ASCII";
+         case Constants.CP_KOI8_R:
+             return "KOI8-R";
+         case Constants.CP_ISO_8859_1:
+             return "ISO-8859-1";
+         case Constants.CP_ISO_8859_2:
+             return "ISO-8859-2";
+         case Constants.CP_ISO_8859_3:
+             return "ISO-8859-3";
+         case Constants.CP_ISO_8859_4:
+             return "ISO-8859-4";
+         case Constants.CP_ISO_8859_5:
+             return "ISO-8859-5";
+         case Constants.CP_ISO_8859_6:
+             return "ISO-8859-6";
+         case Constants.CP_ISO_8859_7:
+             return "ISO-8859-7";
+         case Constants.CP_ISO_8859_8:
+             return "ISO-8859-8";
+         case Constants.CP_ISO_8859_9:
+             return "ISO-8859-9";
+         case Constants.CP_ISO_2022_JP1:
+         case Constants.CP_ISO_2022_JP2:
+         case Constants.CP_ISO_2022_JP3:
+             return "ISO-2022-JP";
+         case Constants.CP_ISO_2022_KR:
+             return "ISO-2022-KR";
+
+         /* EUC, GB and Shift-JIS encodings */
+         case Constants.CP_EUC_JP:
+             return "EUC-JP";
+         case Constants.CP_EUC_KR:
+             return "EUC-KR";
+         case Constants.CP_GB2312:
+             return "GB2312";
+         case Constants.CP_GB18030:
+             return "GB18030";
+         case Constants.CP_SJIS:
+             return "SJIS";
+
+         default:
+             /* No explicit mapping: use the generic "cp<number>" name. */
+             return "cp" + codepage;
+     }
+ }
+
+ private byte[] _value;
CodePageString( final byte[] data, final int startOffset )
{
+ " is not NULL-terminated" );
}
+ /**
+  * Creates an instance from a Java string by handing both arguments to
+  * {@link #setJavaValue(String, int)}.
+  *
+  * @param string the text to store
+  * @param codepage the codepage to encode the text with
+  * @throws UnsupportedEncodingException if {@link #setJavaValue(String, int)}
+  *         rejects the codepage
+  */
+ CodePageString( String string, int codepage )
+         throws UnsupportedEncodingException
+ {
+     this.setJavaValue( string, codepage );
+ }
+
+ /**
+  * Decodes the stored bytes into a Java string and removes the NUL
+  * terminator.
+  *
+  * @param codepage the codepage to decode the bytes with, or -1 to decode
+  *        with the platform's default charset
+  * @return the decoded text up to, but not including, the first NUL
+  *         character; the complete decoded text if it contains no NUL
+  * @throws UnsupportedEncodingException if the codepage cannot be turned
+  *         into a supported character encoding
+  */
+ String getJavaValue( int codepage ) throws UnsupportedEncodingException
+ {
+     final String decoded;
+     if ( codepage == -1 )
+         decoded = new String( _value );
+     else
+         decoded = new String( _value, codepageToEncoding( codepage ) );
+
+     /*
+      * Cut at the first terminator instead of blindly dropping the last
+      * character. Dropping the last character fails with a
+      * StringIndexOutOfBoundsException when nothing was decoded, leaves
+      * NUL characters in the result when the value is padded with more
+      * than one, and removes a real character when the decoded text does
+      * not end with NUL.
+      */
+     final int terminator = decoded.indexOf( '\0' );
+     return terminator < 0 ? decoded : decoded.substring( 0, terminator );
+ }
+
int getSize()
{
return LittleEndian.INT_SIZE + _value.length;
}
+
+ /**
+  * Replaces the stored bytes with the encoded form of the given string
+  * followed by a NUL terminator.
+  *
+  * @param string the text to store
+  * @param codepage the codepage to encode the text with, or -1 to encode
+  *        with the platform's default charset
+  * @throws UnsupportedEncodingException if the codepage cannot be turned
+  *         into a supported character encoding
+  */
+ void setJavaValue( String string, int codepage )
+         throws UnsupportedEncodingException
+ {
+     final String terminated = string + "\0";
+     if ( codepage != -1 )
+     {
+         _value = terminated.getBytes( codepageToEncoding( codepage ) );
+         return;
+     }
+     _value = terminated.getBytes();
+ }
+
+ /**
+  * Writes the length of the stored bytes as an int, followed by the bytes
+  * themselves.
+  *
+  * @param out the stream to write to
+  * @return the number of bytes accounted for: the int size plus the number
+  *         of stored bytes
+  * @throws IOException if writing to the stream fails
+  */
+ int write( OutputStream out ) throws IOException
+ {
+     final byte[] payload = _value;
+     final int payloadLength = payload.length;
+     LittleEndian.putInt( payloadLength, out );
+     out.write( payload );
+     return LittleEndian.INT_SIZE + payloadLength;
+ }
}
import java.util.LinkedList;
import java.util.List;
-import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.LittleEndianConsts;
-
/**
* <p>Supports reading and writing of variant data.</p>
*
* @exception UnsupportedEncodingException if the specified codepage is not
* supported.
* @see Variant
+ * @deprecated Use {@link #read(byte[],int,long,int)} instead
+ */
+ @Deprecated
+ public static Object read( final byte[] src, final int offset,
+         final int length, final long type, final int codepage )
+         throws ReadingNotSupportedException, UnsupportedEncodingException
+ {
+     /*
+      * The length argument is not forwarded: the four-argument overload
+      * that does the work has no such parameter.
+      */
+     final Object value = read( src, offset, type, codepage );
+     return value;
+ }
+
+ /**
+ * <p>Reads a variant type from a byte array.</p>
+ *
+ * @param src The byte array
+ * @param offset The offset in the byte array where the variant starts
+ * @param type The variant type to read
+ * @param codepage The codepage to use for non-wide strings
+ * @return A Java object that corresponds best to the variant field. For
+ * example, a VT_I4 is returned as a {@link Long}, a VT_LPSTR as a
+ * {@link String}.
+ * @exception ReadingNotSupportedException if a property is to be read
+ * whose variant type HPSF does not yet support
+ * @exception UnsupportedEncodingException if the specified codepage is not
+ * supported.
+ * @see Variant
*/
public static Object read(final byte[] src, final int offset,
- final int length, final long type,
- final int codepage)
+ final long type, final int codepage)
throws ReadingNotSupportedException, UnsupportedEncodingException
{
- Object value;
- int o1 = offset;
- int l1 = length - LittleEndian.INT_SIZE;
- long lType = type;
+ TypedPropertyValue typedPropertyValue = new TypedPropertyValue(
+ (int) type, null );
+ int unpadded = typedPropertyValue.readValue( src, offset );
- /* Instead of trying to read 8-bit characters from a Unicode string,
- * read 16-bit characters. */
- if (codepage == Constants.CP_UNICODE && type == Variant.VT_LPSTR)
- lType = Variant.VT_LPWSTR;
+ switch ( (int) type )
+ {
+ case Variant.VT_EMPTY:
+ case Variant.VT_I4:
+ case Variant.VT_I8:
+ case Variant.VT_R8:
+ /*
+ * More property types could be converted into Java objects, but the
+ * current API needs to be preserved, and it returns other types as
+ * byte arrays. This should be changed in a future major version.
+ * -- sergey
+ */
+ return typedPropertyValue.getValue();
- switch ((int) lType)
+ case Variant.VT_I2:
{
- case Variant.VT_EMPTY:
- {
- value = null;
- break;
- }
- case Variant.VT_I2:
- {
- /*
- * Read a short. In Java it is represented as an
- * Integer object.
- */
- value = Integer.valueOf(LittleEndian.getShort(src, o1));
- break;
- }
- case Variant.VT_I4:
- {
- /*
- * Read a word. In Java it is represented as an
- * Integer object.
- */
- value = Integer.valueOf(LittleEndian.getInt(src, o1));
- break;
- }
- case Variant.VT_I8:
- {
- /*
- * Read a double word. In Java it is represented as a
- * Long object.
- */
- value = Long.valueOf(LittleEndian.getLong(src, o1));
- break;
- }
- case Variant.VT_R8:
- {
- /*
- * Read an eight-byte double value. In Java it is represented as
- * a Double object.
- */
- value = new Double(LittleEndian.getDouble(src, o1));
- break;
- }
- case Variant.VT_FILETIME:
- {
- /*
- * Read a FILETIME object. In Java it is represented
- * as a Date object.
- */
- final long low = LittleEndian.getUInt(src, o1);
- o1 += LittleEndian.INT_SIZE;
- final long high = LittleEndian.getUInt(src, o1);
- value = Util.filetimeToDate((int) high, (int) low);
- break;
- }
- case Variant.VT_LPSTR:
- {
- /*
- * Read a byte string. In Java it is represented as a
- * String object. The 0x00 bytes at the end must be
- * stripped.
- */
- final int first = o1 + LittleEndian.INT_SIZE;
- long last = first + LittleEndian.getUInt(src, o1) - 1;
- o1 += LittleEndian.INT_SIZE;
- while (src[(int) last] == 0 && first <= last)
- last--;
- final int l = (int) (last - first + 1);
- value = codepage != -1 ?
- new String(src, first, l,
- codepageToEncoding(codepage)) :
- new String(src, first, l);
- break;
- }
- case Variant.VT_LPWSTR:
- {
- /*
- * Read a Unicode string. In Java it is represented as
- * a String object. The 0x00 bytes at the end must be
- * stripped.
- */
- final int first = o1 + LittleEndian.INT_SIZE;
- long last = first + LittleEndian.getUInt(src, o1) - 1;
- long l = last - first;
- o1 += LittleEndian.INT_SIZE;
- StringBuffer b = new StringBuffer((int) (last - first));
- for (int i = 0; i <= l; i++)
- {
- final int i1 = o1 + (i * 2);
- final int i2 = i1 + 1;
- final int high = src[i2] << 8;
- final int low = src[i1] & 0x00ff;
- final char c = (char) (high | low);
- b.append(c);
- }
- /* Strip 0x00 characters from the end of the string: */
- while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
- b.setLength(b.length() - 1);
- value = b.toString();
- break;
- }
- case Variant.VT_CF:
- {
- if(l1 < 0) {
- /**
- * YK: reading the ClipboardData packet (VT_CF) is not quite correct.
- * The size of the data is determined by the first four bytes of the packet
- * while the current implementation calculates it in the Section constructor.
- * Test files in Bugzilla 42726 and 45583 clearly show that this approach does not always work.
- * The workaround below attempts to gracefully handle such cases instead of throwing exceptions.
- *
- * August 20, 2009
- */
- l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE;
- }
- final byte[] v = new byte[l1];
- System.arraycopy(src, o1, v, 0, v.length);
- value = v;
- break;
- }
- case Variant.VT_BOOL:
- {
- /*
- * The first four bytes in src, from src[offset] to
- * src[offset + 3] contain the DWord for VT_BOOL, so
- * skip it, we don't need it.
- */
- // final int first = offset + LittleEndian.INT_SIZE;
- long bool = LittleEndian.getUInt(src, o1);
- if (bool != 0)
- value = Boolean.TRUE;
- else
- value = Boolean.FALSE;
- break;
- }
- default:
- {
- final byte[] v = new byte[l1];
- for (int i = 0; i < l1; i++)
- v[i] = src[(o1 + i)];
- throw new ReadingNotSupportedException(type, v);
- }
+ /*
+ * The short value is converted to an Integer, also for backward
+ * compatibility with previous versions of POI. --sergey
+ */
+ return Integer.valueOf( ( (Short) typedPropertyValue.getValue() )
+ .intValue() );
+ }
+ case Variant.VT_FILETIME:
+ {
+ Filetime filetime = (Filetime) typedPropertyValue.getValue();
+ return Util.filetimeToDate( (int) filetime.getHigh(),
+ (int) filetime.getLow() );
+ }
+ case Variant.VT_LPSTR:
+ {
+ CodePageString string = (CodePageString) typedPropertyValue
+ .getValue();
+ return string.getJavaValue( codepage );
+ }
+ case Variant.VT_LPWSTR:
+ {
+ UnicodeString string = (UnicodeString) typedPropertyValue
+ .getValue();
+ return string.toJavaString();
+ }
+ case Variant.VT_CF:
+ {
+ // if(l1 < 0) {
+ /**
+ * YK: reading the ClipboardData packet (VT_CF) is not quite
+ * correct. The size of the data is determined by the first four
+ * bytes of the packet while the current implementation calculates
+ * it in the Section constructor. Test files in Bugzilla 42726 and
+ * 45583 clearly show that this approach does not always work. The
+ * workaround below attempts to gracefully handle such cases instead
+ * of throwing exceptions.
+ *
+ * August 20, 2009
+ */
+ // l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE;
+ // }
+ // final byte[] v = new byte[l1];
+ // System.arraycopy(src, o1, v, 0, v.length);
+ // value = v;
+ // break;
+ ClipboardData clipboardData = (ClipboardData) typedPropertyValue
+ .getValue();
+ return clipboardData.toByteArray();
}
- return value;
- }
+ case Variant.VT_BOOL:
+ {
+ VariantBool bool = (VariantBool) typedPropertyValue.getValue();
+ return Boolean.valueOf( bool.getValue() );
+ }
+ default:
+ {
+ /*
+ * This is not very good, but what else can be done without breaking
+ * the current API? --sergey
+ */
+ final byte[] v = new byte[unpadded];
+ System.arraycopy( src, offset, v, 0, unpadded );
+ throw new ReadingNotSupportedException( type, v );
+ }
+ }
+ }
/**
* <p>Turns a codepage number into the equivalent character encoding's
}
case Variant.VT_LPSTR:
{
- final byte[] bytes =
- (codepage == -1 ?
- ((String) value).getBytes() :
- ((String) value).getBytes(codepageToEncoding(codepage)));
- length = TypeWriter.writeUIntToStream(out, bytes.length + 1);
- final byte[] b = new byte[bytes.length + 1];
- System.arraycopy(bytes, 0, b, 0, bytes.length);
- b[b.length - 1] = 0x00;
- out.write(b);
- length += b.length;
+ CodePageString codePageString = new CodePageString( (String) value,
+ codepage );
+ length += codePageString.write( out );
break;
}
case Variant.VT_LPWSTR:
}
case Variant.VT_EMPTY:
{
- TypeWriter.writeUIntToStream(out, Variant.VT_EMPTY);
- length = LittleEndianConsts.INT_SIZE;
+ length += TypeWriter.writeUIntToStream( out, Variant.VT_EMPTY );
break;
}
case Variant.VT_I2:
{
- TypeWriter.writeToStream(out, ((Integer) value).shortValue());
- length = LittleEndianConsts.SHORT_SIZE;
+ length += TypeWriter.writeToStream( out,
+ ( (Integer) value ).shortValue() );
break;
}
case Variant.VT_I4:
}
case Variant.VT_I8:
{
- TypeWriter.writeToStream(out, ((Long) value).longValue());
- length = LittleEndianConsts.LONG_SIZE;
+ length += TypeWriter.writeToStream(out, ((Long) value).longValue());
break;
}
case Variant.VT_R8:
long filetime = Util.dateToFileTime((Date) value);
int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL);
int low = (int) (filetime & 0x00000000FFFFFFFFL);
- length += TypeWriter.writeUIntToStream
- (out, 0x0000000FFFFFFFFL & low);
- length += TypeWriter.writeUIntToStream
- (out, 0x0000000FFFFFFFFL & high);
+ Filetime filetimeValue = new Filetime( low, high);
+ length += filetimeValue.write( out );
break;
}
default: