rewrite VariantSupport to use TypedPropertyValue at max without breaking compatibility

author Sergey Vladimirov <sergey@apache.org>

Sat, 22 Oct 2011 02:02:34 +0000 (02:02 +0000)

committer Sergey Vladimirov <sergey@apache.org>

Sat, 22 Oct 2011 02:02:34 +0000 (02:02 +0000)
author Sergey Vladimirov <sergey@apache.org>
Sat, 22 Oct 2011 02:02:34 +0000 (02:02 +0000)
committer Sergey Vladimirov <sergey@apache.org>
Sat, 22 Oct 2011 02:02:34 +0000 (02:02 +0000)
diff --git a/src/java/org/apache/poi/hpsf/ClipboardData.java b/src/java/org/apache/poi/hpsf/ClipboardData.java

index 477a40b95f2515838c74507961bd9ee2034a4d51..72e680489fcdfc4356c09d55704881b5241efdd2 100644 (file)
--- a/src/java/org/apache/poi/hpsf/ClipboardData.java
+++ b/src/java/org/apache/poi/hpsf/ClipboardData.java
@@ -1,7 +1,12 @@
  package org.apache.poi.hpsf;
  
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.poi.util.Internal;
  import org.apache.poi.util.LittleEndian;
  
+@Internal
  class ClipboardData
  {
      private int _format;
@@ -24,4 +29,29 @@ class ClipboardData
      {
          return LittleEndian.INT_SIZE * 2 + _value.length;
      }
+
+    byte[] getValue()
+    {
+        return _value;
+    }
+
+    byte[] toByteArray()
+    {
+        byte[] result = new byte[getSize()];
+        LittleEndian.putInt( result, 0 * LittleEndian.INT_SIZE,
+                LittleEndian.INT_SIZE + _value.length );
+        LittleEndian.putInt( result, 1 * LittleEndian.INT_SIZE, _format );
+        LittleEndian.putInt( result, 2 * LittleEndian.INT_SIZE, _value.length );
+        System.arraycopy( _value, 0, result, LittleEndian.INT_SIZE
+                + LittleEndian.INT_SIZE, _value.length );
+        return result;
+    }
+
+    int write( OutputStream out ) throws IOException
+    {
+        LittleEndian.putInt( LittleEndian.INT_SIZE + _value.length, out );
+        LittleEndian.putInt( _format, out );
+        out.write( _value );
+        return 2 * LittleEndian.INT_SIZE + _value.length;
+    }
  }
diff --git a/src/java/org/apache/poi/hpsf/CodePageString.java b/src/java/org/apache/poi/hpsf/CodePageString.java

index c0e9de92e78927db05d2061035a17de16cdd4368..89f3684832d2e8fa74026a9f308612aa689bf61d 100644 (file)
--- a/src/java/org/apache/poi/hpsf/CodePageString.java
+++ b/src/java/org/apache/poi/hpsf/CodePageString.java
@@ -1,14 +1,133 @@
  package org.apache.poi.hpsf;
  
-import org.apache.poi.util.LittleEndian;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
  
  import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
  
  @Internal
  class CodePageString
  {
  
-    byte[] _value;
+    private static String codepageToEncoding( final int codepage )
+            throws UnsupportedEncodingException
+    {
+        if ( codepage <= 0 )
+            throw new UnsupportedEncodingException(
+                    "Codepage number may not be " + codepage );
+        switch ( codepage )
+        {
+        case Constants.CP_UTF16:
+            return "UTF-16";
+        case Constants.CP_UTF16_BE:
+            return "UTF-16BE";
+        case Constants.CP_UTF8:
+            return "UTF-8";
+        case Constants.CP_037:
+            return "cp037";
+        case Constants.CP_GBK:
+            return "GBK";
+        case Constants.CP_MS949:
+            return "ms949";
+        case Constants.CP_WINDOWS_1250:
+            return "windows-1250";
+        case Constants.CP_WINDOWS_1251:
+            return "windows-1251";
+        case Constants.CP_WINDOWS_1252:
+            return "windows-1252";
+        case Constants.CP_WINDOWS_1253:
+            return "windows-1253";
+        case Constants.CP_WINDOWS_1254:
+            return "windows-1254";
+        case Constants.CP_WINDOWS_1255:
+            return "windows-1255";
+        case Constants.CP_WINDOWS_1256:
+            return "windows-1256";
+        case Constants.CP_WINDOWS_1257:
+            return "windows-1257";
+        case Constants.CP_WINDOWS_1258:
+            return "windows-1258";
+        case Constants.CP_JOHAB:
+            return "johab";
+        case Constants.CP_MAC_ROMAN:
+            return "MacRoman";
+        case Constants.CP_MAC_JAPAN:
+            return "SJIS";
+        case Constants.CP_MAC_CHINESE_TRADITIONAL:
+            return "Big5";
+        case Constants.CP_MAC_KOREAN:
+            return "EUC-KR";
+        case Constants.CP_MAC_ARABIC:
+            return "MacArabic";
+        case Constants.CP_MAC_HEBREW:
+            return "MacHebrew";
+        case Constants.CP_MAC_GREEK:
+            return "MacGreek";
+        case Constants.CP_MAC_CYRILLIC:
+            return "MacCyrillic";
+        case Constants.CP_MAC_CHINESE_SIMPLE:
+            return "EUC_CN";
+        case Constants.CP_MAC_ROMANIA:
+            return "MacRomania";
+        case Constants.CP_MAC_UKRAINE:
+            return "MacUkraine";
+        case Constants.CP_MAC_THAI:
+            return "MacThai";
+        case Constants.CP_MAC_CENTRAL_EUROPE:
+            return "MacCentralEurope";
+        case Constants.CP_MAC_ICELAND:
+            return "MacIceland";
+        case Constants.CP_MAC_TURKISH:
+            return "MacTurkish";
+        case Constants.CP_MAC_CROATIAN:
+            return "MacCroatian";
+        case Constants.CP_US_ACSII:
+        case Constants.CP_US_ASCII2:
+            return "US-ASCII";
+        case Constants.CP_KOI8_R:
+            return "KOI8-R";
+        case Constants.CP_ISO_8859_1:
+            return "ISO-8859-1";
+        case Constants.CP_ISO_8859_2:
+            return "ISO-8859-2";
+        case Constants.CP_ISO_8859_3:
+            return "ISO-8859-3";
+        case Constants.CP_ISO_8859_4:
+            return "ISO-8859-4";
+        case Constants.CP_ISO_8859_5:
+            return "ISO-8859-5";
+        case Constants.CP_ISO_8859_6:
+            return "ISO-8859-6";
+        case Constants.CP_ISO_8859_7:
+            return "ISO-8859-7";
+        case Constants.CP_ISO_8859_8:
+            return "ISO-8859-8";
+        case Constants.CP_ISO_8859_9:
+            return "ISO-8859-9";
+        case Constants.CP_ISO_2022_JP1:
+        case Constants.CP_ISO_2022_JP2:
+        case Constants.CP_ISO_2022_JP3:
+            return "ISO-2022-JP";
+        case Constants.CP_ISO_2022_KR:
+            return "ISO-2022-KR";
+        case Constants.CP_EUC_JP:
+            return "EUC-JP";
+        case Constants.CP_EUC_KR:
+            return "EUC-KR";
+        case Constants.CP_GB2312:
+            return "GB2312";
+        case Constants.CP_GB18030:
+            return "GB18030";
+        case Constants.CP_SJIS:
+            return "SJIS";
+        default:
+            return "cp" + codepage;
+        }
+    }
+
+    private byte[] _value;
  
      CodePageString( final byte[] data, final int startOffset )
      {
@@ -24,8 +143,41 @@ class CodePageString
                              + " is not NULL-terminated" );
      }
  
+    CodePageString( String string, int codepage )
+            throws UnsupportedEncodingException
+    {
+        setJavaValue( string, codepage );
+    }
+
+    String getJavaValue( int codepage ) throws UnsupportedEncodingException
+    {
+        String result;
+        if ( codepage == -1 )
+            result = new String( _value );
+        else
+            result = new String( _value, codepageToEncoding( codepage ) );
+        return result.substring( 0, result.length() - 1 );
+    }
+
      int getSize()
      {
          return LittleEndian.INT_SIZE + _value.length;
      }
+
+    void setJavaValue( String string, int codepage )
+            throws UnsupportedEncodingException
+    {
+        if ( codepage == -1 )
+            _value = ( string + "\0" ).getBytes();
+        else
+            _value = ( string + "\0" )
+                    .getBytes( codepageToEncoding( codepage ) );
+    }
+
+    int write( OutputStream out ) throws IOException
+    {
+        LittleEndian.putInt( _value.length, out );
+        out.write( _value );
+        return LittleEndian.INT_SIZE + _value.length;
+    }
  }
diff --git a/src/java/org/apache/poi/hpsf/VariantSupport.java b/src/java/org/apache/poi/hpsf/VariantSupport.java

index f1752de8a061733df1ae617dabfbaaa9cf4ceb9a..914376713a1a85d25c00180d92b1a4d044e6254a 100644 (file)
--- a/src/java/org/apache/poi/hpsf/VariantSupport.java
+++ b/src/java/org/apache/poi/hpsf/VariantSupport.java
@@ -24,9 +24,6 @@ import java.util.Date;
  import java.util.LinkedList;
  import java.util.List;
  
-import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.LittleEndianConsts;
-
  /**
   * <p>Supports reading and writing of variant data.</p>
   *
@@ -153,169 +150,124 @@ public class VariantSupport extends Variant
       * @exception UnsupportedEncodingException if the specified codepage is not
       *            supported.
       * @see Variant
+     * @deprecated Use {@link #read(byte[],int,long,int)} instead
+     */
+    @Deprecated
+    public static Object read( final byte[] src, final int offset,
+            final int length, final long type, final int codepage )
+            throws ReadingNotSupportedException, UnsupportedEncodingException
+    {
+        return read( src, offset, type, codepage );
+    }
+
+    /**
+     * <p>Reads a variant type from a byte array.</p>
+     *
+     * @param src The byte array
+     * @param offset The offset in the byte array where the variant starts
+     * @param type The variant type to read
+     * @param codepage The codepage to use for non-wide strings
+     * @return A Java object that corresponds best to the variant field. For
+     *         example, a VT_I4 is returned as a {@link Long}, a VT_LPSTR as a
+     *         {@link String}.
+     * @exception ReadingNotSupportedException if a property is to be written
+     *            who's variant type HPSF does not yet support
+     * @exception UnsupportedEncodingException if the specified codepage is not
+     *            supported.
+     * @see Variant
       */
      public static Object read(final byte[] src, final int offset,
-                              final int length, final long type,
-                              final int codepage)
+                              final long type, final int codepage)
      throws ReadingNotSupportedException, UnsupportedEncodingException
      {
-        Object value;
-        int o1 = offset;
-        int l1 = length - LittleEndian.INT_SIZE;
-        long lType = type;
+        TypedPropertyValue typedPropertyValue = new TypedPropertyValue(
+                (int) type, null );
+        int unpadded = typedPropertyValue.readValue( src, offset );
  
-        /* Instead of trying to read 8-bit characters from a Unicode string,
-         * read 16-bit characters. */
-        if (codepage == Constants.CP_UNICODE && type == Variant.VT_LPSTR)
-            lType = Variant.VT_LPWSTR;
+        switch ( (int) type )
+        {
+        case Variant.VT_EMPTY:
+        case Variant.VT_I4:
+        case Variant.VT_I8:
+        case Variant.VT_R8:
+            /*
+             * we have more property types that can be converted into Java
+             * objects, but current API need to be preserved, and it returns
+             * other types as byte arrays. In future major versions it shall be
+             * changed -- sergey
+             */
+            return typedPropertyValue.getValue();
  
-        switch ((int) lType)
+        case Variant.VT_I2:
          {
-            case Variant.VT_EMPTY:
-            {
-                value = null;
-                break;
-            }
-            case Variant.VT_I2:
-            {
-                /*
-                 * Read a short. In Java it is represented as an
-                 * Integer object.
-                 */
-                value = Integer.valueOf(LittleEndian.getShort(src, o1));
-                break;
-            }
-            case Variant.VT_I4:
-            {
-                /*
-                 * Read a word. In Java it is represented as an
-                 * Integer object.
-                 */
-                value = Integer.valueOf(LittleEndian.getInt(src, o1));
-                break;
-            }
-            case Variant.VT_I8:
-            {
-                /*
-                 * Read a double word. In Java it is represented as a
-                 * Long object.
-                 */
-                value = Long.valueOf(LittleEndian.getLong(src, o1));
-                break;
-            }
-            case Variant.VT_R8:
-            {
-                /*
-                 * Read an eight-byte double value. In Java it is represented as
-                 * a Double object.
-                 */
-                value = new Double(LittleEndian.getDouble(src, o1));
-                break;
-            }
-            case Variant.VT_FILETIME:
-            {
-                /*
-                 * Read a FILETIME object. In Java it is represented
-                 * as a Date object.
-                 */
-                final long low = LittleEndian.getUInt(src, o1);
-                o1 += LittleEndian.INT_SIZE;
-                final long high = LittleEndian.getUInt(src, o1);
-                value = Util.filetimeToDate((int) high, (int) low);
-                break;
-            }
-            case Variant.VT_LPSTR:
-            {
-                /*
-                 * Read a byte string. In Java it is represented as a
-                 * String object. The 0x00 bytes at the end must be
-                 * stripped.
-                 */
-                final int first = o1 + LittleEndian.INT_SIZE;
-                long last = first + LittleEndian.getUInt(src, o1) - 1;
-                o1 += LittleEndian.INT_SIZE;
-                while (src[(int) last] == 0 && first <= last)
-                    last--;
-                final int l = (int) (last - first + 1);
-                value = codepage != -1 ?
-                    new String(src, first, l,
-                               codepageToEncoding(codepage)) :
-                    new String(src, first, l);
-                break;
-            }
-            case Variant.VT_LPWSTR:
-            {
-                /*
-                 * Read a Unicode string. In Java it is represented as
-                 * a String object. The 0x00 bytes at the end must be
-                 * stripped.
-                 */
-                final int first = o1 + LittleEndian.INT_SIZE;
-                long last = first + LittleEndian.getUInt(src, o1) - 1;
-                long l = last - first;
-                o1 += LittleEndian.INT_SIZE;
-                StringBuffer b = new StringBuffer((int) (last - first));
-                for (int i = 0; i <= l; i++)
-                {
-                    final int i1 = o1 + (i * 2);
-                    final int i2 = i1 + 1;
-                    final int high = src[i2] << 8;
-                    final int low = src[i1] & 0x00ff;
-                    final char c = (char) (high | low);
-                    b.append(c);
-                }
-                /* Strip 0x00 characters from the end of the string: */
-                while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
-                    b.setLength(b.length() - 1);
-                value = b.toString();
-                break;
-            }
-            case Variant.VT_CF:
-            {
-                if(l1 < 0) {
-                    /**
-                     *  YK: reading the ClipboardData packet (VT_CF) is not quite correct.
-                     *  The size of the data is determined by the first four bytes of the packet
-                     *  while the current implementation calculates it in the Section constructor.
-                     *  Test files in Bugzilla 42726 and 45583 clearly show that this approach does not always work.
-                     *  The workaround below attempts to gracefully handle such cases instead of throwing exceptions.
-                     *
-                     *  August 20, 2009
-                     */
-                    l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE;
-                }
-                final byte[] v = new byte[l1];
-                System.arraycopy(src, o1, v, 0, v.length);
-                value = v;
-                break;
-            }
-            case Variant.VT_BOOL:
-            {
-                /*
-                 * The first four bytes in src, from src[offset] to
-                 * src[offset + 3] contain the DWord for VT_BOOL, so
-                 * skip it, we don't need it.
-                 */
-                // final int first = offset + LittleEndian.INT_SIZE;
-                long bool = LittleEndian.getUInt(src, o1);
-                if (bool != 0)
-                    value = Boolean.TRUE;
-                else
-                    value = Boolean.FALSE;
-                break;
-            }
-            default:
-            {
-                final byte[] v = new byte[l1];
-                for (int i = 0; i < l1; i++)
-                    v[i] = src[(o1 + i)];
-                throw new ReadingNotSupportedException(type, v);
-            }
+            /*
+             * also for backward-compatibility with prev. versions of POI
+             * --sergey
+             */
+            return Integer.valueOf( ( (Short) typedPropertyValue.getValue() )
+                    .intValue() );
+        }
+        case Variant.VT_FILETIME:
+        {
+            Filetime filetime = (Filetime) typedPropertyValue.getValue();
+            return Util.filetimeToDate( (int) filetime.getHigh(),
+                    (int) filetime.getLow() );
+        }
+        case Variant.VT_LPSTR:
+        {
+            CodePageString string = (CodePageString) typedPropertyValue
+                    .getValue();
+            return string.getJavaValue( codepage );
+        }
+        case Variant.VT_LPWSTR:
+        {
+            UnicodeString string = (UnicodeString) typedPropertyValue
+                    .getValue();
+            return string.toJavaString();
+        }
+        case Variant.VT_CF:
+        {
+            // if(l1 < 0) {
+            /**
+             * YK: reading the ClipboardData packet (VT_CF) is not quite
+             * correct. The size of the data is determined by the first four
+             * bytes of the packet while the current implementation calculates
+             * it in the Section constructor. Test files in Bugzilla 42726 and
+             * 45583 clearly show that this approach does not always work. The
+             * workaround below attempts to gracefully handle such cases instead
+             * of throwing exceptions.
+             * 
+             * August 20, 2009
+             */
+            // l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE;
+            // }
+            // final byte[] v = new byte[l1];
+            // System.arraycopy(src, o1, v, 0, v.length);
+            // value = v;
+            // break;
+            ClipboardData clipboardData = (ClipboardData) typedPropertyValue
+                    .getValue();
+            return clipboardData.toByteArray();
          }
-        return value;
-    }
  
+        case Variant.VT_BOOL:
+        {
+            VariantBool bool = (VariantBool) typedPropertyValue.getValue();
+            return Boolean.valueOf( bool.getValue() );
+        }
  
+        default:
+        {
+            /*
+             * it is not very good, but what can do without breaking current
+             * API? --sergey
+             */
+            final byte[] v = new byte[unpadded];
+            System.arraycopy( src, offset, v, 0, unpadded );
+            throw new ReadingNotSupportedException( type, v );
+        }
+        }
+    }
  
      /**
       * <p>Turns a codepage number into the equivalent character encoding's
@@ -491,16 +443,9 @@ public class VariantSupport extends Variant
              }
              case Variant.VT_LPSTR:
              {
-                final byte[] bytes =
-                    (codepage == -1 ?
-                    ((String) value).getBytes() :
-                    ((String) value).getBytes(codepageToEncoding(codepage)));
-                length = TypeWriter.writeUIntToStream(out, bytes.length + 1);
-                final byte[] b = new byte[bytes.length + 1];
-                System.arraycopy(bytes, 0, b, 0, bytes.length);
-                b[b.length - 1] = 0x00;
-                out.write(b);
-                length += b.length;
+                CodePageString codePageString = new CodePageString( (String) value,
+                        codepage );
+                length += codePageString.write( out );
                  break;
              }
              case Variant.VT_LPWSTR:
@@ -533,14 +478,13 @@ public class VariantSupport extends Variant
              }
              case Variant.VT_EMPTY:
              {
-                TypeWriter.writeUIntToStream(out, Variant.VT_EMPTY);
-                length = LittleEndianConsts.INT_SIZE;
+                length += TypeWriter.writeUIntToStream( out, Variant.VT_EMPTY );
                  break;
              }
              case Variant.VT_I2:
              {
-                TypeWriter.writeToStream(out, ((Integer) value).shortValue());
-                length = LittleEndianConsts.SHORT_SIZE;
+                length += TypeWriter.writeToStream( out,
+                        ( (Integer) value ).shortValue() );
                  break;
              }
              case Variant.VT_I4:
@@ -558,8 +502,7 @@ public class VariantSupport extends Variant
              }
              case Variant.VT_I8:
              {
-                TypeWriter.writeToStream(out, ((Long) value).longValue());
-                length = LittleEndianConsts.LONG_SIZE;
+                length += TypeWriter.writeToStream(out, ((Long) value).longValue());
                  break;
              }
              case Variant.VT_R8:
@@ -573,10 +516,8 @@ public class VariantSupport extends Variant
                  long filetime = Util.dateToFileTime((Date) value);
                  int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL);
                  int low = (int) (filetime & 0x00000000FFFFFFFFL);
-                length += TypeWriter.writeUIntToStream
-                    (out, 0x0000000FFFFFFFFL & low);
-                length += TypeWriter.writeUIntToStream
-                    (out, 0x0000000FFFFFFFFL & high);
+                Filetime filetimeValue = new Filetime( low, high);
+                length += filetimeValue.write( out );
                  break;
              }
              default:
author	Sergey Vladimirov <sergey@apache.org>
	Sat, 22 Oct 2011 02:02:34 +0000 (02:02 +0000)
committer	Sergey Vladimirov <sergey@apache.org>
	Sat, 22 Oct 2011 02:02:34 +0000 (02:02 +0000)
src/java/org/apache/poi/hpsf/ClipboardData.java		patch \| blob \| history
src/java/org/apache/poi/hpsf/CodePageString.java		patch \| blob \| history
src/java/org/apache/poi/hpsf/VariantSupport.java		patch \| blob \| history