From 87b09b2b0bcb5777009756f3f3be5befa70abee1 Mon Sep 17 00:00:00 2001 From: Rainer Klute Date: Sat, 6 May 2006 09:52:19 +0000 Subject: [PATCH] Support for many, many character encodings added. Thanks to Trejkaz for the patch! git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@400277 13f79535-47bb-0310-9956-ffa450edef68 --- src/java/org/apache/poi/hpsf/Constants.java | 101 +++++++++++++++++- .../org/apache/poi/hpsf/VariantSupport.java | 63 +++++++++++ 2 files changed, 162 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/poi/hpsf/Constants.java b/src/java/org/apache/poi/hpsf/Constants.java index 32a0addf20..01e18da378 100644 --- a/src/java/org/apache/poi/hpsf/Constants.java +++ b/src/java/org/apache/poi/hpsf/Constants.java @@ -26,16 +26,54 @@ package org.apache.poi.hpsf; */ public class Constants { + /**

Codepage 037 (IBM EBCDIC US-Canada), a special case.

*/ + public static final int CP_037 = 37; /**

Codepage for SJIS

*/ public static final int CP_SJIS = 932; + /**

Codepage for GBK, aka MS936.

*/ + public static final int CP_GBK = 936; + /**

Codepage for MS949

*/ public static final int CP_MS949 = 949; - /**

Codepage for UTF-16

*/ + /**

Codepage for UTF-16 (little-endian).

*/ public static final int CP_UTF16 = 1200; + /**

Codepage for UTF-16 big-endian.

*/ + public static final int CP_UTF16_BE = 1201; + + /**

Codepage for Windows 1250.

*/ + public static final int CP_WINDOWS_1250 = 1250; + + /**

Codepage for Windows 1251.

*/ + public static final int CP_WINDOWS_1251 = 1251; + + /**

Codepage for Windows 1252.

*/ + public static final int CP_WINDOWS_1252 = 1252; + + /**

Codepage for Windows 1253.

*/ + public static final int CP_WINDOWS_1253 = 1253; + + /**

Codepage for Windows 1254.

*/ + public static final int CP_WINDOWS_1254 = 1254; + + /**

Codepage for Windows 1255.

*/ + public static final int CP_WINDOWS_1255 = 1255; + + /**

Codepage for Windows 1256.

*/ + public static final int CP_WINDOWS_1256 = 1256; + + /**

Codepage for Windows 1257.

*/ + public static final int CP_WINDOWS_1257 = 1257; + + /**

Codepage for Windows 1258.

*/ + public static final int CP_WINDOWS_1258 = 1258; + + /**

Codepage for Johab.

*/ + public static final int CP_JOHAB = 1361; + /**

Codepage for Macintosh Roman (Java: MacRoman)

*/ public static final int CP_MAC_ROMAN = 10000; @@ -89,10 +127,69 @@ public class Constants /**

Codepage for Macintosh Croatian (Java: MacCroatian)

*/ public static final int CP_MAC_CROATIAN = 10082; + /**

Codepage for US-ASCII.

*/ + public static final int CP_US_ACSII = 20127; + + /**

Codepage for KOI8-R

*/ + public static final int CP_KOI8_R = 20866; + + /**

Codepage for ISO-8859-1.

*/ + public static final int CP_ISO_8859_1 = 28591; + + /**

Codepage for ISO-8859-2.

*/ + public static final int CP_ISO_8859_2 = 28592; + + /**

Codepage for ISO-8859-3.

*/ + public static final int CP_ISO_8859_3 = 28593; + + /**

Codepage for ISO-8859-4.

*/ + public static final int CP_ISO_8859_4 = 28594; + + /**

Codepage for ISO-8859-5.

*/ + public static final int CP_ISO_8859_5 = 28595; + + /**

Codepage for ISO-8859-6.

*/ + public static final int CP_ISO_8859_6 = 28596; + + /**

Codepage for ISO-8859-7.

*/ + public static final int CP_ISO_8859_7 = 28597; + + /**

Codepage for ISO-8859-8.

*/ + public static final int CP_ISO_8859_8 = 28598; + + /**

Codepage for ISO-8859-9.

*/ + public static final int CP_ISO_8859_9 = 28599; + + /**

Codepage for ISO-2022-JP

*/ + public static final int CP_ISO_2022_JP1 = 50220; + + /**

Another codepage for ISO-2022-JP (the Windows variant that allows halfwidth Katakana).

*/ + public static final int CP_ISO_2022_JP2 = 50221; + + /**

Yet another codepage for ISO-2022-JP (the Windows variant that allows 1-byte Kana via SO/SI).

*/ + public static final int CP_ISO_2022_JP3 = 50222; + + /**

Codepage for ISO-2022-KR

*/ + public static final int CP_ISO_2022_KR = 50225; + + /**

Codepage for EUC-JP

*/ + public static final int CP_EUC_JP = 51932; + + /**

Codepage for EUC-KR

*/ + public static final int CP_EUC_KR = 51949; + + /**

Codepage for GB2312. Note that Windows lists this identifier as HZ-GB2312.

*/ + public static final int CP_GB2312 = 52936; + + /**

Codepage for GB18030.

*/ + public static final int CP_GB18030 = 54936; + + /**

Codepage 65000. Windows assigns this identifier to UTF-7; it is treated here as another codepage for US-ASCII.

*/ + public static final int CP_US_ASCII2 = 65000; + /**

Codepage for UTF-8

*/ public static final int CP_UTF8 = 65001; /**

Codepage for Unicode

*/ public static final int CP_UNICODE = CP_UTF16; - } diff --git a/src/java/org/apache/poi/hpsf/VariantSupport.java b/src/java/org/apache/poi/hpsf/VariantSupport.java index 8994bb2fa1..f7317c67af 100644 --- a/src/java/org/apache/poi/hpsf/VariantSupport.java +++ b/src/java/org/apache/poi/hpsf/VariantSupport.java @@ -305,10 +305,36 @@ public class VariantSupport extends Variant { case Constants.CP_UTF16: return "UTF-16"; + case Constants.CP_UTF16_BE: + return "UTF-16BE"; case Constants.CP_UTF8: return "UTF-8"; + case Constants.CP_037: + return "cp037"; + case Constants.CP_GBK: + return "GBK"; case Constants.CP_MS949: return "ms949"; + case Constants.CP_WINDOWS_1250: + return "windows-1250"; + case Constants.CP_WINDOWS_1251: + return "windows-1251"; + case Constants.CP_WINDOWS_1252: + return "windows-1252"; + case Constants.CP_WINDOWS_1253: + return "windows-1253"; + case Constants.CP_WINDOWS_1254: + return "windows-1254"; + case Constants.CP_WINDOWS_1255: + return "windows-1255"; + case Constants.CP_WINDOWS_1256: + return "windows-1256"; + case Constants.CP_WINDOWS_1257: + return "windows-1257"; + case Constants.CP_WINDOWS_1258: + return "windows-1258"; + case Constants.CP_JOHAB: + return "johab"; case Constants.CP_MAC_ROMAN: return "MacRoman"; case Constants.CP_MAC_JAPAN: @@ -341,6 +367,43 @@ public class VariantSupport extends Variant return "MacTurkish"; case Constants.CP_MAC_CROATIAN: return "MacCroatian"; + case Constants.CP_US_ACSII: + case Constants.CP_US_ASCII2: + return "US-ASCII"; + case Constants.CP_KOI8_R: + return "KOI8-R"; + case Constants.CP_ISO_8859_1: + return "ISO-8859-1"; + case Constants.CP_ISO_8859_2: + return "ISO-8859-2"; + case Constants.CP_ISO_8859_3: + return "ISO-8859-3"; + case Constants.CP_ISO_8859_4: + return "ISO-8859-4"; + case Constants.CP_ISO_8859_5: + return "ISO-8859-5"; + case Constants.CP_ISO_8859_6: + return "ISO-8859-6"; + case Constants.CP_ISO_8859_7: + return "ISO-8859-7"; + case Constants.CP_ISO_8859_8: + return 
"ISO-8859-8"; + case Constants.CP_ISO_8859_9: + return "ISO-8859-9"; + case Constants.CP_ISO_2022_JP1: + case Constants.CP_ISO_2022_JP2: + case Constants.CP_ISO_2022_JP3: + return "ISO-2022-JP"; + case Constants.CP_ISO_2022_KR: + return "ISO-2022-KR"; + case Constants.CP_EUC_JP: + return "EUC-JP"; + case Constants.CP_EUC_KR: + return "EUC-KR"; + case Constants.CP_GB2312: + return "GB2312"; + case Constants.CP_GB18030: + return "GB18030"; case Constants.CP_SJIS: return "SJIS"; default: -- 2.39.5