Browse Source

Deprecate the old HPSF codepage Constants list, and change the HPSF code to use the new CodePageUtil class for its codepage work instead

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1497035 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_10_BETA2
Nick Burch 11 years ago
parent
commit
401dcb864d

+ 5
- 121
src/java/org/apache/poi/hpsf/CodePageString.java View File

@@ -20,6 +20,7 @@ import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;

import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
@@ -28,126 +29,9 @@ import org.apache.poi.util.POILogger;
@Internal
class CodePageString
{

private final static POILogger logger = POILogFactory
.getLogger( CodePageString.class );

/**
 * Maps a numeric codepage identifier onto the name of a character
 * encoding.
 *
 * @param codepage the codepage number; must be greater than zero
 * @return the encoding name listed for the codepage, or {@code "cp"}
 *         followed by the number when the codepage has no explicit entry
 * @throws UnsupportedEncodingException if {@code codepage} is zero or
 *         negative
 */
private static String codepageToEncoding( final int codepage )
        throws UnsupportedEncodingException
{
    if ( codepage <= 0 )
    {
        throw new UnsupportedEncodingException(
                "Codepage number may not be " + codepage );
    }

    final String encoding;
    switch ( codepage )
    {
        /* Unicode transformation formats */
        case Constants.CP_UTF16: encoding = "UTF-16"; break;
        case Constants.CP_UTF16_BE: encoding = "UTF-16BE"; break;
        case Constants.CP_UTF8: encoding = "UTF-8"; break;

        case Constants.CP_037: encoding = "cp037"; break;
        case Constants.CP_GBK: encoding = "GBK"; break;
        case Constants.CP_MS949: encoding = "ms949"; break;

        /* Windows codepages */
        case Constants.CP_WINDOWS_1250: encoding = "windows-1250"; break;
        case Constants.CP_WINDOWS_1251: encoding = "windows-1251"; break;
        case Constants.CP_WINDOWS_1252: encoding = "windows-1252"; break;
        case Constants.CP_WINDOWS_1253: encoding = "windows-1253"; break;
        case Constants.CP_WINDOWS_1254: encoding = "windows-1254"; break;
        case Constants.CP_WINDOWS_1255: encoding = "windows-1255"; break;
        case Constants.CP_WINDOWS_1256: encoding = "windows-1256"; break;
        case Constants.CP_WINDOWS_1257: encoding = "windows-1257"; break;
        case Constants.CP_WINDOWS_1258: encoding = "windows-1258"; break;

        case Constants.CP_JOHAB: encoding = "johab"; break;

        /* Macintosh codepages */
        case Constants.CP_MAC_ROMAN: encoding = "MacRoman"; break;
        case Constants.CP_MAC_CHINESE_TRADITIONAL: encoding = "Big5"; break;
        case Constants.CP_MAC_ARABIC: encoding = "MacArabic"; break;
        case Constants.CP_MAC_HEBREW: encoding = "MacHebrew"; break;
        case Constants.CP_MAC_GREEK: encoding = "MacGreek"; break;
        case Constants.CP_MAC_CYRILLIC: encoding = "MacCyrillic"; break;
        case Constants.CP_MAC_CHINESE_SIMPLE: encoding = "EUC_CN"; break;
        case Constants.CP_MAC_ROMANIA: encoding = "MacRomania"; break;
        case Constants.CP_MAC_UKRAINE: encoding = "MacUkraine"; break;
        case Constants.CP_MAC_THAI: encoding = "MacThai"; break;
        case Constants.CP_MAC_CENTRAL_EUROPE: encoding = "MacCentralEurope"; break;
        case Constants.CP_MAC_ICELAND: encoding = "MacIceland"; break;
        case Constants.CP_MAC_TURKISH: encoding = "MacTurkish"; break;
        case Constants.CP_MAC_CROATIAN: encoding = "MacCroatian"; break;

        case Constants.CP_US_ACSII:
        case Constants.CP_US_ASCII2: encoding = "US-ASCII"; break;

        case Constants.CP_KOI8_R: encoding = "KOI8-R"; break;

        /* ISO 8859 family */
        case Constants.CP_ISO_8859_1: encoding = "ISO-8859-1"; break;
        case Constants.CP_ISO_8859_2: encoding = "ISO-8859-2"; break;
        case Constants.CP_ISO_8859_3: encoding = "ISO-8859-3"; break;
        case Constants.CP_ISO_8859_4: encoding = "ISO-8859-4"; break;
        case Constants.CP_ISO_8859_5: encoding = "ISO-8859-5"; break;
        case Constants.CP_ISO_8859_6: encoding = "ISO-8859-6"; break;
        case Constants.CP_ISO_8859_7: encoding = "ISO-8859-7"; break;
        case Constants.CP_ISO_8859_8: encoding = "ISO-8859-8"; break;
        case Constants.CP_ISO_8859_9: encoding = "ISO-8859-9"; break;

        case Constants.CP_ISO_2022_JP1:
        case Constants.CP_ISO_2022_JP2:
        case Constants.CP_ISO_2022_JP3: encoding = "ISO-2022-JP"; break;
        case Constants.CP_ISO_2022_KR: encoding = "ISO-2022-KR"; break;

        case Constants.CP_EUC_JP: encoding = "EUC-JP"; break;

        /* Two codepages share each of the following encodings. */
        case Constants.CP_MAC_KOREAN:
        case Constants.CP_EUC_KR: encoding = "EUC-KR"; break;
        case Constants.CP_MAC_JAPAN:
        case Constants.CP_SJIS: encoding = "SJIS"; break;

        case Constants.CP_GB2312: encoding = "GB2312"; break;
        case Constants.CP_GB18030: encoding = "GB18030"; break;

        /* No explicit entry: fall back to the "cp" + number form. */
        default: encoding = "cp" + codepage; break;
    }
    return encoding;
}

private byte[] _value;

CodePageString( final byte[] data, final int startOffset )
@@ -182,7 +66,7 @@ class CodePageString
if ( codepage == -1 )
result = new String( _value );
else
result = new String( _value, codepageToEncoding( codepage ) );
result = CodePageUtil.getStringFromCodePage(_value, codepage);
final int terminator = result.indexOf( '\0' );
if ( terminator == -1 )
{
@@ -210,11 +94,11 @@ class CodePageString
void setJavaValue( String string, int codepage )
throws UnsupportedEncodingException
{
String stringNT = string + "\0";
if ( codepage == -1 )
_value = ( string + "\0" ).getBytes();
_value = stringNT.getBytes();
else
_value = ( string + "\0" )
.getBytes( codepageToEncoding( codepage ) );
_value = CodePageUtil.getBytesInCodePage(stringNT, codepage);
}

int write( OutputStream out ) throws IOException

+ 5
- 4
src/java/org/apache/poi/hpsf/Constants.java View File

@@ -17,11 +17,12 @@

package org.apache.poi.hpsf;

import org.apache.poi.util.CodePageUtil;

/**
* <p>Defines constants of general use.</p>
*
* @author Rainer Klute <a
* href="mailto:klute@rainer-klute.de">&lt;klute@rainer-klute.de&gt;</a>
* <p>Defines constants of general use (currently only codepages).</p>
*
* @deprecated Use {@link CodePageUtil} to lookup code pages
*/
public class Constants
{

+ 2
- 1
src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java View File

@@ -22,6 +22,7 @@ import java.util.Map;

import org.apache.poi.hpsf.wellknown.PropertyIDMap;
import org.apache.poi.hpsf.wellknown.SectionIDMap;
import org.apache.poi.util.CodePageUtil;

/**
* <p>Convenience class representing a DocumentSummary Information stream in a
@@ -617,7 +618,7 @@ public class DocumentSummaryInformation extends SpecialPropertySet
if (cpCodepage < 0)
cpCodepage = section.getCodepage();
if (cpCodepage < 0)
cpCodepage = Constants.CP_UNICODE;
cpCodepage = CodePageUtil.CP_UNICODE;
customProperties.setCodepage(cpCodepage);
section.setCodepage(cpCodepage);
section.setDictionary(dictionary);

+ 3
- 1
src/java/org/apache/poi/hpsf/MutableProperty.java View File

@@ -20,6 +20,8 @@ package org.apache.poi.hpsf;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.poi.util.CodePageUtil;

/**
* <p>Adds writing capability to the {@link Property} class.</p>
*
@@ -109,7 +111,7 @@ public class MutableProperty extends Property
long variantType = getType();

/* Ensure that wide strings are written if the codepage is Unicode. */
if (codepage == Constants.CP_UNICODE && variantType == Variant.VT_LPSTR)
if (codepage == CodePageUtil.CP_UNICODE && variantType == Variant.VT_LPSTR)
variantType = Variant.VT_LPWSTR;

length += TypeWriter.writeUIntToStream(out, variantType);

+ 6
- 7
src/java/org/apache/poi/hpsf/MutableSection.java View File

@@ -30,6 +30,7 @@ import java.util.ListIterator;
import java.util.Map;

import org.apache.poi.hpsf.wellknown.PropertyIDMap;
import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.LittleEndian;

/**
@@ -418,7 +419,7 @@ public class MutableSection extends Section
* dictionary is present. In order to cope with this problem we
* add the codepage property and set it to Unicode. */
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
Integer.valueOf(Constants.CP_UNICODE));
Integer.valueOf(CodePageUtil.CP_UNICODE));
codepage = getCodepage();
}

@@ -509,7 +510,7 @@ public class MutableSection extends Section
final Long key = i.next();
final String value = dictionary.get(key);

if (codepage == Constants.CP_UNICODE)
if (codepage == CodePageUtil.CP_UNICODE)
{
/* Write the dictionary item in Unicode. */
int sLength = value.length() + 1;
@@ -517,8 +518,7 @@ public class MutableSection extends Section
sLength++;
length += TypeWriter.writeUIntToStream(out, key.longValue());
length += TypeWriter.writeUIntToStream(out, sLength);
final byte[] ca =
value.getBytes(VariantSupport.codepageToEncoding(codepage));
final byte[] ca = CodePageUtil.getBytesInCodePage(value, codepage);
for (int j = 2; j < ca.length; j += 2)
{
out.write(ca[j+1]);
@@ -540,8 +540,7 @@ public class MutableSection extends Section
* Unicode. */
length += TypeWriter.writeUIntToStream(out, key.longValue());
length += TypeWriter.writeUIntToStream(out, value.length() + 1);
final byte[] ba =
value.getBytes(VariantSupport.codepageToEncoding(codepage));
final byte[] ba = CodePageUtil.getBytesInCodePage(value, codepage);
for (int j = 0; j < ba.length; j++)
{
out.write(ba[j]);
@@ -634,7 +633,7 @@ public class MutableSection extends Section
(Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
if (codepage == null)
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
Integer.valueOf(Constants.CP_UNICODE));
Integer.valueOf(CodePageUtil.CP_UNICODE));
}
else
/* Setting the dictionary to null means to remove property 0.

+ 4
- 3
src/java/org/apache/poi/hpsf/Property.java View File

@@ -21,6 +21,7 @@ import java.io.UnsupportedEncodingException;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
@@ -240,7 +241,7 @@ public class Property
b.append(new String(src, o, (int) sLength));
break;
}
case Constants.CP_UNICODE:
case CodePageUtil.CP_UNICODE:
{
/* The length is the number of characters, i.e. the number
* of bytes is twice the number of the characters. */
@@ -252,7 +253,7 @@ public class Property
h[i2 + 1] = src[o + i2];
}
b.append(new String(h, 0, nrBytes,
VariantSupport.codepageToEncoding(codepage)));
CodePageUtil.codepageToEncoding(codepage)));
break;
}
default:
@@ -268,7 +269,7 @@ public class Property
/* Strip 0x00 characters from the end of the string: */
while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
b.setLength(b.length() - 1);
if (codepage == Constants.CP_UNICODE)
if (codepage == CodePageUtil.CP_UNICODE)
{
if (sLength % 2 == 1)
sLength++;

+ 2
- 111
src/java/org/apache/poi/hpsf/VariantSupport.java View File

@@ -24,6 +24,7 @@ import java.util.Date;
import java.util.LinkedList;
import java.util.List;

import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;

@@ -275,117 +276,7 @@ public class VariantSupport extends Variant
/**
 * Turns a codepage number into the name of a character encoding.
 *
 * <p>The lookup is delegated to
 * {@link CodePageUtil#codepageToEncoding(int)}, so this class no longer
 * carries its own codepage table.</p>
 *
 * @param codepage the codepage number
 * @return the encoding name reported by {@link CodePageUtil} for the
 *         codepage
 * @throws UnsupportedEncodingException propagated unchanged from
 *         {@link CodePageUtil#codepageToEncoding(int)}
 */
public static String codepageToEncoding(final int codepage)
        throws UnsupportedEncodingException
{
    return CodePageUtil.codepageToEncoding(codepage);
}



+ 3
- 3
src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java View File

@@ -25,14 +25,14 @@ import java.io.IOException;
import junit.framework.Assert;
import junit.framework.TestCase;

import org.apache.poi.hpsf.Constants;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.HPSFException;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.Section;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.POIDataSamples;
import org.apache.poi.util.CodePageUtil;

/**
* <p>Tests whether Unicode string can be read from a
@@ -82,7 +82,7 @@ public class TestUnicode extends TestCase {
Assert.assertEquals(ps.getSectionCount(), 2);
Section s = (Section) ps.getSections().get(1);
Assert.assertEquals(s.getProperty(1),
Integer.valueOf(Constants.CP_UTF16));
Integer.valueOf(CodePageUtil.CP_UTF16));
Assert.assertEquals(s.getProperty(2),
Integer.valueOf(-96070278));
Assert.assertEquals(s.getProperty(3),

+ 7
- 7
src/testcases/org/apache/poi/hpsf/basic/TestWrite.java View File

@@ -38,7 +38,6 @@ import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hpsf.ClassID;
import org.apache.poi.hpsf.Constants;
import org.apache.poi.hpsf.HPSFRuntimeException;
import org.apache.poi.hpsf.IllegalPropertySetDataException;
import org.apache.poi.hpsf.MutableProperty;
@@ -61,6 +60,7 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.TempFile;

@@ -351,8 +351,8 @@ public class TestWrite extends TestCase

private static final int CODEPAGE_DEFAULT = -1;
private static final int CODEPAGE_1252 = 1252;
private static final int CODEPAGE_UTF8 = Constants.CP_UTF8;
private static final int CODEPAGE_UTF16 = Constants.CP_UTF16;
private static final int CODEPAGE_UTF8 = CodePageUtil.CP_UTF8;
private static final int CODEPAGE_UTF16 = CodePageUtil.CP_UTF16;



@@ -472,7 +472,7 @@ public class TestWrite extends TestCase
check(t, "\u00e4\u00f6\u00fc\u00c4\u00d6", cp);
check(t, "\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc", cp);
check(t, "\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df", cp);
if (cp == Constants.CP_UTF16 || cp == Constants.CP_UTF8)
if (cp == CodePageUtil.CP_UTF16 || cp == CodePageUtil.CP_UTF8)
check(t, "\u79D1\u5B78", cp);
}
catch (Exception ex)
@@ -759,13 +759,13 @@ public class TestWrite extends TestCase
final POIFSFileSystem poiFs = new POIFSFileSystem();
final MutablePropertySet ps1 = new MutablePropertySet();
final MutableSection s = (MutableSection) ps1.getSections().get(0);
final Map m = new HashMap(3, 1.0f);
final Map<Long,String> m = new HashMap<Long,String>(3, 1.0f);
m.put(Long.valueOf(1), "String 1");
m.put(Long.valueOf(2), "String 2");
m.put(Long.valueOf(3), "String 3");
s.setDictionary(m);
s.setFormatID(SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID[0]);
int codepage = Constants.CP_UNICODE;
int codepage = CodePageUtil.CP_UNICODE;
s.setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
Integer.valueOf(codepage));
poiFs.createDocument(ps1.toInputStream(), "Test");
@@ -811,7 +811,7 @@ public class TestWrite extends TestCase
final POIFSFileSystem poiFs = new POIFSFileSystem();
final MutablePropertySet ps1 = new MutablePropertySet();
final MutableSection s = (MutableSection) ps1.getSections().get(0);
final Map m = new HashMap(3, 1.0f);
final Map<Long,String> m = new HashMap<Long, String>(3, 1.0f);
m.put(Long.valueOf(1), "String 1");
m.put(Long.valueOf(2), "String 2");
m.put(Long.valueOf(3), "String 3");

+ 3
- 8
src/testcases/org/apache/poi/hpsf/basic/TestWriteWellKnown.java View File

@@ -28,9 +28,7 @@ import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import junit.framework.TestCase;

@@ -629,10 +627,7 @@ public class TestWriteWellKnown extends TestCase {
/* The document does not have custom properties. */
return;

for (final Iterator i = cps.entrySet().iterator(); i.hasNext();)
{
final Map.Entry e = (Entry) i.next();
final CustomProperty cp = (CustomProperty) e.getValue();
for (CustomProperty cp : cps.values()) {
cp.getName();
cp.getValue();
}
@@ -704,7 +699,7 @@ public class TestWriteWellKnown extends TestCase {
final int ID_2 = 3;
final String NAME_1 = "Schl\u00fcssel \u00e4";
final String VALUE_1 = "Wert 1";
final Map dictionary = new HashMap();
final Map<Long,String> dictionary = new HashMap<Long, String>();

DocumentSummaryInformation dsi = PropertySetFactory.newDocumentSummaryInformation();
CustomProperties cps;
@@ -717,7 +712,7 @@ public class TestWriteWellKnown extends TestCase {
/* Test an empty custom properties set. */
s = new MutableSection();
s.setFormatID(SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID[1]);
// s.setCodepage(Constants.CP_UNICODE);
// s.setCodepage(CodePageUtil.CP_UNICODE);
dsi.addSection(s);
cps = dsi.getCustomProperties();
assertEquals(0, cps.size());

Loading…
Cancel
Save