import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
+import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
@Internal
class CodePageString
{
-
private final static POILogger logger = POILogFactory
.getLogger( CodePageString.class );
- private static String codepageToEncoding( final int codepage )
- throws UnsupportedEncodingException
- {
- if ( codepage <= 0 )
- throw new UnsupportedEncodingException(
- "Codepage number may not be " + codepage );
- switch ( codepage )
- {
- case Constants.CP_UTF16:
- return "UTF-16";
- case Constants.CP_UTF16_BE:
- return "UTF-16BE";
- case Constants.CP_UTF8:
- return "UTF-8";
- case Constants.CP_037:
- return "cp037";
- case Constants.CP_GBK:
- return "GBK";
- case Constants.CP_MS949:
- return "ms949";
- case Constants.CP_WINDOWS_1250:
- return "windows-1250";
- case Constants.CP_WINDOWS_1251:
- return "windows-1251";
- case Constants.CP_WINDOWS_1252:
- return "windows-1252";
- case Constants.CP_WINDOWS_1253:
- return "windows-1253";
- case Constants.CP_WINDOWS_1254:
- return "windows-1254";
- case Constants.CP_WINDOWS_1255:
- return "windows-1255";
- case Constants.CP_WINDOWS_1256:
- return "windows-1256";
- case Constants.CP_WINDOWS_1257:
- return "windows-1257";
- case Constants.CP_WINDOWS_1258:
- return "windows-1258";
- case Constants.CP_JOHAB:
- return "johab";
- case Constants.CP_MAC_ROMAN:
- return "MacRoman";
- case Constants.CP_MAC_JAPAN:
- return "SJIS";
- case Constants.CP_MAC_CHINESE_TRADITIONAL:
- return "Big5";
- case Constants.CP_MAC_KOREAN:
- return "EUC-KR";
- case Constants.CP_MAC_ARABIC:
- return "MacArabic";
- case Constants.CP_MAC_HEBREW:
- return "MacHebrew";
- case Constants.CP_MAC_GREEK:
- return "MacGreek";
- case Constants.CP_MAC_CYRILLIC:
- return "MacCyrillic";
- case Constants.CP_MAC_CHINESE_SIMPLE:
- return "EUC_CN";
- case Constants.CP_MAC_ROMANIA:
- return "MacRomania";
- case Constants.CP_MAC_UKRAINE:
- return "MacUkraine";
- case Constants.CP_MAC_THAI:
- return "MacThai";
- case Constants.CP_MAC_CENTRAL_EUROPE:
- return "MacCentralEurope";
- case Constants.CP_MAC_ICELAND:
- return "MacIceland";
- case Constants.CP_MAC_TURKISH:
- return "MacTurkish";
- case Constants.CP_MAC_CROATIAN:
- return "MacCroatian";
- case Constants.CP_US_ACSII:
- case Constants.CP_US_ASCII2:
- return "US-ASCII";
- case Constants.CP_KOI8_R:
- return "KOI8-R";
- case Constants.CP_ISO_8859_1:
- return "ISO-8859-1";
- case Constants.CP_ISO_8859_2:
- return "ISO-8859-2";
- case Constants.CP_ISO_8859_3:
- return "ISO-8859-3";
- case Constants.CP_ISO_8859_4:
- return "ISO-8859-4";
- case Constants.CP_ISO_8859_5:
- return "ISO-8859-5";
- case Constants.CP_ISO_8859_6:
- return "ISO-8859-6";
- case Constants.CP_ISO_8859_7:
- return "ISO-8859-7";
- case Constants.CP_ISO_8859_8:
- return "ISO-8859-8";
- case Constants.CP_ISO_8859_9:
- return "ISO-8859-9";
- case Constants.CP_ISO_2022_JP1:
- case Constants.CP_ISO_2022_JP2:
- case Constants.CP_ISO_2022_JP3:
- return "ISO-2022-JP";
- case Constants.CP_ISO_2022_KR:
- return "ISO-2022-KR";
- case Constants.CP_EUC_JP:
- return "EUC-JP";
- case Constants.CP_EUC_KR:
- return "EUC-KR";
- case Constants.CP_GB2312:
- return "GB2312";
- case Constants.CP_GB18030:
- return "GB18030";
- case Constants.CP_SJIS:
- return "SJIS";
- default:
- return "cp" + codepage;
- }
- }
-
private byte[] _value;
CodePageString( final byte[] data, final int startOffset )
if ( codepage == -1 )
result = new String( _value );
else
- result = new String( _value, codepageToEncoding( codepage ) );
+ result = CodePageUtil.getStringFromCodePage(_value, codepage);
final int terminator = result.indexOf( '\0' );
if ( terminator == -1 )
{
void setJavaValue( String string, int codepage )
throws UnsupportedEncodingException
{
+ String stringNT = string + "\0";
if ( codepage == -1 )
- _value = ( string + "\0" ).getBytes();
+ _value = stringNT.getBytes();
else
- _value = ( string + "\0" )
- .getBytes( codepageToEncoding( codepage ) );
+ _value = CodePageUtil.getBytesInCodePage(stringNT, codepage);
}
int write( OutputStream out ) throws IOException
package org.apache.poi.hpsf;
+import org.apache.poi.util.CodePageUtil;
+
/**
- * <p>Defines constants of general use.</p>
- *
- * @author Rainer Klute <a
- * href="mailto:klute@rainer-klute.de"><klute@rainer-klute.de></a>
+ * <p>Defines constants of general use (currently only codepages).</p>
+ *
+ * @deprecated Use {@link CodePageUtil} to look up code pages
*/
public class Constants
{
import org.apache.poi.hpsf.wellknown.PropertyIDMap;
import org.apache.poi.hpsf.wellknown.SectionIDMap;
+import org.apache.poi.util.CodePageUtil;
/**
* <p>Convenience class representing a DocumentSummary Information stream in a
if (cpCodepage < 0)
cpCodepage = section.getCodepage();
if (cpCodepage < 0)
- cpCodepage = Constants.CP_UNICODE;
+ cpCodepage = CodePageUtil.CP_UNICODE;
customProperties.setCodepage(cpCodepage);
section.setCodepage(cpCodepage);
section.setDictionary(dictionary);
import java.io.IOException;
import java.io.OutputStream;
+import org.apache.poi.util.CodePageUtil;
+
/**
* <p>Adds writing capability to the {@link Property} class.</p>
*
long variantType = getType();
/* Ensure that wide strings are written if the codepage is Unicode. */
- if (codepage == Constants.CP_UNICODE && variantType == Variant.VT_LPSTR)
+ if (codepage == CodePageUtil.CP_UNICODE && variantType == Variant.VT_LPSTR)
variantType = Variant.VT_LPWSTR;
length += TypeWriter.writeUIntToStream(out, variantType);
import java.util.Map;
import org.apache.poi.hpsf.wellknown.PropertyIDMap;
+import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.LittleEndian;
/**
* dictionary is present. In order to cope with this problem we
* add the codepage property and set it to Unicode. */
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
- Integer.valueOf(Constants.CP_UNICODE));
+ Integer.valueOf(CodePageUtil.CP_UNICODE));
codepage = getCodepage();
}
final Long key = i.next();
final String value = dictionary.get(key);
- if (codepage == Constants.CP_UNICODE)
+ if (codepage == CodePageUtil.CP_UNICODE)
{
/* Write the dictionary item in Unicode. */
int sLength = value.length() + 1;
sLength++;
length += TypeWriter.writeUIntToStream(out, key.longValue());
length += TypeWriter.writeUIntToStream(out, sLength);
- final byte[] ca =
- value.getBytes(VariantSupport.codepageToEncoding(codepage));
+ final byte[] ca = CodePageUtil.getBytesInCodePage(value, codepage);
for (int j = 2; j < ca.length; j += 2)
{
out.write(ca[j+1]);
* Unicode. */
length += TypeWriter.writeUIntToStream(out, key.longValue());
length += TypeWriter.writeUIntToStream(out, value.length() + 1);
- final byte[] ba =
- value.getBytes(VariantSupport.codepageToEncoding(codepage));
+ final byte[] ba = CodePageUtil.getBytesInCodePage(value, codepage);
for (int j = 0; j < ba.length; j++)
{
out.write(ba[j]);
(Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
if (codepage == null)
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
- Integer.valueOf(Constants.CP_UNICODE));
+ Integer.valueOf(CodePageUtil.CP_UNICODE));
}
else
/* Setting the dictionary to null means to remove property 0.
import java.util.LinkedHashMap;
import java.util.Map;
+import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
b.append(new String(src, o, (int) sLength));
break;
}
- case Constants.CP_UNICODE:
+ case CodePageUtil.CP_UNICODE:
{
/* The length is the number of characters, i.e. the number
* of bytes is twice the number of the characters. */
h[i2 + 1] = src[o + i2];
}
b.append(new String(h, 0, nrBytes,
- VariantSupport.codepageToEncoding(codepage)));
+ CodePageUtil.codepageToEncoding(codepage)));
break;
}
default:
/* Strip 0x00 characters from the end of the string: */
while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
b.setLength(b.length() - 1);
- if (codepage == Constants.CP_UNICODE)
+ if (codepage == CodePageUtil.CP_UNICODE)
{
if (sLength % 2 == 1)
sLength++;
import java.util.LinkedList;
import java.util.List;
+import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
public static String codepageToEncoding(final int codepage)
throws UnsupportedEncodingException
{
- if (codepage <= 0)
- throw new UnsupportedEncodingException
- ("Codepage number may not be " + codepage);
- switch (codepage)
- {
- case Constants.CP_UTF16:
- return "UTF-16";
- case Constants.CP_UTF16_BE:
- return "UTF-16BE";
- case Constants.CP_UTF8:
- return "UTF-8";
- case Constants.CP_037:
- return "cp037";
- case Constants.CP_GBK:
- return "GBK";
- case Constants.CP_MS949:
- return "ms949";
- case Constants.CP_WINDOWS_1250:
- return "windows-1250";
- case Constants.CP_WINDOWS_1251:
- return "windows-1251";
- case Constants.CP_WINDOWS_1252:
- return "windows-1252";
- case Constants.CP_WINDOWS_1253:
- return "windows-1253";
- case Constants.CP_WINDOWS_1254:
- return "windows-1254";
- case Constants.CP_WINDOWS_1255:
- return "windows-1255";
- case Constants.CP_WINDOWS_1256:
- return "windows-1256";
- case Constants.CP_WINDOWS_1257:
- return "windows-1257";
- case Constants.CP_WINDOWS_1258:
- return "windows-1258";
- case Constants.CP_JOHAB:
- return "johab";
- case Constants.CP_MAC_ROMAN:
- return "MacRoman";
- case Constants.CP_MAC_JAPAN:
- return "SJIS";
- case Constants.CP_MAC_CHINESE_TRADITIONAL:
- return "Big5";
- case Constants.CP_MAC_KOREAN:
- return "EUC-KR";
- case Constants.CP_MAC_ARABIC:
- return "MacArabic";
- case Constants.CP_MAC_HEBREW:
- return "MacHebrew";
- case Constants.CP_MAC_GREEK:
- return "MacGreek";
- case Constants.CP_MAC_CYRILLIC:
- return "MacCyrillic";
- case Constants.CP_MAC_CHINESE_SIMPLE:
- return "EUC_CN";
- case Constants.CP_MAC_ROMANIA:
- return "MacRomania";
- case Constants.CP_MAC_UKRAINE:
- return "MacUkraine";
- case Constants.CP_MAC_THAI:
- return "MacThai";
- case Constants.CP_MAC_CENTRAL_EUROPE:
- return "MacCentralEurope";
- case Constants.CP_MAC_ICELAND:
- return "MacIceland";
- case Constants.CP_MAC_TURKISH:
- return "MacTurkish";
- case Constants.CP_MAC_CROATIAN:
- return "MacCroatian";
- case Constants.CP_US_ACSII:
- case Constants.CP_US_ASCII2:
- return "US-ASCII";
- case Constants.CP_KOI8_R:
- return "KOI8-R";
- case Constants.CP_ISO_8859_1:
- return "ISO-8859-1";
- case Constants.CP_ISO_8859_2:
- return "ISO-8859-2";
- case Constants.CP_ISO_8859_3:
- return "ISO-8859-3";
- case Constants.CP_ISO_8859_4:
- return "ISO-8859-4";
- case Constants.CP_ISO_8859_5:
- return "ISO-8859-5";
- case Constants.CP_ISO_8859_6:
- return "ISO-8859-6";
- case Constants.CP_ISO_8859_7:
- return "ISO-8859-7";
- case Constants.CP_ISO_8859_8:
- return "ISO-8859-8";
- case Constants.CP_ISO_8859_9:
- return "ISO-8859-9";
- case Constants.CP_ISO_2022_JP1:
- case Constants.CP_ISO_2022_JP2:
- case Constants.CP_ISO_2022_JP3:
- return "ISO-2022-JP";
- case Constants.CP_ISO_2022_KR:
- return "ISO-2022-KR";
- case Constants.CP_EUC_JP:
- return "EUC-JP";
- case Constants.CP_EUC_KR:
- return "EUC-KR";
- case Constants.CP_GB2312:
- return "GB2312";
- case Constants.CP_GB18030:
- return "GB18030";
- case Constants.CP_SJIS:
- return "SJIS";
- default:
- return "cp" + codepage;
- }
+ return CodePageUtil.codepageToEncoding(codepage);
}
import junit.framework.Assert;
import junit.framework.TestCase;
-import org.apache.poi.hpsf.Constants;
+import org.apache.poi.POIDataSamples;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.HPSFException;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.Section;
import org.apache.poi.hpsf.SummaryInformation;
-import org.apache.poi.POIDataSamples;
+import org.apache.poi.util.CodePageUtil;
/**
* <p>Tests whether Unicode string can be read from a
Assert.assertEquals(ps.getSectionCount(), 2);
Section s = (Section) ps.getSections().get(1);
Assert.assertEquals(s.getProperty(1),
- Integer.valueOf(Constants.CP_UTF16));
+ Integer.valueOf(CodePageUtil.CP_UTF16));
Assert.assertEquals(s.getProperty(2),
Integer.valueOf(-96070278));
Assert.assertEquals(s.getProperty(3),
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hpsf.ClassID;
-import org.apache.poi.hpsf.Constants;
import org.apache.poi.hpsf.HPSFRuntimeException;
import org.apache.poi.hpsf.IllegalPropertySetDataException;
import org.apache.poi.hpsf.MutableProperty;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.TempFile;
private static final int CODEPAGE_DEFAULT = -1;
private static final int CODEPAGE_1252 = 1252;
- private static final int CODEPAGE_UTF8 = Constants.CP_UTF8;
- private static final int CODEPAGE_UTF16 = Constants.CP_UTF16;
+ private static final int CODEPAGE_UTF8 = CodePageUtil.CP_UTF8;
+ private static final int CODEPAGE_UTF16 = CodePageUtil.CP_UTF16;
check(t, "\u00e4\u00f6\u00fc\u00c4\u00d6", cp);
check(t, "\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc", cp);
check(t, "\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df", cp);
- if (cp == Constants.CP_UTF16 || cp == Constants.CP_UTF8)
+ if (cp == CodePageUtil.CP_UTF16 || cp == CodePageUtil.CP_UTF8)
check(t, "\u79D1\u5B78", cp);
}
catch (Exception ex)
final POIFSFileSystem poiFs = new POIFSFileSystem();
final MutablePropertySet ps1 = new MutablePropertySet();
final MutableSection s = (MutableSection) ps1.getSections().get(0);
- final Map m = new HashMap(3, 1.0f);
+ final Map<Long,String> m = new HashMap<Long,String>(3, 1.0f);
m.put(Long.valueOf(1), "String 1");
m.put(Long.valueOf(2), "String 2");
m.put(Long.valueOf(3), "String 3");
s.setDictionary(m);
s.setFormatID(SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID[0]);
- int codepage = Constants.CP_UNICODE;
+ int codepage = CodePageUtil.CP_UNICODE;
s.setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
Integer.valueOf(codepage));
poiFs.createDocument(ps1.toInputStream(), "Test");
final POIFSFileSystem poiFs = new POIFSFileSystem();
final MutablePropertySet ps1 = new MutablePropertySet();
final MutableSection s = (MutableSection) ps1.getSections().get(0);
- final Map m = new HashMap(3, 1.0f);
+ final Map<Long,String> m = new HashMap<Long, String>(3, 1.0f);
m.put(Long.valueOf(1), "String 1");
m.put(Long.valueOf(2), "String 2");
m.put(Long.valueOf(3), "String 3");
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.Map;
-import java.util.Map.Entry;
import junit.framework.TestCase;
/* The document does not have custom properties. */
return;
- for (final Iterator i = cps.entrySet().iterator(); i.hasNext();)
- {
- final Map.Entry e = (Entry) i.next();
- final CustomProperty cp = (CustomProperty) e.getValue();
+ for (CustomProperty cp : cps.values()) {
cp.getName();
cp.getValue();
}
final int ID_2 = 3;
final String NAME_1 = "Schl\u00fcssel \u00e4";
final String VALUE_1 = "Wert 1";
- final Map dictionary = new HashMap();
+ final Map<Long,String> dictionary = new HashMap<Long, String>();
DocumentSummaryInformation dsi = PropertySetFactory.newDocumentSummaryInformation();
CustomProperties cps;
/* Test an empty custom properties set. */
s = new MutableSection();
s.setFormatID(SectionIDMap.DOCUMENT_SUMMARY_INFORMATION_ID[1]);
- // s.setCodepage(Constants.CP_UNICODE);
+ // s.setCodepage(CodePageUtil.CP_UNICODE);
dsi.addSection(s);
cps = dsi.getCustomProperties();
assertEquals(0, cps.size());