public class CodePageUtil
{
- public static final Set<Charset> VARIABLE_BYTE_CHARSETS = new HashSet<Charset>();
+ public static final Set<Charset> DOUBLE_BYTE_CHARSETS
+ = new HashSet<Charset>();
+
static {
+ DOUBLE_BYTE_CHARSETS.add(StringUtil.BIG5);
//others?
- VARIABLE_BYTE_CHARSETS.add(StringUtil.BIG5);
}
/** <p>Codepage 037, a special case</p> */
return "cp" + codepage;
}
}
+
+    /**
+     * Decodes a little-endian byte range in cp950 (Microsoft's dialect of Big5)
+     * into a String.
+     * <p>
+     * The work is delegated to {@link LittleEndianCP950Reader}: every value it
+     * returns is appended as one char until it reports end of stream (-1).
+     * We know MS zero-padded ascii, and the reader drops that padding.
+     * There may be areas for improvement in this.
+     *
+     * @param data bytes holding the encoded text
+     * @param offset index in <code>data</code> at which decoding starts
+     * @param lengthInBytes length, in bytes, of the range handed to the reader
+     * @return the decoded text
+     */
+    public static String cp950ToString(byte[] data, int offset, int lengthInBytes) {
+        LittleEndianCP950Reader cp950 = new LittleEndianCP950Reader(data, offset, lengthInBytes);
+        StringBuilder decoded = new StringBuilder();
+        for (int next = cp950.read(); next != -1; next = cp950.read()) {
+            decoded.append((char) next);
+        }
+        return decoded.toString();
+    }
}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.util;
-
-import java.io.ByteArrayInputStream;
-
-/**
- * Stream that converts MSOffice's way of storing Big5, with
- * zero-byte padding for ASCII and in LittleEndianOrder.
- */
-@Internal
-public class LittleEndianBig5Stream extends ByteArrayInputStream {
- private static final int EOF = -1;
- private static final int INVALID_PAIR = -2;
- private static final int EMPTY_TRAILING = -3;
-
- //the char that is logically trailing in Big5 encoding
- //however in LittleEndian order, this is the first encountered.
- int trailing = EMPTY_TRAILING;
- public LittleEndianBig5Stream(byte[] buf) {
- super(buf);
- }
-
- public LittleEndianBig5Stream(byte[] buf, int offset, int length) {
- super(buf, offset, length);
- }
-
- @Override
- public int read() {
-
- if (trailing != EMPTY_TRAILING) {
- int tmp = trailing;
- trailing = EMPTY_TRAILING;
- return tmp;
- }
- int leading = readNext();
- while (leading == INVALID_PAIR) {
- leading = readNext();
- }
-
- if (leading == EOF) {
- return EOF;
- }
- return leading;
- }
-
- //returns leading, sets trailing appropriately
- //returns -1 if it hits the end of the stream
- //returns -2 for an invalid big5 code pair
- private final int readNext() {
- trailing = super.read();
- if (trailing == -1) {
- return EOF;
- }
- int leading = super.read();
- if (leading == EOF) {
- return EOF;
- }
- int lead = leading&0xff;
- if (lead > 0x80) {
- return leading;
- } else if (lead == 0) {
- int ret = trailing;
- trailing = EMPTY_TRAILING;
- return ret;
- } else {
- int ret = trailing;
- trailing = EMPTY_TRAILING;
- return ret;
- //return INVALID_PAIR;
- }
-
- }
-
- @Override
- public int read(byte[] buff, int off, int len) {
- int bytesRead = 0;
- for (int i = off; i < off+len; i++) {
- int b = read();
- if (b == -1) {
- if (bytesRead == 0) {
- return -1;
- } else {
- return bytesRead;
- }
- }
- bytesRead++;
- buff[i] = (byte)b;
- }
- return bytesRead;
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.util;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+
+/**
+ * Reader that decodes CP950 (MSOffice's dialect of Big5) stored in
+ * little-endian order, with zero-byte padding for ASCII.
+ */
+@Internal
+public class LittleEndianCP950Reader extends Reader {
+
+    private static final POILogger LOGGER = POILogFactory.getLogger(LittleEndianCP950Reader.class);
+
+    //returned by read() for byte pairs that cannot be turned into a character
+    private static final char UNMAPPABLE = '?';
+
+    //scratch state used by read() to push one lead/trail pair through the Big5 decoder
+    private final CharsetDecoder decoder = StringUtil.BIG5.newDecoder();
+    private final ByteBuffer doubleByteBuffer = ByteBuffer.allocate(2);
+    private final CharBuffer charBuffer = CharBuffer.allocate(2);
+
+    //https://en.wikipedia.org/wiki/Code_page_950
+    //see private use area
+    //each low/high pair bounds a (lead << 8) + trail interval that read()
+    //hands to the handleRangeN method carrying the same number
+    private static final char range1Low = '\u8140';
+    private static final char range1High = '\u8DFE';
+    private static final char range2Low = '\u8E40';
+    private static final char range2High = '\uA0FE';
+    private static final char range3Low = '\uC6A1';
+    private static final char range3High = '\uC8FE';
+    private static final char range4Low = '\uFA40';
+    private static final char range4High = '\uFEFE';
+
+    //backing bytes and the window requested through the constructor
+    private final byte[] data;
+    private final int startOffset;
+    private final int length;
+
+    //index of the next byte read() will consume
+    private int offset;
+
+    //the two bytes of the pair most recently consumed by read()
+    private int trailing;
+    private int leading;
+
+    //NOTE(review): not referenced by any method visible in this class - confirm before removing
+    int cnt = 0;
+ //"trailing" holds the byte that is logically trailing in the Big5 encoding;
+ //in little-endian order, however, it is the first byte encountered.
+ /**
+ * Creates a reader over the whole of <code>data</code>; delegates to the
+ * three-argument constructor with offset 0 and length <code>data.length</code>.
+ *
+ * @param data bytes to decode
+ */
+ public LittleEndianCP950Reader(byte[] data) {
+ this(data, 0, data.length);
+ }
+
+    /**
+     * Creates a reader over a window of <code>data</code>.
+     *
+     * @param data   bytes to decode; the array is kept by reference, not copied
+     * @param offset index of the first byte to decode
+     * @param length number of bytes, counted from <code>offset</code>, that the
+     *               reader is meant to cover
+     */
+    public LittleEndianCP950Reader(byte[] data, int offset, int length) {
+        this.data = data;
+        this.length = length;
+        this.startOffset = offset;
+        //reading begins at the start of the window
+        this.offset = offset;
+    }
+
+    /**
+     * Reads the next character.
+     * <p>
+     * Every character occupies two bytes, stored trailing byte first and
+     * leading byte second. A pair whose leading byte is below 0x81 yields the
+     * trailing byte unchanged (this is how zero-padded ASCII comes through).
+     * Leading byte 0xF9 and the four private-use ranges are mapped by hand;
+     * every other pair is run through the Big5 decoder.
+     *
+     * @return the decoded character; <code>'?'</code> if the pair cannot be
+     *         decoded; or -1 once fewer than two bytes remain in the array or
+     *         in the window given to the constructor
+     */
+    @Override
+    public int read() {
+        //a character always consumes two bytes, so BOTH of them have to lie
+        //inside the array and inside [startOffset, startOffset + length);
+        //a dangling single byte at the end is treated as end of stream
+        if (offset + 2 > data.length || offset + 2 - startOffset > length) {
+            return -1;
+        }
+        trailing = data[offset++] & 0xff;
+        leading = data[offset++] & 0xff;
+        decoder.reset();
+        if (leading < 0x81) {
+            //return trailing alone
+            //there may be some subtleties here
+            return trailing;
+        }
+        if (leading == 0xf9) {
+            return handleF9(trailing);
+        }
+
+        //restore the logical (big-endian) double-byte code to test the private use ranges
+        int ch = (leading << 8) + trailing;
+        if (ch >= range1Low && ch <= range1High) {
+            return handleRange1(leading, trailing);
+        } else if (ch >= range2Low && ch <= range2High) {
+            return handleRange2(leading, trailing);
+        } else if (ch >= range3Low && ch <= range3High) {
+            return handleRange3(leading, trailing);
+        } else if (ch >= range4Low && ch <= range4High) {
+            return handleRange4(leading, trailing);
+        }
+
+        //everything else: feed the pair, lead byte first, to the Big5 decoder
+        charBuffer.clear();
+        doubleByteBuffer.clear();
+        doubleByteBuffer.put((byte) leading);
+        doubleByteBuffer.put((byte) trailing);
+        doubleByteBuffer.flip();
+        decoder.decode(doubleByteBuffer, charBuffer, true);
+        charBuffer.flip();
+
+        //an empty output buffer means the decoder produced nothing for this pair
+        if (charBuffer.length() == 0) {
+            LOGGER.log(POILogger.WARN, "couldn't create char for: "
+                    + Integer.toString((leading & 0xff), 16)
+                    + " " + Integer.toString((trailing & 0xff), 16));
+            return UNMAPPABLE;
+        }
+        return Character.codePointAt(charBuffer, 0);
+    }
+
+
+    /**
+     * Fills <code>cbuf</code> by calling {@link #read()} once per character.
+     *
+     * @param cbuf destination buffer
+     * @param off  index in <code>cbuf</code> at which to start storing characters
+     * @param len  maximum number of characters to read
+     * @return the number of characters stored, or -1 if the end of the data
+     *         was reached before any character could be read
+     * @throws IOException declared by {@link Reader}; not thrown by this implementation
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        //there may be some efficiencies, but this should do for now.
+
+        for (int i = off; i < off + len; i++) {
+            int c = read();
+            if (c == -1) {
+                int charsRead = i - off;
+                //the Reader contract signals end of stream with -1; returning 0
+                //here would make callers that loop until -1 spin forever
+                return (charsRead == 0) ? -1 : charsRead;
+            }
+            cbuf[i] = (char) c;
+        }
+        return len;
+    }
+
+ /**
+ * No-op: the method body is empty.
+ */
+ @Override
+ public void close() throws IOException {
+
+ }
+
+    //pairs in range1Low..range1High: rows are counted from lead byte 0x81, base value 0xeeb8
+    private int handleRange1(int leading, int trailing) {
+        return privateUseCodePoint(0xeeb8, 0x81, leading, trailing);
+    }
+
+    //pairs in range2Low..range2High: rows are counted from lead byte 0x8e, base value 0xe311
+    private int handleRange2(int leading, int trailing) {
+        return privateUseCodePoint(0xe311, 0x8e, leading, trailing);
+    }
+
+    //pairs in range3Low..range3High: rows are counted from lead byte 0xc6, base value 0xf672
+    private int handleRange3(int leading, int trailing) {
+        return privateUseCodePoint(0xf672, 0xc6, leading, trailing);
+    }
+
+    //pairs in range4Low..range4High: rows are counted from lead byte 0xfa, base value 0xe000
+    private int handleRange4(int leading, int trailing) {
+        return privateUseCodePoint(0xe000, 0xfa, leading, trailing);
+    }
+
+    /**
+     * Shared arithmetic behind the four handleRangeN methods.
+     *
+     * @param base      value returned for lead byte <code>firstLead</code> with trail byte 0x40
+     * @param firstLead lead byte that counts as row 0
+     * @param leading   lead byte of the pair
+     * @param trailing  trail byte of the pair
+     * @return <code>base</code> plus 157 per lead-byte row plus the trail-byte column
+     */
+    private static int privateUseCodePoint(int base, int firstLead, int leading, int trailing) {
+        //each lead byte spans 157 consecutive values
+        int row = 157 * (leading - firstLead);
+        //trail bytes below 0x80 are counted from 0x40, the others from 0x62
+        int column = (trailing < 0x80) ? trailing - 0x40 : trailing - 0x62;
+        return (base + row) + column;
+    }
+
+ /**
+ * Maps the trail byte of a pair whose lead byte is 0xF9 to a hard-coded
+ * character value.
+ * <p>
+ * Trail bytes 0x40-0x7e and 0xa1-0xfe have an entry; any other value is
+ * logged at WARN level and reported as <code>UNMAPPABLE</code>.
+ *
+ * @param trailing the trail byte of the pair; read() passes it as 0-255
+ * @return the mapped character value, or <code>UNMAPPABLE</code> if the
+ * trail byte has no entry
+ */
+ private int handleF9(int trailing) {
+ switch (trailing) {
+ case 0x40:
+ return 0x7e98;
+ case 0x41:
+ return 0x7e9b;
+ case 0x42:
+ return 0x7e99;
+ case 0x43:
+ return 0x81e0;
+ case 0x44:
+ return 0x81e1;
+ case 0x45:
+ return 0x8646;
+ case 0x46:
+ return 0x8647;
+ case 0x47:
+ return 0x8648;
+ case 0x48:
+ return 0x8979;
+ case 0x49:
+ return 0x897a;
+ case 0x4a:
+ return 0x897c;
+ case 0x4b:
+ return 0x897b;
+ case 0x4c:
+ return 0x89ff;
+ case 0x4d:
+ return 0x8b98;
+ case 0x4e:
+ return 0x8b99;
+ case 0x4f:
+ return 0x8ea5;
+ case 0x50:
+ return 0x8ea4;
+ case 0x51:
+ return 0x8ea3;
+ case 0x52:
+ return 0x946e;
+ case 0x53:
+ return 0x946d;
+ case 0x54:
+ return 0x946f;
+ case 0x55:
+ return 0x9471;
+ case 0x56:
+ return 0x9473;
+ case 0x57:
+ return 0x9749;
+ case 0x58:
+ return 0x9872;
+ case 0x59:
+ return 0x995f;
+ case 0x5a:
+ return 0x9c68;
+ case 0x5b:
+ return 0x9c6e;
+ case 0x5c:
+ return 0x9c6d;
+ case 0x5d:
+ return 0x9e0b;
+ case 0x5e:
+ return 0x9e0d;
+ case 0x5f:
+ return 0x9e10;
+ case 0x60:
+ return 0x9e0f;
+ case 0x61:
+ return 0x9e12;
+ case 0x62:
+ return 0x9e11;
+ case 0x63:
+ return 0x9ea1;
+ case 0x64:
+ return 0x9ef5;
+ case 0x65:
+ return 0x9f09;
+ case 0x66:
+ return 0x9f47;
+ case 0x67:
+ return 0x9f78;
+ case 0x68:
+ return 0x9f7b;
+ case 0x69:
+ return 0x9f7a;
+ case 0x6a:
+ return 0x9f79;
+ case 0x6b:
+ return 0x571e;
+ case 0x6c:
+ return 0x7066;
+ case 0x6d:
+ return 0x7c6f;
+ case 0x6e:
+ return 0x883c;
+ case 0x6f:
+ return 0x8db2;
+ case 0x70:
+ return 0x8ea6;
+ case 0x71:
+ return 0x91c3;
+ case 0x72:
+ return 0x9474;
+ case 0x73:
+ return 0x9478;
+ case 0x74:
+ return 0x9476;
+ case 0x75:
+ return 0x9475;
+ case 0x76:
+ return 0x9a60;
+ case 0x77:
+ return 0x9c74;
+ case 0x78:
+ return 0x9c73;
+ case 0x79:
+ return 0x9c71;
+ case 0x7a:
+ return 0x9c75;
+ case 0x7b:
+ return 0x9e14;
+ case 0x7c:
+ return 0x9e13;
+ case 0x7d:
+ return 0x9ef6;
+ case 0x7e:
+ return 0x9f0a;
+ // no entries for trail bytes 0x7f-0xa0; they fall through to default
+ case 0xa1:
+ return 0x9fa4;
+ case 0xa2:
+ return 0x7068;
+ case 0xa3:
+ return 0x7065;
+ case 0xa4:
+ return 0x7cf7;
+ case 0xa5:
+ return 0x866a;
+ case 0xa6:
+ return 0x883e;
+ case 0xa7:
+ return 0x883d;
+ case 0xa8:
+ return 0x883f;
+ case 0xa9:
+ return 0x8b9e;
+ case 0xaa:
+ return 0x8c9c;
+ case 0xab:
+ return 0x8ea9;
+ case 0xac:
+ return 0x8ec9;
+ case 0xad:
+ return 0x974b;
+ case 0xae:
+ return 0x9873;
+ case 0xaf:
+ return 0x9874;
+ case 0xb0:
+ return 0x98cc;
+ case 0xb1:
+ return 0x9961;
+ case 0xb2:
+ return 0x99ab;
+ case 0xb3:
+ return 0x9a64;
+ case 0xb4:
+ return 0x9a66;
+ case 0xb5:
+ return 0x9a67;
+ case 0xb6:
+ return 0x9b24;
+ case 0xb7:
+ return 0x9e15;
+ case 0xb8:
+ return 0x9e17;
+ case 0xb9:
+ return 0x9f48;
+ case 0xba:
+ return 0x6207;
+ case 0xbb:
+ return 0x6b1e;
+ case 0xbc:
+ return 0x7227;
+ case 0xbd:
+ return 0x864c;
+ case 0xbe:
+ return 0x8ea8;
+ case 0xbf:
+ return 0x9482;
+ case 0xc0:
+ return 0x9480;
+ case 0xc1:
+ return 0x9481;
+ case 0xc2:
+ return 0x9a69;
+ case 0xc3:
+ return 0x9a68;
+ case 0xc4:
+ return 0x9b2e;
+ case 0xc5:
+ return 0x9e19;
+ case 0xc6:
+ return 0x7229;
+ case 0xc7:
+ return 0x864b;
+ case 0xc8:
+ return 0x8b9f;
+ case 0xc9:
+ return 0x9483;
+ case 0xca:
+ return 0x9c79;
+ case 0xcb:
+ return 0x9eb7;
+ case 0xcc:
+ return 0x7675;
+ case 0xcd:
+ return 0x9a6b;
+ case 0xce:
+ return 0x9c7a;
+ case 0xcf:
+ return 0x9e1d;
+ case 0xd0:
+ return 0x7069;
+ case 0xd1:
+ return 0x706a;
+ case 0xd2:
+ return 0x9ea4;
+ case 0xd3:
+ return 0x9f7e;
+ case 0xd4:
+ return 0x9f49;
+ case 0xd5:
+ return 0x9f98;
+ case 0xd6:
+ return 0x7881;
+ case 0xd7:
+ return 0x92b9;
+ case 0xd8:
+ return 0x88cf;
+ case 0xd9:
+ return 0x58bb;
+ case 0xda:
+ return 0x6052;
+ case 0xdb:
+ return 0x7ca7;
+ case 0xdc:
+ return 0x5afa;
+ case 0xdd:
+ return 0x2554;
+ case 0xde:
+ return 0x2566;
+ case 0xdf:
+ return 0x2557;
+ case 0xe0:
+ return 0x2560;
+ case 0xe1:
+ return 0x256c;
+ case 0xe2:
+ return 0x2563;
+ case 0xe3:
+ return 0x255a;
+ case 0xe4:
+ return 0x2569;
+ case 0xe5:
+ return 0x255d;
+ case 0xe6:
+ return 0x2552;
+ case 0xe7:
+ return 0x2564;
+ case 0xe8:
+ return 0x2555;
+ case 0xe9:
+ return 0x255e;
+ case 0xea:
+ return 0x256a;
+ case 0xeb:
+ return 0x2561;
+ case 0xec:
+ return 0x2558;
+ case 0xed:
+ return 0x2567;
+ case 0xee:
+ return 0x255b;
+ case 0xef:
+ return 0x2553;
+ case 0xf0:
+ return 0x2565;
+ case 0xf1:
+ return 0x2556;
+ case 0xf2:
+ return 0x255f;
+ case 0xf3:
+ return 0x256b;
+ case 0xf4:
+ return 0x2562;
+ case 0xf5:
+ return 0x2559;
+ case 0xf6:
+ return 0x2568;
+ case 0xf7:
+ return 0x255c;
+ case 0xf8:
+ return 0x2551;
+ case 0xf9:
+ return 0x2550;
+ case 0xfa:
+ return 0x256d;
+ case 0xfb:
+ return 0x256e;
+ case 0xfc:
+ return 0x2570;
+ case 0xfd:
+ return 0x256f;
+ case 0xfe:
+ return 0x2593;
+ default:
+ LOGGER.log(POILogger.WARN, "couldn't create char for: f9"
+ + " " + Integer.toString((trailing & 0xff), 16));
+ return UNMAPPABLE;
+ }
+ }
+}
package org.apache.poi.util;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
' ', // 0xf0ff not defined
};
- /**
- * This tries to convert a LE byte array in Big5 to a String.
- * We know MS zero-padded ascii, and we drop those.
- * However, there may be areas for improvement in this.
- *
- * @param data
- * @param offset
- * @param lengthInBytes
- * @return
- */
- public static String littleEndianBig5Stream(byte[] data, int offset, int lengthInBytes) {
- ByteArrayOutputStream os = new ByteArrayOutputStream();
- try {
- IOUtils.copy(new LittleEndianBig5Stream(data, offset, lengthInBytes), os);
- } catch (IOException e) {
- logger.log(POILogger.WARN,
- "IOException while copying a byte array stream to a byte array stream?!");
- }
- return new String(os.toByteArray(), BIG5);
- }
// Could be replaced with org.apache.commons.lang3.StringUtils#join
@Internal
System.arraycopy(_mainStream, _fib.getFibBase().getFcMin(), textData, 0, textData.length);
int numChars = textData.length;
- if (CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(guessedCharset)) {
+ if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
numChars /= 2;
}
package org.apache.poi.hwpf.model;
-import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.Internal;
import org.apache.poi.util.NotImplemented;
this.rawBytes = text;
}
- @Override
- protected void validateLengths(int start, int end, int length, PieceDescriptor pd) {
- //things are still wonky with Big5 char/byte length mapping
- //sometimes working w/ Java 8 but not w/ Java 7!
- //for now, if we're dealing w/ Big5 don't bother checking
- if (pd.getCharset() != null &&
- CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(pd.getCharset())) {
- return;
- }
- super.validateLengths(start, end, length, pd);
- }
/**
* @return nothing, ever. Always throws an UnsupportedOperationException
* @throws UnsupportedOperationException
boolean unicode = pieces[x].isUnicode();
int multiple = 1;
if (unicode ||
- (charset != null && CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(charset))) {
+ (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) {
multiple = 2;
}
@Override
protected int getEncodingMultiplier(TextPiece textPiece) {
Charset charset = textPiece.getPieceDescriptor().getCharset();
- if (charset != null && CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(charset)) {
+ if (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) {
return 2;
}
return 1;
import java.nio.charset.Charset;
+import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.Internal;
import org.apache.poi.util.StringUtil;
// Validate
int textLength = ((CharSequence) _buf).length();
- validateLengths(start, end, textLength, pd);
+ if (end - start != textLength) {
+ throw new IllegalStateException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
+ }
if (end < start) {
throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
}
}
- protected void validateLengths(int start, int end, int textLength, PieceDescriptor pd) {
- if (end - start != textLength) {
- throw new IllegalStateException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
- }
- }
/**
* Create the StringBuilder from the text and unicode flag
*/
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
byte[] textBuffer = text;
if (StringUtil.BIG5.equals(pd.getCharset())) {
- String txt = new StringBuilder(StringUtil.littleEndianBig5Stream(text, 0, text.length)).toString();
- return new StringBuilder(txt);
+ return new StringBuilder(CodePageUtil.cp950ToString(text, 0, text.length));
}
String str = new String(textBuffer, 0, textBuffer.length, (pd.isUnicode()) ? StringUtil.UTF16LE : pd.getCharset());
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
-import org.junit.Ignore;
import org.junit.Test;
/**
* Bug 51944 - PAPFormattedDiskPage.getPAPX - IndexOutOfBounds
*/
@Test
- @Ignore("Test now passes in Java 1.7 and 1.8, but not 1.6")
public void testBug51944() throws Exception
{
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug51944.doc");
*/
assertContains(txt, "\n9-55 xxxxx block5");
//TODO: figure out why these two aren't passing
-// assertContains(txt, "\u2019\u0078 block2");//make sure smart quote is extracted correctly
-// assertContains(txt, "We are able to");//not sure if we can get this easily?
+ //assertContains(txt, "\u2019\u0078 block2");//make sure smart quote is extracted correctly
+ //assertContains(txt, "We are able to");//not sure if we can get this easily?
}
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.util;
+
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.junit.Test;
+
+/**
+ * Tests for {@link LittleEndianCP950Reader}.
+ * <p>
+ * The reader expects little-endian pairs: <code>data[0]</code> is the trail
+ * byte and <code>data[1]</code> is the lead byte of each double-byte code.
+ */
+public class TestLittleEndianCP950Reader {
+
+    @Test
+    public void testPersonalUseMappings() throws Exception {
+        //ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit950.txt
+        byte[] data = new byte[2];
+        data[1] = (byte) 0xfe;
+        data[0] = (byte) 0xd3;
+        assertCharEquals('\uE2E5', data);
+
+        data[1] = (byte) 0x90;
+        data[0] = (byte) 0xb6;
+        assertCharEquals('\uE49F', data);
+
+        //actually found in document
+        //but this disagrees with file above
+        data[1] = (byte) 0x8E;
+        data[0] = (byte) 0xA8;
+        assertCharEquals('\uE357', data);
+
+        data[1] = (byte) 0x8E;
+        data[0] = (byte) 0xE6;
+        assertCharEquals('\uE395', data);
+
+        //lead byte 0xF9 goes through the hard-coded table; like every other
+        //case here the lead byte is stored second (data[1]), the trail byte first
+        data[1] = (byte) 0xF9;
+        data[0] = (byte) 0xD8;
+        assertCharEquals('\u88CF', data);
+    }
+
+    /**
+     * Sanity check of the arithmetic the reader uses to rebuild the logical
+     * double-byte code from a lead byte and a trail byte.
+     */
+    @Test
+    public void one() {
+        byte b = (byte) 0xfe;
+        byte c = (byte) 0xd3;
+
+        int i = ((b & 0xff) << 8) + (c & 0xff);
+        assertEquals(0xfed3, i);
+    }
+
+    /**
+     * Asserts that <code>data</code> decodes to exactly one character,
+     * <code>expected</code>, followed by end of stream.
+     */
+    private void assertCharEquals(char expected, byte[] data) throws IOException {
+        Reader reader = new LittleEndianCP950Reader(data);
+        int c = reader.read();
+        assertEquals((int) expected, c);
+        int eof = reader.read();
+        assertEquals("should be end of stream", -1, eof);
+    }
+}