diff options
Diffstat (limited to 'src/java/org')
-rw-r--r-- | src/java/org/apache/poi/util/CodePageUtil.java | 25 | ||||
-rw-r--r-- | src/java/org/apache/poi/util/LittleEndianCP950Reader.java | 479 | ||||
-rw-r--r-- | src/java/org/apache/poi/util/StringUtil.java | 1 |
3 files changed, 0 insertions, 505 deletions
diff --git a/src/java/org/apache/poi/util/CodePageUtil.java b/src/java/org/apache/poi/util/CodePageUtil.java index da8f8a9842..2c1480253d 100644 --- a/src/java/org/apache/poi/util/CodePageUtil.java +++ b/src/java/org/apache/poi/util/CodePageUtil.java @@ -31,8 +31,6 @@ import java.util.Set; public class CodePageUtil { - public static final Set<Charset> DOUBLE_BYTE_CHARSETS = Collections.singleton(StringUtil.BIG5); - /** <p>Codepage 037, a special case</p> */ public static final int CP_037 = 37; @@ -446,27 +444,4 @@ public class CodePageUtil return "cp" + codepage; } } - - /** - * This tries to convert a LE byte array in cp950 - * (Microsoft's dialect of Big5) to a String. - * We know MS zero-padded ascii, and we drop those. - * There may be areas for improvement in this. - * - * @param data - * @param offset - * @param lengthInBytes - * @return Decoded String - */ - public static String cp950ToString(byte[] data, int offset, int lengthInBytes) { - StringBuilder sb = new StringBuilder(); - LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes); - int c = reader.read(); - while (c != -1) { - sb.append((char)c); - c = reader.read(); - } - reader.close(); - return sb.toString(); - } } diff --git a/src/java/org/apache/poi/util/LittleEndianCP950Reader.java b/src/java/org/apache/poi/util/LittleEndianCP950Reader.java deleted file mode 100644 index 61808afcaa..0000000000 --- a/src/java/org/apache/poi/util/LittleEndianCP950Reader.java +++ /dev/null @@ -1,479 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.util; -import java.io.IOException; -import java.io.Reader; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.CharsetDecoder; - -/** - * Stream that converts CP950 (MSOffice's dialect of Big5), with - * zero-byte padding for ASCII and in LittleEndianOrder. - */ -@Internal -public class LittleEndianCP950Reader extends Reader { - - private static final POILogger LOGGER = POILogFactory.getLogger(LittleEndianCP950Reader.class); - - - private static final char UNMAPPABLE = '?'; - private final ByteBuffer doubleByteBuffer = ByteBuffer.allocate(2); - private final CharBuffer charBuffer = CharBuffer.allocate(2); - private final CharsetDecoder decoder = StringUtil.BIG5.newDecoder(); - - //https://en.wikipedia.org/wiki/Code_page_950 - //see private use area - private final static char range1Low = '\u8140'; - private final static char range1High = '\u8DFE'; - private final static char range2Low = '\u8E40'; - private final static char range2High = '\uA0FE'; - private final static char range3Low = '\uC6A1'; - private final static char range3High = '\uC8FE'; - private final static char range4Low = '\uFA40'; - private final static char range4High = '\uFEFE'; - - private final byte[] data; - private final int startOffset; - private final int length; - private int offset; - private int trailing; - private int leading; - int cnt; - //the char that is logically trailing in Big5 encoding - //however in LittleEndian order, this is the first encountered. - public LittleEndianCP950Reader(byte[] data) { - this(data, 0, data.length); - } - - public LittleEndianCP950Reader(byte[] data, int offset, int length) { - this.data = data; - this.startOffset = offset; - this.offset = startOffset; - this.length = length; - } - - @Override - public int read() { - if (offset + 1 > data.length || offset - startOffset > length) { - return -1; - } - trailing = data[offset++] & 0xff; - leading = data[offset++] & 0xff; - decoder.reset(); - if (leading < 0x81) { - //return trailing alone - //there may be some subtleties here - return trailing; - } else if (leading == 0xf9) { - return handleF9(trailing); - } else { - int ch = (leading << 8) + trailing; - if (ch >= range1Low && ch <= range1High) { - return handleRange1(leading, trailing); - } else if (ch >= range2Low && ch <= range2High) { - return handleRange2(leading, trailing); - } else if (ch >= range3Low && ch <= range3High) { - return handleRange3(leading, trailing); - } else if (ch >= range4Low && ch <= range4High) { - return handleRange4(leading, trailing); - } - - charBuffer.clear(); - doubleByteBuffer.clear(); - doubleByteBuffer.put((byte) leading); - doubleByteBuffer.put((byte) trailing); - doubleByteBuffer.flip(); - decoder.decode(doubleByteBuffer, charBuffer, true); - charBuffer.flip(); - - if (charBuffer.length() == 0) { - LOGGER.log(POILogger.WARN, "couldn't create char for: " - + Integer.toString((leading & 0xff), 16) - + " " + Integer.toString((trailing & 0xff), 16)); - return UNMAPPABLE; - } else { - return Character.codePointAt(charBuffer, 0); - } - } - - - } - - - @Override - public int read(char[] cbuf, int off, int len) throws IOException { - //there may be some efficiencies, but this should do for now. - - for (int i = off; i < off + len; i++) { - int c = read(); - if (c == -1) { - return i - off; - } - cbuf[i] = (char) c; - } - return len; - } - - @Override - public void close() { - } - - private int handleRange1(int leading, int trailing) { - return (0xeeb8 + (157 * (leading - 0x81))) + - ((trailing < 0x80) ? trailing - 0x40 : trailing - 0x62); - } - - private int handleRange2(int leading, int trailing) { - return (0xe311 + (157 * (leading - 0x8e))) + - ((trailing < 0x80) ? trailing - 0x40 : trailing - 0x62); - } - - private int handleRange3(int leading, int trailing) { - return (0xf672 + (157 * (leading - 0xc6))) + - ((trailing < 0x80) ? trailing - 0x40 : trailing - 0x62); - } - - private int handleRange4(int leading, int trailing) { - return (0xe000 + (157 * (leading - 0xfa))) + - ((trailing < 0x80) ? trailing - 0x40 : trailing - 0x62); - } - - private int handleF9(int trailing) { - switch (trailing) { - case 0x40: - return 0x7e98; - case 0x41: - return 0x7e9b; - case 0x42: - return 0x7e99; - case 0x43: - return 0x81e0; - case 0x44: - return 0x81e1; - case 0x45: - return 0x8646; - case 0x46: - return 0x8647; - case 0x47: - return 0x8648; - case 0x48: - return 0x8979; - case 0x49: - return 0x897a; - case 0x4a: - return 0x897c; - case 0x4b: - return 0x897b; - case 0x4c: - return 0x89ff; - case 0x4d: - return 0x8b98; - case 0x4e: - return 0x8b99; - case 0x4f: - return 0x8ea5; - case 0x50: - return 0x8ea4; - case 0x51: - return 0x8ea3; - case 0x52: - return 0x946e; - case 0x53: - return 0x946d; - case 0x54: - return 0x946f; - case 0x55: - return 0x9471; - case 0x56: - return 0x9473; - case 0x57: - return 0x9749; - case 0x58: - return 0x9872; - case 0x59: - return 0x995f; - case 0x5a: - return 0x9c68; - case 0x5b: - return 0x9c6e; - case 0x5c: - return 0x9c6d; - case 0x5d: - return 0x9e0b; - case 0x5e: - return 0x9e0d; - case 0x5f: - return 0x9e10; - case 0x60: - return 0x9e0f; - case 0x61: - return 0x9e12; - case 0x62: - return 0x9e11; - case 0x63: - return 0x9ea1; - case 0x64: - return 0x9ef5; - case 0x65: - return 0x9f09; - case 0x66: - return 0x9f47; - case 0x67: - return 0x9f78; - case 0x68: - return 0x9f7b; - case 0x69: - return 0x9f7a; - case 0x6a: - return 0x9f79; - case 0x6b: - return 0x571e; - case 0x6c: - return 0x7066; - case 0x6d: - return 0x7c6f; - case 0x6e: - return 0x883c; - case 0x6f: - return 0x8db2; - case 0x70: - return 0x8ea6; - case 0x71: - return 0x91c3; - case 0x72: - return 0x9474; - case 0x73: - return 0x9478; - case 0x74: - return 0x9476; - case 0x75: - return 0x9475; - case 0x76: - return 0x9a60; - case 0x77: - return 0x9c74; - case 0x78: - return 0x9c73; - case 0x79: - return 0x9c71; - case 0x7a: - return 0x9c75; - case 0x7b: - return 0x9e14; - case 0x7c: - return 0x9e13; - case 0x7d: - return 0x9ef6; - case 0x7e: - return 0x9f0a; - case 0xa1: - return 0x9fa4; - case 0xa2: - return 0x7068; - case 0xa3: - return 0x7065; - case 0xa4: - return 0x7cf7; - case 0xa5: - return 0x866a; - case 0xa6: - return 0x883e; - case 0xa7: - return 0x883d; - case 0xa8: - return 0x883f; - case 0xa9: - return 0x8b9e; - case 0xaa: - return 0x8c9c; - case 0xab: - return 0x8ea9; - case 0xac: - return 0x8ec9; - case 0xad: - return 0x974b; - case 0xae: - return 0x9873; - case 0xaf: - return 0x9874; - case 0xb0: - return 0x98cc; - case 0xb1: - return 0x9961; - case 0xb2: - return 0x99ab; - case 0xb3: - return 0x9a64; - case 0xb4: - return 0x9a66; - case 0xb5: - return 0x9a67; - case 0xb6: - return 0x9b24; - case 0xb7: - return 0x9e15; - case 0xb8: - return 0x9e17; - case 0xb9: - return 0x9f48; - case 0xba: - return 0x6207; - case 0xbb: - return 0x6b1e; - case 0xbc: - return 0x7227; - case 0xbd: - return 0x864c; - case 0xbe: - return 0x8ea8; - case 0xbf: - return 0x9482; - case 0xc0: - return 0x9480; - case 0xc1: - return 0x9481; - case 0xc2: - return 0x9a69; - case 0xc3: - return 0x9a68; - case 0xc4: - return 0x9b2e; - case 0xc5: - return 0x9e19; - case 0xc6: - return 0x7229; - case 0xc7: - return 0x864b; - case 0xc8: - return 0x8b9f; - case 0xc9: - return 0x9483; - case 0xca: - return 0x9c79; - case 0xcb: - return 0x9eb7; - case 0xcc: - return 0x7675; - case 0xcd: - return 0x9a6b; - case 0xce: - return 0x9c7a; - case 0xcf: - return 0x9e1d; - case 0xd0: - return 0x7069; - case 0xd1: - return 0x706a; - case 0xd2: - return 0x9ea4; - case 0xd3: - return 0x9f7e; - case 0xd4: - return 0x9f49; - case 0xd5: - return 0x9f98; - case 0xd6: - return 0x7881; - case 0xd7: - return 0x92b9; - case 0xd8: - return 0x88cf; - case 0xd9: - return 0x58bb; - case 0xda: - return 0x6052; - case 0xdb: - return 0x7ca7; - case 0xdc: - return 0x5afa; - case 0xdd: - return 0x2554; - case 0xde: - return 0x2566; - case 0xdf: - return 0x2557; - case 0xe0: - return 0x2560; - case 0xe1: - return 0x256c; - case 0xe2: - return 0x2563; - case 0xe3: - return 0x255a; - case 0xe4: - return 0x2569; - case 0xe5: - return 0x255d; - case 0xe6: - return 0x2552; - case 0xe7: - return 0x2564; - case 0xe8: - return 0x2555; - case 0xe9: - return 0x255e; - case 0xea: - return 0x256a; - case 0xeb: - return 0x2561; - case 0xec: - return 0x2558; - case 0xed: - return 0x2567; - case 0xee: - return 0x255b; - case 0xef: - return 0x2553; - case 0xf0: - return 0x2565; - case 0xf1: - return 0x2556; - case 0xf2: - return 0x255f; - case 0xf3: - return 0x256b; - case 0xf4: - return 0x2562; - case 0xf5: - return 0x2559; - case 0xf6: - return 0x2568; - case 0xf7: - return 0x255c; - case 0xf8: - return 0x2551; - case 0xf9: - return 0x2550; - case 0xfa: - return 0x256d; - case 0xfb: - return 0x256e; - case 0xfc: - return 0x2570; - case 0xfd: - return 0x256f; - case 0xfe: - return 0x2593; - default: - LOGGER.log(POILogger.WARN, "couldn't create char for: f9" - + " " + Integer.toString((trailing & 0xff), 16)); - return UNMAPPABLE; - } - } -} diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index a0778a3efa..d281c63386 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -34,7 +34,6 @@ public final class StringUtil { public static final Charset UTF16LE = StandardCharsets.UTF_16LE; public static final Charset UTF8 = StandardCharsets.UTF_8; public static final Charset WIN_1252 = Charset.forName("cp1252"); - public static final Charset BIG5 = Charset.forName("Big5"); private StringUtil() { // no instances of this class |