aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/java/org/apache/poi/util/CodePageUtil.java25
-rw-r--r--src/java/org/apache/poi/util/StringUtil.java1
-rw-r--r--src/multimodule/scratchpad/test9/module-info.classbin2652 -> 2690 bytes
-rw-r--r--src/multimodule/scratchpad/test9/module-info.java1
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java4
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java6
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java6
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java59
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/util/LittleEndianCP950Reader.java (renamed from src/java/org/apache/poi/util/LittleEndianCP950Reader.java)10
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/util/TestLittleEndianCP950Reader.java (renamed from src/testcases/org/apache/poi/util/TestLittleEndianCP950Reader.java)3
10 files changed, 76 insertions, 39 deletions
diff --git a/src/java/org/apache/poi/util/CodePageUtil.java b/src/java/org/apache/poi/util/CodePageUtil.java
index da8f8a9842..2c1480253d 100644
--- a/src/java/org/apache/poi/util/CodePageUtil.java
+++ b/src/java/org/apache/poi/util/CodePageUtil.java
@@ -31,8 +31,6 @@ import java.util.Set;
public class CodePageUtil
{
- public static final Set<Charset> DOUBLE_BYTE_CHARSETS = Collections.singleton(StringUtil.BIG5);
-
/** <p>Codepage 037, a special case</p> */
public static final int CP_037 = 37;
@@ -446,27 +444,4 @@ public class CodePageUtil
return "cp" + codepage;
}
}
-
- /**
- * This tries to convert a LE byte array in cp950
- * (Microsoft's dialect of Big5) to a String.
- * We know MS zero-padded ascii, and we drop those.
- * There may be areas for improvement in this.
- *
- * @param data
- * @param offset
- * @param lengthInBytes
- * @return Decoded String
- */
- public static String cp950ToString(byte[] data, int offset, int lengthInBytes) {
- StringBuilder sb = new StringBuilder();
- LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes);
- int c = reader.read();
- while (c != -1) {
- sb.append((char)c);
- c = reader.read();
- }
- reader.close();
- return sb.toString();
- }
}
diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java
index a0778a3efa..d281c63386 100644
--- a/src/java/org/apache/poi/util/StringUtil.java
+++ b/src/java/org/apache/poi/util/StringUtil.java
@@ -34,7 +34,6 @@ public final class StringUtil {
public static final Charset UTF16LE = StandardCharsets.UTF_16LE;
public static final Charset UTF8 = StandardCharsets.UTF_8;
public static final Charset WIN_1252 = Charset.forName("cp1252");
- public static final Charset BIG5 = Charset.forName("Big5");
private StringUtil() {
// no instances of this class
diff --git a/src/multimodule/scratchpad/test9/module-info.class b/src/multimodule/scratchpad/test9/module-info.class
index cfda7bb495..e74671e4fb 100644
--- a/src/multimodule/scratchpad/test9/module-info.class
+++ b/src/multimodule/scratchpad/test9/module-info.class
Binary files differ
diff --git a/src/multimodule/scratchpad/test9/module-info.java b/src/multimodule/scratchpad/test9/module-info.java
index 9d406fd36e..2aaef57a0a 100644
--- a/src/multimodule/scratchpad/test9/module-info.java
+++ b/src/multimodule/scratchpad/test9/module-info.java
@@ -82,6 +82,7 @@ module org.apache.poi.scratchpad {
exports org.apache.poi.hemf.hemfplus.extractor to junit;
exports org.apache.poi.hslf to junit;
exports org.apache.poi.hwmf to junit;
+ exports org.apache.poi.hwpf.util to junit;
opens org.apache.poi.hwpf.model to org.mockito;
opens org.apache.poi.hwpf.model.types to org.mockito;
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
index 238e0406f8..a8fb4078a7 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
@@ -37,7 +37,7 @@ import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.CodePageUtil;
+import org.apache.poi.hwpf.util.DoubleByteUtil;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.NotImplemented;
@@ -176,7 +176,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin(), MAX_RECORD_LENGTH);
int numChars = textData.length;
- if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
+ if (DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
numChars /= 2;
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java
index e3cb94c868..e6a0887f32 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java
@@ -20,7 +20,7 @@ import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
-import org.apache.poi.util.CodePageUtil;
+import org.apache.poi.hwpf.util.DoubleByteUtil;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
@@ -73,7 +73,7 @@ public class OldTextPieceTable extends TextPieceTable {
boolean unicode = pieces[x].isUnicode();
int multiple = 1;
if (unicode ||
- (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) {
+ (charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) {
multiple = 2;
}
@@ -106,7 +106,7 @@ public class OldTextPieceTable extends TextPieceTable {
@Override
protected int getEncodingMultiplier(TextPiece textPiece) {
Charset charset = textPiece.getPieceDescriptor().getCharset();
- if (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) {
+ if (charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) {
return 2;
}
return 1;
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
index 5c9fcf70d9..0c606cbf03 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
@@ -20,7 +20,7 @@ package org.apache.poi.hwpf.model;
import java.nio.charset.Charset;
-import org.apache.poi.util.CodePageUtil;
+import org.apache.poi.hwpf.util.DoubleByteUtil;
import org.apache.poi.util.Internal;
import org.apache.poi.util.StringUtil;
@@ -77,8 +77,8 @@ public class TextPiece extends PropertyNode<TextPiece> {
* Create the StringBuilder from the text and unicode flag
*/
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
- if (StringUtil.BIG5.equals(pd.getCharset())) {
- return new StringBuilder(CodePageUtil.cp950ToString(text, 0, text.length));
+ if (DoubleByteUtil.BIG5.equals(pd.getCharset())) {
+ return new StringBuilder(DoubleByteUtil.cp950ToString(text, 0, text.length));
}
String str = new String(text, 0, text.length, (pd.isUnicode()) ? StringUtil.UTF16LE : pd.getCharset());
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java b/src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java
new file mode 100644
index 0000000000..5d55711ed9
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java
@@ -0,0 +1,59 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.util;
+
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.Set;
+
+/**
+ * Utilities for working with double byte CodePages.
+ *
+ * <p>Provides the Big5 charset constant, the set of charsets handled as
+ * double byte, and a helper that decodes cp950 byte arrays to Strings.</p>
+ */
+public class DoubleByteUtil
+{
+ /** Charset looked up under the name "Big5"; also used as the cp950 decoder charset. */
+ public static final Charset BIG5 = Charset.forName("Big5");
+ /** Charsets handled as double byte; currently only {@link #BIG5}. */
+ public static final Set<Charset> DOUBLE_BYTE_CHARSETS = Collections.singleton(BIG5);
+
+ /**
+ * This tries to convert a LE byte array in cp950
+ * (Microsoft's dialect of Big5) to a String.
+ * We know MS zero-padded ascii, and we drop those.
+ * There may be areas for improvement in this.
+ *
+ * @param data byte array holding the cp950 encoded text
+ * @param offset offset into {@code data}, handed to {@link LittleEndianCP950Reader}
+ * @param lengthInBytes length in bytes, handed to {@link LittleEndianCP950Reader}
+ * @return Decoded String
+ */
+ public static String cp950ToString(byte[] data, int offset, int lengthInBytes) {
+ StringBuilder sb = new StringBuilder();
+ LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes);
+ int c = reader.read();
+ while (c != -1) { // the loop ends once read() returns -1
+ sb.append((char)c);
+ c = reader.read();
+ }
+ reader.close();
+ return sb.toString();
+ }
+}
diff --git a/src/java/org/apache/poi/util/LittleEndianCP950Reader.java b/src/scratchpad/src/org/apache/poi/hwpf/util/LittleEndianCP950Reader.java
index 61808afcaa..195629bb04 100644
--- a/src/java/org/apache/poi/util/LittleEndianCP950Reader.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/util/LittleEndianCP950Reader.java
@@ -15,13 +15,18 @@
limitations under the License.
==================================================================== */
-package org.apache.poi.util;
+package org.apache.poi.hwpf.util;
+
import java.io.IOException;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
/**
* Stream that converts CP950 (MSOffice's dialect of Big5), with
* zero-byte padding for ASCII and in LittleEndianOrder.
@@ -31,11 +36,10 @@ public class LittleEndianCP950Reader extends Reader {
private static final POILogger LOGGER = POILogFactory.getLogger(LittleEndianCP950Reader.class);
-
private static final char UNMAPPABLE = '?';
private final ByteBuffer doubleByteBuffer = ByteBuffer.allocate(2);
private final CharBuffer charBuffer = CharBuffer.allocate(2);
- private final CharsetDecoder decoder = StringUtil.BIG5.newDecoder();
+ private final CharsetDecoder decoder = DoubleByteUtil.BIG5.newDecoder();
//https://en.wikipedia.org/wiki/Code_page_950
//see private use area
diff --git a/src/testcases/org/apache/poi/util/TestLittleEndianCP950Reader.java b/src/scratchpad/testcases/org/apache/poi/hwpf/util/TestLittleEndianCP950Reader.java
index ef648e4f00..b6c7fd201a 100644
--- a/src/testcases/org/apache/poi/util/TestLittleEndianCP950Reader.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/util/TestLittleEndianCP950Reader.java
@@ -15,8 +15,7 @@
limitations under the License.
==================================================================== */
-package org.apache.poi.util;
-
+package org.apache.poi.hwpf.util;
import static org.junit.Assert.assertEquals;