aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tim Allison <tallison@apache.org> 2019-04-08 19:51:16 +0000
committer Tim Allison <tallison@apache.org> 2019-04-08 19:51:16 +0000
commit ad11a5d3631c0b54ab793407517a67552b9f8e2e (patch)
tree 29b5ae0a4b5574260e99ba81d71b5827c8f39ae1
parent 836b9abdc6ccb61944ba0bfa6b29f48f4ab3b877 (diff)
download poi-ad11a5d3631c0b54ab793407517a67552b9f8e2e.tar.gz
poi-ad11a5d3631c0b54ab793407517a67552b9f8e2e.zip
Bug 63323 -- improve handling of multibyte characters
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1857135 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java | 6
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java | 7
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java | 28
3 files changed, 34 insertions, 7 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java b/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
index 05f5d200be..766f9020df 100644
--- a/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
+++ b/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
@@ -400,7 +400,11 @@ public class HwmfGraphics {
}
}
- String textString = new String(text, charset).substring(0,length).trim();
+ String textString = "";
+ if (text != null) {
+ textString = new String(text, charset).trim();
+ textString = textString.substring(0, Math.min(textString.length(), length));
+ }
if (textString.isEmpty()) {
return;
diff --git a/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java b/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
index 061f5cfc66..6af61b81ac 100644
--- a/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
+++ b/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
@@ -395,7 +395,12 @@ public class HwmfText {
}
public String getText(Charset charset) throws IOException {
- return new String(rawTextBytes, charset).substring(0, stringLength);
+ if (rawTextBytes == null) {
+ return "";
+ }
+ String ret = new String(rawTextBytes, charset);
+ return ret.substring(0,
+ Math.min(ret.length(), stringLength));
}
public Point2D getReference() {
diff --git a/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java b/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java
index 4a988250fe..7632284296 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java
@@ -35,6 +35,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Locale;
import java.util.zip.ZipEntry;
@@ -238,12 +239,12 @@ public class TestHwmfParsing {
}
@Test
- @Ignore("If we decide we can use the common crawl file attached to Bug 60677, " +
- "we can turn this back on")
public void testShift_JIS() throws Exception {
- //TODO: move test file to framework and fix this
- File f = new File("C:/data/file8.wmf");
- HwmfPicture wmf = new HwmfPicture(new FileInputStream(f));
+ //this file derives from common crawl: see Bug 60677
+ HwmfPicture wmf = null;
+ try (InputStream fis = samples.openResourceAsStream("60677.wmf")) {
+ wmf = new HwmfPicture(fis);
+ }
Charset charset = LocaleUtil.CHARSET_1252;
StringBuilder sb = new StringBuilder();
@@ -263,4 +264,21 @@ public class TestHwmfParsing {
String txt = sb.toString();
assertContains(txt, "\u822A\u7A7A\u60C5\u5831\u696D\u52D9\u3078\u306E\uFF27\uFF29\uFF33");
}
+
+ @Test
+ public void testLengths() throws Exception {
+ //both substring and length count UTF-16 chars (code units), not code points.
+ //This test confirms that the substring calls in HwmfText
+ //will not truncate even beyond-BMP data.
+ //The last character (Deseret AY U+1040C) is a single code point composed of 2 UTF-16 surrogates
+ String s = "\u666E\u6797\u65AF\uD801\uDC0C";
+ Charset utf16LE = StandardCharsets.UTF_16LE;
+ byte[] bytes = s.getBytes(utf16LE);
+ String rebuilt = new String(bytes, utf16LE);
+ rebuilt = rebuilt.substring(0, Math.min(bytes.length, rebuilt.length()));
+ assertEquals(s, rebuilt);
+ assertEquals(5, rebuilt.length());
+ long cnt = rebuilt.codePoints().count();
+ assertEquals(4, cnt);
+ }
}