about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- poi-ooxml/src/test/java/org/apache/poi/xwpf/TestXWPFBugs.java | 22
-rw-r--r-- poi/src/main/java/org/apache/poi/poifs/filesystem/Ole10Native.java | 8
-rw-r--r-- poi/src/main/java/org/apache/poi/util/StringUtil.java | 4
-rw-r--r-- test-data/document/tika-3388.docx | bin 0 -> 16665 bytes
4 files changed, 28 insertions, 6 deletions
diff --git a/poi-ooxml/src/test/java/org/apache/poi/xwpf/TestXWPFBugs.java b/poi-ooxml/src/test/java/org/apache/poi/xwpf/TestXWPFBugs.java
index 7048bc13fe..cad6cd12f8 100644
--- a/poi-ooxml/src/test/java/org/apache/poi/xwpf/TestXWPFBugs.java
+++ b/poi-ooxml/src/test/java/org/apache/poi/xwpf/TestXWPFBugs.java
@@ -32,10 +32,14 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.poifs.crypt.CipherAlgorithm;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.HashAlgorithm;
+import org.apache.poi.poifs.filesystem.Ole10Native;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
@@ -159,4 +163,22 @@ class TestXWPFBugs {
assertEquals(731, document.getParagraphs().size());
}
}
+
+ @Test
+ void tika3388() throws Exception {
+ try (XWPFDocument document = new XWPFDocument(samples.openResourceAsStream("tika-3388.docx"))) {
+ assertEquals(1, document.getParagraphs().size());
+ PackagePartName partName = PackagingURIHelper.createPartName("/word/embeddings/oleObject1.bin");
+ PackagePart part = document.getPackage().getPart(partName);
+ assertNotNull(part);
+ try (
+ InputStream partStream = part.getInputStream();
+ POIFSFileSystem poifs = new POIFSFileSystem(partStream)
+ ) {
+ Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
+ assertEquals("C:\\Users\\ross\\AppData\\Local\\Microsoft\\Windows\\INetCache\\Content.Word\\約翰的測試文件\uD83D\uDD96.msg",
+ ole.getFileName());
+ }
+ }
+ }
}
diff --git a/poi/src/main/java/org/apache/poi/poifs/filesystem/Ole10Native.java b/poi/src/main/java/org/apache/poi/poifs/filesystem/Ole10Native.java
index 9b1545e3b0..681f4bd8ff 100644
--- a/poi/src/main/java/org/apache/poi/poifs/filesystem/Ole10Native.java
+++ b/poi/src/main/java/org/apache/poi/poifs/filesystem/Ole10Native.java
@@ -48,7 +48,7 @@ public class Ole10Native {
public static final String OLE10_NATIVE = "\u0001Ole10Native";
- private static final Charset ISO1 = StandardCharsets.ISO_8859_1;
+ private static final Charset UTF8 = StandardCharsets.UTF_8;
// arbitrarily selected; may need to increase
private static final int DEFAULT_MAX_RECORD_LENGTH = 100_000_000;
private static int MAX_RECORD_LENGTH = DEFAULT_MAX_RECORD_LENGTH;
@@ -407,14 +407,14 @@ public class Ole10Native {
// total size, will be determined later ..
leos.writeShort(getFlags1());
- leos.write(getLabel().getBytes(ISO1));
+ leos.write(getLabel().getBytes(UTF8));
leos.write(0);
- leos.write(getFileName().getBytes(ISO1));
+ leos.write(getFileName().getBytes(UTF8));
leos.write(0);
leos.writeShort(getFlags2());
leos.writeShort(getUnknown1());
leos.writeInt(getCommand().length() + 1);
- leos.write(getCommand().getBytes(ISO1));
+ leos.write(getCommand().getBytes(UTF8));
leos.write(0);
leos.writeInt(getDataSize());
leos.write(getDataBuffer());
diff --git a/poi/src/main/java/org/apache/poi/util/StringUtil.java b/poi/src/main/java/org/apache/poi/util/StringUtil.java
index 190d8dc99c..69e45c7d50 100644
--- a/poi/src/main/java/org/apache/poi/util/StringUtil.java
+++ b/poi/src/main/java/org/apache/poi/util/StringUtil.java
@@ -135,13 +135,13 @@ public final class StringUtil {
final int offset,
final int len) {
int len_to_use = Math.min(len, string.length - offset);
- return new String(string, offset, len_to_use, ISO_8859_1);
+ return new String(string, offset, len_to_use, UTF8);
}
public static String readCompressedUnicode(LittleEndianInput in, int nChars) {
byte[] buf = IOUtils.safelyAllocate(nChars, MAX_RECORD_LENGTH);
in.readFully(buf);
- return new String(buf, ISO_8859_1);
+ return new String(buf, UTF8);
}
/**
diff --git a/test-data/document/tika-3388.docx b/test-data/document/tika-3388.docx
new file mode 100644
index 0000000000..b884ea1ac6
--- /dev/null
+++ b/test-data/document/tika-3388.docx
Binary files differ