From 631fc8f9bca1bad6f4135628f7d39d74f34c1f8e Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Tue, 13 Jan 2015 22:13:24 +0000 Subject: [PATCH] Fix TIKA-1515 - Handle Excel 3 files with a 0x8001 codepage git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1651517 13f79535-47bb-0310-9956-ffa450edef68 --- src/java/org/apache/poi/hssf/record/OldStringRecord.java | 2 +- src/java/org/apache/poi/util/CodePageUtil.java | 4 ++++ .../org/apache/poi/hssf/extractor/TestOldExcelExtractor.java | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/poi/hssf/record/OldStringRecord.java b/src/java/org/apache/poi/hssf/record/OldStringRecord.java index 0a5b492238..4964f84803 100644 --- a/src/java/org/apache/poi/hssf/record/OldStringRecord.java +++ b/src/java/org/apache/poi/hssf/record/OldStringRecord.java @@ -75,7 +75,7 @@ public final class OldStringRecord { protected static String getString(byte[] data, CodepageRecord codepage) { int cp = CodePageUtil.CP_ISO_8859_1; if (codepage != null) { - cp = codepage.getCodepage(); + cp = codepage.getCodepage() & 0xffff; } try { return CodePageUtil.getStringFromCodePage(data, cp); diff --git a/src/java/org/apache/poi/util/CodePageUtil.java b/src/java/org/apache/poi/util/CodePageUtil.java index 02e0ed3c0c..145929182a 100644 --- a/src/java/org/apache/poi/util/CodePageUtil.java +++ b/src/java/org/apache/poi/util/CodePageUtil.java @@ -53,6 +53,7 @@ public class CodePageUtil /**

Codepage for Windows 1252

*/ public static final int CP_WINDOWS_1252 = 1252; + public static final int CP_WINDOWS_1252_BIFF23 = 32769; /**

Codepage for Windows 1253

*/ public static final int CP_WINDOWS_1253 = 1253; @@ -77,6 +78,7 @@ public class CodePageUtil /**

Codepage for Macintosh Roman (Java: MacRoman)

*/ public static final int CP_MAC_ROMAN = 10000; + public static final int CP_MAC_ROMAN_BIFF23 = 32768; /**

Codepage for Macintosh Japan (Java: unknown - use SJIS, cp942 or cp943)

*/ @@ -298,6 +300,7 @@ public class CodePageUtil else return "windows-1251"; case CP_WINDOWS_1252: + case CP_WINDOWS_1252_BIFF23: if (javaLangFormat) return "Cp1252"; else @@ -335,6 +338,7 @@ public class CodePageUtil case CP_JOHAB: return "johab"; case CP_MAC_ROMAN: + case CP_MAC_ROMAN_BIFF23: return "MacRoman"; case CP_MAC_JAPAN: return "SJIS"; diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java index 92235e70a7..bb2cfea66f 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java @@ -38,7 +38,7 @@ public final class TestOldExcelExtractor extends POITestCase { } } - public void DISABLEDtestSimpleExcel3() { + public void testSimpleExcel3() { OldExcelExtractor extractor = createExtractor("testEXCEL_3.xls"); // Check we can call getText without error -- 2.39.5