Browse Source

[github-149] improve MAPIMessage.guess7BitEncoding, improve MAPIMessage.getHtmlBody. Thanks to Dominik Hölzl. This closes #149

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1860043 13f79535-47bb-0310-9956-ffa450edef68
pull/146/head
PJ Fanning 4 years ago
parent
commit
721180d35f

+ 468
- 0
src/java/org/apache/poi/util/LocaleUtil.java View File

@@ -616,5 +616,473 @@ public final class LocaleUtil {
}
}
/**
 * Looks up the default code page that belongs to a locale identifier (LCID).
 * <p>
 * Only the lower 16 bits of the given value take part in the lookup; any
 * higher bits are ignored. The table below is grouped by resulting code page.
 *
 * @param lcid the LCID value
 * @return the default code page, or {@code 0} if no code page is assigned
 *         to the language id
 */
public static int getDefaultCodePageFromLCID(int lcid) {
    final int langId = lcid & 0xFFFF;
    switch (langId) {
        case 0x001e: case 0x041e:
            return 874;

        case 0x0011: case 0x0411: case 0x05fe:
            return 932;

        case 0x0004: case 0x0804: case 0x1004: case 0x7804:
            return 936;

        case 0x0012: case 0x0412:
            return 949;

        case 0x0404: case 0x0c04: case 0x1404: case 0x7c04:
            return 950;

        case 0x0005: case 0x000e: case 0x0015: case 0x0018:
        case 0x001a: case 0x001b: case 0x001c: case 0x0024:
        case 0x0042:
        case 0x0405: case 0x040e: case 0x0415: case 0x0418:
        case 0x041a: case 0x041b: case 0x041c: case 0x0424:
        case 0x0442:
        case 0x0501: case 0x081a:
        case 0x101a: case 0x141a: case 0x181a: case 0x241a:
        case 0x2c1a: case 0x681a: case 0x701a: case 0x781a:
        case 0x7c1a:
            return 1250;

        case 0x0002: case 0x0019: case 0x0022: case 0x0023:
        case 0x0028: case 0x002f: case 0x0040: case 0x0044:
        case 0x0050: case 0x006d: case 0x0085:
        case 0x0402: case 0x0419: case 0x0422: case 0x0423:
        case 0x0428: case 0x042f: case 0x0440: case 0x0444:
        case 0x0450: case 0x046d: case 0x0485:
        case 0x082c: case 0x0843:
        case 0x0c1a: case 0x1c1a: case 0x201a: case 0x281a:
        case 0x301a: case 0x641a: case 0x6c1a: case 0x742c:
        case 0x7843: case 0x7850: case 0x7c28:
            return 1251;

        case 0x0003: case 0x0006: case 0x0007: case 0x0009:
        case 0x000a: case 0x000b: case 0x000c: case 0x000f:
        case 0x0010: case 0x0013: case 0x0014: case 0x0016:
        case 0x0017: case 0x001d: case 0x0021: case 0x002d:
        case 0x002e: case 0x0032: case 0x0034: case 0x0035:
        case 0x0036: case 0x0038: case 0x003b: case 0x003c:
        case 0x003e: case 0x0041: case 0x0052: case 0x0056:
        case 0x005d: case 0x005f: case 0x0062: case 0x0064:
        case 0x0067: case 0x0068: case 0x006a: case 0x006b:
        case 0x006c: case 0x006e: case 0x006f: case 0x0070:
        case 0x0074: case 0x0075: case 0x007a: case 0x007c:
        case 0x007e: case 0x007f: case 0x0082: case 0x0083:
        case 0x0084: case 0x0086: case 0x0087: case 0x0088:
        case 0x0091:
        case 0x0403: case 0x0406: case 0x0407: case 0x0409:
        case 0x040a: case 0x040b: case 0x040c: case 0x040f:
        case 0x0410: case 0x0413: case 0x0414: case 0x0416:
        case 0x0417: case 0x041d: case 0x0421: case 0x042d:
        case 0x042e: case 0x0432: case 0x0434: case 0x0435:
        case 0x0436: case 0x0438: case 0x043b: case 0x043e:
        case 0x0441: case 0x0452: case 0x0456: case 0x0462:
        case 0x0464: case 0x0468: case 0x046a: case 0x046b:
        case 0x046c: case 0x046e: case 0x046f: case 0x0470:
        case 0x0474: case 0x0475: case 0x047a: case 0x047c:
        case 0x047e: case 0x0482: case 0x0483: case 0x0484:
        case 0x0486: case 0x0487: case 0x0488: case 0x0491:
        case 0x0803: case 0x0807: case 0x0809: case 0x080a:
        case 0x080c: case 0x0810: case 0x0813: case 0x0814:
        case 0x0816: case 0x081d: case 0x082e: case 0x0832:
        case 0x083b: case 0x083c: case 0x083e: case 0x085d:
        case 0x085f: case 0x0867: case 0x086b:
        case 0x0c07: case 0x0c09: case 0x0c0a: case 0x0c0c:
        case 0x0c3b: case 0x0c6b:
        case 0x1007: case 0x1009: case 0x100a: case 0x100c:
        case 0x103b:
        case 0x1407: case 0x1409: case 0x140a: case 0x140c:
        case 0x143b:
        case 0x1809: case 0x180a: case 0x180c: case 0x183b:
        case 0x1c09: case 0x1c0a: case 0x1c3b:
        case 0x2009: case 0x200a: case 0x203b:
        case 0x2409: case 0x240a: case 0x243b:
        case 0x2809: case 0x280a:
        case 0x2c09: case 0x2c0a:
        case 0x3009: case 0x300a:
        case 0x3409: case 0x340a:
        case 0x380a:
        case 0x3c0a:
        case 0x4009: case 0x400a:
        case 0x4409: case 0x440a:
        case 0x4809: case 0x480a:
        case 0x4c0a: case 0x500a: case 0x540a:
        case 0x703b: case 0x743b:
        case 0x7814: case 0x783b:
        case 0x7c14: case 0x7c2e: case 0x7c3b: case 0x7c5d:
        case 0x7c5f: case 0x7c67: case 0x7c68:
            return 1252;

        case 0x0008: case 0x0408:
            return 1253;

        case 0x001f: case 0x002c: case 0x0043:
        case 0x041f: case 0x042c: case 0x0443:
        case 0x782c: case 0x7c43:
            return 1254;

        case 0x000d: case 0x040d:
            return 1255;

        case 0x0001: case 0x0020: case 0x0029: case 0x0059:
        case 0x0080: case 0x008c: case 0x0092:
        case 0x0401: case 0x0420: case 0x0429: case 0x0480:
        case 0x048c: case 0x0492:
        case 0x0801: case 0x0846: case 0x0859: case 0x09ff:
        case 0x0c01: case 0x1001: case 0x1401: case 0x1801:
        case 0x1c01: case 0x2001: case 0x2401: case 0x2801:
        case 0x2c01: case 0x3001: case 0x3401: case 0x3801:
        case 0x3c01: case 0x4001:
        case 0x7c46: case 0x7c59: case 0x7c92:
            return 1256;

        case 0x0025: case 0x0026: case 0x0027:
        case 0x0425: case 0x0426: case 0x0427:
            return 1257;

        case 0x002a: case 0x042a:
            return 1258;

        case 0x0033: case 0x003d: case 0x0058: case 0x0060:
        case 0x0066: case 0x0069: case 0x0071: case 0x0076:
        case 0x0079: case 0x007b: case 0x007d: case 0x0089:
        case 0x008a: case 0x008b: case 0x008d: case 0x008e:
        case 0x008f: case 0x0090: case 0x0093:
        case 0x0433: case 0x043d: case 0x0458: case 0x0459:
        case 0x045f: case 0x0460: case 0x0466: case 0x0467:
        case 0x0469: case 0x0471: case 0x0476: case 0x0479:
        case 0x048d: case 0x048e: case 0x048f: case 0x0490:
        case 0x0493:
        case 0x0811: case 0x0819: case 0x0827: case 0x0851:
        case 0x0860:
        case 0x0c5f: case 0x1c0c: case 0x2008: case 0x3809:
        case 0x4401: case 0x4801: case 0x4c09: case 0x5009:
        case 0x5409: case 0x5809: case 0x5c09: case 0x6009:
        case 0x6409:
            return 32759;

        // The ids below are listed explicitly although they share the
        // result of the default branch.
        case 0x002b: case 0x0030: case 0x0031: case 0x0037:
        case 0x0039: case 0x003a: case 0x003f:
        case 0x0045: case 0x0046: case 0x0047: case 0x0048:
        case 0x0049: case 0x004a: case 0x004b: case 0x004c:
        case 0x004d: case 0x004e: case 0x004f:
        case 0x0051: case 0x0053: case 0x0054: case 0x0055:
        case 0x0057: case 0x005a: case 0x005b: case 0x005c:
        case 0x005e:
        case 0x0061: case 0x0063: case 0x0065:
        case 0x0072: case 0x0073: case 0x0077: case 0x0078:
        case 0x0081:
        case 0x042b: case 0x0430: case 0x0431: case 0x0437:
        case 0x0439: case 0x043a: case 0x043f:
        case 0x0445: case 0x0446: case 0x0447: case 0x0448:
        case 0x0449: case 0x044a: case 0x044b: case 0x044c:
        case 0x044d: case 0x044e: case 0x044f:
        case 0x0451: case 0x0453: case 0x0454: case 0x0455:
        case 0x0457: case 0x045a: case 0x045b: case 0x045c:
        case 0x045d: case 0x045e:
        case 0x0461: case 0x0463: case 0x0465:
        case 0x0472: case 0x0473: case 0x0477: case 0x0478:
        case 0x0481:
        case 0x0818: case 0x0820: case 0x0845: case 0x0849:
        case 0x0850: case 0x0861: case 0x0873:
        case 0x200c: case 0x240c: case 0x280c: case 0x2c0c:
        case 0x300c: case 0x340c: case 0x380c:
        case 0x3c09: case 0x3c0c:
        case 0x785d: case 0x7c50: case 0x7c5c:
        default:
            return 0;
    }
}
}


+ 122
- 74
src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java View File

@@ -50,6 +50,7 @@ import org.apache.poi.hsmf.parsers.POIFSChunkParser;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;

@@ -210,8 +211,21 @@ public class MAPIMessage extends POIReadOnlyDocument {
* returnNullOnMissingChunk is set
*/
public String getHtmlBody() throws ChunkNotFoundException {
if(mainChunks.getHtmlBodyChunkBinary() != null) {
return mainChunks.getHtmlBodyChunkBinary().getAs7bitString();
ByteChunk htmlBodyBinaryChunk = mainChunks.getHtmlBodyChunkBinary();
if (htmlBodyBinaryChunk != null) {
List<PropertyValue> cpid = mainChunks.getProperties().get(MAPIProperty.INTERNET_CPID);
if (cpid != null && cpid.size() > 0) {
int codepage = ((LongPropertyValue) cpid.get(0)).getValue();
try {
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
byte[] htmlBodyBinary = htmlBodyBinaryChunk.getValue();
return new String(htmlBodyBinary, encoding);
} catch (UnsupportedEncodingException e) {
logger.log(POILogger.WARN, "HTML body binary: Invalid codepage ID ", codepage, " set for the message via ",
MAPIProperty.INTERNET_CPID, ", ignoring");
}
}
return htmlBodyBinaryChunk.getAs7bitString();
}
return getStringFromChunk(mainChunks.getHtmlBodyChunkString());
}
@@ -391,67 +405,86 @@ public class MAPIMessage extends POIReadOnlyDocument {
* <p>Bug #49441 has more on why this is needed</p>
*/
public void guess7BitEncoding() {
// First choice is a codepage property
for (MAPIProperty prop : new MAPIProperty[] {
MAPIProperty.MESSAGE_CODEPAGE,
MAPIProperty.INTERNET_CPID
}) {
List<PropertyValue> val = mainChunks.getProperties().get(prop);
if (val != null && val.size() > 0) {
int codepage = ((LongPropertyValue)val.get(0)).getValue();
try {
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
set7BitEncoding(encoding);
return;
} catch(UnsupportedEncodingException e) {
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage,
" set for the message via ", prop, ", ignoring");
}
String generalcodepage = null;
String htmlbodycodepage = null;
String bodycodepage = null;
//
// General codepage: Message codepage property.
//
List<PropertyValue> val = mainChunks.getProperties().get(MAPIProperty.MESSAGE_CODEPAGE);
if (val != null && val.size() > 0) {
int codepage = ((LongPropertyValue) val.get(0)).getValue();
try {
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
generalcodepage = encoding;
} catch (UnsupportedEncodingException e) {
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage, " set for the message via ",
MAPIProperty.MESSAGE_CODEPAGE, ", ignoring");
}
}
//
// General codepage fallback: Message locale ID property.
//
if (generalcodepage == null) {
val = mainChunks.getProperties().get(MAPIProperty.MESSAGE_LOCALE_ID);
if (val != null && val.size() > 0) {
int lcid = ((LongPropertyValue) val.get(0)).getValue();
int codepage = LocaleUtil.getDefaultCodePageFromLCID(lcid);
try {
if (codepage != 0) {
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
generalcodepage = encoding;
}
} catch (UnsupportedEncodingException e) {
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage, "from locale ID", lcid, " set for the message via ",
MAPIProperty.MESSAGE_LOCALE_ID, ", ignoring");
}
}
// Second choice is a charset on a content type header
try {
}
}
//
// General codepage fallback: Charset on a content type header.
//
if (generalcodepage == null) {
try {
String[] headers = getHeaders();
if(headers != null && headers.length > 0) {
// Look for a content type with a charset
Pattern p = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);

for(String header : headers) {
if(header.startsWith("Content-Type")) {
Matcher m = p.matcher(header);
if(m.matches()) {
// Found it! Tell all the string chunks
String charset = m.group(1);

if (!charset.equalsIgnoreCase("utf-8")) {
set7BitEncoding(charset);
}
return;
}
if (headers != null && headers.length > 0) {
Pattern p = Pattern.compile("content-type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);
for (String header : headers) {
if (header.toLowerCase().startsWith("content-type")) {
Matcher m = p.matcher(header);
if (m.matches()) {
String encoding = m.group(1);
generalcodepage = encoding;
}
}
}
}
}
} catch(ChunkNotFoundException e) {}
// Nothing suitable in the headers, try HTML
try {
String html = getHtmlBody();
if(html != null && html.length() > 0) {
// Look for a content type in the meta headers
Pattern p = Pattern.compile(
"<META\\s+HTTP-EQUIV=\"Content-Type\"\\s+CONTENT=\"text/html;\\s+charset=(.*?)\""
);
Matcher m = p.matcher(html);
if(m.find()) {
// Found it! Tell all the string chunks
String charset = m.group(1);
set7BitEncoding(charset);
}
} catch (ChunkNotFoundException e) {
}
}
//
// HTML and text body encoding: Internet CPID property.
// UTF-8 is ignored for text body. This seems to be a special Outlook behavior.
//
val = mainChunks.getProperties().get(MAPIProperty.INTERNET_CPID);
if (val != null && val.size() > 0) {
int codepage = ((LongPropertyValue) val.get(0)).getValue();
try {
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
htmlbodycodepage = encoding;
if (!encoding.equalsIgnoreCase("utf-8")) {
bodycodepage = encoding;
}
} catch(ChunkNotFoundException e) {}
}
} catch (UnsupportedEncodingException e) {
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage, " set for the message via ",
MAPIProperty.INTERNET_CPID, ", ignoring");
}
}
//
// Apply encoding
//
set7BitEncoding(generalcodepage, htmlbodycodepage, bodycodepage);
}

/**
* Many messages store their strings as unicode, which is
@@ -464,26 +497,41 @@ public class MAPIMessage extends POIReadOnlyDocument {
* @see #guess7BitEncoding()
*/
public void set7BitEncoding(String charset) {
set7BitEncoding(charset, charset, charset);
}
public void set7BitEncoding(String generalcharset, String htmlbodycharset, String bodycharset) {
for(Chunk c : mainChunks.getChunks()) {
if(c instanceof StringChunk) {
((StringChunk)c).set7BitEncoding(charset);
if (c.getChunkId() == MAPIProperty.BODY_HTML.id) {
if (htmlbodycharset != null) {
((StringChunk)c).set7BitEncoding(htmlbodycharset);
}
}
else if (c.getChunkId() == MAPIProperty.BODY.id) {
if (bodycharset != null) {
((StringChunk)c).set7BitEncoding(bodycharset);
}
}
else if (generalcharset != null) {
((StringChunk)c).set7BitEncoding(generalcharset);
}
}
}

if (nameIdChunks!=null) {
for(Chunk c : nameIdChunks.getChunks()) {
if(c instanceof StringChunk) {
((StringChunk)c).set7BitEncoding(charset);
}
}
}

for(RecipientChunks rc : recipientChunks) {
for(Chunk c : rc.getAll()) {
if(c instanceof StringChunk) {
((StringChunk)c).set7BitEncoding(charset);
}
}
if (generalcharset != null) {
if (nameIdChunks!=null) {
for(Chunk c : nameIdChunks.getChunks()) {
if(c instanceof StringChunk) {
((StringChunk)c).set7BitEncoding(generalcharset);
}
}
}
for(RecipientChunks rc : recipientChunks) {
for(Chunk c : rc.getAll()) {
if(c instanceof StringChunk) {
((StringChunk)c).set7BitEncoding(generalcharset);
}
}
}
}
}

+ 2
- 0
src/scratchpad/src/org/apache/poi/hsmf/datatypes/MAPIProperty.java View File

@@ -512,6 +512,8 @@ public class MAPIProperty {
new MAPIProperty(0x1a, ASCII_STRING, "MessageClass", "PR_MESSAGE_CLASS");
public static final MAPIProperty MESSAGE_CODEPAGE =
new MAPIProperty(0x3ffd, Types.LONG, "MessageCodepage", "PR_MESSAGE_CODEPAGE");
public static final MAPIProperty MESSAGE_LOCALE_ID =
new MAPIProperty(0x3ff1, Types.LONG, "MessageLocaleId", "PR_MESSAGE_LOCALE_ID");
public static final MAPIProperty MESSAGE_DELIVERY_ID =
new MAPIProperty(0x1b, BINARY, "MessageDeliveryId", "PR_MESSAGE_DELIVERY_ID");
public static final MAPIProperty MESSAGE_DELIVERY_TIME =

+ 2
- 1
src/scratchpad/testcases/org/apache/poi/hsmf/AllHSMFTests.java View File

@@ -39,7 +39,8 @@ import org.junit.runners.Suite;
TestPOIFSChunkParser.class,
TestMessageSubmissionChunkY2KRead.class,
TestMessageSubmissionChunk.class,
TestExtractEmbeddedMSG.class
TestExtractEmbeddedMSG.class,
Test7BitCodepage.class
})
public class AllHSMFTests {
}

+ 85
- 0
src/scratchpad/testcases/org/apache/poi/hsmf/Test7BitCodepage.java View File

@@ -0,0 +1,85 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */

package org.apache.poi.hsmf;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.poi.POIDataSamples;

/**
 * Checks that the 7 bit encoding guessed for a message is applied correctly
 * to general properties such as the subject, to the text body and to the
 * html body.
 */
public final class Test7BitCodepage extends TestCase {
    private final MAPIMessage asciiCp1251Lcid1049;
    private final MAPIMessage asciiUtf8Cp1252Lcid1031;
    private final MAPIMessage asciiUtf8Cp1252Lcid1031Html;
    private final MAPIMessage htmlBodyBinaryCp1251;
    private final MAPIMessage htmlBodyBinaryUtf8;

    /**
     * Sets up the test by loading all sample messages.
     *
     * @throws IOException if a sample message cannot be read
     */
    public Test7BitCodepage() throws IOException {
        final POIDataSamples hsmfSamples = POIDataSamples.getHSMFInstance();
        asciiCp1251Lcid1049 = load(hsmfSamples, "ASCII_CP1251_LCID1049.msg");
        asciiUtf8Cp1252Lcid1031 = load(hsmfSamples, "ASCII_UTF-8_CP1252_LCID1031.msg");
        asciiUtf8Cp1252Lcid1031Html = load(hsmfSamples, "ASCII_UTF-8_CP1252_LCID1031_HTML.msg");
        htmlBodyBinaryCp1251 = load(hsmfSamples, "HTMLBodyBinary_CP1251.msg");
        htmlBodyBinaryUtf8 = load(hsmfSamples, "HTMLBodyBinary_UTF-8.msg");
    }

    /**
     * Opens the named sample resource and parses it as a message.
     */
    private static MAPIMessage load(POIDataSamples source, String fileName) throws IOException {
        return new MAPIMessage(source.openResourceAsStream(fileName));
    }

    /**
     * Guesses the encoding of every sample message and then verifies the
     * decoded subject, text body and html body of each of them.
     */
    public void test7BitEncoding() throws Exception {
        final MAPIMessage[] allMessages = {
            asciiCp1251Lcid1049,
            asciiUtf8Cp1252Lcid1031,
            asciiUtf8Cp1252Lcid1031Html,
            htmlBodyBinaryCp1251,
            htmlBodyBinaryUtf8
        };
        for (MAPIMessage message : allMessages) {
            message.guess7BitEncoding();
            // a missing chunk is reported as null, which the assertNull checks below rely on
            message.setReturnNullOnMissingChunk(true);
        }

        // ASCII_CP1251_LCID1049.msg
        assertEquals("Subject автоматически Subject", asciiCp1251Lcid1049.getSubject());
        assertEquals("Body автоматически Body", asciiCp1251Lcid1049.getTextBody());
        assertEquals("<!DOCTYPE html><html><meta charset=\\\"windows-1251\\\"><body>HTML автоматически</body></html>", asciiCp1251Lcid1049.getHtmlBody());

        // ASCII_UTF-8_CP1252_LCID1031.msg
        assertEquals("Subject öäü Subject", asciiUtf8Cp1252Lcid1031.getSubject());
        assertEquals("Body öäü Body", asciiUtf8Cp1252Lcid1031.getTextBody());
        assertNull(asciiUtf8Cp1252Lcid1031.getHtmlBody());

        // ASCII_UTF-8_CP1252_LCID1031_HTML.msg
        assertEquals("Subject öäü Subject", asciiUtf8Cp1252Lcid1031Html.getSubject());
        assertEquals("Body öäü Body", asciiUtf8Cp1252Lcid1031Html.getTextBody());
        assertEquals("<!DOCTYPE html><html><meta charset=\\\"utf-8\\\"><body>HTML öäü</body></html>", asciiUtf8Cp1252Lcid1031Html.getHtmlBody());

        // HTMLBodyBinary_CP1251.msg
        assertEquals("Subject öäü Subject", htmlBodyBinaryCp1251.getSubject());
        assertNull(htmlBodyBinaryCp1251.getTextBody());
        assertEquals("<!DOCTYPE html><html><meta charset=\\\"utf-8\\\"><body>HTML автоматически</body></html>", htmlBodyBinaryCp1251.getHtmlBody());

        // HTMLBodyBinary_UTF-8.msg
        assertEquals("Subject öäü Subject", htmlBodyBinaryUtf8.getSubject());
        assertNull(htmlBodyBinaryUtf8.getTextBody());
        assertEquals("<!DOCTYPE html><html><meta charset=\\\"utf-8\\\"><body>HTML öäü</body></html>", htmlBodyBinaryUtf8.getHtmlBody());
    }
}

BIN
test-data/hsmf/ASCII_CP1251_LCID1049.msg View File


BIN
test-data/hsmf/ASCII_UTF-8_CP1252_LCID1031.msg View File


BIN
test-data/hsmf/ASCII_UTF-8_CP1252_LCID1031_HTML.msg View File


BIN
test-data/hsmf/HTMLBodyBinary_CP1251.msg View File


BIN
test-data/hsmf/HTMLBodyBinary_UTF-8.msg View File


Loading…
Cancel
Save