<changes>
<release version="3.8-beta3" date="2011-??-??">
+ <action dev="poi-developers" type="add">Add new HSMF method MAPIMessage.has7BitEncodingStrings() to make it easier to decide when encoding guessing is needed</action>
<action dev="poi-developers" type="fix">OutlookTextExtractor now requests 7 bit encoding guessing</action>
<action dev="poi-developers" type="add">Improve HSMF encoding guessing for 7 bit fields in MAPIMessage</action>
<action dev="poi-developers" type="add">Allow HSMF access to the HTML body contents in MAPIMessage</action>
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
+import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
}
}
+ /**
+ * Does this file contain any strings that
+ * are stored as 7 bit rather than unicode?
+ * <p>
+ * Looks at every chunk in the main chunks, the name id
+ * chunks and each group of recipient chunks, and stops
+ * at the first matching string chunk found.
+ *
+ * @return true if at least one {@link StringChunk} has the
+ *  type {@link Types#ASCII_STRING}, false if none does
+ */
+ public boolean has7BitEncodingStrings() {
+ for(Chunk c : mainChunks.getAll()) { // chunks of the main message
+ if(c instanceof StringChunk) { // only string chunks are checked
+ if( ((StringChunk)c).getType() == Types.ASCII_STRING ) {
+ return true; // one match is enough
+ }
+ }
+ }
+ for(Chunk c : nameIdChunks.getAll()) { // same check on the name id chunks
+ if(c instanceof StringChunk) {
+ if( ((StringChunk)c).getType() == Types.ASCII_STRING ) {
+ return true;
+ }
+ }
+ }
+ for(RecipientChunks rc : recipientChunks) { // same check for every recipient
+ for(Chunk c : rc.getAll()) {
+ if(c instanceof StringChunk) {
+ if( ((StringChunk)c).getType() == Types.ASCII_STRING ) {
+ return true;
+ }
+ }
+ }
+ }
+ return false; // no ASCII_STRING typed string chunk was found anywhere
+ }
+
/**
* Returns all the headers, one entry per line
*/
private MAPIMessage outlook30;
private MAPIMessage attachments;
private MAPIMessage noRecipientAddress;
+ private MAPIMessage unicode;
private MAPIMessage cyrillic;
private MAPIMessage chinese;
outlook30 = new MAPIMessage(samples.openResourceAsStream("outlook_30_msg.msg"));
attachments = new MAPIMessage(samples.openResourceAsStream("attachment_test_msg.msg"));
noRecipientAddress = new MAPIMessage(samples.openResourceAsStream("no_recipient_address.msg"));
+ unicode = new MAPIMessage(samples.openResourceAsStream("example_received_unicode.msg"));
cyrillic = new MAPIMessage(samples.openResourceAsStream("cyrillic_message.msg"));
chinese = new MAPIMessage(samples.openResourceAsStream("chinese-traditional.msg"));
}
noRecipientAddress.setReturnNullOnMissingChunk(false);
}
+ /**
+ * Test the 7 bit detection.
+ * <p>
+ * The unicode sample message is expected to report no
+ * 7 bit strings, while the simple, chinese and cyrillic
+ * sample messages are each expected to report that they
+ * do contain some.
+ */
+ public void test7BitDetection() throws Exception {
+ assertEquals(false, unicode.has7BitEncodingStrings()); // the only fixture expected to have none
+ assertEquals(true, simple.has7BitEncodingStrings());
+ assertEquals(true, chinese.has7BitEncodingStrings());
+ assertEquals(true, cyrillic.has7BitEncodingStrings());
+ }
+
/**
* We default to CP1252, but can sometimes do better
* if needed.