From f7656dc6bf39d1d319389a21d57a47d453a13b4e Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 1 Apr 2011 15:20:07 +0000 Subject: [PATCH] Add new method to HSMF of MAPIMessage.has7BitEncodingStrings() to make it easier to decide when encoding guessing is needed git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1087746 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../src/org/apache/poi/hsmf/MAPIMessage.java | 32 +++++++++++++++++++ .../org/apache/poi/hsmf/TestBasics.java | 12 +++++++ 3 files changed, 45 insertions(+) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index a4a3250d17..9dca32e47c 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Add new method to HSMF of MAPIMessage.has7BitEncodingStrings() to make it easier to decide when encoding guessing is needed OutlookTextExtractor now requests 7 bit encoding guessing Improve HSMF encoding guessing for 7 bit fields in MAPIMessage Allow HSMF access to the HTML body contents in MAPIMessage diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java index e9cc82d14d..207f89e0e2 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java @@ -36,6 +36,7 @@ import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.Chunks; import org.apache.poi.hsmf.datatypes.NameIdChunks; import org.apache.poi.hsmf.datatypes.RecipientChunks; +import org.apache.poi.hsmf.datatypes.Types; import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter; import org.apache.poi.hsmf.datatypes.StringChunk; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; @@ -396,6 +397,37 @@ public class MAPIMessage extends POIDocument { } } + /** + * Does this file contain any strings that + * are stored as 7 bit rather than unicode? + */ + public boolean has7BitEncodingStrings() { + for(Chunk c : mainChunks.getAll()) { + if(c instanceof StringChunk) { + if( ((StringChunk)c).getType() == Types.ASCII_STRING ) { + return true; + } + } + } + for(Chunk c : nameIdChunks.getAll()) { + if(c instanceof StringChunk) { + if( ((StringChunk)c).getType() == Types.ASCII_STRING ) { + return true; + } + } + } + for(RecipientChunks rc : recipientChunks) { + for(Chunk c : rc.getAll()) { + if(c instanceof StringChunk) { + if( ((StringChunk)c).getType() == Types.ASCII_STRING ) { + return true; + } + } + } + } + return false; + } + /** * Returns all the headers, one entry per line */ diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java index e2f6fe39d4..e5d9a6e1d6 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java @@ -34,6 +34,7 @@ public final class TestBasics extends TestCase { private MAPIMessage outlook30; private MAPIMessage attachments; private MAPIMessage noRecipientAddress; + private MAPIMessage unicode; private MAPIMessage cyrillic; private MAPIMessage chinese; @@ -48,6 +49,7 @@ public final class TestBasics extends TestCase { outlook30 = new MAPIMessage(samples.openResourceAsStream("outlook_30_msg.msg")); attachments = new MAPIMessage(samples.openResourceAsStream("attachment_test_msg.msg")); noRecipientAddress = new MAPIMessage(samples.openResourceAsStream("no_recipient_address.msg")); + unicode = new MAPIMessage(samples.openResourceAsStream("example_received_unicode.msg")); cyrillic = new MAPIMessage(samples.openResourceAsStream("cyrillic_message.msg")); chinese = new MAPIMessage(samples.openResourceAsStream("chinese-traditional.msg")); } @@ -182,6 +184,16 @@ public final class TestBasics extends TestCase { noRecipientAddress.setReturnNullOnMissingChunk(false); } + /** + * Test the 7 bit detection + */ + public void test7BitDetection() throws Exception { + assertEquals(false, unicode.has7BitEncodingStrings()); + assertEquals(true, simple.has7BitEncodingStrings()); + assertEquals(true, chinese.has7BitEncodingStrings()); + assertEquals(true, cyrillic.has7BitEncodingStrings()); + } + /** * We default to CP1252, but can sometimes do better * if needed. -- 2.39.5