From dcdb268d3bffa6fb652dbd4a15fd7afe788da608 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 1 Apr 2011 14:51:45 +0000 Subject: [PATCH] Improve HSMF encoding guessing for 7 bit fields, and allow HSMF access to the HTML body contents in MAPIMessage git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1087726 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 4 + .../src/org/apache/poi/hsmf/MAPIMessage.java | 77 ++++++++++++++---- .../org/apache/poi/hsmf/datatypes/Chunks.java | 6 ++ .../poi/hsmf/datatypes/StringChunk.java | 33 +++++--- .../org/apache/poi/hsmf/TestBasics.java | 24 ++++++ test-data/hsmf/chinese-traditional.msg | Bin 0 -> 48129 bytes 6 files changed, 115 insertions(+), 29 deletions(-) create mode 100644 test-data/hsmf/chinese-traditional.msg diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 2730545cce..c619413f20 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,6 +33,10 @@ + + Improve HSMF encoding guessing for 7 bit fields in MAPIMessage + Allow HSMF access to the HTML body contents in MAPIMessage + Implement the load method on MemoryPackagePart 50967 - Support for continued ExtSSTRecords diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java index f220ebfe55..e9cc82d14d 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java @@ -176,6 +176,16 @@ public class MAPIMessage extends POIDocument { return getStringFromChunk(mainChunks.textBodyChunk); } + /** + * Gets the html body of this Outlook Message, if this email + * contains a html version. + * @return The string representation of the 'html' version of the body, if available. + * @throws ChunkNotFoundException + */ + public String getHmtlBody() throws ChunkNotFoundException { + return getStringFromChunk(mainChunks.htmlBodyChunk); + } + /** * Gets the subject line of the Outlook Message * @throws ChunkNotFoundException @@ -331,28 +341,59 @@ public class MAPIMessage extends POIDocument { if(m.matches()) { // Found it! Tell all the string chunks String charset = m.group(1); - - for(Chunk c : mainChunks.getAll()) { - if(c instanceof StringChunk) { - ((StringChunk)c).set7BitEncoding(charset); - } - } - for(Chunk c : nameIdChunks.getAll()) { - if(c instanceof StringChunk) { - ((StringChunk)c).set7BitEncoding(charset); - } - } - for(RecipientChunks rc : recipientChunks) { - for(Chunk c : rc.getAll()) { - if(c instanceof StringChunk) { - ((StringChunk)c).set7BitEncoding(charset); - } - } - } + set7BitEncoding(charset); + return; } } } } catch(ChunkNotFoundException e) {} + + // Nothing suitable in the headers, try HTML + try { + String html = getHmtlBody(); + + // Look for a content type in the meta headers + Pattern p = Pattern.compile( + "B2__Lj;wtS%^tHM#u(~D&6U0P_y8vFLX&N8lbGW<(|3x*BNa*2T zF+mb4A|FBnu?U-zHw+_OJKEd_Qh5x-9zhU|%r6ne2JEk9(WN}|xeo;q5N)K`duc=( zQ2<=^@T-Hcn8<~&2<|Rm;zbbGL&+#2fRI>cX1SISE&?%J<1?)GZb-V^ng@4kUD?~n zKcC1T<^i`{B8xX)F4NJ3K)^M(lDpX{JiqswH~s~3Z`zoUhVolKM*c;_d?FKmi-{bI zoM|AB3NR2*0{2wbNo=m~2lefq*z}xbZK8>!E-+ z{!#y<9v%t!SI7UokoG>nQGlZXv76ivh~v+C{G-kf1jK1X3W#G1F$k`M0df4{_zwjP z1H@;yplc1-?|(90^AGBIY%k0m0*8QY!4cW#FXhAEMxS!|+6(#d$9Xq`4}XCRFvIzh zul#vHi*qYN&+Xw zX+G_bvf#TtU-|K!oL~FjnDtL8Z~4dZk)N?~pnZd{{AeF=EWZ+ZUnn#B9Id~cZGW^= zIG%rzKfm=a+IP^V!&iQ^-8i0qkUy^fuD||AdyQlLkNnXF=TQGJHZ748@+SbTCN{H*@tCqKuVul%_7zyU-SpCCK|KfatdmVh+zYJ)(^&i?oIm9W6yGDC9cg=)WlByMN#R4-%)DE|3hgMWk|0Mt)9sR5CfbE_dJF zonLlK+9fUCh|AR)^hNsYVp6qGi@E5?B%NV_4rSsCU%eXsUZZ00ox9)YPdp$#y}~lI z39))26e&7UouUz|Q{Y;xjGvgOQpRdxRq@G57@ImHGB!dP6A_Q?{a2v3@*~fiS$_Na zk6i^jTL0ph!!gKL|EvXCNAs_QS^wbs2fz1!9P@nnKMk~~>kxX9pXJYM|EFXABR|e-*q(g(KMJ(&fCxRwAJj4a9JfF6 z#d(!4|J6Y2NPd<-FZ++v?Y|QEI~srNI+x%656aB}WbF{P00pj{5PGVAmOp9%$NCS~ zGvR<(1_&HNB~VPaQ1y2H@=QJXNc{30F)s}mazOc0D=v$if(a^xA~ZCJij)PaA_5hG zlp;hDp$H8ORz}I96u~Mb6(J1`i3}YVo?SdoA`OU2hX2t4N_8Ac4eLti$lHb!#MGHl z^9K~j{#Xe;+n_DT2@Mh`g@Qnj^!yx3B9py8Xm7U8Qz9KFxa?BpE!4UY+BxoxMY`fS zlq^^%86~LoN)LI!Gu^8|>Xn}5)tV6+>y@6J;gzlp@=Dje66&&&&8u4~`j?kH>`|KKn?7HAh0u5iwLY6Y4*I4Kx;R)e zap=)Jp{8HC>n7LMXmY2^U<1+U8eZJ5yx61kGg!`vwPc;nXZXmYaS%6e5R>k{G8t6U&DVb0e5s6`( zDmenEk~RLSgp}B66aAF>{9;{xv48TS0-YbJRL3X7dTXLzv2J0pcuw)WyzwL~^$bw^ z6Ekyjf|xv+JmMqbR1^K8b@@6&MzP*tl{7A?^ZJm4HDXb&KA#K@2nq-Z4w420GZ`hv zCQnsOa%NdOLCmHBNyIT0kre9487SIte}59B0ba$rEHc}mp9iaBkit-$&8?RG{ecij zHIpVJX(Hmusj!%w==aDR|KbwANfV+#c&LG?>cokDBP0y?kx>y!kaJ&A{qfg+Km3EA zlcmKhCruC|GaHt=eE<1H_E|<4ax?OPMBGyzmSU6q%3W1e^$orn*yhL7h9Ys&1jS!0 zp8t&j+fq&tE7*3!lJ?Yaof~RbbwgXHY^;)68o*L{Y+6f=$0YQe%%-b!=19J-uD<%t zF|wiN^}XAca+!4LRZ&r0dzFQlXIs{8uYY^rIg{CjMd#A&ZP-55RMnJTHGkg{jRyDupeC$?Cb^P{oW65n_AIPhU*?Fbu(w?|#ru<{c zt$$X}-Y;s0s$=RN+4<3q+BY-C zl3WqGRI5bNwOZ@9>|2KN*Tog&s@W2(P;`}f90l}au{OWlYy zy;c#;pYHm#gL8WAd-acOJ91xsW>LX->>AOpzSW6k*KTRi7#*Y2io$M1JIt6Z2CFfW zVrQx*O1mgHW&Aq$AacT{;3DZ73u!sWP97bBo!s$1~ zBugjkA<^WFd>t4sboq?!qM>GY_Kt&NEY*75N|ppuVHlgKlf{cNMZ>#>^kR+eYg7G`<8)0X?rUf z)~(Bcb=6O#G?1+Np(>^7J(3Cu3zU${NGdc;BDvlLh-M40HKRGUGTPq!U^}gJ-;9=N zCegN#Js5gFm>kFi*jHaY;qhqeRURAl1t;%h2hZ?Hh+@is@zvZy5zV$6dEoxU*nu1FF!Ucb+hrp}&moRHs(cz7vYYo_=y_=-RGlNikPC zUP~|X?PJ@{RwX-?2G-7d8>_=#&%MUW(ym==!rog?$3i|fp5xy&ALpuKS5EbGDBi2t z`#*U*og6ck+@G^&YOQ)}>Z?E1zg^wRvFTDZb{6~H^r@a(`C2|5!f`yE5batn*X9q~ zR<>lyvsZV15$QY%`RjrJUfQ5`-_~z;RMvj*+Hup*Zx`1dM?C~9mA7UdNFl}K*8GN} z`{Kzlohs%pXr=(!f3$9raoWkIl-=|v=XBKT|HL}2i{Jm_t|PDhQf+$uG`xgGRL69x z&#q(9Zf~*lS5?osI5iUed(OvZmb0tg<1%vddd|nG@{m>EZ>`*ZZd<1%KEL_UZXu-s zV8~-z`1-bUZL7&lc7CXM z^Y8?*<7wDtH6jfNv^0yQOLbm}?5?+G%-|Z%Mh)>Pgxz+q)djsCWz{iNsh#>BFAZh4 zjDgp;hge%MK(Vqf+Z`=qTfJbD1B+_cUrXEDnp$m!DBHNnx$|^IWW26~Ow`ZI$oHFM zS$SFa{I1ue&B*BboV`O@+ODVP7Ai~aIiL!%|D1iwRzxr#QZj@W)i_>Mp}eRliG4D* z^!912pMo90I@APOD1+i5txJP0!W{SBz5;3zMA^hP82E*lV2IHo{;7reAvI-eTYkI-~)fBsEnX` za(Gs@kdnnth)Ech8IUSzA0QbOcr{y|?0H^P=OH0W?`@Pnvo}MLsTiI~8KiX~{s#Kq zMxn%iy{J(TPBuynWX9D@;*{v1TdAl{ks%m7@$BWuB;^+G*1)LsicBfxKO}EduRft@n7=AKKbUJFq0}{o>U@0YUFq0XRT?1o%h6KtL-mPw> zd&f%JrFFWvW{L`qT6Zq+OkiNWa(K34cwqRTwn2IicP(?S;tAzUMWo6Aa$1S(M>7@^nZ@O5ZV?niZ%m^e;PZnN@{X{o3Lv&iW zHj(yBpV&U}S?YpLF}0rZ8-Br@A$IYB<);%Z(thEtNBfoEU6gTu_{1;|VRo+n*HoZRke6GsVZhOn z4c}*yQNoNcp=7}NNs>r%GEo2~^9Kg!?+2T?q+I!!)GwGaEKa(hZ5$&z9sG~J2`-W_ z<)WyCr_gq-Nsk-v-Y6o-G3A%Lwg){6_| zg^{vT^!887OXoWvrED_G}Ry756b6Qy)(|p}$;mu%!GSCuN^Be{$us>S!;T&ex_$ zH}7wLrhzhuPTyJhr9A7`YF&CgwIOihirGbfUie7W{^7&|S=chkys*uhd1CPAU7${5eg3>G{>rlk(a1->JV(+Oz-Xv@7SdzZUE2 zf>O(Jyrv!7R!z_JeDJp1>id5vpQimd`{Ow+bnc1%w7aK=Xn^>HDYof%)760I>NX9# zyyc;b{l&7Dz&1L_984efz3^k-p!joJw)S_SUk<;E{(Veo;f3G6-t771!(#feipO7f zG3-(=K7Oe_a*y%f(S_^gPFwIszP@DXQSI2`y!8=FC54)dysTyViRlY9L(&)eY1Su5 zf?E%N^RUh!UzhW8_~qfB#zqvLTOV1x@O(+BB)PFRC!=7_mUYREd1S%GV*iwtnG=qt zG%l>2w*2>^X$MOTeZ!2cLR2FJ3n*Eqrh&J$v``k!j0jJd?KU!9Hmvm+DUG^70l+=cWIAYPQLg){@?q zZOWvx`VB1&dO4iVGVACZ&pdOkIIG|DwfiIJhedhfdGaEWJ}s-?_XP{rnF`g0v?Bc? z_58A|e&aT8qAS8nh4M||1-GA9zfYED$>>iTjU#Kyjk1bxqil6;mg4HHU!ga7R*JULTjcbTwx#A}=U-_uD*6kIimWEu zs0bj9vg*^N^a{_FqE%wL&iJT$P1vJ)d5)iLUnCDtc4BP5zpCo%jvYn=SNP zZB3?k=;zkHp>M1?LN~Y22isn5JEDKr_+Ht^`VVTVyLJtw zjXt}E8dsYNc2(5W)SBvA=#zAVc{jb+bDwCx_=x8xa{5HsKPo<{IZ$S*_}=rJ`A0GR zll+kRT-v|%X5+7E7b-pz(VvUI2r1RgJ^GdSpYoIHwlw;x`Lyv&+1ZM(YrZl4QgN|{ z{?*jK>~h;&bzgHo+Rby2nf4HWNDuZLl;)`)Y#dT{ER7zf_c0A^8P@i1{cpx)i+1e! zk=l7LwR0o2tIr==wULe}xGy~Zh4?;+tA8FIZy5iP`%-l{F$%wsw-3&IC`5AKy9{){ zF<+?o4|K;bOvso7TLUV|*+i|szvyCZxt?D{aND!?K5`i7Zv9Hn?ZT0k?-2&NlRfYH z4|Hd@>)pEDfG5d;?#!mSoBa*&f$mm&)@*5dz0Ew(-T9qiNYiOEmrNwNO;LE*?QSJk zPvkYYw(W=yamR6>`(N861g2f~^#;1%??#c_e!K0y8$OegB)c)jL;`vu?XthrwYQ2b z`!uk(!EL?7B*ey(4)!~`X2f-SwnR`seztm+TxKf|*temkdoo9pDu1-!b{JkQSp)!2 z@sZ8_dH$+b!b916TmLtkzu?1j|JtsN zR~G%Ta*g_tm$!YMB-ZZ00}?#Jne|(DS>9~Q~CA+HsHsFPP&Le9@f=kTc>p09`d8vvgH+z9wA;B$bT)AtnLY(;A=%dP7h zUjrWC+X>eL90+)FZT5X4?OssTfevn2b z`MU@bm^fD+7eS&;+!s7UB0rRcU0>;#ctA(IfQfVRx(EuH_%KWUzTkn!HvNK5=}S62 zUug5(MR3OeTY2wf;@9fq9wyF}-CMdqL6U8-IwrY;*-H?6-yt)#JTiBqLhi_$b{tJ_rINqbNoGs{x;?FATBb`kru&& z__b5~l1I5i-iFY5*L_wA1FV9FY#|CeyxA}#>Cn2a`iS} z;`dI^uUI`7LjHGz+xCY$JH_wn6u-Mud;t^Z>I3R<2Ady_2rRD~6Sou}Ld;~&bC^K(ydKCVu9rK*kW8SciVD%z9?|BP+2{H`RbNP7? zo;LG<2NBLZ=kkNUX36Bo=^iL!D0dCnfF7lpxF=H{j9+EqT>e6$kjW2e5kQ`FOpm^F zS()iH>->bopG=(N?@L@_;#__%g3C;tTh|DQ_D<=|o#MZCivQFpPB8V0Wh2A}H1F_& z($)C)zF_kEi_n)z&zoHrF3XwpxTE#gAel*TUGvW6vw}(QN(^E`?ya1E9s(wvWisZU ztbq(=+iUxUF+dI6EhgxC|EgWH*O&ZV)Y=F)TJ;N;=T!_^O0&s=+P?atLZ*N>d- zkNruu?Hk!bMOl~a*0tjruy21v|HIec{(|-&)Po52`17>?2;w0h?oFA=)WOg5@SGdz z_V`P{gDemKdxHBQzfpkP8rT7QX3og65ZUL?&;MJJg*RXRCZKmz|F``AvmW?(w0}q6 z<6Hj&q5geb*$!a8{dw(wIL+S=Sj#L^zO_vbXE830GctG0h3NkouLnVh>m+^c{7uy-zif{kNIt6kD?DOa4f8FW+ z&yPROe7yYc|DXK(pE&FD{rev{hH-rIZGY4c*YodxAYarGeEFm5Lw$hIQ~zUoKCi$3 z;Pl_Wz_%sTod`Yizqx<^1IPcZzyHK<{fGUI1GZJ||4xO+DHf8R+8^s*3bPlF^@jlG z^T~i%21~Yf1El5n_xAgl-rBFO$2Yvau>bYe{7Upzer%}!=JMZc|IZOqqY_m~;0`(| zI!c`w7m=K-N=lB6j~)#-xW8O6hC8>Dq>4yX#-yqfBP~>l8B7OoaGJ-S|7T~$kJb61 zBpu_@u|#jk>iBaTnTyD&i?VVvZk;Yd7pk{T7g_vH-a1`m;{qAZ!?0f9Zk;Y-d>VJ> zIO!&zE^_Alw-^@V{a^uu{`=Q%Z5(vVf4_77`yKl4kfZ+&Syt7~nRTvas|D+bU1g38qy{h9I+ur7@Y*pT_!7{yc*EE%c$Vk z6^wJ2rQqGg%DKy8MZ2mMoV!exJ|Kyz3N}e8CAde4+rv0_QM7-F-d$MdE(zeTVQ=u{ z2*$ZfRBN`Xm5!5+Y!WplDWY1HBPOepy}`SLYk{&)Tzkr~U`2as<-<`f%FGVuF0{sw zk~uRrCeinNa!`t&ck8#{-36VyOy4_ZlQQx$l|5Sq&Rr%q$19VDO6$G=?=IlnCFPhl z#V>X9FH=@{w+2(mLt;lJw3B{qqBDy2xDPXnlMGs=Av5!#*yX`9no}}km!oqRGkH3$ zA@0#Rhrzjvs#$$@apt{BzpT{Fsg>RbW0!}}xr!VY&-94R5mD5IIqjNE6MA<+rv-7U zSc5h*b05^_amKkzw142-<_e7=?5Bv($gf!EF8=`UF5`D|&RvSoy9+vZ3I7qCyOihp zQ-O2p@^Tj|w+uMCSjjne@gA@~;*`a?%W3fL!Z>${9w+tt7kGC`t;{I^=PqHZ`z8zo z?=DL&Eh$ImE)$an$BpwIQ=UB7#t?5{ zs5Kr1&n{8TGm5fW;wo0GR6mBET^5$Fjhv?{8_IfiF?jz8*Zg#JNsx z-=Iiqy)yr0#=nd%E<%y=~F~((o zZ2WG3F|z3}F}AL>QgHgPpyF`2s`YT~eVWBCvT=(I^!t7bR?1@QHb;K3)?Xt|kkfJh zn9_%J?Q+O`_=4;KI_6B|=S`-K3;fp~QXft`qW==yLaYsmD{TER;DeSA+vtzXf2UhK zQ+DSzOgUJ-Z?=q%I(G6*OVojjPedrqk#v;jFVUiyQKZ;6V#nE&_a-ON&DF0qZ8`W| zDpi=*o+OK1@$$C`!TTG3i1>POZ2UKBI{D1p<~_RaqUY8drP3`^z;jP!T!D;8_L6Qk zADebuTTOqva+d4d`WMOA&mR7T(8L^~zt(?a{I=}7ito3@ic-s_Q1nr8YsmW_e_490 zOnZE8bNXi5sG`o!Y0f;;(oFhIamiDes+zB~3oo~j%XDbMxfpJ|E|} zN;ki4Fg>qDr!FOxN}u27>#{Qc2~J&JkYt?kq|XFW4Be-^c!8}Y`R3*vU!?~%IQ-#jR% z=Qht=u_NUn)9iQWrAR)1rZ1hdZ0@o=9T+qGe&+jy18*1 zJ*Xy3Jw9!MexmXHluK_85YrErWcUqQvFZG9^VQ#04gcT{`Uk^+hx6N>Qa}A*zhiZ@ zai#pqipkY0x6O~p@B2t@zCN>Ld8xKIZ{vm1Q-v3%CFEu4m6|2QZJNc$o}9KKl?;A= zBY1RCEkBU6>HL)oYHI$=8!r|w+4N}1%2P8Lk1nr9RL`uD_T~ztvXdzw^~XMTE1F37cK$^uO9Jw z>!W6R4gI+16QX_kr{qto*QS-}>2l*Mnmy&)=^dVp@?GK@IsKf-RMTi`YI(Qjp!wxx zrZoCU#Yg7%Oz*dBY56Ga@A{98^og{i72CzfOvhVJw0&Xvgr;lD>R#FHy1EbG{KM(H z%ji87-1i$Z_G|jM!S2{gtMPjv+!f;oT>9(%{fCArhX;|sgaZf{CLBnlGvOd&7!!ID zoxlHhROrGKypb(_l!t@wL-y9}GktA-qwP)eTl71g^a0U_^jG39j9-?0RdL98xQsqW z|I&6|bV2+<+b`;iX}{{}E9%Q-Cq3Ilr$#-RQqXutM4uIZE&oQ{r|moa_r@Q}eypH> zs`;(#_lheugN%Q+6vzj((F4tcXrHzrqE_=z+DAPUec9Y*{7ij!+MP9bneJ}6nwCoI zSLN1MQw>9@hSe0^aM2|xMIW6$R+9X9(#7fXYubk-XU1Gk-m)}BLine%l39L*#Nfew zE!Swq5)wmf;=V+GCawb-pv7gb4fJe(M$0q*^MBJJS6naKWZpw$8{FS-dDatu+<%fV z_HW1EpP)T`1|VPln}8PgED`MUXLrHy??Cqi_rn+DxR;D0hA;mLpmntW!^+R^{GS5y z!~JT${PFxBKmPpMANR%+o#1}}Xh}c>`}L21f74<6JH7w*4)EpE{_OszL;g&?W8EV# zLOJ+(8=jLE8hi4y`_B&fJ1zfPz`H*${>a-g|7nmHD?LB?@%$&ri$AaPp91`DBNmWv z{l5gX{Mw&a|Lc&QFaH;L^XGT|Qw8~HoRI%-K+9+RQ}AUBpZ0$U^27Z^zVg2bw0!Dc z0$=m+;m`FwzWm?d&7arbpC%F@zf>pW-_M&rum10J|J%otKLuY!@u+{N`R@h({Q6%c zAO3=5$Pd5S;@kdvc=Km>-yO{VPRqZWCx7<)Q^)%EL5u8sK+C89OW-TU8!`TBc*}od#{Vwh&qx1L%=gz0 z`#&)o@>2uyt^b`o<)`3FQXcistiRHn;J<@6{~Oc(uUqkl%bWWBNpH{pIrIGm%Gw$J zzx)5Wb^ia>`G3xSgk$5zp8xj<{!cmoKh%{yx5=IV$MXAm#vdF;IuWi5xN9)xsSeF^{1MfG`*C7$E;5 zz-*ulV7Sc#T0MA5DTdti@b6)W7eLBfpu=DEgjWPlGa!coX01F0a$Q7}0IdP|B6k+b zn0){|@$=VUEt5XzuO)_5PciB7t@xPVaN$h^t3#Tyo=^ z(~q2fWzMF4xXnyK#D( z(+AxA&aD%;^#<2Ilk;>G4ZgD@nk(8;rA7HL&anJaQc>;_qp#baOT4KpBqQq zc&uXXtGH&nZedURk3nEjk7Dd!XaVb)FYbS%-3`xG^R>T06*1?-Q3(6|Z;btGCGf>F xa0or|N6R