From 7d8da8ea593d5d44634dd23e498bcc646a3f5a62 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Mon, 2 Apr 2018 17:15:42 +0000 Subject: [PATCH] Bug 61267: detect Word v2 files and report that they are not supported in Apache POI git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1828176 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/TestAllFiles.java | 2 ++ .../apache/poi/stress/HPSFFileHandler.java | 3 ++- .../poi/poifs/filesystem/FileMagic.java | 3 ++- .../apache/poi/poifs/storage/HeaderBlock.java | 3 +++ .../hwpf/usermodel/TestHWPFOldDocument.java | 19 +++++++++++++++--- test-data/document/word2.doc | Bin 0 -> 14662 bytes 6 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 test-data/document/word2.doc diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 77651274e4..333a8ebcdc 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -302,6 +302,8 @@ public class TestAllFiles { "spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion "spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion "spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion + "document/61612a.docx", + "document/word2.doc", // old Excel files, which we only support simple text extraction of "spreadsheet/testEXCEL_2.xls", diff --git a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java index a5baa4adbc..680cac8398 100644 --- a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java @@ -54,7 +54,8 @@ public class HPSFFileHandler extends POIFSFileHandler { "spreadsheet/55982.xls", "spreadsheet/testEXCEL_3.xls", "spreadsheet/testEXCEL_4.xls", - "hpsf/Test_Humor-Generation.ppt" + "hpsf/Test_Humor-Generation.ppt", + "document/word2.doc" ); static final Set EXCLUDES_HANDLE_FILE = unmodifiableHashSet( diff --git a/src/java/org/apache/poi/poifs/filesystem/FileMagic.java b/src/java/org/apache/poi/poifs/filesystem/FileMagic.java index 6bde1ced8e..765cf6e323 100644 --- a/src/java/org/apache/poi/poifs/filesystem/FileMagic.java +++ b/src/java/org/apache/poi/poifs/filesystem/FileMagic.java @@ -77,10 +77,11 @@ public enum FileMagic { PDF("%PDF"), /** Some different HTML documents */ HTML("ohD;=n~$h_I5El(h*;92M3WU6Br%XuGorD_lG9fKp z$f?U>Ng?WmXiH4ovy$mTOV9U;#RbdE8@+04B9-V!v_!xL{%ABAsg}RR)brf~J%xO?XyTwYm-VHF zDN;tdCtt`Gx=o`(M=%(5OE>|EB3e~&SthrjI z*N~QpL`Dz!g@R* zug4!&vstTbHguNzN}7==Yw3KA?$u(7Wwh!}meJ!Depfi`_PBkj-Zf6?KGCgWsbsv` zt>VSfN(vq6)YVjdwRIy)lBte(WJIIdr>K#XI>+Y~ZV?RoLw*r(2Sci9r=aPbdPAwc z+NOG`Mrk8i-OP&xW~W|M6CIJZrkGgBw`ixmd(5=fSxJt`!{P$+MybDG^%lpth<2R;$`uq#W}0e_wZx){RuNA{ zTH+Cr)cUlDRwMg3b?prcm)kAqQ%qjLpxP%>B6B;XhHBBxlAdJ-Gi%lP0==(912JNC zt1z&`)gBaKpW0bXLrcpv8n&m}W4WB3FB@6WSYDx988ffSnKW%MUD>Twqqwrgg=})4 z-xCh_LN3*)CZc7kiF_jK(VeWwGGUqBW{CnX)C=V|@;lPrO$(}h&Tem?oVh!@NQL=o zo6Rhz%69sqC7N`zyIW1hW392aq=+=tC~9RC(V<*HkK4r%>XT<`-SW(aqG{xe{8(M7 z$*}ZnMl@};a;$+)E<>|&uAHZj$tk&5Bjd&x!#6CvF5zV@@Pxe{k6KAl>n<2F$!%o6 zB-sNq>9y8M?9KLCRhnuYh;#`nFWN~%Yq|(p|D#mXS=kXa`o&6IgX0OLVk=v zv#8LM7x7}&>{DCHX(L}6xzx33j2y9KagB^+JNkUW9SnO#C>->|C*mPp&ky-OwEYX6YaP1qY?G)B6Hz_41XCZzT0*=>|9Wf?0AU>(NW0v>SVZ> zm42-Ka%Jp|h*W|>m)j*ro~e~IOQtbyn6ik;yIUmFJvlQotYb#zC2RD{N`S%wD}r8+ z2>8NYb&*y*x1n|H;6#=wspsUXo-{KI?Z&9HoDW-d#+cD$raqs?6_QhmClFAJc1kA~ zNvT24q{o;tqgi_~6v?t-b?W8Zb`641n9ZULb=={Qny#dv6{{(1V-U3KCClukTz!3o z?OWe`>c1X#B?H6NcR{!+9&P3aUd+noTZMnwIoy7zK+={ z#{sRFvdYEc&@?tWfUb1Q(X*J>6I81SXm$eQ&1165#PX|5u~bGm+cOHPVXBN8{az+9 zrZIyAw0;s8a|p=kvdJhg1eO}zY&`Qjy&U)gA+=kkr1e))+Qxd2V9T)7?B$+C_jdJ` zkk94whrMC7za^iSB zfK6aH7_JY>m7yVDa!zZ4axmRaZzst+A6n1&l zoSm9xRZ<(XOUa&l9NX|hUL9M%2_t# z4ycv1G?`X|UMlsEwZCuFSDG0)zZT>jE*V?0g{uUYV>D#g9TwqmP87bKC+~oq>hXw0tCgXWhbH8p%Y2$;Dto1w?9%ng`8ew4qj>IyX9R)les%D zhOS3jJgQ5lqJx*?ujQD4H{q>z@(SB3j zZyo-U+Gg~Ngq1b)JYyrky;LUhM7kuKN_d3J<*nq@5{o2bBHA2_E*8rYOFBd}k?3fQ zw-G8N=Za`YBAKkb9!aHQZH@9Ck(eu*;>X3fYa$jzI4zYBspgnyjYdT>7VU_oA|1EnGCuv2%& z|9aA2q6XyWJZIwy+=|<<9vkpWY{FA`8n570e1wm&*Di`O5eMU|aKnQr8nF;fI1Wp3 zDlD9VvvCD(#cg;5kK)&O4!_0ocoT0y*;P^MFcFh+01iYL^RXB$XhQ<0K*uVq#y4>u zF2!ZI9e3aXJcx(zFrLA)*n&6kF8+)Od_pEaB|98PAcQcE#@DbIEjSS;VFj}2$0}Tg z%W(s4#Lc(`cSCu9t1|Z2xNnYq+VFR(9chB1?7`E0C{11U*(oyrd+-Pz#nX5eZ{b6H zgh{(o=MXpqvv3%E@MAGra6Fd7#0vCd6~2w{-~z0}wfF(nV*?(;6L<+P<9&R9iCpa@ zOuh6}I`7vW<3BYueW*nr3I1YX4N@Fw1Z{2;FmyJ9y?#|#7z zL=-VB#|bd80;l10oPjg34j1Bf+=2VC9-Hta-oWqiA(Z#|+A0s9%~!SQ?Qesv&S#d2 zEbl(b$?_ULlGnjYb(n&wn1i_pVLswmgcO#b3*9iW0{vKpi*PZn#z2hxs@fEoemo?KlZ1V>JfwEv&`&@DKP2?!-fQ7?0plJd0mp zGqzwW-ou1F8Q++SX_$q>a5#=Y5Fso^3y#Nf^uRlZw z75)ov;$wV9U0`^}vipJE*p>G0wDa9dQFg(u*ay?$ffvUjf^VP$C*frD;Zzh+!Wx`| z@8c@mh?{XY?!goIIbOu?@G@S(pRg5^zC!!s033w5sKbfp(k%9jh^bZ(%L2 z#x=MRH{ovFgZpql9>){df;aFk{){~)GW;iRX)^7P18@-Lq8?3X#xg9&-{C~8Krha~ znK&Qo@I(9)Zo`l8Q>@2EJb+Dj5^v#ce1MNIX$tvce;k0hsK-3G@O2!E6n4TaRee*fHt(_M4W^I z`fxVZ;9PtYm*H~Ugqv{}{vD6tQT!Iq<1M_6Kj0ngHce4>$HDk2=Aa&5!!c+^9LsS6 zPQ*ztu>xyw4lcqaxDgw$5l`W1ynSf20UPlgeuLMs1@GcL?9Shi?Sbi-ftlD3 zvvDMjMFU!pfPqz5jq|V;SKvxqg{yHZZo?yZ6i?$>yotIQtZ|rueQ^Y4<0#C*d>oCX zI35{v<5FCQ>v2DRiVfI^O?VQUu?1W4KUE*{eagO!EgXo0P>=0y?@_Hh-j9m%-^%a# z8vdxZHCIu6CI+8T`)s%>>;DPznp|i=1Q~Q=BOb)h@Ccs5(|851Vl%ejExe7bco&mq zGM8a*9E5{03y0xvaX5~^Y|Mch^6v~6fx6jREs88oO@Jo4NcnSadTy7j>Xe=W- z`Pj{A>)|czymsJV`w#VrnaZIu{+F@86U)ok{4B`W+=;cF*x8ANo!C~!iqGIrFF*g- z&+fbB`b#f7z27*cE&lb&5HuDH{%{tG9n5!RmwM!G27hX}`b6xqF>6 z^T#p7YPMq$<-A)|s#KlgW>%^^2Jm|!CPQ3R1 z=2td7e(%3s_k(q7SDt$E^5&@9Y5XtOxcsuV?oM9!R2geKv9l8kJF%@3t2!~D6Vo{{ zniF%$80yW<&urRo@6Fd;vhK8%nUhd&@Ta05Vpc1{n{|gNz4dEZ7An7F3AQ zWPCRTGRBkfTRrB%iR0dX`;8ZV{qz4=f9H>{xn$kierxSotJ1`u<8i3oNB6g$x`rdg zoPjx~%vE-=BdKA1bLgzm{YT!{wr||oWpdW1oA>ISx!0kO&)P8O(2#tij;LzvG>#X% zJDo<-3e?m{2V8{FN?U_`8A4fyRq4Kjx z`FWK5Ov1h|%`lPor(~a$_Xj2t?@m^xaK+P