From 879fc2dc1cd4bb4cf72c842663ac361527e53c9c Mon Sep 17 00:00:00 2001 From: Maxim Valyanskiy Date: Mon, 22 Aug 2011 07:56:43 +0000 Subject: [PATCH] bug#51686 - ConcurrentModificationException in Tika's OfficeParser git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1160137 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hwpf/HWPFDocumentCore.java | 33 +++++++----------- .../poi/hwpf/extractor/TestWordExtractor.java | 21 +++++++++++ test-data/document/testWORD.doc | Bin 0 -> 32768 bytes 3 files changed, 33 insertions(+), 21 deletions(-) create mode 100644 test-data/document/testWORD.doc diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java index 5301084a8e..d67588dc08 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java @@ -141,41 +141,32 @@ public abstract class HWPFDocumentCore extends POIDocument * @throws IOException If there is an unexpected IOException from the passed * in POIFSFileSystem. */ - public HWPFDocumentCore(DirectoryNode directory) throws IOException - { + public HWPFDocumentCore(DirectoryNode directory) throws IOException { // Sort out the hpsf properties - super(directory); + super(directory); // read in the main stream. DocumentEntry documentProps = (DocumentEntry) - directory.getEntry("WordDocument"); + directory.getEntry("WordDocument"); _mainStream = new byte[documentProps.getSize()]; directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream); // Create our FIB, and check for the doc being encrypted _fib = new FileInformationBlock(_mainStream); - if(_fib.isFEncrypted()) { - throw new EncryptedDocumentException("Cannot process encrypted word files!"); + if (_fib.isFEncrypted()) { + throw new EncryptedDocumentException("Cannot process encrypted word files!"); } - { - DirectoryEntry objectPoolEntry; - try - { - objectPoolEntry = (DirectoryEntry) directory - .getEntry( STREAM_OBJECT_POOL ); - } - catch ( FileNotFoundException exc ) - { - objectPoolEntry = directory - .createDirectory( STREAM_OBJECT_POOL ); - } - _objectPool = new ObjectPoolImpl( objectPoolEntry ); - } + try { + DirectoryEntry objectPoolEntry = (DirectoryEntry) directory + .getEntry(STREAM_OBJECT_POOL); + _objectPool = new ObjectPoolImpl(objectPoolEntry); + } catch (FileNotFoundException exc) { } + } - /** + /** * Returns the range which covers the whole of the document, but excludes * any headers and footers. */ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java index 48c20dfbe3..b4f81f2bb1 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java @@ -24,9 +24,13 @@ import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFTestDataSamples; import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import java.io.IOException; +import java.io.InputStream; + /** * Test the different routes to extracting text * @@ -353,4 +357,21 @@ public final class TestWordExtractor extends TestCase { assertEquals(p_text1_block, extractor.getText()); } } + + public void testRootEntiesNavigation() throws IOException { + InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("testWORD.doc"); + + POIFSFileSystem fs = new POIFSFileSystem(is); + + String text = null; + + for (Entry entry : fs.getRoot()) { + if ("WordDocument".equals(entry.getName())) { + WordExtractor ex = new WordExtractor(fs); + text = ex.getText(); + } + } + + assertNotNull(text); + } } diff --git a/test-data/document/testWORD.doc b/test-data/document/testWORD.doc new file mode 100644 index 0000000000000000000000000000000000000000..c1f4f3d0b0c1e475bf03e9eba3ed7c7ac166d557 GIT binary patch literal 32768 zcmeHw2S8Lu7w+sXEK8FjAOh-wsEA4tMI{zcR6r?FY_YMx0;{l#%VLS5s2CHBza*Ae zh#|IEVo9vA8xu=nL8FK#HRk6`g5Vxe#}jrSU^L@iaH87v=!;12UJT{<ajbMSnGd`_@Cy69pIZK6K{QHFF3{lp)A z!_(0Zdlfg|p4#XUU9glrc4e$Wl&?berD>!U_2>D->svwbs-4rOd`|--XL!B)5q#;5 z5L@I!=c+Ve@RQ!Gh3yBM^pA{lMY^ZZ12Rp~4&pocsf+UEQ{3NS_;Pq!yu7F!F9h;@ z=51bHyq~xE{I%J9IzyY+N8YY2KOfGQ&--h$4a?#EJfC@gZ8lGrm#?AyvUtPvf0v(^ zS8esKEu63KpS5eFM`N=&2E3;jq=KOJrpz9{^h7b5e?ABL%F@zP<m5`Yx zS8AlO3QelqIz*W$RjS5FlT>P{Oq!7yui=6tWeTMPn7OB&byxsw7SII`~v1P2}sOP$r~iCdxCU8u?g_RFfjp5Sgyg zy+frkR5Uy`q<45XJ{MPjMi!qcH-m|iXK3V!QqFG;6RA?mrK%*UDpM_0R*N$;i%pU1 z5=v8KC@TexM17S~g$9JItwYjf2}lvE7%2;q)(smJ)4fmc@Tgu=j}(n2J;>Krg9slP zm&iw@PWJVX4oPvSCC>4c2~cs4eAzM4;WZYWu2R%ivO!dh1skN{DvqKYE98(aQ_GUo zvh)<2(ZEv!y2dq*Z{VGH_#Gj1^5CTflfdIkO+(iCIDH$ zL|_@P99RXc2G#&ufo;Hf;Ah|w@GDRTlml)Sj5PqHfIH9_hyr4OslYT~1@IZL8Q21x z28w_)z_Z8I{@s6ei~N@>ahda+dwlNnxo6z(W1Yo&xa=nmpV)m^Cum2ZWU^?)S9ns2 zB?70EeAN}!Pa?1pjiBgCzgXvD7EUSBrt_Fz1SWr#Y0`rfl*h_3<+_&(@{pfd=~4zt zCJ&WN=6!7?lZU_uYFrxFL@k1*q9l8gJIS2nO|n)2B?-}9@!=b^sm{_A>f>l3Kv(U17oxW&%Q$x{%XgozloXt3%QU}-MEqEdhmfgOOw ztpKYDYyEzDA9AIBI$57jLL z_PRR;3S5dH6rFLca=KMH8*ol8Z|fp46WWN{u{LlBEJPMeRwio2(u-(`Nhx$C@GcYm zahb;CNfiZg)|gs7`Vs$2ZUa4_=8jCH;fw1o7246;Q}8PC7qN2FOj{=IAkmjK#|^Q% znoj(6`KxXJ4*6p!uSNbM{pWBs*^m2Q66>F-tA5|t$^Q}hFisyBJs|w_=ANIG% zpX;)Jt^e09e<45rYJt8_^RL>+{%Q{bu9g40Kl(brjsF+W_5b?#uPVxtX(zS0dPo!mDRn0f$>^9oEofhHRmtGA(0 z=nPop71AWxX#RiUz>{i^J;6NejE!c(6hYe}Q*0WvG>0t^ism>s9?b+T%^Qyv2T6yM zg|D{wEjOI>3+7o@YyBfrUCkz1Q0pI=QtKyLyk6_M1)W~&J4ke8sBQlXt*>R(ck>_E z@H*6?DnWtPUqVS~Uc*QV58oR1Wcx9@_ouMctk)__ z(mUE~BPm5QC5vP-rhpyBz{mk32aFu}8#s_;(tz1xU)&NqM`pP5!8n+3v*7vgEq)0CaY5u|7>#N5%TODQ z&d6I^+wiBf4JDpML4c#J~IC?VJM3bl2g9(D|1RjDOTn_GC9yYkEuhFHf(Gh}qrTG3M8 ziito?sG}yVs1-+w=;)hJ783VB`ziJstSSzN+TspKC^iL>z_UznO~a%t25vQw%>Aab z6omBvXO)O0(}yX7A8I$kwrZfG?kBclvB(h*3UZ{bE(g38Dnr++I6;)EbC@Con@`xJ zV|WdO8zCd5kF@*fW=KWN{KRG)9pbA3J0w)LkmD@g4zd@j``QPvNya*fnHjVLFBzB( zvp#@*=Wm62df<)M7oN}sXXuy-ivivQhC!itUE}p43Z*>I3ItJlrC76Yjt_G1Mv1l( zzo>pnHWD>)W1YPPUA@hRhnY*DVUqL|j3^BBB+y$4=tbXW9@XfX8R!``-;Rkr1$CE| zLT$&=C`Nh_9U(+rO;J}N>T1eJvVrV{4bcnKtDiaKAZKH_dc{L#iHr{3Sm2plcNy*l z5ka)xm$?gC3q1AmnN;I)U8D+S9_&8rDuAxQ$Md)wq?``w3QixWwhqjSYCBd(ybj|O z*_m-#?99BmI`$EG>rpY~c$mo5lkRso5(Sjek#!Ef&(wmB)zZeIEDfhul-7yS>nA(v zF<^DK2v1$-^Tb<57QywG&U(2etS_`Wo~v&ajd~o7ikiCVYU+m4Njg)im0qWsR(GrD z19DCSXwS|R($UpDh6SVU9|~ghN;gO83iKi+c&0%;)Mzo@hO_#VXK2nSHVlZ%;ljFl zTSiBSAzm?q(qtnnkTn9uCDo)WuGALNKf1T&LbMzYNSLt1cCR}f$qZytiw1X7DK$8)KRk9^x;$g3&)Bro&p-Hn#C|{9?&Z^7OfB3}tlsXmy!hCX z@G)5}Pue(N`J&Z`SvhUY4__E_XIsp6Nh?ib+xTM(#`jw{>vBOI=h+WqZ}uCOGA%Ld z^2#-jp8nE&#~E+`7}3;bpFc96J?zYx0at%<@N2$cMeNPR9XdMCPMy_w*-vJk;}ltL zldrnGQFrge;OW=4c>VJJuiu2_-~XmC^NF`*=$V4Irylb4%RRWEp!DezE6?otmIK!u z43NFGwT_^3(1o*{9+cDdzyT|Bth=JPE* zZ(bTQ=^OXwt6KF&TLZQUfhwr6b@Xt!1?GGNv6qR+1H1&aa^OGDsr zwLCS$*N1*{W*rBMtft?awrlE%tT#{YcWyG;cX?hn-=$gXqSvGp57%?9x6ef%6kF_& zhHQdY>VGqdz*L9M(^yiu503n!?q_Qx}R$k?>XtS4@M0?(`f$U`XzzAfB4m@ z&zW{{>t?MytlhZZ@5tileUsL=yxQGF*=|m6#g+I^SNdM=yDM*{eD$e@g`2lMDSBRG zLr0FA=QI@!%mwIqgj)+~a$mjQNSRWWELW3qSRIM9*e|sWzL@ptn7p&?5_WiXkN@)f zbvw#fw|tk7g9Rr)Id|sj<*DtCU2r(PsF(GimicDZx$>hv?`|!f(DF0al|P2LecLki z;z7r2H^(|3-ZZoBh44ceZ)P<Z9E}?~evC8`1F0?O(U=dOPGe+i-8{ivIIjAMG(|?azl5^McDp zEgrqK=<~KiCcO9JyZj?-qdxl~RsQ9`CE~lGFP(WTY3q(# zCDw;74Y*$Vyv5qn@62dd@a~B#adp-<`0-xyDa%9iH{OkS?6UXFi24)WnUdnQq}N90 zhoP-x@v-s*a6by|MT)qcKvp5Hlsu+h>kizhyO zaH8Jnv4ak1f7{<|&$DOa+S;B>ZhCBn?>V*XG|3TI}UE-~+cy^XFPm z6s=hqacE!cWBHJ1?@~>ebI)DbVS_ebo5!+xMP1kz^M&{M-QTbHVunL*!`Kr$CwK5| zy!CNNyRS#RHLXFRr*}e^JTu!R&UZ)kGre$C`Ob|orJ^XO?N948`Ol8%_2R9H8}GKU zwCxmN_v1$~t3S4I`{ByH@BEj>%u2HwGNi*h!pE0xA9VcS*DX$$B8z9QZY_Hn`ebO% z8-p}EhL}9MyZ)=eV?VrOCwcf}=Jlb6vj(VlX(t>WI_G$Dt4$k!w2%)dx8Bk`u$6k} zhHee}9uSZC?vS-&VWRod(8B2182_F?7(Rl2UTid5vng)){E4kHp^^%aBz9}1S zFK%l8-TNA^H@|D*cIQZXleC%Bn+`pjAnV)6Iki*Xz`GCj)>(h!;hQ|f%u zVX0$e$>&GHbF{Z;wT-}D-1a(w5qhh?EbUoOkfZyC}vZT;yX%C`?4RGf3&zrd;C zWrv+Una8IOlGN+A+dlA4bY#-(s{yJl>TiGYT)DsPw9QZRzm94UHZ6AWz#f5Zj>I|% zTb{`O{Mq-Bez&_fk8AwA?XrP`rp7%!vy8p5eO;5w{`dQxJ-gg4?Tx1k=l1FRW~kMY z63NWs&^rm~d+PKnY*t{uu4(V?g}&*tR;*21@zbUJCPSZ;eOGd)eUc()qk2u>Hpde! z+l?D?DDLNoOOuszzjVENY<+I{6U#o!-4192`aPN`Y-ieHa#^;`nQg+xAFofFW$8F- zWmiS$^WE78_MNKpuG6*|(-ac#ZM_4vrX5ac(~b>tX}b2}-p|Su@0v_|9@c$IUMH_! zO@E7W?|;0^r`Lz_oZg{xznQzraeQbUbJtrg+g6v4w&`wrYsJDu`QDPK1D`xyEHCsr z6&cmxLcU9%JG%nSUfeu(uxWtT)gsvsg;&aaLq9L>clYg$9WRXkKKI6ZgBG1Wv{5*J z?v^S2=asE*{&k0~Zw>z9TAN|YYhO>UcfIX9`F~%DbvocH3>U z*>T@=airDP;w8U*nAS1RCvSAR$;{aeHtky$KIn&}wX?UpwR_O;^v?#Dc9NyGIy>lD zR#Za0qYJ)2cKF*FZAK@_4i=40{dCp7jm`QxymR15$bt3;CYvo~wq5wd zuSulezIII}fBLK2{EpdIwz?fXIdc!5Q@@{YSf9xIWD8wjVS-|8C*hQ=*(JZLI^lc)oe(uJyW>5})a^-3t^uKKl4@ z-GtUZ`=_jSkp$hGJ8I#+v~73(6Wqrs>bZyN<~p^zw>q>ef27p?AX^4;L=+rPO`*>r)h71k!T%}bF?&(ttWjp
D$sOVyMCy#!y zJzBN*zztp8u9l_Z?rtZKaqZ)-olK;Jan*Y%LbC;NBuNY(gzNHt^ON*oDSd~$0F zYfWKrC60tkmXLro2~t!z_z=YFLMqb)@FBR$tBY#GhqU2C+VUZ7`H(<9B+x^eLf4GZ zBC#l}$V1)mq1A?HyI?yQ&T8Q(Y?I=QjJq&`I)?suA zn+{7;UT!x9VV3~9oJALO=#mB<)ThI`l+H|-jZSr9t3bCJp>pWh3ckg|5`ghQ7O)Ih z1#AV*19aJ?44?&{6xa+D0i{40V2?&L1E_s<2RdlyzF+gNV>X35X5+kGX?v85+LzJ> zt`Kzw8HOS%LH=OY@IGCN(S4Lh%d)ex8E%K#V$Vld`XM50nS?G1>oS?3(M(iISLSS( zNvRhXZeGgWs)1YIU}uiY$B>84{!#j_u z7}-T6;L5fur3_SkE%qx(Ahn+alKk$V4(Wyx_z&p<4AC(RqXi^=DPuW6qVY%C0D!)q z)DCD51OaaVoq*0jFc1RF0Eo63R%SYwz%~QR4MC-QBjul?O4MqmwF1m&Q&GAI%r-;Y zE{q@CENQTMJ#I;2Pg84C^{;}1R0*9SR_Y9dXp^%+V3!l~Vi@c=*e^>REVRqn`+QhV zurLnx%TgC2wI5O!BDLR3Q^$$yatfXe%ZU@k!G0O-Qlz#;>Qbb(eQD}!6T6)3r^9lx zP2ym`jJB<*T~6t5!*Xm*<6yrmbpcYxA$0*#$GtSQpV%&^@X4?oKXDvv9c_{xFBGzA z7}HXh;M_W!`T~J^=2d_Ulz`E7bJ}C1YY4RWM26Qia#L;%sQ!%n=tK5{fXTv;@5(>p4 zQxj96$Yi=VlOi~qr&G~hY#b`^B0`RhYuXN%qdX}#%vIdW3F^>oK}X0;aT;1+CKj5~ z5KM-zJh`*!r!O|((y)c8!Ui0=wGUUirPxGBrq~36o1^_IY>>Bs4LF)WqOHZ8abs5Vv*QlX3?(HFyDn zOKecvQIdC^S!p54q*t!k4@$hff73b>MnT8aD2A zuj2-AeIXG&DHN^*^eGyd_}6(fCFun}y=I|TeO1pul1Hh>N7D~BUH_r82Be(9 zu<4yt7;GwsIw{TIoq?LZTZUjRp7QpRD>Nz6u5zVPuAw*4l#j}cq9tjnPohebRu!g5 z7%AGz+6COG|jJ_A% zmkXn|gdn^n5_rOYlF_FHZ}r?9x7u-ZrWR+caw%63ZQ(J*ij*y%6%Cf z+Rv%|9j7jTAZyOip)zZtM>0*phNK3P55jqMU)U;aMEYQZJ{_PK4#-u}DFAbLh)r+~{(3+aoD75~f_xSVF&TEEkJkmB);ZQOaqB?iC^XaLYsn$}EGz#Z@a8UwTjZUQs~ zngL!wbAafyfZY;k1^588g7gFENHxU=z-|My1pZs>iF)q5UTQ;HJOcjqz8>YnY}E$`>Gz`pm1APkRiOr1UWSQLma0dg;>q z(XR{2CyrCUebw@b9{t^3GNw3sM#szO8yqBi`dUH~w@X7IRo@NJrT+b|*I(ejH<96l zK;;{@{Lj^=J<6wXi`q`+_ci=|2%YWW%NH>EN&p?bi^eh15x-CTk`R0Bc(hH_Jc k$iDPfMDa&NxxPhiAO0_j`m|?{I(#{bS30BpkKw@o0S%Rj