From 6e50bfa19d8305b6f4f379bc952c005f9df2691d Mon Sep 17 00:00:00 2001 From: Tim Allison Date: Fri, 27 Sep 2013 15:45:55 +0000 Subject: [PATCH] POI-54722 table text in ppt files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1526960 13f79535-47bb-0310-9956-ffa450edef68 --- .../hslf/extractor/PowerPointExtractor.java | 23 ++++++++++++++++++ .../poi/hslf/extractor/TestExtractor.java | 19 +++++++++++++++ test-data/slideshow/54722.ppt | Bin 0 -> 80896 bytes 3 files changed, 42 insertions(+) create mode 100644 test-data/slideshow/54722.ppt diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index 6610cde2ce..f416d63907 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -252,6 +252,12 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor { // Slide text textRunsToText(ret, slide.getTextRuns()); + // Table text + for (Shape shape : slide.getShapes()){ + if (shape instanceof Table){ + extractTableText(ret, (Table)shape); + } + } // Slide footer, if set if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) { ret.append(hf.getFooterText() + "\n"); @@ -306,6 +312,23 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor { return ret.toString(); } + private void extractTableText(StringBuffer ret, Table table) { + for (int row = 0; row < table.getNumberOfRows(); row++){ + for (int col = 0; col < table.getNumberOfColumns(); col++){ + TableCell cell = table.getCell(row, col); + //defensive null checks; don't know if they're necessary + if (cell != null){ + String txt = cell.getText(); + txt = (txt == null) ? "" : txt; + ret.append(txt); + if (col < table.getNumberOfColumns()-1){ + ret.append("\t"); + } + } + } + ret.append('\n'); + } + } private void textRunsToText(StringBuffer ret, TextRun[] runs) { if (runs==null) { return; diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java index b0cf1c3bd0..ab1076c458 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java @@ -367,4 +367,23 @@ public final class TestExtractor extends POITestCase { assertEquals(expectText, extractor.getText()); } } + + public void testTable() throws Exception{ + ppe = new PowerPointExtractor(slTests.openResourceAsStream("54111.ppt")); + String text = ppe.getText(); + String target = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n"+ + "Row 1, Cell 1\tRow 1, Cell 2\tRow 1, Cell 3\tRow 1, Cell 4\n"+ + "Row 2, Cell 1\tRow 2, Cell 2\tRow 2, Cell 3\tRow 2, Cell 4\n"+ + "Row 3, Cell 1\tRow 3, Cell 2\tRow 3, Cell 3\tRow 3, Cell 4\n"+ + "Row 4, Cell 1\tRow 4, Cell 2\tRow 4, Cell 3\tRow 4, Cell 4\n"+ + "Row 5, Cell 1\tRow 5, Cell 2\tRow 5, Cell 3\tRow 5, Cell 4\n"; + assertTrue(text.contains(target)); + + ppe = new PowerPointExtractor(slTests.openResourceAsStream("54722.ppt")); + text = ppe.getText(); + + target = "this\tText\tis\twithin\ta\n"+ + "table\t1\t2\t3\t4"; + assertTrue(text.contains(target)); + } } diff --git a/test-data/slideshow/54722.ppt b/test-data/slideshow/54722.ppt new file mode 100644 index 0000000000000000000000000000000000000000..c62a60b2e863908586e8f231dd08fcf84afcaa59 GIT binary patch literal 80896 zcmeHQ349bqy06ZJ5Fj`KR1_3Pi~<50?lY1L2xlS?6j2UMkYFH~gd;#8fpCWafdv9Y zu6TevkPOe&T|rzC#mowe>n^zJ3JL=IT!Ifoj-=oJ>z+>L=$T9^lUaOL@%v9#SJn4b z)&HyG>*|gkKi25mZTp&?W)7hjGq7^G9t*HZ_d{IL?t>XCu-=!;vP{YAfeip|MfjC4 zP-bl>*^p2N_9L6u1pGzOXgcLH|-cLPmnaUcpL}?IshGkPC#d%3(ytl z26P8{06l?TKyM%ncmn7HgaZ*kBoGBe1AT#hKz|?x7yt|e1_6VCA;3@|7Kj6$1cm{_ zfe}DFFcNqQcp4Z5JOhjdo&}x*#sFi1alm-sdEf;g0Z0TU051X)fh1rO@Dh*=Oa`U^ zDZo_VWgrzu1JZ$Mz;s{+FcZiCW&yK-IY1^b7nlcR1H+&x1(GK8d8ZElWcCmNcoVQgIdS~+s%l`P8v>A!vw3r}BU_J%CnhHjrS9+?Z zd-~g$`Qaa9{s0YSPvvM_P>@h>vckH0U)|sOsGaK7f+zA43ejAl5Cw~+;$Nb|z6s+e z4RNxlXQ3HK{lpiIbqd%2JO3$7g#X_RNEiVIEb!Dz$GJ;u`;g(LxRe=*sc|Vu zQ_@X6Xj!!j7|Yg~Y?oY(o1cv#C{6QW?4i})C1aruVpj|X+(%7IO-)3t@o9;vfi5(b zW5!E5O`t~keevfnHb2%pc+L7i*0$aCJv8R)IHb{KcCOCY;re1X>w)|BNbi9Bj)+Ad))zZdd+d6lC>M$n-H;c79j_~Px*p2PuI^`A zo1wnSU}WbF`Iz()yI`O`eg-l&HfGG!1pP;x`b=%6zEY4>k5DqQpA5Okc$|G=q+W)y z6Gp9GBm1ImC<(IP8;SD~*u^GeRZC+g^f?iGlS$ckCS&)by{zgG5FBjP6@j%e339X- z@`Zq4lT}wVbfrLB0@^cNX$!TmpQ1e(yU}>;%Czsg4gu)5+Q(@0YZ4?zVm+_a=33G@ zhjz~*BaOc-#RH%j2}3Ahr;)}M`DA{q9;QM#+hM#zqSab1>jQtRrAhT3Sh?z9C_=0` zN~o5TGT;FlOaEu0bGS2ms8pdH*xs&-LTAct? z_A9tf7KWL=jFC0)&qZM)W2HGH#R5u0@oNC7w69@lQRXwzqH?LuSsL4qTJxYBzNu7- z3#{LuGz8adiM=DU1qz$*OTkzp4oSEPl%+uU$dVsA9vIGATt+pdKalb$?HZPo2Uw}` zK2pa1*Fb9=(rrT)-5A;Nx=%pp0VZo@{Ao^Ufhsn#8wT0EjawfRB z?rzH1m+;%K+>+(pus^Ar%5EC8C3Pwy+CAlkH=ye3`u9ehF}03eh!mDB~njIFQB@;f?BrY71pnYz%2ol;gJbXHl38sb?Z)+sFvU9-0cET)VK zT-4{)giXF;f_)xiV zthBBPSG0j{Syon-a%GkR)~S<)Su8RuE-tPphmsX#NklF{aYRUZ-MV!Zn!KR#AZ>LV3D3^Wx_18)ph71|PE?&IoR8DIj>7cfth3wFwLoUlv z9-E-HkB%H%^|w=j%nHzE`bYg`F4ss&c?v@T1XpWhJId5qBQ;sQHPT+DrAb@6G@0W% zYoxtQI!@exMG#7elRM?j_xYSFd!M?1&wQS`m(Aq$w zz`pD_4|+)*)1=h(#j{?jWj*VqTGq2(s%1UvrCN5XLobbXo^*opR9b0EW@k&Io#(8U zt+b_j+R4h*Uh3(sw59q43dc^dkDlI2TUt+!%GF*PS*0y$wW0N_(w17;tlva0jV#E& zC;UJo^DFI$YcLBe#YK5u4S@4zWQNkin36{3U)mb!6027ljH?!;EV#yiAFHp(k;6xG zoXXz^$E>k6T|zoN>i;jE4(Mce9yHZ~XA}}TOF2aBWqYn zXS|GUsdxrwY^$*!QljvroTNO*nsBLy<5bGc{ z7`fAdWV}Zi0rr!?a8>2{K^vVBleOGy><4`lJSaCD<;ccW>7%|-Yw5j!`?54X9tF#f z0O~s+Nm+~hl=ndfCGKzirME*gJk&9?9Wm7wG=`o}VS-VgcVY#;%BQ|~G57n|9+%XkM; zC{Ohg>M2XnYerg8Nbg+$-kgDtE+`adz#AYWFT#Sf4HNM_2>l&Pz)yHR*AUObt%ER% z@rXYLN;B}>ycXK_;@R=j#*CfCNgkM;onZs?Ra)8#NurCvOXm>GWJ%D`-vYh&_ zom-+f{0-uzL|PTX;B`<@;-VhwgO!F>B-QJnlBOny?x8hfD&ku0e%9JQMC~z1uTJgG z-j4fv_dvg9V})uxpTdKAM18EjPjOlrXEQfMR#FlNV{jF>{ zdW%J!Yiss@(BPufxzXvn^;%6n9L;{cR!;;;8rOAPv^t?nZMB+M92nRW@0FZ=To_$x z^${xMa__}pw(hmD!Rp@o9A+ljME#n?pnSJN=QP)Ei}h=g4c`GS_zvk>dwhRh|NbWW zd~3eWCkl7!*ro;6-q#^tPh~gu9+huDoU7_HetUR6wj;c*3&y|qfN|<|^;eSxc&9Ni z@Br3R=Mk=XKHeJPw}%&GJHi89@Lklc>JiS%H;r(8zNs;^4tj5d-yS|u4Em+5;pB0U zPqYC0bSR?s-q1G(Cc+!a+p^ro8^V(ZmpAlpL$iBnEl}T6p(@r;d77aZJR_`gczi*) zPA_6BUZnZq+d)tq#l@!6NZ#{6M{%fimhdC?5L|DxX2sdrDKEJL%rn&z;** zr#3eIoOn<067O_+ruwmxK{7EbKs3hKTQASoWj#0^vn}~aDWOae- z)Dz3}K2R5cCvNSO(14!M!Ce!~G8CcjD)A`s5Nmn48Sb{=xdOZ9hj-wYzEkvPKOjcX zyzHL!x&#zAt_vU2ztqoWWYPx!el|XS7CWpkv}&w`^Toejp?OX59CrL|^1&%AR({oy znJt;kW_+_Sc<^A%aQbG!*3(D-l#@iQSNpM6Jc!zeg*l2A`(2-w5;_A)Qob`)#~95g zbQZoAyP%WrqjPC8n7>!}_BkHnG(9=~|6rM&Q6AGP21#hALndr466Lp-J2Uf}g-tiD%Gc)QsvG9E@ut2>h7CK;Y&{)W#&Nw`OpfUKKjn-wF z9aC_p&JOCS?d(wRDNSXA?s?g`1zBw+4Rm57x~KPN2eF{7ei|EeBH_petp(!&+mq1B z5ka|XP&uG`UJm?iMnpX)4vMPDfyH9MT1j89xL+%&@3c|`Vvf*SIale9{aT447!Bj` z{h<0?xyl9Y4YuP!y{9yl3%cj!!gf5=cjCgoSM|n&CtPUz1+5ht7p}%b2lTA=$Ac{g z4V*Ygs09wRoq|@1syOKAa}I)>I5=Dj9B6w4tq)akKwpRXb{sTx;vl5A_veDPLy%`! z6$hPt!odjC5-F)~vG1^Qpnc04tvrkzvYr6bi3okgTHoq3zmFxEd_8CHkXC>4%xrbj z=a%H7^60kpxAb%e*Fd+e2cxIEQ4Mt4`UQHr8`nU$?JldQyGadnlc%rkUg{2RZ_7Jt zpxbsg(9?Za4RqVCvUVB{Wy0yJQTUQ))x2&OVYPUAd9d$oc1Krxb zqpewvx3a?9hrl%8bUw+N7q{%bz1C4B1lVP|r_6Oy!(8l6#)W_m)+~aWW+yN)> zj4f}{QOW<;W8@=Vf9|jp+Pwd1MBA!oLi86*(U#ZB4}GJCR93N`r8vKnnV>hcCBN?pj~Q zgNu?+6>GuBdS50JAV=vL$;ax*U|3WHJsG^cXiEj?dy#LUMEhQZDx`Kezxn8bv>^Xn zwYN{U`rk+U^}l_sd~2eekJHx- z-5P!G^fd!i&=f(v`d>4|AWxf5%4;VS*4kqKCXC*Jt(E9~SWUkR!heaODRC(&|78gL z9W98bw^xxSe4DNO2M4aIe+!7lzt>P5%Y%DSdDTV|TI}oK>hGtiP2Vou^3(O$iNSBK z#@|o3GM@@X+E=I5Cq&98j%lvN0W^Om0yHPm0U8ro0NE%Ppjo~Fz(1Gl}9_=#NM! z+DWTSAmXiot-wq`QvTW-5#&1tDldXyKrkQ}5DW+g1OtKr!GK^uFd!HZ3xuQthZay%@_Z654@xs^h7cNa(uqH1(H7|eD-Vb(9 z8905{)-{%+2b`xW_&;i~EPef8(V|q{0uCZ5|3QgmR#wsOS8^>!4?5&J-=h4VN-P^U z?#WwOVEO33oC`Rlp!|DBEgRMst(ucta-`THSN9eoUJu;UkXz96F09PlFmLI`oXpG> zvvXFg+VIw)H^;=x+_-qA<>TKw^8>X=eEmbq2RoKoEG5nb98%Qs=8sB>@;6#O)fq6l z<@bG9vTkXC<&*z*$aTJjevG&Lt?w2sTWu+M-?@Nuis#IDUXt5P+=8CB?Z7_Je)Pbn zmN!>ywO9^UM4XvH3en>x0?_&&lzyB)6Hk1wCw-DCQLgxXr{Z=y|iv)d^5ro#N)(tvnj< zIWwM@Q-mO=T#edf{8leev$ zFfaSOU>jL7mj1Keif7W8@-ghDqAaGME&BK#^CaASRuB^VG42nGZLf&syR zU_dY+7!V8y1_T3w0l|P^fRlkT$jxVrEdX+Wg}@?UF|Y(!3M>Pb1G&HoAP-mxtO8a8 z`M?@rEl>`9>#SGKCJ8aYfM7r{P`eCVlI6$y@40qtI#$5Q!n0=!Bj=y^Ir8Bi=iiwY zS$GDw`+vC6Z(!Q6k#S?9W3HVnKx=Nw@}!+-hW3jY_~h{1r=p`b{NbPZVP*2o=^OHUvrC_Xf1F-9_)2VM{L*K4 zyz}nfgv^*!`PwmP5W#@~_x8p7?K*LrszIc{{^!-L13 zS&uP8`>*F}|8#0sa@b_~>XBh@Ut087v$E@dAGqz@_ge-iI)qiN419M}?*HP>Ug6<} z`mLYncu!nJ_*F|b^0JTr^7+Ey>i!E`xF-!pQQa!V?W%&?3Kve*sovVI&lU33Qd=P< zB5oJ}r7sm&vF#n#wY&#lYRH4%5b{1j*nP*$JCsk2P9! zk?msdwmGk-%s`2LYL3LBQ2%5$mBq5>*(ANZR(c!-gyMOlIT3Kn5Ft7ED0xsi=EAcu7Khidt0nm+G%>|64*+&>5|M$G%ptJ>5C# zr@3+;4MGHJ`$p@P;xvvlJ_8Z22h;}|06{=Q;10kD1OttL#y}H*#`Iml-9S^|9^hV} z8PFWK54ay_0XzVhfCm8@4-erQ0z3>n0<;2J18smufwlmR+4jI=z~ewD&;jTObOJg9 zU4X7YH=sK}^SmcO=I#xI0Z#yZfN&rJhyKF=*T|HnVH3(|`QrMYXF#({Anf7~ z)Bys3x`3AVpKqmTL2WVMh5cWmwiz&9=!#2~fyR|q$P4?wQa_DXPwS^~?#2CIlAw>~ zji>duMA_SA|4&kO6f{VL-y{PPR(V5xb?iDmyEOtMLx!8;Qf4Hk#-$`pNjLQ%x5_SH z?EBdzRQu*oAkhN`heNQyf>Nup4Pey(!&0#_?K(ZIg7(0Y3 ztrZkn1C&ob9_5o9wQ;}op3*=g8yhzudTjsr0ntiskd_iNc)-v>J)ayGoq zNO-Xk@IZUObL|Wdw=cZe9=LCh^bW}Hh*$(-ec_q5hu0g5a-k^E4S5mpc)P;W?V)&f Nsz0l>8S1MH{|8z;OrZb( literal 0 HcmV?d00001 -- 2.39.5