From 19ed26f156fea2dcf0d034d483e8b7605c0aecd9 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Mon, 3 Feb 2014 20:17:59 +0000 Subject: [PATCH] Bug 55924: Avoid errors during exporting XML if there are numeric cells without value, also verify in tests that XML can always be parsed git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1564045 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/xssf/extractor/XSSFExportToXml.java | 5 ++ .../xssf/extractor/TestXSSFExportToXML.java | 72 +++++++++++++++++- test-data/spreadsheet/55924.xlsx | Bin 0 -> 9741 bytes 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 test-data/spreadsheet/55924.xlsx diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java index 49b095eded..543b243c5e 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java @@ -174,6 +174,11 @@ public class XSSFExportToXml implements Comparator{ Node currentNode = getNodeByXPath(xpath,doc.getFirstChild(),doc,false); STXmlDataType.Enum dataType = simpleXmlCell.getXmlDataType(); mapCellOnNode(cell,currentNode,dataType); + + //remove nodes which are empty in order to keep the output xml valid + if("".equals(currentNode.getTextContent()) && currentNode.getParentNode() != null) { + currentNode.getParentNode().removeChild(currentNode); + } } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExportToXML.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExportToXML.java index fbbf1e0eb5..0bdef92863 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExportToXML.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExportToXML.java @@ -17,10 +17,16 @@ package org.apache.poi.xssf.extractor; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + import junit.framework.TestCase; import org.apache.poi.POIXMLDocumentPart; @@ -28,12 +34,15 @@ import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.poi.xssf.model.MapInfo; import org.apache.poi.xssf.usermodel.XSSFMap; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.junit.Test; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; /** * @author Roberto Manicardi */ public final class TestXSSFExportToXML extends TestCase { - public void testExportToXML() throws Exception { XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("CustomXMLMappings.xlsx"); @@ -71,6 +80,8 @@ public final class TestXSSFExportToXML extends TestCase { assertEquals("gvvv", argomento); assertEquals("aaaa", progetto); assertEquals("aa", crediti); + + parseXML(xml); } } @@ -114,6 +125,8 @@ public final class TestXSSFExportToXML extends TestCase { assertEquals("ds", argomento); assertEquals("ro", progetto); assertEquals("ro", crediti); + + parseXML(xml); } } @@ -212,6 +225,8 @@ public final class TestXSSFExportToXML extends TestCase { assertEquals("15", euro); assertEquals("19", chf); + + parseXML(xmlData); } } @@ -239,6 +254,8 @@ public final class TestXSSFExportToXML extends TestCase { String date = xmlData.split("")[1].split("")[0].trim(); assertEquals("2012-01-13", date); + + parseXML(xmlData); } } @@ -270,6 +287,59 @@ public final class TestXSSFExportToXML extends TestCase { assertEquals("Hello World", stringValue); assertEquals("5.1", doubleValue); + + parseXML(xmlData); + } + } + + @Test + public void testXmlExportIgnoresEmptyCells_Bugzilla_55924() throws Exception { + + XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("55924.xlsx"); + + for (POIXMLDocumentPart p : wb.getRelations()) { + + if (!(p instanceof MapInfo)) { + continue; + } + MapInfo mapInfo = (MapInfo) p; + + XSSFMap map = mapInfo.getXSSFMapById(1); + + assertNotNull("XSSFMap is null", map); + + XSSFExportToXml exporter = new XSSFExportToXml(map); + ByteArrayOutputStream os = new ByteArrayOutputStream(); + exporter.exportToXML(os, true); + String xmlData = os.toString("UTF-8"); + + assertNotNull(xmlData); + assertFalse(xmlData.equals("")); + + String a = xmlData.split("")[1].split("")[0].trim(); + String euro = a.split("")[1].split("")[0].trim(); + assertEquals("1",euro); + + parseXML(xmlData); + } + } + + private void parseXML(String xmlData) throws IOException, SAXException, ParserConfigurationException { + DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); + docBuilderFactory.setNamespaceAware(true); + docBuilderFactory.setValidating(false); + DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); + docBuilder.setEntityResolver(new DummyEntityResolver()); + + docBuilder.parse(new ByteArrayInputStream(xmlData.getBytes("UTF-8"))); + } + + private static class DummyEntityResolver implements EntityResolver + { + @Override + public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException + { + return null; } } } diff --git a/test-data/spreadsheet/55924.xlsx b/test-data/spreadsheet/55924.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2ce091a7c3498f451ce23ff47839ce711782c915 GIT binary patch literal 9741 zcmeHtWmr^e*Z+_LGL(RHr$~dOG}7hJ-CYAimvncxw4x&jGSWDtQWDZqQX(zg|IzcD z*K;^N@AZEAe|et$Vee~Sd)C@>ueE-^weGdHx)KU1ApjkK0RRAK0dWjVkKB;}fLCY$ zz#RYvvVpXVGsManVxsBeYUOUk;qBx|Q;dqt@)UrK`2By6|6mLBYpS@kb78lrO$R-5 z%by0MmfID(n$Ccw$s+VUSTF_KRZ_V-L zm$@Fg$mP|Kv-JkWn5)+5mAEkI!(r%dFm%ff_?Ajgjpq|gE%JUnq7c6?6tP(L!Py?B zUrXP#5(gV!#))Y3cVT;1jruHv^CKzCONYYIFKt-5q#~ECWD5lpMQtrV*rrapt|$vu zXwf3Gt|-yt>W;8WKT7Am#kR7WV97PtJCt{!S`GE981`nQCB02OC!@l1qPWkwu?BKj zn*t{hr^D`RA&n5n<}f7{?b9%51B%A=;`G?lrq;#UsZ*1ChFWTp%G)sf%M<3rhr!o! z!pN7`@0Rwzvqf_;a{xN@QH&dAfL}3JY05kMElz0O4sjtv_rAk|UUkx5acjNNvT&hH zY4a8gViG~Is|3kOEQE%%guj7)DM)T!_isnxeA0c%@SfP2YSwcX2moAPqX5+ZhSfSP zF8V`604gJ(cpHIL6E`bIcTSEU=l^2#KNy&QS$bT`N7W84?Cu@;X6fbgZ_e@|bEHTRpa_l6c~ z<32T~v#}%WAxoez)0v%fH1RG?%Qc@ke^wJ4OV*fnL2u4SsYZBvZ_}+TnMVqa+!5*XT#>fE{ruz1 zvmB{Dg;1jXyixoQ=X4e*1x(Y1Y6sUaK`@D}sw3YW}&>@l7c7EI#VwKZ^*E==@= z*;&7bu975UC~V#^JhWWcedL`;AYEQ^Z>3`G3=HFDxoG}PL;@OtJ|E^t)l%M3_SG03Ae=sCM(!qyt+bfqUa zoYod*{AQ5N58xF|8<)o$vuy^i64IFp7n*CB%Jx`_QI(y`c+S&WT?)3X(@tpO2&c*9 z*D*zntlkRrjW>2g*4+vLEP?bR)HzA^eK6B^=?y~Rs%A!v$QjNb{kJ6KO_0MBd8uSU z+L%^fC3Nerxg0f~QBXO~X_~eJz)xl0?FBrySF5!u;&Y6Qk9xLoS7Y(BoJusYn5ZI= zL^&j+%xSSF*x}pxBfHJPp5d&T&6?#G^aNPLU#sDVgOGz*<~%hKE1NED$&EJgZ;Ec8 zS?Qf(-?K7SxZ^WpGCIvqv6A+tefLRuI@U48`z(MIuW(oI)9@5gs{kG%rE_0;y5v9S zZq>XyQ4d&kF5&_^ka~rg*awz%UlgwfRrcuE_j7)mV;t`Qml}ShfPRw_m*L6&e&y+T zWl+P3u=YRkd~ROml!gufR3N-{=MR5CY^|KEIDekGf1r6&Pt9fK0bv{Nq!$gu#fce4 ze#;|f@^vm$g}l=_lFBYSLl;ymF`^JaKE3n=siV;>6RBy7%aLsexd~FpWDp5OIU_of zSF9oFQyM`hA0La9p$qcAo?oK(IGhfRDw3vSSOL$xFnZ-1)hR|jDroEH6>+IVPL+by zW6|%eJUy2uzcW34zGKvtdyfHMvRW)7w|*;Jkuf(t=D>`kuTPYgyI-X~syajmU$YyT z8fi94YrJyInz5blNIj08#?OzZNK8^%Wn`u|>X@XtTk#uGVRSH)N*)oqNWnOt)Sy)+ zCL~I421U#wkdFa3Es`Vyi)q?;?4r4Md<<(&`7U>zxs#dE{KbgJddMDRV3!ciVxzw? zVW8vbc831$6VI!O4G+J%yaFWC`CBT``?5KQSC62n!}p(TF7a$R*J(hTPLMAT8M6)Y zRV#)StzOFj-;0fB?vjsL2lx%Z$&SkicF`^O1{nRX6<_Fi?V%I$kf~)3 zO&47TDn5)<7Mx)ZcLDg9=0`(Q-;_L8U z2Q@pp%_2ua=$H1)`M{&K3F;E@g24Undv{m7^*XQ5I+)uoE_{UWcx-8hV>t^ijy6BU zR*79+HuW&>It9zgo+LezDK=QfpxniP@J4_cnd!l;L>+h_8#J6?Pt5fKEXoC_khN7k z4mjRRM`Di!#kg?R%aSt8yxbOZjofS*a4cv;$)N~zmEOOn>n#`?>b7dG-AQTcuiM=e zYtd8JqKB--dt0&RXf{dEe4CeojUp+FrF88iEs0=jTjSfLN8(wCl9{p(SwO)5U1ldM?BPkl?9>Xv-X4wSSiwzDZWGz0R!*6 zs8)UtH$0IwRJ*qeksy(4;ZCk`9B@K9BhCv%k#4!?aZZ&h$MKj&n7zd3j=QnU_O6{k zr^e$5$|1q~=Hf{p42`&>hI^qqU;*g^krSjIw%LG8f}YumyCEWct}^xH7~P-_t3Naab)8}IWaOO%(QL=$i9wFRN5+#Fz8@ znRQ-TS8ocPJ}x=-*`{~Q7}jU~oK139k=WFj&2MazR&#!LIIZxTcL~%*vJ>ueFfqpt zYs@(CkCRb-2ag;vxoSrAupm__%_^G{zY9pM3Im>lQl~6W+jF7H+ya>PuIFZ3t)PENcl#A|%(F`4G*Ha*-*Qh6B$ z`Neun2KB7+4cMzt8mh0__nI`eFDkRr+Gw<5%r3-1vOCPKXs^Y_sdg7Vbxlio%eKHp zQaD;Hn9t|$4hd#WBMl?#G;;wZ(Y!vbw$VhMwG9Xp)JfV-BDIWk;tgj6_n;+_KR(iJR6>PQ0${utU4^-OdA@yS!NV2J% zPY^EV89<@BN@P|1Jd5vQ`^Y*>=kOR^lfw?EC$FFFW?wE834Edze=;4ja30QtZib=} z@ZkZK{9Snw!I16`IMb#hr7!J9#}y3NXI^sFj}Dv`aKSP*?_Y^kyxw2!w_02NB1`05 z5op_Qlh|SVj%qjJi3p;wp#LL@{U|EjZLO>z?*FP!f8PAeXFsdcu*5&BvI|62#vl@l z7qfDo40&#K4+-RRKesn!h=um*9HY_Bwy1A$@L}fYCUxx{FGW$bs-g@cjPFbbsc*?% zWiI*N3gJDYGTJRQqm2k?@^w&M=E@{U1X*gZ6!6Ch^>~#{#_`ej;eE)o#IWl&9JJJu z*$>b7R9*?YcWEhc-cD##Me7j&{8H0db0IE*-C_9^uiuHO@H$JKRGczDHn#-VE#WqK zYS~N4-kqeN%TF~dPwdGLi$9AY7W&UzE>u!QVvcy?iKwke{$Yln2H^RT(Bo3z&WKh+ z#7^oFj>a}0S~rQ70Uu@&IEznZX4~!?3}3~N(d^yKM<=wL&hT9J8I!XkSPC=m%N zBK5!>lz%G@xf5PaFH>YdIf#>DzpvXFONJD|BkN+ynV2F59vWjqgUCw^p<9QXNCTlzWMIUoPrmri;E= zyp!nibc1QgYz?2%Z74r&4d^Yqjn9(9{_3N%Wm4ysC2(hCY}$8RiXXx*HaIIIk9q+c3p==dfov=(pkdfA9(+*{;j;G}7tt)Zk+o?|5Q1>zipcO-vVE(8(|`lL%XUD&h4N z4qye%U#DvN9DmCflu#y?W)#rlKtm0M0s|PYF9MrSm}KcF^|!br7}b)NjV@0?KArVS zC;&{RFwGCaw)fK%tpQb_2zu13QL}>_!T1(M3)I=c%w|xg&y|*sQ>Y<^>;(o&(;(So z@suCN8wol=l}&M=r!w0>ZsJ07Ler(U*-&Df?aNw7PSIm;kmw;&i8ICVW#eh_KyN{+ zm$!>bXBeeb94L19_~Cq|L04&`BfA$@yphb;dp>Q+l^kn}&ANi+0u%0w$S} zzMx8!8J-JnOa6Lwx!Dd}N>w5_J(*_>V|sz29B6@=@H)6EnWXX84`?{CUm;5;DMLh1u zS+@FCragdg(u{cumz-wL8qF;*evF4TLhii7H_8yTaj7*-VMKaNmZ>ik>o&`Bs|EQ@ zVzsZvS!QqZ2R6SdK3LmuGS-kXx5AR)ZKtHrK?(ExM;ae|6FDP5i}{N88Uzn}-?K&G z3Q`bn8b)#ZFVhFp#TNk+NYmkH!0+3d(iCWSHn{WW#hclr)kbXt50Tc9%lC|PksmBS zKG93>4oSx$;BOW_8C@p}Jf0S^wIREkr>(DnM7K^`0$5pAj)Kk?7p?%C*puxonPx%B zrjv5>VYL|H*ybNMI17SQHl}vEi($ZFmc)=3e{xUM*R7Dej;4| z*wL(IK_-3?^J>5Ts`<01wMI3@M6Bsi^g!r}e+%YnZRDYKDtiZu|B2sW@!VcPqgP?~ zZiu$_yBECDucf1BVmaVCnPQjDaWrq#+u}|Luh#;Fp&w>4IR=a0HW5W6zQcDR5&4zciN@I2bZR`U11j zF1IMEv6O$Ayg8v(liayJt>nTm-?7iENGxzZZu#)+X+Ha;e?~1f)GZy>6!I{w?Nw{- zeY{DmxLVz66m(Dewm^LH9pQsgZcFa=h!B36*_i#OWRA4`#f~M9Mdp#a#&_ypI6!yD zpHC38@5vSfE!8u>A=1{FWlf5Yeo-Jst}m2+(r)4Nm9CnwjGn8^Sz|S%kbJLXBTidl z@8J7e?Kw0Z(2;x95QZ3$Z0TIF?_mxOtHpE-6ghO`L{cvlljX#RW*cvWN8o8dQgMY= zVocu0kSp?u2Gc2CROKwR4Y)%!Avo2^F=84L-0bgm#mE;$h?*Dy7EA-}ycwTx6CyDO z+bJW_%M)_oE2*G&6#AkR%+VUFM&F7^_UfEsYCTQckr9@UO)N`>_*R-}B<}!+r|T#u zr3qEXT40=uMQpR|cvPH=ZY(gM_scIWl!yBgpSv#oQl-X$la$(WAFri+c@med>DDG{ z{j+-}xUw+=uXvZme@mULCvzsR5HUT3h-sWZs1p&%DrT-fxib#bXTQWnXjCy2gf|Vj z%g7GF-+YoSto(Efdq6qcXn1lW9`37|;;^>kXT_@e7;B(g_ZYk_bet^e+LDn@*J4|$ zvOX@2bFo(vvvQsS zC|V2xHrG_!C>)oid#EXb&c^TnP}Wl!q!DN}wU_!H49ptf-~-a(PlNdQaAzxvHjv0M z!05^vAl7oxGj{$L@ExV=x6M$h!`~8HA`KtNzTPZVe`JxY(C$B~ zI&y1NW5s3NQsYau=Jz%=KOWsC&8=&67W}#kEkYxmP;u8s!vV7s!3fdxRJU%^K@?$e#x=+PzfA(9?9=|c+ z`_Y%jK_o`%KRgRDGe;yv&Yu^MowJRjm9&+kqx;YDCofS_4Keiy-;uwF?DfiUz7rKxvtWJ=X0j1$@Z!vUlS}h^*P(>=aPHFMATt!S*qhQdzzI5NW~b69#c=G2ztG zc)2dx;^z&L%7c}%1@rf8^h4afW!2R!ND%YCTzM{vudP5+XjKra?-$|UoBivE zOBq(|;35g%L7OjHZdDmgC@R{Lu<~A%a4q(}n-XO7T0xp%>qx9Mn$vpPxlWj~S$XiZ zEpI>7@pY=lE>5?0d@p{qIpLEp@y|4~OL`7@nk9lglrgmhu-x9%QRsThvZvw z6plPxt~8L>0~e1%8c2{a5bNtkDQ8AaD@j?%%~$3v-UmncNC%ORFBIMsguq7f_tH^8cWm5q zjFqG0B$5|XKWg|RqU?`NJ`yrJ;NN{z@~64~y#D4B6LqD(EBO1Q`%l3i*Ov&c|I6I_ zAA)}y{{2;O1TjMR_oKj@dTtJZerY1W{KrV>rtr;)#V_Gvte?U+=NC5>-0T+pQecDs zKX3ovdPg@E-rV2+Qs_)@Q{k^J%>P{rI^l1%{OlRr)N*rA`|H0HAS%~?P22vpxSyAw z6n|68&27^!Ex~txYWcN^x+(hiQuvo30PvL*0JvEg-;};tbo`QUrug~dzmy<1WpAdn yU$PlgKV|>7BzIHU-)a7rDga;