From 50f3168c1abf9d371521bc74fcd7f4f12a3e993a Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Mon, 16 Nov 2009 17:31:16 +0000 Subject: [PATCH] fixed XLSX2CSV to avoid exception when processing cells with multiple "t" elements, see Bugzilla 47757 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@880864 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../poi/xssf/eventusermodel/XLSX2CSV.java | 54 +++++++++++++++++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 0ae0c956ba..b2c9c100f4 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 47757 - fixed XLSX2CSV to avoid exception when processing cells with multiple "t" elements 48195 - short-circuit evaluation of IF() and CHOOSE() 48161 - support for text extraction from PPT master slides 47970 - added a method to set arabic mode in HSSFSheet diff --git a/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java b/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java index 91092eda67..8a77e599df 100644 --- a/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java +++ b/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java @@ -34,6 +34,7 @@ import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFRichTextString; @@ -60,7 +61,12 @@ import org.xml.sax.helpers.DefaultHandler; * (read-only) class is used for the shared string table * because the standard POI SharedStringsTable grows very * quickly with the number of 
unique strings. - * + *

+ * Thanks to Eric Smith for a patch that fixes a problem + * triggered by cells with multiple "t" elements, which is + * how Excel represents different formats (e.g., one word + * plain and one word bold). + * * @author Chris Lott */ public class XLSX2CSV { @@ -78,6 +84,43 @@ public class XLSX2CSV { NUMBER, } + /** + * Each shared string item is enclosed in "si". Each item can have multiple "t" elements. + * Example input + * + *

+ 	<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+	<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
+	  <si>
+	    <r>
+	      <rPr>
+	        <b />
+	        <sz val="11" />
+	        <color theme="1" />
+  	        <rFont val="Calibri" />
+  	        <family val="2" />
+  	        <scheme val="minor" />
+  	      </rPr>
+  	      <t>This:</t>
+  	    </r>
+	    <r>
+	      <rPr>
+  	        <sz val="11" />
+	        <color theme="1" />
+  	        <rFont val="Calibri" />
+  	        <family val="2" />
+  	        <scheme val="minor" />
+  	      </rPr>
+  	      <t xml:space="preserve">Causes Problems</t>
+  	    </r>
+  	  </si>
+	  <si>
+  	    <t>This does not</t>
+  	  </si>
+  	</sst>
+  	 * 
+ * + */ static class ReadonlySharedStringsTable extends DefaultHandler { /** @@ -192,8 +235,9 @@ public class XLSX2CSV { this.strings = new String[this.uniqueCount]; index = 0; characters = new StringBuffer(); - } else if ("t".equals(name)) { + } else if ("si".equals(name)) { characters.setLength(0); + } else if ("t".equals(name)) { tIsOpen = true; } } @@ -204,9 +248,11 @@ public class XLSX2CSV { */ public void endElement(String uri, String localName, String name) throws SAXException { - if ("t".equals(name)) { - strings[index] = characters.toString(); + if ("si".equals(name)) { + strings[index] = characters.toString(); ++index; + } else if ("t".equals(name)) { + tIsOpen = false; } } -- 2.39.5