aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Yegor Kozlov <yegor@apache.org> 2010-07-28 05:48:27 +0000
committer: Yegor Kozlov <yegor@apache.org> 2010-07-28 05:48:27 +0000
commit: 8ddb1b6dbd35d98dd4933a9802a232e310a413bb (patch)
tree: 59a71474077d69b7cc4cf4a375971d8bbcce8c61
parent: e46e2c44a73cff9ffeff0b6499d4078e41c2a022 (diff)
download: poi-8ddb1b6dbd35d98dd4933a9802a232e310a413bb.tar.gz
download: poi-8ddb1b6dbd35d98dd4933a9802a232e310a413bb.zip
Support for escaped unicode characters in Shared String Table, see bug #49653
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@979952 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/documentation/content/xdocs/status.xml 7
-rw-r--r-- src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java 43
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java 12
3 files changed, 57 insertions, 5 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 6ec1c7c1a3..4e21fa3a06 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,10 +34,11 @@
<changes>
<release version="3.7-beta2" date="2010-??-??">
- <action dev="POI-DEVELOPERS" type="add">49579 - prevent ArrayIndexOutOfBoundException in UnknowEscherRecord</action>
- <action dev="POI-DEVELOPERS" type="add">49593 - preserve leading and trailing white spaces in XWPFRun</action>
+ <action dev="POI-DEVELOPERS" type="fix">49653 - Support for escaped unicode characters in Shared String Table</action>
+ <action dev="POI-DEVELOPERS" type="fix">49579 - prevent ArrayIndexOutOfBoundException in UnknowEscherRecord</action>
+ <action dev="POI-DEVELOPERS" type="fix">49593 - preserve leading and trailing white spaces in XWPFRun</action>
<action dev="POI-DEVELOPERS" type="add">49455 - Insert the content of fldSimple fields into the XWPFWordTextExtractor output</action>
- <action dev="POI-DEVELOPERS" type="add">49640 - Fixed parsing formulas containing defined names beginning with an underscore</action>
+ <action dev="POI-DEVELOPERS" type="fix">49640 - Fixed parsing formulas containing defined names beginning with an underscore</action>
<action dev="POI-DEVELOPERS" type="add">49538 - Added implementation for POISSON()</action>
<action dev="POI-DEVELOPERS" type="add">49524 - Support for setting cell text to be vertically rotated, via style.setRotation(0xff)</action>
<action dev="POI-DEVELOPERS" type="fix">49609 - Case insensitive matching of OOXML part names</action>
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java
index 0afd1a5971..736a15b3ad 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java
@@ -18,6 +18,8 @@
package org.apache.poi.xssf.usermodel;
import java.util.ArrayList;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
import javax.xml.namespace.QName;
@@ -75,6 +77,8 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring;
* @author Yegor Kozlov
*/
public class XSSFRichTextString implements RichTextString {
+ private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-F]{4})_");
+
private CTRst st;
private StylesTable styles;
@@ -337,13 +341,13 @@ public class XSSFRichTextString implements RichTextString {
*/
public String getString() {
if(st.sizeOfRArray() == 0) {
- return st.getT();
+ return utfDecode(st.getT());
}
StringBuffer buf = new StringBuffer();
for(CTRElt r : st.getRList()){
buf.append(r.getT());
}
- return buf.toString();
+ return utfDecode(buf.toString());
}
/**
@@ -490,4 +494,39 @@ public class XSSFRichTextString implements RichTextString {
c.dispose();
}
}
+
+ /**
+  * Decodes the <code>_xHHHH_</code> escapes used for characters which cannot be represented
+  * in XML as defined by the XML 1.0 specification, where each H is a hexadecimal digit of the
+  * character's value. Only sequences matched by <code>utfPtrn</code> are replaced.
+  * <p>
+  * Example: The Unicode character 0D is invalid in an XML 1.0 document,
+  * so it is stored as <code>_x000D_</code> and is decoded here to a carriage return.
+  * </p>
+  * See section 3.18.9 in the OOXML spec.
+  *
+  * @param value the string to decode, may be <code>null</code>
+  * @return the decoded string, or <code>null</code> if <code>value</code> is <code>null</code>
+  */
+ static String utfDecode(String value){
+     if(value == null) {
+         return null;
+     }
+
+     StringBuilder buf = new StringBuilder(value.length());
+     Matcher m = utfPtrn.matcher(value);
+     int idx = 0;
+     while(m.find()) {
+         // copy the literal text between the previous escape and this one
+         buf.append(value, idx, m.start());
+
+         // group(1) holds the four hex digits captured by utfPtrn, so the value fits in a char
+         buf.append((char)Integer.parseInt(m.group(1), 16));
+
+         idx = m.end();
+     }
+     // copy whatever follows the last escape (the whole string when nothing matched)
+     buf.append(value, idx, value.length());
+     return buf.toString();
+ }
}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java
index e983079599..5251e06bb2 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java
@@ -130,4 +130,16 @@ public final class TestXSSFRichTextString extends TestCase {
assertEquals("<xml-fragment xml:space=\"preserve\"> Apache</xml-fragment>", xs.xmlText());
}
+
+ /**
+  * Verifies that the _xHHHH_ escape notation is decoded by getString()
+  */
+ public void testUtfDecode() {
+     CTRst ctRst = CTRst.Factory.newInstance();
+     ctRst.setT("abc_x000D_2ef_x000D_");
+     // each _x000D_ escape is expected to come back as a carriage return
+     String decoded = new XSSFRichTextString(ctRst).getString();
+     assertEquals("abc\r2ef\r", decoded);
+
+ }
}