source.dussan.org Git - poi.git/commitdiff
Bugzilla 52784 - replace ISO control characters with question marks in SXSSF to be consistent with XSSF
author: Yegor Kozlov <yegor@apache.org>
Tue, 28 Feb 2012 13:52:09 +0000 (13:52 +0000)
committer: Yegor Kozlov <yegor@apache.org>
Tue, 28 Feb 2012 13:52:09 +0000 (13:52 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1294657 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFCell.java

index 3e6792d0786bf9f3a7a5b9b29cf5d16acce31e38..b1a1825ad217d80280856cefade2777ce21d1afd 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta6" date="2012-??-??">
+           <action dev="poi-developers" type="fix">52784 - replace ISO control characters with question marks in SXSSF to be consistent with XSSF </action>
            <action dev="poi-developers" type="add">52057 - updated formula test framework to be aware of recently added Functions </action>
            <action dev="poi-developers" type="add">52574 - support setting header / footer page margins in HSSF </action>
            <action dev="poi-developers" type="add">52583 - fixed WorkbookUtil#createSafeSheetName to escape colon </action>
index d575e053248f6b1a9a41f154e3237ec10ac8c86e..c9a34a4630713aa32d336c5a0e62e0b1ef950dac 100644 (file)
@@ -250,6 +250,7 @@ public class SheetDataWriter {
                     break;\r
                 // Special characters\r
                 case '\n':\r
+                case '\r':\r
                     if (counter > last) {\r
                         _out.write(chars, last, counter - last);\r
                     }\r
@@ -263,13 +264,6 @@ public class SheetDataWriter {
                     _out.write("&#x9;");\r
                     last = counter + 1;\r
                     break;\r
-                case '\r':\r
-                    if (counter > last) {\r
-                        _out.write(chars, last, counter - last);\r
-                    }\r
-                    _out.write("&#xd;");\r
-                    last = counter + 1;\r
-                    break;\r
                 case 0xa0:\r
                     if (counter > last) {\r
                         _out.write(chars, last, counter - last);\r
@@ -278,7 +272,14 @@ public class SheetDataWriter {
                     last = counter + 1;\r
                     break;\r
                 default:\r
-                    if (c < ' ' || c > 127) {\r
+                    // YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.\r
+                    // the same rule applies to unicode surrogates and "not a character" symbols.\r
+                    if( c < ' ' || Character.isLowSurrogate(c) || Character.isHighSurrogate(c) ||\r
+                            ('\uFFFE' <= c && c <= '\uFFFF')) {\r
+                        _out.write('?');\r
+                        last = counter + 1;\r
+                    }\r
+                    else if (c > 127) {\r
                         if (counter > last) {\r
                             _out.write(chars, last, counter - last);\r
                         }\r
index 119a51ee5d4c0a02a1de7d78aa881a951ddef4a0..638dbe20d10110d13733ff0c857b72816a87686b 100644 (file)
@@ -21,6 +21,8 @@ package org.apache.poi.xssf.streaming;
 
 import org.apache.poi.ss.usermodel.*;
 import org.apache.poi.xssf.SXSSFITestDataProvider;
+import org.apache.poi.xssf.XSSFITestDataProvider;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
 /**
  *
@@ -66,12 +68,40 @@ public class TestSXSSFCell extends BaseTestCell {
         Sheet sh = wb.createSheet();
         Row row = sh.createRow(0);
         Cell cell = row.createCell(0);
-        String sval = "<>\t\r\n\u00a0 &\"POI\'\u2122";
+        String sval = "\u0000\u0002\u0012<>\t\n\u00a0 &\"POI\'\u2122";
         cell.setCellValue(sval);
 
         wb = _testDataProvider.writeOutAndReadBack(wb);
 
-        assertEquals(sval, wb.getSheetAt(0).getRow(0).getCell(0).getStringCellValue());
+        // invalid characters are replaced with question marks
+        assertEquals("???<>\t\n\u00a0 &\"POI\'\u2122", wb.getSheetAt(0).getRow(0).getCell(0).getStringCellValue());
+
+    }
+
+    public void testEncodingbeloAscii(){
+        Workbook xwb = new XSSFWorkbook();
+        Cell xCell = xwb.createSheet().createRow(0).createCell(0);
+
+        Workbook swb = new SXSSFWorkbook();
+        Cell sCell = swb.createSheet().createRow(0).createCell(0);
+
+        StringBuffer sb = new StringBuffer();
+        // test all possible characters
+        for(int i = 0; i < Character.MAX_VALUE; i++) sb.append((char)i) ;
+
+        String str = sb.toString();
+
+        xCell.setCellValue(str);
+        assertEquals(str, xCell.getStringCellValue());
+        sCell.setCellValue(str);
+        assertEquals(str, sCell.getStringCellValue());
+
+        xwb = XSSFITestDataProvider.instance.writeOutAndReadBack(xwb);
+        swb = SXSSFITestDataProvider.instance.writeOutAndReadBack(swb);
+        xCell = xwb.getSheetAt(0).createRow(0).createCell(0);
+        sCell = swb.getSheetAt(0).createRow(0).createCell(0);
+
+        assertEquals(xCell.getStringCellValue(), sCell.getStringCellValue());
 
     }
 }