]> source.dussan.org Git - poi.git/commitdiff
Bug 53130 - SXSSF Shared Strings option support, to make generated xlsx files compati...
authorAndreas Beeker <kiwiwings@apache.org>
Fri, 14 Feb 2014 22:45:05 +0000 (22:45 +0000)
committerAndreas Beeker <kiwiwings@apache.org>
Fri, 14 Feb 2014 22:45:05 +0000 (22:45 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1568539 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java
src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFWorkbook.java

index a4ca0aadb319143ce5f212f3b10f2151ba6df4dc..8b72ad48d9ee58d22b0bbb2466a0d27b64f5aae3 100644 (file)
@@ -29,6 +29,8 @@ import java.io.Writer;
 import java.util.zip.GZIPInputStream;\r
 import java.util.zip.GZIPOutputStream;\r
 \r
+import org.apache.poi.xssf.model.SharedStringsTable;\r
+\r
 /**\r
  * Sheet writer that supports gzip compression of the temp files.\r
  */\r
@@ -37,6 +39,13 @@ public class GZIPSheetDataWriter extends SheetDataWriter {
     public GZIPSheetDataWriter() throws IOException {\r
         super();\r
     }\r
+       \r
+       /**\r
+     * @param sharedStringsTable the shared strings table, or null if inline text is used\r
+     */\r
+       public GZIPSheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException {\r
+        super(sharedStringsTable);\r
+    }\r
 \r
     /**\r
      * @return temp file to write sheet data\r
index 207eef1224ef6f64c33f3d28ffe05e4812af77ed..77690c06bee394434b13bcc5aff73e434117d924 100644 (file)
@@ -43,12 +43,21 @@ import java.util.zip.ZipEntry;
 import org.apache.poi.ss.formula.udf.UDFFinder;
 import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
 import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.xssf.model.SharedStringsTable;
 
 /**
  * Streaming version of XSSFWorkbook implementing the "BigGridDemo" strategy.
  *
- * @author Alex Geller, Four J's Development Tools
-*/
+ * SXSSFWorkbook defaults to using inline strings instead of a shared strings
+ * table. This is very efficient, since no document content needs to be kept in
+ * memory, but is also known to produce documents that are incompatible with
+ * some clients. With shared strings enabled, all unique strings in the document
+ * have to be kept in memory. Depending on your document content this could use
+ * a lot more resources than with shared strings disabled.
+ *
+ * Carefully review your memory budget and compatibility needs before deciding
+ * whether to enable shared strings or not.
+ */
 public class SXSSFWorkbook implements Workbook
 {
     /**
@@ -72,6 +81,11 @@ public class SXSSFWorkbook implements Workbook
      */
     private boolean _compressTmpFiles = false;
 
+    /**
+     * shared string table - a cache of strings in this workbook
+     */
+    private SharedStringsTable _sharedStringSource = null;
+
     /**
      * Construct a new workbook
      */
@@ -165,15 +179,48 @@ public class SXSSFWorkbook implements Workbook
      * @param compressTmpFiles whether to use gzip compression for temporary files
      */
     public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles){
+       this(workbook,rowAccessWindowSize, compressTmpFiles, false);
+    }
+
+    /**
+     * Constructs a workbook from an existing workbook.
+     * <p>
+     * When a new node is created via createRow() and the total number
+     * of unflushed records would exceed the specified value, then the
+     * row with the lowest index value is flushed and cannot be accessed
+     * via getRow() anymore.
+     * </p>
+     * <p>
+     * A value of -1 indicates unlimited access. In this case all
+     * records that have not been flushed by a call to flush() are available
+     * for random access.
+     * </p>
+     * <p>
+     * A value of 0 is not allowed because it would flush any newly created row
+     * without having a chance to specify any cells.
+     * </p>
+     *
+     * @param workbook  the template workbook
+     * @param rowAccessWindowSize the maximum number of unflushed rows kept accessible; -1 for unlimited, 0 is not allowed
+     * @param compressTmpFiles whether to use gzip compression for temporary files
+     * @param useSharedStringsTable whether to use a shared strings table
+     */
+    public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles, boolean useSharedStringsTable){
        setRandomAccessWindowSize(rowAccessWindowSize);
        setCompressTempFiles(compressTmpFiles);
        if (workbook == null)
        {
                _wb=new XSSFWorkbook();
+                if(useSharedStringsTable){
+                    _sharedStringSource = _wb.getSharedStringSource();
+                }
        }
        else
        {
                _wb=workbook;
+                if(useSharedStringsTable){
+                    _sharedStringSource = _wb.getSharedStringSource();
+                }
             for ( int i = 0; i < _wb.getNumberOfSheets(); i++ )
             {
                 XSSFSheet sheet = _wb.getSheetAt( i );
@@ -236,9 +283,9 @@ public class SXSSFWorkbook implements Workbook
 
     SheetDataWriter createSheetDataWriter() throws IOException {
         if(_compressTmpFiles) {
-            return new GZIPSheetDataWriter();
+            return new GZIPSheetDataWriter(_sharedStringSource);
         } else {
-            return new SheetDataWriter();
+            return new SheetDataWriter(_sharedStringSource);
         }
     }
 
index fa062f4c24ef95cc763cbaeabed7da4e0f2b0c00..b7cecfe81b5493e9ce1f41f94f161c3e1d90a15c 100644 (file)
@@ -32,6 +32,9 @@ import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.CellStyle;\r
 import org.apache.poi.ss.usermodel.FormulaError;\r
 import org.apache.poi.ss.util.CellReference;\r
+import org.apache.poi.xssf.model.SharedStringsTable;\r
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;\r
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;\r
 \r
 /**\r
  * Initially copied from BigGridDemo "SpreadsheetWriter".\r
@@ -48,11 +51,21 @@ public class SheetDataWriter {
     private int _numberOfCellsOfLastFlushedRow; // meaningful only of _numberOfFlushedRows>0\r
     private int _numberLastFlushedRow = -1; // meaningful only of _numberOfFlushedRows>0\r
 \r
+    /**\r
+     * Table of strings shared across this workbook.\r
+     * If two cells contain the same string, then the cell value is the same index into SharedStringsTable\r
+     */\r
+    private SharedStringsTable _sharedStringSource;\r
+\r
     public SheetDataWriter() throws IOException {\r
         _fd = createTempFile();\r
         _out = createWriter(_fd);\r
     }\r
 \r
+    public SheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException{\r
+        this();\r
+        this._sharedStringSource = sharedStringsTable;\r
+    }\r
     /**\r
      * Create a temp file to write sheet data. \r
      * By default, temp files are created in the default temporary-file directory\r
@@ -196,14 +209,24 @@ public class SheetDataWriter {
                 break;\r
             }\r
             case Cell.CELL_TYPE_STRING: {\r
-                _out.write(" t=\"inlineStr\">");\r
-                _out.write("<is><t");\r
-                if(hasLeadingTrailingSpaces(cell.getStringCellValue())) {\r
-                    _out.write(" xml:space=\"preserve\"");\r
+                if (_sharedStringSource != null) {\r
+                    XSSFRichTextString rt = new XSSFRichTextString(cell.getStringCellValue());\r
+                    int sRef = _sharedStringSource.addEntry(rt.getCTRst());\r
+\r
+                    _out.write(" t=\"" + STCellType.S.toString() + "\">");\r
+                    _out.write("<v>");\r
+                    _out.write(String.valueOf(sRef));\r
+                    _out.write("</v>");\r
+                } else {\r
+                    _out.write(" t=\"inlineStr\">");\r
+                    _out.write("<is><t");\r
+                    if (hasLeadingTrailingSpaces(cell.getStringCellValue())) {\r
+                        _out.write(" xml:space=\"preserve\"");\r
+                    }\r
+                    _out.write(">");\r
+                    outputQuotedString(cell.getStringCellValue());\r
+                    _out.write("</t></is>");\r
                 }\r
-                _out.write(">");\r
-                outputQuotedString(cell.getStringCellValue());\r
-                _out.write("</t></is>");\r
                 break;\r
             }\r
             case Cell.CELL_TYPE_NUMERIC: {\r
@@ -245,7 +268,7 @@ public class SheetDataWriter {
     }\r
 \r
     //Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java\r
-    protected void outputQuotedString(String s) throws IOException {\r
+     protected void outputQuotedString(String s) throws IOException {\r
         if (s == null || s.length() == 0) {\r
             return;\r
         }\r
index 6a9abba7533e6f329b52551bd374d90c184267cf..f33bd48b8b39c1ef2a72568b818a9fbefaeeeb1a 100644 (file)
@@ -23,6 +23,7 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.lang.reflect.Field;
 
 import org.apache.poi.ss.usermodel.BaseTestWorkbook;
 import org.apache.poi.ss.usermodel.Cell;
@@ -32,6 +33,7 @@ import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.ss.usermodel.WorkbookFactory;
 import org.apache.poi.ss.util.CellReference;
 import org.apache.poi.xssf.SXSSFITestDataProvider;
+import org.apache.poi.xssf.model.SharedStringsTable;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
 public final class TestSXSSFWorkbook extends BaseTestWorkbook {
@@ -90,6 +92,42 @@ public final class TestSXSSFWorkbook extends BaseTestWorkbook {
 
     }
 
+    public void testUseSharedStringsTable() throws Exception {
+        SXSSFWorkbook wb = new SXSSFWorkbook(null, 10, false, true);
+
+        Field f = SXSSFWorkbook.class.getDeclaredField("_sharedStringSource");
+        f.setAccessible(true);
+        SharedStringsTable sss = (SharedStringsTable)f.get(wb);
+        
+        assertNotNull(sss);
+
+        Row row = wb.createSheet("S1").createRow(0);
+
+        row.createCell(0).setCellValue("A");
+        row.createCell(1).setCellValue("B");
+        row.createCell(2).setCellValue("A");
+
+        XSSFWorkbook xssfWorkbook = (XSSFWorkbook) SXSSFITestDataProvider.instance.writeOutAndReadBack(wb);
+        sss = (SharedStringsTable)f.get(wb);
+        assertEquals(2, sss.getUniqueCount());
+        wb.dispose();
+
+        Sheet sheet1 = xssfWorkbook.getSheetAt(0);
+        assertEquals("S1", sheet1.getSheetName());
+        assertEquals(1, sheet1.getPhysicalNumberOfRows());
+        row = sheet1.getRow(0);
+        assertNotNull(row);
+        Cell cell = row.getCell(0);
+        assertNotNull(cell);
+        assertEquals("A", cell.getStringCellValue());
+        cell = row.getCell(1);
+        assertNotNull(cell);
+        assertEquals("B", cell.getStringCellValue());
+        cell = row.getCell(2);
+        assertNotNull(cell);
+        assertEquals("A", cell.getStringCellValue());
+    }
+
     public void testAddToExistingWorkbook() {
        XSSFWorkbook xssfWorkbook = new XSSFWorkbook();
        xssfWorkbook.createSheet("S1");