]> source.dussan.org Git - poi.git/commitdiff
Fix for SharedStringsTable in ooxml excel support, and related test updates now we...
author Nick Burch <nick@apache.org>
Wed, 9 Jan 2008 18:46:30 +0000 (18:46 +0000)
committer Nick Burch <nick@apache.org>
Wed, 9 Jan 2008 18:46:30 +0000 (18:46 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@610506 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java
src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java
src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java

index bb476c1e68830f1566e00c08bb0bbebbfa016cb2..3766a046a1bbb213680329f09263ab8a1f851072 100644 (file)
@@ -49,7 +49,6 @@ public class HSSFXML extends HXFDocument {
        public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
        
        private WorkbookDocument workbookDoc;
-       
        private SharedStringsTable sharedStrings;
 
        public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
@@ -92,8 +91,14 @@ public class HSSFXML extends HXFDocument {
                        WorksheetDocument.Factory.parse(sheetPart.getInputStream());
                return sheetDoc.getWorksheet();
        }
-       
+
+       /**
+        * Returns the shared string at the given index
+        */
        public String getSharedString(int index) {
                return this.sharedStrings.get(index);
        }
+       protected SharedStringsTable _getSharedStringsTable() {
+               return sharedStrings;
+       }
 }
index d97454b01305d1c0e548eddf2683569782d5a3c1..b3e21925665693bb548f2539f6e436e70abe215a 100644 (file)
 package org.apache.poi.hssf.model;
 
 import java.io.IOException;
-import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.LinkedList;
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-
+import org.apache.xmlbeans.XmlException;
 import org.openxml4j.opc.PackagePart;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
-import org.xml.sax.SAXException;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
 
 
 public class SharedStringsTable extends LinkedList<String> {
-
-    private static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
-
-    /** XXX: should have been using an XMLBeans object, but it cannot parse the sharedStrings schema, so we'll use DOM temporarily.
-    CTSst sst;
-    */
-
+    public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
+    
+    private SstDocument doc; 
     private PackagePart part;
 
-    private DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
-    
-    private DocumentBuilder parser;
-    
-    public SharedStringsTable(PackagePart part) throws IOException {
+    /**
+     * Builds the shared strings table from the given OOXML
+     *  package part, parsing it with XMLBeans and then copying
+     *  the string entries into this list.
+     *
+     * @param part the package part holding the shared strings xml
+     * @throws IOException if the part cannot be read or closed
+     * @throws XmlException if the part's contents cannot be parsed
+     */
+    public SharedStringsTable(PackagePart part) throws IOException, XmlException {
         this.part = part;
-        InputStream is = part.getInputStream();
-        try {
-            builderFactory.setNamespaceAware(true);
-            this.parser = builderFactory.newDocumentBuilder();
-            readFrom(is);
-        } catch (ParserConfigurationException e) {
-            throw new RuntimeException(e);
-        } catch (SAXException e) {
-            throw new RuntimeException(e);
-        } finally {
-            if (is != null) is.close();
-        }
-
-
+       // Always close the part's stream once parsing is done (or
+       //  has failed), as the previous DOM based code did.
+       // Fully qualified, as this file no longer imports InputStream
+       java.io.InputStream is = part.getInputStream();
+       try {
+               doc = SstDocument.Factory.parse(is);
+       } finally {
+               is.close();
+       }
+       read();
     }
 
-    public void readFrom(InputStream is) throws IOException, SAXException {
-        Document doc = parser.parse(is);
-        Element root = doc.getDocumentElement();
-        NodeList sis = root.getElementsByTagNameNS(MAIN_SML_NS_URI, "si");
-        for (int i = 0 ; i < sis.getLength() ; ++i) {
-            Element si = (Element) sis.item(i);
-            NodeList ts = si.getElementsByTagNameNS(MAIN_SML_NS_URI, "t");
-            String t = "";
-            if (ts.getLength() > 0 && ts.item(0).getFirstChild() != null) {
-                t = ts.item(0).getFirstChild().getNodeValue();
-                add(t);
-            }
-        }
+    /**
+     * Appends the value returned by getT() for every si entry
+     *  of the parsed document to this list, in document order.
+     */
+    private void read() {
+       for (CTRst si : doc.getSst().getSiArray()) {
+               add(si.getT());
+       }
+    }
+    
+    /**
+     * Writes the current shared strings table into
+     *  the associated OOXML PackagePart.
+     * The si entries of the underlying document are replaced
+     *  with the current contents of this list, and both the
+     *  count and unique count are set to the list size.
+     *
+     * @throws IOException if the part's output stream cannot
+     *  be written to or closed
+     */
+    public void write() throws IOException {
+       CTSst sst = doc.getSst();
+       
+       // Remove the old list, working backwards so that the
+       //  remaining indexes stay valid
+       for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) {
+               sst.removeSi(i);
+       }
+       
+       // Add the new one
+       for(String s : this) {
+               sst.addNewSi().setT(s);
+       }
+       
+       // Update the counts
+       sst.setCount(this.size());
+       sst.setUniqueCount(this.size());
+       
+       // Write out, making sure the stream is closed
+       //  even if the save fails part way through
+       OutputStream out = part.getOutputStream();
+       try {
+               doc.save(out);
+       } finally {
+               out.close();
+       }
     }
 }
index 549f32eaa21af368bedc831aac7dd39ad9492204..b24556cd8ec565bbffc3386e94b4902d7e740adb 100644 (file)
@@ -42,6 +42,8 @@ public class HSSFXMLCell {
         switch (cell.getT().intValue()) {
         case STCellType.INT_S:
             return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
+        case STCellType.INT_INLINE_STR:
+               return cell.getV();
         case STCellType.INT_N:
             return cell.getV();
         // TODO: support other types
index 9c3ef65c4441d3eb48d47d9607c9f1ea9daeb477..97453265c447b8aa638b61084204c8052d32cfbd 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi.hssf;
 
 import java.io.File;
 
+import org.apache.poi.hssf.model.SharedStringsTable;
 import org.apache.poi.hxf.HXFDocument;
 import org.openxml4j.opc.Package;
 import org.openxml4j.opc.PackagePart;
@@ -124,4 +125,36 @@ public class TestHSSFXML extends TestCase {
                assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
                assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
        }
+       
+       public void testSharedStringBasics() throws Exception {
+               HSSFXML xml = new HSSFXML(
+                               HXFDocument.openPackage(sampleFile)
+               );
+               assertNotNull(xml._getSharedStringsTable());
+               
+               SharedStringsTable sst = xml._getSharedStringsTable();
+               assertEquals(10, sst.size());
+               
+               assertEquals("Lorem", sst.get(0));
+               for(int i=0; i<sst.size(); i++) {
+                       assertEquals(sst.get(i), xml.getSharedString(i));
+               }
+               
+               // Add a few more, then save and reload, checking
+               //  changes have been kept
+               sst.add("Foo");
+               sst.add("Bar");
+               sst.set(0, "LoremLorem");
+               
+               sst.write();
+               
+               xml = new HSSFXML(xml.getPackage());
+               sst = xml._getSharedStringsTable();
+               assertEquals(12, sst.size());
+               
+               assertEquals("LoremLorem", sst.get(0));
+               for(int i=0; i<sst.size(); i++) {
+                       assertEquals(sst.get(i), xml.getSharedString(i));
+               }
+       }
 }
\ No newline at end of file
index f47639bf57ee1211e454ceabf4ae953d28f98523..a73b60bf72bd2fa3c35ed8395630e6e6a490575b 100644 (file)
@@ -101,32 +101,32 @@ public class TestHXFExcelExtractor extends TestCase {
                extractor.setIncludeSheetNames(false);
                text = extractor.getText();
                assertEquals(
-                               "0\t111\n" +
-                               "1\t222\n" +
-                               "2\t333\n" +
-                               "3\t444\n" +
-                               "4\t555\n" +
-                               "5\t666\n" +
-                               "6\t777\n" +
-                               "7\t888\n" +
-                               "8\t999\n" +
-                               "9\t4995\n" +
+                               "Lorem\t111\n" +
+                               "ipsum\t222\n" +
+                               "dolor\t333\n" +
+                               "sit\t444\n" +
+                               "amet\t555\n" +
+                               "consectetuer\t666\n" +
+                               "adipiscing\t777\n" +
+                               "elit\t888\n" +
+                               "Nunc\t999\n" +
+                               "at\t4995\n" +
                                "\n\n", text);
                
                // Now get formulas not their values
                extractor.setFormulasNotResults(true);
                text = extractor.getText();
                assertEquals(
-                               "0\t111\n" +
-                               "1\t222\n" +
-                               "2\t333\n" +
-                               "3\t444\n" +
-                               "4\t555\n" +
-                               "5\t666\n" +
-                               "6\t777\n" +
-                               "7\t888\n" +
-                               "8\t999\n" +
-                               "9\tSUM(B1:B9)\n" +
+                               "Lorem\t111\n" +
+                               "ipsum\t222\n" +
+                               "dolor\t333\n" +
+                               "sit\t444\n" +
+                               "amet\t555\n" +
+                               "consectetuer\t666\n" +
+                               "adipiscing\t777\n" +
+                               "elit\t888\n" +
+                               "Nunc\t999\n" +
+                               "at\tSUM(B1:B9)\n" +
                                "\n\n", text);
                
                // With sheet names too
@@ -134,16 +134,16 @@ public class TestHXFExcelExtractor extends TestCase {
                text = extractor.getText();
                assertEquals(
                                "Sheet1\n" +
-                               "0\t111\n" +
-                               "1\t222\n" +
-                               "2\t333\n" +
-                               "3\t444\n" +
-                               "4\t555\n" +
-                               "5\t666\n" +
-                               "6\t777\n" +
-                               "7\t888\n" +
-                               "8\t999\n" +
-                               "9\tSUM(B1:B9)\n\n" +
+                               "Lorem\t111\n" +
+                               "ipsum\t222\n" +
+                               "dolor\t333\n" +
+                               "sit\t444\n" +
+                               "amet\t555\n" +
+                               "consectetuer\t666\n" +
+                               "adipiscing\t777\n" +
+                               "elit\t888\n" +
+                               "Nunc\t999\n" +
+                               "at\tSUM(B1:B9)\n\n" +
                                "Sheet2\n\n" +
                                "Sheet3\n"
                                , text);
@@ -161,9 +161,10 @@ public class TestHXFExcelExtractor extends TestCase {
                assertTrue(text.length() > 0);
                
                // Might not have all formatting it should do!
+               // TODO decide if we should really have the "null" in there
                assertTrue(text.startsWith(
                                                "Avgtxfull\n" +
-                                               "3\t13\t3\t2\t2\t3\t2\t"        
+                                               "null\t(iii) AVERAGE TAX RATES ON ANNUAL"       
                ));
        }
        
@@ -184,8 +185,8 @@ public class TestHXFExcelExtractor extends TestCase {
                        POITextExtractor extractor = extractors[i];
                        
                        String text = extractor.getText().replaceAll("[\r\t]", "");
-                       System.out.println(text.length());
-                       System.out.println(text);
+                       //System.out.println(text.length());
+                       //System.out.println(text);
                        assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
                        Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
                        Matcher m = pattern.matcher(text);