source.dussan.org Git - poi.git/commitdiff
Patch from Ugo from bug #44185 - support getting shared strings for ooxml excel files...
author Nick Burch <nick@apache.org>
Tue, 8 Jan 2008 17:28:39 +0000 (17:28 +0000)
committer Nick Burch <nick@apache.org>
Tue, 8 Jan 2008 17:28:39 +0000 (17:28 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@610074 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java
src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java
src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java

index bf2b1b1131cf9eb27c055c6861ace07b66636cbe..bb476c1e68830f1566e00c08bb0bbebbfa016cb2 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi.hssf;
 
 import java.io.IOException;
 
+import org.apache.poi.hssf.model.SharedStringsTable;
 import org.apache.poi.hxf.HXFDocument;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
@@ -45,14 +46,24 @@ public class HSSFXML extends HXFDocument {
        public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
        public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
        public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";
+       public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
        
        private WorkbookDocument workbookDoc;
        
+       private SharedStringsTable sharedStrings;
+
        public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
                super(container, MAIN_CONTENT_TYPE);
                
                workbookDoc =
                        WorkbookDocument.Factory.parse(basePart.getInputStream());
+               
+               PackagePart ssPart = getSinglePartByRelationType(SHARED_STRINGS_RELATION_TYPE, basePart);
+               if (ssPart != null) {
+                       sharedStrings = new SharedStringsTable(ssPart);
+               } else {
+                       
+               }
        }
        
        /**
@@ -81,4 +92,8 @@ public class HSSFXML extends HXFDocument {
                        WorksheetDocument.Factory.parse(sheetPart.getInputStream());
                return sheetDoc.getWorksheet();
        }
+       
+       public String getSharedString(int index) {
+               return this.sharedStrings.get(index);
+       }
 }
index 59f83d6d041da2eb383bf6f765a33a40e13d1a72..34ae0680052ecf29cc4a226a9534ad037987aa21 100644 (file)
@@ -117,7 +117,7 @@ public class HXFExcelExtractor extends POIXMLTextExtractor {
                                                        }
                                                }
                                                if(!done) {
-                                                       HSSFXMLCell uCell = new HSSFXMLCell(cell);
+                                                       HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
                                                        text.append(uCell.getStringValue());
                                                }
                                        }
index b1b38283b437a6556fbc9fc9ff649ffbe67d62e7..549f32eaa21af368bedc831aac7dd39ad9492204 100644 (file)
 package org.apache.poi.hssf.usermodel;
 
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
 
 /**
  * User facing wrapper around an underlying cell object
  */
 public class HSSFXMLCell {
-       private CTCell cell;
-       public HSSFXMLCell(CTCell rawCell) {
-               this.cell = rawCell;
-       }
-       
-       /**
-        * Formats the cell's contents, based on its type,
-        *  and returns it as a string.
-        */
-       public String getStringValue() {
-               if(cell.getV() != null) {
-                       return cell.getV();
-               }
-               if(cell.getIs() != null) {
-                       return cell.getIs().getT();
-               }
-               // TODO: Formatting
-               return Long.toString(cell.getS());
-       }
-       
-       public String toString() {
-               return cell.getR() + " - " + getStringValue(); 
-       }
+    private CTCell cell;
+
+    /** The workbook to which this cell belongs */
+    private final HSSFXMLWorkbook workbook;
+
+    public HSSFXMLCell(CTCell rawCell, HSSFXMLWorkbook workbook) {
+        this.cell = rawCell;
+        this.workbook = workbook;
+    }
+
+    /**
+     * Formats the cell's contents, based on its type,
+     *  and returns it as a string.
+     */
+    public String getStringValue() {
+
+        switch (cell.getT().intValue()) {
+        case STCellType.INT_S:
+            return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
+        case STCellType.INT_N:
+            return cell.getV();
+        // TODO: support other types
+        default:
+            return "UNSUPPORTED CELL TYPE: '" + cell.getT() + "'";
+        }
+    }
+
+    public String toString() {
+        return cell.getR() + " - " + getStringValue(); 
+    }
 }
index 16b93f61f409556400f28d455cb6fd6f93ce73d9..023b80f4d1a3293f868c2d2512c14a6afa8b77b0 100644 (file)
@@ -36,4 +36,8 @@ public class HSSFXMLWorkbook extends POIXMLDocument {
        public HSSFXML _getHSSFXML() {
                return hssfXML;
        }
+       
+       public String getSharedString(int index) {
+               return hssfXML.getSharedString(index);
+       }
 }
index 36a890b97a95ac6e90e8dce0b85fd8d132019b2a..c2b2aa6d9cf8d5b6c6d8e4260d1a5b1e77e5c63b 100644 (file)
@@ -102,6 +102,27 @@ public abstract class HXFDocument {
                return parts.get(0);
        }
 
+       /**
+        * Fetches the (single) PackagePart which is defined as
+        *  the supplied relation content type of the specified part, 
+        *  or null if none found.
+        * @param relationType The relation content type to search for
+        * @throws IllegalArgumentException If we find more than one part of that type
+        * TODO: this sucks! Make Package and PackagePart implement common intf that defines getRelationshipsByType & friends
+        */
+       protected PackagePart getSinglePartByRelationType(String relationType, PackagePart part) throws IllegalArgumentException, OpenXML4JException {
+               PackageRelationshipCollection rels =
+                       part.getRelationshipsByType(relationType);
+               if(rels.size() == 0) {
+                       return null;
+               }
+               if(rels.size() > 1) {
+                       throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!");
+               }
+               PackageRelationship rel = rels.getRelationship(0);
+               return getPackagePart(rel);
+       }
+       
        /**
         * Fetches the (single) PackagePart which is defined as
         *  the supplied relation content type of the base
@@ -109,7 +130,7 @@ public abstract class HXFDocument {
         * @param relationType The relation content type to search for
         * @throws IllegalArgumentException If we find more than one part of that type
         */
-       private PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
+       protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
                PackageRelationshipCollection rels =
                        container.getRelationshipsByType(relationType);
                if(rels.size() == 0) {
index d7fc4dc3a571078196397f22b8e25bfdb03f058e..f47639bf57ee1211e454ceabf4ae953d28f98523 100644 (file)
@@ -18,6 +18,8 @@ package org.apache.poi.hssf.extractor;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import junit.framework.TestCase;
 
@@ -170,7 +172,7 @@ public class TestHXFExcelExtractor extends TestCase {
         *  ExcelExtractor does, when we're both passed
         *  the same file, just saved as xls and xlsx
         */
-       public void BROKENtestComparedToOLE2() throws Exception {
+       public void testComparedToOLE2() throws Exception {
                HXFExcelExtractor ooxmlExtractor =
                        new HXFExcelExtractor(simpleXLSX.getPackage());
                ExcelExtractor ole2Extractor =
@@ -181,14 +183,13 @@ public class TestHXFExcelExtractor extends TestCase {
                for (int i = 0; i < extractors.length; i++) {
                        POITextExtractor extractor = extractors[i];
                        
-                       String text = extractor.getText().replace("\r", "");
+                       String text = extractor.getText().replaceAll("[\r\t]", "");
                        System.out.println(text.length());
                        System.out.println(text);
-                       assertTrue(text.startsWith("First Sheet\nTest spreadsheet\t\n2nd row\t2nd row 2nd column\n"));
-                       assertTrue(text.endsWith("13.0\nSheet3\n"));
-                       
-                       assertTrue(text.length() >= 214);
-                       assertTrue(text.length() <= 214);
+                       assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
+                       Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
+                       Matcher m = pattern.matcher(text);
+                       assertTrue(m.matches());                        
                }
        }
 }