source.dussan.org Git - poi.git/commitdiff
Improve the xlsx text extraction, and have proper tests for it
author Nick Burch <nick@apache.org>
Thu, 27 Dec 2007 13:02:17 +0000 (13:02 +0000)
committer Nick Burch <nick@apache.org>
Thu, 27 Dec 2007 13:02:17 +0000 (13:02 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@607063 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java [new file with mode: 0644]
src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java

index 29dcc21176dc7018577b9eab57792919f93ba63f..d2092c422b907db904d401439c58e2fb6e0c2188 100644 (file)
@@ -20,6 +20,7 @@ import java.io.IOException;
 
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hssf.usermodel.HSSFXMLCell;
 import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
@@ -89,16 +90,18 @@ public class HXFExcelExtractor extends POIXMLTextExtractor {
                                                        text.append("\t");
                                                }
                                                
+                                               boolean done = false;
+                                               
                                                // Is it a formula one?
                                                if(cell.getF() != null) {
                                                        if(formulasNotResults) {
                                                                text.append(cell.getF().getStringValue());
-                                                       } else {
-                                                               text.append(cell.getV());
+                                                               done = true;
                                                        }
-                                               } else {
-                                                       // Probably just want the v value
-                                                       text.append(cell.getV());
+                                               }
+                                               if(!done) {
+                                                       HSSFXMLCell uCell = new HSSFXMLCell(cell);
+                                                       text.append(uCell.getStringValue());
                                                }
                                        }
                                        text.append("\n");
diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
new file mode 100644 (file)
index 0000000..b1b3828
--- /dev/null
@@ -0,0 +1,48 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.usermodel;
+
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+
+/**
+ * User-facing wrapper around an underlying raw CTCell object, exposing its contents as a string
+ */
+public class HSSFXMLCell {
+       private CTCell cell;
+       public HSSFXMLCell(CTCell rawCell) { // keeps a reference to rawCell; no copy is made
+               this.cell = rawCell;
+       }
+       
+       /**
+        * Returns the cell's contents as a string: getV() when non-null, otherwise
+        *  getIs().getT() when getIs() is non-null, otherwise the string form of getS().
+        */
+       public String getStringValue() {
+               if(cell.getV() != null) {
+                       return cell.getV();
+               }
+               if(cell.getIs() != null) {
+                       return cell.getIs().getT();
+               }
+               // TODO: Formatting. NOTE(review): getS() looks like a style index rather than cell content - confirm this fallback is intended
+               return Long.toString(cell.getS());
+       }
+       
+       public String toString() { // the cell's r value, then a dash separator, then getStringValue()
+               return cell.getR() + " - " + getStringValue(); 
+       }
+}
index fafca345e09897bb5524ec2d071be4f63efabf50..94123dac77d44ddb77be1c529ab14d29807a463d 100644 (file)
@@ -66,10 +66,78 @@ public class TestHXFExcelExtractor extends TestCase {
                
                String text = extractor.getText();
                assertTrue(text.length() > 0);
-               System.err.println(text);
                
                // Check sheet names
                assertTrue(text.startsWith("Sheet1"));
                assertTrue(text.endsWith("Sheet3\n"));
+               
+               // Now without, will have text
+               extractor.setIncludeSheetNames(false);
+               text = extractor.getText();
+               assertEquals(
+                               "0\t111\n" +
+                               "1\t222\n" +
+                               "2\t333\n" +
+                               "3\t444\n" +
+                               "4\t555\n" +
+                               "5\t666\n" +
+                               "6\t777\n" +
+                               "7\t888\n" +
+                               "8\t999\n" +
+                               "9\t4995\n" +
+                               "\n\n", text);
+               
+               // Now get formulas not their values
+               extractor.setFormulasNotResults(true);
+               text = extractor.getText();
+               assertEquals(
+                               "0\t111\n" +
+                               "1\t222\n" +
+                               "2\t333\n" +
+                               "3\t444\n" +
+                               "4\t555\n" +
+                               "5\t666\n" +
+                               "6\t777\n" +
+                               "7\t888\n" +
+                               "8\t999\n" +
+                               "9\tSUM(B1:B9)\n" +
+                               "\n\n", text);
+               
+               // With sheet names too
+               extractor.setIncludeSheetNames(true);
+               text = extractor.getText();
+               assertEquals(
+                               "Sheet1\n" +
+                               "0\t111\n" +
+                               "1\t222\n" +
+                               "2\t333\n" +
+                               "3\t444\n" +
+                               "4\t555\n" +
+                               "5\t666\n" +
+                               "6\t777\n" +
+                               "7\t888\n" +
+                               "8\t999\n" +
+                               "9\tSUM(B1:B9)\n\n" +
+                               "Sheet2\n\n" +
+                               "Sheet3\n"
+                               , text);
+       }
+       
+       public void testGetComplexText() throws Exception { // text extraction from the xmlB fixture (declared outside this hunk)
+               new HXFExcelExtractor(xmlB.getPackage());
+               new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
+               // Both instances above are discarded - presumably only checking that each constructor runs without throwing
+               HXFExcelExtractor extractor = 
+                       new HXFExcelExtractor(xmlB.getPackage());
+               extractor.getText();
+               // The first getText() result is discarded; only the second call's result is checked below
+               String text = extractor.getText();
+               assertTrue(text.length() > 0);
+               
+               // Only the start of the output is pinned: might not have all formatting it should do!
+               assertTrue(text.startsWith(
+                                               "Avgtxfull\n" +
+                                               "3\t13\t3\t2\t2\t3\t2\t"        
+               ));
        }
 }