From d67507164c0bafbc07eb34192982b6826f3696be Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sat, 8 Mar 2008 18:06:41 +0000 Subject: [PATCH] More on converting the excel extractor to the new code git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635030 13f79535-47bb-0310-9956-ffa450edef68 --- .../xssf/extractor/XSSFExcelExtractor.java | 14 +++-- .../apache/poi/xssf/usermodel/XSSFCell.java | 15 +++++- .../xssf/usermodel/XSSFRichTextString.java | 3 ++ .../extractor/TestXSSFExcelExtractor.java | 52 +++++++++---------- 4 files changed, 51 insertions(+), 33 deletions(-) diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java index ba3bd1095b..9ebb3f053b 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -18,12 +18,14 @@ package org.apache.poi.xssf.extractor; import java.io.File; import java.io.IOException; +import java.util.Iterator; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.xmlbeans.XmlException; @@ -92,17 +94,21 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { for (Object rawR : sheet) { Row row = (Row)rawR; - for (Object rawC: row) { - Cell cell = (Cell)rawC; + for(Iterator ri = row.cellIterator(); ri.hasNext();) { + Cell cell = (Cell)ri.next(); // Is it a formula one? if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) { text.append(cell.getCellFormula()); + } else if(cell.getCellType() == Cell.CELL_TYPE_STRING) { + text.append(cell.getRichStringCellValue().getString()); } else { - text.append(cell.toString()); + XSSFCell xc = (XSSFCell)cell; + text.append(xc.getRawValue()); } - text.append(","); + if(ri.hasNext()) + text.append("\t"); } text.append("\n"); } diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java index 5d4294ef42..c1de5b63a8 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFCell.java @@ -78,7 +78,7 @@ public class XSSFCell implements Cell { } public String getCellFormula() { - if (STCellType.STR != cell.getT()) { + if(this.cell.getF() == null) { throw new NumberFormatException("You cannot get a formula from a non-formula cell"); } return this.cell.getF().getStringValue(); @@ -94,6 +94,12 @@ public class XSSFCell implements Cell { } public int getCellType() { + // Detecting formulas is quite pesky, + // as they don't get their type set + if(this.cell.getF() != null) { + return CELL_TYPE_FORMULA; + } + switch (this.cell.getT().intValue()) { case STCellType.INT_B: return CELL_TYPE_BOOLEAN; @@ -289,6 +295,13 @@ public class XSSFCell implements Cell { public String toString() { return "[" + this.row.getRowNum() + "," + this.getCellNum() + "] " + this.cell.getV(); } + + /** + * Returns the raw, underlying ooxml value for the cell + */ + public String getRawValue() { + return this.cell.getV(); + } /** * @throws RuntimeException if the bounds are exceeded. diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java index 03816309f2..8396004d52 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java @@ -79,6 +79,9 @@ public class XSSFRichTextString implements RichTextString { public String getString() { return string; } + public String toString() { + return string; + } public int length() { return string.length(); diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java index de1fc47011..f001d25c86 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java @@ -35,59 +35,56 @@ public class TestXSSFExcelExtractor extends TestCase { /** * A very simple file */ - private XSSFWorkbook xmlA; - private File fileA; + private File xmlA; /** * A fairly complex file */ - private XSSFWorkbook xmlB; + private File xmlB; /** * A fairly simple file - ooxml */ - private XSSFWorkbook simpleXLSX; + private File simpleXLSX; /** * A fairly simple file - ole2 */ - private HSSFWorkbook simpleXLS; + private File simpleXLS; protected void setUp() throws Exception { super.setUp(); - fileA = new File( + xmlA = new File( System.getProperty("HSSF.testdata.path") + File.separator + "sample.xlsx" ); - File fileB = new File( + assertTrue(xmlA.exists()); + xmlB = new File( System.getProperty("HSSF.testdata.path") + File.separator + "AverageTaxRates.xlsx" ); + assertTrue(xmlB.exists()); - File fileSOOXML = new File( + simpleXLSX = new File( System.getProperty("HSSF.testdata.path") + File.separator + "SampleSS.xlsx" ); - File fileSOLE2 = new File( + simpleXLS = new File( System.getProperty("HSSF.testdata.path") + File.separator + "SampleSS.xls" ); - - xmlA = new XSSFWorkbook(fileA.toString()); - xmlB = new XSSFWorkbook(fileB.toString()); - - simpleXLSX = new XSSFWorkbook(fileSOOXML.toString()); - simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2)); + assertTrue(simpleXLS.exists()); + assertTrue(simpleXLSX.exists()); } /** * Get text out of the simple file */ public void testGetSimpleText() throws Exception { - new XSSFExcelExtractor(fileA.toString()); - new XSSFExcelExtractor(xmlA); + new XSSFExcelExtractor(xmlA.toString()); + new XSSFExcelExtractor(new XSSFWorkbook(xmlA.toString())); XSSFExcelExtractor extractor = - new XSSFExcelExtractor(xmlA); + new XSSFExcelExtractor(xmlA.toString()); extractor.getText(); String text = extractor.getText(); @@ -110,8 +107,7 @@ public class TestXSSFExcelExtractor extends TestCase { "adipiscing\t777\n" + "elit\t888\n" + "Nunc\t999\n" + - "at\t4995\n" + - "\n\n", text); + "at\t4995\n", text); // Now get formulas not their values extractor.setFormulasNotResults(true); @@ -126,8 +122,7 @@ public class TestXSSFExcelExtractor extends TestCase { "adipiscing\t777\n" + "elit\t888\n" + "Nunc\t999\n" + - "at\tSUM(B1:B9)\n" + - "\n\n", text); + "at\tSUM(B1:B9)\n", text); // With sheet names too extractor.setIncludeSheetNames(true); @@ -143,17 +138,17 @@ public class TestXSSFExcelExtractor extends TestCase { "adipiscing\t777\n" + "elit\t888\n" + "Nunc\t999\n" + - "at\tSUM(B1:B9)\n\n" + - "Sheet2\n\n" + + "at\tSUM(B1:B9)\n" + + "Sheet2\n" + "Sheet3\n" , text); } public void testGetComplexText() throws Exception { - new XSSFExcelExtractor(xmlB); + new XSSFExcelExtractor(xmlB.toString()); XSSFExcelExtractor extractor = - new XSSFExcelExtractor(xmlB); + new XSSFExcelExtractor(new XSSFWorkbook(xmlB.toString())); extractor.getText(); String text = extractor.getText(); @@ -174,9 +169,10 @@ public class TestXSSFExcelExtractor extends TestCase { */ public void testComparedToOLE2() throws Exception { XSSFExcelExtractor ooxmlExtractor = - new XSSFExcelExtractor(simpleXLSX); + new XSSFExcelExtractor(simpleXLSX.toString()); ExcelExtractor ole2Extractor = - new ExcelExtractor(simpleXLS); + new ExcelExtractor(new HSSFWorkbook( + new FileInputStream(simpleXLS))); POITextExtractor[] extractors = new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; -- 2.39.5