import java.io.File;
import java.io.IOException;
+import java.util.Iterator;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
+ // Emit one line of text per row, with a tab between cells
for (Object rawR : sheet) {
Row row = (Row)rawR;
- for (Object rawC: row) {
- Cell cell = (Cell)rawC;
+ for(Iterator ri = row.cellIterator(); ri.hasNext();) {
+ Cell cell = (Cell)ri.next();
// Is it a formula one?
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
text.append(cell.getCellFormula());
+ } else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
+ text.append(cell.getRichStringCellValue().getString());
} else {
- text.append(cell.toString());
+ XSSFCell xc = (XSSFCell)cell;
+ // The raw value may be null (a cell with no stored value);
+ // appending a null reference would put the literal text
+ // "null" into the extracted output, so skip it instead
+ String raw = xc.getRawValue();
+ if(raw != null) {
+ text.append(raw);
+ }
}
- text.append(",");
+ // Separator only between cells, never after the last one
+ if(ri.hasNext())
+ text.append("\t");
}
text.append("\n");
}
}
public String getCellFormula() {
- if (STCellType.STR != cell.getT()) {
+ if(this.cell.getF() == null) { // no formula on the underlying cell bean: reject instead of hitting a null below
throw new NumberFormatException("You cannot get a formula from a non-formula cell");
}
return this.cell.getF().getStringValue();
}
public int getCellType() {
+ // Detecting formulas is quite pesky,
+ // as they don't get their type set
+ if(this.cell.getF() != null) {
+ return CELL_TYPE_FORMULA;
+ }
+
switch (this.cell.getT().intValue()) {
case STCellType.INT_B:
return CELL_TYPE_BOOLEAN;
public String toString() {
return "[" + this.row.getRowNum() + "," + this.getCellNum() + "] " + this.cell.getV();
}
+
+ /**
+ * Returns the raw, underlying ooxml value for the cell
+ * <p>
+ * This is whatever the underlying cell bean's getV() returns, handed
+ * back as-is: no type conversion or formatting is applied here.
+ *
+ * @return the raw value string. NOTE(review): presumably null when
+ *  the cell holds no value - confirm against the generated bean
+ */
+ public String getRawValue() {
+ return this.cell.getV();
+ }
/**
* @throws RuntimeException if the bounds are exceeded.
/**
* A very simple file
*/
- private XSSFWorkbook xmlA;
- private File fileA;
+ private File xmlA;
/**
* A fairly complex file
*/
- private XSSFWorkbook xmlB;
+ private File xmlB;
/**
* A fairly simple file - ooxml
*/
- private XSSFWorkbook simpleXLSX;
+ private File simpleXLSX;
/**
* A fairly simple file - ole2
*/
- private HSSFWorkbook simpleXLS;
+ private File simpleXLS;
protected void setUp() throws Exception {
super.setUp();
- fileA = new File(
+ xmlA = new File( // test files are located via the HSSF.testdata.path system property
System.getProperty("HSSF.testdata.path") +
File.separator + "sample.xlsx"
);
- File fileB = new File(
+ assertTrue(xmlA.exists()); // fail during setUp if the sample file is missing
+ xmlB = new File(
System.getProperty("HSSF.testdata.path") +
File.separator + "AverageTaxRates.xlsx"
);
+ assertTrue(xmlB.exists());
- File fileSOOXML = new File(
+ simpleXLSX = new File(
System.getProperty("HSSF.testdata.path") +
File.separator + "SampleSS.xlsx"
);
- File fileSOLE2 = new File(
+ simpleXLS = new File(
System.getProperty("HSSF.testdata.path") +
File.separator + "SampleSS.xls"
);
-
- xmlA = new XSSFWorkbook(fileA.toString());
- xmlB = new XSSFWorkbook(fileB.toString());
-
- simpleXLSX = new XSSFWorkbook(fileSOOXML.toString());
- simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
+ assertTrue(simpleXLS.exists()); // only File handles are kept now; no workbook is opened in setUp
+ assertTrue(simpleXLSX.exists());
}
/**
* Get text out of the simple file
*/
public void testGetSimpleText() throws Exception {
- new XSSFExcelExtractor(fileA.toString());
- new XSSFExcelExtractor(xmlA);
+ new XSSFExcelExtractor(xmlA.toString());
+ new XSSFExcelExtractor(new XSSFWorkbook(xmlA.toString()));
XSSFExcelExtractor extractor =
- new XSSFExcelExtractor(xmlA);
+ new XSSFExcelExtractor(xmlA.toString());
extractor.getText();
String text = extractor.getText();
"adipiscing\t777\n" +
"elit\t888\n" +
"Nunc\t999\n" +
- "at\t4995\n" +
- "\n\n", text);
+ "at\t4995\n", text);
// Now get formulas not their values
extractor.setFormulasNotResults(true);
"adipiscing\t777\n" +
"elit\t888\n" +
"Nunc\t999\n" +
- "at\tSUM(B1:B9)\n" +
- "\n\n", text);
+ "at\tSUM(B1:B9)\n", text);
// With sheet names too
extractor.setIncludeSheetNames(true);
"adipiscing\t777\n" +
"elit\t888\n" +
"Nunc\t999\n" +
- "at\tSUM(B1:B9)\n\n" +
- "Sheet2\n\n" +
+ "at\tSUM(B1:B9)\n" +
+ "Sheet2\n" +
"Sheet3\n"
, text);
}
public void testGetComplexText() throws Exception {
- new XSSFExcelExtractor(xmlB);
+ new XSSFExcelExtractor(xmlB.toString());
XSSFExcelExtractor extractor =
- new XSSFExcelExtractor(xmlB);
+ new XSSFExcelExtractor(new XSSFWorkbook(xmlB.toString()));
extractor.getText();
String text = extractor.getText();
*/
public void testComparedToOLE2() throws Exception {
XSSFExcelExtractor ooxmlExtractor =
- new XSSFExcelExtractor(simpleXLSX);
+ new XSSFExcelExtractor(simpleXLSX.toString());
ExcelExtractor ole2Extractor =
- new ExcelExtractor(simpleXLS);
+ new ExcelExtractor(new HSSFWorkbook(
+ new FileInputStream(simpleXLS)));
POITextExtractor[] extractors =
new POITextExtractor[] { ooxmlExtractor, ole2Extractor };