* Helper class to extract text from an OOXML Excel file
*/
public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
- public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
- XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
- XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
- XSSFRelation.MACROS_WORKBOOK
- };
-
- private Locale locale;
- private XSSFWorkbook workbook;
- private boolean includeSheetNames = true;
- private boolean formulasNotResults = false;
- private boolean includeCellComments = false;
- private boolean includeHeadersFooters = true;
+ public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
+ XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
+ XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
+ XSSFRelation.MACROS_WORKBOOK
+ };
+
+ private Locale locale;
+ private XSSFWorkbook workbook;
+ private boolean includeSheetNames = true;
+ private boolean formulasNotResults = false;
+ private boolean includeCellComments = false;
+ private boolean includeHeadersFooters = true;
/**
* @deprecated Use {@link #XSSFExcelExtractor(org.apache.poi.openxml4j.opc.OPCPackage)} instead.
*/
- public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
- this(new XSSFWorkbook(path));
- }
- public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
- this(new XSSFWorkbook(container));
- }
- public XSSFExcelExtractor(XSSFWorkbook workbook) {
- super(workbook);
- this.workbook = workbook;
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" XSSFExcelExtractor <filename.xlsx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new XSSFExcelExtractor(args[0]);
- System.out.println(extractor.getText());
- }
-
- /**
- * Should sheet names be included? Default is true
- */
- public void setIncludeSheetNames(boolean includeSheetNames) {
- this.includeSheetNames = includeSheetNames;
- }
- /**
- * Should we return the formula itself, and not
- * the result it produces? Default is false
- */
- public void setFormulasNotResults(boolean formulasNotResults) {
- this.formulasNotResults = formulasNotResults;
- }
- /**
- * Should cell comments be included? Default is true
+ public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
+ this(new XSSFWorkbook(path));
+ }
+ public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+ this(new XSSFWorkbook(container));
+ }
+ public XSSFExcelExtractor(XSSFWorkbook workbook) {
+ super(workbook);
+ this.workbook = workbook;
+ }
+
+ public static void main(String[] args) throws Exception {
+ if(args.length < 1) {
+ System.err.println("Use:");
+ System.err.println(" XSSFExcelExtractor <filename.xlsx>");
+ System.exit(1);
+ }
+ POIXMLTextExtractor extractor =
+ new XSSFExcelExtractor(args[0]);
+ System.out.println(extractor.getText());
+ }
+
+ /**
+ * Should sheet names be included? Default is true
+ */
+ public void setIncludeSheetNames(boolean includeSheetNames) {
+ this.includeSheetNames = includeSheetNames;
+ }
+ /**
+ * Should we return the formula itself, and not
+ * the result it produces? Default is false
+ */
+ public void setFormulasNotResults(boolean formulasNotResults) {
+ this.formulasNotResults = formulasNotResults;
+ }
+ /**
+ * Should cell comments be included? Default is false
*/
public void setIncludeCellComments(boolean includeCellComments) {
this.includeCellComments = includeCellComments;
}
- /**
+ /**
* Should headers and footers be included? Default is true
*/
public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
* on the styles applied to the cells)
*/
public void setLocale(Locale locale) {
- this.locale = locale;
+ this.locale = locale;
+ }
+
+
+ /**
+ * Retreives the text contents of the file
+ */
+ public String getText() {
+ DataFormatter formatter;
+ if(locale == null) {
+ formatter = new DataFormatter();
+ } else {
+ formatter = new DataFormatter(locale);
+ }
+
+ StringBuffer text = new StringBuffer();
+ for(int i=0; i<workbook.getNumberOfSheets(); i++) {
+ XSSFSheet sheet = workbook.getSheetAt(i);
+ if(includeSheetNames) {
+ text.append(workbook.getSheetName(i)).append("\n");
+ }
+
+ // Header(s), if present
+ if(includeHeadersFooters) {
+ text.append(
+ extractHeaderFooter(sheet.getFirstHeader())
+ );
+ text.append(
+ extractHeaderFooter(sheet.getOddHeader())
+ );
+ text.append(
+ extractHeaderFooter(sheet.getEvenHeader())
+ );
+ }
+
+ // Rows and cells
+ for (Object rawR : sheet) {
+ Row row = (Row)rawR;
+ for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
+ Cell cell = ri.next();
+
+ // Is it a formula one?
+ if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
+ if (formulasNotResults) {
+ text.append(cell.getCellFormula());
+ } else {
+ if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) {
+ handleStringCell(text, cell);
+ } else {
+ handleNonStringCell(text, cell, formatter);
+ }
+ }
+ } else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
+ handleStringCell(text, cell);
+ } else {
+ handleNonStringCell(text, cell, formatter);
+ }
+
+ // Output the comment, if requested and exists
+ Comment comment = cell.getCellComment();
+ if(includeCellComments && comment != null) {
+ // Replace any newlines with spaces, otherwise it
+ // breaks the output
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
+ }
+
+ if(ri.hasNext())
+ text.append("\t");
+ }
+ text.append("\n");
+ }
+
+ // Finally footer(s), if present
+ if(includeHeadersFooters) {
+ text.append(
+ extractHeaderFooter(sheet.getFirstFooter())
+ );
+ text.append(
+ extractHeaderFooter(sheet.getOddFooter())
+ );
+ text.append(
+ extractHeaderFooter(sheet.getEvenFooter())
+ );
+ }
+ }
+
+ return text.toString();
+ }
+
+ private void handleStringCell(StringBuffer text, Cell cell) {
+ text.append(cell.getRichStringCellValue().getString());
+ }
+ private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) {
+ int type = cell.getCellType();
+ if (type == Cell.CELL_TYPE_FORMULA) {
+ type = cell.getCachedFormulaResultType();
+ }
+
+ if (type == Cell.CELL_TYPE_NUMERIC) {
+ CellStyle cs = cell.getCellStyle();
+
+ if (cs.getDataFormatString() != null) {
+ text.append(formatter.formatRawCellContents(
+ cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString()
+ ));
+ return;
+ }
+ }
+
+ // No supported styling applies to this cell
+ XSSFCell xcell = (XSSFCell)cell;
+ text.append( xcell.getRawValue() );
+ }
+
+ private String extractHeaderFooter(HeaderFooter hf) {
+ return ExcelExtractor._extractHeaderFooter(hf);
}
-
-
- /**
- * Retreives the text contents of the file
- */
- public String getText() {
- DataFormatter formatter;
- if(locale == null) {
- formatter = new DataFormatter();
- } else {
- formatter = new DataFormatter(locale);
- }
-
- StringBuffer text = new StringBuffer();
- for(int i=0; i<workbook.getNumberOfSheets(); i++) {
- XSSFSheet sheet = workbook.getSheetAt(i);
- if(includeSheetNames) {
- text.append(workbook.getSheetName(i)).append("\n");
- }
-
- // Header(s), if present
- if(includeHeadersFooters) {
- text.append(
- extractHeaderFooter(sheet.getFirstHeader())
- );
- text.append(
- extractHeaderFooter(sheet.getOddHeader())
- );
- text.append(
- extractHeaderFooter(sheet.getEvenHeader())
- );
- }
-
- // Rows and cells
- for (Object rawR : sheet) {
- Row row = (Row)rawR;
- for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
- Cell cell = ri.next();
-
- // Is it a formula one?
- if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
- if (formulasNotResults) {
- text.append(cell.getCellFormula());
- } else {
- if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) {
- handleStringCell(text, cell);
- } else {
- handleNonStringCell(text, cell, formatter);
- }
- }
- } else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
- handleStringCell(text, cell);
- } else {
- handleNonStringCell(text, cell, formatter);
- }
-
- // Output the comment, if requested and exists
- Comment comment = cell.getCellComment();
- if(includeCellComments && comment != null) {
- // Replace any newlines with spaces, otherwise it
- // breaks the output
- String commentText = comment.getString().getString().replace('\n', ' ');
- text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
- }
-
- if(ri.hasNext())
- text.append("\t");
- }
- text.append("\n");
- }
-
- // Finally footer(s), if present
- if(includeHeadersFooters) {
- text.append(
- extractHeaderFooter(sheet.getFirstFooter())
- );
- text.append(
- extractHeaderFooter(sheet.getOddFooter())
- );
- text.append(
- extractHeaderFooter(sheet.getEvenFooter())
- );
- }
- }
-
- return text.toString();
- }
-
- private void handleStringCell(StringBuffer text, Cell cell) {
- text.append(cell.getRichStringCellValue().getString());
- }
- private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) {
- int type = cell.getCellType();
- if (type == Cell.CELL_TYPE_FORMULA) {
- type = cell.getCachedFormulaResultType();
- }
-
- if (type == Cell.CELL_TYPE_NUMERIC) {
- CellStyle cs = cell.getCellStyle();
-
- if (cs.getDataFormatString() != null) {
- text.append(formatter.formatRawCellContents(
- cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString()
- ));
- return;
- }
- }
-
- // No supported styling applies to this cell
- XSSFCell xcell = (XSSFCell)cell;
- text.append( xcell.getRawValue() );
- }
-
- private String extractHeaderFooter(HeaderFooter hf) {
- return ExcelExtractor._extractHeaderFooter(hf);
- }
}