diff options
-rw-r--r-- | poi-ooxml/src/main/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java | 998 |
1 files changed, 502 insertions, 496 deletions
diff --git a/poi-ooxml/src/main/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/poi-ooxml/src/main/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index 14b5046593..42dca23e30 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/poi-ooxml/src/main/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -38,12 +38,12 @@ import org.xml.sax.helpers.DefaultHandler; /** * This class handles the streaming processing of a sheet#.xml - * sheet part of a XSSF .xlsx file, and generates - * row and cell events for it. - * + * sheet part of a XSSF .xlsx file, and generates + * row and cell events for it. + * <p> * This allows to build functionality which reads huge files * without needing large amounts of main memory. - * + * <p> * See {@link SheetContentsHandler} for the interface that * you need to implement for reading information from a file. */ @@ -51,499 +51,505 @@ public class XSSFSheetXMLHandler extends DefaultHandler { private static final Logger LOG = LogManager.getLogger(XSSFSheetXMLHandler.class); /** - * These are the different kinds of cells we support. - * We keep track of the current one between - * the start and end. - */ - enum xssfDataType { - BOOLEAN, - ERROR, - FORMULA, - INLINE_STRING, - SST_STRING, - NUMBER, - } - - /** - * Table with the styles used for formatting - */ - private final Styles stylesTable; - - /** - * Table with cell comments - */ - private final Comments comments; - - /** - * Read only access to the shared strings table, for looking - * up (most) string cell's contents - */ - private final SharedStrings sharedStringsTable; - - /** - * Where our text is going - */ - private final SheetContentsHandler output; - - // Set when V start element is seen - private boolean vIsOpen; - // Set when F start element is seen - private boolean fIsOpen; - // Set when an Inline String "is" is seen - private boolean isIsOpen; - // Set when a header/footer element is seen - private boolean hfIsOpen; - - // Set when cell start element is seen; - // used when cell close element is seen. - private xssfDataType nextDataType; - - // Used to format numeric cell values. - private short formatIndex; - private String formatString; - private final DataFormatter formatter; - private int rowNum; - private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well - private String cellRef; - private final boolean formulasNotResults; - - // Gathers characters as they are seen. - private final StringBuilder value = new StringBuilder(64); - private final StringBuilder formula = new StringBuilder(64); - private final StringBuilder headerFooter = new StringBuilder(64); - - private Queue<CellAddress> commentCellRefs; - - /** - * Accepts objects needed while parsing. - * - * @param styles Table of styles - * @param strings Table of shared strings - */ - public XSSFSheetXMLHandler( - Styles styles, - Comments comments, - SharedStrings strings, - SheetContentsHandler sheetContentsHandler, - DataFormatter dataFormatter, - boolean formulasNotResults) { - this.stylesTable = styles; - this.comments = comments; - this.sharedStringsTable = strings; - this.output = sheetContentsHandler; - this.formulasNotResults = formulasNotResults; - this.nextDataType = xssfDataType.NUMBER; - this.formatter = dataFormatter; - init(comments); - } - - /** - * Accepts objects needed while parsing. - * - * @param styles Table of styles - * @param strings Table of shared strings - */ - public XSSFSheetXMLHandler( - Styles styles, - SharedStrings strings, - SheetContentsHandler sheetContentsHandler, - DataFormatter dataFormatter, - boolean formulasNotResults) { - this(styles, null, strings, sheetContentsHandler, dataFormatter, formulasNotResults); - } - - /** - * Accepts objects needed while parsing. - * - * @param styles Table of styles - * @param strings Table of shared strings - */ - public XSSFSheetXMLHandler( - Styles styles, - SharedStrings strings, - SheetContentsHandler sheetContentsHandler, - boolean formulasNotResults) { - this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); - } - - private void init(Comments commentsTable) { - if (commentsTable != null) { - commentCellRefs = new LinkedList<>(); - for (Iterator<CellAddress> iter = commentsTable.getCellAddresses(); iter.hasNext(); ) { - commentCellRefs.add(iter.next()); - } - } - } - - private boolean isTextTag(String name) { - if("v".equals(name)) { - // Easy, normal v text tag - return true; - } - if("inlineStr".equals(name)) { - // Easy inline string - return true; - } - if("t".equals(name) && isIsOpen) { - // Inline string <is><t>...</t></is> pair - return true; - } - // It isn't a text tag - return false; - } - - @Override - @SuppressWarnings("unused") - public void startElement(String uri, String localName, String qName, - Attributes attributes) throws SAXException { - - if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { - return; - } - - if (isTextTag(localName)) { - vIsOpen = true; - // Clear contents cache - if (!isIsOpen) { - value.setLength(0); - } - } else if ("is".equals(localName)) { - // Inline string outer tag - isIsOpen = true; - } else if ("f".equals(localName)) { - // Clear contents cache - formula.setLength(0); - - // Mark us as being a formula if not already - nextDataType = xssfDataType.FORMULA; - - // Decide where to get the formula string from - String type = attributes.getValue("t"); - if(type != null && type.equals("shared")) { - // Is it the one that defines the shared, or uses it? - String ref = attributes.getValue("ref"); - String si = attributes.getValue("si"); - - if(ref != null) { - // This one defines it - // TODO Save it somewhere - fIsOpen = true; - } else { - // This one uses a shared formula - // TODO Retrieve the shared formula and tweak it to - // match the current cell - if(formulasNotResults) { - LOG.atWarn().log("shared formulas not yet supported!"); - } /*else { + * These are the different kinds of cells we support. + * We keep track of the current one between + * the start and end. + */ + enum xssfDataType { + BOOLEAN, + ERROR, + FORMULA, + INLINE_STRING, + SST_STRING, + NUMBER, + } + + /** + * Table with the styles used for formatting + */ + private final Styles stylesTable; + + /** + * Table with cell comments + */ + private final Comments comments; + + /** + * Read only access to the shared strings table, for looking + * up (most) string cell's contents + */ + private final SharedStrings sharedStringsTable; + + /** + * Where our text is going + */ + private final SheetContentsHandler output; + + // Set when V start element is seen + private boolean vIsOpen; + // Set when F start element is seen + private boolean fIsOpen; + // Set when an Inline String "is" is seen + private boolean isIsOpen; + // Set when a header/footer element is seen + private boolean hfIsOpen; + + // Set when cell start element is seen; + // used when cell close element is seen. + private xssfDataType nextDataType; + + // Used to format numeric cell values. + private short formatIndex; + private String formatString; + private final DataFormatter formatter; + private int rowNum; + private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well + private String cellRef; + private final boolean formulasNotResults; + + // Gathers characters as they are seen. + private final StringBuilder value = new StringBuilder(64); + private final StringBuilder formula = new StringBuilder(64); + private final StringBuilder headerFooter = new StringBuilder(64); + + private Queue<CellAddress> commentCellRefs; + + /** + * Accepts objects needed while parsing. + * + * @param styles Table of styles + * @param strings Table of shared strings + */ + public XSSFSheetXMLHandler( + Styles styles, + Comments comments, + SharedStrings strings, + SheetContentsHandler sheetContentsHandler, + DataFormatter dataFormatter, + boolean formulasNotResults) { + this.stylesTable = styles; + this.comments = comments; + this.sharedStringsTable = strings; + this.output = sheetContentsHandler; + this.formulasNotResults = formulasNotResults; + this.nextDataType = xssfDataType.NUMBER; + this.formatter = dataFormatter; + init(comments); + } + + /** + * Accepts objects needed while parsing. + * + * @param styles Table of styles + * @param strings Table of shared strings + */ + public XSSFSheetXMLHandler( + Styles styles, + SharedStrings strings, + SheetContentsHandler sheetContentsHandler, + DataFormatter dataFormatter, + boolean formulasNotResults) { + this(styles, null, strings, sheetContentsHandler, dataFormatter, formulasNotResults); + } + + /** + * Accepts objects needed while parsing. + * + * @param styles Table of styles + * @param strings Table of shared strings + */ + public XSSFSheetXMLHandler( + Styles styles, + SharedStrings strings, + SheetContentsHandler sheetContentsHandler, + boolean formulasNotResults) { + this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); + } + + private void init(Comments commentsTable) { + if (commentsTable != null) { + commentCellRefs = new LinkedList<>(); + for (Iterator<CellAddress> iter = commentsTable.getCellAddresses(); iter.hasNext(); ) { + commentCellRefs.add(iter.next()); + } + } + } + + private boolean isTextTag(String name) { + if ("v".equals(name)) { + // Easy, normal v text tag + return true; + } + if ("inlineStr".equals(name)) { + // Easy inline string + return true; + } + if ("t".equals(name) && isIsOpen) { + // Inline string <is><t>...</t></is> pair + return true; + } + // It isn't a text tag + return false; + } + + @Override + @SuppressWarnings("unused") + public void startElement(String uri, String localName, String qName, + Attributes attributes) throws SAXException { + + if (uri != null && !uri.equals(NS_SPREADSHEETML)) { + return; + } + + if (isTextTag(localName)) { + vIsOpen = true; + // Clear contents cache + if (!isIsOpen) { + value.setLength(0); + } + } else if ("is".equals(localName)) { + // Inline string outer tag + isIsOpen = true; + } else if ("f".equals(localName)) { + // Clear contents cache + formula.setLength(0); + + // Mark us as being a formula if not already + nextDataType = xssfDataType.FORMULA; + + // Decide where to get the formula string from + String type = attributes.getValue("t"); + if (type != null && type.equals("shared")) { + // Is it the one that defines the shared, or uses it? + String ref = attributes.getValue("ref"); + String si = attributes.getValue("si"); + + if (ref != null) { + // This one defines it + // TODO Save it somewhere + fIsOpen = true; + } else { + // This one uses a shared formula + // TODO Retrieve the shared formula and tweak it to + // match the current cell + if (formulasNotResults) { + LOG.atWarn().log("shared formulas not yet supported!"); + } /*else { // It's a shared formula, so we can't get at the formula string yet // However, they don't care about the formula string, so that's ok! }*/ - } - } else { - fIsOpen = true; - } - } - else if("oddHeader".equals(localName) || "evenHeader".equals(localName) || - "firstHeader".equals(localName) || "firstFooter".equals(localName) || - "oddFooter".equals(localName) || "evenFooter".equals(localName)) { - hfIsOpen = true; - // Clear contents cache - headerFooter.setLength(0); - } - else if("row".equals(localName)) { - String rowNumStr = attributes.getValue("r"); - if(rowNumStr != null) { - rowNum = Integer.parseInt(rowNumStr) - 1; - } else { - rowNum = nextRowNum; - } - output.startRow(rowNum); - } - // c => cell - else if ("c".equals(localName)) { - // Set up defaults. - this.nextDataType = xssfDataType.NUMBER; - this.formatIndex = -1; - this.formatString = null; - cellRef = attributes.getValue("r"); - String cellType = attributes.getValue("t"); - String cellStyleStr = attributes.getValue("s"); - if ("b".equals(cellType)) - nextDataType = xssfDataType.BOOLEAN; - else if ("e".equals(cellType)) - nextDataType = xssfDataType.ERROR; - else if ("inlineStr".equals(cellType)) - nextDataType = xssfDataType.INLINE_STRING; - else if ("s".equals(cellType)) - nextDataType = xssfDataType.SST_STRING; - else if ("str".equals(cellType)) - nextDataType = xssfDataType.FORMULA; - else { - // Number, but almost certainly with a special style or format - XSSFCellStyle style = null; - if (stylesTable != null) { - if (cellStyleStr != null) { - int styleIndex = Integer.parseInt(cellStyleStr); - style = stylesTable.getStyleAt(styleIndex); - } else if (stylesTable.getNumCellStyles() > 0) { - style = stylesTable.getStyleAt(0); - } - } - if (style != null) { - this.formatIndex = style.getDataFormat(); - this.formatString = style.getDataFormatString(); - if (this.formatString == null) - this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex); - } - } - } - } - - @Override - public void endElement(String uri, String localName, String qName) - throws SAXException { - - if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { - return; - } - - // v => contents of a cell - if (isTextTag(localName)) { - vIsOpen = false; - - if (!isIsOpen) { - outputCell(); - value.setLength(0); - } - } else if ("f".equals(localName)) { - fIsOpen = false; - } else if ("is".equals(localName)) { - isIsOpen = false; - outputCell(); - value.setLength(0); - } else if ("row".equals(localName)) { - // Handle any "missing" cells which had comments attached - checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); - - // Finish up the row - output.endRow(rowNum); - - // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well - nextRowNum = rowNum + 1; - } else if ("sheetData".equals(localName)) { - // Handle any "missing" cells which had comments attached - checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); - - // indicate that this sheet is now done - output.endSheet(); - } - else if("oddHeader".equals(localName) || "evenHeader".equals(localName) || - "firstHeader".equals(localName)) { - hfIsOpen = false; - output.headerFooter(headerFooter.toString(), true, localName); - } - else if("oddFooter".equals(localName) || "evenFooter".equals(localName) || - "firstFooter".equals(localName)) { - hfIsOpen = false; - output.headerFooter(headerFooter.toString(), false, localName); - } - } - - /** - * Captures characters only if a suitable element is open. - * Originally was just "v"; extended for inlineStr also. - */ - @Override - public void characters(char[] ch, int start, int length) - throws SAXException { - if (vIsOpen) { - value.append(ch, start, length); - } - if (fIsOpen) { - formula.append(ch, start, length); - } - if (hfIsOpen) { - headerFooter.append(ch, start, length); - } - } - - private void outputCell() { - String thisStr = null; - - // Process the value contents as required, now we have it all - switch (nextDataType) { - case BOOLEAN: - char first = value.charAt(0); - thisStr = first == '0' ? "FALSE" : "TRUE"; - break; - - case ERROR: - thisStr = "ERROR:" + value; - break; - - case FORMULA: - if(formulasNotResults) { - thisStr = formula.toString(); - } else { - String fv = value.toString(); - - if (this.formatString != null) { - try { - // Try to use the value as a formattable number - double d = Double.parseDouble(fv); - thisStr = formatter.formatRawCellContents(d, this.formatIndex, this.formatString); - } catch(NumberFormatException e) { - // Formula is a String result not a Numeric one - thisStr = fv; - } - } else { - // No formatting applied, just do raw value in all cases - thisStr = fv; - } - } - break; - - case INLINE_STRING: - // TODO: Can these ever have formatting on them? - XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); - thisStr = rtsi.toString(); - break; - - case SST_STRING: - String sstIndex = value.toString(); - if (sstIndex.length() > 0) { - try { - int idx = Integer.parseInt(sstIndex); - RichTextString rtss = sharedStringsTable.getItemAt(idx); - thisStr = rtss.toString(); - } catch (NumberFormatException ex) { - LOG.atError().withThrowable(ex).log("Failed to parse SST index '{}'", sstIndex); - } - } - break; - - case NUMBER: - String n = value.toString(); - if (this.formatString != null && n.length() > 0) - thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); - else - thisStr = n; - break; - - default: - thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; - break; - } - - // Do we have a comment for this cell? - checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL); - XSSFComment comment = comments != null ? comments.findCellComment(new CellAddress(cellRef)) : null; - - // Output - output.cell(cellRef, thisStr, comment); - } - - /** - * Do a check for, and output, comments in otherwise empty cells. - */ - private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) { - if (commentCellRefs != null && !commentCellRefs.isEmpty()) { - // If we've reached the end of the sheet data, output any - // comments we haven't yet already handled - if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) { - while (!commentCellRefs.isEmpty()) { - outputEmptyCellComment(commentCellRefs.remove()); - } - return; - } - - // At the end of a row, handle any comments for "missing" rows before us - if (this.cellRef == null) { - if (type == EmptyCellCommentsCheckType.END_OF_ROW) { - while (!commentCellRefs.isEmpty()) { - if (commentCellRefs.peek().getRow() == rowNum) { - outputEmptyCellComment(commentCellRefs.remove()); - } else { - return; - } - } - return; - } else { - throw new IllegalStateException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum); - } - } - - CellAddress nextCommentCellRef; - do { - CellAddress cellRef = new CellAddress(this.cellRef); - CellAddress peekCellRef = commentCellRefs.peek(); - if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) { - // remove the comment cell ref from the list if we're about to handle it alongside the cell content - commentCellRefs.remove(); - return; - } else { - // fill in any gaps if there are empty cells with comment mixed in with non-empty cells - int comparison = peekCellRef.compareTo(cellRef); - if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW && peekCellRef.getRow() <= rowNum) { - nextCommentCellRef = commentCellRefs.remove(); - outputEmptyCellComment(nextCommentCellRef); - } else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL && peekCellRef.getRow() <= rowNum) { - nextCommentCellRef = commentCellRefs.remove(); - outputEmptyCellComment(nextCommentCellRef); - } else { - nextCommentCellRef = null; - } - } - } while (nextCommentCellRef != null && !commentCellRefs.isEmpty()); - } - } - - - /** - * Output an empty-cell comment. - */ - private void outputEmptyCellComment(CellAddress cellRef) { - XSSFComment comment = comments.findCellComment(cellRef); - output.cell(cellRef.formatAsString(), null, comment); - } - - private enum EmptyCellCommentsCheckType { - CELL, - END_OF_ROW, - END_OF_SHEET_DATA - } - - /** - * This interface allows to provide callbacks when reading - * a sheet in streaming mode. - * - * The XSLX file is usually read via {@link XSSFReader}. - * - * By implementing the methods, you can process arbitrarily - * large files without exhausting main memory. - */ - public interface SheetContentsHandler { - /** A row with the (zero based) row number has started */ - void startRow(int rowNum); - - /** A row with the (zero based) row number has ended */ - void endRow(int rowNum); - - /** - * A cell, with the given formatted value (may be null), - * and possibly a comment (may be null), was encountered. - * - * Sheets that have missing or empty cells may result in - * sparse calls to <code>cell</code>. See the code in - * <code>poi-examples/src/main/java/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java</code> - * for an example of how to handle this scenario. - */ - void cell(String cellReference, String formattedValue, XSSFComment comment); - - /** A header or footer has been encountered */ - default void headerFooter(String text, boolean isHeader, String tagName) {} - - /** Signal that the end of a sheet was been reached */ - default void endSheet() {} - } + } + } else { + fIsOpen = true; + } + } else if ("oddHeader".equals(localName) || "evenHeader".equals(localName) || + "firstHeader".equals(localName) || "firstFooter".equals(localName) || + "oddFooter".equals(localName) || "evenFooter".equals(localName)) { + hfIsOpen = true; + // Clear contents cache + headerFooter.setLength(0); + } else if ("row".equals(localName)) { + String rowNumStr = attributes.getValue("r"); + if (rowNumStr != null) { + rowNum = Integer.parseInt(rowNumStr) - 1; + } else { + rowNum = nextRowNum; + } + output.startRow(rowNum); + } + // c => cell + else if ("c".equals(localName)) { + // Set up defaults. + this.nextDataType = xssfDataType.NUMBER; + this.formatIndex = -1; + this.formatString = null; + cellRef = attributes.getValue("r"); + String cellType = attributes.getValue("t"); + String cellStyleStr = attributes.getValue("s"); + if ("b".equals(cellType)) + nextDataType = xssfDataType.BOOLEAN; + else if ("e".equals(cellType)) + nextDataType = xssfDataType.ERROR; + else if ("inlineStr".equals(cellType)) + nextDataType = xssfDataType.INLINE_STRING; + else if ("s".equals(cellType)) + nextDataType = xssfDataType.SST_STRING; + else if ("str".equals(cellType)) + nextDataType = xssfDataType.FORMULA; + else { + // Number, but almost certainly with a special style or format + XSSFCellStyle style = null; + if (stylesTable != null) { + if (cellStyleStr != null) { + int styleIndex = Integer.parseInt(cellStyleStr); + style = stylesTable.getStyleAt(styleIndex); + } else if (stylesTable.getNumCellStyles() > 0) { + style = stylesTable.getStyleAt(0); + } + } + if (style != null) { + this.formatIndex = style.getDataFormat(); + this.formatString = style.getDataFormatString(); + if (this.formatString == null) + this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex); + } + } + } + } + + @Override + public void endElement(String uri, String localName, String qName) + throws SAXException { + + if (uri != null && !uri.equals(NS_SPREADSHEETML)) { + return; + } + + // v => contents of a cell + if (isTextTag(localName)) { + vIsOpen = false; + + if (!isIsOpen) { + outputCell(); + value.setLength(0); + } + } else if ("f".equals(localName)) { + fIsOpen = false; + } else if ("is".equals(localName)) { + isIsOpen = false; + outputCell(); + value.setLength(0); + } else if ("row".equals(localName)) { + // Handle any "missing" cells which had comments attached + checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); + + // Finish up the row + output.endRow(rowNum); + + // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well + nextRowNum = rowNum + 1; + } else if ("sheetData".equals(localName)) { + // Handle any "missing" cells which had comments attached + checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); + + // indicate that this sheet is now done + output.endSheet(); + } else if ("oddHeader".equals(localName) || "evenHeader".equals(localName) || + "firstHeader".equals(localName)) { + hfIsOpen = false; + output.headerFooter(headerFooter.toString(), true, localName); + } else if ("oddFooter".equals(localName) || "evenFooter".equals(localName) || + "firstFooter".equals(localName)) { + hfIsOpen = false; + output.headerFooter(headerFooter.toString(), false, localName); + } + } + + /** + * Captures characters only if a suitable element is open. + * Originally was just "v"; extended for inlineStr also. + */ + @Override + public void characters(char[] ch, int start, int length) + throws SAXException { + if (vIsOpen) { + value.append(ch, start, length); + } + if (fIsOpen) { + formula.append(ch, start, length); + } + if (hfIsOpen) { + headerFooter.append(ch, start, length); + } + } + + private void outputCell() { + String thisStr = null; + + // Process the value contents as required, now we have it all + switch (nextDataType) { + case BOOLEAN: + char first = value.charAt(0); + thisStr = first == '0' ? "FALSE" : "TRUE"; + break; + + case ERROR: + thisStr = "ERROR:" + value; + break; + + case FORMULA: + if (formulasNotResults) { + thisStr = formula.toString(); + } else { + String fv = value.toString(); + + if (this.formatString != null) { + try { + // Try to use the value as a formattable number + double d = Double.parseDouble(fv); + thisStr = formatter.formatRawCellContents(d, this.formatIndex, this.formatString); + } catch (NumberFormatException e) { + // Formula is a String result not a Numeric one + thisStr = fv; + } + } else { + // No formatting applied, just do raw value in all cases + thisStr = fv; + } + } + break; + + case INLINE_STRING: + // TODO: Can these ever have formatting on them? + XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); + thisStr = rtsi.toString(); + break; + + case SST_STRING: + String sstIndex = value.toString(); + if (sstIndex.length() > 0) { + try { + int idx = Integer.parseInt(sstIndex); + RichTextString rtss = sharedStringsTable.getItemAt(idx); + thisStr = rtss.toString(); + } catch (NumberFormatException ex) { + LOG.atError().withThrowable(ex).log("Failed to parse SST index '{}'", sstIndex); + } + } + break; + + case NUMBER: + String n = value.toString(); + if (this.formatString != null && n.length() > 0) + thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); + else + thisStr = n; + break; + + default: + thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; + break; + } + + // Do we have a comment for this cell? + checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL); + XSSFComment comment = comments != null ? comments.findCellComment(new CellAddress(cellRef)) : null; + + // Output + output.cell(cellRef, thisStr, comment); + } + + /** + * Do a check for, and output, comments in otherwise empty cells. + */ + private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) { + if (commentCellRefs != null && !commentCellRefs.isEmpty()) { + // If we've reached the end of the sheet data, output any + // comments we haven't yet already handled + if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) { + while (!commentCellRefs.isEmpty()) { + outputEmptyCellComment(commentCellRefs.remove()); + } + return; + } + + // At the end of a row, handle any comments for "missing" rows before us + if (this.cellRef == null) { + if (type == EmptyCellCommentsCheckType.END_OF_ROW) { + while (!commentCellRefs.isEmpty()) { + if (commentCellRefs.peek().getRow() == rowNum) { + outputEmptyCellComment(commentCellRefs.remove()); + } else { + return; + } + } + return; + } else { + throw new IllegalStateException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum); + } + } + + CellAddress nextCommentCellRef; + do { + CellAddress cellRef = new CellAddress(this.cellRef); + CellAddress peekCellRef = commentCellRefs.peek(); + if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) { + // remove the comment cell ref from the list if we're about to handle it alongside the cell content + commentCellRefs.remove(); + return; + } else { + // fill in any gaps if there are empty cells with comment mixed in with non-empty cells + int comparison = peekCellRef.compareTo(cellRef); + if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW && peekCellRef.getRow() <= rowNum) { + nextCommentCellRef = commentCellRefs.remove(); + outputEmptyCellComment(nextCommentCellRef); + } else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL && peekCellRef.getRow() <= rowNum) { + nextCommentCellRef = commentCellRefs.remove(); + outputEmptyCellComment(nextCommentCellRef); + } else { + nextCommentCellRef = null; + } + } + } while (nextCommentCellRef != null && !commentCellRefs.isEmpty()); + } + } + + + /** + * Output an empty-cell comment. + */ + private void outputEmptyCellComment(CellAddress cellRef) { + XSSFComment comment = comments.findCellComment(cellRef); + output.cell(cellRef.formatAsString(), null, comment); + } + + private enum EmptyCellCommentsCheckType { + CELL, + END_OF_ROW, + END_OF_SHEET_DATA + } + + /** + * This interface allows to provide callbacks when reading + * a sheet in streaming mode. + * <p> + * The XSLX file is usually read via {@link XSSFReader}. + * <p> + * By implementing the methods, you can process arbitrarily + * large files without exhausting main memory. + */ + public interface SheetContentsHandler { + /** + * A row with the (zero based) row number has started + */ + void startRow(int rowNum); + + /** + * A row with the (zero based) row number has ended + */ + void endRow(int rowNum); + + /** + * A cell, with the given formatted value (may be null), + * and possibly a comment (may be null), was encountered. + * <p> + * Sheets that have missing or empty cells may result in + * sparse calls to <code>cell</code>. See the code in + * <code>poi-examples/src/main/java/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java</code> + * for an example of how to handle this scenario. + */ + void cell(String cellReference, String formattedValue, XSSFComment comment); + + /** + * A header or footer has been encountered + */ + default void headerFooter(String text, boolean isHeader, String tagName) { + } + + /** + * Signal that the end of a sheet was been reached + */ + default void endSheet() { + } + } } |