aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2014-07-24 20:13:54 +0000
committerNick Burch <nick@apache.org>2014-07-24 20:13:54 +0000
commit62bd48af7491276d453df4f68c617aa17b4fc9a7 (patch)
treed636b11ce6cb81a1bc092aac5b6a45da5cfdeb4e
parentf3dba528888a0ee4f1fe10f13bdbd025755913f9 (diff)
downloadpoi-62bd48af7491276d453df4f68c617aa17b4fc9a7.tar.gz
poi-62bd48af7491276d453df4f68c617aa17b4fc9a7.zip
Patch from Shaun Kalley from bug #56023 - Allow XSSF event model to find + return comments, and use this for the event based .xlsx text extractor
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1613266 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java159
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java36
-rw-r--r--src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java68
-rw-r--r--test-data/spreadsheet/commentTest.xlsxbin0 -> 36701 bytes
4 files changed, 246 insertions, 17 deletions
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
index 0ebb40858a..06c9f09b9a 100644
--- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
+++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
@@ -16,13 +16,22 @@
==================================================================== */
package org.apache.poi.xssf.eventusermodel;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Queue;
+
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
+import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTComment;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
@@ -54,6 +63,15 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
*/
private StylesTable stylesTable;
+ /**
+ * Table with cell comments
+ */
+ private CommentsTable commentsTable;
+
+ /**
+ * Read only access to the shared strings table, for looking
+ * up (most) string cell's contents
+ */
private ReadOnlySharedStringsTable sharedStringsTable;
/**
@@ -78,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private short formatIndex;
private String formatString;
private final DataFormatter formatter;
+ private int rowNum;
private String cellRef;
private boolean formulasNotResults;
@@ -86,6 +105,8 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private StringBuffer formula = new StringBuffer();
private StringBuffer headerFooter = new StringBuffer();
+ private Queue<CellReference> commentCellRefs;
+
/**
* Accepts objects needed while parsing.
*
@@ -94,17 +115,36 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
*/
public XSSFSheetXMLHandler(
StylesTable styles,
+ CommentsTable comments,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetContentsHandler,
DataFormatter dataFormatter,
boolean formulasNotResults) {
this.stylesTable = styles;
+ this.commentsTable = comments;
this.sharedStringsTable = strings;
this.output = sheetContentsHandler;
this.formulasNotResults = formulasNotResults;
this.nextDataType = xssfDataType.NUMBER;
this.formatter = dataFormatter;
+ init();
+ }
+
+ /**
+ * Accepts objects needed while parsing, without any cell comments.
+ * Delegates to the constructor that also takes a comments table, passing
+ * null for it, so no comment lookups are made while the sheet is parsed.
+ *
+ * @param styles Table of styles
+ * @param strings Table of shared strings
+ * @param sheetContentsHandler Receives the row, cell and header/footer callbacks
+ * @param dataFormatter Formatter handed on to the full constructor
+ * @param formulasNotResults Flag handed on to the full constructor; presumably
+ * selects formula text over cached results - confirm against the cell handling code
+ */
+ public XSSFSheetXMLHandler(
+ StylesTable styles,
+ ReadOnlySharedStringsTable strings,
+ SheetContentsHandler sheetContentsHandler,
+ DataFormatter dataFormatter,
+ boolean formulasNotResults) {
+ this(styles, null, strings, sheetContentsHandler, dataFormatter, formulasNotResults);
+ }
+
/**
* Accepts objects needed while parsing.
*
@@ -118,6 +158,16 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
boolean formulasNotResults) {
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
}
+
+    /**
+     * Builds the queue of cell references that carry a comment, if a
+     * comments table was supplied. Leaves the queue null otherwise, which
+     * disables the empty-cell comment checks.
+     * <p>
+     * The queue is consumed front-to-back as cells are parsed, so it must
+     * be in row-then-column order. The comments part is not guaranteed to
+     * list its entries in that order, so the references are sorted here;
+     * an out-of-order entry would otherwise block the head of the queue and
+     * cause later comments to be reported late or twice.
+     */
+    private void init() {
+        if (commentsTable != null) {
+            LinkedList<CellReference> refs = new LinkedList<CellReference>();
+            List<CTComment> commentList = commentsTable.getCTComments().getCommentList().getCommentList();
+            for (CTComment comment : commentList) {
+                refs.add(new CellReference(comment.getRef()));
+            }
+            // Fully qualified so that no extra import is needed in this file
+            java.util.Collections.sort(refs, cellRefComparator);
+            commentCellRefs = refs;
+        }
+    }
private boolean isTextTag(String name) {
if("v".equals(name)) {
@@ -190,7 +240,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
headerFooter.setLength(0);
}
else if("row".equals(name)) {
- int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
+ rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
output.startRow(rowNum);
}
// c => cell
@@ -304,14 +354,25 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
break;
}
+ // Do we have a comment for this cell?
+ checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL);
+ XSSFComment comment = commentsTable != null ? commentsTable.findCellComment(cellRef) : null;
+
// Output
- output.cell(cellRef, thisStr);
+ output.cell(cellRef, thisStr, comment);
} else if ("f".equals(name)) {
fIsOpen = false;
} else if ("is".equals(name)) {
isIsOpen = false;
} else if ("row".equals(name)) {
- output.endRow();
+ // Handle any "missing" cells which had comments attached
+ checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW);
+
+ // Finish up the row
+ output.endRow(rowNum);
+ } else if ("sheetData".equals(name)) {
+ // Handle any "missing" cells which had comments attached
+ checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
}
else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
"firstHeader".equals(name)) {
@@ -342,6 +403,90 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
headerFooter.append(ch, start, length);
}
}
+
+ /**
+ * Do a check for, and output, comments in otherwise empty cells.
+ * <p>
+ * Works through the queue of pending comment cell references, passing
+ * those that have no cell content of their own to
+ * outputEmptyCellComment. Does nothing when the queue was never
+ * created or has already been emptied.
+ * <p>
+ * NOTE(review): the head-of-queue comparisons below only behave as
+ * intended if the queue is ordered by row and then column - confirm
+ * that whatever fills the queue guarantees this.
+ *
+ * @param type the point in parsing from which the check is being made
+ * @throws IllegalStateException if called for a cell while no cell
+ * reference has been recorded
+ */
+ private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) {
+ if (commentCellRefs != null && !commentCellRefs.isEmpty()) {
+ // If we've reached the end of the sheet data, output any
+ // comments we haven't yet already handled
+ if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) {
+ while (!commentCellRefs.isEmpty()) {
+ outputEmptyCellComment(commentCellRefs.remove());
+ }
+ return;
+ }
+
+ // No cell reference recorded (presumably no cell has been parsed yet):
+ // at the end of a row, output the pending comments that sit on this
+ // row, stopping at the first queued comment for any other row
+ if (this.cellRef == null) {
+ if (type == EmptyCellCommentsCheckType.END_OF_ROW) {
+ while (!commentCellRefs.isEmpty()) {
+ if (commentCellRefs.peek().getRow() == rowNum) {
+ outputEmptyCellComment(commentCellRefs.remove());
+ } else {
+ return;
+ }
+ }
+ return;
+ } else {
+ throw new IllegalStateException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum);
+ }
+ }
+
+ // Compare the head of the queue against the most recent cell reference,
+ // and keep going for as long as a comment was output on the last pass
+ CellReference nextCommentCellRef;
+ do {
+ CellReference cellRef = new CellReference(this.cellRef);
+ CellReference peekCellRef = commentCellRefs.peek();
+ if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) {
+ // remove the comment cell ref from the list if we're about to handle it alongside the cell content
+ commentCellRefs.remove();
+ return;
+ } else {
+ // fill in any gaps if there are empty cells with comment mixed in with non-empty cells
+ int comparison = cellRefComparator.compare(peekCellRef, cellRef);
+ // End of row: the queued comment comes after the last cell seen, and is on this row or an earlier one
+ if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW && peekCellRef.getRow() <= rowNum) {
+ nextCommentCellRef = commentCellRefs.remove();
+ outputEmptyCellComment(nextCommentCellRef);
+ // Cell: the queued comment comes before the current cell, and is on this row or an earlier one
+ } else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL && peekCellRef.getRow() <= rowNum) {
+ nextCommentCellRef = commentCellRefs.remove();
+ outputEmptyCellComment(nextCommentCellRef);
+ } else {
+ // Nothing to output for the head of the queue right now, so stop
+ nextCommentCellRef = null;
+ }
+ }
+ } while (nextCommentCellRef != null && !commentCellRefs.isEmpty());
+ }
+ }
+
+
+    /**
+     * Reports the comment attached to a cell that has no content of its own.
+     * The comment is looked up in the comments table by the formatted cell
+     * reference and passed, with that reference, to the contents handler.
+     */
+    private void outputEmptyCellComment(CellReference cellRef) {
+        final String ref = cellRef.formatAsString();
+        output.emptyCellComment(ref, commentsTable.findCellComment(ref));
+    }
+
+ // Identifies the point in parsing from which checkForEmptyCellComments is called
+ private enum EmptyCellCommentsCheckType {
+ // Passed just before a cell's value is sent to the contents handler
+ CELL,
+ // Passed when a row element ends, before endRow is reported
+ END_OF_ROW,
+ // Passed when the sheetData element ends
+ END_OF_SHEET_DATA
+ }
+    /** Orders cell references by row first, then by column within a row. */
+    private static final Comparator<CellReference> cellRefComparator = new Comparator<CellReference>() {
+        @Override
+        public int compare(CellReference o1, CellReference o2) {
+            int byRow = compare(o1.getRow(), o2.getRow());
+            return (byRow != 0) ? byRow : compare(o1.getCol(), o2.getCol());
+        }
+        public int compare(int x, int y) {
+            if (x < y) {
+                return -1;
+            }
+            return (x == y) ? 0 : 1;
+        }
+    };
/**
* You need to implement this to handle the results
@@ -351,9 +496,11 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
/** A row with the (zero based) row number has started */
public void startRow(int rowNum);
/** A row with the (zero based) row number has ended */
- public void endRow();
- /** A cell, with the given formatted value, was encountered */
- public void cell(String cellReference, String formattedValue);
+ public void endRow(int rowNum);
+ /** A cell, with the given formatted value, and possibly a comment, was encountered */
+ public void cell(String cellReference, String formattedValue, XSSFComment comment);
+ /** A comment for an otherwise-empty cell was encountered */
+ public void emptyCellComment(String cellReference, XSSFComment comment);
/** A header or footer has been encountered */
public void headerFooter(String text, boolean isHeader, String tagName);
}
diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
index 6929242f3f..c598ed2e3d 100644
--- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
@@ -39,7 +39,9 @@ import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
+import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.StylesTable;
+import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
import org.apache.xmlbeans.XmlException;
@@ -60,6 +62,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
private Locale locale;
private boolean includeTextBoxes = true;
private boolean includeSheetNames = true;
+ private boolean includeCellComments = false;
private boolean includeHeadersFooters = true;
private boolean formulasNotResults = false;
@@ -112,11 +115,10 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
/**
- * Would control the inclusion of cell comments from the document,
- * if we supported it
+ * Should cell comments be included? Default is false
*/
public void setIncludeCellComments(boolean includeCellComments) {
- throw new IllegalStateException("Comment extraction not supported in streaming mode, please use XSSFExcelExtractor");
+ this.includeCellComments = includeCellComments;
}
public void setLocale(Locale locale) {
@@ -159,6 +161,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
public void processSheet(
SheetContentsHandler sheetContentsExtractor,
StylesTable styles,
+ CommentsTable comments,
ReadOnlySharedStringsTable strings,
InputStream sheetInputStream)
throws IOException, SAXException {
@@ -176,7 +179,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
SAXParser saxParser = saxFactory.newSAXParser();
XMLReader sheetParser = saxParser.getXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
- styles, strings, sheetContentsExtractor, formatter, formulasNotResults);
+ styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
@@ -203,7 +206,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
text.append(iter.getSheetName());
text.append('\n');
}
- processSheet(sheetExtractor, styles, strings, stream);
+ CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
+ processSheet(sheetExtractor, styles, comments, strings, stream);
if (includeHeadersFooters) {
sheetExtractor.appendHeaderText(text);
}
@@ -268,17 +272,32 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
firstCellOfRow = true;
}
- public void endRow() {
+ public void endRow(int rowNum) {
output.append('\n');
}
- public void cell(String cellRef, String formattedValue) {
+ // Appends one cell to the text output: a tab separator for every cell after
+ // the first in the row, then the formatted value if there is one, then the
+ // comment if comments are enabled and one was supplied.
+public void cell(String cellRef, String formattedValue, XSSFComment comment) {
if(firstCellOfRow) {
firstCellOfRow = false;
} else {
output.append('\t');
}
-output.append(formattedValue);
+ // formattedValue is null when called from emptyCellComment
+ if (formattedValue != null) {
+ output.append(formattedValue);
+ }
+ if (includeCellComments && comment != null) {
+ // Newlines in the comment are replaced with spaces
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ // A leading space separates the comment from the value when there is one
+ output.append(formattedValue != null ? " Comment by " : "Comment by ");
+ // Avoid repeating the author when the comment text already starts with it
+ if (commentText.startsWith(comment.getAuthor() + ": ")) {
+ output.append(commentText);
+ } else {
+ output.append(comment.getAuthor()).append(": ").append(commentText);
+ }
+ }
+ }
+
+ // Outputs a comment on a cell that has no value, by handing it to cell()
+ // with a null formatted value
+ public void emptyCellComment(String cellRef, XSSFComment comment) {
+ cell(cellRef, null, comment);
+ }
public void headerFooter(String text, boolean isHeader, String tagName) {
@@ -287,7 +306,6 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
}
-
/**
* Append the text for the named header or footer if found.
*/
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
index 98aeb627f2..80c1f116b5 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
@@ -20,13 +20,13 @@ package org.apache.poi.xssf.extractor;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import junit.framework.TestCase;
-
import org.apache.poi.POITextExtractor;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.xssf.XSSFTestDataSamples;
+import junit.framework.TestCase;
+
/**
* Tests for {@link XSSFEventBasedExcelExtractor}
*/
@@ -240,4 +240,68 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
fixture.setIncludeHeadersFooters(false);
assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
}
+
+ /**
+ * Test that XSSFEventBasedExcelExtractor outputs comments when specified.
+ * The output will contain two improvements over the output from
+ * XSSFExcelExtractor in that (1) comments from empty cells will be
+ * outputted, and (2) the author will not be outputted twice.
+ * <p>
+ * This test will need to be modified if these improvements are ported to
+ * XSSFExcelExtractor.
+ */
+ public void testCommentsComparedToNonEventBasedExtractor()
+ throws Exception {
+
+ // Expected from both extractors when comments are not requested
+ String expectedOutputWithoutComments =
+ "Sheet1\n" +
+ "\n" +
+ "abc\n" +
+ "\n" +
+ "123\n" +
+ "\n" +
+ "\n" +
+ "\n";
+
+ // Expected from XSSFExcelExtractor with comments: only cells with a value, author repeated
+ String nonEventBasedExtractorOutputWithComments =
+ "Sheet1\n" +
+ "\n" +
+ "abc Comment by Shaun Kalley: Shaun Kalley: Comment A2\n" +
+ "\n" +
+ "123 Comment by Shaun Kalley: Shaun Kalley: Comment B4\n" +
+ "\n" +
+ "\n" +
+ "\n";
+
+ // Expected from XSSFEventBasedExcelExtractor with comments: empty cells included, author once
+ String eventBasedExtractorOutputWithComments =
+ "Sheet1\n" +
+ "Comment by Shaun Kalley: Comment A1\tComment by Shaun Kalley: Comment B1\n" +
+ "abc Comment by Shaun Kalley: Comment A2\tComment by Shaun Kalley: Comment B2\n" +
+ "Comment by Shaun Kalley: Comment A3\tComment by Shaun Kalley: Comment B3\n" +
+ "Comment by Shaun Kalley: Comment A4\t123 Comment by Shaun Kalley: Comment B4\n" +
+ "Comment by Shaun Kalley: Comment A5\tComment by Shaun Kalley: Comment B5\n" +
+ "Comment by Shaun Kalley: Comment A7\tComment by Shaun Kalley: Comment B7\n" +
+ "Comment by Shaun Kalley: Comment A8\tComment by Shaun Kalley: Comment B8\n";
+
+ // Check the non-event-based extractor first, without and then with comments
+ XSSFExcelExtractor extractor = new XSSFExcelExtractor(
+ XSSFTestDataSamples.openSampleWorkbook("commentTest.xlsx"));
+ try {
+ assertEquals(expectedOutputWithoutComments, extractor.getText());
+ extractor.setIncludeCellComments(true);
+ assertEquals(nonEventBasedExtractorOutputWithComments, extractor.getText());
+ } finally {
+ extractor.close();
+ }
+
+ // Then the event-based extractor on the same file
+ XSSFEventBasedExcelExtractor fixture =
+ new XSSFEventBasedExcelExtractor(
+ XSSFTestDataSamples.openSamplePackage("commentTest.xlsx"));
+ try {
+ assertEquals(expectedOutputWithoutComments, fixture.getText());
+ fixture.setIncludeCellComments(true);
+ assertEquals(eventBasedExtractorOutputWithComments, fixture.getText());
+ } finally {
+ fixture.close();
+ }
+ }
}
diff --git a/test-data/spreadsheet/commentTest.xlsx b/test-data/spreadsheet/commentTest.xlsx
new file mode 100644
index 0000000000..10e7837d64
--- /dev/null
+++ b/test-data/spreadsheet/commentTest.xlsx
Binary files differ