]> source.dussan.org Git - poi.git/commitdiff
Patch from Shaun Kalley from bug #56023 - Allow XSSF event model to find + return...
authorNick Burch <nick@apache.org>
Thu, 24 Jul 2014 20:13:54 +0000 (20:13 +0000)
committerNick Burch <nick@apache.org>
Thu, 24 Jul 2014 20:13:54 +0000 (20:13 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1613266 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
test-data/spreadsheet/commentTest.xlsx [new file with mode: 0644]

index 0ebb40858a97d851d9e0796581e016016d7c3b85..06c9f09b9a2050dd59d96631190161630d776c60 100644 (file)
 ==================================================================== */
 package org.apache.poi.xssf.eventusermodel;
 
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Queue;
+
 import org.apache.poi.ss.usermodel.BuiltinFormats;
 import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.util.CellReference;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
+import org.apache.poi.xssf.model.CommentsTable;
 import org.apache.poi.xssf.model.StylesTable;
 import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFComment;
 import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTComment;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
@@ -54,6 +63,15 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
     */
    private StylesTable stylesTable;
 
+   /**
+    * Table with cell comments. May be null, in which case no
+    *  comments are looked up or reported while parsing.
+    */
+   private CommentsTable commentsTable;
+
+   /**
+    * Read only access to the shared strings table, for looking
+    *  up (most) string cell's contents
+    */
    private ReadOnlySharedStringsTable sharedStringsTable;
 
    /**
@@ -78,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
    private short formatIndex;
    private String formatString;
    private final DataFormatter formatter;
+   private int rowNum;
    private String cellRef;
    private boolean formulasNotResults;
 
@@ -86,6 +105,8 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
    private StringBuffer formula = new StringBuffer();
    private StringBuffer headerFooter = new StringBuffer();
 
+   private Queue<CellReference> commentCellRefs; // cell references of comments still to be handled; stays null when no comments table was supplied
+
    /**
     * Accepts objects needed while parsing.
     *
@@ -94,17 +115,36 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
     */
    public XSSFSheetXMLHandler(
            StylesTable styles,
+           CommentsTable comments,
            ReadOnlySharedStringsTable strings,
            SheetContentsHandler sheetContentsHandler,
            DataFormatter dataFormatter,
            boolean formulasNotResults) {
        this.stylesTable = styles;
+       this.commentsTable = comments;
        this.sharedStringsTable = strings;
        this.output = sheetContentsHandler;
        this.formulasNotResults = formulasNotResults;
        this.nextDataType = xssfDataType.NUMBER;
        this.formatter = dataFormatter;
+       init();
+   }
+   
+   /**
+    * Accepts objects needed while parsing, without any cell comments.
+    * Delegates to the comments-aware constructor with a <code>null</code>
+    *  comments table, so no comment lookups are performed.
+    *
+    * @param styles  Table of styles
+    * @param strings Table of shared strings
+    * @param sheetContentsHandler Receives the row, cell and header/footer callbacks
+    * @param dataFormatter Formatter stored for use while parsing
+    * @param formulasNotResults Flag stored as-is on the handler (presumably selects
+    *  formula text over cached results - confirm against the cell handling code)
+    */
+   public XSSFSheetXMLHandler(
+           StylesTable styles,
+           ReadOnlySharedStringsTable strings,
+           SheetContentsHandler sheetContentsHandler,
+           DataFormatter dataFormatter,
+           boolean formulasNotResults) {
+       this(styles, null, strings, sheetContentsHandler, dataFormatter, formulasNotResults);
    }
+   
    /**
     * Accepts objects needed while parsing.
     *
@@ -118,6 +158,16 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
            boolean formulasNotResults) {
        this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
    }
+   
+   private void init() { // Builds the queue of commented cell references, if a comments table was supplied
+       if (commentsTable != null) { // without a comments table the queue stays null; its users null-check it
+           commentCellRefs = new LinkedList<CellReference>();
+           List<CTComment> commentList = commentsTable.getCTComments().getCommentList().getCommentList();
+           for (CTComment comment : commentList) { // references are queued in the list's own order; nothing is sorted here
+               commentCellRefs.add(new CellReference(comment.getRef()));
+           }
+       }   
+   }
 
    private boolean isTextTag(String name) {
       if("v".equals(name)) {
@@ -190,7 +240,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
           headerFooter.setLength(0);
        }
        else if("row".equals(name)) {
-           int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
+           rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
            output.startRow(rowNum);
        }
        // c => cell
@@ -304,14 +354,25 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
                    break;
            }
            
+           // Do we have a comment for this cell?
+           checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL);
+           XSSFComment comment = commentsTable != null ? commentsTable.findCellComment(cellRef) : null;
+           
            // Output
-           output.cell(cellRef, thisStr);
+           output.cell(cellRef, thisStr, comment);
        } else if ("f".equals(name)) {
           fIsOpen = false;
        } else if ("is".equals(name)) {
           isIsOpen = false;
        } else if ("row".equals(name)) {
-          output.endRow();
+          // Handle any "missing" cells which had comments attached
+          checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW);
+          
+          // Finish up the row
+          output.endRow(rowNum);
+       } else if ("sheetData".equals(name)) {
+           // Handle any "missing" cells which had comments attached
+           checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
        }
        else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
              "firstHeader".equals(name)) {
@@ -342,6 +403,90 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
           headerFooter.append(ch, start, length);
        }
    }
+   
+   /**
+    * Do a check for, and output, comments in otherwise empty cells.
+    * <p>
+    * Works off the head of the queue of comment cell references, and does
+    *  nothing if that queue is null or empty:
+    * <ul>
+    * <li>END_OF_SHEET_DATA - every comment still queued is output</li>
+    * <li>CELL - a head entry equal to the current cell is removed without
+    *  being output here, as the caller reports it along with the cell;
+    *  head entries that sort before the current cell, on a row no later
+    *  than the current row number, are output</li>
+    * <li>END_OF_ROW - head entries that sort after the current cell, on a
+    *  row no later than the current row number, are output. If no cell
+    *  reference has been recorded at all, head entries on the current
+    *  row are output instead</li>
+    * </ul>
+    * NOTE(review): only the head of the queue is ever examined, so this
+    *  looks to rely on the comment references having been queued in
+    *  row-then-column order - confirm that the comments table guarantees it.
+    *
+    * @param type which parse event triggered the check
+    * @throws IllegalStateException if called for CELL while comments are
+    *  still queued but no cell reference has been recorded
+    */
+   private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) {
+       if (commentCellRefs != null && !commentCellRefs.isEmpty()) {
+           // If we've reached the end of the sheet data, output any
+           //  comments we haven't yet already handled
+           if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) {
+               while (!commentCellRefs.isEmpty()) {
+                   outputEmptyCellComment(commentCellRefs.remove());
+               }
+               return;
+           }
+
+           // No cell ref recorded yet: at the end of a row, output the queued comments that sit on this row
+           if (this.cellRef == null) {
+               if (type == EmptyCellCommentsCheckType.END_OF_ROW) {
+                   while (!commentCellRefs.isEmpty()) {
+                       if (commentCellRefs.peek().getRow() == rowNum) {
+                           outputEmptyCellComment(commentCellRefs.remove());
+                       } else {
+                           return; // head belongs to a different row, leave it queued
+                       }
+                   }
+                   return;
+               } else {
+                   throw new IllegalStateException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum);
+               }
+           }
+
+           CellReference nextCommentCellRef;
+           do {
+               CellReference cellRef = new CellReference(this.cellRef);
+               CellReference peekCellRef = commentCellRefs.peek();
+               if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) {
+                   // remove the comment cell ref from the queue, as it is about to be handled alongside the cell content
+                   commentCellRefs.remove();
+                   return;
+               } else {
+                   // fill in any gaps if there are empty cells with comments mixed in with non-empty cells
+                   int comparison = cellRefComparator.compare(peekCellRef, cellRef);
+                   if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW && peekCellRef.getRow() <= rowNum) {
+                       nextCommentCellRef = commentCellRefs.remove();
+                       outputEmptyCellComment(nextCommentCellRef);
+                   } else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL && peekCellRef.getRow() <= rowNum) {
+                       nextCommentCellRef = commentCellRefs.remove();
+                       outputEmptyCellComment(nextCommentCellRef);
+                   } else {
+                       nextCommentCellRef = null; // head is not due for output yet, which ends the loop
+                   }
+               }
+           } while (nextCommentCellRef != null && !commentCellRefs.isEmpty());
+       }
+   }
+
+
+   /**
+    * Output an empty-cell comment.
+    * Looks the comment up in the comments table by its formatted cell
+    *  reference, then passes the reference and comment to the sheet
+    *  contents handler's emptyCellComment callback.
+    *
+    * @param cellRef reference of the cell the comment is attached to
+    */
+   private void outputEmptyCellComment(CellReference cellRef) {
+       String cellRefString = cellRef.formatAsString();
+       XSSFComment comment = commentsTable.findCellComment(cellRefString);
+       output.emptyCellComment(cellRefString, comment);
+   }
+   
+   private enum EmptyCellCommentsCheckType { // the parse event that triggered an empty-cell comment check
+       CELL, // check made just before a cell is reported via output.cell
+       END_OF_ROW, // check made while handling the "row" tag, before endRow is reported
+       END_OF_SHEET_DATA // check made while handling the "sheetData" tag
+   }
+   private static final Comparator<CellReference> cellRefComparator = new Comparator<CellReference>() { // orders cell references by row, then by column
+       @Override
+       public int compare(CellReference o1, CellReference o2) {
+           int result = compare(o1.getRow(), o2.getRow()); // the row decides the order first
+           if (result == 0) { // same row, so fall back to the column
+               result = compare(o1.getCol(), o2.getCol());
+           }
+           return result;
+       }
+       public int compare(int x, int y) { // three-way int comparison helper; an overload, not part of the Comparator interface
+           return (x < y) ? -1 : ((x == y) ? 0 : 1);
+       }
+   };
 
    /**
     * You need to implement this to handle the results
@@ -351,9 +496,11 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
       /** A row with the (zero based) row number has started */
       public void startRow(int rowNum);
       /** A row with the (zero based) row number has ended */
-      public void endRow();
-      /** A cell, with the given formatted value, was encountered */
-      public void cell(String cellReference, String formattedValue);
+      public void endRow(int rowNum);
+      /** A cell, with the given formatted value, and possibly a comment, was encountered */
+      public void cell(String cellReference, String formattedValue, XSSFComment comment);
+      /** A comment for an otherwise-empty cell was encountered */
+      public void emptyCellComment(String cellReference, XSSFComment comment);
       /** A header or footer has been encountered */
       public void headerFooter(String text, boolean isHeader, String tagName);
    }
index 6929242f3f1a16f5376acb4b85eccb5bc5ef9caa..c598ed2e3dc5423bcc2c633629251975b741bdb6 100644 (file)
@@ -39,7 +39,9 @@ import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
 import org.apache.poi.xssf.eventusermodel.XSSFReader;
 import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
 import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
+import org.apache.poi.xssf.model.CommentsTable;
 import org.apache.poi.xssf.model.StylesTable;
+import org.apache.poi.xssf.usermodel.XSSFComment;
 import org.apache.poi.xssf.usermodel.XSSFShape;
 import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
 import org.apache.xmlbeans.XmlException;
@@ -60,6 +62,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
     private Locale locale;
     private boolean includeTextBoxes = true;
     private boolean includeSheetNames = true;
+    private boolean includeCellComments = false;
     private boolean includeHeadersFooters = true;
     private boolean formulasNotResults = false;
 
@@ -112,11 +115,10 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
     }
 
     /**
-     * Would control the inclusion of cell comments from the document,
-     *  if we supported it
+     * Should cell comments be included? Default is false
      */
     public void setIncludeCellComments(boolean includeCellComments) {
-        throw new IllegalStateException("Comment extraction not supported in streaming mode, please use XSSFExcelExtractor");
+        this.includeCellComments = includeCellComments;
     }
 
     public void setLocale(Locale locale) {
@@ -159,6 +161,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
     public void processSheet(
             SheetContentsHandler sheetContentsExtractor,
             StylesTable styles,
+            CommentsTable comments,
             ReadOnlySharedStringsTable strings,
             InputStream sheetInputStream)
             throws IOException, SAXException {
@@ -176,7 +179,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
           SAXParser saxParser = saxFactory.newSAXParser();
           XMLReader sheetParser = saxParser.getXMLReader();
           ContentHandler handler = new XSSFSheetXMLHandler(
-                styles, strings, sheetContentsExtractor, formatter, formulasNotResults);
+                styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
           sheetParser.setContentHandler(handler);
           sheetParser.parse(sheetSource);
        } catch(ParserConfigurationException e) {
@@ -203,7 +206,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
                  text.append(iter.getSheetName());
                  text.append('\n');
               }
-              processSheet(sheetExtractor, styles, strings, stream);
+              CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
+              processSheet(sheetExtractor, styles, comments, strings, stream);
               if (includeHeadersFooters) {
                   sheetExtractor.appendHeaderText(text);
               }
@@ -268,17 +272,32 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
             firstCellOfRow = true;
         }
 
-        public void endRow() {
+        public void endRow(int rowNum) { // rowNum is not used here; every row is simply ended with a newline
             output.append('\n');
         }
 
-        public void cell(String cellRef, String formattedValue) {
+        public void cell(String cellRef, String formattedValue, XSSFComment comment) { // appends the cell's value and, when enabled, its comment; cells after the first in a row are tab-separated
             if(firstCellOfRow) {
                 firstCellOfRow = false;
             } else {
                 output.append('\t');
             }
-            output.append(formattedValue);
+            if (formattedValue != null) { // null when called for a comment on an otherwise-empty cell
+                output.append(formattedValue);
+            }
+            if (includeCellComments && comment != null) {
+                String commentText = comment.getString().getString().replace('\n', ' '); // newlines in the comment become spaces
+                output.append(formattedValue != null ? " Comment by " : "Comment by "); // leading space only when a value was written before it
+                if (commentText.startsWith(comment.getAuthor() + ": ")) { // text already begins with the author prefix, so it is not added again
+                    output.append(commentText);
+                } else {
+                    output.append(comment.getAuthor()).append(": ").append(commentText);
+                }
+            }
+        }
+
+        public void emptyCellComment(String cellRef, XSSFComment comment) { // written out exactly like a cell that has no value
+            cell(cellRef, null, comment);
+        }
 
         public void headerFooter(String text, boolean isHeader, String tagName) {
@@ -287,7 +306,6 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
             }
         }
 
-
         /**
          * Append the text for the named header or footer if found.
          */
index 98aeb627f23a54f9818638ef28bbcfe7497efee7..80c1f116b5aed3b7c192e5f6add606fe61db9969 100644 (file)
@@ -20,13 +20,13 @@ package org.apache.poi.xssf.extractor;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import junit.framework.TestCase;
-
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
 import org.apache.poi.xssf.XSSFTestDataSamples;
 
+import junit.framework.TestCase;
+
 /**
  * Tests for {@link XSSFEventBasedExcelExtractor}
  */
@@ -240,4 +240,68 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
         fixture.setIncludeHeadersFooters(false);
         assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
     }
+
+    /**
+     * Test that XSSFEventBasedExcelExtractor outputs comments when specified.
+     * The output will contain two improvements over the output from
+     *  XSSFExcelExtractor in that (1) comments from empty cells will be
+     *  output, and (2) the author will not be output twice.
+     * <p>
+     * This test will need to be modified if these improvements are ported to
+     *  XSSFExcelExtractor.
+     */
+    public void testCommentsComparedToNonEventBasedExtractor()
+        throws Exception {
+
+        String expectedOutputWithoutComments =
+                "Sheet1\n" +
+                "\n" +
+                "abc\n" +
+                "\n" +
+                "123\n" +
+                "\n" +
+                "\n" +
+                "\n";
+
+        String nonEventBasedExtractorOutputWithComments =
+                "Sheet1\n" +
+                "\n" +
+                "abc Comment by Shaun Kalley: Shaun Kalley: Comment A2\n" +
+                "\n" +
+                "123 Comment by Shaun Kalley: Shaun Kalley: Comment B4\n" +
+                "\n" +
+                "\n" +
+                "\n";
+
+        String eventBasedExtractorOutputWithComments =
+                "Sheet1\n" +
+                "Comment by Shaun Kalley: Comment A1\tComment by Shaun Kalley: Comment B1\n" +
+                "abc Comment by Shaun Kalley: Comment A2\tComment by Shaun Kalley: Comment B2\n" +
+                "Comment by Shaun Kalley: Comment A3\tComment by Shaun Kalley: Comment B3\n" +
+                "Comment by Shaun Kalley: Comment A4\t123 Comment by Shaun Kalley: Comment B4\n" +
+                "Comment by Shaun Kalley: Comment A5\tComment by Shaun Kalley: Comment B5\n" +
+                "Comment by Shaun Kalley: Comment A7\tComment by Shaun Kalley: Comment B7\n" +
+                "Comment by Shaun Kalley: Comment A8\tComment by Shaun Kalley: Comment B8\n";
+
+        XSSFExcelExtractor extractor = new XSSFExcelExtractor(
+                XSSFTestDataSamples.openSampleWorkbook("commentTest.xlsx"));
+        try {
+            assertEquals(expectedOutputWithoutComments, extractor.getText());
+            extractor.setIncludeCellComments(true);
+            assertEquals(nonEventBasedExtractorOutputWithComments, extractor.getText());
+        } finally {
+            extractor.close();
+        }
+
+        XSSFEventBasedExcelExtractor fixture =
+                new XSSFEventBasedExcelExtractor(
+                        XSSFTestDataSamples.openSamplePackage("commentTest.xlsx"));
+        try {
+            assertEquals(expectedOutputWithoutComments, fixture.getText());
+            fixture.setIncludeCellComments(true);
+            assertEquals(eventBasedExtractorOutputWithComments, fixture.getText());
+        } finally {
+            fixture.close();
+        }
+    }
 }
diff --git a/test-data/spreadsheet/commentTest.xlsx b/test-data/spreadsheet/commentTest.xlsx
new file mode 100644 (file)
index 0000000..10e7837
Binary files /dev/null and b/test-data/spreadsheet/commentTest.xlsx differ