]> source.dussan.org Git - poi.git/commitdiff
Patch from Shaun Kalley from bug #56022 - XSSF Event Text Extractor header/footer...
authorNick Burch <nick@apache.org>
Sun, 2 Feb 2014 16:39:53 +0000 (16:39 +0000)
committerNick Burch <nick@apache.org>
Sun, 2 Feb 2014 16:39:53 +0000 (16:39 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1563657 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
test-data/spreadsheet/headerFooterTest.xlsx [new file with mode: 0644]

index 0bf911341c8fea94fb3d41f44d12d1cf8afa4007..3f7e5a392e8dfc0970881afbe2aaf54a0bec65ad 100644 (file)
@@ -18,8 +18,10 @@ package org.apache.poi.xssf.extractor;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
@@ -56,9 +58,10 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
     private POIXMLProperties properties;
 
     private Locale locale;
+    private boolean includeTextBoxes = true;
     private boolean includeSheetNames = true;
+    private boolean includeHeadersFooters = true;
     private boolean formulasNotResults = false;
-    private boolean includeTextBoxes = true;
 
     public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
         this(OPCPackage.open(path));
@@ -94,7 +97,12 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
     public void setFormulasNotResults(boolean formulasNotResults) {
         this.formulasNotResults = formulasNotResults;
     }
-
+    /**
+     * Should headers and footers be included? Default is true
+     */
+    public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
+        this.includeHeadersFooters = includeHeadersFooters;
+    }
     /**
      * Should text from textboxes be included? Default is true
      */
@@ -186,7 +194,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
           XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
    
           StringBuffer text = new StringBuffer();
-          SheetTextExtractor sheetExtractor = new SheetTextExtractor(text);
+          SheetTextExtractor sheetExtractor = new SheetTextExtractor();
           
           while (iter.hasNext()) {
               InputStream stream = iter.next();
@@ -195,9 +203,17 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
                  text.append('\n');
               }
               processSheet(sheetExtractor, styles, strings, stream);
+              if (includeHeadersFooters) {
+                  sheetExtractor.appendHeaderText(text);
+              }
+              sheetExtractor.appendCellText(text);
               if (includeTextBoxes){
                   processShapes(iter.getShapes(), text);
               }
+              if (includeHeadersFooters) {
+                  sheetExtractor.appendFooterText(text);
+              }
+              sheetExtractor.reset();
               stream.close();
           }
           
@@ -238,10 +254,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
 
    protected class SheetTextExtractor implements SheetContentsHandler {
       private final StringBuffer output;
-      private boolean firstCellOfRow = true;
+      private boolean firstCellOfRow;
+      private final Map<String, String> headerFooterMap;
       
-      protected SheetTextExtractor(StringBuffer output) {
-         this.output = output;
+      protected SheetTextExtractor() {
+         this.output = new StringBuffer();
+         this.firstCellOfRow = true;
+         this.headerFooterMap = includeHeadersFooters ? new HashMap<String, String>() : null;
       }
       
       public void startRow(int rowNum) {
@@ -262,7 +281,84 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
       }
       
       public void headerFooter(String text, boolean isHeader, String tagName) {
-         // We don't include headers in the output yet, so ignore
+          if (headerFooterMap != null) {
+              headerFooterMap.put(tagName, text);
+          }
+      }
+      
+      
+      /**
+       * Append the text for the named header or footer if found.
+       */
+      private void appendHeaderFooterText(StringBuffer buffer, String name) {
+          String text = headerFooterMap.get(name);
+          if (text != null && text.length() > 0) {
+              // this is a naive way of handling the left, center, and right
+              // header and footer delimiters, but it seems to be as good as
+              // the method used by XSSFExcelExtractor
+              text = handleHeaderFooterDelimiter(text, "&L");
+              text = handleHeaderFooterDelimiter(text, "&C");
+              text = handleHeaderFooterDelimiter(text, "&R");
+              buffer.append(text).append('\n');
+          }
+      }
+      /**
+       * Remove the delimiter if its found at the beginning of the text,
+       * or replace it with a tab if its in the middle.
+       */
+      private String handleHeaderFooterDelimiter(String text, String delimiter) {
+          int index = text.indexOf(delimiter);
+          if (index == 0) {
+              text = text.substring(2);
+          } else if (index > 0) {
+              text = text.substring(0, index) + "\t" + text.substring(index + 2);
+          }
+          return text;
+      }
+
+      
+      /**
+       * Append the text for each header type in the same order
+       * they are appended in XSSFExcelExtractor.
+       * @see XSSFExcelExtractor#getText()
+       * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
+       */
+      private void appendHeaderText(StringBuffer buffer) {
+          appendHeaderFooterText(buffer, "firstHeader");
+          appendHeaderFooterText(buffer, "oddHeader");
+          appendHeaderFooterText(buffer, "evenHeader");
+      }
+      
+      /**
+       * Append the text for each footer type in the same order
+       * they are appended in XSSFExcelExtractor.
+       * @see XSSFExcelExtractor#getText()
+       * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
+       */
+      private void appendFooterText(StringBuffer buffer) {
+          // append the text for each footer type in the same order
+          // they are appended in XSSFExcelExtractor
+          appendHeaderFooterText(buffer, "firstFooter");
+          appendHeaderFooterText(buffer, "oddFooter");
+          appendHeaderFooterText(buffer, "evenFooter");
+      }
+
+      /**
+       * Append the cell contents we have collected.
+       */
+      private void appendCellText(StringBuffer buffer) {
+          buffer.append(output);
+      }
+      
+      /**
+       * Reset this <code>SheetTextExtractor</code> for the next sheet.
+       */
+      private void reset() {
+          output.setLength(0);
+          firstCellOfRow = true;
+          if (headerFooterMap != null) {
+              headerFooterMap.clear();
+          }
       }
    }
 }
index 0fb28ef9dcfb6bb5844df294850f3ec8e246240f..98aeb627f23a54f9818638ef28bbcfe7497efee7 100644 (file)
@@ -209,4 +209,35 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
             fixture.close();
         }
     }
+
+    /**
+     * Test that we return the same output headers and footers as the
+     * non-event-based XSSFExcelExtractor.
+     */
+    public void testHeadersAndFootersComparedToNonEventBasedExtractor()
+        throws Exception {
+
+        String expectedOutputWithHeadersAndFooters =
+                "Sheet1\n" +
+                "&\"Calibri,Regular\"&K000000top left\t&\"Calibri,Regular\"&K000000top center\t&\"Calibri,Regular\"&K000000top right\n" +
+                "abc\t123\n" +
+                "&\"Calibri,Regular\"&K000000bottom left\t&\"Calibri,Regular\"&K000000bottom center\t&\"Calibri,Regular\"&K000000bottom right\n";
+
+        String expectedOutputWithoutHeadersAndFooters =
+                "Sheet1\n" +
+                "abc\t123\n";
+
+        XSSFExcelExtractor extractor = new XSSFExcelExtractor(
+                XSSFTestDataSamples.openSampleWorkbook("headerFooterTest.xlsx"));
+        assertEquals(expectedOutputWithHeadersAndFooters, extractor.getText());
+        extractor.setIncludeHeadersFooters(false);
+        assertEquals(expectedOutputWithoutHeadersAndFooters, extractor.getText());
+
+        XSSFEventBasedExcelExtractor fixture =
+                new XSSFEventBasedExcelExtractor(
+                        XSSFTestDataSamples.openSamplePackage("headerFooterTest.xlsx"));
+        assertEquals(expectedOutputWithHeadersAndFooters, fixture.getText());
+        fixture.setIncludeHeadersFooters(false);
+        assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
+    }
 }
diff --git a/test-data/spreadsheet/headerFooterTest.xlsx b/test-data/spreadsheet/headerFooterTest.xlsx
new file mode 100644 (file)
index 0000000..2c72125
Binary files /dev/null and b/test-data/spreadsheet/headerFooterTest.xlsx differ