import java.io.IOException;
import java.io.InputStream;
+import java.util.HashMap;
import java.util.List;
import java.util.Locale;
+import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
private POIXMLProperties properties;
private Locale locale;
+ private boolean includeTextBoxes = true;
private boolean includeSheetNames = true;
+ private boolean includeHeadersFooters = true;
private boolean formulasNotResults = false;
- private boolean includeTextBoxes = true;
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(OPCPackage.open(path));
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
-
+ /**
+ * Should headers and footers be included? Default is true
+ *
+ * @param includeHeadersFooters <code>true</code> to include header and
+ *        footer text in the extracted output, <code>false</code> to
+ *        leave it out
+ */
+ public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
+ this.includeHeadersFooters = includeHeadersFooters;
+ }
/**
* Should text from textboxes be included? Default is true
*/
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
StringBuffer text = new StringBuffer();
- SheetTextExtractor sheetExtractor = new SheetTextExtractor(text);
+ SheetTextExtractor sheetExtractor = new SheetTextExtractor();
while (iter.hasNext()) {
InputStream stream = iter.next();
text.append('\n');
}
processSheet(sheetExtractor, styles, strings, stream);
+ if (includeHeadersFooters) {
+ sheetExtractor.appendHeaderText(text);
+ }
+ sheetExtractor.appendCellText(text);
if (includeTextBoxes){
processShapes(iter.getShapes(), text);
}
+ if (includeHeadersFooters) {
+ sheetExtractor.appendFooterText(text);
+ }
+ sheetExtractor.reset();
stream.close();
}
protected class SheetTextExtractor implements SheetContentsHandler {
private final StringBuffer output;
- private boolean firstCellOfRow = true;
+ private boolean firstCellOfRow;
+ private final Map<String, String> headerFooterMap;
- protected SheetTextExtractor(StringBuffer output) {
- this.output = output;
+ protected SheetTextExtractor() {
+ this.output = new StringBuffer(); // cell text is buffered here until appendCellText() copies it out
+ this.firstCellOfRow = true;
+ this.headerFooterMap = includeHeadersFooters ? new HashMap<String, String>() : null; // null means header/footer text is not collected
}
public void startRow(int rowNum) {
}
public void headerFooter(String text, boolean isHeader, String tagName) {
- // We don't include headers in the output yet, so ignore
+ if (headerFooterMap != null) { // the map is null when header/footer text is not being collected
+ headerFooterMap.put(tagName, text); // stored under the tag name; isHeader is not consulted
+ }
+ }
+
+
+        /**
+         * Append the text for the named header or footer if found.
+         * <p>
+         * Nothing is appended when header/footer text is not being collected
+         * (the map is <code>null</code>), when no text was recorded under
+         * <code>name</code>, or when the recorded text is empty. Otherwise
+         * the text is appended followed by a newline.
+         *
+         * @param buffer the buffer to append to
+         * @param name   the tag name the text was recorded under,
+         *               e.g. "oddHeader"
+         */
+        private void appendHeaderFooterText(StringBuffer buffer, String name) {
+            // The constructor leaves headerFooterMap null when headers and
+            // footers are turned off. headerFooter() and reset() both guard
+            // against that, so guard here too instead of relying on callers.
+            if (headerFooterMap == null) {
+                return;
+            }
+            String text = headerFooterMap.get(name);
+            if (text != null && text.length() > 0) {
+                // this is a naive way of handling the left, center, and right
+                // header and footer delimiters, but it seems to be as good as
+                // the method used by XSSFExcelExtractor
+                text = handleHeaderFooterDelimiter(text, "&L");
+                text = handleHeaderFooterDelimiter(text, "&C");
+                text = handleHeaderFooterDelimiter(text, "&R");
+                buffer.append(text).append('\n');
+            }
+        }
+        /**
+         * Remove the delimiter if it's found at the beginning of the text,
+         * or replace it with a tab if it's in the middle. Only the first
+         * occurrence of the delimiter is handled.
+         *
+         * @param text      the header or footer text to process
+         * @param delimiter the section marker to look for, e.g. "&L"
+         * @return the text with the first occurrence of the delimiter
+         *         removed or turned into a tab, or the text unchanged
+         *         when the delimiter does not occur
+         */
+        private String handleHeaderFooterDelimiter(String text, String delimiter) {
+            int index = text.indexOf(delimiter);
+            if (index < 0) {
+                return text;
+            }
+            // skip the whole delimiter rather than assuming it is exactly
+            // two characters long
+            String remainder = text.substring(index + delimiter.length());
+            if (index == 0) {
+                return remainder;
+            }
+            return text.substring(0, index) + "\t" + remainder;
+        }
+
+
+        /**
+         * Emit the collected header text, one header type at a time, in
+         * the same order they are appended in XSSFExcelExtractor.
+         * @see XSSFExcelExtractor#getText()
+         * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
+         */
+        private void appendHeaderText(StringBuffer buffer) {
+            String[] headerTags = { "firstHeader", "oddHeader", "evenHeader" };
+            for (String tag : headerTags) {
+                appendHeaderFooterText(buffer, tag);
+            }
+        }
+
+        /**
+         * Emit the collected footer text, one footer type at a time, in
+         * the same order they are appended in XSSFExcelExtractor.
+         * @see XSSFExcelExtractor#getText()
+         * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
+         */
+        private void appendFooterText(StringBuffer buffer) {
+            String[] footerTags = { "firstFooter", "oddFooter", "evenFooter" };
+            for (String tag : footerTags) {
+                appendHeaderFooterText(buffer, tag);
+            }
+        }
+
+ /**
+ * Append the cell contents we have collected.
+ *
+ * @param buffer the buffer that receives a copy of the collected
+ *        cell text; the internal buffer itself is not modified
+ */
+ private void appendCellText(StringBuffer buffer) {
+ buffer.append(output);
+ }
+
+ /**
+ * Reset this <code>SheetTextExtractor</code> for the next sheet.
+ * <p>
+ * Discards the collected cell text, sets the first-cell-of-row flag
+ * back to <code>true</code>, and clears any recorded header/footer
+ * text when it is being collected.
+ */
+ private void reset() {
+ output.setLength(0);
+ firstCellOfRow = true;
+ if (headerFooterMap != null) {
+ headerFooterMap.clear();
+ }
}
}
}
fixture.close();
}
}
+
+    /**
+     * Test that we return the same output headers and footers as the
+     * non-event-based XSSFExcelExtractor.
+     * <p>
+     * Both extractors read the same sample file and are checked against
+     * the same expected text, first with their initial settings and then
+     * with headers and footers switched off. Each extractor is closed
+     * when its checks are done, whether or not an assertion failed.
+     */
+    public void testHeadersAndFootersComparedToNonEventBasedExtractor()
+            throws Exception {
+
+        String expectedOutputWithHeadersAndFooters =
+                "Sheet1\n" +
+                "&\"Calibri,Regular\"&K000000top left\t&\"Calibri,Regular\"&K000000top center\t&\"Calibri,Regular\"&K000000top right\n" +
+                "abc\t123\n" +
+                "&\"Calibri,Regular\"&K000000bottom left\t&\"Calibri,Regular\"&K000000bottom center\t&\"Calibri,Regular\"&K000000bottom right\n";
+
+        String expectedOutputWithoutHeadersAndFooters =
+                "Sheet1\n" +
+                "abc\t123\n";
+
+        XSSFExcelExtractor extractor = new XSSFExcelExtractor(
+                XSSFTestDataSamples.openSampleWorkbook("headerFooterTest.xlsx"));
+        try {
+            assertEquals(expectedOutputWithHeadersAndFooters, extractor.getText());
+            extractor.setIncludeHeadersFooters(false);
+            assertEquals(expectedOutputWithoutHeadersAndFooters, extractor.getText());
+        } finally {
+            // release the workbook even when an assertion above fails
+            extractor.close();
+        }
+
+        XSSFEventBasedExcelExtractor fixture =
+                new XSSFEventBasedExcelExtractor(
+                        XSSFTestDataSamples.openSamplePackage("headerFooterTest.xlsx"));
+        try {
+            assertEquals(expectedOutputWithHeadersAndFooters, fixture.getText());
+            fixture.setIncludeHeadersFooters(false);
+            assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
+        } finally {
+            // release the package even when an assertion above fails
+            fixture.close();
+        }
+    }
}