選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

XSSFBEventBasedExcelExtractor.java 6.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xssf.extractor;
  16. import java.io.IOException;
  17. import java.io.InputStream;
  18. import org.apache.poi.POIXMLTextExtractor;
  19. import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  20. import org.apache.poi.openxml4j.opc.OPCPackage;
  21. import org.apache.poi.ss.usermodel.DataFormatter;
  22. import org.apache.poi.xssf.binary.XSSFBCommentsTable;
  23. import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
  24. import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
  25. import org.apache.poi.xssf.binary.XSSFBSheetHandler;
  26. import org.apache.poi.xssf.binary.XSSFBStylesTable;
  27. import org.apache.poi.xssf.eventusermodel.XSSFBReader;
  28. import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
  29. import org.apache.poi.xssf.usermodel.XSSFRelation;
  30. import org.apache.xmlbeans.XmlException;
  31. import org.xml.sax.SAXException;
  32. /**
  33. * Implementation of a text extractor or xlsb Excel
  34. * files that uses SAX-like binary parsing.
  35. */
  36. public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
  37. implements org.apache.poi.ss.extractor.ExcelExtractor {
  38. public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
  39. XSSFRelation.XLSB_BINARY_WORKBOOK
  40. };
  41. private boolean handleHyperlinksInCells = false;
  42. public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
  43. super(path);
  44. }
  45. public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
  46. super(container);
  47. }
  48. public static void main(String[] args) throws Exception {
  49. if (args.length < 1) {
  50. System.err.println("Use:");
  51. System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>");
  52. System.exit(1);
  53. }
  54. POIXMLTextExtractor extractor =
  55. new XSSFBEventBasedExcelExtractor(args[0]);
  56. System.out.println(extractor.getText());
  57. extractor.close();
  58. }
  59. public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
  60. this.handleHyperlinksInCells = handleHyperlinksInCells;
  61. }
  62. /**
  63. * Should we return the formula itself, and not
  64. * the result it produces? Default is false
  65. * This is currently unsupported for xssfb
  66. */
  67. @Override
  68. public void setFormulasNotResults(boolean formulasNotResults) {
  69. throw new IllegalArgumentException("Not currently supported");
  70. }
  71. /**
  72. * Processes the given sheet
  73. */
  74. public void processSheet(
  75. SheetContentsHandler sheetContentsExtractor,
  76. XSSFBStylesTable styles,
  77. XSSFBCommentsTable comments,
  78. XSSFBSharedStringsTable strings,
  79. InputStream sheetInputStream)
  80. throws IOException, SAXException {
  81. DataFormatter formatter;
  82. if (locale == null) {
  83. formatter = new DataFormatter();
  84. } else {
  85. formatter = new DataFormatter(locale);
  86. }
  87. XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
  88. sheetInputStream,
  89. styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
  90. );
  91. xssfbSheetHandler.parse();
  92. }
  93. /**
  94. * Processes the file and returns the text
  95. */
  96. public String getText() {
  97. try {
  98. XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
  99. XSSFBReader xssfbReader = new XSSFBReader(container);
  100. XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
  101. XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
  102. StringBuffer text = new StringBuffer();
  103. SheetTextExtractor sheetExtractor = new SheetTextExtractor();
  104. XSSFBHyperlinksTable hyperlinksTable = null;
  105. while (iter.hasNext()) {
  106. InputStream stream = iter.next();
  107. if (includeSheetNames) {
  108. text.append(iter.getSheetName());
  109. text.append('\n');
  110. }
  111. if (handleHyperlinksInCells) {
  112. hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
  113. }
  114. XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
  115. processSheet(sheetExtractor, styles, comments, strings, stream);
  116. if (includeHeadersFooters) {
  117. sheetExtractor.appendHeaderText(text);
  118. }
  119. sheetExtractor.appendCellText(text);
  120. if (includeTextBoxes) {
  121. processShapes(iter.getShapes(), text);
  122. }
  123. if (includeHeadersFooters) {
  124. sheetExtractor.appendFooterText(text);
  125. }
  126. sheetExtractor.reset();
  127. stream.close();
  128. }
  129. return text.toString();
  130. } catch (IOException e) {
  131. System.err.println(e);
  132. return null;
  133. } catch (SAXException se) {
  134. System.err.println(se);
  135. return null;
  136. } catch (OpenXML4JException o4je) {
  137. System.err.println(o4je);
  138. return null;
  139. }
  140. }
  141. }