Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

XSSFBEventBasedExcelExtractor.java 5.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xssf.extractor;
  16. import java.io.IOException;
  17. import java.io.InputStream;
  18. import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  19. import org.apache.poi.openxml4j.opc.OPCPackage;
  20. import org.apache.poi.ss.usermodel.DataFormatter;
  21. import org.apache.poi.util.POILogFactory;
  22. import org.apache.poi.util.POILogger;
  23. import org.apache.poi.xssf.binary.XSSFBCommentsTable;
  24. import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
  25. import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
  26. import org.apache.poi.xssf.binary.XSSFBSheetHandler;
  27. import org.apache.poi.xssf.binary.XSSFBStylesTable;
  28. import org.apache.poi.xssf.eventusermodel.XSSFBReader;
  29. import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
  30. import org.apache.poi.xssf.model.SharedStrings;
  31. import org.apache.poi.xssf.usermodel.XSSFRelation;
  32. import org.apache.xmlbeans.XmlException;
  33. import org.xml.sax.SAXException;
  34. /**
  35. * Implementation of a text extractor or xlsb Excel
  36. * files that uses SAX-like binary parsing.
  37. *
  38. * @since 3.16-beta3
  39. */
  40. public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor {
  41. private static final POILogger LOGGER = POILogFactory.getLogger(XSSFBEventBasedExcelExtractor.class);
  42. public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[]{
  43. XSSFRelation.XLSB_BINARY_WORKBOOK
  44. };
  45. private boolean handleHyperlinksInCells;
  46. public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
  47. super(path);
  48. }
  49. public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
  50. super(container);
  51. }
  52. public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
  53. this.handleHyperlinksInCells = handleHyperlinksInCells;
  54. }
  55. /**
  56. * Should we return the formula itself, and not
  57. * the result it produces? Default is false
  58. * This is currently unsupported for xssfb
  59. */
  60. @Override
  61. public void setFormulasNotResults(boolean formulasNotResults) {
  62. throw new IllegalArgumentException("Not currently supported");
  63. }
  64. /**
  65. * Processes the given sheet
  66. */
  67. public void processSheet(
  68. SheetContentsHandler sheetContentsExtractor,
  69. XSSFBStylesTable styles,
  70. XSSFBCommentsTable comments,
  71. SharedStrings strings,
  72. InputStream sheetInputStream)
  73. throws IOException {
  74. DataFormatter formatter;
  75. if (getLocale() == null) {
  76. formatter = new DataFormatter();
  77. } else {
  78. formatter = new DataFormatter(getLocale());
  79. }
  80. XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
  81. sheetInputStream,
  82. styles, comments, strings, sheetContentsExtractor, formatter, getFormulasNotResults()
  83. );
  84. xssfbSheetHandler.parse();
  85. }
  86. /**
  87. * Processes the file and returns the text
  88. */
  89. public String getText() {
  90. try {
  91. XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(getPackage());
  92. XSSFBReader xssfbReader = new XSSFBReader(getPackage());
  93. XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
  94. XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
  95. StringBuilder text = new StringBuilder(64);
  96. SheetTextExtractor sheetExtractor = new SheetTextExtractor();
  97. XSSFBHyperlinksTable hyperlinksTable = null;
  98. while (iter.hasNext()) {
  99. InputStream stream = iter.next();
  100. if (getIncludeSheetNames()) {
  101. text.append(iter.getSheetName());
  102. text.append('\n');
  103. }
  104. if (handleHyperlinksInCells) {
  105. hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
  106. }
  107. XSSFBCommentsTable comments = getIncludeCellComments() ? iter.getXSSFBSheetComments() : null;
  108. processSheet(sheetExtractor, styles, comments, strings, stream);
  109. if (getIncludeHeadersFooters()) {
  110. sheetExtractor.appendHeaderText(text);
  111. }
  112. sheetExtractor.appendCellText(text);
  113. if (getIncludeTextBoxes()) {
  114. processShapes(iter.getShapes(), text);
  115. }
  116. if (getIncludeHeadersFooters()) {
  117. sheetExtractor.appendFooterText(text);
  118. }
  119. sheetExtractor.reset();
  120. stream.close();
  121. }
  122. return text.toString();
  123. } catch (IOException | OpenXML4JException | SAXException e) {
  124. LOGGER.log(POILogger.WARN, e);
  125. return null;
  126. }
  127. }
  128. }