You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

POIOLE2TextExtractor.java 2.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.extractor;
  16. import org.apache.poi.POIDocument;
  17. import org.apache.poi.hpsf.DocumentSummaryInformation;
  18. import org.apache.poi.hpsf.SummaryInformation;
  19. import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
  20. import org.apache.poi.poifs.filesystem.DirectoryEntry;
  21. /**
  22. * Common Parent for OLE2 based Text Extractors
  23. * of POI Documents, such as .doc, .xls
  24. * You will typically find the implementation of
  25. * a given format's text extractor under
  26. * org.apache.poi.[format].extractor .
  27. *
  28. * @see org.apache.poi.hssf.extractor.ExcelExtractor
  29. * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
  30. * @see org.apache.poi.hwpf.extractor.WordExtractor
  31. */
  32. public interface POIOLE2TextExtractor extends POITextExtractor {
  33. /**
  34. * Returns the document information metadata for the document
  35. *
  36. * @return The Document Summary Information or null
  37. * if it could not be read for this document.
  38. */
  39. default DocumentSummaryInformation getDocSummaryInformation() {
  40. return getDocument().getDocumentSummaryInformation();
  41. }
  42. /**
  43. * Returns the summary information metadata for the document.
  44. *
  45. * @return The Summary information for the document or null
  46. * if it could not be read for this document.
  47. */
  48. default SummaryInformation getSummaryInformation() {
  49. return getDocument().getSummaryInformation();
  50. }
  51. /**
  52. * Returns an HPSF powered text extractor for the
  53. * document properties metadata, such as title and author.
  54. *
  55. * @return an instance of POIExtractor that can extract meta-data.
  56. */
  57. @Override
  58. default POITextExtractor getMetadataTextExtractor() {
  59. return new HPSFPropertiesExtractor(this);
  60. }
  61. /**
  62. * Return the underlying DirectoryEntry of this document.
  63. *
  64. * @return the DirectoryEntry that is associated with the POIDocument of this extractor.
  65. */
  66. default DirectoryEntry getRoot() {
  67. return getDocument().getDirectory();
  68. }
  69. /**
  70. * Return the underlying POIDocument
  71. *
  72. * @return the underlying POIDocument
  73. */
  74. @Override
  75. POIDocument getDocument();
  76. }