You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

POIOLE2TextExtractor.java 3.3KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi;
  16. import org.apache.poi.hpsf.DocumentSummaryInformation;
  17. import org.apache.poi.hpsf.SummaryInformation;
  18. import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
  19. import org.apache.poi.poifs.filesystem.DirectoryEntry;
  20. /**
  21. * Common Parent for OLE2 based Text Extractors
  22. * of POI Documents, such as .doc, .xls
  23. * You will typically find the implementation of
  24. * a given format's text extractor under
  25. * org.apache.poi.[format].extractor .
  26. * @see org.apache.poi.hssf.extractor.ExcelExtractor
  27. * @see org.apache.poi.hslf.extractor.PowerPointExtractor
  28. * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
  29. * @see org.apache.poi.hwpf.extractor.WordExtractor
  30. */
  31. public abstract class POIOLE2TextExtractor extends POITextExtractor {
  32. /** The POIDocument that's open */
  33. protected POIDocument document;
  34. /**
  35. * Creates a new text extractor for the given document
  36. *
  37. * @param document The POIDocument to use in this extractor.
  38. */
  39. public POIOLE2TextExtractor(POIDocument document) {
  40. this.document = document;
  41. }
  42. /**
  43. * Creates a new text extractor, using the same
  44. * document as another text extractor. Normally
  45. * only used by properties extractors.
  46. */
  47. protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) {
  48. this.document = otherExtractor.document;
  49. }
  50. /**
  51. * Returns the document information metadata for the document
  52. *
  53. * @return The Document Summary Information or null
  54. * if it could not be read for this document.
  55. */
  56. public DocumentSummaryInformation getDocSummaryInformation() {
  57. return document.getDocumentSummaryInformation();
  58. }
  59. /**
  60. * Returns the summary information metadata for the document.
  61. *
  62. * @return The Summary information for the document or null
  63. * if it could not be read for this document.
  64. */
  65. public SummaryInformation getSummaryInformation() {
  66. return document.getSummaryInformation();
  67. }
  68. /**
  69. * Returns an HPSF powered text extractor for the
  70. * document properties metadata, such as title and author.
  71. *
  72. * @return an instance of POIExtractor that can extract meta-data.
  73. */
  74. public POITextExtractor getMetadataTextExtractor() {
  75. return new HPSFPropertiesExtractor(this);
  76. }
  77. /**
  78. * Return the underlying DirectoryEntry of this document.
  79. *
  80. * @return the DirectoryEntry that is associated with the POIDocument of this extractor.
  81. */
  82. public DirectoryEntry getRoot()
  83. {
  84. return document.directory;
  85. }
  86. }