From d8a63301aa16edf6e7866b81f6ed71163ae2f661 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Thu, 10 Apr 2008 14:26:36 +0000 Subject: [PATCH] Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646313 via svnmerge from https://svn.apache.org:443/repos/asf/poi/trunk ........ r646312 | nick | 2008-04-09 13:46:42 +0100 (Wed, 09 Apr 2008) | 1 line Provide a common ole2 implementation of POITextExtractor, which gives access to the document metadata ........ git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@646818 13f79535-47bb-0310-9956-ffa450edef68 --- build.xml | 1 + .../org/apache/poi/POIOLE2TextExtractor.java | 53 +++++++++++++++++++ .../poi/hssf/extractor/ExcelExtractor.java | 4 +- .../poi/extractor/ExtractorFactory.java | 3 +- .../hdgf/extractor/VisioTextExtractor.java | 4 +- .../hslf/extractor/PowerPointExtractor.java | 4 +- .../poi/hwpf/extractor/WordExtractor.java | 4 +- 7 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 src/java/org/apache/poi/POIOLE2TextExtractor.java diff --git a/build.xml b/build.xml index 5dd4a52fbc..64845c4d61 100644 --- a/build.xml +++ b/build.xml @@ -220,6 +220,7 @@ under the License. + diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java new file mode 100644 index 0000000000..f5aee4cc6d --- /dev/null +++ b/src/java/org/apache/poi/POIOLE2TextExtractor.java @@ -0,0 +1,53 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi; + +import org.apache.poi.hpsf.DocumentSummaryInformation; +import org.apache.poi.hpsf.SummaryInformation; + +/** + * Common Parent for OLE2 based Text Extractors + * of POI Documents, such as .doc, .xls + * You will typically find the implementation of + * a given format's text extractor under + * org.apache.poi.[format].extractor . + * @see org.apache.poi.hssf.extractor.ExcelExtractor + * @see org.apache.poi.hslf.extractor.PowerPointExtractor + * @see org.apache.poi.hdgf.extractor.VisioTextExtractor + * @see org.apache.poi.hwpf.extractor.WordExtractor + */ +public abstract class POIOLE2TextExtractor extends POITextExtractor { + /** + * Creates a new text extractor for the given document + */ + public POIOLE2TextExtractor(POIDocument document) { + super(document); + } + + /** + * Returns the document information metadata for the document + */ + public DocumentSummaryInformation getDocSummaryInformation() { + return document.getDocumentSummaryInformation(); + } + /** + * Returns the summary information metadata for the document + */ + public SummaryInformation getSummaryInformation() { + return document.getSummaryInformation(); + } +} diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index f45f54dff1..2a9c455cac 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -18,7 +18,7 @@ package org.apache.poi.hssf.extractor; import java.io.IOException; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; @@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * the XLS2CSVmra example * @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra */ -public class ExcelExtractor extends POITextExtractor{ +public class ExcelExtractor extends POIOLE2TextExtractor { private HSSFWorkbook wb; private boolean includeSheetNames = true; private boolean formulasNotResults = false; diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 318b68d8f0..12321bfac2 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -29,6 +29,7 @@ import org.openxml4j.opc.Package; import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POITextExtractor; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; @@ -104,7 +105,7 @@ public class ExtractorFactory { throw new IllegalArgumentException("No supported documents found in the OOXML package"); } - public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException { + public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException { // Look for certain entries in the stream, to figure it // out from for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) { diff --git a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java index 034714c7bc..9b1307cee3 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hdgf.HDGFDiagram; import org.apache.poi.hdgf.chunks.Chunk; import org.apache.poi.hdgf.chunks.Chunk.Command; @@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * Can opperate on the command line (outputs to stdout), or * can return the text for you (eg for use with Lucene). */ -public class VisioTextExtractor extends POITextExtractor { +public class VisioTextExtractor extends POIOLE2TextExtractor { private HDGFDiagram hdgf; private POIFSFileSystem fs; diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index f247227007..cd9fa28256 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -23,7 +23,7 @@ package org.apache.poi.hslf.extractor; import java.io.*; import java.util.HashSet; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.hslf.*; import org.apache.poi.hslf.model.*; @@ -36,7 +36,7 @@ import org.apache.poi.hslf.usermodel.*; * @author Nick Burch */ -public class PowerPointExtractor extends POITextExtractor +public class PowerPointExtractor extends POIOLE2TextExtractor { private HSLFSlideShow _hslfshow; private SlideShow _show; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index 6f15ee1f9a..85009459d7 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -22,7 +22,7 @@ import java.io.FileInputStream; import java.io.UnsupportedEncodingException; import java.util.Iterator; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.model.TextPiece; import org.apache.poi.hwpf.usermodel.Paragraph; @@ -37,7 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * * @author Nick Burch (nick at torchbox dot com) */ -public class WordExtractor extends POITextExtractor { +public class WordExtractor extends POIOLE2TextExtractor { private POIFSFileSystem fs; private HWPFDocument doc; -- 2.39.5