<path id="ooxml.classpath">
<path refid="main.classpath"/>
<path refid="scratchpad.classpath"/>
+ <pathelement location="${scratchpad.output.dir}"/>
<fileset dir="${ooxml.lib}">
<include name="*.jar" />
</fileset>
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.SummaryInformation;
+
+/**
+ * Common parent for OLE2 based text extractors
+ * of POI documents, such as .doc and .xls files.
+ * <p>
+ * On top of what the parent extractor class offers, this class
+ * exposes the two property-set metadata objects of the wrapped
+ * document: the {@link DocumentSummaryInformation} and the
+ * {@link SummaryInformation}.
+ * <p>
+ * You will typically find the implementation of
+ * a given format's text extractor under
+ * org.apache.poi.[format].extractor .
+ * @see org.apache.poi.hssf.extractor.ExcelExtractor
+ * @see org.apache.poi.hslf.extractor.PowerPointExtractor
+ * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
+ * @see org.apache.poi.hwpf.extractor.WordExtractor
+ */
+public abstract class POIOLE2TextExtractor extends POITextExtractor {
+ /**
+ * Creates a new text extractor for the given document.
+ * The document is handed straight to the parent constructor;
+ * no other work is done here.
+ *
+ * @param document the document to extract text and metadata from
+ */
+ public POIOLE2TextExtractor(POIDocument document) {
+ super(document);
+ }
+
+ /**
+ * Returns the document summary information metadata for the document.
+ * Delegates to <code>getDocumentSummaryInformation()</code> on the
+ * <code>document</code> field, which is not declared in this class
+ * (presumably inherited from the parent extractor - TODO confirm).
+ *
+ * @return whatever the underlying document reports as its
+ *  document summary information
+ */
+ public DocumentSummaryInformation getDocSummaryInformation() {
+ return document.getDocumentSummaryInformation();
+ }
+ /**
+ * Returns the summary information metadata for the document.
+ * Delegates to <code>getSummaryInformation()</code> on the
+ * <code>document</code> field, which is not declared in this class
+ * (presumably inherited from the parent extractor - TODO confirm).
+ *
+ * @return whatever the underlying document reports as its
+ *  summary information
+ */
+ public SummaryInformation getSummaryInformation() {
+ return document.getSummaryInformation();
+ }
+}
import java.io.IOException;
-import org.apache.poi.POITextExtractor;
+import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
* the XLS2CSVmra example
* @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra
*/
-public class ExcelExtractor extends POITextExtractor{
+public class ExcelExtractor extends POIOLE2TextExtractor {
private HSSFWorkbook wb;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
import org.openxml4j.opc.PackagePart;
import org.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
throw new IllegalArgumentException("No supported documents found in the OOXML package");
}
- public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
+ public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
// Look for certain entries in the stream, to figure it
// out from
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
import java.io.InputStream;
import java.util.ArrayList;
-import org.apache.poi.POITextExtractor;
+import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.Chunk.Command;
* Can operate on the command line (outputs to stdout), or
* can return the text for you (eg for use with Lucene).
*/
-public class VisioTextExtractor extends POITextExtractor {
+public class VisioTextExtractor extends POIOLE2TextExtractor {
private HDGFDiagram hdgf;
private POIFSFileSystem fs;
import java.io.*;
import java.util.HashSet;
-import org.apache.poi.POITextExtractor;
+import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
* @author Nick Burch
*/
-public class PowerPointExtractor extends POITextExtractor
+public class PowerPointExtractor extends POIOLE2TextExtractor
{
private HSLFSlideShow _hslfshow;
private SlideShow _show;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
-import org.apache.poi.POITextExtractor;
+import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.usermodel.Paragraph;
*
* @author Nick Burch (nick at torchbox dot com)
*/
-public class WordExtractor extends POITextExtractor {
+public class WordExtractor extends POIOLE2TextExtractor {
private POIFSFileSystem fs;
private HWPFDocument doc;