From: Nick Burch Date: Wed, 20 Aug 2008 20:13:08 +0000 (+0000) Subject: HPBF text extractor and unit tests X-Git-Tag: REL_3_2_FINAL~141 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=f56ab225218964b8205b2aeb3ca6034e1865ed22;p=poi.git HPBF text extractor and unit tests git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@687443 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java new file mode 100644 index 0000000000..2257283a0f --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java @@ -0,0 +1,78 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hpbf.extractor; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.POIOLE2TextExtractor; +import org.apache.poi.hpbf.HPBFDocument; +import org.apache.poi.hpbf.model.qcbits.QCBit; +import org.apache.poi.hpbf.model.qcbits.QCTextBit; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + +/** + * Extract text from HPBF Publisher files + */ +public class PublisherTextExtractor extends POIOLE2TextExtractor { + private HPBFDocument doc; + + public PublisherTextExtractor(HPBFDocument doc) { + super(doc); + this.doc = doc; + } + public PublisherTextExtractor(POIFSFileSystem fs) throws IOException { + this(new HPBFDocument(fs)); + } + public PublisherTextExtractor(InputStream is) throws IOException { + this(new POIFSFileSystem(is)); + } + + public String getText() { + StringBuffer text = new StringBuffer(); + + // Get the text from the Quill Contents + QCBit[] bits = doc.getQuillContents().getBits(); + for(int i=0; i"); + } + + for(int i=0; i