import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
protected POIDocument(DirectoryNode dir) {
this.directory = dir;
}
+ /**
+ * @deprecated use {@link POIDocument#POIDocument(DirectoryNode)} instead
+ */
@Deprecated
protected POIDocument(DirectoryNode dir, POIFSFileSystem fs) {
this.directory = dir;
- }
+ }
protected POIDocument(POIFSFileSystem fs) {
- this(fs.getRoot());
+ this(fs.getRoot());
+ }
+ protected POIDocument(NPOIFSFileSystem fs) {
+ this(fs.getRoot());
}
/**
* @param outFS the POIFSFileSystem to write the properties into
* @param writtenEntries a list of POIFS entries to add the property names to
*/
- protected void writeProperties(POIFSFileSystem outFS, List writtenEntries) throws IOException {
+ protected void writeProperties(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
SummaryInformation si = getSummaryInformation();
if(si != null) {
writePropertySet(SummaryInformation.DEFAULT_STREAM_NAME, si, outFS);
* @param excepts is a list of Strings specifying what nodes NOT to copy
*/
protected void copyNodes(POIFSFileSystem source, POIFSFileSystem target,
- List excepts) throws IOException {
+ List<String> excepts) throws IOException {
//System.err.println("CopyNodes called");
DirectoryEntry root = source.getRoot();
DirectoryEntry newRoot = target.getRoot();
- Iterator entries = root.getEntries();
-
+ Iterator<Entry> entries = root.getEntries();
while (entries.hasNext()) {
- Entry entry = (Entry)entries.next();
- if (!isInList(entry.getName(), excepts)) {
+ Entry entry = entries.next();
+ if (!excepts.contains(entry.getName())) {
copyNodeRecursively(entry,newRoot);
}
}
}
- /**
- * Checks to see if the String is in the list, used when copying
- * nodes between one POIFS and another
- */
- private boolean isInList(String entry, List list) {
- for (int k = 0; k < list.size(); k++) {
- if (list.get(k).equals(entry)) {
- return true;
- }
- }
- return false;
- }
-
/**
* Copies an Entry into a target POIFS directory, recursively
*/
DirectoryEntry newTarget = null;
if (entry.isDirectoryEntry()) {
newTarget = target.createDirectory(entry.getName());
- Iterator entries = ((DirectoryEntry)entry).getEntries();
+ Iterator<Entry> entries = ((DirectoryEntry)entry).getEntries();
while (entries.hasNext()) {
- copyNodeRecursively((Entry)entries.next(),newTarget);
+ copyNodeRecursively(entries.next(),newTarget);
}
} else {
DocumentEntry dentry = (DocumentEntry)entry;
// For tracking what we've written out, used if we're
// going to be preserving nodes
- List excepts = new ArrayList(1);
+ List<String> excepts = new ArrayList<String>(1);
// Write out the Workbook stream
fs.createDocument(new ByteArrayInputStream(bytes), "Workbook");
import org.apache.poi.hpbf.model.MainContents;
import org.apache.poi.hpbf.model.QuillContents;
import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* Opens a new publisher document
*/
public HPBFDocument(POIFSFileSystem fs) throws IOException {
- this(fs.getRoot(), fs);
+ this(fs.getRoot());
+ }
+ public HPBFDocument(NPOIFSFileSystem fs) throws IOException {
+ this(fs.getRoot());
}
public HPBFDocument(InputStream inp) throws IOException {
- this(new POIFSFileSystem(inp));
+ this(new POIFSFileSystem(inp));
}
/**
- * Opens an embeded publisher document,
+ * Opens an embedded publisher document,
* at the given directory.
+ * @deprecated Use {@link #HPBFDocument(DirectoryNode)} instead
*/
+ @Deprecated
public HPBFDocument(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
- super(dir, fs);
+ this(dir);
+ }
+ /**
+ * Opens an embedded publisher document,
+ * at the given directory.
+ */
+ public HPBFDocument(DirectoryNode dir) throws IOException {
+ super(dir);
- // Go looking for our interesting child
- // streams
- mainContents = new MainContents(dir);
- quillContents = new QuillContents(dir);
+ // Go looking for our interesting child
+ // streams
+ mainContents = new MainContents(dir);
+ quillContents = new QuillContents(dir);
- // Now the Escher bits
- escherStm = new EscherStm(dir);
- escherDelayStm = new EscherDelayStm(dir);
+ // Now the Escher bits
+ escherStm = new EscherStm(dir);
+ escherDelayStm = new EscherDelayStm(dir);
}
public MainContents getMainContents() {
* Extract text from HPBF Publisher files
*/
public final class PublisherTextExtractor extends POIOLE2TextExtractor {
- private HPBFDocument doc;
- private boolean hyperlinksByDefault = false;
+ private HPBFDocument doc;
+ private boolean hyperlinksByDefault = false;
- public PublisherTextExtractor(HPBFDocument doc) {
- super(doc);
- this.doc = doc;
- }
+ public PublisherTextExtractor(HPBFDocument doc) {
+ super(doc);
+ this.doc = doc;
+ }
+ public PublisherTextExtractor(DirectoryNode dir) throws IOException {
+ this(new HPBFDocument(dir));
+ }
+ public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
+ this(new HPBFDocument(fs));
+ }
+ public PublisherTextExtractor(InputStream is) throws IOException {
+ this(new POIFSFileSystem(is));
+ }
+ /**
+ * Creates an extractor for a publisher document embedded at the
+ * given directory.
+ *
+ * @param dir the directory holding the embedded publisher document
+ * @param fs ignored; the document is opened from <code>dir</code> alone
+ * @deprecated Use {@link #PublisherTextExtractor(DirectoryNode)} instead
+ */
+ @Deprecated
public PublisherTextExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
- this(new HPBFDocument(dir, fs));
+ this(dir);
}
- public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
- this(new HPBFDocument(fs));
- }
- public PublisherTextExtractor(InputStream is) throws IOException {
- this(new POIFSFileSystem(is));
- }
/**
* Should a call to getText() return hyperlinks inline
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hpbf.extractor;
+
+import java.io.File;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hpbf.HPBFDocument;
+
+/**
+ * Tests that {@link PublisherTextExtractor} pulls the expected text, and
+ * optionally the hyperlinks, out of the sample Publisher files.
+ */
+public final class TestPublisherTextExtractor extends TestCase {
+ private static final POIDataSamples _samples = POIDataSamples.getPublisherInstance();
+
+ /**
+ * Smoke test: extraction runs without throwing, both from an
+ * already opened document and from a raw input stream.
+ */
+ public void testBasics() throws Exception {
+ HPBFDocument doc = new HPBFDocument(
+ _samples.openResourceAsStream("Sample.pub")
+ );
+
+ PublisherTextExtractor ext =
+ new PublisherTextExtractor(doc);
+ ext.getText();
+
+ ext = new PublisherTextExtractor(
+ _samples.openResourceAsStream("Simple.pub")
+ );
+ ext.getText();
+ }
+
+ /**
+ * Checks the exact text extracted from Sample.pub and Simple.pub.
+ */
+ public void testContents() throws Exception {
+ HPBFDocument doc = new HPBFDocument(
+ _samples.openResourceAsStream("Sample.pub")
+ );
+
+ PublisherTextExtractor ext =
+ new PublisherTextExtractor(doc);
+ String text = ext.getText();
+
+ assertEquals(
+"This is some text on the first page\n" +
+"It\u2019s in times new roman, font size 10, all normal\n" +
+"" +
+"This is in bold and italic\n" +
+"It\u2019s Arial, 20 point font\n" +
+"It\u2019s in the second textbox on the first page\n" +
+"" +
+"This is the second page\n\n" +
+"" +
+"It is also times new roman, 10 point\n" +
+"" +
+"Table on page 2\nTop right\n" +
+"P2 table left\nP2 table right\n" +
+"Bottom Left\nBottom Right\n" +
+"" +
+"This text is on page two\n" +
+"#This is a link to Apache POI\n" +
+"More normal text\n" +
+"Link to a file\n" +
+"" +
+"More text, more hyperlinks\n" +
+"email link\n" +
+"Final hyperlink\n" +
+"Within doc to page 1\n"
+ , text
+ );
+
+ // Now a simpler one
+ ext = new PublisherTextExtractor(
+ _samples.openResourceAsStream("Simple.pub")
+ );
+ text = ext.getText();
+ assertEquals(
+"0123456789\n" +
+"0123456789abcdef\n" +
+"0123456789abcdef0123456789abcdef\n" +
+"0123456789\n" +
+"0123456789abcdef\n" +
+"0123456789abcdef0123456789abcdef\n" +
+"0123456789abcdef0123456789abcdef0123456789abcdef\n"
+ , text
+ );
+ }
+
+ /**
+ * We have the same file saved for Publisher 98, Publisher
+ * 2000 and Publisher 2007. Check they all agree.
+ */
+ public void testMultipleVersions() throws Exception {
+ HPBFDocument doc;
+
+ doc = new HPBFDocument(
+ _samples.openResourceAsStream("Sample.pub")
+ );
+ String s2007 = (new PublisherTextExtractor(doc)).getText();
+
+ doc = new HPBFDocument(
+ _samples.openResourceAsStream("Sample2000.pub")
+ );
+ String s2000 = (new PublisherTextExtractor(doc)).getText();
+
+ doc = new HPBFDocument(
+ _samples.openResourceAsStream("Sample98.pub")
+ );
+ String s98 = (new PublisherTextExtractor(doc)).getText();
+
+ // Check they all agree
+ assertEquals(s2007, s2000);
+ assertEquals(s2007, s98);
+ }
+
+ /**
+ * Test that the hyperlink extraction stuff works as well
+ * as we can hope it to.
+ */
+ public void testWithHyperlinks() throws Exception {
+ HPBFDocument doc = new HPBFDocument(
+ _samples.openResourceAsStream("LinkAt10.pub")
+ );
+
+ PublisherTextExtractor ext =
+ new PublisherTextExtractor(doc);
+ ext.getText();
+
+ // Default is no hyperlinks
+ assertEquals("1234567890LINK\n", ext.getText());
+
+ // Turn on
+ ext.setHyperlinksByDefault(true);
+ assertEquals("1234567890LINK\n<http://poi.apache.org/>\n", ext.getText());
+
+
+ // Now a much more complex document
+ ext = new PublisherTextExtractor(
+ _samples.openResourceAsStream("Sample.pub")
+ );
+ ext.setHyperlinksByDefault(true);
+ String text = ext.getText();
+
+ assertTrue(text.endsWith(
+ "<http://poi.apache.org/>\n" +
+ "<C:\\Documents and Settings\\Nick\\My Documents\\Booleans.xlsx>\n" +
+ "<>\n" +
+ "<mailto:dev@poi.apache.org?subject=HPBF>\n" +
+ "<mailto:dev@poi.apache.org?subject=HPBF>\n"
+ ));
+ }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.hpbf.extractor;
-
-import java.io.File;
-import java.io.FileInputStream;
-
-import org.apache.poi.hpbf.HPBFDocument;
-import org.apache.poi.POIDataSamples;
-
-import junit.framework.TestCase;
-
-public final class TextPublisherTextExtractor extends TestCase {
- private static final POIDataSamples _samples = POIDataSamples.getPublisherInstance();
-
- public void testBasics() throws Exception {
- HPBFDocument doc = new HPBFDocument(
- _samples.openResourceAsStream("Sample.pub")
- );
-
- PublisherTextExtractor ext =
- new PublisherTextExtractor(doc);
- ext.getText();
-
- ext = new PublisherTextExtractor(
- _samples.openResourceAsStream("Simple.pub")
- );
- ext.getText();
- }
-
- public void testContents() throws Exception {
- HPBFDocument doc = new HPBFDocument(
- _samples.openResourceAsStream("Sample.pub")
- );
-
- PublisherTextExtractor ext =
- new PublisherTextExtractor(doc);
- String text = ext.getText();
-
- assertEquals(
-"This is some text on the first page\n" +
-"It\u2019s in times new roman, font size 10, all normal\n" +
-"" +
-"This is in bold and italic\n" +
-"It\u2019s Arial, 20 point font\n" +
-"It\u2019s in the second textbox on the first page\n" +
-"" +
-"This is the second page\n\n" +
-"" +
-"It is also times new roman, 10 point\n" +
-"" +
-"Table on page 2\nTop right\n" +
-"P2 table left\nP2 table right\n" +
-"Bottom Left\nBottom Right\n" +
-"" +
-"This text is on page two\n" +
-"#This is a link to Apache POI\n" +
-"More normal text\n" +
-"Link to a file\n" +
-"" +
-"More text, more hyperlinks\n" +
-"email link\n" +
-"Final hyperlink\n" +
-"Within doc to page 1\n"
- , text
- );
-
- // Now a simpler one
- ext = new PublisherTextExtractor(
- _samples.openResourceAsStream("Simple.pub")
- );
- text = ext.getText();
- assertEquals(
-"0123456789\n" +
-"0123456789abcdef\n" +
-"0123456789abcdef0123456789abcdef\n" +
-"0123456789\n" +
-"0123456789abcdef\n" +
-"0123456789abcdef0123456789abcdef\n" +
-"0123456789abcdef0123456789abcdef0123456789abcdef\n"
- , text
- );
- }
-
- /**
- * We have the same file saved for Publisher 98, Publisher
- * 2000 and Publisher 2007. Check they all agree.
- * @throws Exception
- */
- public void testMultipleVersions() throws Exception {
- File f;
- HPBFDocument doc;
-
- doc = new HPBFDocument(
- _samples.openResourceAsStream("Sample.pub")
- );
- String s2007 = (new PublisherTextExtractor(doc)).getText();
-
- doc = new HPBFDocument(
- _samples.openResourceAsStream("Sample2000.pub")
- );
- String s2000 = (new PublisherTextExtractor(doc)).getText();
-
- doc = new HPBFDocument(
- _samples.openResourceAsStream("Sample98.pub")
- );
- String s98 = (new PublisherTextExtractor(doc)).getText();
-
- // Check they all agree
- assertEquals(s2007, s2000);
- assertEquals(s2007, s98);
- }
-
- /**
- * Test that the hyperlink extraction stuff works as well
- * as we can hope it to.
- */
- public void testWithHyperlinks() throws Exception {
- HPBFDocument doc = new HPBFDocument(
- _samples.openResourceAsStream("LinkAt10.pub")
- );
-
- PublisherTextExtractor ext =
- new PublisherTextExtractor(doc);
- ext.getText();
-
- // Default is no hyperlinks
- assertEquals("1234567890LINK\n", ext.getText());
-
- // Turn on
- ext.setHyperlinksByDefault(true);
- assertEquals("1234567890LINK\n<http://poi.apache.org/>\n", ext.getText());
-
-
- // Now a much more complex document
- ext = new PublisherTextExtractor(
- _samples.openResourceAsStream("Sample.pub")
- );
- ext.setHyperlinksByDefault(true);
- String text = ext.getText();
-
- assertTrue(text.endsWith(
- "<http://poi.apache.org/>\n" +
- "<C:\\Documents and Settings\\Nick\\My Documents\\Booleans.xlsx>\n" +
- "<>\n" +
- "<mailto:dev@poi.apache.org?subject=HPBF>\n" +
- "<mailto:dev@poi.apache.org?subject=HPBF>\n"
- ));
- }
-}