aboutsummaryrefslogtreecommitdiffstats
path: root/src/scratchpad
diff options
context:
space:
mode:
author Andreas Beeker <kiwiwings@apache.org> 2020-08-13 21:08:24 +0000
committer Andreas Beeker <kiwiwings@apache.org> 2020-08-13 21:08:24 +0000
commit dfdf9e6d6f470b82ad2a6b77e3059dd0df23905b (patch)
tree cf5cdd45adcf98e078beb6e3841cfc7a502a7dd6 /src/scratchpad
parent 4bf968d6bd96d51347380f3127e64dbb525c664d (diff)
download poi-dfdf9e6d6f470b82ad2a6b77e3059dd0df23905b.tar.gz
poi-dfdf9e6d6f470b82ad2a6b77e3059dd0df23905b.zip
#64411 - Provide JigSaw modules
- rework extractors - see bugzilla entry for more information git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1880839 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
-rw-r--r-- src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java 132
-rw-r--r-- src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java 37
-rw-r--r-- src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java 44
-rw-r--r-- src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java 279
-rw-r--r-- src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java 103
-rw-r--r-- src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java 35
-rw-r--r-- src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java 61
-rw-r--r-- src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java 30
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java 53
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java 51
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java 34
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java 53
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java 282
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java 14
14 files changed, 486 insertions, 722 deletions
diff --git a/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java
index 73d9f74844..16711d0b11 100644
--- a/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java
+++ b/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java
@@ -17,44 +17,66 @@
package org.apache.poi.extractor.ole2;
import java.io.ByteArrayInputStream;
+import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Iterator;
import java.util.List;
+import java.util.stream.StreamSupport;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.extractor.ExtractorProvider;
import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.extractor.POITextExtractor;
-import org.apache.poi.extractor.OLE2ExtractorFactory;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hslf.usermodel.HSLFShape;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.extractor.OutlookTextExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
- * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
+ * Scratchpad-specific logic for {@link ExtractorFactory} and
* {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with
* no Scratchpad jar (though without functionality!)
* <p>Note - should not be used standalone, always use via the other
* two classes</p>
*/
@SuppressWarnings("WeakerAccess")
-public class OLE2ScratchpadExtractorFactory {
+public class OLE2ScratchpadExtractorFactory implements ExtractorProvider {
private static final POILogger logger = POILogFactory.getLogger(OLE2ScratchpadExtractorFactory.class);
+ @Override
+ public boolean accepts(FileMagic fm) {
+ return FileMagic.OLE2 == fm;
+ }
+
+ @Override
+ public POITextExtractor create(File file, String password) throws IOException {
+ return create(new POIFSFileSystem(file, true).getRoot(), password);
+ }
+
+ @Override
+ public POITextExtractor create(InputStream inputStream, String password) throws IOException {
+ return create(new POIFSFileSystem(inputStream).getRoot(), password);
+ }
+
/**
* Look for certain entries in the stream, to figure
* out what format is desired
@@ -66,48 +88,54 @@ public class OLE2ScratchpadExtractorFactory {
*
* @throws IOException when the format specific extraction fails because of invalid entries
*/
- public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
- if (poifsDir.hasEntry("WordDocument")) {
- // Old or new style word document?
- try {
- return new WordExtractor(poifsDir);
- } catch (OldWordFileFormatException e) {
- return new Word6Extractor(poifsDir);
+ public POITextExtractor create(DirectoryNode poifsDir, String password) throws IOException {
+ final String oldPW = Biff8EncryptionKey.getCurrentUserPassword();
+ try {
+ Biff8EncryptionKey.setCurrentUserPassword(password);
+ if (poifsDir.hasEntry("WordDocument")) {
+ // Old or new style word document?
+ try {
+ return new WordExtractor(poifsDir);
+ } catch (OldWordFileFormatException e) {
+ return new Word6Extractor(poifsDir);
+ }
}
- }
- if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
- return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
- }
+ if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
+ return new SlideShowExtractor<HSLFShape, HSLFTextParagraph>(SlideShowFactory.create(poifsDir));
+ }
- if (poifsDir.hasEntry("VisioDocument")) {
- return new VisioTextExtractor(poifsDir);
- }
+ if (poifsDir.hasEntry("VisioDocument")) {
+ return new VisioTextExtractor(poifsDir);
+ }
- if (poifsDir.hasEntry("Quill")) {
- return new PublisherTextExtractor(poifsDir);
- }
+ if (poifsDir.hasEntry("Quill")) {
+ return new PublisherTextExtractor(poifsDir);
+ }
- final String[] outlookEntryNames = new String[] {
- // message bodies, saved as plain text (PtypString)
- // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
- // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
- // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx
- // @see org.apache.poi.hsmf.Types.MAPIType
- "__substg1.0_1000001E", //PidTagBody ASCII
- "__substg1.0_1000001F", //PidTagBody Unicode
- "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
- "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
- "__substg1.0_0037001E", //PidTagSubject ASCII
- "__substg1.0_0037001F", //PidTagSubject Unicode
- };
- for (String entryName : outlookEntryNames) {
- if (poifsDir.hasEntry(entryName)) {
- return new OutlookTextExtractor(poifsDir);
+ final String[] outlookEntryNames = new String[]{
+ // message bodies, saved as plain text (PtypString)
+ // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
+ // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
+ // https://msdn.microsoft.com/en-us/library/cc433490(v=exchg.80).aspx
+ // @see org.apache.poi.hsmf.Types.MAPIType
+ "__substg1.0_1000001E", //PidTagBody ASCII
+ "__substg1.0_1000001F", //PidTagBody Unicode
+ "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
+ "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
+ "__substg1.0_0037001E", //PidTagSubject ASCII
+ "__substg1.0_0037001F", //PidTagSubject Unicode
+ };
+ for (String entryName : outlookEntryNames) {
+ if (poifsDir.hasEntry(entryName)) {
+ return new OutlookTextExtractor(poifsDir);
+ }
}
+ } finally {
+ Biff8EncryptionKey.setCurrentUserPassword(oldPW);
}
- throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+ return null;
}
/**
@@ -120,10 +148,9 @@ public class OLE2ScratchpadExtractorFactory {
* @param ext the extractor holding the directory to start parsing
* @param dirs a list to be filled with directory references holding embedded
* @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries
- *
- * @throws IOException when the format specific extraction fails because of invalid entires
*/
- public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
+ @Override
+ public void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) {
// Find all the embedded directories
DirectoryEntry root = ext.getRoot();
if (root == null) {
@@ -132,25 +159,16 @@ public class OLE2ScratchpadExtractorFactory {
if (ext instanceof ExcelExtractor) {
// These are in MBD... under the root
- Iterator<Entry> it = root.getEntries();
- while (it.hasNext()) {
- Entry entry = it.next();
- if (entry.getName().startsWith("MBD")) {
- dirs.add(entry);
- }
- }
+ StreamSupport.stream(root.spliterator(), false)
+ .filter(entry -> entry.getName().startsWith("MBD"))
+ .forEach(dirs::add);
} else if (ext instanceof WordExtractor) {
// These are in ObjectPool -> _... under the root
try {
- DirectoryEntry op = (DirectoryEntry)
- root.getEntry("ObjectPool");
- Iterator<Entry> it = op.getEntries();
- while(it.hasNext()) {
- Entry entry = it.next();
- if(entry.getName().startsWith("_")) {
- dirs.add(entry);
- }
- }
+ DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
+ StreamSupport.stream(op.spliterator(), false)
+ .filter(entry -> entry.getName().startsWith("_"))
+ .forEach(dirs::add);
} catch(FileNotFoundException e) {
logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
// ignored here
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
index d21a7e5d6a..570eaacf4c 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
@@ -17,7 +17,6 @@
package org.apache.poi.hdgf.extractor;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
@@ -38,11 +37,11 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* Can operate on the command line (outputs to stdout), or
* can return the text for you (example: for use with Lucene).
*/
-public final class VisioTextExtractor extends POIOLE2TextExtractor {
+public final class VisioTextExtractor implements POIOLE2TextExtractor {
private HDGFDiagram hdgf;
+ private boolean doCloseFilesystem = true;
public VisioTextExtractor(HDGFDiagram hdgf) {
- super(hdgf);
this.hdgf = hdgf;
}
public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
@@ -91,9 +90,7 @@ public final class VisioTextExtractor extends POIOLE2TextExtractor {
// Capture the text, as long as it isn't
// simply an empty string
String str = cmd.getValue().toString();
- if(str.isEmpty() || "\n".equals(str)) {
- // Ignore empty strings
- } else {
+ if (!(str.isEmpty() || "\n".equals(str))) {
text.add( str );
}
}
@@ -121,21 +118,23 @@ public final class VisioTextExtractor extends POIOLE2TextExtractor {
return text.toString();
}
- public static void main(String[] args) throws Exception {
- if(args.length == 0) {
- System.err.println("Use:");
- System.err.println(" VisioTextExtractor <file.vsd>");
- System.exit(1);
- }
+ @Override
+ public HDGFDiagram getDocument() {
+ return hdgf;
+ }
- try (FileInputStream fis = new FileInputStream(args[0])) {
- VisioTextExtractor extractor =
- new VisioTextExtractor(fis);
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
- // Print not PrintLn as already has \n added to it
- System.out.print(extractor.getText());
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
- extractor.close();
- }
+ @Override
+ public HDGFDiagram getFilesystem() {
+ return hdgf;
}
}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
index bd442b8da6..ac7ed74153 100644
--- a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
@@ -17,35 +17,37 @@
package org.apache.poi.hpbf.extractor;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.hpbf.HPBFDocument;
import org.apache.poi.hpbf.model.qcbits.QCBit;
-import org.apache.poi.hpbf.model.qcbits.QCTextBit;
import org.apache.poi.hpbf.model.qcbits.QCPLCBit.Type12;
+import org.apache.poi.hpbf.model.qcbits.QCTextBit;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* Extract text from HPBF Publisher files
*/
-public final class PublisherTextExtractor extends POIOLE2TextExtractor {
- private HPBFDocument doc;
+public final class PublisherTextExtractor implements POIOLE2TextExtractor {
+ private final HPBFDocument doc;
private boolean hyperlinksByDefault;
+ private boolean doCloseFilesystem = true;
public PublisherTextExtractor(HPBFDocument doc) {
- super(doc);
this.doc = doc;
}
+
public PublisherTextExtractor(DirectoryNode dir) throws IOException {
this(new HPBFDocument(dir));
}
+
public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
this(new HPBFDocument(fs));
}
+
public PublisherTextExtractor(InputStream is) throws IOException {
this(new POIFSFileSystem(is));
}
@@ -66,7 +68,7 @@ public final class PublisherTextExtractor extends POIOLE2TextExtractor {
// Get the text from the Quill Contents
QCBit[] bits = doc.getQuillContents().getBits();
for (QCBit bit1 : bits) {
- if (bit1 != null && bit1 instanceof QCTextBit) {
+ if (bit1 instanceof QCTextBit) {
QCTextBit t = (QCTextBit) bit1;
text.append(t.getText().replace('\r', '\n'));
}
@@ -79,7 +81,7 @@ public final class PublisherTextExtractor extends POIOLE2TextExtractor {
// how to tie that together.
if(hyperlinksByDefault) {
for (QCBit bit : bits) {
- if (bit != null && bit instanceof Type12) {
+ if (bit instanceof Type12) {
Type12 hyperlinks = (Type12) bit;
for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
text.append("<");
@@ -96,19 +98,23 @@ public final class PublisherTextExtractor extends POIOLE2TextExtractor {
return text.toString();
}
+ @Override
+ public HPBFDocument getDocument() {
+ return doc;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
- public static void main(String[] args) throws Exception {
- if(args.length == 0) {
- System.err.println("Use:");
- System.err.println(" PublisherTextExtractor <file.pub>");
- }
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
- for (String arg : args) {
- try (FileInputStream fis = new FileInputStream(arg)) {
- PublisherTextExtractor te = new PublisherTextExtractor(fis);
- System.out.println(te.getText());
- te.close();
- }
- }
+ @Override
+ public HPBFDocument getFilesystem() {
+ return doc;
}
}
diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
deleted file mode 100644
index 650f809253..0000000000
--- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.hslf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.extractor.POIOLE2TextExtractor;
-import org.apache.poi.hslf.usermodel.HSLFObjectShape;
-import org.apache.poi.hslf.usermodel.HSLFShape;
-import org.apache.poi.hslf.usermodel.HSLFSlideShow;
-import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
-import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
-import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.sl.usermodel.SlideShow;
-import org.apache.poi.sl.usermodel.SlideShowFactory;
-import org.apache.poi.util.Removal;
-
-/**
- * This class can be used to extract text from a PowerPoint file. Can optionally
- * also get the notes from one.
- *
- * @deprecated in POI 4.0.0, use {@link SlideShowExtractor} instead
- */
-@SuppressWarnings("WeakerAccess")
-@Deprecated
-@Removal(version="5.0.0")
-public final class PowerPointExtractor extends POIOLE2TextExtractor {
- private final SlideShowExtractor<HSLFShape,HSLFTextParagraph> delegate;
-
- private boolean slidesByDefault = true;
- private boolean notesByDefault;
- private boolean commentsByDefault;
- private boolean masterByDefault;
-
- /**
- * Basic extractor. Returns all the text, and optionally all the notes
- */
- public static void main(String[] args) throws IOException {
- if (args.length < 1) {
- System.err.println("Usage:");
- System.err.println("\tPowerPointExtractor [-notes] <file>");
- System.exit(1);
- }
-
- boolean notes = false;
- boolean comments = false;
- boolean master = true;
-
- String file;
- if (args.length > 1) {
- notes = true;
- file = args[1];
- if (args.length > 2) {
- comments = true;
- }
- } else {
- file = args[0];
- }
-
- try (PowerPointExtractor ppe = new PowerPointExtractor(file)) {
- System.out.println(ppe.getText(true, notes, comments, master));
- }
- }
-
- public PowerPointExtractor(final HSLFSlideShow slideShow) {
- super(slideShow.getSlideShowImpl());
- setFilesystem(slideShow);
- delegate = new SlideShowExtractor<>(slideShow);
- }
-
- /**
- * Creates a PowerPointExtractor, from a file
- *
- * @param fileName The name of the file to extract from
- */
- public PowerPointExtractor(String fileName) throws IOException {
- this(createHSLF(new File(fileName), Biff8EncryptionKey.getCurrentUserPassword(), true));
- }
-
- /**
- * Creates a PowerPointExtractor, from an Input Stream
- *
- * @param iStream The input stream containing the PowerPoint document
- */
- public PowerPointExtractor(InputStream iStream) throws IOException {
- this(createHSLF(iStream, Biff8EncryptionKey.getCurrentUserPassword()));
- }
-
- /**
- * Creates a PowerPointExtractor, from an open POIFSFileSystem
- *
- * @param fs the POIFSFileSystem containing the PowerPoint document
- */
- public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
- this(createHSLF(fs, Biff8EncryptionKey.getCurrentUserPassword()));
- }
-
- /**
- * Creates a PowerPointExtractor, from a specific place
- * inside an open {@link POIFSFileSystem}
- *
- * @param dir the POIFS Directory containing the PowerPoint document
- */
- public PowerPointExtractor(DirectoryNode dir) throws IOException {
- this(new HSLFSlideShow(dir));
- }
-
- /**
- * Creates a PowerPointExtractor, from a HSLFSlideShow
- *
- * @param ss the HSLFSlideShow to extract text from
- */
- public PowerPointExtractor(HSLFSlideShowImpl ss) {
- this(new HSLFSlideShow(ss));
- }
-
- /**
- * Should a call to getText() return slide text? Default is yes
- */
- public void setSlidesByDefault(final boolean slidesByDefault) {
- this.slidesByDefault = slidesByDefault;
- delegate.setSlidesByDefault(slidesByDefault);
- }
-
- /**
- * Should a call to getText() return notes text? Default is no
- */
- public void setNotesByDefault(final boolean notesByDefault) {
- this.notesByDefault = notesByDefault;
- delegate.setNotesByDefault(notesByDefault);
- }
-
- /**
- * Should a call to getText() return comments text? Default is no
- */
- public void setCommentsByDefault(final boolean commentsByDefault) {
- this.commentsByDefault = commentsByDefault;
- delegate.setCommentsByDefault(commentsByDefault);
- }
-
- /**
- * Should a call to getText() return text from master? Default is no
- */
- public void setMasterByDefault(final boolean masterByDefault) {
- this.masterByDefault = masterByDefault;
- delegate.setMasterByDefault(masterByDefault);
- }
-
- /**
- * Fetches all the slide text from the slideshow, but not the notes, unless
- * you've called setSlidesByDefault() and setNotesByDefault() to change this
- */
- @Override
- public String getText() {
- return delegate.getText();
- }
-
- /**
- * Fetches text from the slideshow, be it slide text or note text. Because
- * the final block of text in a TextRun normally have their last \n
- * stripped, we add it back
- *
- * @param getSlideText fetch slide text
- * @param getNoteText fetch note text
- */
- public String getText(boolean getSlideText, boolean getNoteText) {
- return getText(getSlideText,getNoteText,commentsByDefault,masterByDefault);
- }
-
- public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText, boolean getMasterText) {
- delegate.setSlidesByDefault(getSlideText);
- delegate.setNotesByDefault(getNoteText);
- delegate.setCommentsByDefault(getCommentText);
- delegate.setMasterByDefault(getMasterText);
- try {
- return delegate.getText();
- } finally {
- delegate.setSlidesByDefault(slidesByDefault);
- delegate.setNotesByDefault(notesByDefault);
- delegate.setCommentsByDefault(commentsByDefault);
- delegate.setMasterByDefault(masterByDefault);
- }
- }
-
- /**
- * Fetches all the notes text from the slideshow, but not the slide text
- */
- public String getNotes() {
- return getText(false, true, false, false);
- }
-
- @SuppressWarnings("unchecked")
- public List<HSLFObjectShape> getOLEShapes() {
- return (List<HSLFObjectShape>)delegate.getOLEShapes();
- }
-
- /**
- * Helper method to avoid problems with compiling code in Eclipse
- *
- * Eclipse javac has some bugs with complex casts, this method tries
- * to work around this.
- *
- * @param fs The {@link POIFSFileSystem} to read the document from
- * @param password The password that should be used or null if no password is necessary.
- *
- * @return The created SlideShow
- *
- * @throws IOException if an error occurs while reading the data
- */
- private static HSLFSlideShow createHSLF(POIFSFileSystem fs, String password) throws IOException, EncryptedDocumentException {
- // Note: don't change the code here, it is required for Eclipse to compile the code
- SlideShow slideShowOrig = SlideShowFactory.create(fs, password);
- return (HSLFSlideShow)slideShowOrig;
- }
-
- /**
- * Helper method to avoid problems with compiling code in Eclipse
- *
- * Eclipse javac has some bugs with complex casts, this method tries
- * to work around this.
- *
- * @param inp The {@link InputStream} to read data from.
- * @param password The password that should be used or null if no password is necessary.
- *
- * @return The created SlideShow
- *
- * @throws IOException if an error occurs while reading the data
- * @throws EncryptedDocumentException If the wrong password is given for a protected file
- */
- private static HSLFSlideShow createHSLF(InputStream inp, String password) throws IOException, EncryptedDocumentException {
- // Note: don't change the code here, it is required for Eclipse to compile the code
- SlideShow slideShowOrig = SlideShowFactory.create(inp, password);
- return (HSLFSlideShow)slideShowOrig;
- }
-
- /**
- * Helper method to avoid problems with compiling code in Eclipse
- *
- * Eclipse javac has some bugs with complex casts, this method tries
- * to work around this.
- *
- * @param file The file to read data from.
- * @param password The password that should be used or null if no password is necessary.
- * @param readOnly If the SlideShow should be opened in read-only mode to avoid writing back
- * changes when the document is closed.
- *
- * @return The created SlideShow
- *
- * @throws IOException if an error occurs while reading the data
- * @throws EncryptedDocumentException If the wrong password is given for a protected file
- */
- private static HSLFSlideShow createHSLF(File file, String password, boolean readOnly) throws IOException, EncryptedDocumentException {
- // Note: don't change the code here, it is required for Eclipse to compile the code
- SlideShow slideShowOrig = SlideShowFactory.create(file, password, readOnly);
- return (HSLFSlideShow)slideShowOrig;
- }
-}
diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
index 150326b6d0..8370f6c282 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
@@ -33,6 +33,7 @@ import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
+import org.apache.poi.POIDocument;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.common.usermodel.fonts.FontInfo;
import org.apache.poi.ddf.EscherBSERecord;
@@ -40,6 +41,9 @@ import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.hpsf.ClassID;
import org.apache.poi.hpsf.ClassIDPredefined;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException;
@@ -47,6 +51,7 @@ import org.apache.poi.hslf.model.HeadersFooters;
import org.apache.poi.hslf.model.MovieShape;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Ole10Native;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -66,7 +71,7 @@ import org.apache.poi.util.Units;
* TODO: - figure out how to match notes to their correct sheet (will involve
* understanding DocSlideList and DocNotesList) - handle Slide creation cleaner
*/
-public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagraph>, Closeable, GenericRecord {
+public final class HSLFSlideShow extends POIDocument implements SlideShow<HSLFShape,HSLFTextParagraph>, Closeable, GenericRecord {
//arbitrarily selected; may need to increase
private static final int MAX_RECORD_LENGTH = 10_000_000;
@@ -111,6 +116,8 @@ public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagrap
* @param hslfSlideShow the HSLFSlideShow to base on
*/
public HSLFSlideShow(HSLFSlideShowImpl hslfSlideShow) {
+ super(hslfSlideShow.getDirectory());
+
loadSavePhase.set(LoadSavePhase.INIT);
// Get useful things from our base slideshow
@@ -1080,7 +1087,7 @@ public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagrap
public HPSFPropertiesExtractor getMetadataTextExtractor() {
return new HPSFPropertiesExtractor(getSlideShowImpl());
}
-
+
int addToObjListAtom(RecordContainer exObj) {
ExObjList lst = getDocumentRecord().getExObjList(true);
ExObjListAtom objAtom = lst.getExObjListAtom();
@@ -1097,7 +1104,7 @@ public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagrap
Map<String,ClassID> olemap = new HashMap<>();
olemap.put(POWERPOINT_DOCUMENT, ClassIDPredefined.POWERPOINT_V8.getClassID());
// as per BIFF8 spec
- olemap.put("Workbook", ClassIDPredefined.EXCEL_V8.getClassID());
+ olemap.put("Workbook", ClassIDPredefined.EXCEL_V8.getClassID());
// Typically from third party programs
olemap.put("WORKBOOK", ClassIDPredefined.EXCEL_V8.getClassID());
// Typically odd Crystal Reports exports
@@ -1179,4 +1186,94 @@ public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagrap
public List<? extends GenericRecord> getGenericChildren() {
return Arrays.asList(_hslfSlideShow.getRecords());
}
+
+ @Override
+ public void write() throws IOException {
+ getSlideShowImpl().write();
+ }
+
+ @Override
+ public void write(File newFile) throws IOException {
+ getSlideShowImpl().write(newFile);
+ }
+
+ @Override
+ public DocumentSummaryInformation getDocumentSummaryInformation() {
+ return getSlideShowImpl().getDocumentSummaryInformation();
+ }
+
+ @Override
+ public SummaryInformation getSummaryInformation() {
+ return getSlideShowImpl().getSummaryInformation();
+ }
+
+ @Override
+ public void createInformationProperties() {
+ getSlideShowImpl().createInformationProperties();
+ }
+
+ @Override
+ public void readProperties() {
+ getSlideShowImpl().readProperties();
+ }
+
+ @Override
+ protected PropertySet getPropertySet(String setName) throws IOException {
+ return getSlideShowImpl().getPropertySetImpl(setName);
+ }
+
+ @Override
+ protected PropertySet getPropertySet(String setName, EncryptionInfo encryptionInfo) throws IOException {
+ return getSlideShowImpl().getPropertySetImpl(setName, encryptionInfo);
+ }
+
+ @Override
+ protected void writeProperties() throws IOException {
+ getSlideShowImpl().writePropertiesImpl();
+ }
+
+ @Override
+ public void writeProperties(POIFSFileSystem outFS) throws IOException {
+ getSlideShowImpl().writeProperties(outFS);
+ }
+
+ @Override
+ protected void writeProperties(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
+ getSlideShowImpl().writePropertiesImpl(outFS, writtenEntries);
+ }
+
+ @Override
+ protected void validateInPlaceWritePossible() throws IllegalStateException {
+ getSlideShowImpl().validateInPlaceWritePossibleImpl();
+ }
+
+ @Override
+ public DirectoryNode getDirectory() {
+ return getSlideShowImpl().getDirectory();
+ }
+
+ @Override
+ protected void clearDirectory() {
+ getSlideShowImpl().clearDirectoryImpl();
+ }
+
+ @Override
+ protected boolean initDirectory() {
+ return getSlideShowImpl().initDirectoryImpl();
+ }
+
+ @Override
+ protected void replaceDirectory(DirectoryNode newDirectory) {
+ getSlideShowImpl().replaceDirectoryImpl(newDirectory);
+ }
+
+ @Override
+ protected String getEncryptedPropertyStreamName() {
+ return getSlideShowImpl().getEncryptedPropertyStreamName();
+ }
+
+ @Override
+ public EncryptionInfo getEncryptionInfo() throws IOException {
+ return getSlideShowImpl().getEncryptionInfo();
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
index 6f1c633ea3..d616180245 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
@@ -36,6 +36,7 @@ import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
@@ -714,8 +715,6 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
}
-
-
/* ******************* adding methods follow ********************* */
/**
@@ -850,6 +849,38 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
return "EncryptedSummary";
}
+ void writePropertiesImpl() throws IOException {
+ super.writeProperties();
+ }
+
+ PropertySet getPropertySetImpl(String setName) throws IOException {
+ return super.getPropertySet(setName);
+ }
+
+ PropertySet getPropertySetImpl(String setName, EncryptionInfo encryptionInfo) throws IOException {
+ return super.getPropertySet(setName, encryptionInfo);
+ }
+
+ void writePropertiesImpl(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
+ super.writeProperties(outFS, writtenEntries);
+ }
+
+ void validateInPlaceWritePossibleImpl() throws IllegalStateException {
+ super.validateInPlaceWritePossible();
+ }
+
+ void clearDirectoryImpl() {
+ super.clearDirectory();
+ }
+
+ boolean initDirectoryImpl() {
+ return super.initDirectory();
+ }
+
+ void replaceDirectoryImpl(DirectoryNode newDirectory) {
+ super.replaceDirectory(newDirectory);
+ }
+
private static class BufAccessBAOS extends ByteArrayOutputStream {
public byte[] getBuf() {
return buf;
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
deleted file mode 100644
index 09132f639b..0000000000
--- a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hsmf.extractor;
-
-import org.apache.poi.hsmf.MAPIMessage;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.Removal;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * A text extractor for HSMF (Outlook) .msg files.
- * Outputs in a format somewhat like a plain text email.
- *
- * @deprecated use @{link OutlookTextExtractor} instead
- */
-@Deprecated
-@Removal(version = "5.0.0")
-public class OutlookTextExtactor extends OutlookTextExtractor {
- public OutlookTextExtactor(MAPIMessage msg) {
- super(msg);
- }
-
- public OutlookTextExtactor(DirectoryNode poifsDir) throws IOException {
- super(new MAPIMessage(poifsDir));
- }
-
- public OutlookTextExtactor(POIFSFileSystem fs) throws IOException {
- super(new MAPIMessage(fs));
- }
-
- public OutlookTextExtactor(InputStream inp) throws IOException {
- super(new MAPIMessage(inp));
- }
-
- public static void main(String[] args) throws Exception {
- for (String filename : args) {
- try (POIFSFileSystem poifs = new POIFSFileSystem(new File(filename));
- OutlookTextExtractor extractor = new OutlookTextExtractor(poifs)) {
- System.out.println(extractor.getText());
- }
- }
- }
-}
diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java
index e11f005fe3..a818d03280 100644
--- a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java
@@ -42,9 +42,12 @@ import org.apache.poi.util.LocaleUtil;
*
* @since 4.1.2
*/
-public class OutlookTextExtractor extends POIOLE2TextExtractor {
+public class OutlookTextExtractor implements POIOLE2TextExtractor {
+ private final MAPIMessage msg;
+ private boolean doCloseFilesystem = true;
+
public OutlookTextExtractor(MAPIMessage msg) {
- super(msg);
+ this.msg = msg;
}
public OutlookTextExtractor(DirectoryNode poifsDir) throws IOException {
@@ -76,14 +79,13 @@ public class OutlookTextExtractor extends POIOLE2TextExtractor {
* Returns the underlying MAPI message
*/
public MAPIMessage getMAPIMessage() {
- return (MAPIMessage) document;
+ return msg;
}
/**
* Outputs something a little like a RFC822 email
*/
public String getText() {
- MAPIMessage msg = (MAPIMessage) document;
StringBuilder s = new StringBuilder();
// See if we can get a suitable encoding for any
@@ -201,4 +203,24 @@ public class OutlookTextExtractor extends POIOLE2TextExtractor {
}
s.append("\n");
}
+
+ @Override
+ public MAPIMessage getDocument() {
+ return msg;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public MAPIMessage getFilesystem() {
+ return msg;
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java
index 1d509dd1c6..526d21be35 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java
@@ -31,13 +31,14 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* Class to extract the text from old (Word 6 / Word 95) Word Documents.
*
* This should only be used on the older files, for most uses you
- * should call {@link WordExtractor} which deals properly
+ * should call {@link WordExtractor} which deals properly
* with HWPF.
*
* @author Nick Burch
*/
-public final class Word6Extractor extends POIOLE2TextExtractor {
+public final class Word6Extractor implements POIOLE2TextExtractor {
private HWPFOldDocument doc;
+ private boolean doCloseFilesystem = true;
/**
* Create a new Word Extractor
@@ -49,12 +50,11 @@ public final class Word6Extractor extends POIOLE2TextExtractor {
/**
* Create a new Word Extractor
- *
+ *
* @param fs
* POIFSFileSystem containing the word file
*/
- public Word6Extractor( POIFSFileSystem fs ) throws IOException
- {
+ public Word6Extractor( POIFSFileSystem fs ) throws IOException {
this( fs.getRoot() );
}
@@ -62,14 +62,11 @@ public final class Word6Extractor extends POIOLE2TextExtractor {
* @deprecated Use {@link #Word6Extractor(DirectoryNode)} instead
*/
@Deprecated
- public Word6Extractor( DirectoryNode dir, POIFSFileSystem fs )
- throws IOException
- {
+ public Word6Extractor( DirectoryNode dir, POIFSFileSystem fs ) throws IOException {
this( dir );
}
- public Word6Extractor( DirectoryNode dir ) throws IOException
- {
+ public Word6Extractor( DirectoryNode dir ) throws IOException {
this( new HWPFOldDocument( dir ) );
}
@@ -78,7 +75,6 @@ public final class Word6Extractor extends POIOLE2TextExtractor {
* @param doc The HWPFOldDocument to extract from
*/
public Word6Extractor(HWPFOldDocument doc) {
- super(doc);
this.doc = doc;
}
@@ -101,7 +97,7 @@ public final class Word6Extractor extends POIOLE2TextExtractor {
ret = new String[doc.getTextTable().getTextPieces().size()];
for(int i=0; i<ret.length; i++) {
ret[i] = doc.getTextTable().getTextPieces().get(i).getStringBuilder().toString();
-
+
// Fix the line endings
ret[i] = ret[i].replaceAll("\r", "\ufffe");
ret[i] = ret[i].replaceAll("\ufffe","\r\n");
@@ -111,25 +107,40 @@ public final class Word6Extractor extends POIOLE2TextExtractor {
return ret;
}
- public String getText()
- {
- try
- {
+ public String getText() {
+ try {
WordToTextConverter wordToTextConverter = new WordToTextConverter();
wordToTextConverter.processDocument( doc );
return wordToTextConverter.getText();
- }
- catch ( Exception exc )
- {
+ } catch ( Exception exc ) {
// fall-back
StringBuilder text = new StringBuilder();
- for ( String t : getParagraphText() )
- {
+ for ( String t : getParagraphText() ) {
text.append( t );
}
return text.toString();
}
}
+
+ @Override
+ public HWPFOldDocument getDocument() {
+ return doc;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public HWPFOldDocument getFilesystem() {
+ return doc;
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
index b0216a03e3..1cd5d0d654 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
@@ -17,7 +17,6 @@
package org.apache.poi.hwpf.extractor;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -39,8 +38,9 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
*
* @author Nick Burch
*/
-public final class WordExtractor extends POIOLE2TextExtractor {
- private HWPFDocument doc;
+public final class WordExtractor implements POIOLE2TextExtractor {
+ private final HWPFDocument doc;
+ private boolean doCloseFilesystem = true;
/**
* Create a new Word Extractor
@@ -73,30 +73,10 @@ public final class WordExtractor extends POIOLE2TextExtractor {
* The HWPFDocument to extract from
*/
public WordExtractor( HWPFDocument doc ) {
- super( doc );
this.doc = doc;
}
/**
- * Command line extractor, so people will stop moaning that they can't just
- * run this.
- */
- public static void main( String[] args ) throws IOException {
- if ( args.length == 0 ) {
- System.err.println( "Use:" );
- System.err
- .println( " java org.apache.poi.hwpf.extractor.WordExtractor <filename>" );
- System.exit( 1 );
- }
-
- // Process the first argument as a file
- InputStream fin = new FileInputStream( args[0] );
- try (WordExtractor extractor = new WordExtractor(fin)) {
- System.out.println(extractor.getText());
- }
- }
-
- /**
* Get the text from the word file, as an array with one String per
* paragraph
*/
@@ -142,7 +122,7 @@ public final class WordExtractor extends POIOLE2TextExtractor {
return getParagraphText( r );
}
- protected static String[] getParagraphText( Range r ) {
+ static String[] getParagraphText( Range r ) {
String[] ret;
ret = new String[r.numParagraphs()];
for ( int i = 0; i < ret.length; i++ ) {
@@ -287,8 +267,27 @@ public final class WordExtractor extends POIOLE2TextExtractor {
/**
* Removes any fields (eg macros, page markers etc) from the string.
*/
- public static String stripFields( String text )
- {
+ public static String stripFields( String text ) {
return Range.stripFields( text );
}
+
+ @Override
+ public HWPFDocument getDocument() {
+ return doc;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public HWPFDocument getFilesystem() {
+ return doc;
+ }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
index a1db11c170..aaa1cb4be8 100644
--- a/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
@@ -19,12 +19,9 @@ package org.apache.poi.hdgf.extractor;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.PrintStream;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hdgf.HDGFDiagram;
@@ -32,7 +29,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.Test;
public final class TestVisioExtractor {
- private static POIDataSamples _dgTests = POIDataSamples.getDiagramInstance();
+ private static final POIDataSamples _dgTests = POIDataSamples.getDiagramInstance();
private final String defFilename = "Test_Visio-Some_Random_Text.vsd";
private final int defTextChunks = 5;
@@ -63,7 +60,7 @@ public final class TestVisioExtractor {
is3.close();
HDGFDiagram hdgf3 = new HDGFDiagram(poifs3);
-
+
VisioTextExtractor extractor3 = new VisioTextExtractor(hdgf3);
assertNotNull(extractor3);
assertNotNull(extractor3.getAllText());
@@ -97,7 +94,7 @@ public final class TestVisioExtractor {
@Test
public void testProblemFiles() throws Exception {
String[] files = {
- "44594.vsd", "44594-2.vsd",
+ "44594.vsd", "44594-2.vsd",
"ShortChunk1.vsd", "ShortChunk2.vsd", "ShortChunk3.vsd",
"NegativeChunkLength.vsd", "NegativeChunkLength2.vsd"
};
@@ -108,31 +105,6 @@ public final class TestVisioExtractor {
}
}
- @Test
- public void testMain() throws Exception {
- PrintStream oldOut = System.out;
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream capture = new PrintStream(baos);
- System.setOut(capture);
-
- String path = _dgTests.getFile(defFilename).getPath();
- VisioTextExtractor.main(new String[] {path});
-
- // Put things back
- System.setOut(oldOut);
-
- // Check
- capture.flush();
- String text = baos.toString();
- // YK: stdout can contain lots of other stuff if logging is sent to console
- // ( -Dorg.apache.poi.util.POILogger=org.apache.poi.util.SystemOutLogger)
- assertTrue( text.contains(
- "text\nView\n" +
- "Test View\nI am a test view\n" +
- "Some random text, on a page\n"
- ));
- }
-
private VisioTextExtractor openExtractor(String fileName) throws IOException {
try (InputStream is = _dgTests.openResourceAsStream(fileName)) {
return new VisioTextExtractor(is);
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java
index e38ef007ad..007fff036a 100644
--- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java
@@ -42,7 +42,6 @@ import org.apache.poi.hsmf.datatypes.PropertyValue;
import org.apache.poi.hsmf.datatypes.PropertyValue.LongPropertyValue;
import org.apache.poi.hsmf.datatypes.PropertyValue.TimePropertyValue;
import org.apache.poi.hsmf.dev.HSMFDump;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hsmf.extractor.OutlookTextExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LocaleUtil;
@@ -74,23 +73,23 @@ public final class TestFixedSizedProperties {
fsMessageFails = new POIFSFileSystem(samples.getFile(messageFails));
mapiMessageSucceeds = new MAPIMessage(fsMessageSucceeds);
- mapiMessageFails = new MAPIMessage(fsMessageFails);
-
+ mapiMessageFails = new MAPIMessage(fsMessageFails);
+
messageDateFormat = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss", Locale.ROOT);
- messageDateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
+ messageDateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
userTimeZone = LocaleUtil.getUserTimeZone();
LocaleUtil.setUserTimeZone(LocaleUtil.TIMEZONE_UTC);
}
-
-
+
+
@AfterClass
public static void closeFS() throws Exception {
LocaleUtil.setUserTimeZone(userTimeZone);
fsMessageSucceeds.close();
fsMessageFails.close();
}
-
+
/**
* Check we can find a sensible number of properties on a few
* of our test files
@@ -98,21 +97,21 @@ public final class TestFixedSizedProperties {
@Test
public void testPropertiesFound() {
Map<MAPIProperty,List<PropertyValue>> props;
-
+
props = mapiMessageSucceeds.getMainChunks().getProperties();
assertTrue(props.toString(), props.size() > 10);
-
+
props = mapiMessageFails.getMainChunks().getProperties();
assertTrue(props.toString(), props.size() > 10);
}
-
+
/**
* Check we find properties of a variety of different types
*/
@Test
public void testPropertyValueTypes() {
Chunks mainChunks = mapiMessageSucceeds.getMainChunks();
-
+
// Ask to have the values looked up
Map<MAPIProperty,List<PropertyValue>> props = mainChunks.getProperties();
HashSet<Class<? extends PropertyValue>> seenTypes =
@@ -126,7 +125,7 @@ public final class TestFixedSizedProperties {
assertTrue(seenTypes.toString(), seenTypes.contains(LongPropertyValue.class));
assertTrue(seenTypes.toString(), seenTypes.contains(TimePropertyValue.class));
assertFalse(seenTypes.toString(), seenTypes.contains(ChunkBasedPropertyValue.class));
-
+
// Ask for the raw values
seenTypes.clear();
for (PropertyValue pv : mainChunks.getRawProperties().values()) {
@@ -144,31 +143,21 @@ public final class TestFixedSizedProperties {
@Test
public void testReadMessageDateSucceedsWithOutlookTextExtractor() throws Exception {
OutlookTextExtractor ext = new OutlookTextExtractor(mapiMessageSucceeds);
- ext.setFilesystem(null); // Don't close re-used test resources here
-
+ ext.setCloseFilesystem(false);
+
String text = ext.getText();
assertContains(text, "Date: Fri, 22 Jun 2012 18:32:54 +0000\n");
ext.close();
}
- @Test
- public void testReadMessageDateSucceedsWithOutlookTextExtactor() throws Exception {
- OutlookTextExtactor ext = new OutlookTextExtactor(mapiMessageSucceeds);
- ext.setFilesystem(null); // Don't close re-used test resources here
-
- String text = ext.getText();
- assertContains(text, "Date: Fri, 22 Jun 2012 18:32:54 +0000\n");
- ext.close();
- }
-
/**
* Test to see if we can read the Date Chunk with OutlookTextExtractor.
*/
@Test
public void testReadMessageDateFailsWithOutlookTextExtractor() throws Exception {
OutlookTextExtractor ext = new OutlookTextExtractor(mapiMessageFails);
- ext.setFilesystem(null); // Don't close re-used test resources here
-
+ ext.setCloseFilesystem(false);
+
String text = ext.getText();
assertContains(text, "Date: Thu, 21 Jun 2012 14:14:04 +0000\n");
ext.close();
@@ -182,7 +171,7 @@ public final class TestFixedSizedProperties {
PrintStream stream = new PrintStream(new ByteArrayOutputStream());
HSMFDump dump = new HSMFDump(fsMessageSucceeds);
dump.dump(stream);
- }
+ }
/**
* Test to see if we can read the Date Chunk with HSMFDump.
@@ -202,19 +191,19 @@ public final class TestFixedSizedProperties {
// Check via the message date
Calendar clientSubmitTime = mapiMessageSucceeds.getMessageDate();
assertEquals(
- "Fri, 22 Jun 2012 18:32:54",
+ "Fri, 22 Jun 2012 18:32:54",
messageDateFormat.format(clientSubmitTime.getTime()));
-
+
// Fetch the property value directly
Map<MAPIProperty,List<PropertyValue>> props =
mapiMessageSucceeds.getMainChunks().getProperties();
- List<PropertyValue> pv = props.get(MAPIProperty.CLIENT_SUBMIT_TIME);
+ List<PropertyValue> pv = props.get(MAPIProperty.CLIENT_SUBMIT_TIME);
assertNotNull(pv);
assertEquals(1, pv.size());
-
+
clientSubmitTime = (Calendar)pv.get(0).getValue();
assertEquals(
- "Fri, 22 Jun 2012 18:32:54",
+ "Fri, 22 Jun 2012 18:32:54",
messageDateFormat.format(clientSubmitTime.getTime()));
}
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
index 4d8bfb693f..2767228501 100644
--- a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
@@ -20,7 +20,6 @@ package org.apache.poi.hsmf.extractor;
import static org.apache.poi.POITestCase.assertContains;
import static org.apache.poi.POITestCase.assertNotContained;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
@@ -57,68 +56,62 @@ public final class TestOutlookTextExtractor {
@Test
public void testQuick() throws Exception {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("quick.msg"), true);
- MAPIMessage msg = new MAPIMessage(poifs);
-
- OutlookTextExtractor ext = new OutlookTextExtractor(msg);
- String text = ext.getText();
-
- assertContains(text, "From: Kevin Roast\n");
- assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n");
- assertNotContained(text, "CC:");
- assertNotContained(text, "BCC:");
- assertNotContained(text, "Attachment:");
- assertContains(text, "Subject: Test the content transformer\n");
- Calendar cal = LocaleUtil.getLocaleCalendar(2007, 5, 14, 9, 42, 55);
- SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT);
- f.setTimeZone(LocaleUtil.getUserTimeZone());
- String dateText = f.format(cal.getTime());
- assertContains(text, "Date: " + dateText + "\n");
- assertContains(text, "The quick brown fox jumps over the lazy dog");
-
- ext.close();
- poifs.close();
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("quick.msg"), true);
+ MAPIMessage msg = new MAPIMessage(poifs);
+ OutlookTextExtractor ext = new OutlookTextExtractor(msg)) {
+ String text = ext.getText();
+
+ assertContains(text, "From: Kevin Roast\n");
+ assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n");
+ assertNotContained(text, "CC:");
+ assertNotContained(text, "BCC:");
+ assertNotContained(text, "Attachment:");
+ assertContains(text, "Subject: Test the content transformer\n");
+ Calendar cal = LocaleUtil.getLocaleCalendar(2007, 5, 14, 9, 42, 55);
+ SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT);
+ f.setTimeZone(LocaleUtil.getUserTimeZone());
+ String dateText = f.format(cal.getTime());
+ assertContains(text, "Date: " + dateText + "\n");
+ assertContains(text, "The quick brown fox jumps over the lazy dog");
+ }
}
@Test
public void testSimple() throws Exception {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true);
- MAPIMessage msg = new MAPIMessage(poifs);
-
- OutlookTextExtractor ext = new OutlookTextExtractor(msg);
- String text = ext.getText();
-
- assertContains(text, "From: Travis Ferguson\n");
- assertContains(text, "To: travis@overwrittenstack.com\n");
- assertNotContained(text, "CC:");
- assertNotContained(text, "BCC:");
- assertContains(text, "Subject: test message\n");
- assertContains(text, "Date: Fri, 6 Jul 2007 05:27:17 +0000\n");
- assertContains(text, "This is a test message.");
-
- ext.close();
- poifs.close();
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true);
+ MAPIMessage msg = new MAPIMessage(poifs);
+ OutlookTextExtractor ext = new OutlookTextExtractor(msg)) {
+ String text = ext.getText();
+
+ assertContains(text, "From: Travis Ferguson\n");
+ assertContains(text, "To: travis@overwrittenstack.com\n");
+ assertNotContained(text, "CC:");
+ assertNotContained(text, "BCC:");
+ assertContains(text, "Subject: test message\n");
+ assertContains(text, "Date: Fri, 6 Jul 2007 05:27:17 +0000\n");
+ assertContains(text, "This is a test message.");
+ }
}
@Test
public void testConstructors() throws Exception {
- FileInputStream fis = new FileInputStream(samples.getFile("simple_test_msg.msg"));
- OutlookTextExtractor ext = new OutlookTextExtractor(fis);
- String inp = ext.getText();
- ext.close();
- fis.close();
-
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true);
- ext = new OutlookTextExtractor(poifs);
- String poifsTxt = ext.getText();
- ext.close();
- poifs.close();
-
- fis = new FileInputStream(samples.getFile("simple_test_msg.msg"));
- ext = new OutlookTextExtractor(new MAPIMessage(fis));
- String mapi = ext.getText();
- ext.close();
- fis.close();
+ String inp;
+ try (FileInputStream fis = new FileInputStream(samples.getFile("simple_test_msg.msg"));
+ OutlookTextExtractor ext = new OutlookTextExtractor(fis)) {
+ inp = ext.getText();
+ }
+
+ String poifsTxt;
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("simple_test_msg.msg"), true);
+ OutlookTextExtractor ext = new OutlookTextExtractor(poifs)){
+ poifsTxt = ext.getText();
+ }
+
+ String mapi;
+ try (FileInputStream fis = new FileInputStream(samples.getFile("simple_test_msg.msg"));
+ OutlookTextExtractor ext = new OutlookTextExtractor(new MAPIMessage(fis))) {
+ mapi = ext.getText();
+ }
assertEquals(inp, poifsTxt);
assertEquals(inp, mapi);
@@ -142,25 +135,22 @@ public final class TestOutlookTextExtractor {
"example_sent_regular.msg", "example_sent_unicode.msg"
};
for (String file : files) {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true);
- MAPIMessage msg = new MAPIMessage(poifs);
-
- OutlookTextExtractor ext = new OutlookTextExtractor(msg);
- String text = ext.getText();
-
- assertContains(text, "From: Mike Farman\n");
- assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
- "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
- assertContains(text, "CC: 'nickb@alfresco.com' <nickb@alfresco.com>; " +
- "'nick.burch@alfresco.com' <nick.burch@alfresco.com>; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
- assertContains(text, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " +
- "'Vonka Jan' <jan.vonka@alfresco.com>\n");
- assertContains(text, "Subject: This is a test message please ignore\n");
- assertContains(text, "Date:");
- assertContains(text, "The quick brown fox jumps over the lazy dog");
-
- ext.close();
- poifs.close();
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true);
+ MAPIMessage msg = new MAPIMessage(poifs);
+ OutlookTextExtractor ext = new OutlookTextExtractor(msg)) {
+ String text = ext.getText();
+
+ assertContains(text, "From: Mike Farman\n");
+ assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
+ "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
+ assertContains(text, "CC: 'nickb@alfresco.com' <nickb@alfresco.com>; " +
+ "'nick.burch@alfresco.com' <nick.burch@alfresco.com>; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
+ assertContains(text, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " +
+ "'Vonka Jan' <jan.vonka@alfresco.com>\n");
+ assertContains(text, "Subject: This is a test message please ignore\n");
+ assertContains(text, "Date:");
+ assertContains(text, "The quick brown fox jumps over the lazy dog");
+ }
}
}
@@ -182,25 +172,21 @@ public final class TestOutlookTextExtractor {
"example_received_regular.msg", "example_received_unicode.msg"
};
for (String file : files) {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true);
- MAPIMessage msg = new MAPIMessage(poifs);
-
-
- OutlookTextExtractor ext = new OutlookTextExtractor(msg);
- String text = ext.getText();
-
- assertContains(text, "From: Mike Farman\n");
- assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
- "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
- assertContains(text, "CC: nickb@alfresco.com; " +
- "nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
- assertNotContained(text, "BCC:");
- assertContains(text, "Subject: This is a test message please ignore\n");
- assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly
- assertContains(text, "The quick brown fox jumps over the lazy dog");
-
- ext.close();
- poifs.close();
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile(file), true);
+ MAPIMessage msg = new MAPIMessage(poifs);
+ OutlookTextExtractor ext = new OutlookTextExtractor(msg)) {
+ String text = ext.getText();
+
+ assertContains(text, "From: Mike Farman\n");
+ assertContains(text, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
+ "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
+ assertContains(text, "CC: nickb@alfresco.com; " +
+ "nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
+ assertNotContained(text, "BCC:");
+ assertContains(text, "Subject: This is a test message please ignore\n");
+ assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly
+ assertContains(text, "The quick brown fox jumps over the lazy dog");
+ }
}
}
@@ -210,85 +196,59 @@ public final class TestOutlookTextExtractor {
@SuppressWarnings("JavadocReference")
@Test
public void testWithAttachments() throws Exception {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true);
- MAPIMessage msg = new MAPIMessage(poifs);
- OutlookTextExtractor ext = new OutlookTextExtractor(msg);
-
- // Check the normal bits
- String text = ext.getText();
-
- assertContains(text, "From: Nicolas1");
- assertContains(text, "To: 'nicolas1.23456@free.fr'");
- assertNotContained(text, "CC:");
- assertNotContained(text, "BCC:");
- assertContains(text, "Subject: test");
- assertContains(text, "Date: Wed, 22 Apr");
- assertContains(text, "Attachment: test-unicode.doc\n");
- assertContains(text, "Attachment: pj1.txt\n");
- assertContains(text, "contenu");
-
- // Embeded bits are checked in
- // TestExtractorFactory
-
- ext.close();
- poifs.close();
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true);
+ MAPIMessage msg = new MAPIMessage(poifs);
+ OutlookTextExtractor ext = new OutlookTextExtractor(msg)) {
+
+ // Check the normal bits
+ String text = ext.getText();
+
+ assertContains(text, "From: Nicolas1");
+ assertContains(text, "To: 'nicolas1.23456@free.fr'");
+ assertNotContained(text, "CC:");
+ assertNotContained(text, "BCC:");
+ assertContains(text, "Subject: test");
+ assertContains(text, "Date: Wed, 22 Apr");
+ assertContains(text, "Attachment: test-unicode.doc\n");
+ assertContains(text, "Attachment: pj1.txt\n");
+ assertContains(text, "contenu");
+
+ // Embeded bits are checked in
+ // TestExtractorFactory
+ }
}
@Test
public void testWithAttachedMessage() throws Exception {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("58214_with_attachment.msg"), true);
- MAPIMessage msg = new MAPIMessage(poifs);
- OutlookTextExtractor ext = new OutlookTextExtractor(msg);
- String text = ext.getText();
-
- // Check we got bits from the main message
- assertContains(text, "Master mail");
- assertContains(text, "ante in lacinia euismod");
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("58214_with_attachment.msg"), true);
+ MAPIMessage msg = new MAPIMessage(poifs);
+ OutlookTextExtractor ext = new OutlookTextExtractor(msg)) {
+ String text = ext.getText();
- // But not the attached message
- assertNotContained(text, "Test mail attachment");
- assertNotContained(text, "Lorem ipsum dolor sit");
+ // Check we got bits from the main message
+ assertContains(text, "Master mail");
+ assertContains(text, "ante in lacinia euismod");
- ext.close();
- poifs.close();
+ // But not the attached message
+ assertNotContained(text, "Test mail attachment");
+ assertNotContained(text, "Lorem ipsum dolor sit");
+ }
}
@Test
public void testEncodings() throws Exception {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true);
- MAPIMessage msg = new MAPIMessage(poifs);
- OutlookTextExtractor ext = new OutlookTextExtractor(msg);
- String text = ext.getText();
-
- // Check the english bits
- assertContains(text, "From: Tests Chang@FT");
- assertContains(text, "tests.chang@fengttt.com");
-
- // And check some chinese bits
- assertContains(text, "(\u5f35\u6bd3\u502b)");
- assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )");
+ try (POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true);
+ MAPIMessage msg = new MAPIMessage(poifs);
+ OutlookTextExtractor ext = new OutlookTextExtractor(msg)) {
+ String text = ext.getText();
- ext.close();
- poifs.close();
- }
+ // Check the english bits
+ assertContains(text, "From: Tests Chang@FT");
+ assertContains(text, "tests.chang@fengttt.com");
- @Test
- public void testEncodingsDeprecatedClass() throws Exception {
- POIFSFileSystem poifs = new POIFSFileSystem(samples.getFile("chinese-traditional.msg"), true);
- MAPIMessage msg = new MAPIMessage(poifs);
- OutlookTextExtactor ext = new OutlookTextExtactor(msg);
- assertTrue("OutlookTextExtactor instanceof OutlookTextExtractor", ext instanceof OutlookTextExtractor);
- String text = ext.getText();
-
- // Check the english bits
- assertContains(text, "From: Tests Chang@FT");
- assertContains(text, "tests.chang@fengttt.com");
-
- // And check some chinese bits
- assertContains(text, "(\u5f35\u6bd3\u502b)");
- assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )");
-
- ext.close();
- poifs.close();
+ // And check some chinese bits
+ assertContains(text, "(\u5f35\u6bd3\u502b)");
+ assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )");
+ }
}
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
index c605130a67..1962f2facf 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
@@ -17,16 +17,16 @@
package org.apache.poi.hwpf.extractor;
-import org.apache.poi.POIDataSamples;
-import org.apache.poi.extractor.POITextExtractor;
-import org.apache.poi.extractor.OLE2ExtractorFactory;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.junit.Test;
+import static org.junit.Assert.assertNotNull;
import java.io.IOException;
import java.io.InputStream;
-import static org.junit.Assert.assertNotNull;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.junit.Test;
/**
* Tests for bugs with the WordExtractor
@@ -61,7 +61,7 @@ public final class TestWordExtractorBugs {
@Test
public void testBug60374() throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(SAMPLES.openResourceAsStream("cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC"));
- final POITextExtractor extractor = OLE2ExtractorFactory.createExtractor(fs);
+ final POITextExtractor extractor = ExtractorFactory.createExtractor(fs);
// Check it gives text without error
assertNotNull(extractor.getText());