|
|
@@ -18,12 +18,21 @@ package org.apache.poi.stress; |
|
|
|
|
|
|
|
import static org.junit.Assert.assertNotNull; |
|
|
|
|
|
|
|
import java.io.ByteArrayInputStream; |
|
|
|
import java.io.ByteArrayOutputStream; |
|
|
|
import java.io.File; |
|
|
|
import java.io.FileInputStream; |
|
|
|
import java.io.IOException; |
|
|
|
import java.io.InputStream; |
|
|
|
import java.io.PrintWriter; |
|
|
|
import java.io.StringWriter; |
|
|
|
|
|
|
|
import org.apache.poi.hdf.extractor.WordDocument; |
|
|
|
import org.apache.poi.hwpf.HWPFDocument; |
|
|
|
import org.apache.poi.hwpf.extractor.WordExtractor; |
|
|
|
import org.junit.Test; |
|
|
|
|
|
|
|
@SuppressWarnings("deprecation") |
|
|
|
public class HWPFFileHandler extends POIFSFileHandler { |
|
|
|
@Override |
|
|
|
public void handleFile(InputStream stream) throws Exception { |
|
|
@@ -33,16 +42,53 @@ public class HWPFFileHandler extends POIFSFileHandler { |
|
|
|
assertNotNull(doc.getEndnotes()); |
|
|
|
|
|
|
|
handlePOIDocument(doc); |
|
|
|
|
|
|
|
// fails for many documents, but is deprecated anyway... |
|
|
|
// handleWordDocument(doc); |
|
|
|
} |
|
|
|
|
|
|
|
protected void handleWordDocument(HWPFDocument doc) throws IOException { |
|
|
|
ByteArrayOutputStream outStream = new ByteArrayOutputStream(); |
|
|
|
doc.write(outStream); |
|
|
|
|
|
|
|
WordDocument wordDoc = new WordDocument(new ByteArrayInputStream(outStream.toByteArray())); |
|
|
|
|
|
|
|
StringWriter docTextWriter = new StringWriter(); |
|
|
|
PrintWriter out = new PrintWriter(docTextWriter); |
|
|
|
try { |
|
|
|
wordDoc.writeAllText(out); |
|
|
|
} finally { |
|
|
|
out.close(); |
|
|
|
} |
|
|
|
docTextWriter.close(); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// a test-case to test this locally without executing the full TestAllFiles |
|
|
|
@Test |
|
|
|
public void test() throws Exception { |
|
|
|
InputStream stream = new FileInputStream("test-data/document/HeaderFooterUnicode.doc"); |
|
|
|
File file = new File("test-data/document/47304.doc"); |
|
|
|
|
|
|
|
InputStream stream = new FileInputStream(file); |
|
|
|
try { |
|
|
|
handleFile(stream); |
|
|
|
} finally { |
|
|
|
stream.close(); |
|
|
|
} |
|
|
|
|
|
|
|
handleExtracting(file); |
|
|
|
|
|
|
|
stream = new FileInputStream(file); |
|
|
|
try { |
|
|
|
WordExtractor extractor = new WordExtractor(stream); |
|
|
|
try { |
|
|
|
assertNotNull(extractor.getText()); |
|
|
|
} finally { |
|
|
|
extractor.close(); |
|
|
|
} |
|
|
|
} finally { |
|
|
|
stream.close(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |