aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/java/org/apache/poi/POIDocument.java20
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/data/ProblemExtracting.docbin0 -> 424448 bytes
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java45
3 files changed, 62 insertions, 3 deletions
diff --git a/src/java/org/apache/poi/POIDocument.java b/src/java/org/apache/poi/POIDocument.java
index ece7a3f13d..8d91c06e79 100644
--- a/src/java/org/apache/poi/POIDocument.java
+++ b/src/java/org/apache/poi/POIDocument.java
@@ -67,14 +67,28 @@ public abstract class POIDocument {
/**
* Find, and create objects for, the standard
- * Documment Information Properties (HPSF)
+ * Document Information Properties (HPSF).
+ * If a given property set is missing or corrupt,
+ * it will remain null.
*/
protected void readProperties() {
+ PropertySet ps; // reused for each property-stream lookup below
+
// DocumentSummaryInformation
- dsInf = (DocumentSummaryInformation)getPropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
+ ps = getPropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
+ if(ps instanceof DocumentSummaryInformation) { // instanceof is false for null, so no separate null check
+ dsInf = (DocumentSummaryInformation)ps;
+ } else if(ps != null) { // a property set was returned, but of an unexpected class: warn and leave dsInf unset
+ logger.log(POILogger.WARN, "DocumentSummaryInformation property set came back with wrong class - ", ps.getClass());
+ }
// SummaryInformation
- sInf = (SummaryInformation)getPropertySet(SummaryInformation.DEFAULT_STREAM_NAME);
+ ps = getPropertySet(SummaryInformation.DEFAULT_STREAM_NAME);
+ if(ps instanceof SummaryInformation) {
+ sInf = (SummaryInformation)ps;
+ } else if(ps != null) { // same handling as above: warn and leave sInf unset
+ logger.log(POILogger.WARN, "SummaryInformation property set came back with wrong class - ", ps.getClass());
+ }
}
/**
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/ProblemExtracting.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/ProblemExtracting.doc
new file mode 100644
index 0000000000..b980089439
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/data/ProblemExtracting.doc
Binary files differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
new file mode 100644
index 0000000000..b87f586d36
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.poi.hwpf.extractor;
+
+import java.io.FileInputStream;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for bugs with the WordExtractor
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestWordExtractorBugs extends TestCase {
+ private String dirname; // directory holding the test documents; set in setUp()
+ protected void setUp() throws Exception {
+ dirname = System.getProperty("HWPF.testdata.path"); // read from the HWPF.testdata.path system property
+ }
+
+ public void testProblemMetadata() throws Exception { // extraction from ProblemExtracting.doc must not throw
+ String filename = dirname + "/ProblemExtracting.doc";
+ WordExtractor extractor =
+ new WordExtractor(new FileInputStream(filename)); // NOTE(review): stream is not closed here - confirm whether WordExtractor closes it
+
+ // Check it gives text without error
+ extractor.getText();
+ extractor.getParagraphText();
+ extractor.getTextFromPieces();
+ }
+
+}