bug#51686 - ConcurrentModificationException in Tika's OfficeParser

author Maxim Valyanskiy <maxcom@apache.org>

Mon, 22 Aug 2011 07:56:43 +0000 (07:56 +0000)

committer Maxim Valyanskiy <maxcom@apache.org>

Mon, 22 Aug 2011 07:56:43 +0000 (07:56 +0000)
author Maxim Valyanskiy <maxcom@apache.org>
Mon, 22 Aug 2011 07:56:43 +0000 (07:56 +0000)
committer Maxim Valyanskiy <maxcom@apache.org>
Mon, 22 Aug 2011 07:56:43 +0000 (07:56 +0000)
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java

index 5301084a8ea42d028b666c80b5112d087adb0c9d..d67588dc080149067fb107d1d7f82f56db3d6773 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
@@ -141,41 +141,32 @@ public abstract class HWPFDocumentCore extends POIDocument
     * @throws IOException If there is an unexpected IOException from the passed
     *         in POIFSFileSystem.
     */
-  public HWPFDocumentCore(DirectoryNode directory) throws IOException
-  {
+  public HWPFDocumentCore(DirectoryNode directory) throws IOException {
      // Sort out the hpsf properties
-       super(directory);
+    super(directory);
  
      // read in the main stream.
      DocumentEntry documentProps = (DocumentEntry)
-       directory.getEntry("WordDocument");
+            directory.getEntry("WordDocument");
      _mainStream = new byte[documentProps.getSize()];
  
      directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream);
  
      // Create our FIB, and check for the doc being encrypted
      _fib = new FileInformationBlock(_mainStream);
-    if(_fib.isFEncrypted()) {
-       throw new EncryptedDocumentException("Cannot process encrypted word files!");
+    if (_fib.isFEncrypted()) {
+      throw new EncryptedDocumentException("Cannot process encrypted word files!");
      }
  
-        {
-            DirectoryEntry objectPoolEntry;
-            try
-            {
-                objectPoolEntry = (DirectoryEntry) directory
-                        .getEntry( STREAM_OBJECT_POOL );
-            }
-            catch ( FileNotFoundException exc )
-            {
-                objectPoolEntry = directory
-                        .createDirectory( STREAM_OBJECT_POOL );
-            }
-            _objectPool = new ObjectPoolImpl( objectPoolEntry );
-        }
+    try {
+      DirectoryEntry objectPoolEntry = (DirectoryEntry) directory
+              .getEntry(STREAM_OBJECT_POOL);
+      _objectPool = new ObjectPoolImpl(objectPoolEntry);
+    } catch (FileNotFoundException exc) {
      }
+  }
  
-    /**
+  /**
       * Returns the range which covers the whole of the document, but excludes
       * any headers and footers.
       */
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java

index 48c20dfbe39983ec9454661bd3d4f58828993071..b4f81f2bb15bec683712d6cfe83814bb4848ba8e 100644 (file)
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
@@ -24,9 +24,13 @@ import org.apache.poi.hwpf.HWPFDocument;
  import org.apache.poi.hwpf.HWPFTestDataSamples;
  import org.apache.poi.hwpf.OldWordFileFormatException;
  import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
  import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
  import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  
+import java.io.IOException;
+import java.io.InputStream;
+
  /**
   * Test the different routes to extracting text
   *
@@ -353,4 +357,21 @@ public final class TestWordExtractor extends TestCase {
            assertEquals(p_text1_block, extractor.getText());
         }
      }
+
+    public void testRootEntiesNavigation() throws IOException {
+        InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("testWORD.doc");
+
+        POIFSFileSystem fs = new POIFSFileSystem(is);
+
+        String text = null;
+
+        for (Entry entry : fs.getRoot()) {
+            if ("WordDocument".equals(entry.getName())) {
+                WordExtractor ex = new WordExtractor(fs);
+                text = ex.getText();
+            }
+        }
+
+        assertNotNull(text);
+    }
  }
diff --git a/test-data/document/testWORD.doc b/test-data/document/testWORD.doc

new file mode 100644 (file)

index 0000000..c1f4f3d

Binary files /dev/null and b/test-data/document/testWORD.doc differ
author	Maxim Valyanskiy <maxcom@apache.org>
	Mon, 22 Aug 2011 07:56:43 +0000 (07:56 +0000)
committer	Maxim Valyanskiy <maxcom@apache.org>
	Mon, 22 Aug 2011 07:56:43 +0000 (07:56 +0000)
src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java		patch | blob | history
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java		patch | blob | history
test-data/document/testWORD.doc	[new file with mode: 0644]	patch | blob