From: Maxim Valyanskiy Date: Mon, 22 Aug 2011 07:56:43 +0000 (+0000) Subject: bug#51686 - ConcurrentModificationException in Tika's OfficeParser X-Git-Tag: REL_3_8_BETA4~1^2~17 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=879fc2dc1cd4bb4cf72c842663ac361527e53c9c;p=poi.git bug#51686 - ConcurrentModificationException in Tika's OfficeParser git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1160137 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java index 5301084a8e..d67588dc08 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java @@ -141,41 +141,32 @@ public abstract class HWPFDocumentCore extends POIDocument * @throws IOException If there is an unexpected IOException from the passed * in POIFSFileSystem. */ - public HWPFDocumentCore(DirectoryNode directory) throws IOException - { + public HWPFDocumentCore(DirectoryNode directory) throws IOException { // Sort out the hpsf properties - super(directory); + super(directory); // read in the main stream. DocumentEntry documentProps = (DocumentEntry) - directory.getEntry("WordDocument"); + directory.getEntry("WordDocument"); _mainStream = new byte[documentProps.getSize()]; directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream); // Create our FIB, and check for the doc being encrypted _fib = new FileInformationBlock(_mainStream); - if(_fib.isFEncrypted()) { - throw new EncryptedDocumentException("Cannot process encrypted word files!"); + if (_fib.isFEncrypted()) { + throw new EncryptedDocumentException("Cannot process encrypted word files!"); } - { - DirectoryEntry objectPoolEntry; - try - { - objectPoolEntry = (DirectoryEntry) directory - .getEntry( STREAM_OBJECT_POOL ); - } - catch ( FileNotFoundException exc ) - { - objectPoolEntry = directory - .createDirectory( STREAM_OBJECT_POOL ); - } - _objectPool = new ObjectPoolImpl( objectPoolEntry ); - } + try { + DirectoryEntry objectPoolEntry = (DirectoryEntry) directory + .getEntry(STREAM_OBJECT_POOL); + _objectPool = new ObjectPoolImpl(objectPoolEntry); + } catch (FileNotFoundException exc) { } + } - /** + /** * Returns the range which covers the whole of the document, but excludes * any headers and footers. */ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java index 48c20dfbe3..b4f81f2bb1 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java @@ -24,9 +24,13 @@ import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFTestDataSamples; import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import java.io.IOException; +import java.io.InputStream; + /** * Test the different routes to extracting text * @@ -353,4 +357,21 @@ public final class TestWordExtractor extends TestCase { assertEquals(p_text1_block, extractor.getText()); } } + + public void testRootEntiesNavigation() throws IOException { + InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("testWORD.doc"); + + POIFSFileSystem fs = new POIFSFileSystem(is); + + String text = null; + + for (Entry entry : fs.getRoot()) { + if ("WordDocument".equals(entry.getName())) { + WordExtractor ex = new WordExtractor(fs); + text = ex.getText(); + } + } + + assertNotNull(text); + } } diff --git a/test-data/document/testWORD.doc b/test-data/document/testWORD.doc new file mode 100644 index 0000000000..c1f4f3d0b0 Binary files /dev/null and b/test-data/document/testWORD.doc differ