]> source.dussan.org Git - poi.git/commitdiff
Fix name of new OLE2ScratchpadExtractorFactory
authorDominik Stadler <centic@apache.org>
Tue, 12 Jul 2016 15:40:18 +0000 (15:40 +0000)
committerDominik Stadler <centic@apache.org>
Tue, 12 Jul 2016 15:40:18 +0000 (15:40 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1752304 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java [deleted file]
src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java [new file with mode: 0644]

index d1577898f1ca4354fc52c65c11497daf0f5b5b0d..737e9e35102c450695afe85288b0ea1a94f0ad5c 100644 (file)
@@ -154,7 +154,7 @@ public class OLE2ExtractorFactory {
     private static Class<?> getScratchpadClass() {
         try {
             return OLE2ExtractorFactory.class.getClassLoader().loadClass(
-                    "org.apache.poi.extractor.OLE2ScrachpadExtractorFactory"
+                    "org.apache.poi.extractor.OLE2ScratchpadExtractorFactory"
             );
         } catch (ClassNotFoundException e) {
             LOGGER.log(POILogger.ERROR, "POI Scratchpad jar missing");
diff --git a/src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java
deleted file mode 100644 (file)
index 6f84282..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.ByteArrayInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.hdgf.extractor.VisioTextExtractor;
-import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
-import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hsmf.MAPIMessage;
-import org.apache.poi.hsmf.datatypes.AttachmentChunks;
-import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
-import org.apache.poi.hwpf.OldWordFileFormatException;
-import org.apache.poi.hwpf.extractor.Word6Extractor;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.Entry;
-
-/**
- * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
- *  {@link ExtractorFactory}, which permit the other two to run with
- *  no Scratchpad jar (though without functionality!)
- * <p>Note - should not be used standalone, always use via the other
- *  two classes</p>
- */
-@SuppressWarnings("WeakerAccess")
-public class OLE2ScrachpadExtractorFactory {
-    /**
-     * Look for certain entries in the stream, to figure it
-     * out what format is desired
-     * Note - doesn't check for core-supported formats!
-     * Note - doesn't check for OOXML-supported formats
-     */
-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
-        if (poifsDir.hasEntry("WordDocument")) {
-            // Old or new style word document?
-            try {
-                return new WordExtractor(poifsDir);
-            } catch (OldWordFileFormatException e) {
-                return new Word6Extractor(poifsDir);
-            }
-        }
-
-        if (poifsDir.hasEntry("PowerPoint Document")) {
-            return new PowerPointExtractor(poifsDir);
-        }
-
-        if (poifsDir.hasEntry("VisioDocument")) {
-            return new VisioTextExtractor(poifsDir);
-        }
-
-        if (poifsDir.hasEntry("Quill")) {
-            return new PublisherTextExtractor(poifsDir);
-        }
-
-        final String[] outlookEntryNames = new String[] {
-                // message bodies, saved as plain text (PtypString)
-                // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
-                // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
-                // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx
-                // @see org.apache.poi.hsmf.Types.MAPIType
-                "__substg1.0_1000001E", //PidTagBody ASCII
-                "__substg1.0_1000001F", //PidTagBody Unicode
-                "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
-                "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
-                "__substg1.0_0037001E", //PidTagSubject ASCII
-                "__substg1.0_0037001F", //PidTagSubject Unicode
-        };
-        for (String entryName : outlookEntryNames) {
-            if (poifsDir.hasEntry(entryName)) {
-                return new OutlookTextExtactor(poifsDir);
-            }
-        }
-
-        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
-    }
-
-       /**
-        * Returns an array of text extractors, one for each of
-        *  the embedded documents in the file (if there are any).
-        * If there are no embedded documents, you'll get back an
-        *  empty array. Otherwise, you'll get one open
-        *  {@link POITextExtractor} for each embedded file.
-        */
-       public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
-      // Find all the embedded directories
-               DirectoryEntry root = ext.getRoot();
-               if(root == null) {
-                       throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
-               }
-
-               if(ext instanceof WordExtractor) {
-                   // These are in ObjectPool -> _... under the root
-                   try {
-                       DirectoryEntry op = (DirectoryEntry)
-                               root.getEntry("ObjectPool");
-                       Iterator<Entry> it = op.getEntries();
-                       while(it.hasNext()) {
-                           Entry entry = it.next();
-                           if(entry.getName().startsWith("_")) {
-                               dirs.add(entry);
-                           }
-                       }
-                   } catch(FileNotFoundException e) {
-                       // ignored here
-                   }
-                   //} else if(ext instanceof PowerPointExtractor) {
-                   // Tricky, not stored directly in poifs
-                   // TODO
-               } else if(ext instanceof OutlookTextExtactor) {
-                   // Stored in the Attachment blocks
-                   MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
-                   for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
-                       if(attachment.attachData != null) {
-                           byte[] data = attachment.attachData.getValue();
-                           nonPOIFS.add( new ByteArrayInputStream(data) );
-                       } else if(attachment.attachmentDirectory != null) {
-                           dirs.add(attachment.attachmentDirectory.getDirectory());
-                       }
-                   }
-               }
-       }
-}
diff --git a/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java
new file mode 100644 (file)
index 0000000..07ae641
--- /dev/null
@@ -0,0 +1,145 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+
+/**
+ * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
+ *  {@link ExtractorFactory}, which permit the other two to run with
+ *  no Scratchpad jar (though without functionality!)
+ * <p>Note - should not be used standalone, always use via the other
+ *  two classes</p>
+ */
+@SuppressWarnings("WeakerAccess")
+public class OLE2ScratchpadExtractorFactory {
+    /**
+     * Look for certain entries in the stream, to figure
+     * out what format is desired
+     * Note - doesn't check for core-supported formats!
+     * Note - doesn't check for OOXML-supported formats
+     */
+    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
+        if (poifsDir.hasEntry("WordDocument")) {
+            // Old or new style word document?
+            try {
+                return new WordExtractor(poifsDir);
+            } catch (OldWordFileFormatException e) {
+                return new Word6Extractor(poifsDir);
+            }
+        }
+
+        if (poifsDir.hasEntry("PowerPoint Document")) {
+            return new PowerPointExtractor(poifsDir);
+        }
+
+        if (poifsDir.hasEntry("VisioDocument")) {
+            return new VisioTextExtractor(poifsDir);
+        }
+
+        if (poifsDir.hasEntry("Quill")) {
+            return new PublisherTextExtractor(poifsDir);
+        }
+
+        final String[] outlookEntryNames = new String[] {
+                // message bodies, saved as plain text (PtypString)
+                // The first short (0x1000, 0x0047, 0x0037) refers to the Property ID (see [MS-OXPROPS].pdf)
+                // the second short (0x001e, 0x001f, 0x0102) refers to the type of data stored in this entry
+                // https://msdn.microsoft.com/en-us/library/cc433490(v=exchg.80).aspx
+                // @see org.apache.poi.hsmf.Types.MAPIType
+                "__substg1.0_1000001E", //PidTagBody ASCII
+                "__substg1.0_1000001F", //PidTagBody Unicode
+                "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
+                "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
+                "__substg1.0_0037001E", //PidTagSubject ASCII
+                "__substg1.0_0037001F", //PidTagSubject Unicode
+        };
+        for (String entryName : outlookEntryNames) {
+            if (poifsDir.hasEntry(entryName)) {
+                return new OutlookTextExtactor(poifsDir);
+            }
+        }
+
+        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+    }
+
+       /**
+        * Collects the embedded documents of the file behind the given
+        *  extractor (if there are any). Embedded POIFS entries are added
+        *  to <code>dirs</code>; raw Outlook attachment data is added to
+        *  <code>nonPOIFS</code> as an {@link InputStream}. If there are no
+        *  embedded documents, neither list is modified.
+        */
+       public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
+      // Find all the embedded directories
+               DirectoryEntry root = ext.getRoot();
+               if(root == null) {
+                       throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+               }
+
+               if(ext instanceof WordExtractor) {
+                   // These are in ObjectPool -> _... under the root
+                   try {
+                       DirectoryEntry op = (DirectoryEntry)
+                               root.getEntry("ObjectPool");
+                       Iterator<Entry> it = op.getEntries();
+                       while(it.hasNext()) {
+                           Entry entry = it.next();
+                           if(entry.getName().startsWith("_")) {
+                               dirs.add(entry);
+                           }
+                       }
+                   } catch(FileNotFoundException e) {
+                       // ignored here
+                   }
+                   //} else if(ext instanceof PowerPointExtractor) {
+                   // Tricky, not stored directly in poifs
+                   // TODO
+               } else if(ext instanceof OutlookTextExtactor) {
+                   // Stored in the Attachment blocks
+                   MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
+                   for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
+                       if(attachment.attachData != null) {
+                           byte[] data = attachment.attachData.getValue();
+                           nonPOIFS.add( new ByteArrayInputStream(data) );
+                       } else if(attachment.attachmentDirectory != null) {
+                           dirs.add(attachment.attachmentDirectory.getDirectory());
+                       }
+                   }
+               }
+       }
+}