source.dussan.org Git - poi.git/commitdiff
whitespace
author Javen O'Neal <onealj@apache.org>
Sun, 17 Jul 2016 08:26:51 +0000 (08:26 +0000)
committer Javen O'Neal <onealj@apache.org>
Sun, 17 Jul 2016 08:26:51 +0000 (08:26 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1753028 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java

index 05cad8af93c3925fe1fc5bb17db668632db3ee75..6cffd39fdf9ee32beaa0a7762915540f62d53492 100644 (file)
@@ -72,180 +72,180 @@ import org.apache.xmlbeans.XmlException;
  */
 @SuppressWarnings("WeakerAccess")
 public class ExtractorFactory {
-       public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
-       protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
-       protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
-
-   /**
-    * Should this thread prefer event based over usermodel based extractors?
-    * (usermodel extractors tend to be more accurate, but use more memory)
-    * Default is false.
-    */
-       public static boolean getThreadPrefersEventExtractors() {
-          return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
-       }
-
-   /**
-    * Should all threads prefer event based over usermodel based extractors?
-    * (usermodel extractors tend to be more accurate, but use more memory)
-    * Default is to use the thread level setting, which defaults to false.
-    */
-       public static Boolean getAllThreadsPreferEventExtractors() {
-          return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
-       }
-
-   /**
-    * Should this thread prefer event based over usermodel based extractors?
-    * Will only be used if the All Threads setting is null.
-    */
-   public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
-       OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
-   }
-
-   /**
-    * Should all threads prefer event based over usermodel based extractors?
-    * If set, will take preference over the Thread level setting.
-    */
-   public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
-       OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
-   }
-
-   /**
-    * Should this thread use event based extractors is available?
-    * Checks the all-threads one first, then thread specific.
-    */
-   protected static boolean getPreferEventExtractor() {
-       return OLE2ExtractorFactory.getPreferEventExtractor();
-   }
-
-       public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-           NPOIFSFileSystem fs = null;
+    public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
+    protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
+    protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
+
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is false.
+     */
+    public static boolean getThreadPrefersEventExtractors() {
+        return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
+    }
+
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is to use the thread level setting, which defaults to false.
+     */
+    public static Boolean getAllThreadsPreferEventExtractors() {
+        return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
+    }
+
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * Will only be used if the All Threads setting is null.
+     */
+    public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
+         OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
+    }
+
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * If set, will take preference over the Thread level setting.
+     */
+    public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
+         OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
+    }
+
+    /**
+     * Should this thread use event based extractors is available?
+     * Checks the all-threads one first, then thread specific.
+     */
+    protected static boolean getPreferEventExtractor() {
+         return OLE2ExtractorFactory.getPreferEventExtractor();
+    }
+
+    public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+        NPOIFSFileSystem fs = null;
         try {
             fs = new NPOIFSFileSystem(f);
             POIOLE2TextExtractor extractor = createExtractor(fs);
             extractor.setFilesystem(fs);
             return extractor;
+
         } catch (OfficeXmlFileException e) {
             // ensure file-handle release
-                       IOUtils.closeQuietly(fs);
-
+            IOUtils.closeQuietly(fs);
             return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
+
         } catch (NotOLE2FileException ne) {
             // ensure file-handle release
-                       IOUtils.closeQuietly(fs);
-
+            IOUtils.closeQuietly(fs);
             throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
-               } catch (OpenXML4JException e) {
-                       // ensure file-handle release
-                       IOUtils.closeQuietly(fs);
 
-                       throw e;
-               } catch (XmlException e) {
-                       // ensure file-handle release
-                       IOUtils.closeQuietly(fs);
+        } catch (OpenXML4JException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
 
-                       throw e;
-               } catch (IOException e) {
-                       // ensure file-handle release
-                       IOUtils.closeQuietly(fs);
+        } catch (XmlException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
+
+        } catch (IOException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
 
-                       throw e;
         } catch (RuntimeException e) {
-                       // ensure file-handle release
-                       IOUtils.closeQuietly(fs);
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
+        }
+     }
 
-                       throw e;
-               }
+    public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+        // Figure out the kind of stream
+        // If clearly doesn't do mark/reset, wrap up
+        if (! inp.markSupported()) {
+            inp = new PushbackInputStream(inp, 8);
+        }
+
+        if (NPOIFSFileSystem.hasPOIFSHeader(inp)) {
+            return createExtractor(new NPOIFSFileSystem(inp));
+        }
+        if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
+            return createExtractor(OPCPackage.open(inp));
+        }
+        throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
     }
 
-       public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-               // Figure out the kind of stream
-               // If clearly doesn't do mark/reset, wrap up
-               if(! inp.markSupported()) {
-                       inp = new PushbackInputStream(inp, 8);
-               }
-
-               if(NPOIFSFileSystem.hasPOIFSHeader(inp)) {
-                       return createExtractor(new NPOIFSFileSystem(inp));
-               }
-               if(DocumentFactoryHelper.hasOOXMLHeader(inp)) {
-                       return createExtractor(OPCPackage.open(inp));
-               }
-               throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
-       }
-
-       /**
-        * Tries to determine the actual type of file and produces a matching text-extractor for it.
-        *
-        * @param pkg An {@link OPCPackage}.
-        * @return A {@link POIXMLTextExtractor} for the given file.
-        * @throws IOException If an error occurs while reading the file 
-        * @throws OpenXML4JException If an error parsing the OpenXML file format is found. 
-        * @throws XmlException If an XML parsing error occurs.
-        * @throws IllegalArgumentException If no matching file type could be found.
-        */
-       public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+    /**
+     * Tries to determine the actual type of file and produces a matching text-extractor for it.
+     *
+     * @param pkg An {@link OPCPackage}.
+     * @return A {@link POIXMLTextExtractor} for the given file.
+     * @throws IOException If an error occurs while reading the file 
+     * @throws OpenXML4JException If an error parsing the OpenXML file format is found. 
+     * @throws XmlException If an XML parsing error occurs.
+     * @throws IllegalArgumentException If no matching file type could be found.
+     */
+    public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
         try {
-          // Check for the normal Office core document
-           PackageRelationshipCollection core =
-                pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
-           
-           // If nothing was found, try some of the other OOXML-based core types
-           if (core.size() == 0) {
-               // Could it be an OOXML-Strict one?
-               core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
-           }
-           if (core.size() == 0) {
-               // Could it be a visio one?
-               core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
-               if (core.size() == 1)
-                   return new XDGFVisioExtractor(pkg);
-           }
-           
-           // Should just be a single core document, complain if not
-           if (core.size() != 1) {
-               throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
-           }
-    
-           // Grab the core document part, and try to identify from that
-           PackagePart corePart = pkg.getPart(core.getRelationship(0));
-    
-           // Is it XSSF?
-           for(XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
-              if(corePart.getContentType().equals(rel.getContentType())) {
-                 if(getPreferEventExtractor()) {
-                    return new XSSFEventBasedExcelExtractor(pkg);
-                 }
-    
-                 return new XSSFExcelExtractor(pkg);
-              }
-           }
-    
-           // Is it XWPF?
-           for(XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
-              if(corePart.getContentType().equals(rel.getContentType())) {
-                 return new XWPFWordExtractor(pkg);
-              }
-           }
-    
-           // Is it XSLF?
-           for(XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
-              if(corePart.getContentType().equals(rel.getContentType())) {
-                 return new XSLFPowerPointExtractor(pkg);
-              }
-           }
-    
-           // special handling for SlideShow-Theme-files, 
-           if(XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
-               return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
-           }
-           
-           throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
-           } catch (IOException e) {
-               // ensure that we close the package again if there is an error opening it, however
-               // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
-               pkg.revert();
-               throw e;
+            // Check for the normal Office core document
+            PackageRelationshipCollection core;
+            core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
+              
+            // If nothing was found, try some of the other OOXML-based core types
+            if (core.size() == 0) {
+                // Could it be an OOXML-Strict one?
+                core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
+            }
+            if (core.size() == 0) {
+                // Could it be a visio one?
+                core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
+                if (core.size() == 1)
+                    return new XDGFVisioExtractor(pkg);
+            }
+              
+            // Should just be a single core document, complain if not
+            if (core.size() != 1) {
+                throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
+            }
+     
+            // Grab the core document part, and try to identify from that
+            PackagePart corePart = pkg.getPart(core.getRelationship(0));
+
+            // Is it XSSF?
+            for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
+                if (corePart.getContentType().equals(rel.getContentType())) {
+                    if (getPreferEventExtractor()) {
+                        return new XSSFEventBasedExcelExtractor(pkg);
+                    }
+                    return new XSSFExcelExtractor(pkg);
+                }
+            }
+     
+            // Is it XWPF?
+            for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
+                if (corePart.getContentType().equals(rel.getContentType())) {
+                    return new XWPFWordExtractor(pkg);
+                }
+            }
+     
+            // Is it XSLF?
+            for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
+                if (corePart.getContentType().equals(rel.getContentType())) {
+                    return new XSLFPowerPointExtractor(pkg);
+                }
+            }
+     
+            // special handling for SlideShow-Theme-files, 
+            if (XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
+                return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+            }
+
+            throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
+
+        } catch (IOException e) {
+            // ensure that we close the package again if there is an error opening it, however
+            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
+            pkg.revert();
+            throw e;
         } catch (OpenXML4JException e) {
             // ensure that we close the package again if there is an error opening it, however
             // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
@@ -256,27 +256,25 @@ public class ExtractorFactory {
             // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
             pkg.revert();
             throw e;
-           } catch (RuntimeException e) {
-           // ensure that we close the package again if there is an error opening it, however
-           // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
-           pkg.revert();
-           
-           throw e;
-           }
-       }
-
-       public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-           return OLE2ExtractorFactory.createExtractor(fs);
-       }
+        } catch (RuntimeException e) {
+            // ensure that we close the package again if there is an error opening it, however
+            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
+            pkg.revert();
+            throw e;
+        }
+    }
+
+    public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return OLE2ExtractorFactory.createExtractor(fs);
+    }
     public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
         return OLE2ExtractorFactory.createExtractor(fs);
-     }
+    }
     public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
         return OLE2ExtractorFactory.createExtractor(fs);
-     }
+    }
 
-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException,
-            OpenXML4JException, XmlException
+    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
     {
         // First, check for OOXML
         for (String entryName : poifsDir.getEntryNames()) {
@@ -285,104 +283,102 @@ public class ExtractorFactory {
                 return createExtractor(pkg);
             }
         }
-        
+
         // If not, ask the OLE2 code to check, with Scratchpad if possible
         return OLE2ExtractorFactory.createExtractor(poifsDir);
     }
 
-       /**
-        * Returns an array of text extractors, one for each of
-        *  the embedded documents in the file (if there are any).
-        * If there are no embedded documents, you'll get back an
-        *  empty array. Otherwise, you'll get one open
-        *  {@link POITextExtractor} for each embedded file.
-        */
-       public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
-          // All the embedded directories we spotted
-               ArrayList<Entry> dirs = new ArrayList<Entry>();
-               // For anything else not directly held in as a POIFS directory
-               ArrayList<InputStream> nonPOIFS = new ArrayList<InputStream>();
-
-      // Find all the embedded directories
-               DirectoryEntry root = ext.getRoot();
-               if(root == null) {
-                       throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
-               }
-
-               if(ext instanceof ExcelExtractor) {
-                       // These are in MBD... under the root
-                       Iterator<Entry> it = root.getEntries();
-                       while(it.hasNext()) {
-                               Entry entry = it.next();
-                               if(entry.getName().startsWith("MBD")) {
-                                       dirs.add(entry);
-                               }
-                       }
-               } else if(ext instanceof WordExtractor) {
-                       // These are in ObjectPool -> _... under the root
-                       try {
-                               DirectoryEntry op = (DirectoryEntry)
-                                       root.getEntry("ObjectPool");
-                               Iterator<Entry> it = op.getEntries();
-                               while(it.hasNext()) {
-                                       Entry entry = it.next();
-                                       if(entry.getName().startsWith("_")) {
-                                               dirs.add(entry);
-                                       }
-                               }
-                       } catch(FileNotFoundException e) {
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     */
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+        // All the embedded directories we spotted
+        ArrayList<Entry> dirs = new ArrayList<Entry>();
+        // For anything else not directly held in as a POIFS directory
+        ArrayList<InputStream> nonPOIFS = new ArrayList<InputStream>();
+
+        // Find all the embedded directories
+        DirectoryEntry root = ext.getRoot();
+        if (root == null) {
+            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+        }
+
+        if (ext instanceof ExcelExtractor) {
+            // These are in MBD... under the root
+            Iterator<Entry> it = root.getEntries();
+            while (it.hasNext()) {
+                Entry entry = it.next();
+                if (entry.getName().startsWith("MBD")) {
+                    dirs.add(entry);
+                }
+            }
+        } else if (ext instanceof WordExtractor) {
+            // These are in ObjectPool -> _... under the root
+            try {
+                DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
+                Iterator<Entry> it = op.getEntries();
+                while (it.hasNext()) {
+                    Entry entry = it.next();
+                    if (entry.getName().startsWith("_")) {
+                        dirs.add(entry);
+                    }
+                }
+            } catch (FileNotFoundException e) {
                 // ignored here
             }
-               //} else if(ext instanceof PowerPointExtractor) {
-                       // Tricky, not stored directly in poifs
-                       // TODO
-               } else if(ext instanceof OutlookTextExtactor) {
-                  // Stored in the Attachment blocks
-                  MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
-                  for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
-                     if(attachment.attachData != null) {
-                        byte[] data = attachment.attachData.getValue();
-                        nonPOIFS.add( new ByteArrayInputStream(data) );
-                     } else if(attachment.attachmentDirectory != null) {
-                         dirs.add(attachment.attachmentDirectory.getDirectory());
-                     }
-                  }
-               }
-
-               // Create the extractors
-               if(dirs.size() == 0 && nonPOIFS.size() == 0){
-                       return new POITextExtractor[0];
-               }
-
-               ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
+        //} else if(ext instanceof PowerPointExtractor) {
+            // Tricky, not stored directly in poifs
+            // TODO
+        } else if (ext instanceof OutlookTextExtactor) {
+            // Stored in the Attachment blocks
+            MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
+            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
+                if (attachment.attachData != null) {
+                    byte[] data = attachment.attachData.getValue();
+                    nonPOIFS.add( new ByteArrayInputStream(data) );
+                } else if (attachment.attachmentDirectory != null) {
+                    dirs.add(attachment.attachmentDirectory.getDirectory());
+                }
+            }
+        }
+
+        // Create the extractors
+        if (dirs.size() == 0 && nonPOIFS.size() == 0){
+            return new POITextExtractor[0];
+        }
+
+        ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
         for (Entry dir : dirs) {
-            e.add(createExtractor(
-                    (DirectoryNode) dir
-            ));
+            e.add(createExtractor((DirectoryNode) dir));
         }
         for (InputStream nonPOIF : nonPOIFS) {
             try {
-                e.add(createExtractor(nonPOIF));
+                 e.add(createExtractor(nonPOIF));
             } catch (IllegalArgumentException ie) {
                 // Ignore, just means it didn't contain
                 //  a format we support as yet
             } catch (XmlException xe) {
-                throw new IOException(xe.getMessage());
+                 throw new IOException(xe.getMessage());
             } catch (OpenXML4JException oe) {
-                throw new IOException(oe.getMessage());
+                 throw new IOException(oe.getMessage());
             }
         }
-               return e.toArray(new POITextExtractor[e.size()]);
-       }
-
-       /**
-        * Returns an array of text extractors, one for each of
-        *  the embedded documents in the file (if there are any).
-        * If there are no embedded documents, you'll get back an
-        *  empty array. Otherwise, you'll get one open
-        *  {@link POITextExtractor} for each embedded file.
-        */
-       public static POITextExtractor[] getEmbededDocsTextExtractors(@SuppressWarnings("UnusedParameters") POIXMLTextExtractor ext) {
-               throw new IllegalStateException("Not yet supported");
-       }
+        return e.toArray(new POITextExtractor[e.size()]);
+    }
+
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     */
+    @SuppressWarnings("UnusedParameters")
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
+        throw new IllegalStateException("Not yet supported");
+    }
 }