123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.hslf.extractor;
-
- import java.io.File;
- import java.io.IOException;
- import java.io.InputStream;
- import java.util.List;
-
- import org.apache.poi.EncryptedDocumentException;
- import org.apache.poi.extractor.POIOLE2TextExtractor;
- import org.apache.poi.hslf.usermodel.HSLFObjectShape;
- import org.apache.poi.hslf.usermodel.HSLFShape;
- import org.apache.poi.hslf.usermodel.HSLFSlideShow;
- import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
- import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
- import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
- import org.apache.poi.poifs.filesystem.DirectoryNode;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
- import org.apache.poi.sl.extractor.SlideShowExtractor;
- import org.apache.poi.sl.usermodel.SlideShow;
- import org.apache.poi.sl.usermodel.SlideShowFactory;
- import org.apache.poi.util.Removal;
-
- /**
- * This class can be used to extract text from a PowerPoint file. Can optionally
- * also get the notes from one.
- *
- * @deprecated in POI 4.0.0, use {@link SlideShowExtractor} instead
- */
- @SuppressWarnings("WeakerAccess")
- @Deprecated
- @Removal(version="5.0.0")
- public final class PowerPointExtractor extends POIOLE2TextExtractor {
- private final SlideShowExtractor<HSLFShape,HSLFTextParagraph> delegate;
-
- private boolean slidesByDefault = true;
- private boolean notesByDefault;
- private boolean commentsByDefault;
- private boolean masterByDefault;
-
- /**
- * Basic extractor. Returns all the text, and optionally all the notes
- */
- public static void main(String[] args) throws IOException {
- if (args.length < 1) {
- System.err.println("Usage:");
- System.err.println("\tPowerPointExtractor [-notes] <file>");
- System.exit(1);
- }
-
- boolean notes = false;
- boolean comments = false;
- boolean master = true;
-
- String file;
- if (args.length > 1) {
- notes = true;
- file = args[1];
- if (args.length > 2) {
- comments = true;
- }
- } else {
- file = args[0];
- }
-
- try (PowerPointExtractor ppe = new PowerPointExtractor(file)) {
- System.out.println(ppe.getText(true, notes, comments, master));
- }
- }
-
- public PowerPointExtractor(final HSLFSlideShow slideShow) {
- super(slideShow.getSlideShowImpl());
- setFilesystem(slideShow);
- delegate = new SlideShowExtractor<>(slideShow);
- }
-
- /**
- * Creates a PowerPointExtractor, from a file
- *
- * @param fileName The name of the file to extract from
- */
- public PowerPointExtractor(String fileName) throws IOException {
- this(createHSLF(new File(fileName), Biff8EncryptionKey.getCurrentUserPassword(), true));
- }
-
- /**
- * Creates a PowerPointExtractor, from an Input Stream
- *
- * @param iStream The input stream containing the PowerPoint document
- */
- public PowerPointExtractor(InputStream iStream) throws IOException {
- this(createHSLF(iStream, Biff8EncryptionKey.getCurrentUserPassword()));
- }
-
- /**
- * Creates a PowerPointExtractor, from an open POIFSFileSystem
- *
- * @param fs the POIFSFileSystem containing the PowerPoint document
- */
- public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
- this(createHSLF(fs, Biff8EncryptionKey.getCurrentUserPassword()));
- }
-
- /**
- * Creates a PowerPointExtractor, from a specific place
- * inside an open {@link POIFSFileSystem}
- *
- * @param dir the POIFS Directory containing the PowerPoint document
- */
- public PowerPointExtractor(DirectoryNode dir) throws IOException {
- this(new HSLFSlideShow(dir));
- }
-
- /**
- * Creates a PowerPointExtractor, from a HSLFSlideShow
- *
- * @param ss the HSLFSlideShow to extract text from
- */
- public PowerPointExtractor(HSLFSlideShowImpl ss) {
- this(new HSLFSlideShow(ss));
- }
-
- /**
- * Should a call to getText() return slide text? Default is yes
- */
- public void setSlidesByDefault(final boolean slidesByDefault) {
- this.slidesByDefault = slidesByDefault;
- delegate.setSlidesByDefault(slidesByDefault);
- }
-
- /**
- * Should a call to getText() return notes text? Default is no
- */
- public void setNotesByDefault(final boolean notesByDefault) {
- this.notesByDefault = notesByDefault;
- delegate.setNotesByDefault(notesByDefault);
- }
-
- /**
- * Should a call to getText() return comments text? Default is no
- */
- public void setCommentsByDefault(final boolean commentsByDefault) {
- this.commentsByDefault = commentsByDefault;
- delegate.setCommentsByDefault(commentsByDefault);
- }
-
- /**
- * Should a call to getText() return text from master? Default is no
- */
- public void setMasterByDefault(final boolean masterByDefault) {
- this.masterByDefault = masterByDefault;
- delegate.setMasterByDefault(masterByDefault);
- }
-
- /**
- * Fetches all the slide text from the slideshow, but not the notes, unless
- * you've called setSlidesByDefault() and setNotesByDefault() to change this
- */
- @Override
- public String getText() {
- return delegate.getText();
- }
-
- /**
- * Fetches text from the slideshow, be it slide text or note text. Because
- * the final block of text in a TextRun normally have their last \n
- * stripped, we add it back
- *
- * @param getSlideText fetch slide text
- * @param getNoteText fetch note text
- */
- public String getText(boolean getSlideText, boolean getNoteText) {
- return getText(getSlideText,getNoteText,commentsByDefault,masterByDefault);
- }
-
- public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText, boolean getMasterText) {
- delegate.setSlidesByDefault(getSlideText);
- delegate.setNotesByDefault(getNoteText);
- delegate.setCommentsByDefault(getCommentText);
- delegate.setMasterByDefault(getMasterText);
- try {
- return delegate.getText();
- } finally {
- delegate.setSlidesByDefault(slidesByDefault);
- delegate.setNotesByDefault(notesByDefault);
- delegate.setCommentsByDefault(commentsByDefault);
- delegate.setMasterByDefault(masterByDefault);
- }
- }
-
- /**
- * Fetches all the notes text from the slideshow, but not the slide text
- */
- public String getNotes() {
- return getText(false, true, false, false);
- }
-
- @SuppressWarnings("unchecked")
- public List<HSLFObjectShape> getOLEShapes() {
- return (List<HSLFObjectShape>)delegate.getOLEShapes();
- }
-
- /**
- * Helper method to avoid problems with compiling code in Eclipse
- *
- * Eclipse javac has some bugs with complex casts, this method tries
- * to work around this.
- *
- * @param fs The {@link POIFSFileSystem} to read the document from
- * @param password The password that should be used or null if no password is necessary.
- *
- * @return The created SlideShow
- *
- * @throws IOException if an error occurs while reading the data
- */
- private static HSLFSlideShow createHSLF(POIFSFileSystem fs, String password) throws IOException, EncryptedDocumentException {
- // Note: don't change the code here, it is required for Eclipse to compile the code
- SlideShow slideShowOrig = SlideShowFactory.create(fs, password);
- return (HSLFSlideShow)slideShowOrig;
- }
-
- /**
- * Helper method to avoid problems with compiling code in Eclipse
- *
- * Eclipse javac has some bugs with complex casts, this method tries
- * to work around this.
- *
- * @param inp The {@link InputStream} to read data from.
- * @param password The password that should be used or null if no password is necessary.
- *
- * @return The created SlideShow
- *
- * @throws IOException if an error occurs while reading the data
- * @throws EncryptedDocumentException If the wrong password is given for a protected file
- */
- private static HSLFSlideShow createHSLF(InputStream inp, String password) throws IOException, EncryptedDocumentException {
- // Note: don't change the code here, it is required for Eclipse to compile the code
- SlideShow slideShowOrig = SlideShowFactory.create(inp, password);
- return (HSLFSlideShow)slideShowOrig;
- }
-
- /**
- * Helper method to avoid problems with compiling code in Eclipse
- *
- * Eclipse javac has some bugs with complex casts, this method tries
- * to work around this.
- *
- * @param file The file to read data from.
- * @param password The password that should be used or null if no password is necessary.
- * @param readOnly If the SlideShow should be opened in read-only mode to avoid writing back
- * changes when the document is closed.
- *
- * @return The created SlideShow
- *
- * @throws IOException if an error occurs while reading the data
- * @throws EncryptedDocumentException If the wrong password is given for a protected file
- */
- private static HSLFSlideShow createHSLF(File file, String password, boolean readOnly) throws IOException, EncryptedDocumentException {
- // Note: don't change the code here, it is required for Eclipse to compile the code
- SlideShow slideShowOrig = SlideShowFactory.create(file, password, readOnly);
- return (HSLFSlideShow)slideShowOrig;
- }
- }
|