public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes)
throws IOException
{
- super(directory, fs);
+ // The fs argument is no longer passed on; delegate to the directory-only constructor
+ this(directory, preserveNodes);
+ }
+ /**
+ * given a POI POIFSFileSystem object, and a specific directory
+ * within it, read in its Workbook and populate the high and
+ * low level models. If you're reading in a workbook...start here.
+ *
+ * @param directory the POI filesystem directory to process from
+ * @param preserveNodes whether to preserve other nodes, such as
+ * macros. This takes more memory, so only say yes if you
+ * need to. If set, will store all of the POIFSFileSystem
+ * in memory
+ * @see org.apache.poi.poifs.filesystem.POIFSFileSystem
+ * @exception IOException if the stream cannot be read
+ */
+ public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes)
+ throws IOException
+ {
+ super(directory);
String workbookName = getWorkbookDirEntryName(directory);
this.preserveNodes = preserveNodes;
this._property = property;
this._filesystem = filesystem;
- if(property.getSize() <= POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) {
+ if(property.getSize() < POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) {
_stream = new NPOIFSStream(_filesystem.getMiniStore(), property.getStartBlock());
_block_size = _filesystem.getMiniStore().getBlockStoreBlockSize();
} else {
/**
* Creates a POIFSFileSystem from a <tt>File</tt>. This uses less memory than
- * creating from an <tt>InputStream</tt>.
+ * creating from an <tt>InputStream</tt>. The File will be opened read-only
*
* Note that with this constructor, you will need to call {@link #close()}
* when you're done to have the underlying file closed, as the file is
* kept open during normal operation to read the data out.
*/
public NPOIFSFileSystem(File file)
throws IOException
+ {
+ // Delegate, asking for the file to be opened read-only
+ this(file, true);
+ }
+
+ /**
+ * Creates a POIFSFileSystem from a <tt>File</tt>. This uses less memory than
+ * creating from an <tt>InputStream</tt>.
+ *
+ * Note that with this constructor, you will need to call {@link #close()}
+ * when you're done to have the underlying file closed, as the file is
+ * kept open during normal operation to read the data out.
+ *
+ * @param file the File from which to read the data
+ * @param readOnly whether the File should be opened read-only ("r") rather
+ *  than read-write ("rw")
+ *
+ * @exception IOException on errors reading, or on invalid data
+ */
+ public NPOIFSFileSystem(File file, boolean readOnly)
+ throws IOException
+ {
+ this(
+ (new RandomAccessFile(file, readOnly? "r" : "rw")).getChannel(),
+ true // closeChannelOnError: the channel was opened here, so close it if reading fails
+ );
+ }
+
+ /**
+ * Creates a POIFSFileSystem from an open <tt>FileChannel</tt>. This uses
+ * less memory than creating from an <tt>InputStream</tt>.
+ *
+ * Note that with this constructor, you will need to call {@link #close()}
+ * when you're done to have the underlying Channel closed, as the channel is
+ * kept open during normal operation to read the data out.
+ *
+ * @param channel the FileChannel from which to read the data
+ *
+ * @exception IOException on errors reading, or on invalid data
+ */
+ public NPOIFSFileSystem(FileChannel channel)
+ throws IOException
+ {
+ // closeChannelOnError=false: a caller-supplied channel is left open if reading fails
+ this(channel, false);
+ }
+
+ /**
+ * Shared constructor logic: reads the header block from the channel,
+ * wraps the channel as the backing data source and reads the core contents.
+ *
+ * @param channel the FileChannel from which to read the data
+ * @param closeChannelOnError whether to close the channel before rethrowing
+ *  if reading fails
+ *
+ * @exception IOException on errors reading, or on invalid data
+ */
+ private NPOIFSFileSystem(FileChannel channel, boolean closeChannelOnError)
+ throws IOException
{
this();
-
- // Open the underlying channel
- FileChannel channel = (new RandomAccessFile(file, "r")).getChannel();
-
- // Get the header
- ByteBuffer headerBuffer = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
- IOUtils.readFully(channel, headerBuffer);
-
- // Have the header processed
- _header = new HeaderBlock(headerBuffer);
-
- // Now process the various entries
- _data = new FileBackedDataSource(channel);
- readCoreContents();
+
+ try {
+ // Get the header
+ ByteBuffer headerBuffer = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
+ IOUtils.readFully(channel, headerBuffer);
+
+ // Have the header processed
+ _header = new HeaderBlock(headerBuffer);
+
+ // Now process the various entries
+ _data = new FileBackedDataSource(channel);
+ readCoreContents();
+ } catch(IOException e) {
+ if(closeChannelOnError) {
+ channel.close();
+ }
+ throw e;
+ } catch(RuntimeException e) {
+ // Unchecked failures while parsing must not leave the channel open either
+ if(closeChannelOnError) {
+ channel.close();
+ }
+ throw e;
+ }
}
/**
// Oh joy, we need a new XBAT too...
xbat = createBAT(offset+1, false);
xbat.setValueAt(0, offset);
- bat.setValueAt(offset+1, POIFSConstants.DIFAT_SECTOR_BLOCK);
+ bat.setValueAt(1, POIFSConstants.DIFAT_SECTOR_BLOCK);
// Will go one place higher as XBAT added in
offset++;
public static boolean checkIfEncrypted(HSLFSlideShow hss) {
// Easy way to check - contains a stream
// "EncryptedSummary"
- POIFSFileSystem fs = hss.getPOIFSFileSystem();
try {
- fs.getRoot().getEntry("EncryptedSummary");
+ hss.getPOIFSDirectory().getEntry("EncryptedSummary");
return true;
} catch(FileNotFoundException fnfe) {
// Doesn't have encrypted properties
+ /** Returns the filesystem reported by the <code>directory</code> field. */
protected POIFSFileSystem getPOIFSFileSystem() {
return directory.getFileSystem();
}
+ /** Returns the {@link DirectoryNode} held in the <code>directory</code> field. */
+ protected DirectoryNode getPOIFSDirectory() {
+ return directory;
+ }
/**
* This constructor loads a Word document from a specific point
* in a POIFSFileSystem, probably not the default.
* Used typically to open embedded documents.
*
+ * @param directory The Directory that contains the Word document.
* @param pfilesystem The POIFSFileSystem that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in POIFSFileSystem.
*/
public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException
+ {
+ // pfilesystem is not passed on; delegate to the directory-only constructor
+ this(directory);
+ }
+ /**
+ * This constructor loads a Word document from a specific point
+ * in a POIFSFileSystem, probably not the default.
+ * Used typically to open embedded documents.
+ *
+ * @param directory The Directory that contains the Word document.
+ * @throws IOException If there is an unexpected IOException from the passed
+ * in POIFSFileSystem.
+ */
+ public HWPFDocument(DirectoryNode directory) throws IOException
{
// Sort out the hpsf properties
- super(directory, pfilesystem);
+ super(directory);
readProperties();
// read in the main stream.
import org.apache.poi.hwpf.usermodel.HeaderStories;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
this.fs = fs;
}
+ /**
+ * Create a new Word Extractor
+ * @param dir DirectoryNode containing the word file
+ * @throws IOException if the HWPFDocument cannot be created from the directory
+ */
+ public WordExtractor(DirectoryNode dir) throws IOException {
+ this(new HWPFDocument(dir));
+ }
+
/**
* Create a new Word Extractor
* @param doc The HWPFDocument to extract from
*/
package org.apache.poi.hwpf.extractor;
+import java.io.File;
import java.io.FileInputStream;
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
text.indexOf("The footer, with") > -1
);
}
+
+ /**
+ * Tests that we can work with both {@link POIFSFileSystem}
+ * and {@link NPOIFSFileSystem}, releasing the underlying
+ * file handles once the checks are done.
+ */
+ public void testDifferentPOIFS() throws Exception {
+     String dirname = System.getProperty("HWPF.testdata.path");
+     File f = new File(dirname, "test2.doc");
+
+     FileInputStream in = new FileInputStream(f);
+     NPOIFSFileSystem npoifs = null;
+     try {
+         // Open the two filesystems
+         DirectoryNode[] files = new DirectoryNode[2];
+         files[0] = (new POIFSFileSystem(in)).getRoot();
+         npoifs = new NPOIFSFileSystem(f);
+         files[1] = npoifs.getRoot();
+
+         // Open directly
+         for(DirectoryNode dir : files) {
+             WordExtractor extractor = new WordExtractor(dir);
+             assertEquals(p_text1_block, extractor.getText());
+         }
+
+         // Open via a HWPFDocument
+         for(DirectoryNode dir : files) {
+             HWPFDocument doc = new HWPFDocument(dir);
+             WordExtractor extractor = new WordExtractor(doc);
+             assertEquals(p_text1_block, extractor.getText());
+         }
+     } finally {
+         // The file-backed filesystem keeps its file open until close() is called
+         try {
+             if(npoifs != null) {
+                 npoifs.close();
+             }
+         } finally {
+             in.close();
+         }
+     }
+ }
}
* @return an open <tt>InputStream</tt> for the specified sample file\r
*/\r
public static InputStream openSampleFileStream(String sampleFileName) {\r
- \r
+ // Resolve the sample to a File, then open it; a missing file is rethrown unchecked\r
+ File f = getSampeFile(sampleFileName);\r
+ try {\r
+ return new FileInputStream(f);\r
+ } catch (FileNotFoundException e) {\r
+ throw new RuntimeException(e);\r
+ }\r
+ }\r
+ public static File getSampeFile(String sampleFileName) {\r
if(!_isInitialised) {\r
try {\r
initialise();\r
_isInitialised = true;\r
}\r
}\r
- if (_sampleDataIsAvaliableOnClassPath) {\r
- InputStream result = openClasspathResource(sampleFileName);\r
- if(result == null) {\r
- throw new RuntimeException("specified test sample file '" + sampleFileName \r
- + "' not found on the classpath");\r
- }\r
-// System.out.println("opening cp: " + sampleFileName);\r
- // wrap to avoid temp warning method about auto-closing input stream\r
- return new NonSeekableInputStream(result);\r
- }\r
if (_resolvedDataDir == null) {\r
throw new RuntimeException("Must set system property '"\r
+ TEST_DATA_DIR_SYS_PROPERTY_NAME\r
+ "' not found in data dir '" + _resolvedDataDir.getAbsolutePath() + "'");\r
}\r
// System.out.println("opening " + f.getAbsolutePath());\r
- try {\r
- return new FileInputStream(f);\r
- } catch (FileNotFoundException e) {\r
- throw new RuntimeException(e);\r
- }\r
+ return f;\r
}\r
\r
private static void initialise() {\r
import org.apache.poi.hssf.record.RecordFormatException;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.formula.Area3DPtg;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.TempFile;
/**
nr = wb.getWorkbook().getNameRecord(2);
assertEquals("Sheet2!E:F,Sheet2!$A$9:$IV$12", nr.getAreaReference(wb)); // E:F,9:12
}
+
+ /**
+ * Tests that we can work with both {@link POIFSFileSystem}
+ * and {@link NPOIFSFileSystem}, closing the file-backed
+ * filesystem once the checks are done.
+ */
+ public void testDifferentPOIFS() throws Exception {
+     NPOIFSFileSystem npoifs = new NPOIFSFileSystem(HSSFTestDataSamples.getSampeFile("Simple.xls"));
+     try {
+         // Open the two filesystems
+         DirectoryNode[] files = new DirectoryNode[2];
+         files[0] = (new POIFSFileSystem(HSSFTestDataSamples.openSampleFileStream("Simple.xls"))).getRoot();
+         files[1] = npoifs.getRoot();
+
+         // Open without preserving nodes
+         for(DirectoryNode dir : files) {
+             HSSFWorkbook workbook = new HSSFWorkbook(dir, false);
+             HSSFSheet sheet = workbook.getSheetAt(0);
+             HSSFCell cell = sheet.getRow(0).getCell(0);
+             assertEquals("replaceMe", cell .getRichStringCellValue().getString());
+         }
+
+         // Now re-check with preserving
+         for(DirectoryNode dir : files) {
+             HSSFWorkbook workbook = new HSSFWorkbook(dir, true);
+             HSSFSheet sheet = workbook.getSheetAt(0);
+             HSSFCell cell = sheet.getRow(0).getCell(0);
+             assertEquals("replaceMe", cell .getRichStringCellValue().getString());
+         }
+     } finally {
+         // The file-backed filesystem keeps its file open until close() is called
+         npoifs.close();
+     }
+ }
}