From: Nick Burch Date: Sun, 19 Dec 2010 08:06:48 +0000 (+0000) Subject: Initial work on a NIO POIFSFileSystem. Currently is able to open the file and read... X-Git-Tag: REL_3_8_BETA1~88 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=302df8aa4c8f5435efc2bbde929de2431308904d;p=poi.git Initial work on a NIO POIFSFileSystem. Currently is able to open the file and read the header, but no more git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1050773 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java new file mode 100644 index 0000000000..eea94f8679 --- /dev/null +++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java @@ -0,0 +1,655 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.poifs.filesystem; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.PushbackInputStream; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.channels.ReadableByteChannel; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.apache.poi.poifs.common.POIFSBigBlockSize; +import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.poifs.dev.POIFSViewable; +import org.apache.poi.poifs.nio.ByteArrayBackedDataSource; +import org.apache.poi.poifs.nio.DataSource; +import org.apache.poi.poifs.nio.FileBackedDataSource; +import org.apache.poi.poifs.property.DirectoryProperty; +import org.apache.poi.poifs.property.Property; +import org.apache.poi.poifs.property.PropertyTable; +import org.apache.poi.poifs.storage.BATBlock; +import org.apache.poi.poifs.storage.BlockAllocationTableReader; +import org.apache.poi.poifs.storage.BlockAllocationTableWriter; +import org.apache.poi.poifs.storage.BlockList; +import org.apache.poi.poifs.storage.BlockWritable; +import org.apache.poi.poifs.storage.HeaderBlock; +import org.apache.poi.poifs.storage.HeaderBlockConstants; +import org.apache.poi.poifs.storage.HeaderBlockWriter; +import org.apache.poi.poifs.storage.RawDataBlockList; +import org.apache.poi.poifs.storage.SmallBlockTableReader; +import org.apache.poi.poifs.storage.SmallBlockTableWriter; +import org.apache.poi.util.CloseIgnoringInputStream; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.LongField; +import org.apache.poi.util.POILogFactory; +import 
org.apache.poi.util.POILogger; + +/** + * This is the main class of the POIFS system; it manages the entire + * life cycle of the filesystem. + * This is the new NIO version + */ + +public class NPOIFSFileSystem + implements POIFSViewable +{ + private static final POILogger _logger = + POILogFactory.getLogger(NPOIFSFileSystem.class); + + /** + * Convenience method for clients that want to avoid the auto-close behaviour of the constructor. + */ + public static InputStream createNonClosingInputStream(InputStream is) { + return new CloseIgnoringInputStream(is); + } + + private PropertyTable _property_table; + private List _blocks; + private HeaderBlock _header; + private DirectoryNode _root; + + private DataSource _data; + + private List _documents; // TODO - probably remove this shortly + + /** + * What big block size the file uses. Most files + * use 512 bytes, but a few use 4096 + */ + private POIFSBigBlockSize bigBlockSize = + POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS; + + /** + * Constructor, intended for writing + */ + public NPOIFSFileSystem() + { + _property_table = new PropertyTable(bigBlockSize); + _blocks = new ArrayList(); + _root = null; + } + + /** + * Creates a POIFSFileSystem from a File. This uses less memory than + * creating from an InputStream + * + * @param file the File from which to read the data + * + * @exception IOException on errors reading, or on invalid data + */ + public NPOIFSFileSystem(File file) + throws IOException + { + this(); + + // Open the underlying channel + FileChannel channel = (new RandomAccessFile(file, "r")).getChannel(); + + // Get the header + ByteBuffer headerBuffer = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE); + IOUtils.readFully(channel, headerBuffer); + + // Have the header processed + _header = new HeaderBlock(headerBuffer); + + // Now process the various entries + _data = new FileBackedDataSource(channel); + readCoreContents(); + } + + /** + * Create a POIFSFileSystem from an InputStream. Normally the stream is read until + * EOF. The stream is always closed.

+     *
+     * Some streams are usable after reaching EOF (typically those that return true
+     * for markSupported()). In the unlikely case that the caller has such a stream
+     * and needs to use it after this constructor completes, a workaround is to wrap the
+     * stream in order to trap the close() call. A convenience method
+     * (createNonClosingInputStream()) has been provided for this purpose:
+     * <pre>
+     * InputStream wrappedStream = POIFSFileSystem.createNonClosingInputStream(is);
+     * HSSFWorkbook wb = new HSSFWorkbook(wrappedStream);
+     * is.reset();
+     * doSomethingElse(is);
+     * </pre>
+     * Note also the special case of ByteArrayInputStream, for which the close()
+     * method does nothing.
+     * <pre>
+     * ByteArrayInputStream bais = ...
+     * HSSFWorkbook wb = new HSSFWorkbook(bais); // calls bais.close() !
+     * bais.reset(); // no problem
+     * doSomethingElse(bais);
+     * </pre>
+ * + * @param stream the InputStream from which to read the data + * + * @exception IOException on errors reading, or on invalid data + */ + + public NPOIFSFileSystem(InputStream stream) + throws IOException + { + this(); + + ReadableByteChannel channel = null; + boolean success = false; + + try { + // Turn our InputStream into something NIO based + channel = Channels.newChannel(stream); + + // Get the header + ByteBuffer headerBuffer = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE); + IOUtils.readFully(channel, headerBuffer); + + // Have the header processed + _header = new HeaderBlock(headerBuffer); + + // We need to buffer the whole file into memory when + // working with an InputStream. Do so now + int maxSize = _header.getBATCount() * + _header.getBigBlockSize().getBATEntriesPerBlock() * + _header.getBigBlockSize().getBigBlockSize(); + ByteBuffer data = ByteBuffer.allocate(maxSize); + data.put(headerBuffer); + IOUtils.readFully(channel, data); + success = true; + + // Turn it into a DataSource + _data = new ByteArrayBackedDataSource(data.array(), data.position()); + } finally { + // As per the constructor contract, always close the stream + if(channel != null) + channel.close(); + closeInputStream(stream, success); + } + + // Now process the various entries + readCoreContents(); + } + /** + * @param stream the stream to be closed + * @param success false if an exception is currently being thrown in the calling method + */ + private void closeInputStream(InputStream stream, boolean success) { + try { + stream.close(); + } catch (IOException e) { + if(success) { + throw new RuntimeException(e); + } + // else not success? Try block did not complete normally + // just print stack trace and leave original ex to be thrown + e.printStackTrace(); + } + } + + /** + * Checks that the supplied InputStream (which MUST + * support mark and reset, or be a PushbackInputStream) + * has a POIFS (OLE2) header at the start of it. + * If your InputStream does not support mark / reset, + * then wrap it in a PushBackInputStream, then be + * sure to always use that, and not the original! + * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream + */ + public static boolean hasPOIFSHeader(InputStream inp) throws IOException { + // We want to peek at the first 8 bytes + inp.mark(8); + + byte[] header = new byte[8]; + IOUtils.readFully(inp, header); + LongField signature = new LongField(HeaderBlockConstants._signature_offset, header); + + // Wind back those 8 bytes + if(inp instanceof PushbackInputStream) { + PushbackInputStream pin = (PushbackInputStream)inp; + pin.unread(header); + } else { + inp.reset(); + } + + // Did it match the signature? 
+ return (signature.get() == HeaderBlockConstants._signature); + } + + /** + * Read and process the PropertiesTable and the + * FAT / XFAT blocks, so that we're ready to + * work with the file + */ + private void readCoreContents() throws IOException { + // Grab the block size + bigBlockSize = _header.getBigBlockSize(); + + // Read the properties + // TODO + + // Read the FAT blocks + // TODO + + // Now read the XFAT blocks + // TODO + } + + /** + * Create a new document to be added to the root directory + * + * @param stream the InputStream from which the document's data + * will be obtained + * @param name the name of the new POIFSDocument + * + * @return the new DocumentEntry + * + * @exception IOException on error creating the new POIFSDocument + */ + + public DocumentEntry createDocument(final InputStream stream, + final String name) + throws IOException + { + return getRoot().createDocument(name, stream); + } + + /** + * create a new DocumentEntry in the root entry; the data will be + * provided later + * + * @param name the name of the new DocumentEntry + * @param size the size of the new DocumentEntry + * @param writer the writer of the new DocumentEntry + * + * @return the new DocumentEntry + * + * @exception IOException + */ + + public DocumentEntry createDocument(final String name, final int size, + final POIFSWriterListener writer) + throws IOException + { + return getRoot().createDocument(name, size, writer); + } + + /** + * create a new DirectoryEntry in the root directory + * + * @param name the name of the new DirectoryEntry + * + * @return the new DirectoryEntry + * + * @exception IOException on name duplication + */ + + public DirectoryEntry createDirectory(final String name) + throws IOException + { + return getRoot().createDirectory(name); + } + + /** + * Write the filesystem out + * + * @param stream the OutputStream to which the filesystem will be + * written + * + * @exception IOException thrown on errors writing to the stream + */ + + public void writeFilesystem(final OutputStream stream) + throws IOException + { + + // get the property table ready + _property_table.preWrite(); + + // create the small block store, and the SBAT + SmallBlockTableWriter sbtw = + new SmallBlockTableWriter(bigBlockSize, _documents, _property_table.getRoot()); + + // create the block allocation table + BlockAllocationTableWriter bat = + new BlockAllocationTableWriter(bigBlockSize); + + // create a list of BATManaged objects: the documents plus the + // property table and the small block table + List bm_objects = new ArrayList(); + + bm_objects.addAll(_documents); + bm_objects.add(_property_table); + bm_objects.add(sbtw); + bm_objects.add(sbtw.getSBAT()); + + // walk the list, allocating space for each and assigning each + // a starting block number + Iterator iter = bm_objects.iterator(); + + while (iter.hasNext()) + { + BATManaged bmo = ( BATManaged ) iter.next(); + int block_count = bmo.countBlocks(); + + if (block_count != 0) + { + bmo.setStartBlock(bat.allocateSpace(block_count)); + } + else + { + + // Either the BATManaged object is empty or its data + // is composed of SmallBlocks; in either case, + // allocating space in the BAT is inappropriate + } + } + + // allocate space for the block allocation table and take its + // starting block + int batStartBlock = bat.createBlocks(); + + // get the extended block allocation table blocks + HeaderBlockWriter header_block_writer = new HeaderBlockWriter(bigBlockSize); + BATBlock[] xbat_blocks = + 
header_block_writer.setBATBlocks(bat.countBlocks(), + batStartBlock); + + // set the property table start block + header_block_writer.setPropertyStart(_property_table.getStartBlock()); + + // set the small block allocation table start block + header_block_writer.setSBATStart(sbtw.getSBAT().getStartBlock()); + + // set the small block allocation table block count + header_block_writer.setSBATBlockCount(sbtw.getSBATBlockCount()); + + // the header is now properly initialized. Make a list of + // writers (the header block, followed by the documents, the + // property table, the small block store, the small block + // allocation table, the block allocation table, and the + // extended block allocation table blocks) + List writers = new ArrayList(); + + writers.add(header_block_writer); + writers.addAll(_documents); + writers.add(_property_table); + writers.add(sbtw); + writers.add(sbtw.getSBAT()); + writers.add(bat); + for (int j = 0; j < xbat_blocks.length; j++) + { + writers.add(xbat_blocks[ j ]); + } + + // now, write everything out + iter = writers.iterator(); + while (iter.hasNext()) + { + BlockWritable writer = ( BlockWritable ) iter.next(); + + writer.writeBlocks(stream); + } + } + + /** + * read in a file and write it back out again + * + * @param args names of the files; arg[ 0 ] is the input file, + * arg[ 1 ] is the output file + * + * @exception IOException + */ + + public static void main(String args[]) + throws IOException + { + if (args.length != 2) + { + System.err.println( + "two arguments required: input filename and output filename"); + System.exit(1); + } + FileInputStream istream = new FileInputStream(args[ 0 ]); + FileOutputStream ostream = new FileOutputStream(args[ 1 ]); + + new NPOIFSFileSystem(istream).writeFilesystem(ostream); + istream.close(); + ostream.close(); + } + + /** + * get the root entry + * + * @return the root entry + */ + + public DirectoryNode getRoot() + { + if (_root == null) + { + // TODO +// _root = new DirectoryNode(_property_table.getRoot(), this, null); + } + return _root; + } + + /** + * open a document in the root entry's list of entries + * + * @param documentName the name of the document to be opened + * + * @return a newly opened DocumentInputStream + * + * @exception IOException if the document does not exist or the + * name is that of a DirectoryEntry + */ + + public DocumentInputStream createDocumentInputStream( + final String documentName) + throws IOException + { + return getRoot().createDocumentInputStream(documentName); + } + + /** + * add a new POIFSDocument + * + * @param document the POIFSDocument being added + */ + + void addDocument(final POIFSDocument document) + { + _documents.add(document); + _property_table.addProperty(document.getDocumentProperty()); + } + + /** + * add a new DirectoryProperty + * + * @param directory the DirectoryProperty being added + */ + + void addDirectory(final DirectoryProperty directory) + { + _property_table.addProperty(directory); + } + + /** + * remove an entry + * + * @param entry to be removed + */ + + void remove(EntryNode entry) + { + _property_table.removeProperty(entry.getProperty()); + if (entry.isDocumentEntry()) + { + _documents.remove((( DocumentNode ) entry).getDocument()); + } + } + + private void processProperties(final BlockList small_blocks, + final BlockList big_blocks, + final Iterator properties, + final DirectoryNode dir, + final int headerPropertiesStartAt) + throws IOException + { + while (properties.hasNext()) + { + Property property = ( Property ) properties.next(); + 
String name = property.getName(); + DirectoryNode parent = (dir == null) + ? (( DirectoryNode ) getRoot()) + : dir; + + if (property.isDirectory()) + { + DirectoryNode new_dir = + ( DirectoryNode ) parent.createDirectory(name); + + new_dir.setStorageClsid( property.getStorageClsid() ); + + processProperties( + small_blocks, big_blocks, + (( DirectoryProperty ) property).getChildren(), + new_dir, headerPropertiesStartAt); + } + else + { + int startBlock = property.getStartBlock(); + int size = property.getSize(); + POIFSDocument document = null; + + if (property.shouldUseSmallBlocks()) + { + document = + new POIFSDocument(name, + small_blocks.fetchBlocks(startBlock, headerPropertiesStartAt), + size); + } + else + { + document = + new POIFSDocument(name, + big_blocks.fetchBlocks(startBlock, headerPropertiesStartAt), + size); + } + parent.createDocument(document); + } + } + } + + /* ********** START begin implementation of POIFSViewable ********** */ + + /** + * Get an array of objects, some of which may implement + * POIFSViewable + * + * @return an array of Object; may not be null, but may be empty + */ + + public Object [] getViewableArray() + { + if (preferArray()) + { + return (( POIFSViewable ) getRoot()).getViewableArray(); + } + return new Object[ 0 ]; + } + + /** + * Get an Iterator of objects, some of which may implement + * POIFSViewable + * + * @return an Iterator; may not be null, but may have an empty + * back end store + */ + + public Iterator getViewableIterator() + { + if (!preferArray()) + { + return (( POIFSViewable ) getRoot()).getViewableIterator(); + } + return Collections.EMPTY_LIST.iterator(); + } + + /** + * Give viewers a hint as to whether to call getViewableArray or + * getViewableIterator + * + * @return true if a viewer should call getViewableArray, false if + * a viewer should call getViewableIterator + */ + + public boolean preferArray() + { + return (( POIFSViewable ) getRoot()).preferArray(); + } + + /** + * Provides a short description of the object, to be used when a + * POIFSViewable object has not provided its contents. 
+ * + * @return short description + */ + + public String getShortDescription() + { + return "POIFS FileSystem"; + } + + /** + * @return The Big Block size, normally 512 bytes, sometimes 4096 bytes + */ + public int getBigBlockSize() { + return bigBlockSize.getBigBlockSize(); + } + /** + * @return The Big Block size, normally 512 bytes, sometimes 4096 bytes + */ + public POIFSBigBlockSize getBigBlockSizeDetails() { + return bigBlockSize; + } + + /* ********** END begin implementation of POIFSViewable ********** */ +} + diff --git a/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java b/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java index e302f17917..d4f81f7ee2 100644 --- a/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java +++ b/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java @@ -22,6 +22,7 @@ import junit.framework.TestSuite; import org.apache.poi.poifs.eventfilesystem.TestPOIFSReaderRegistry; import org.apache.poi.poifs.filesystem.AllPOIFSFileSystemTests; +import org.apache.poi.poifs.nio.TestDataSource; import org.apache.poi.poifs.property.AllPOIFSPropertyTests; import org.apache.poi.poifs.storage.AllPOIFSStorageTests; /** @@ -33,6 +34,7 @@ public final class AllPOIFSTests { public static Test suite() { TestSuite result = new TestSuite("Tests for org.apache.poi.poifs"); result.addTestSuite(TestPOIFSReaderRegistry.class); + result.addTestSuite(TestDataSource.class); result.addTest(AllPOIFSFileSystemTests.suite()); result.addTest(AllPOIFSPropertyTests.suite()); result.addTest(AllPOIFSStorageTests.suite()); diff --git a/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java b/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java index 295b5ad574..bc46e17c9a 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java @@ -39,6 +39,7 @@ public final class AllPOIFSFileSystemTests { result.addTestSuite(TestOffice2007XMLException.class); result.addTestSuite(TestPOIFSDocumentPath.class); result.addTestSuite(TestPOIFSFileSystem.class); + result.addTestSuite(TestNPOIFSFileSystem.class); result.addTestSuite(TestPropertySorter.class); result.addTestSuite(TestOle10Native.class); return result; diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java b/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java new file mode 100644 index 0000000000..387d15b800 --- /dev/null +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java @@ -0,0 +1,59 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.poifs.filesystem; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import junit.framework.TestCase; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.poifs.common.POIFSBigBlockSize; +import org.apache.poi.poifs.storage.HeaderBlock; +import org.apache.poi.poifs.storage.RawDataBlockList; + +/** + * Tests for the new NIO POIFSFileSystem implementation + */ +public final class TestNPOIFSFileSystem extends TestCase { + private static final POIDataSamples _inst = POIDataSamples.getPOIFSInstance(); + + public void testBasicOpen() throws Exception { + NPOIFSFileSystem fsA, fsB; + + // With a simple 512 block file + fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi")); + fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi")); + for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB}) { + assertEquals(512, fs.getBigBlockSize()); + } + + // Now with a simple 4096 block file + fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize4096.zvi")); + fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize4096.zvi")); + for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB}) { + assertEquals(4096, fs.getBigBlockSize()); + } + } + +}