]> source.dussan.org Git - poi.git/commitdiff
Implement a NPOIFS document reader, and add tests which use it
author Nick Burch <nick@apache.org>
Tue, 28 Dec 2010 08:52:50 +0000 (08:52 +0000)
committer Nick Burch <nick@apache.org>
Tue, 28 Dec 2010 08:52:50 +0000 (08:52 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053279 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java
src/java/org/apache/poi/poifs/filesystem/DocumentInputStream.java
src/java/org/apache/poi/poifs/filesystem/NDocumentInputStream.java [new file with mode: 0644]
src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java
src/java/org/apache/poi/poifs/filesystem/NPOIFSMiniStore.java
src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java

index 92261929ac0f34fa3fca627a03e6cd9173835316..fe15102b2af7a564eb266735517e403040381750 100644 (file)
@@ -120,8 +120,7 @@ public class DirectoryNode
             }
             else
             {
-                childNode = new DocumentNode(( DocumentProperty ) child,
-                                             this);
+                childNode = new DocumentNode((DocumentProperty) child, this);
             }
             _entries.add(childNode);
             _byname.put(childNode.getName(), childNode);
index ecd110a3292a85a38e4b79a3208dbf065ff76e18..577f3d93b1191d576b896f076537e9e577fab7ce 100644 (file)
@@ -67,11 +67,16 @@ public final class DocumentInputStream extends InputStream implements LittleEndi
                if (!(document instanceof DocumentNode)) {
                        throw new IOException("Cannot open internal document storage");
                }
+               DocumentNode documentNode = (DocumentNode)document;
+               if(documentNode.getDocument() == null) {
+         throw new IOException("Cannot open internal document storage");
+               }
+                     
                _current_offset = 0;
                _marked_offset = 0;
                _document_size = document.getSize();
                _closed = false;
-               _document = ((DocumentNode) document).getDocument();
+               _document = documentNode.getDocument();
                _currentBlock = getDataInputBlock(0);
        }
 
diff --git a/src/java/org/apache/poi/poifs/filesystem/NDocumentInputStream.java b/src/java/org/apache/poi/poifs/filesystem/NDocumentInputStream.java
new file mode 100644 (file)
index 0000000..52c06f8
--- /dev/null
@@ -0,0 +1,316 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.poifs.filesystem;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+
+import org.apache.poi.poifs.property.DocumentProperty;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndianInput;
+
+/**
+ * This class provides methods to read a DocumentEntry managed by a
+ * {@link NPOIFSFileSystem} instance.
+ *
+ * @author Marc Johnson (mjohnson at apache dot org)
+ */
+public final class NDocumentInputStream extends InputStream implements LittleEndianInput {
+       /** returned by read operations if we're at end of document */
+       private static final int EOF = -1;
+
+       /** sizes, in bytes, of the fixed-width values read by readUShort/readInt/readLong */
+       private static final int SIZE_SHORT = 2;
+       private static final int SIZE_INT = 4;
+       private static final int SIZE_LONG = 8;
+
+       /** current offset into the Document */
+       private int _current_offset;
+       /** number of blocks pulled from {@link #_data} so far (bumped each time readFully fetches a block) */
+       private int _current_block_count;
+
+       /** current marked offset into the Document (used by mark and reset) */
+       private int _marked_offset;
+       /** and the block count for it */
+   private int _marked_offset_count;
+
+       /** the Document's size */
+       private int _document_size;
+
+       /** have we been closed? */
+       private boolean _closed;
+
+       /** the actual Document */
+       private NPOIFSDocument _document;
+       
+       /** iterator over the Document's data blocks, obtained from getBlockIterator() */
+       private Iterator<ByteBuffer> _data;
+       /** the block currently being read from; null until the first block is fetched */
+       private ByteBuffer _buffer;
+
+       /**
+        * Builds a stream over the contents of the given DocumentEntry. The
+        * entry's property and the NPOIFS filesystem of its parent directory
+        * are used to construct the backing {@link NPOIFSDocument}.
+        * 
+        * @param document the DocumentEntry to be read
+        * 
+        * @exception IOException if the entry is not a {@link DocumentNode}
+        */
+       public NDocumentInputStream(DocumentEntry document) throws IOException {
+               if (!(document instanceof DocumentNode)) {
+                       throw new IOException("Cannot open internal document storage");
+               }
+
+               // Gather everything needed from the entry first
+               int size = document.getSize();
+               DocumentNode node = (DocumentNode) document;
+               DocumentProperty docProperty = (DocumentProperty) node.getProperty();
+               DirectoryNode parentDir = (DirectoryNode) node.getParent();
+               NPOIFSDocument backing = new NPOIFSDocument(docProperty, parentDir.getNFileSystem());
+
+               // Then set up the stream state, positioned at the very start
+               _document = backing;
+               _document_size = size;
+               _data = backing.getBlockIterator();
+               _closed = false;
+               _current_offset = 0;
+               _current_block_count = 0;
+               _marked_offset = 0;
+               _marked_offset_count = 0;
+       }
+
+       /**
+        * Builds a stream that reads directly from an already constructed
+        * Document, starting at its first byte.
+        * 
+        * @param document the Document to be read
+        */
+       public NDocumentInputStream(NPOIFSDocument document) {
+               _document = document;
+               _document_size = document.getSize();
+               _data = document.getBlockIterator();
+               _closed = false;
+
+               // Nothing read and nothing marked yet
+               _current_offset = 0;
+               _current_block_count = 0;
+               _marked_offset = 0;
+               _marked_offset_count = 0;
+       }
+
+       public int available() {
+               if (_closed) {
+                       throw new IllegalStateException("cannot perform requested operation on a closed stream");
+               }
+               return _document_size - _current_offset;
+       }
+
+       /**
+        * Flags this stream as closed. Later read, skip and available calls
+        * check the flag and fail; nothing else is released here.
+        */
+       public void close() {
+               _closed = true;
+       }
+
+       public void mark(int ignoredReadlimit) {
+               _marked_offset = _current_offset;
+               _marked_offset_count = _current_block_count;
+       }
+
+       /**
+        * Tests if this input stream supports the mark and reset methods.
+        * 
+        * @return <code>true</code> always
+        */
+       public boolean markSupported() {
+               return true;
+       }
+
+       public int read() throws IOException {
+               dieIfClosed();
+               if (atEOD()) {
+                       return EOF;
+               }
+               byte[] b = new byte[1];
+               int result = read(b, 0, 1);
+               if(result >= 0) {
+                  if(b[0] < 0) {
+                     return b[0]+256;
+                  }
+                  return b[0];
+               }
+               return result;
+       }
+
+       /**
+        * Reads into the whole of the supplied array; equivalent to
+        * <code>read(b, 0, b.length)</code>.
+        * 
+        * @param b the array to fill
+        * @return the number of bytes read, or -1 at end of document
+        */
+       public int read(byte[] b) throws IOException {
+               return read(b, 0, b.length);
+       }
+
+       public int read(byte[] b, int off, int len) throws IOException {
+               dieIfClosed();
+               if (b == null) {
+                       throw new IllegalArgumentException("buffer must not be null");
+               }
+               if (off < 0 || len < 0 || b.length < off + len) {
+                       throw new IndexOutOfBoundsException("can't read past buffer boundaries");
+               }
+               if (len == 0) {
+                       return 0;
+               }
+               if (atEOD()) {
+                       return EOF;
+               }
+               int limit = Math.min(available(), len);
+               readFully(b, off, limit);
+               return limit;
+       }
+
+       /**
+        * Repositions this stream to the position at the time the mark() method was
+        * last called on this input stream. If mark() has not been called this
+        * method repositions the stream to its beginning.
+        */
+       public void reset() {
+          // Special case for reset to the start
+          if(_marked_offset == 0 && _marked_offset_count == 0) {
+             _current_block_count = _marked_offset_count;
+             _current_offset = _marked_offset;
+             _data = _document.getBlockIterator();
+             _buffer = null;
+             return;
+          }
+          
+               // Start again, then wind on to the required block
+               _data = _document.getBlockIterator();
+               _current_offset = 0;
+               for(int i=0; i<_marked_offset_count; i++) {
+                  _buffer = _data.next();
+                  _current_offset += _buffer.remaining();
+               }
+               
+      _current_block_count = _marked_offset_count;
+      
+      // Do we need to position within it?
+      if(_current_offset != _marked_offset) {
+               // Grab the right block
+         _buffer = _data.next();
+         _current_block_count++;
+         
+               // Skip to the right place in it
+               _buffer.position(_marked_offset - _current_offset);
+      }
+
+      // All done
+      _current_offset = _marked_offset;
+       }
+
+       public long skip(long n) throws IOException {
+               dieIfClosed();
+               if (n < 0) {
+                       return 0;
+               }
+               int new_offset = _current_offset + (int) n;
+
+               if (new_offset < _current_offset) {
+                       // wrap around in converting a VERY large long to an int
+                       new_offset = _document_size;
+               } else if (new_offset > _document_size) {
+                       new_offset = _document_size;
+               }
+               
+               long rval = new_offset - _current_offset;
+               
+               // TODO Do this better
+               byte[] skip = new byte[(int)rval];
+               readFully(skip);
+               return rval;
+       }
+
+       /**
+        * Guard used by the InputStream-style methods.
+        * 
+        * @throws IOException if close() has already been called
+        */
+       private void dieIfClosed() throws IOException {
+               if (_closed) {
+                       throw new IOException("cannot perform requested operation on a closed stream");
+               }
+       }
+
+       /**
+        * @return true if the current offset has reached the document size
+        */
+       private boolean atEOD() {
+               return _current_offset == _document_size;
+       }
+
+       /**
+        * Guard used by the LittleEndianInput-style methods: verifies that the
+        * stream is open and that enough unread bytes remain.
+        * 
+        * @param requestedSize the number of bytes the caller is about to read
+        * @throws IllegalStateException if the stream has been closed
+        * @throws RuntimeException if fewer than requestedSize bytes remain
+        */
+       private void checkAvaliable(int requestedSize) {
+               if (_closed) {
+                       throw new IllegalStateException("cannot perform requested operation on a closed stream");
+               }
+
+               int unread = _document_size - _current_offset;
+               if (requestedSize <= unread) {
+                       return;
+               }
+               throw new RuntimeException("Buffer underrun - requested " + requestedSize
+                               + " bytes but " + unread + " was available");
+       }
+
+       /**
+        * @return the next byte of the document as a signed value, obtained by
+        *         narrowing the result of readUByte()
+        */
+       public byte readByte() {
+               return (byte) readUByte();
+       }
+
+       /**
+        * @return the next 8 bytes, read via readLong() and reinterpreted as
+        *         the bit pattern of a double
+        */
+       public double readDouble() {
+               return Double.longBitsToDouble(readLong());
+       }
+
+       /**
+        * Fills the whole of the supplied array from the document; equivalent to
+        * <code>readFully(buf, 0, buf.length)</code>.
+        * 
+        * @param buf the array to fill
+        */
+       public void readFully(byte[] buf) {
+               readFully(buf, 0, buf.length);
+       }
+
+       /**
+        * @return the next 2 bytes as a signed short, obtained by narrowing the
+        *         result of readUShort()
+        */
+       public short readShort() {
+               return (short) readUShort();
+       }
+
+       public void readFully(byte[] buf, int off, int len) {
+               checkAvaliable(len);
+
+               int read = 0;
+               while(read < len) {
+                  if(_buffer == null || _buffer.remaining() == 0) {
+                     _current_block_count++;
+                     _buffer = _data.next();
+                  }
+                  
+                  int limit = Math.min(len-read, _buffer.remaining());
+                  _buffer.get(buf, off+read, limit);
+         _current_offset += limit;
+                  read += limit;
+               }
+       }
+
+       /**
+        * Reads the next 8 bytes and decodes them with LittleEndian.getLong.
+        * 
+        * @return the decoded long
+        * @throws IllegalStateException if the stream has been closed
+        * @throws RuntimeException if fewer than 8 bytes remain
+        */
+       public long readLong() {
+               checkAvaliable(SIZE_LONG);
+               byte[] data = new byte[SIZE_LONG];
+               readFully(data, 0, SIZE_LONG);
+               return LittleEndian.getLong(data, 0);
+       }
+
+       /**
+        * Reads the next 4 bytes and decodes them with LittleEndian.getInt.
+        * 
+        * @return the decoded int
+        * @throws IllegalStateException if the stream has been closed
+        * @throws RuntimeException if fewer than 4 bytes remain
+        */
+       public int readInt() {
+               checkAvaliable(SIZE_INT);
+      byte[] data = new byte[SIZE_INT];
+      readFully(data, 0, SIZE_INT);
+      return LittleEndian.getInt(data);
+       }
+
+       public int readUShort() {
+               checkAvaliable(SIZE_SHORT);
+      byte[] data = new byte[SIZE_SHORT];
+      readFully(data, 0, SIZE_SHORT);
+      return LittleEndian.getShort(data);
+       }
+
+       public int readUByte() {
+               checkAvaliable(1);
+      byte[] data = new byte[1];
+      readFully(data, 0, 1);
+      if(data[0] >= 0)
+         return data[0];
+      return data[0] + 256;
+       }
+}
index 08c66b677431719604b14640e20df5ff8de66d9f..09536d4ade5e5f2330ca01f77becbcc90ce14393 100644 (file)
@@ -100,6 +100,14 @@ public final class NPOIFSDocument implements POIFSViewable {
       this._property = new DocumentProperty(name, contents.length);
       _property.setStartBlock(_stream.getStartBlock());     
    }
+   
+   int getDocumentBlockSize() {
+      return _block_size;
+   }
+   
+   Iterator<ByteBuffer> getBlockIterator() {
+      return _stream.getBlockIterator();
+   }
 
    /**
     * @return size of the document
index 7323045e5b9bf0997abe7c673e669381bbd5a2f1..156b73d9acbfc7357758c75f8298f19615c4ce75 100644 (file)
@@ -70,13 +70,16 @@ public class NPOIFSMiniStore extends BlockStore
        }
        ByteBuffer dataBlock = it.next();
        
-       // Skip forward to the right place
+       // Our blocks are small, so duplicating it is fine 
+       byte[] data = new byte[POIFSConstants.SMALL_BLOCK_SIZE];
        dataBlock.position(
              dataBlock.position() + bigBlockOffset
        );
+       dataBlock.get(data, 0, data.length);
        
-       // All done
-       return dataBlock;
+       // Return a ByteBuffer on this
+       ByteBuffer miniBuffer = ByteBuffer.wrap(data);
+       return miniBuffer;
     }
     
     /**
index 17e2694d00e76eafa8e9be8a20c1f3595c9a5501..2848c8be2698607885803c7f16086a697ff68cd4 100644 (file)
@@ -23,6 +23,10 @@ import java.util.Iterator;
 import junit.framework.TestCase;
 
 import org.apache.poi.POIDataSamples;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.PropertySetFactory;
+import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.poifs.property.NPropertyTable;
 import org.apache.poi.poifs.property.Property;
@@ -446,12 +450,25 @@ public final class TestNPOIFSFileSystem extends TestCase {
       NPOIFSFileSystem fsD = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize4096.zvi"));
       for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB,fsC,fsD}) {
          DirectoryEntry root = fs.getRoot();
-         Entry dsi = root.getEntry("\u0005DocumentSummaryInformation");
+         Entry si = root.getEntry("\u0005SummaryInformation");
+         
+         assertEquals(true, si.isDocumentEntry());
+         DocumentNode doc = (DocumentNode)si;
          
-         assertEquals(true, dsi.isDocumentEntry());
-         DocumentEntry doc = (DocumentEntry)dsi;
+         // Check we can read it
+         NDocumentInputStream inp = new NDocumentInputStream(doc);
+         byte[] contents = new byte[doc.getSize()];
+         assertEquals(doc.getSize(), inp.read(contents));
          
+         // Now try to build the property set
+         inp = new NDocumentInputStream(doc);
+         PropertySet ps = PropertySetFactory.create(inp);
+         SummaryInformation inf = (SummaryInformation)ps;
          
+         // Check some bits in it
+         assertEquals(null, inf.getApplicationName());
+         assertEquals(null, inf.getAuthor());
+         assertEquals(null, inf.getSubject());
       }
    }