source.dussan.org Git - jackcess.git/commitdiff
rework compound content api; add more ole blob unit tests
author James Ahlborn <jtahlborn@yahoo.com>
Sat, 28 Sep 2013 02:30:18 +0000 (02:30 +0000)
committer James Ahlborn <jtahlborn@yahoo.com>
Sat, 28 Sep 2013 02:30:18 +0000 (02:30 +0000)
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@810 f203690c-595d-4dc9-a70b-905162fa7fd2

src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java
src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java
src/main/java/com/healthmarketscience/jackcess/impl/CompoundOleUtil.java
src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java
src/main/java/com/healthmarketscience/jackcess/util/OleBlob.java
src/test/data/V2007/testOleV2007.accdb [new file with mode: 0755]
src/test/java/com/healthmarketscience/jackcess/impl/JetFormatTest.java
src/test/java/com/healthmarketscience/jackcess/util/OleBlobTest.java

index c988ca089ba7a6df9853617823a0ac0c43a6b6a7..663ff95940f11e73132dcc0339a872c89bd1a1fd 100644 (file)
@@ -30,6 +30,7 @@ package com.healthmarketscience.jackcess.impl;
 import java.io.Closeable;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintWriter;
 import java.nio.ByteBuffer;
@@ -608,6 +609,17 @@ public final class ByteUtil {
     return newArr;
   }
 
+  /**
+   * Copies the given InputStream to the given OutputStream until end of stream; neither stream is closed.
+   */
+  public static void copy(InputStream in, OutputStream out) throws IOException {
+    byte[] buf = new byte[8 * 1024]; // 8 KiB transfer buffer, reused for every read
+    int read = 0;
+    while((read = in.read(buf)) > -1) { // read() reports end of stream as -1
+      out.write(buf, 0, read);
+    }
+  }
+
   /**
    * Closes the given Closeable if non-null, swallows any IOExceptions.
    */
index 0f9714de7a041e901c27a79cbda0bd1e87c11037..7d26fd7081ac2c23f00d575354b7143d5dc0ac10 100644 (file)
@@ -1724,12 +1724,7 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl> {
     ByteArrayOutputStream bout = new ByteArrayOutputStream();
 
     if(value instanceof InputStream) {
-      byte[] buf = new byte[8 * 1024];
-      InputStream in = (InputStream)value;
-      int read = 0;
-      while((read = in.read(buf)) != -1) {
-        bout.write(buf, 0, read);
-      }
+      ByteUtil.copy((InputStream)value, bout);
     } else {
       // if all else fails, serialize it
       ObjectOutputStream oos = new ObjectOutputStream(bout);
index 602db41021eec03a8f55c99e13ee2102ef4e386e..3a3b234d8a83d3f69f1d50e59b9e9cc98749a780 100644 (file)
@@ -22,6 +22,7 @@ package com.healthmarketscience.jackcess.impl;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
 import java.net.URLEncoder;
@@ -30,6 +31,7 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
+import com.healthmarketscience.jackcess.RuntimeIOException;
 import static com.healthmarketscience.jackcess.impl.OleUtil.*;
 import com.healthmarketscience.jackcess.util.MemFileChannel;
 import static com.healthmarketscience.jackcess.util.OleBlob.*;
@@ -37,7 +39,6 @@ import org.apache.commons.lang.builder.ToStringBuilder;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.filesystem.DocumentInputStream;
-import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 
 /**
@@ -48,6 +49,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
  * support in OleUtil can be utilized without requiring POI.
  *
  * @author James Ahlborn
+ * @usage _advanced_class_
  */
 public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
 {
@@ -65,6 +67,9 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
   {
   }
 
+  /**
+   * Creates a new CompoundContent for the given blob information.
+   */
   public ContentImpl createCompoundPackageContent(
       OleBlobImpl blob, String prettyName, String className, String typeName,
       ByteBuffer blobBb, int dataBlockLen)
@@ -73,6 +78,50 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
                                    blobBb.position(), dataBlockLen);
   }
 
+  /**
+   * Gets a DocumentEntry from compound storage based on a fully qualified,
+   * encoded entry name.
+   *
+   * @param entryName fully qualified, encoded entry name
+   * @param dir root directory of the compound storage
+   *
+   * @return the relevant DocumentEntry
+   * @throws FileNotFoundException if the name does not resolve to a document (e.g. it ends at a directory)
+   * @throws IOException if some other io error occurs
+   */
+  public static DocumentEntry getDocumentEntry(String entryName,
+                                               DirectoryEntry dir) 
+    throws IOException 
+  {
+    // split entry name into individual components and decode them
+    List<String> entryNames = new ArrayList<String>();
+    for(String str : entryName.split(ENTRY_SEPARATOR)) {
+      if(str.length() == 0) { // ignore empty components produced by the split
+        continue;
+      }
+      entryNames.add(decodeEntryName(str));
+    }
+
+    DocumentEntry entry = null;
+    Iterator<String> iter = entryNames.iterator();
+    while(iter.hasNext()) {
+      org.apache.poi.poifs.filesystem.Entry tmpEntry = dir.getEntry(iter.next());
+      if(tmpEntry instanceof DirectoryEntry) {
+        dir = (DirectoryEntry)tmpEntry; // descend into the sub-directory
+      } else if(!iter.hasNext() && (tmpEntry instanceof DocumentEntry)) { // a document only matches as the final component
+        entry = (DocumentEntry)tmpEntry;
+      } else {
+        break; // document in a non-final position, or an unrecognized entry type
+      }        
+    }
+      
+    if(entry == null) {
+      throw new FileNotFoundException("Could not find document " + entryName);
+    }
+
+    return entry;
+  }
+
   private static String encodeEntryName(String name) {
     try {
       return URLEncoder.encode(name, ENTRY_NAME_CHARSET);
@@ -113,69 +162,39 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
       return _fs;
     }
 
-    public List<String> getEntries() throws IOException {
-      return getEntries(new ArrayList<String>(), getFileSystem().getRoot(),
-                        ENTRY_SEPARATOR, false);
+    public Iterator<Entry> iterator() {
+      try {
+      return getEntries(new ArrayList<Entry>(), getFileSystem().getRoot(),
+                        ENTRY_SEPARATOR).iterator();
+      } catch(IOException e) {
+        throw new RuntimeIOException(e);
+      }
     }
 
-    public InputStream getEntryStream(String entryName) throws IOException {
-      return new DocumentInputStream(getDocumentEntry(entryName));
+    public EntryImpl getEntry(String entryName) throws IOException {
+      return new EntryImpl(entryName, 
+                           getDocumentEntry(entryName, getFileSystem().getRoot()));
     }
 
     public boolean hasContentsEntry() throws IOException {
       return getFileSystem().getRoot().hasEntry(CONTENTS_ENTRY);
     }
 
-    public InputStream getContentsEntryStream() throws IOException {
-      return getEntryStream(CONTENTS_ENTRY);
-    }
-
-    private DocumentEntry getDocumentEntry(String entryName) throws IOException {
-
-      // split entry name into individual components and decode them
-      List<String> entryNames = new ArrayList<String>();
-      for(String str : entryName.split(ENTRY_SEPARATOR)) {
-        if(str.length() == 0) {
-          continue;
-        }
-        entryNames.add(decodeEntryName(str));
-      }
-
-      DirectoryEntry dir = getFileSystem().getRoot();
-      DocumentEntry entry = null;
-      Iterator<String> iter = entryNames.iterator();
-      while(iter.hasNext()) {
-        Entry tmpEntry = dir.getEntry(iter.next());
-        if(tmpEntry instanceof DirectoryEntry) {
-          dir = (DirectoryEntry)tmpEntry;
-        } else if(!iter.hasNext() && (tmpEntry instanceof DocumentEntry)) {
-          entry = (DocumentEntry)tmpEntry;
-        } else {
-          break;
-        }        
-      }
-      
-      if(entry == null) {
-        throw new FileNotFoundException("Could not find document " + entryName);
-      }
-
-      return entry;
+    public EntryImpl getContentsEntry() throws IOException {
+      return getEntry(CONTENTS_ENTRY);
     }
 
-    private List<String> getEntries(List<String> entries, DirectoryEntry dir, 
-                                    String prefix, boolean includeDetails) {
-      for(Entry entry : dir) {
+    private List<Entry> getEntries(List<Entry> entries, DirectoryEntry dir, 
+                                   String prefix) {
+      for(org.apache.poi.poifs.filesystem.Entry entry : dir) {
         if (entry instanceof DirectoryEntry) {
           // .. recurse into this directory
-          getEntries(entries, (DirectoryEntry)entry, prefix + ENTRY_SEPARATOR,
-                     includeDetails);
+          getEntries(entries, (DirectoryEntry)entry, prefix + ENTRY_SEPARATOR);
         } else if(entry instanceof DocumentEntry) {
           // grab the entry name/details
+          DocumentEntry de = (DocumentEntry)entry;
           String entryName = prefix + encodeEntryName(entry.getName());
-          if(includeDetails) {
-            entryName += " (" + ((DocumentEntry)entry).getSize() + ")";
-          }
-          entries.add(entryName);
+          entries.add(new EntryImpl(entryName, de));
         }
       }
       return entries;
@@ -194,15 +213,67 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
 
       try {
         sb.append("hasContentsEntry", hasContentsEntry());
-        sb.append("entries",
-                  getEntries(new ArrayList<String>(), getFileSystem().getRoot(),
-                             ENTRY_SEPARATOR, true));
+        sb.append("entries", getEntries(new ArrayList<Entry>(), 
+                                        getFileSystem().getRoot(),
+                                        ENTRY_SEPARATOR));
       } catch(IOException e) {  
         sb.append("entries", "<" + e + ">");
       }
 
       return sb.toString();
     }
+
+    private final class EntryImpl implements CompoundContent.Entry // non-static: bound to its owning CompoundContentImpl
+    {
+      private final String _name; // name exactly as passed to the constructor
+      private final DocumentEntry _docEntry; // backing POI document for this entry
+
+      private EntryImpl(String name, DocumentEntry docEntry) {
+        _name = name;
+        _docEntry = docEntry;
+      }
+
+      public ContentType getType() {
+        return ContentType.UNKNOWN; // every compound entry reports UNKNOWN
+      }
+
+      public String getName() {
+        return _name;
+      }
+
+      public CompoundContentImpl getParent() {
+        return CompoundContentImpl.this; // the enclosing content instance that created this entry
+      }
+
+      public OleBlobImpl getBlob() {
+        return getParent().getBlob();
+      }
+
+      public long length() {
+        return _docEntry.getSize();
+      }
+
+      public InputStream getStream() throws IOException {
+        return new DocumentInputStream(_docEntry); // opens a new stream on each call
+      }
+
+      public void writeTo(OutputStream out) throws IOException {
+        InputStream in = null;
+        try {
+          ByteUtil.copy(in = getStream(), out);
+        } finally {
+          ByteUtil.closeQuietly(in); // closes only the entry stream; 'out' is left open
+        }
+      }
+
+      @Override
+      public String toString() {
+        return CustomToStringStyle.valueBuilder(this)
+          .append("name", _name)
+          .append("length", length())
+          .toString();
+      }
+    } 
   }
 
 }
index 7af74fdd64df7d93c2c12602af5c4d22a4393677..1c15715dd4ae4ee8b2c58bd157c3ed03fa96feb0 100644 (file)
@@ -44,6 +44,7 @@ import org.apache.commons.lang.builder.ToStringBuilder;
  * Utility code for working with OLE data.
  *
  * @author James Ahlborn
+ * @usage _advanced_class_
  */
 public class OleUtil 
 {
index 6cc9a6406b8e1d6cbeef170e596047680d308dc3..893eac5d1f2e93fa11b60741f68dfb7a6d6183c1 100644 (file)
@@ -27,7 +27,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.sql.Blob;
-import java.util.List;
 
 import com.healthmarketscience.jackcess.impl.OleUtil;
 
@@ -216,23 +215,35 @@ public interface OleBlob extends Blob, Closeable
    * ContentType#COMPOUND_STORAGE} type.  Compound storage is a complex
    * embedding format also known as OLE2.  In some situations (mostly
    * non-microsoft office file formats) the actual content is available from
-   * the {@link #getContentsEntryStream} method (if {@link #hasContentsEntry}
+   * the {@link #getContentsEntry} method (if {@link #hasContentsEntry}
    * returns {@code true}).  In other situations (e.g. microsoft office file
    * formats), the actual content is most or all of the compound content (but
-   * retrieving the final file may be a complex operation, beyond the scope of
+   * retrieving the final file may be a complex operation beyond the scope of
    * jackcess).  Note that the CompoundContent type will only be available if
    * the POI library is in the classpath, otherwise compound content will be
    * returned as OtherContent.
    */
-  public interface CompoundContent extends PackageContent, EmbeddedContent
+  public interface CompoundContent extends PackageContent, EmbeddedContent,
+                                           Iterable<CompoundContent.Entry>
   {
-    public List<String> getEntries() throws IOException;
-
-    public InputStream getEntryStream(String entryName) throws IOException;
+    public Entry getEntry(String entryName) throws IOException;
 
     public boolean hasContentsEntry() throws IOException;
 
-    public InputStream getContentsEntryStream() throws IOException;
+    public Entry getContentsEntry() throws IOException;
+
+    /**
+     * A document entry in the compound storage.
+     */
+    public interface Entry extends EmbeddedContent
+    {
+      public String getName();
+
+      /**
+       * Returns the CompoundContent which owns this entry.
+       */
+      public CompoundContent getParent();
+    }
   }  
 
   /**
diff --git a/src/test/data/V2007/testOleV2007.accdb b/src/test/data/V2007/testOleV2007.accdb
new file mode 100755 (executable)
index 0000000..398818e
Binary files /dev/null and b/src/test/data/V2007/testOleV2007.accdb differ
index 8ff22323fa66948b355134da4f7fc396b68ca361..c5028ccb178a3d64b0704ed68d541691effc6316 100644 (file)
@@ -46,7 +46,8 @@ public class JetFormatTest extends TestCase {
     PROMOTION("testPromotion"),
     COMPLEX("complexDataTest"),
     UNSUPPORTED("unsupportedFieldsTest"),
-    LINKED("linkerTest");
+    LINKED("linkerTest"),
+    BLOB("testOle");
 
     private final String _basename;
 
index 1c3e10473442e4579874c3b7a9630f9296a8d6da..b519664dc9fbcca5dae7a6c86fc95e0ecca85dff 100644 (file)
@@ -20,6 +20,8 @@ USA
 package com.healthmarketscience.jackcess.util;
 
 import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.util.Arrays;
 
 import com.healthmarketscience.jackcess.ColumnBuilder;
@@ -30,9 +32,15 @@ import static com.healthmarketscience.jackcess.DatabaseTest.*;
 import com.healthmarketscience.jackcess.Row;
 import com.healthmarketscience.jackcess.Table;
 import com.healthmarketscience.jackcess.TableBuilder;
+import com.healthmarketscience.jackcess.complex.Attachment;
+import com.healthmarketscience.jackcess.complex.ComplexValueForeignKey;
 import com.healthmarketscience.jackcess.impl.ByteUtil;
+import com.healthmarketscience.jackcess.impl.CompoundOleUtil;
 import static com.healthmarketscience.jackcess.impl.JetFormatTest.*;
 import junit.framework.TestCase;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 
 /**
  *
@@ -149,4 +157,150 @@ public class OleBlobTest extends TestCase
       db.close();      
     }    
   }
+
+  public void testReadBlob() throws Exception
+  {
+    for(TestDB testDb : TestDB.getSupportedForBasename(Basename.BLOB, true)) {
+      Database db = open(testDb);
+
+      Table t = db.getTable("Table1");
+
+      for(Row row : t) {
+
+        OleBlob oleBlob = null;
+        try {
+
+          String name = (String)row.get("name");
+          oleBlob = OleBlob.Builder.fromInternalData((byte[])row.get("ole_data"));
+          OleBlob.Content content = oleBlob.getContent();
+          Attachment attach = null;
+          if(content.getType() != OleBlob.ContentType.LINK) {
+            attach = ((ComplexValueForeignKey)row.get("attach_data"))
+              .getAttachments().get(0);
+          }
+
+          switch(content.getType()) {
+          case LINK:
+            OleBlob.LinkContent lc = (OleBlob.LinkContent)content;
+            if("test_link".equals(name)) {
+              assertEquals("Z:\\jackcess_test\\ole\\test_data.txt", lc.getLinkPath());
+            } else {
+              assertEquals("Z:\\jackcess_test\\ole\\test_datau2.txt", lc.getLinkPath());
+            }
+            break;
+
+          case SIMPLE_PACKAGE:
+            OleBlob.SimplePackageContent spc = (OleBlob.SimplePackageContent)content;
+            byte[] packageBytes = toByteArray(spc.getStream(), spc.length());
+            assertTrue(Arrays.equals(attach.getFileData(), packageBytes));
+            break;
+
+          case COMPOUND_STORAGE:
+            OleBlob.CompoundContent cc = (OleBlob.CompoundContent)content;
+            if(cc.hasContentsEntry()) {
+              OleBlob.CompoundContent.Entry entry = cc.getContentsEntry();
+              byte[] entryBytes = toByteArray(entry.getStream(), entry.length());
+              assertTrue(Arrays.equals(attach.getFileData(), entryBytes));
+            } else {
+
+              if("test_word.doc".equals(name)) {
+                checkCompoundEntries(cc, 
+                                     "/%02OlePres000", 466,
+                                     "/WordDocument", 4096,
+                                     "/%05SummaryInformation", 4096,
+                                     "/%05DocumentSummaryInformation", 4096,
+                                     "/%03AccessObjSiteData", 56,
+                                     "/%02OlePres001", 1620,
+                                     "/1Table", 6380,
+                                     "/%01CompObj", 114,
+                                     "/%01Ole", 20);
+                checkCompoundStorage(cc, attach);
+              } else if("test_excel.xls".equals(name)) {
+                checkCompoundEntries(cc, 
+                                     "/%02OlePres000", 1326,
+                                     "/%03AccessObjSiteData", 56,
+                                     "/%05SummaryInformation", 200,
+                                     "/%05DocumentSummaryInformation", 264,
+                                     "/%02OlePres001", 4208,
+                                     "/%01CompObj", 107,
+                                     "/Workbook", 13040,
+                                     "/%01Ole", 20);
+                // the excel data seems to be modified when embedded as ole,
+                // so we can't really test it against the attachment data
+              } else {
+                throw new RuntimeException("unexpected compound entry " + name);
+              }
+            }
+            break;
+
+          case OTHER:
+            OleBlob.OtherContent oc = (OleBlob.OtherContent)content;
+            byte[] otherBytes = toByteArray(oc.getStream(), oc.length());
+            assertTrue(Arrays.equals(attach.getFileData(), otherBytes));
+            break;
+
+          default:
+            throw new RuntimeException("unexpected type " + content.getType());
+          }
+
+        } finally {
+          ByteUtil.closeQuietly(oleBlob);
+        }
+      }
+
+      db.close();
+    } 
+  }
+
+  private static void checkCompoundEntries(OleBlob.CompoundContent cc, 
+                                           Object... entryInfo) // flat (String name, Integer length) pairs, in iteration order
+    throws Exception
+  {
+    int idx = 0; // offset of the current pair within entryInfo
+    for(OleBlob.CompoundContent.Entry e : cc) {
+      String entryName = (String)entryInfo[idx];
+      int entryLen = (Integer)entryInfo[idx + 1];
+
+      assertEquals(entryName, e.getName());
+      assertEquals(entryLen, e.length());
+
+      idx += 2;
+    } // NOTE(review): nothing asserts idx == entryInfo.length, so missing entries go undetected
+  }
+
+  private static void checkCompoundStorage(OleBlob.CompoundContent cc, 
+                                           Attachment attach)
+    throws Exception
+  {
+    File tmpData = File.createTempFile("attach_", ".dat"); // attachment bytes are written here so POI can open them as a file
+
+    try {
+      FileOutputStream fout = new FileOutputStream(tmpData);
+      fout.write(attach.getFileData());
+      fout.close(); // NOTE(review): skipped if write() throws
+
+      NPOIFSFileSystem attachFs = new NPOIFSFileSystem(tmpData, true);
+
+      for(OleBlob.CompoundContent.Entry e : cc) {
+        DocumentEntry attachE = null;
+        try {
+          attachE = CompoundOleUtil.getDocumentEntry(e.getName(), attachFs.getRoot());
+        } catch(FileNotFoundException fnfe) {
+          // ignored, the ole data has extra entries
+          continue;
+        }
+
+        byte[] attachEBytes = toByteArray(new DocumentInputStream(attachE), 
+                                          attachE.getSize());
+        byte[] entryBytes = toByteArray(e.getStream(), e.length());
+
+        assertTrue(Arrays.equals(attachEBytes, entryBytes));
+      }
+
+      ByteUtil.closeQuietly(attachFs); // NOTE(review): not reached if an assertion above fails
+      
+    } finally {
+      tmpData.delete(); // always remove the temp file
+    }    
+  }
 }