diff options
author | James Ahlborn <jtahlborn@yahoo.com> | 2013-09-28 02:30:18 +0000 |
---|---|---|
committer | James Ahlborn <jtahlborn@yahoo.com> | 2013-09-28 02:30:18 +0000 |
commit | 1281baf89cf29452741bafc1a85e897e61aa86ee (patch) | |
tree | fadd442b7d9b20cfcbdf30cfec8bd5f1a6d885a5 | |
parent | 2b3eb67f9319fe0c8c42281741d48b294b029459 (diff) | |
download | jackcess-1281baf89cf29452741bafc1a85e897e61aa86ee.tar.gz jackcess-1281baf89cf29452741bafc1a85e897e61aa86ee.zip |
rework compound content api; add more ole blob unit tests
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@810 f203690c-595d-4dc9-a70b-905162fa7fd2
8 files changed, 312 insertions, 67 deletions
diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java b/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java index c988ca0..663ff95 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java @@ -30,6 +30,7 @@ package com.healthmarketscience.jackcess.impl; import java.io.Closeable; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.io.PrintWriter; import java.nio.ByteBuffer; @@ -609,6 +610,17 @@ public final class ByteUtil { } /** + * Copies the given InputStream to the given OutputStream. + */ + public static void copy(InputStream in, OutputStream out) throws IOException { + byte[] buf = new byte[8 * 1024]; + int read = 0; + while((read = in.read(buf)) > -1) { + out.write(buf, 0, read); + } + } + + /** * Closes the given Closeable if non-null, swallows any IOExceptions. */ public static void closeQuietly(Closeable c) { diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java b/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java index 0f9714d..7d26fd7 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java @@ -1724,12 +1724,7 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl> { ByteArrayOutputStream bout = new ByteArrayOutputStream(); if(value instanceof InputStream) { - byte[] buf = new byte[8 * 1024]; - InputStream in = (InputStream)value; - int read = 0; - while((read = in.read(buf)) != -1) { - bout.write(buf, 0, read); - } + ByteUtil.copy((InputStream)value, bout); } else { // if all else fails, serialize it ObjectOutputStream oos = new ObjectOutputStream(bout); diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/CompoundOleUtil.java 
b/src/main/java/com/healthmarketscience/jackcess/impl/CompoundOleUtil.java index 602db41..3a3b234 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/CompoundOleUtil.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/CompoundOleUtil.java @@ -22,6 +22,7 @@ package com.healthmarketscience.jackcess.impl; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; @@ -30,6 +31,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import com.healthmarketscience.jackcess.RuntimeIOException; import static com.healthmarketscience.jackcess.impl.OleUtil.*; import com.healthmarketscience.jackcess.util.MemFileChannel; import static com.healthmarketscience.jackcess.util.OleBlob.*; @@ -37,7 +39,6 @@ import org.apache.commons.lang.builder.ToStringBuilder; import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentInputStream; -import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; /** @@ -48,6 +49,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; * support in OleUtil can be utilized without requiring POI. * * @author James Ahlborn + * @usage _advanced_class_ */ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory { @@ -65,6 +67,9 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory { } + /** + * Creates a new CompoundContent for the given blob information. 
+ */ public ContentImpl createCompoundPackageContent( OleBlobImpl blob, String prettyName, String className, String typeName, ByteBuffer blobBb, int dataBlockLen) @@ -73,6 +78,50 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory blobBb.position(), dataBlockLen); } + /** + * Gets a DocumentEntry from compound storage based on a fully qualified, + * encoded entry name. + * + * @param entryName fully qualified, encoded entry name + * @param dir root directory of the compound storage + * + * @return the relevant DocumentEntry + * @throws FileNotFoundException if the entry does not exist + * @throws IOException if some other io error occurs + */ + public static DocumentEntry getDocumentEntry(String entryName, + DirectoryEntry dir) + throws IOException + { + // split entry name into individual components and decode them + List<String> entryNames = new ArrayList<String>(); + for(String str : entryName.split(ENTRY_SEPARATOR)) { + if(str.length() == 0) { + continue; + } + entryNames.add(decodeEntryName(str)); + } + + DocumentEntry entry = null; + Iterator<String> iter = entryNames.iterator(); + while(iter.hasNext()) { + org.apache.poi.poifs.filesystem.Entry tmpEntry = dir.getEntry(iter.next()); + if(tmpEntry instanceof DirectoryEntry) { + dir = (DirectoryEntry)tmpEntry; + } else if(!iter.hasNext() && (tmpEntry instanceof DocumentEntry)) { + entry = (DocumentEntry)tmpEntry; + } else { + break; + } + } + + if(entry == null) { + throw new FileNotFoundException("Could not find document " + entryName); + } + + return entry; + } + private static String encodeEntryName(String name) { try { return URLEncoder.encode(name, ENTRY_NAME_CHARSET); @@ -113,69 +162,39 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory return _fs; } - public List<String> getEntries() throws IOException { - return getEntries(new ArrayList<String>(), getFileSystem().getRoot(), - ENTRY_SEPARATOR, false); + public Iterator<Entry> iterator() { + try { + return 
getEntries(new ArrayList<Entry>(), getFileSystem().getRoot(), + ENTRY_SEPARATOR).iterator(); + } catch(IOException e) { + throw new RuntimeIOException(e); + } } - public InputStream getEntryStream(String entryName) throws IOException { - return new DocumentInputStream(getDocumentEntry(entryName)); + public EntryImpl getEntry(String entryName) throws IOException { + return new EntryImpl(entryName, + getDocumentEntry(entryName, getFileSystem().getRoot())); } public boolean hasContentsEntry() throws IOException { return getFileSystem().getRoot().hasEntry(CONTENTS_ENTRY); } - public InputStream getContentsEntryStream() throws IOException { - return getEntryStream(CONTENTS_ENTRY); - } - - private DocumentEntry getDocumentEntry(String entryName) throws IOException { - - // split entry name into individual components and decode them - List<String> entryNames = new ArrayList<String>(); - for(String str : entryName.split(ENTRY_SEPARATOR)) { - if(str.length() == 0) { - continue; - } - entryNames.add(decodeEntryName(str)); - } - - DirectoryEntry dir = getFileSystem().getRoot(); - DocumentEntry entry = null; - Iterator<String> iter = entryNames.iterator(); - while(iter.hasNext()) { - Entry tmpEntry = dir.getEntry(iter.next()); - if(tmpEntry instanceof DirectoryEntry) { - dir = (DirectoryEntry)tmpEntry; - } else if(!iter.hasNext() && (tmpEntry instanceof DocumentEntry)) { - entry = (DocumentEntry)tmpEntry; - } else { - break; - } - } - - if(entry == null) { - throw new FileNotFoundException("Could not find document " + entryName); - } - - return entry; + public EntryImpl getContentsEntry() throws IOException { + return getEntry(CONTENTS_ENTRY); } - private List<String> getEntries(List<String> entries, DirectoryEntry dir, - String prefix, boolean includeDetails) { - for(Entry entry : dir) { + private List<Entry> getEntries(List<Entry> entries, DirectoryEntry dir, + String prefix) { + for(org.apache.poi.poifs.filesystem.Entry entry : dir) { if (entry instanceof DirectoryEntry) { 
// .. recurse into this directory - getEntries(entries, (DirectoryEntry)entry, prefix + ENTRY_SEPARATOR, - includeDetails); + getEntries(entries, (DirectoryEntry)entry, prefix + ENTRY_SEPARATOR); } else if(entry instanceof DocumentEntry) { // grab the entry name/detils + DocumentEntry de = (DocumentEntry)entry; String entryName = prefix + encodeEntryName(entry.getName()); - if(includeDetails) { - entryName += " (" + ((DocumentEntry)entry).getSize() + ")"; - } - entries.add(entryName); + entries.add(new EntryImpl(entryName, de)); } } return entries; @@ -194,15 +213,67 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory try { sb.append("hasContentsEntry", hasContentsEntry()); - sb.append("entries", - getEntries(new ArrayList<String>(), getFileSystem().getRoot(), - ENTRY_SEPARATOR, true)); + sb.append("entries", getEntries(new ArrayList<Entry>(), + getFileSystem().getRoot(), + ENTRY_SEPARATOR)); } catch(IOException e) { sb.append("entries", "<" + e + ">"); } return sb.toString(); } + + private final class EntryImpl implements CompoundContent.Entry + { + private final String _name; + private final DocumentEntry _docEntry; + + private EntryImpl(String name, DocumentEntry docEntry) { + _name = name; + _docEntry = docEntry; + } + + public ContentType getType() { + return ContentType.UNKNOWN; + } + + public String getName() { + return _name; + } + + public CompoundContentImpl getParent() { + return CompoundContentImpl.this; + } + + public OleBlobImpl getBlob() { + return getParent().getBlob(); + } + + public long length() { + return _docEntry.getSize(); + } + + public InputStream getStream() throws IOException { + return new DocumentInputStream(_docEntry); + } + + public void writeTo(OutputStream out) throws IOException { + InputStream in = null; + try { + ByteUtil.copy(in = getStream(), out); + } finally { + ByteUtil.closeQuietly(in); + } + } + + @Override + public String toString() { + return CustomToStringStyle.valueBuilder(this) + .append("name", 
_name) + .append("length", length()) + .toString(); + } + } } } diff --git a/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java b/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java index 7af74fd..1c15715 100644 --- a/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java +++ b/src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java @@ -44,6 +44,7 @@ import org.apache.commons.lang.builder.ToStringBuilder; * Utility code for working with OLE data. * * @author James Ahlborn + * @usage _advanced_class_ */ public class OleUtil { diff --git a/src/main/java/com/healthmarketscience/jackcess/util/OleBlob.java b/src/main/java/com/healthmarketscience/jackcess/util/OleBlob.java index 6cc9a64..893eac5 100644 --- a/src/main/java/com/healthmarketscience/jackcess/util/OleBlob.java +++ b/src/main/java/com/healthmarketscience/jackcess/util/OleBlob.java @@ -27,7 +27,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.sql.Blob; -import java.util.List; import com.healthmarketscience.jackcess.impl.OleUtil; @@ -216,23 +215,35 @@ public interface OleBlob extends Blob, Closeable * ContentType#COMPOUND_STORAGE} type. Compound storage is a complex * embedding format also known as OLE2. In some situations (mostly * non-microsoft office file formats) the actual content is available from - * the {@link #getContentsEntryStream} method (if {@link #hasContentsEntry} + * the {@link #getContentsEntry} method (if {@link #hasContentsEntry} * returns {@code true}). In other situations (e.g. microsoft office file * formats), the actual content is most or all of the compound content (but - * retrieving the final file may be a complex operation, beyond the scope of + * retrieving the final file may be a complex operation beyond the scope of * jackcess). 
Note that the CompoundContent type will only be available if * the POI library is in the classpath, otherwise compound content will be * returned as OtherContent. */ - public interface CompoundContent extends PackageContent, EmbeddedContent + public interface CompoundContent extends PackageContent, EmbeddedContent, + Iterable<CompoundContent.Entry> { - public List<String> getEntries() throws IOException; - - public InputStream getEntryStream(String entryName) throws IOException; + public Entry getEntry(String entryName) throws IOException; public boolean hasContentsEntry() throws IOException; - public InputStream getContentsEntryStream() throws IOException; + public Entry getContentsEntry() throws IOException; + + /** + * A document entry in the compound storage. + */ + public interface Entry extends EmbeddedContent + { + public String getName(); + + /** + * Returns the CompoundContent which owns this entry. + */ + public CompoundContent getParent(); + } } /** diff --git a/src/test/data/V2007/testOleV2007.accdb b/src/test/data/V2007/testOleV2007.accdb Binary files differ new file mode 100755 index 0000000..398818e --- /dev/null +++ b/src/test/data/V2007/testOleV2007.accdb diff --git a/src/test/java/com/healthmarketscience/jackcess/impl/JetFormatTest.java b/src/test/java/com/healthmarketscience/jackcess/impl/JetFormatTest.java index 8ff2232..c5028cc 100644 --- a/src/test/java/com/healthmarketscience/jackcess/impl/JetFormatTest.java +++ b/src/test/java/com/healthmarketscience/jackcess/impl/JetFormatTest.java @@ -46,7 +46,8 @@ public class JetFormatTest extends TestCase { PROMOTION("testPromotion"), COMPLEX("complexDataTest"), UNSUPPORTED("unsupportedFieldsTest"), - LINKED("linkerTest"); + LINKED("linkerTest"), + BLOB("testOle"); private final String _basename; diff --git a/src/test/java/com/healthmarketscience/jackcess/util/OleBlobTest.java b/src/test/java/com/healthmarketscience/jackcess/util/OleBlobTest.java index 1c3e104..b519664 100644 --- 
a/src/test/java/com/healthmarketscience/jackcess/util/OleBlobTest.java +++ b/src/test/java/com/healthmarketscience/jackcess/util/OleBlobTest.java @@ -20,6 +20,8 @@ USA package com.healthmarketscience.jackcess.util; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.util.Arrays; import com.healthmarketscience.jackcess.ColumnBuilder; @@ -30,9 +32,15 @@ import static com.healthmarketscience.jackcess.DatabaseTest.*; import com.healthmarketscience.jackcess.Row; import com.healthmarketscience.jackcess.Table; import com.healthmarketscience.jackcess.TableBuilder; +import com.healthmarketscience.jackcess.complex.Attachment; +import com.healthmarketscience.jackcess.complex.ComplexValueForeignKey; import com.healthmarketscience.jackcess.impl.ByteUtil; +import com.healthmarketscience.jackcess.impl.CompoundOleUtil; import static com.healthmarketscience.jackcess.impl.JetFormatTest.*; import junit.framework.TestCase; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.DocumentInputStream; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; /** * @@ -149,4 +157,150 @@ public class OleBlobTest extends TestCase db.close(); } } + + public void testReadBlob() throws Exception + { + for(TestDB testDb : TestDB.getSupportedForBasename(Basename.BLOB, true)) { + Database db = open(testDb); + + Table t = db.getTable("Table1"); + + for(Row row : t) { + + OleBlob oleBlob = null; + try { + + String name = (String)row.get("name"); + oleBlob = OleBlob.Builder.fromInternalData((byte[])row.get("ole_data")); + OleBlob.Content content = oleBlob.getContent(); + Attachment attach = null; + if(content.getType() != OleBlob.ContentType.LINK) { + attach = ((ComplexValueForeignKey)row.get("attach_data")) + .getAttachments().get(0); + } + + switch(content.getType()) { + case LINK: + OleBlob.LinkContent lc = (OleBlob.LinkContent)content; + if("test_link".equals(name)) { + 
assertEquals("Z:\\jackcess_test\\ole\\test_data.txt", lc.getLinkPath()); + } else { + assertEquals("Z:\\jackcess_test\\ole\\test_datau2.txt", lc.getLinkPath()); + } + break; + + case SIMPLE_PACKAGE: + OleBlob.SimplePackageContent spc = (OleBlob.SimplePackageContent)content; + byte[] packageBytes = toByteArray(spc.getStream(), spc.length()); + assertTrue(Arrays.equals(attach.getFileData(), packageBytes)); + break; + + case COMPOUND_STORAGE: + OleBlob.CompoundContent cc = (OleBlob.CompoundContent)content; + if(cc.hasContentsEntry()) { + OleBlob.CompoundContent.Entry entry = cc.getContentsEntry(); + byte[] entryBytes = toByteArray(entry.getStream(), entry.length()); + assertTrue(Arrays.equals(attach.getFileData(), entryBytes)); + } else { + + if("test_word.doc".equals(name)) { + checkCompoundEntries(cc, + "/%02OlePres000", 466, + "/WordDocument", 4096, + "/%05SummaryInformation", 4096, + "/%05DocumentSummaryInformation", 4096, + "/%03AccessObjSiteData", 56, + "/%02OlePres001", 1620, + "/1Table", 6380, + "/%01CompObj", 114, + "/%01Ole", 20); + checkCompoundStorage(cc, attach); + } else if("test_excel.xls".equals(name)) { + checkCompoundEntries(cc, + "/%02OlePres000", 1326, + "/%03AccessObjSiteData", 56, + "/%05SummaryInformation", 200, + "/%05DocumentSummaryInformation", 264, + "/%02OlePres001", 4208, + "/%01CompObj", 107, + "/Workbook", 13040, + "/%01Ole", 20); + // the excel data seems to be modified when embedded as ole, + // so we can't really test it against the attachment data + } else { + throw new RuntimeException("unexpected compound entry " + name); + } + } + break; + + case OTHER: + OleBlob.OtherContent oc = (OleBlob.OtherContent)content; + byte[] otherBytes = toByteArray(oc.getStream(), oc.length()); + assertTrue(Arrays.equals(attach.getFileData(), otherBytes)); + break; + + default: + throw new RuntimeException("unexpected type " + content.getType()); + } + + } finally { + ByteUtil.closeQuietly(oleBlob); + } + } + + db.close(); + } + } + + private static 
void checkCompoundEntries(OleBlob.CompoundContent cc, + Object... entryInfo) + throws Exception + { + int idx = 0; + for(OleBlob.CompoundContent.Entry e : cc) { + String entryName = (String)entryInfo[idx]; + int entryLen = (Integer)entryInfo[idx + 1]; + + assertEquals(entryName, e.getName()); + assertEquals(entryLen, e.length()); + + idx += 2; + } + } + + private static void checkCompoundStorage(OleBlob.CompoundContent cc, + Attachment attach) + throws Exception + { + File tmpData = File.createTempFile("attach_", ".dat"); + + try { + FileOutputStream fout = new FileOutputStream(tmpData); + fout.write(attach.getFileData()); + fout.close(); + + NPOIFSFileSystem attachFs = new NPOIFSFileSystem(tmpData, true); + + for(OleBlob.CompoundContent.Entry e : cc) { + DocumentEntry attachE = null; + try { + attachE = CompoundOleUtil.getDocumentEntry(e.getName(), attachFs.getRoot()); + } catch(FileNotFoundException fnfe) { + // ignored, the ole data has extra entries + continue; + } + + byte[] attachEBytes = toByteArray(new DocumentInputStream(attachE), + attachE.getSize()); + byte[] entryBytes = toByteArray(e.getStream(), e.length()); + + assertTrue(Arrays.equals(attachEBytes, entryBytes)); + } + + ByteUtil.closeQuietly(attachFs); + + } finally { + tmpData.delete(); + } + } } |