Browse Source

rework compound content api; add more ole blob unit tests

git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/trunk@810 f203690c-595d-4dc9-a70b-905162fa7fd2
tags/jackcess-2.0.1
James Ahlborn 10 years ago
parent
commit
1281baf89c

+ 12
- 0
src/main/java/com/healthmarketscience/jackcess/impl/ByteUtil.java View File

@@ -30,6 +30,7 @@ package com.healthmarketscience.jackcess.impl;
import java.io.Closeable;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
@@ -608,6 +609,17 @@ public final class ByteUtil {
return newArr;
}

/**
 * Transfers every byte that can be read from the given InputStream into the
 * given OutputStream, using an 8KB transfer buffer.  Neither stream is
 * closed or flushed by this method.
 *
 * @param in stream to read from until end-of-stream is reported
 * @param out stream which receives the bytes
 * @throws IOException if reading or writing fails
 */
public static void copy(InputStream in, OutputStream out) throws IOException {
  final byte[] chunk = new byte[8 * 1024];
  // read() reports end-of-stream with -1; any non-negative count is data
  for(int count = in.read(chunk); count >= 0; count = in.read(chunk)) {
    out.write(chunk, 0, count);
  }
}

/**
* Closes the given Closeable if non-null, swallows any IOExceptions.
*/

+ 1
- 6
src/main/java/com/healthmarketscience/jackcess/impl/ColumnImpl.java View File

@@ -1724,12 +1724,7 @@ public class ColumnImpl implements Column, Comparable<ColumnImpl> {
ByteArrayOutputStream bout = new ByteArrayOutputStream();

if(value instanceof InputStream) {
byte[] buf = new byte[8 * 1024];
InputStream in = (InputStream)value;
int read = 0;
while((read = in.read(buf)) != -1) {
bout.write(buf, 0, read);
}
ByteUtil.copy((InputStream)value, bout);
} else {
// if all else fails, serialize it
ObjectOutputStream oos = new ObjectOutputStream(bout);

+ 123
- 52
src/main/java/com/healthmarketscience/jackcess/impl/CompoundOleUtil.java View File

@@ -22,6 +22,7 @@ package com.healthmarketscience.jackcess.impl;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
@@ -30,6 +31,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import com.healthmarketscience.jackcess.RuntimeIOException;
import static com.healthmarketscience.jackcess.impl.OleUtil.*;
import com.healthmarketscience.jackcess.util.MemFileChannel;
import static com.healthmarketscience.jackcess.util.OleBlob.*;
@@ -37,7 +39,6 @@ import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;

/**
@@ -48,6 +49,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
* support in OleUtil can be utilized without requiring POI.
*
* @author James Ahlborn
* @usage _advanced_class_
*/
public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
{
@@ -65,6 +67,9 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
{
}

/**
* Creates a new CompoundContent for the given blob information.
*/
public ContentImpl createCompoundPackageContent(
OleBlobImpl blob, String prettyName, String className, String typeName,
ByteBuffer blobBb, int dataBlockLen)
@@ -73,6 +78,50 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
blobBb.position(), dataBlockLen);
}

/**
 * Looks up a DocumentEntry in compound storage using a fully qualified,
 * encoded entry name.  The name is split on the entry separator, empty
 * components are dropped, and each remaining component is decoded before
 * being resolved against the current directory.
 *
 * @param entryName fully qualified, encoded entry name
 * @param dir root directory of the compound storage
 *
 * @return the relevant DocumentEntry
 * @throws FileNotFoundException if the entry does not exist
 * @throws IOException if some other io error occurs
 */
public static DocumentEntry getDocumentEntry(String entryName,
                                             DirectoryEntry dir)
  throws IOException
{
  // break the name into its decoded, non-empty path components
  List<String> components = new ArrayList<String>();
  for(String part : entryName.split(ENTRY_SEPARATOR)) {
    if(part.length() > 0) {
      components.add(decodeEntryName(part));
    }
  }

  final int lastIdx = components.size() - 1;
  for(int i = 0; i <= lastIdx; ++i) {
    org.apache.poi.poifs.filesystem.Entry child =
      dir.getEntry(components.get(i));
    if(child instanceof DirectoryEntry) {
      // descend and keep resolving the remaining components
      dir = (DirectoryEntry)child;
      continue;
    }
    if((i == lastIdx) && (child instanceof DocumentEntry)) {
      // only the final component may name the document itself
      return (DocumentEntry)child;
    }
    break;
  }

  // either the path was empty, ended on a directory, or hit a non-directory
  // before the final component
  throw new FileNotFoundException("Could not find document " + entryName);
}

private static String encodeEntryName(String name) {
try {
return URLEncoder.encode(name, ENTRY_NAME_CHARSET);
@@ -113,69 +162,39 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory
return _fs;
}

public List<String> getEntries() throws IOException {
return getEntries(new ArrayList<String>(), getFileSystem().getRoot(),
ENTRY_SEPARATOR, false);
public Iterator<Entry> iterator() {
try {
return getEntries(new ArrayList<Entry>(), getFileSystem().getRoot(),
ENTRY_SEPARATOR).iterator();
} catch(IOException e) {
throw new RuntimeIOException(e);
}
}

public InputStream getEntryStream(String entryName) throws IOException {
return new DocumentInputStream(getDocumentEntry(entryName));
public EntryImpl getEntry(String entryName) throws IOException {
return new EntryImpl(entryName,
getDocumentEntry(entryName, getFileSystem().getRoot()));
}

public boolean hasContentsEntry() throws IOException {
return getFileSystem().getRoot().hasEntry(CONTENTS_ENTRY);
}

public InputStream getContentsEntryStream() throws IOException {
return getEntryStream(CONTENTS_ENTRY);
}

private DocumentEntry getDocumentEntry(String entryName) throws IOException {

// split entry name into individual components and decode them
List<String> entryNames = new ArrayList<String>();
for(String str : entryName.split(ENTRY_SEPARATOR)) {
if(str.length() == 0) {
continue;
}
entryNames.add(decodeEntryName(str));
}

DirectoryEntry dir = getFileSystem().getRoot();
DocumentEntry entry = null;
Iterator<String> iter = entryNames.iterator();
while(iter.hasNext()) {
Entry tmpEntry = dir.getEntry(iter.next());
if(tmpEntry instanceof DirectoryEntry) {
dir = (DirectoryEntry)tmpEntry;
} else if(!iter.hasNext() && (tmpEntry instanceof DocumentEntry)) {
entry = (DocumentEntry)tmpEntry;
} else {
break;
}
}
if(entry == null) {
throw new FileNotFoundException("Could not find document " + entryName);
}

return entry;
public EntryImpl getContentsEntry() throws IOException {
return getEntry(CONTENTS_ENTRY);
}

private List<String> getEntries(List<String> entries, DirectoryEntry dir,
String prefix, boolean includeDetails) {
for(Entry entry : dir) {
private List<Entry> getEntries(List<Entry> entries, DirectoryEntry dir,
String prefix) {
for(org.apache.poi.poifs.filesystem.Entry entry : dir) {
if (entry instanceof DirectoryEntry) {
// .. recurse into this directory
getEntries(entries, (DirectoryEntry)entry, prefix + ENTRY_SEPARATOR,
includeDetails);
getEntries(entries, (DirectoryEntry)entry, prefix + ENTRY_SEPARATOR);
} else if(entry instanceof DocumentEntry) {
// grab the entry name/details
DocumentEntry de = (DocumentEntry)entry;
String entryName = prefix + encodeEntryName(entry.getName());
if(includeDetails) {
entryName += " (" + ((DocumentEntry)entry).getSize() + ")";
}
entries.add(entryName);
entries.add(new EntryImpl(entryName, de));
}
}
return entries;
@@ -194,15 +213,67 @@ public class CompoundOleUtil implements OleUtil.CompoundPackageFactory

try {
sb.append("hasContentsEntry", hasContentsEntry());
sb.append("entries",
getEntries(new ArrayList<String>(), getFileSystem().getRoot(),
ENTRY_SEPARATOR, true));
sb.append("entries", getEntries(new ArrayList<Entry>(),
getFileSystem().getRoot(),
ENTRY_SEPARATOR));
} catch(IOException e) {
sb.append("entries", "<" + e + ">");
}

return sb.toString();
}

/**
 * CompoundContent.Entry implementation which pairs an entry name with the
 * POI DocumentEntry that supplies its size and data.  Instances are tied to
 * the enclosing CompoundContentImpl, which is reported as their parent.
 */
private final class EntryImpl implements CompoundContent.Entry
{
  /** name of this entry, exactly as handed to the constructor */
  private final String _name;
  /** underlying POI document entry */
  private final DocumentEntry _docEntry;

  private EntryImpl(String name, DocumentEntry docEntry) {
    this._name = name;
    this._docEntry = docEntry;
  }

  public String getName() {
    return _name;
  }

  public long length() {
    return _docEntry.getSize();
  }

  public ContentType getType() {
    return ContentType.UNKNOWN;
  }

  public CompoundContentImpl getParent() {
    return CompoundContentImpl.this;
  }

  public OleBlobImpl getBlob() {
    // the owning blob is whatever the enclosing content belongs to
    return CompoundContentImpl.this.getBlob();
  }

  public InputStream getStream() throws IOException {
    InputStream docStream = new DocumentInputStream(_docEntry);
    return docStream;
  }

  public void writeTo(OutputStream out) throws IOException {
    // opening the stream outside the try means there is nothing to close if
    // the open itself fails
    InputStream docStream = getStream();
    try {
      ByteUtil.copy(docStream, out);
    } finally {
      ByteUtil.closeQuietly(docStream);
    }
  }

  @Override
  public String toString() {
    return CustomToStringStyle.valueBuilder(this)
      .append("name", _name)
      .append("length", length())
      .toString();
  }
}
}

}

+ 1
- 0
src/main/java/com/healthmarketscience/jackcess/impl/OleUtil.java View File

@@ -44,6 +44,7 @@ import org.apache.commons.lang.builder.ToStringBuilder;
* Utility code for working with OLE data.
*
* @author James Ahlborn
* @usage _advanced_class_
*/
public class OleUtil
{

+ 19
- 8
src/main/java/com/healthmarketscience/jackcess/util/OleBlob.java View File

@@ -27,7 +27,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.sql.Blob;
import java.util.List;

import com.healthmarketscience.jackcess.impl.OleUtil;

@@ -216,23 +215,35 @@ public interface OleBlob extends Blob, Closeable
* ContentType#COMPOUND_STORAGE} type. Compound storage is a complex
* embedding format also known as OLE2. In some situations (mostly
* non-microsoft office file formats) the actual content is available from
* the {@link #getContentsEntryStream} method (if {@link #hasContentsEntry}
* the {@link #getContentsEntry} method (if {@link #hasContentsEntry}
* returns {@code true}). In other situations (e.g. microsoft office file
* formats), the actual content is most or all of the compound content (but
* retrieving the final file may be a complex operation, beyond the scope of
* retrieving the final file may be a complex operation beyond the scope of
* jackcess). Note that the CompoundContent type will only be available if
* the POI library is in the classpath, otherwise compound content will be
* returned as OtherContent.
*/
public interface CompoundContent extends PackageContent, EmbeddedContent
public interface CompoundContent extends PackageContent, EmbeddedContent,
Iterable<CompoundContent.Entry>
{
public List<String> getEntries() throws IOException;

public InputStream getEntryStream(String entryName) throws IOException;
public Entry getEntry(String entryName) throws IOException;

public boolean hasContentsEntry() throws IOException;

public InputStream getContentsEntryStream() throws IOException;
public Entry getContentsEntry() throws IOException;

/**
* A document entry in the compound storage.  An entry is itself
* EmbeddedContent and additionally exposes its name and the
* CompoundContent which owns it.
*/
public interface Entry extends EmbeddedContent
{
/**
* Returns the name of this entry (presumably the same form of name
* accepted by {@link CompoundContent#getEntry} -- confirm against the
* implementation).
*/
public String getName();

/**
* Returns the CompoundContent which owns this entry.
*/
public CompoundContent getParent();
}
}

/**

BIN
src/test/data/V2007/testOleV2007.accdb View File


+ 2
- 1
src/test/java/com/healthmarketscience/jackcess/impl/JetFormatTest.java View File

@@ -46,7 +46,8 @@ public class JetFormatTest extends TestCase {
PROMOTION("testPromotion"),
COMPLEX("complexDataTest"),
UNSUPPORTED("unsupportedFieldsTest"),
LINKED("linkerTest");
LINKED("linkerTest"),
BLOB("testOle");

private final String _basename;


+ 154
- 0
src/test/java/com/healthmarketscience/jackcess/util/OleBlobTest.java View File

@@ -20,6 +20,8 @@ USA
package com.healthmarketscience.jackcess.util;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.util.Arrays;

import com.healthmarketscience.jackcess.ColumnBuilder;
@@ -30,9 +32,15 @@ import static com.healthmarketscience.jackcess.DatabaseTest.*;
import com.healthmarketscience.jackcess.Row;
import com.healthmarketscience.jackcess.Table;
import com.healthmarketscience.jackcess.TableBuilder;
import com.healthmarketscience.jackcess.complex.Attachment;
import com.healthmarketscience.jackcess.complex.ComplexValueForeignKey;
import com.healthmarketscience.jackcess.impl.ByteUtil;
import com.healthmarketscience.jackcess.impl.CompoundOleUtil;
import static com.healthmarketscience.jackcess.impl.JetFormatTest.*;
import junit.framework.TestCase;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;

/**
*
@@ -149,4 +157,150 @@ public class OleBlobTest extends TestCase
db.close();
}
}

/**
 * Reads every row of "Table1" from each supported BLOB test database,
 * decodes the "ole_data" column as an OleBlob and checks the decoded content
 * according to its content type: link content is checked against the
 * expected link path, all other content is compared with the first
 * attachment in the row's "attach_data" column (or, for compound storage
 * without a contents entry, against hard-coded entry names and lengths).
 */
public void testReadBlob() throws Exception
{
  for(TestDB testDb : TestDB.getSupportedForBasename(Basename.BLOB, true)) {
    Database db = open(testDb);

    // close the database even when an assertion fails part way through, so
    // a failure does not leave the file open for the rest of the run
    try {
      Table t = db.getTable("Table1");

      for(Row row : t) {

        OleBlob oleBlob = null;
        try {

          String name = (String)row.get("name");
          oleBlob = OleBlob.Builder.fromInternalData((byte[])row.get("ole_data"));
          OleBlob.Content content = oleBlob.getContent();
          // link rows are not compared against attachment data
          Attachment attach = null;
          if(content.getType() != OleBlob.ContentType.LINK) {
            attach = ((ComplexValueForeignKey)row.get("attach_data"))
              .getAttachments().get(0);
          }

          switch(content.getType()) {
          case LINK:
            OleBlob.LinkContent lc = (OleBlob.LinkContent)content;
            if("test_link".equals(name)) {
              assertEquals("Z:\\jackcess_test\\ole\\test_data.txt", lc.getLinkPath());
            } else {
              assertEquals("Z:\\jackcess_test\\ole\\test_datau2.txt", lc.getLinkPath());
            }
            break;

          case SIMPLE_PACKAGE:
            OleBlob.SimplePackageContent spc = (OleBlob.SimplePackageContent)content;
            byte[] packageBytes = toByteArray(spc.getStream(), spc.length());
            assertTrue(Arrays.equals(attach.getFileData(), packageBytes));
            break;

          case COMPOUND_STORAGE:
            OleBlob.CompoundContent cc = (OleBlob.CompoundContent)content;
            if(cc.hasContentsEntry()) {
              // the contents entry holds the embedded file itself
              OleBlob.CompoundContent.Entry entry = cc.getContentsEntry();
              byte[] entryBytes = toByteArray(entry.getStream(), entry.length());
              assertTrue(Arrays.equals(attach.getFileData(), entryBytes));
            } else {

              if("test_word.doc".equals(name)) {
                checkCompoundEntries(cc,
                                     "/%02OlePres000", 466,
                                     "/WordDocument", 4096,
                                     "/%05SummaryInformation", 4096,
                                     "/%05DocumentSummaryInformation", 4096,
                                     "/%03AccessObjSiteData", 56,
                                     "/%02OlePres001", 1620,
                                     "/1Table", 6380,
                                     "/%01CompObj", 114,
                                     "/%01Ole", 20);
                checkCompoundStorage(cc, attach);
              } else if("test_excel.xls".equals(name)) {
                checkCompoundEntries(cc,
                                     "/%02OlePres000", 1326,
                                     "/%03AccessObjSiteData", 56,
                                     "/%05SummaryInformation", 200,
                                     "/%05DocumentSummaryInformation", 264,
                                     "/%02OlePres001", 4208,
                                     "/%01CompObj", 107,
                                     "/Workbook", 13040,
                                     "/%01Ole", 20);
                // the excel data seems to be modified when embedded as ole,
                // so we can't really test it against the attachment data
              } else {
                throw new RuntimeException("unexpected compound entry " + name);
              }
            }
            break;

          case OTHER:
            OleBlob.OtherContent oc = (OleBlob.OtherContent)content;
            byte[] otherBytes = toByteArray(oc.getStream(), oc.length());
            assertTrue(Arrays.equals(attach.getFileData(), otherBytes));
            break;

          default:
            throw new RuntimeException("unexpected type " + content.getType());
          }

        } finally {
          ByteUtil.closeQuietly(oleBlob);
        }
      }
    } finally {
      db.close();
    }
  }
}

/**
 * Asserts that iterating the given compound content yields exactly the
 * expected entries, in order.
 *
 * @param cc compound content whose entries are iterated
 * @param entryInfo alternating expected values: an entry name (String)
 *                  followed by that entry's length (Integer)
 */
private static void checkCompoundEntries(OleBlob.CompoundContent cc,
                                         Object... entryInfo)
  throws Exception
{
  int idx = 0;
  for(OleBlob.CompoundContent.Entry e : cc) {
    // fail with a readable message instead of an index exception when the
    // content holds more entries than were expected
    assertTrue("unexpected extra entry " + e.getName(),
               (idx + 1) < entryInfo.length);

    String entryName = (String)entryInfo[idx];
    int entryLen = (Integer)entryInfo[idx + 1];

    assertEquals(entryName, e.getName());
    assertEquals(entryLen, e.length());

    idx += 2;
  }

  // without this the check passes vacuously when the content yields fewer
  // entries than expected (or none at all)
  assertEquals("missing expected entries", entryInfo.length, idx);
}

/**
 * Compares the entries of the given compound content with the compound
 * storage contained in the attachment data.  The attachment bytes are
 * written to a temporary file, opened as a read-only POI filesystem, and
 * every entry of {@code cc} which also exists in that filesystem must have
 * identical bytes.  Entries which only exist in {@code cc} are ignored.
 *
 * @param cc compound content decoded from the ole data
 * @param attach attachment holding the reference file data
 */
private static void checkCompoundStorage(OleBlob.CompoundContent cc,
                                         Attachment attach)
  throws Exception
{
  File tmpData = File.createTempFile("attach_", ".dat");

  try {
    FileOutputStream fout = new FileOutputStream(tmpData);
    try {
      fout.write(attach.getFileData());
    } finally {
      // release the handle even if the write fails
      fout.close();
    }

    NPOIFSFileSystem attachFs = new NPOIFSFileSystem(tmpData, true);
    try {
      for(OleBlob.CompoundContent.Entry e : cc) {
        DocumentEntry attachE = null;
        try {
          attachE = CompoundOleUtil.getDocumentEntry(e.getName(), attachFs.getRoot());
        } catch(FileNotFoundException fnfe) {
          // ignored, the ole data has extra entries
          continue;
        }

        byte[] attachEBytes = toByteArray(new DocumentInputStream(attachE),
                                          attachE.getSize());
        byte[] entryBytes = toByteArray(e.getStream(), e.length());

        assertTrue(Arrays.equals(attachEBytes, entryBytes));
      }
    } finally {
      // close on assertion failures too, otherwise the still-open file can
      // prevent the temp file from being deleted below
      ByteUtil.closeQuietly(attachFs);
    }
  } finally {
    tmpData.delete();
  }
}
}

Loading…
Cancel
Save