https://svn.apache.org:443/repos/asf/poi/trunk ........ r646854 | nick | 2008-04-10 16:54:02 +0100 (Thu, 10 Apr 2008) | 1 line Initial support for getting and changing chart titles and series titles ........ r646870 | nick | 2008-04-10 17:59:10 +0100 (Thu, 10 Apr 2008) | 1 line Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document ........ r647152 | nick | 2008-04-11 14:04:11 +0100 (Fri, 11 Apr 2008) | 1 line Update the sample embedded documents to be from known sources, so we can test better ........ r647186 | nick | 2008-04-11 15:43:05 +0100 (Fri, 11 Apr 2008) | 1 line Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents ........ git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@647203 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_5_BETA2
@@ -577,6 +577,8 @@ under the License. | |||
file="${main.src.test}/org/apache/poi/hwpf/data"/> | |||
<sysproperty key="HPSF.testdata.path" | |||
file="${main.src.test}/org/apache/poi/hpsf/data"/> | |||
<sysproperty key="POIFS.testdata.path" | |||
file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<formatter type="plain"/> | |||
<formatter type="xml"/> | |||
@@ -612,6 +614,8 @@ under the License. | |||
file="${main.src.test}/org/apache/poi/hpsf/data"/> | |||
<sysproperty key="HWPF.testdata.path" | |||
file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/> | |||
<sysproperty key="POIFS.testdata.path" | |||
file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<formatter type="plain" usefile="no"/> | |||
<batchtest todir="${main.reports.test}"> | |||
@@ -641,6 +645,7 @@ under the License. | |||
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/> | |||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/> | |||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/> | |||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<formatter type="plain" usefile="no"/> | |||
<formatter type="xml"/> | |||
@@ -657,6 +662,7 @@ under the License. | |||
<classpath refid="test.classpath"/> | |||
<sysproperty key="HSSF.testdata.path" file="${main.src.test}/org/apache/poi/hssf/data"/> | |||
<sysproperty key="HPSF.testdata.path" file="${main.src.test}/org/apache/poi/hpsf/data"/> | |||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<formatter type="plain" usefile="no"/> | |||
<test name="${testcase}"/> | |||
@@ -695,6 +701,7 @@ under the License. | |||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/> | |||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/> | |||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/> | |||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<formatter type="plain"/> | |||
<formatter type="xml"/> | |||
@@ -729,6 +736,7 @@ under the License. | |||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/> | |||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/> | |||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/> | |||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<formatter type="plain" usefile="no"/> |
@@ -37,6 +37,10 @@ | |||
<!-- Don't forget to update status.xml too! --> | |||
<release version="3.0.3-beta1" date="2008-04-??"> | |||
<action dev="POI-DEVELOPERS" type="add">Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents</action> | |||
<action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document</action> | |||
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action> | |||
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action> | |||
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action> | |||
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action> | |||
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action> |
@@ -34,6 +34,10 @@ | |||
<!-- Don't forget to update changes.xml too! --> | |||
<changes> | |||
<release version="3.0.3-beta1" date="2008-04-??"> | |||
<action dev="POI-DEVELOPERS" type="add">Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents</action> | |||
<action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document</action> | |||
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action> | |||
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action> | |||
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action> | |||
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action> | |||
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action> |
@@ -29,6 +29,7 @@ import org.apache.poi.hpsf.PropertySet; | |||
import org.apache.poi.hpsf.PropertySetFactory; | |||
import org.apache.poi.hpsf.SummaryInformation; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | |||
import org.apache.poi.poifs.filesystem.Entry; | |||
@@ -50,12 +51,23 @@ public abstract class POIDocument { | |||
protected DocumentSummaryInformation dsInf; | |||
/** The open POIFS FileSystem that contains our document */ | |||
protected POIFSFileSystem filesystem; | |||
/** The directory that our document lives in */ | |||
protected DirectoryNode directory; | |||
/** For our own logging use */ | |||
protected POILogger logger = POILogFactory.getLogger(this.getClass()); | |||
/* Have the property streams been read yet? (Only done on-demand) */ | |||
protected boolean initialized = false; | |||
/**
 * Creates a document that lives in the given directory of the given
 * POIFS filesystem.
 *
 * @param dir the directory that the document lives in
 * @param fs the open POIFS filesystem that contains the document
 */
protected POIDocument(DirectoryNode dir, POIFSFileSystem fs) {
    directory = dir;
    filesystem = fs;
}
/**
 * Creates a document that lives in the root directory of the given
 * POIFS filesystem.
 *
 * @param fs the open POIFS filesystem that contains the document
 */
protected POIDocument(POIFSFileSystem fs)
{
    this(fs.getRoot(), fs);
}
/** | |||
* Fetch the Document Summary Information of the document | |||
@@ -110,7 +122,7 @@ public abstract class POIDocument { | |||
DocumentInputStream dis; | |||
try { | |||
// Find the entry, and get an input stream for it | |||
dis = filesystem.createDocumentInputStream(setName); | |||
dis = directory.createDocumentInputStream(setName); | |||
} catch(IOException ie) { | |||
// Oh well, doesn't exist | |||
logger.log(POILogger.WARN, "Error getting property set with name " + setName + "\n" + ie); |
@@ -17,6 +17,8 @@ | |||
package org.apache.poi.hssf.record; | |||
import java.io.ByteArrayInputStream; | |||
/** | |||
* This is purely for the biff viewer. During normal operations we don't want | |||
* to be seeing this. | |||
@@ -35,6 +37,21 @@ public class DrawingRecordForBiffViewer | |||
super(in); | |||
} | |||
/**
 * Builds a biff viewer drawing record from an existing
 * {@link DrawingRecord}: the record is serialized, read back in
 * through a record input stream, and its raw bytes are then
 * converted to escher records.
 *
 * @param r the drawing record to build from
 */
public DrawingRecordForBiffViewer(DrawingRecord r) {
    super(convertToInputStream(r));
    convertRawBytesToEscherRecords();
}
private static RecordInputStream convertToInputStream(DrawingRecord r) | |||
{ | |||
byte[] data = r.serialize(); | |||
RecordInputStream rinp = new RecordInputStream( | |||
new ByteArrayInputStream(data) | |||
); | |||
rinp.nextRecord(); | |||
return rinp; | |||
} | |||
protected String getRecordName() | |||
{ | |||
return "MSODRAWING"; |
@@ -77,6 +77,8 @@ public class RecordFactory | |||
NoteRecord.class, ObjectProtectRecord.class, ScenarioProtectRecord.class, | |||
FileSharingRecord.class, ChartTitleFormatRecord.class, | |||
DVRecord.class, DVALRecord.class, UncalcedRecord.class, | |||
ChartRecord.class, LegendRecord.class, ChartTitleFormatRecord.class, | |||
SeriesRecord.class, SeriesTextRecord.class, | |||
HyperlinkRecord.class, | |||
ExternalNameRecord.class, // TODO - same changes in non-@deprecated version of this class | |||
SupBookRecord.class, |
@@ -1571,18 +1571,14 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet | |||
} | |||
/** | |||
* Returns the top-level drawing patriach, if there is | |||
* one. | |||
* This will hold any graphics or charts for the sheet. | |||
* Returns the aggregate escher records for this sheet, | |||
* if there is one. | |||
* WARNING - calling this will trigger a parsing of the | |||
* associated escher records. Any that aren't supported | |||
* (such as charts and complex drawing types) will almost | |||
* certainly be lost or corrupted when written out. Only | |||
* use this with simple drawings, otherwise call | |||
* {@link HSSFSheet#createDrawingPatriarch()} and | |||
* start from scratch! | |||
* certainly be lost or corrupted when written out. | |||
*/ | |||
public HSSFPatriarch getDrawingPatriarch() { | |||
public EscherAggregate getDrawingEscherAggregate() { | |||
book.findDrawingGroup(); | |||
// If there's now no drawing manager, then there's | |||
@@ -1601,6 +1597,25 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet | |||
// Grab our aggregate record, and wire it up | |||
EscherAggregate agg = (EscherAggregate) sheet.findFirstRecordBySid(EscherAggregate.sid); | |||
return agg; | |||
} | |||
/** | |||
* Returns the top-level drawing patriarch, if there is | |||
* one. | |||
* This will hold any graphics or charts for the sheet. | |||
* WARNING - calling this will trigger a parsing of the | |||
* associated escher records. Any that aren't supported | |||
* (such as charts and complex drawing types) will almost | |||
* certainly be lost or corrupted when written out. Only | |||
* use this with simple drawings, otherwise call | |||
* {@link HSSFSheet#createDrawingPatriarch()} and | |||
* start from scratch! | |||
*/ | |||
public HSSFPatriarch getDrawingPatriarch() { | |||
EscherAggregate agg = getDrawingEscherAggregate(); | |||
if(agg == null) return null; | |||
HSSFPatriarch patriarch = new HSSFPatriarch(this, agg); | |||
agg.setPatriarch(patriarch); | |||
@@ -62,6 +62,7 @@ import org.apache.poi.hssf.record.formula.Area3DPtg; | |||
import org.apache.poi.hssf.record.formula.MemFuncPtg; | |||
import org.apache.poi.hssf.record.formula.UnionPtg; | |||
import org.apache.poi.hssf.util.CellReference; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.ss.usermodel.CreationHelper; | |||
import org.apache.poi.util.POILogFactory; | |||
@@ -155,6 +156,7 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm | |||
protected HSSFWorkbook( Workbook book ) | |||
{ | |||
super(null, null); | |||
workbook = book; | |||
sheets = new ArrayList( INITIAL_CAPACITY ); | |||
names = new ArrayList( INITIAL_CAPACITY ); | |||
@@ -176,17 +178,37 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm | |||
* @see org.apache.poi.poifs.filesystem.POIFSFileSystem | |||
* @exception IOException if the stream cannot be read | |||
*/ | |||
public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes) throws IOException {
    // Read the workbook from the root directory of the filesystem
    this(fs.getRoot(), fs, preserveNodes);
}
/** | |||
* given a POI POIFSFileSystem object, and a specific directory | |||
* within it, read in its Workbook and populate the high and | |||
* low level models. If you're reading in a workbook...start here. | |||
* | |||
* @param directory the POI filesystem directory to process from | |||
* @param fs the POI filesystem that contains the Workbook stream. | |||
* @param preserveNodes whether to preserve other nodes, such as | |||
* macros. This takes more memory, so only say yes if you | |||
* need to. If set, will store all of the POIFSFileSystem | |||
* in memory | |||
* @see org.apache.poi.poifs.filesystem.POIFSFileSystem | |||
* @exception IOException if the stream cannot be read | |||
*/ | |||
public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes) | |||
throws IOException | |||
{ | |||
super(directory, fs); | |||
this.preserveNodes = preserveNodes; | |||
this.filesystem = fs; | |||
// If we're not preserving nodes, don't track the | |||
// POIFS any more | |||
if(! preserveNodes) { | |||
this.filesystem = null; | |||
this.directory = null; | |||
} | |||
sheets = new ArrayList(INITIAL_CAPACITY); | |||
@@ -197,13 +219,13 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm | |||
// put theirs in one called "WORKBOOK" | |||
String workbookName = "Workbook"; | |||
try { | |||
fs.getRoot().getEntry(workbookName); | |||
directory.getEntry(workbookName); | |||
// Is the default name | |||
} catch(FileNotFoundException fe) { | |||
// Try the upper case form | |||
try { | |||
workbookName = "WORKBOOK"; | |||
fs.getRoot().getEntry(workbookName); | |||
directory.getEntry(workbookName); | |||
} catch(FileNotFoundException wfe) { | |||
// Doesn't contain it in either form | |||
throw new IllegalArgumentException("The supplied POIFSFileSystem contained neither a 'Workbook' entry, nor a 'WORKBOOK' entry. Is it really an excel file?"); | |||
@@ -213,7 +235,7 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm | |||
// Grab the data from the workbook stream, however | |||
// it happens to be spelt. | |||
InputStream stream = fs.createDocumentInputStream(workbookName); | |||
InputStream stream = directory.createDocumentInputStream(workbookName); | |||
EventRecordFactory factory = new EventRecordFactory(); | |||
@@ -0,0 +1,81 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.poifs.dev; | |||
import java.io.FileInputStream; | |||
import java.io.IOException; | |||
import java.util.Iterator; | |||
import org.apache.poi.poifs.filesystem.DirectoryEntry; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.filesystem.DocumentNode; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
/** | |||
* A lister of the entries in POIFS files. | |||
* | |||
* Much simpler than {@link POIFSViewer} | |||
*/ | |||
public class POIFSLister { | |||
/** | |||
* Display the entries of multiple POIFS files | |||
* | |||
* @param args the names of the files to be displayed | |||
*/ | |||
public static void main(final String args[]) throws IOException { | |||
if (args.length == 0) | |||
{ | |||
System.err.println("Must specify at least one file to view"); | |||
System.exit(1); | |||
} | |||
for (int j = 0; j < args.length; j++) | |||
{ | |||
viewFile(args[ j ]); | |||
} | |||
} | |||
public static void viewFile(final String filename) throws IOException | |||
{ | |||
POIFSFileSystem fs = new POIFSFileSystem( | |||
new FileInputStream(filename) | |||
); | |||
displayDirectory(fs.getRoot(), ""); | |||
} | |||
public static void displayDirectory(DirectoryNode dir, String indent) { | |||
System.out.println(indent + dir.getName() + " -"); | |||
String newIndent = indent + " "; | |||
for(Iterator it = dir.getEntries(); it.hasNext(); ) { | |||
Object entry = it.next(); | |||
if(entry instanceof DirectoryNode) { | |||
displayDirectory((DirectoryNode)entry, newIndent); | |||
} else { | |||
DocumentNode doc = (DocumentNode)entry; | |||
String name = doc.getName(); | |||
if(name.charAt(0) < 10) { | |||
String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1); | |||
name = name.substring(1) + " <" + altname + ">"; | |||
} | |||
System.out.println(newIndent + name); | |||
} | |||
} | |||
} | |||
} |
@@ -105,6 +105,31 @@ public class DirectoryNode | |||
{ | |||
return _path; | |||
} | |||
/** | |||
* open a document in the directory's entry's list of entries | |||
* | |||
* @param documentName the name of the document to be opened | |||
* | |||
* @return a newly opened DocumentInputStream | |||
* | |||
* @exception IOException if the document does not exist or the | |||
* name is that of a DirectoryEntry | |||
*/ | |||
public DocumentInputStream createDocumentInputStream( | |||
final String documentName) | |||
throws IOException | |||
{ | |||
Entry document = getEntry(documentName); | |||
if (!document.isDocumentEntry()) | |||
{ | |||
throw new IOException("Entry '" + documentName | |||
+ "' is not a DocumentEntry"); | |||
} | |||
return new DocumentInputStream(( DocumentEntry ) document); | |||
} | |||
/** | |||
* create a new DocumentEntry |
@@ -287,7 +287,7 @@ public class POIFSFileSystem | |||
{ | |||
return getRoot().createDirectory(name); | |||
} | |||
/** | |||
* Write the filesystem out | |||
* | |||
@@ -422,7 +422,7 @@ public class POIFSFileSystem | |||
* @return the root entry | |||
*/ | |||
public DirectoryEntry getRoot() | |||
public DirectoryNode getRoot() | |||
{ | |||
if (_root == null) | |||
{ | |||
@@ -446,14 +446,7 @@ public class POIFSFileSystem | |||
final String documentName) | |||
throws IOException | |||
{ | |||
Entry document = getRoot().getEntry(documentName); | |||
if (!document.isDocumentEntry()) | |||
{ | |||
throw new IOException("Entry '" + documentName | |||
+ "' is not a DocumentEntry"); | |||
} | |||
return new DocumentInputStream(( DocumentEntry ) document); | |||
return getRoot().createDocumentInputStream(documentName); | |||
} | |||
/** |
@@ -53,7 +53,7 @@ public class HDGFDiagram extends POIDocument { | |||
private PointerFactory ptrFactory; | |||
public HDGFDiagram(POIFSFileSystem fs) throws IOException { | |||
filesystem = fs; | |||
super(fs); | |||
DocumentEntry docProps = | |||
(DocumentEntry)filesystem.getRoot().getEntry("VisioDocument"); |
@@ -45,6 +45,7 @@ import org.apache.poi.hslf.record.Record; | |||
import org.apache.poi.hslf.record.UserEditAtom; | |||
import org.apache.poi.hslf.usermodel.ObjectData; | |||
import org.apache.poi.hslf.usermodel.PictureData; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.filesystem.DocumentInputStream; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
@@ -124,7 +125,21 @@ public class HSLFSlideShow extends POIDocument | |||
*/ | |||
public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException | |||
{ | |||
this.filesystem = filesystem; | |||
this(filesystem.getRoot(), filesystem); | |||
} | |||
/** | |||
* Constructs a Powerpoint document from a specific point in a | |||
* POIFS Filesystem. Parses the document and places all the | |||
* important stuff into data structures. | |||
* | |||
* @param dir the POIFS directory to read from | |||
* @param filesystem the POIFS FileSystem to read from | |||
* @throws IOException if there is a problem while parsing the document. | |||
*/ | |||
public HSLFSlideShow(DirectoryNode dir, POIFSFileSystem filesystem) throws IOException | |||
{ | |||
super(dir, filesystem); | |||
// First up, grab the "Current User" stream | |||
// We need this before we can detect Encrypted Documents | |||
@@ -186,11 +201,11 @@ public class HSLFSlideShow extends POIDocument | |||
{ | |||
// Get the main document stream | |||
DocumentEntry docProps = | |||
(DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document"); | |||
(DocumentEntry)directory.getEntry("PowerPoint Document"); | |||
// Grab the document stream | |||
_docstream = new byte[docProps.getSize()]; | |||
filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream); | |||
directory.createDocumentInputStream("PowerPoint Document").read(_docstream); | |||
} | |||
/** | |||
@@ -272,7 +287,7 @@ public class HSLFSlideShow extends POIDocument | |||
*/ | |||
private void readCurrentUserStream() { | |||
try { | |||
currentUser = new CurrentUserAtom(filesystem); | |||
currentUser = new CurrentUserAtom(directory); | |||
} catch(IOException ie) { | |||
logger.log(POILogger.ERROR, "Error finding Current User Atom:\n" + ie); | |||
currentUser = new CurrentUserAtom(); | |||
@@ -293,9 +308,9 @@ public class HSLFSlideShow extends POIDocument | |||
byte[] pictstream; | |||
try { | |||
DocumentEntry entry = (DocumentEntry)filesystem.getRoot().getEntry("Pictures"); | |||
DocumentEntry entry = (DocumentEntry)directory.getEntry("Pictures"); | |||
pictstream = new byte[entry.getSize()]; | |||
DocumentInputStream is = filesystem.createDocumentInputStream("Pictures"); | |||
DocumentInputStream is = directory.createDocumentInputStream("Pictures"); | |||
is.read(pictstream); | |||
} catch (FileNotFoundException e){ | |||
// Silently catch exceptions if the presentation doesn't |
@@ -93,9 +93,15 @@ public class CurrentUserAtom | |||
* Find the Current User in the filesystem, and create from that | |||
*/ | |||
public CurrentUserAtom(POIFSFileSystem fs) throws IOException
{
    // Look for the Current User in the root directory of the filesystem
    this(fs.getRoot());
}
/** | |||
* Find the Current User in the filesystem, and create from that | |||
*/ | |||
public CurrentUserAtom(DirectoryNode dir) throws IOException { | |||
// Decide how big it is | |||
DocumentEntry docProps = | |||
(DocumentEntry)fs.getRoot().getEntry("Current User"); | |||
(DocumentEntry)dir.getEntry("Current User"); | |||
_contents = new byte[docProps.getSize()]; | |||
// Check it's big enough - if it's not at least 28 bytes long, then | |||
@@ -105,7 +111,7 @@ public class CurrentUserAtom | |||
} | |||
// Grab the contents | |||
InputStream in = fs.createDocumentInputStream("Current User"); | |||
InputStream in = dir.createDocumentInputStream("Current User"); | |||
in.read(_contents); | |||
// Set everything up |
@@ -19,14 +19,58 @@ | |||
package org.apache.poi.hssf.usermodel; | |||
import org.apache.poi.hssf.record.*; | |||
import org.apache.poi.hssf.record.formula.Area3DPtg; | |||
import java.util.ArrayList; | |||
import java.util.Iterator; | |||
import java.util.List; | |||
import java.util.Stack; | |||
import org.apache.poi.hssf.record.AreaFormatRecord; | |||
import org.apache.poi.hssf.record.AxisLineFormatRecord; | |||
import org.apache.poi.hssf.record.AxisOptionsRecord; | |||
import org.apache.poi.hssf.record.AxisParentRecord; | |||
import org.apache.poi.hssf.record.AxisRecord; | |||
import org.apache.poi.hssf.record.AxisUsedRecord; | |||
import org.apache.poi.hssf.record.BOFRecord; | |||
import org.apache.poi.hssf.record.BarRecord; | |||
import org.apache.poi.hssf.record.BeginRecord; | |||
import org.apache.poi.hssf.record.CategorySeriesAxisRecord; | |||
import org.apache.poi.hssf.record.ChartFormatRecord; | |||
import org.apache.poi.hssf.record.ChartRecord; | |||
import org.apache.poi.hssf.record.ChartTitleFormatRecord; | |||
import org.apache.poi.hssf.record.DataFormatRecord; | |||
import org.apache.poi.hssf.record.DefaultDataLabelTextPropertiesRecord; | |||
import org.apache.poi.hssf.record.DimensionsRecord; | |||
import org.apache.poi.hssf.record.EOFRecord; | |||
import org.apache.poi.hssf.record.EndRecord; | |||
import org.apache.poi.hssf.record.FontBasisRecord; | |||
import org.apache.poi.hssf.record.FontIndexRecord; | |||
import org.apache.poi.hssf.record.FooterRecord; | |||
import org.apache.poi.hssf.record.FrameRecord; | |||
import org.apache.poi.hssf.record.HCenterRecord; | |||
import org.apache.poi.hssf.record.HeaderRecord; | |||
import org.apache.poi.hssf.record.LegendRecord; | |||
import org.apache.poi.hssf.record.LineFormatRecord; | |||
import org.apache.poi.hssf.record.LinkedDataFormulaField; | |||
import org.apache.poi.hssf.record.LinkedDataRecord; | |||
import org.apache.poi.hssf.record.PlotAreaRecord; | |||
import org.apache.poi.hssf.record.PlotGrowthRecord; | |||
import org.apache.poi.hssf.record.PrintSetupRecord; | |||
import org.apache.poi.hssf.record.ProtectRecord; | |||
import org.apache.poi.hssf.record.Record; | |||
import org.apache.poi.hssf.record.SCLRecord; | |||
import org.apache.poi.hssf.record.SeriesIndexRecord; | |||
import org.apache.poi.hssf.record.SeriesRecord; | |||
import org.apache.poi.hssf.record.SeriesTextRecord; | |||
import org.apache.poi.hssf.record.SeriesToChartGroupRecord; | |||
import org.apache.poi.hssf.record.SheetPropertiesRecord; | |||
import org.apache.poi.hssf.record.TextRecord; | |||
import org.apache.poi.hssf.record.TickRecord; | |||
import org.apache.poi.hssf.record.UnitsRecord; | |||
import org.apache.poi.hssf.record.UnknownRecord; | |||
import org.apache.poi.hssf.record.VCenterRecord; | |||
import org.apache.poi.hssf.record.ValueRangeRecord; | |||
import org.apache.poi.hssf.record.formula.Area3DPtg; | |||
/** | |||
* Has methods for construction of a chart object. | |||
* | |||
@@ -35,11 +79,13 @@ import java.util.Stack; | |||
public class HSSFChart | |||
{ | |||
private ChartRecord chartRecord; | |||
private SeriesRecord seriesRecord; | |||
private LegendRecord legendRecord; | |||
private ChartTitleFormatRecord chartTitleFormat; | |||
private SeriesTextRecord chartTitleText; | |||
private List series = new ArrayList(); | |||
private HSSFChart(ChartRecord chartRecord) { | |||
this.chartRecord = chartRecord; | |||
} | |||
@@ -121,8 +167,8 @@ public class HSSFChart | |||
/** | |||
* Returns all the charts for the given sheet. | |||
* | |||
* NOTE: Does not yet work... checking it in just so others | |||
* can take a look. | |||
* NOTE: You won't be able to do very much with | |||
* these charts yet, as this is very limited support | |||
*/ | |||
public static HSSFChart[] getSheetCharts(HSSFSheet sheet) { | |||
List charts = new ArrayList(); | |||
@@ -132,33 +178,49 @@ public class HSSFChart | |||
List records = sheet.getSheet().getRecords(); | |||
for(Iterator it = records.iterator(); it.hasNext();) { | |||
Record r = (Record)it.next(); | |||
System.err.println(r); | |||
if(r instanceof DrawingRecord) { | |||
DrawingRecord dr = (DrawingRecord)r; | |||
} | |||
if(r instanceof ChartRecord) { | |||
lastChart = new HSSFChart((ChartRecord)r); | |||
charts.add(lastChart); | |||
} | |||
if(r instanceof LegendRecord) { | |||
lastChart.legendRecord = (LegendRecord)r; | |||
} | |||
if(r instanceof SeriesRecord) { | |||
lastChart.seriesRecord = (SeriesRecord)r; | |||
HSSFSeries series = lastChart.new HSSFSeries( (SeriesRecord)r ); | |||
lastChart.series.add(series); | |||
} | |||
if(r instanceof ChartTitleFormatRecord) { | |||
lastChart.chartTitleFormat = | |||
(ChartTitleFormatRecord)r; | |||
} | |||
if(r instanceof SeriesTextRecord) { | |||
lastChart.chartTitleText = | |||
(SeriesTextRecord)r; | |||
// Applies to a series, unless we've seen | |||
// a legend already | |||
SeriesTextRecord str = (SeriesTextRecord)r; | |||
if(lastChart.legendRecord == null && | |||
lastChart.series.size() > 0) { | |||
HSSFSeries series = (HSSFSeries) | |||
lastChart.series.get(lastChart.series.size()-1); | |||
series.seriesTitleText = str; | |||
} else { | |||
lastChart.chartTitleText = str; | |||
} | |||
} | |||
} | |||
return (HSSFChart[]) | |||
charts.toArray( new HSSFChart[charts.size()] ); | |||
} | |||
/** | |||
* Returns the series of the chart | |||
*/ | |||
public HSSFSeries[] getSeries() { | |||
return (HSSFSeries[]) | |||
series.toArray(new HSSFSeries[series.size()]); | |||
} | |||
/** | |||
* Returns the chart's title, if there is one, | |||
@@ -184,7 +246,6 @@ public class HSSFChart | |||
} | |||
} | |||
private EOFRecord createEOFRecord() | |||
{ | |||
@@ -858,4 +919,51 @@ public class HSSFChart | |||
r.setUnits( (short) 0 ); | |||
return r; | |||
} | |||
/** | |||
* A series in a chart | |||
*/ | |||
public class HSSFSeries { | |||
private SeriesRecord series; | |||
private SeriesTextRecord seriesTitleText; | |||
private HSSFSeries(SeriesRecord series) { | |||
this.series = series; | |||
} | |||
public short getNumValues() { | |||
return series.getNumValues(); | |||
} | |||
/** | |||
* See {@link SeriesRecord} | |||
*/ | |||
public short getValueType() { | |||
return series.getValuesDataType(); | |||
} | |||
/** | |||
* Returns the series' title, if there is one, | |||
* or null if not | |||
*/ | |||
public String getSeriesTitle() { | |||
if(seriesTitleText != null) { | |||
return seriesTitleText.getText(); | |||
} | |||
return null; | |||
} | |||
/** | |||
* Changes the series' title, but only if there | |||
* was one already. | |||
* TODO - add in the records if not | |||
*/ | |||
public void setSeriesTitle(String title) { | |||
if(seriesTitleText != null) { | |||
seriesTitleText.setText(title); | |||
} else { | |||
throw new IllegalStateException("No series title found to change"); | |||
} | |||
} | |||
} | |||
} |
@@ -29,6 +29,7 @@ import java.io.ByteArrayInputStream; | |||
import java.util.Iterator; | |||
import org.apache.poi.POIDocument; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
@@ -95,7 +96,7 @@ public class HWPFDocument extends POIDocument | |||
protected HWPFDocument() | |||
{ | |||
super(null, null); | |||
} | |||
/** | |||
@@ -132,7 +133,7 @@ public class HWPFDocument extends POIDocument | |||
//do Ole stuff | |||
this( verifyAndBuildPOIFS(istream) ); | |||
} | |||
/** | |||
* This constructor loads a Word document from a POIFSFileSystem | |||
* | |||
@@ -141,16 +142,31 @@ public class HWPFDocument extends POIDocument | |||
* in POIFSFileSystem. | |||
*/ | |||
public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException {
    // The document is read from the root directory of the filesystem
    this(pfilesystem.getRoot(), pfilesystem);
}
/** | |||
* This constructor loads a Word document from a specific point | |||
* in a POIFSFileSystem, probably not the default. | |||
* Used typically to open embedded documents. | |||
* | |||
* @param pfilesystem The POIFSFileSystem that contains the Word document. | |||
* @throws IOException If there is an unexpected IOException from the passed | |||
* in POIFSFileSystem. | |||
*/ | |||
public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException | |||
{ | |||
// Sort out the hpsf properties | |||
filesystem = pfilesystem; | |||
super(directory, pfilesystem); | |||
readProperties(); | |||
// read in the main stream. | |||
DocumentEntry documentProps = | |||
(DocumentEntry)filesystem.getRoot().getEntry("WordDocument"); | |||
DocumentEntry documentProps = (DocumentEntry) | |||
directory.getEntry("WordDocument"); | |||
_mainStream = new byte[documentProps.getSize()]; | |||
filesystem.createDocumentInputStream("WordDocument").read(_mainStream); | |||
directory.createDocumentInputStream("WordDocument").read(_mainStream); | |||
// use the fib to determine the name of the table stream. | |||
_fib = new FileInformationBlock(_mainStream); | |||
@@ -165,14 +181,14 @@ public class HWPFDocument extends POIDocument | |||
DocumentEntry tableProps; | |||
try { | |||
tableProps = | |||
(DocumentEntry)filesystem.getRoot().getEntry(name); | |||
(DocumentEntry)directory.getEntry(name); | |||
} catch(FileNotFoundException fnfe) { | |||
throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)"); | |||
} | |||
// read in the table stream. | |||
_tableStream = new byte[tableProps.getSize()]; | |||
filesystem.createDocumentInputStream(name).read(_tableStream); | |||
directory.createDocumentInputStream(name).read(_tableStream); | |||
_fib.fillVariableFields(_mainStream, _tableStream); | |||
@@ -180,7 +196,7 @@ public class HWPFDocument extends POIDocument | |||
try | |||
{ | |||
DocumentEntry dataProps = | |||
(DocumentEntry) filesystem.getRoot().getEntry("Data"); | |||
(DocumentEntry)directory.getEntry("Data"); | |||
_dataStream = new byte[dataProps.getSize()]; | |||
filesystem.createDocumentInputStream("Data").read(_dataStream); | |||
} |
@@ -21,6 +21,12 @@ | |||
package org.apache.poi.hslf.extractor; | |||
import java.io.FileInputStream; | |||
import org.apache.poi.hslf.HSLFSlideShow; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import junit.framework.TestCase; | |||
/** | |||
@@ -35,6 +41,8 @@ public class TextExtractor extends TestCase { | |||
private PowerPointExtractor ppe2; | |||
/** Where to go looking for our test files */ | |||
private String dirname; | |||
/** Where our embedded files live */ | |||
private String pdirname; | |||
public TextExtractor() throws Exception { | |||
dirname = System.getProperty("HSLF.testdata.path"); | |||
@@ -42,6 +50,8 @@ public class TextExtractor extends TestCase { | |||
ppe = new PowerPointExtractor(filename); | |||
String filename2 = dirname + "/with_textbox.ppt"; | |||
ppe2 = new PowerPointExtractor(filename2); | |||
pdirname = System.getProperty("POIFS.testdata.path"); | |||
} | |||
public void testReadSheetText() throws Exception { | |||
@@ -123,9 +133,87 @@ public class TextExtractor extends TestCase { | |||
char[] expC = exp.toCharArray(); | |||
char[] actC = act.toCharArray(); | |||
for(int i=0; i<expC.length; i++) { | |||
System.out.println(i + "\t" + expC[i] + " " + actC[i]); | |||
assertEquals(expC[i],actC[i]); | |||
assertEquals("Char " + i, expC[i], actC[i]); | |||
} | |||
assertEquals(exp,act); | |||
} | |||
/**
* Checks that text can be extracted from two PowerPoint documents that
* are stored as directory entries inside excel_with_embeded.xls.
* NOTE(review): the FileInputStream opened here is not closed in this
* method - confirm whether POIFSFileSystem closes it.
*/
public void testExtractFromEmbeded() throws Exception { | |||
String filename3 = pdirname + "/excel_with_embeded.xls"; | |||
POIFSFileSystem fs = new POIFSFileSystem( | |||
new FileInputStream(filename3) | |||
); | |||
HSLFSlideShow ss; | |||
// The two embedded presentations are looked up by their entry names
// directly under the root of the filesystem.
DirectoryNode dirA = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B6"); | |||
DirectoryNode dirB = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B3"); | |||
// Sanity check: each directory really holds a PowerPoint stream
assertNotNull(dirA.getEntry("PowerPoint Document")); | |||
assertNotNull(dirB.getEntry("PowerPoint Document")); | |||
// Check the first file | |||
ss = new HSLFSlideShow(dirA, fs); | |||
ppe = new PowerPointExtractor(ss); | |||
assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n", | |||
ppe.getText(true, false) | |||
); | |||
// And the second | |||
ss = new HSLFSlideShow(dirB, fs); | |||
ppe = new PowerPointExtractor(ss); | |||
assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n", | |||
ppe.getText(true, false) | |||
); | |||
} | |||
/** | |||
* A powerpoint file with embedded powerpoint files | |||
* TODO - figure out how to handle this, as ppt | |||
* appears to embed not as ole2 streams | |||
*
* The method name does not start with "test", so presumably the JUnit
* runner skips it until the TODO above is resolved - confirm.
*/ | |||
public void DISABLEDtestExtractFromOwnEmbeded() throws Exception { | |||
String filename3 = pdirname + "/ppt_with_embeded.ppt"; | |||
POIFSFileSystem fs = new POIFSFileSystem( | |||
new FileInputStream(filename3) | |||
); | |||
HSLFSlideShow ss; | |||
// Look up the two embedded presentations by entry name under the root
DirectoryNode dirA = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B6"); | |||
DirectoryNode dirB = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B3"); | |||
assertNotNull(dirA.getEntry("PowerPoint Document")); | |||
assertNotNull(dirB.getEntry("PowerPoint Document")); | |||
// Check the first file | |||
ss = new HSLFSlideShow(dirA, fs); | |||
ppe = new PowerPointExtractor(ss); | |||
assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n", | |||
ppe.getText(true, false) | |||
); | |||
// And the second | |||
ss = new HSLFSlideShow(dirB, fs); | |||
ppe = new PowerPointExtractor(ss); | |||
assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n", | |||
ppe.getText(true, false) | |||
); | |||
// Check the master doc two ways: first via the root directory plus filesystem, | |||
// then via the filesystem-only constructor
ss = new HSLFSlideShow(fs.getRoot(), fs); | |||
ppe = new PowerPointExtractor(ss); | |||
assertEquals("I have embeded files in me\n", | |||
ppe.getText(true, false) | |||
); | |||
ss = new HSLFSlideShow(fs); | |||
ppe = new PowerPointExtractor(ss); | |||
assertEquals("I have embeded files in me\n", | |||
ppe.getText(true, false) | |||
); | |||
} | |||
} |
@@ -19,6 +19,8 @@ package org.apache.poi.hssf.usermodel; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import org.apache.poi.hssf.record.SeriesRecord; | |||
import junit.framework.TestCase; | |||
public class TestHSSFChart extends TestCase { | |||
@@ -29,14 +31,65 @@ public class TestHSSFChart extends TestCase { | |||
} | |||
/**
* Loads WithChart.xls and checks that, of its first three sheets, only
* the second has a chart, and that this chart has two titled series
* and no chart title.
* NOTE(review): the FileInputStream is not closed in this method.
*/
public void testSingleChart() throws Exception { | |||
HSSFWorkbook wb = new HSSFWorkbook( | |||
new FileInputStream(new File(dirName, "WithChart.xls")) | |||
); | |||
HSSFSheet s1 = wb.getSheetAt(0); | |||
HSSFSheet s2 = wb.getSheetAt(1); | |||
HSSFSheet s3 = wb.getSheetAt(2); | |||
// Chart counts per sheet: none, one, none
assertEquals(0, HSSFChart.getSheetCharts(s1).length); | |||
assertEquals(1, HSSFChart.getSheetCharts(s2).length); | |||
assertEquals(0, HSSFChart.getSheetCharts(s3).length); | |||
HSSFChart[] charts; | |||
// Check the chart on the 2nd sheet | |||
charts = HSSFChart.getSheetCharts(s2); | |||
assertEquals(1, charts.length); | |||
assertEquals(2, charts[0].getSeries().length); | |||
assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle()); | |||
assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle()); | |||
// The chart itself is expected to have no title
assertEquals(null, charts[0].getChartTitle()); | |||
} | |||
/**
* Loads WithTwoCharts.xls and checks that the second and third sheets
* each have one chart with two titled series and no chart title, while
* the first sheet has no charts.
* NOTE(review): the FileInputStream is not closed in this method.
*/
public void testTwoCharts() throws Exception { | |||
HSSFWorkbook wb = new HSSFWorkbook( | |||
new FileInputStream(new File(dirName, "WithTwoCharts.xls")) | |||
); | |||
HSSFSheet s1 = wb.getSheetAt(0); | |||
HSSFSheet s2 = wb.getSheetAt(1); | |||
HSSFSheet s3 = wb.getSheetAt(2); | |||
// Chart counts per sheet: none, one, one
assertEquals(0, HSSFChart.getSheetCharts(s1).length); | |||
assertEquals(1, HSSFChart.getSheetCharts(s2).length); | |||
assertEquals(1, HSSFChart.getSheetCharts(s3).length); | |||
HSSFChart[] charts; | |||
// Check the chart on the 2nd sheet | |||
charts = HSSFChart.getSheetCharts(s2); | |||
assertEquals(1, charts.length); | |||
assertEquals(2, charts[0].getSeries().length); | |||
assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle()); | |||
assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle()); | |||
assertEquals(null, charts[0].getChartTitle()); | |||
// And the third sheet | |||
charts = HSSFChart.getSheetCharts(s3); | |||
assertEquals(1, charts.length); | |||
assertEquals(2, charts[0].getSeries().length); | |||
assertEquals("Squares", charts[0].getSeries()[0].getSeriesTitle()); | |||
assertEquals("Base Numbers", charts[0].getSeries()[1].getSeriesTitle()); | |||
assertEquals(null, charts[0].getChartTitle()); | |||
} | |||
public void BROKENtestThreeCharts() throws Exception { | |||
public void testThreeCharts() throws Exception { | |||
HSSFWorkbook wb = new HSSFWorkbook( | |||
new FileInputStream(new File(dirName, "WithThreeCharts.xls")) | |||
); | |||
@@ -51,11 +104,30 @@ public class TestHSSFChart extends TestCase { | |||
HSSFChart[] charts; | |||
// Check the charts on the 2nd sheet | |||
charts = HSSFChart.getSheetCharts(s2); | |||
assertNull(charts[0].getChartTitle()); | |||
assertEquals(2, charts.length); | |||
assertEquals(2, charts[0].getSeries().length); | |||
assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle()); | |||
assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle()); | |||
assertEquals(6, charts[0].getSeries()[0].getNumValues()); | |||
assertEquals(6, charts[0].getSeries()[1].getNumValues()); | |||
assertEquals(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC, charts[0].getSeries()[0].getValueType()); | |||
assertEquals(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC, charts[0].getSeries()[1].getValueType()); | |||
assertEquals(null, charts[0].getChartTitle()); | |||
assertEquals(1, charts[1].getSeries().length); | |||
assertEquals(null, charts[1].getSeries()[0].getSeriesTitle()); | |||
assertEquals("Pie Chart Title Thingy", charts[1].getChartTitle()); | |||
// And the third sheet | |||
charts = HSSFChart.getSheetCharts(s3); | |||
assertEquals("Sheet 3 Chart with Title", charts[1].getChartTitle()); | |||
assertEquals(1, charts.length); | |||
assertEquals(2, charts[0].getSeries().length); | |||
assertEquals("Squares", charts[0].getSeries()[0].getSeriesTitle()); | |||
assertEquals("Base Numbers", charts[0].getSeries()[1].getSeriesTitle()); | |||
assertEquals("Sheet 3 Chart with Title", charts[0].getChartTitle()); | |||
} | |||
} |
@@ -17,15 +17,13 @@ | |||
package org.apache.poi.hwpf.extractor; | |||
import java.io.FileInputStream; | |||
import java.util.Iterator; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.hwpf.model.TextPiece; | |||
import org.apache.poi.hwpf.usermodel.Paragraph; | |||
import org.apache.poi.hwpf.usermodel.Range; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
/** | |||
* Test the different routes to extracting text | |||
* | |||
@@ -54,12 +52,16 @@ public class TestWordExtractor extends TestCase { | |||
private WordExtractor extractor; | |||
// Corrupted document - can't do paragraph based stuff | |||
private WordExtractor extractor2; | |||
// An excel file that has word docs embedded in it | |||
private String filename3; | |||
protected void setUp() throws Exception { | |||
String dirname = System.getProperty("HWPF.testdata.path"); | |||
String pdirname = System.getProperty("POIFS.testdata.path"); | |||
String filename = dirname + "/test2.doc"; | |||
String filename2 = dirname + "/test.doc"; | |||
filename3 = pdirname + "/excel_with_embeded.xls"; | |||
extractor = new WordExtractor(new FileInputStream(filename)); | |||
extractor2 = new WordExtractor(new FileInputStream(filename2)); | |||
@@ -101,4 +103,50 @@ public class TestWordExtractor extends TestCase { | |||
String text = extractor.getTextFromPieces(); | |||
assertEquals(p_text1_block, text); | |||
} | |||
/** | |||
* Test that we can get data from two different | |||
* embedded word documents, both stored as directory
* entries inside the excel file named by filename3.
* NOTE(review): the FileInputStream is not closed in this method.
* @throws Exception if the file cannot be opened or read
*/ | |||
public void testExtractFromEmbeded() throws Exception { | |||
POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename3)); | |||
HWPFDocument doc; | |||
WordExtractor extractor3; | |||
// The two embedded documents are looked up by entry name under the root
DirectoryNode dirA = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B7"); | |||
DirectoryNode dirB = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B2"); | |||
// Should have WordDocument and 1Table | |||
assertNotNull(dirA.getEntry("1Table")); | |||
assertNotNull(dirA.getEntry("WordDocument")); | |||
assertNotNull(dirB.getEntry("1Table")); | |||
assertNotNull(dirB.getEntry("WordDocument")); | |||
// Check each in turn | |||
// First document: body text, then title and subject from its summary information
doc = new HWPFDocument(dirA, fs); | |||
extractor3 = new WordExtractor(doc); | |||
assertNotNull(extractor3.getText()); | |||
assertTrue(extractor3.getText().length() > 20); | |||
assertEquals("I am a sample document\r\nNot much on me\r\nI am document 1\r\n", | |||
extractor3.getText()); | |||
assertEquals("Sample Doc 1", extractor3.getSummaryInformation().getTitle()); | |||
assertEquals("Sample Test", extractor3.getSummaryInformation().getSubject()); | |||
// Second document: same checks against its own text and properties
doc = new HWPFDocument(dirB, fs); | |||
extractor3 = new WordExtractor(doc); | |||
assertNotNull(extractor3.getText()); | |||
assertTrue(extractor3.getText().length() > 20); | |||
assertEquals("I am another sample document\r\nNot much on me\r\nI am document 2\r\n", | |||
extractor3.getText()); | |||
assertEquals("Sample Doc 2", extractor3.getSummaryInformation().getTitle()); | |||
assertEquals("Another Sample Test", extractor3.getSummaryInformation().getSubject()); | |||
} | |||
} |
@@ -17,12 +17,15 @@ | |||
package org.apache.poi.hssf.extractor; | |||
import java.io.FileInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
import org.apache.poi.hssf.usermodel.HSSFWorkbook; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
/** | |||
* | |||
@@ -118,4 +121,72 @@ public final class TestExcelExtractor extends TestCase { | |||
assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText()); | |||
} | |||
/** | |||
* Excel workbooks embedded in a non-excel file (word_with_embeded.doc):
* checks the extracted text and summary title of each of the two.
* NOTE(review): the FileInputStream is not closed in this method, and
* the meaning of the third HSSFWorkbook constructor argument (true) is
* not visible here - confirm.
*/ | |||
public void testWithEmbeded() throws Exception { | |||
String pdirname = System.getProperty("POIFS.testdata.path"); | |||
String filename = pdirname + "/word_with_embeded.doc"; | |||
POIFSFileSystem fs = new POIFSFileSystem( | |||
new FileInputStream(filename) | |||
); | |||
// In this file the embedded workbooks sit one level down, inside
// the "ObjectPool" directory rather than directly under the root.
DirectoryNode objPool = (DirectoryNode) | |||
fs.getRoot().getEntry("ObjectPool"); | |||
DirectoryNode dirA = (DirectoryNode) | |||
objPool.getEntry("_1269427460"); | |||
DirectoryNode dirB = (DirectoryNode) | |||
objPool.getEntry("_1269427461"); | |||
HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true); | |||
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); | |||
ExcelExtractor exA = new ExcelExtractor(wbA); | |||
ExcelExtractor exB = new ExcelExtractor(wbB); | |||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", | |||
exA.getText()); | |||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); | |||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", | |||
exB.getText()); | |||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); | |||
} | |||
/** | |||
* Excel embedded in excel: checks the two workbooks embedded in
* excel_with_embeded.xls, and then the containing workbook itself.
* NOTE(review): the FileInputStream is not closed in this method.
*/ | |||
public void testWithEmbededInOwn() throws Exception { | |||
String pdirname = System.getProperty("POIFS.testdata.path"); | |||
String filename = pdirname + "/excel_with_embeded.xls"; | |||
POIFSFileSystem fs = new POIFSFileSystem( | |||
new FileInputStream(filename) | |||
); | |||
// Here the embedded workbooks are directly under the root directory
DirectoryNode dirA = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B5"); | |||
DirectoryNode dirB = (DirectoryNode) | |||
fs.getRoot().getEntry("MBD0000A3B4"); | |||
HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true); | |||
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); | |||
ExcelExtractor exA = new ExcelExtractor(wbA); | |||
ExcelExtractor exB = new ExcelExtractor(wbB); | |||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", | |||
exA.getText()); | |||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); | |||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", | |||
exB.getText()); | |||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); | |||
// And the base file too | |||
// (built straight from the filesystem, with no directory argument)
ExcelExtractor ex = new ExcelExtractor(fs); | |||
assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", | |||
ex.getText()); | |||
assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); | |||
} | |||
} |