file="${main.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="HPSF.testdata.path"
file="${main.src.test}/org/apache/poi/hpsf/data"/>
+ <sysproperty key="POIFS.testdata.path"
+ file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain"/>
<formatter type="xml"/>
file="${main.src.test}/org/apache/poi/hpsf/data"/>
<sysproperty key="HWPF.testdata.path"
file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
+ <sysproperty key="POIFS.testdata.path"
+ file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<batchtest todir="${main.reports.test}">
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+ <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<formatter type="xml"/>
<classpath refid="test.classpath"/>
<sysproperty key="HSSF.testdata.path" file="${main.src.test}/org/apache/poi/hssf/data"/>
<sysproperty key="HPSF.testdata.path" file="${main.src.test}/org/apache/poi/hpsf/data"/>
+ <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<test name="${testcase}"/>
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+ <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain"/>
<formatter type="xml"/>
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+ <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<!-- Don't forget to update status.xml too! -->
<release version="3.0.3-beta1" date="2008-04-??">
+ <action dev="POI-DEVELOPERS" type="add">Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents</action>
+ <action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded Word document</action>
+ <action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
+ <action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.0.3-beta1" date="2008-04-??">
+ <action dev="POI-DEVELOPERS" type="add">Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents</action>
+ <action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded Word document</action>
+ <action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
+ <action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
protected DocumentSummaryInformation dsInf;
/** The open POIFS FileSystem that contains our document */
protected POIFSFileSystem filesystem;
+ /** The directory that our document lives in */
+ protected DirectoryNode directory;
/** For our own logging use */
protected POILogger logger = POILogFactory.getLogger(this.getClass());
/* Have the property streams been read yet? (Only done on-demand) */
protected boolean initialized = false;
+
+
+    /**
+     * Creates a document that lives in the given directory of the
+     * given filesystem.
+     *
+     * @param dir the directory that the document lives in
+     * @param fs the open POIFS filesystem that contains the document
+     */
+    protected POIDocument(DirectoryNode dir, POIFSFileSystem fs) {
+        this.directory = dir;
+        this.filesystem = fs;
+    }
+
+    /**
+     * Creates a document that lives in the root directory of the
+     * given filesystem.
+     *
+     * @param fs the open POIFS filesystem that contains the document
+     */
+    protected POIDocument(POIFSFileSystem fs) {
+        this(fs.getRoot(), fs);
+    }
/**
* Fetch the Document Summary Information of the document
DocumentInputStream dis;
try {
// Find the entry, and get an input stream for it
- dis = filesystem.createDocumentInputStream(setName);
+ dis = directory.createDocumentInputStream(setName);
} catch(IOException ie) {
// Oh well, doesn't exist
logger.log(POILogger.WARN, "Error getting property set with name " + setName + "\n" + ie);
package org.apache.poi.hssf.record;
+import java.io.ByteArrayInputStream;
+
/**
* This is purely for the biff viewer. During normal operations we don't want
* to be seeing this.
super(in);
}
+ /**
+ * Builds a viewer record from an existing drawing record: the record
+ * is serialized, read back through a RecordInputStream, and its raw
+ * bytes are then converted into escher records.
+ *
+ * @param r the drawing record to copy the data from
+ */
+ public DrawingRecordForBiffViewer(DrawingRecord r)
+ {
+ super(convertToInputStream(r));
+ convertRawBytesToEscherRecords();
+ }
+    /**
+     * Serializes the given record and wraps the resulting bytes in a
+     * RecordInputStream that has already been advanced with nextRecord().
+     *
+     * @param r the drawing record to serialize
+     * @return a stream over the record's serialized bytes
+     */
+    private static RecordInputStream convertToInputStream(DrawingRecord r)
+    {
+        ByteArrayInputStream serialized = new ByteArrayInputStream(r.serialize());
+        RecordInputStream stream = new RecordInputStream(serialized);
+        // Move onto the first record before handing the stream over
+        stream.nextRecord();
+        return stream;
+    }
+
protected String getRecordName()
{
return "MSODRAWING";
NoteRecord.class, ObjectProtectRecord.class, ScenarioProtectRecord.class,
FileSharingRecord.class, ChartTitleFormatRecord.class,
DVRecord.class, DVALRecord.class, UncalcedRecord.class,
+ // ChartTitleFormatRecord is already listed above, so it is not repeated here
+ ChartRecord.class, LegendRecord.class,
+ SeriesRecord.class, SeriesTextRecord.class,
HyperlinkRecord.class,
ExternalNameRecord.class, // TODO - same changes in non-@deprecated version of this class
SupBookRecord.class,
}
/**
- * Returns the top-level drawing patriach, if there is
- * one.
- * This will hold any graphics or charts for the sheet.
+ * Returns the aggregate escher record for this sheet,
+ * if there is one.
* WARNING - calling this will trigger a parsing of the
* associated escher records. Any that aren't supported
* (such as charts and complex drawing types) will almost
- * certainly be lost or corrupted when written out. Only
- * use this with simple drawings, otherwise call
- * {@link HSSFSheet#createDrawingPatriarch()} and
- * start from scratch!
+ * certainly be lost or corrupted when written out.
*/
- public HSSFPatriarch getDrawingPatriarch() {
+ public EscherAggregate getDrawingEscherAggregate() {
book.findDrawingGroup();
// If there's now no drawing manager, then there's
// Grab our aggregate record, and wire it up
EscherAggregate agg = (EscherAggregate) sheet.findFirstRecordBySid(EscherAggregate.sid);
+ return agg;
+ }
+
+ /**
+ * Returns the top-level drawing patriarch, if there is
+ * one.
+ * This will hold any graphics or charts for the sheet.
+ * WARNING - calling this will trigger a parsing of the
+ * associated escher records. Any that aren't supported
+ * (such as charts and complex drawing types) will almost
+ * certainly be lost or corrupted when written out. Only
+ * use this with simple drawings, otherwise call
+ * {@link HSSFSheet#createDrawingPatriarch()} and
+ * start from scratch!
+ */
+ public HSSFPatriarch getDrawingPatriarch() {
+ EscherAggregate agg = getDrawingEscherAggregate();
+ if(agg == null) return null;
+
HSSFPatriarch patriarch = new HSSFPatriarch(this, agg);
agg.setPatriarch(patriarch);
import org.apache.poi.hssf.record.formula.MemFuncPtg;
import org.apache.poi.hssf.record.formula.UnionPtg;
import org.apache.poi.hssf.util.CellReference;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.CreationHelper;
import org.apache.poi.util.POILogFactory;
protected HSSFWorkbook( Workbook book )
{
+ super(null, null);
workbook = book;
sheets = new ArrayList( INITIAL_CAPACITY );
names = new ArrayList( INITIAL_CAPACITY );
* @see org.apache.poi.poifs.filesystem.POIFSFileSystem
* @exception IOException if the stream cannot be read
*/
-
public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes)
throws IOException
{
+ this(fs.getRoot(), fs, preserveNodes);
+ }
+
+ /**
+ * given a POI POIFSFileSystem object, and a specific directory
+ * within it, read in its Workbook and populate the high and
+ * low level models. If you're reading in a workbook...start here.
+ *
+ * @param directory the POI filesystem directory to process from
+ * @param fs the POI filesystem that contains the Workbook stream.
+ * @param preserveNodes whether to preserve other nodes, such as
+ * macros. This takes more memory, so only say yes if you
+ * need to. If set, will store all of the POIFSFileSystem
+ * in memory
+ * @see org.apache.poi.poifs.filesystem.POIFSFileSystem
+ * @exception IOException if the stream cannot be read
+ */
+ public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes)
+ throws IOException
+ {
+ super(directory, fs);
this.preserveNodes = preserveNodes;
- this.filesystem = fs;
// If we're not preserving nodes, don't track the
// POIFS any more
if(! preserveNodes) {
this.filesystem = null;
+ this.directory = null;
}
sheets = new ArrayList(INITIAL_CAPACITY);
// put theirs in one called "WORKBOOK"
String workbookName = "Workbook";
try {
- fs.getRoot().getEntry(workbookName);
+ directory.getEntry(workbookName);
// Is the default name
} catch(FileNotFoundException fe) {
// Try the upper case form
try {
workbookName = "WORKBOOK";
- fs.getRoot().getEntry(workbookName);
+ directory.getEntry(workbookName);
} catch(FileNotFoundException wfe) {
// Doesn't contain it in either form
throw new IllegalArgumentException("The supplied POIFSFileSystem contained neither a 'Workbook' entry, nor a 'WORKBOOK' entry. Is it really an excel file?");
// Grab the data from the workbook stream, however
// it happens to be spelt.
- InputStream stream = fs.createDocumentInputStream(workbookName);
+ InputStream stream = directory.createDocumentInputStream(workbookName);
EventRecordFactory factory = new EventRecordFactory();
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.poifs.dev;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.DocumentNode;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * A lister of the entries in POIFS files.
+ *
+ * Much simpler than {@link POIFSViewer}
+ */
+public class POIFSLister {
+    /**
+     * Display the entries of multiple POIFS files
+     *
+     * @param args the names of the files to be displayed
+     * @throws IOException if one of the files cannot be opened or read
+     */
+    public static void main(final String args[]) throws IOException {
+        if (args.length == 0)
+        {
+            System.err.println("Must specify at least one file to view");
+            System.exit(1);
+        }
+
+        for (int j = 0; j < args.length; j++)
+        {
+            viewFile(args[ j ]);
+        }
+    }
+
+    /**
+     * Opens the named file as a POIFS filesystem and prints its
+     * entries, starting from the root directory.
+     *
+     * @param filename the name of the file to be displayed
+     * @throws IOException if the file cannot be opened or read
+     */
+    public static void viewFile(final String filename) throws IOException
+    {
+        FileInputStream in = new FileInputStream(filename);
+        try {
+            POIFSFileSystem fs = new POIFSFileSystem(in);
+            displayDirectory(fs.getRoot(), "");
+        } finally {
+            // Release the file handle even if parsing or listing fails
+            in.close();
+        }
+    }
+
+    /**
+     * Prints the name of the given directory, then each of its entries
+     * one indent level deeper, recursing into child directories.
+     *
+     * @param dir the directory to display
+     * @param indent the prefix to print before this directory's name
+     */
+    public static void displayDirectory(DirectoryNode dir, String indent) {
+        System.out.println(indent + dir.getName() + " -");
+        String newIndent = indent + " ";
+
+        for(Iterator it = dir.getEntries(); it.hasNext(); ) {
+            Object entry = it.next();
+            if(entry instanceof DirectoryNode) {
+                displayDirectory((DirectoryNode)entry, newIndent);
+            } else {
+                DocumentNode doc = (DocumentNode)entry;
+                String name = doc.getName();
+                // Names may start with a low control character; show the
+                // readable remainder plus a spelled-out form of the prefix.
+                // The length check avoids charAt(0) failing on an empty name.
+                if(name.length() > 0 && name.charAt(0) < 10) {
+                    String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1);
+                    name = name.substring(1) + " <" + altname + ">";
+                }
+                System.out.println(newIndent + name);
+            }
+        }
+    }
+}
\ No newline at end of file
{
return _path;
}
+
+    /**
+     * Looks up the named entry in this directory and opens it for
+     * reading.
+     *
+     * @param documentName the name of the document to be opened
+     *
+     * @return a newly opened DocumentInputStream
+     *
+     * @exception IOException if the document does not exist or the
+     *            name is that of a DirectoryEntry
+     */
+    public DocumentInputStream createDocumentInputStream(
+            final String documentName)
+        throws IOException
+    {
+        final Entry entry = getEntry(documentName);
+
+        if (entry.isDocumentEntry())
+        {
+            return new DocumentInputStream(( DocumentEntry ) entry);
+        }
+        // Anything that is not a document cannot be streamed
+        throw new IOException("Entry '" + documentName
+                              + "' is not a DocumentEntry");
+    }
/**
* create a new DocumentEntry
{
return getRoot().createDirectory(name);
}
-
+
/**
* Write the filesystem out
*
* @return the root entry
*/
- public DirectoryEntry getRoot()
+ public DirectoryNode getRoot()
{
if (_root == null)
{
final String documentName)
throws IOException
{
- Entry document = getRoot().getEntry(documentName);
-
- if (!document.isDocumentEntry())
- {
- throw new IOException("Entry '" + documentName
- + "' is not a DocumentEntry");
- }
- return new DocumentInputStream(( DocumentEntry ) document);
+ return getRoot().createDocumentInputStream(documentName);
}
/**
private PointerFactory ptrFactory;
public HDGFDiagram(POIFSFileSystem fs) throws IOException {
- filesystem = fs;
+ super(fs);
DocumentEntry docProps =
(DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");
import org.apache.poi.hslf.record.UserEditAtom;
import org.apache.poi.hslf.usermodel.ObjectData;
import org.apache.poi.hslf.usermodel.PictureData;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
*/
public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
{
- this.filesystem = filesystem;
+ this(filesystem.getRoot(), filesystem);
+ }
+
+ /**
+ * Constructs a Powerpoint document from a specific point in a
+ * POIFS Filesystem. Parses the document and places all the
+ * important stuff into data structures.
+ *
+ * @param dir the POIFS directory to read from
+ * @param filesystem the POIFS FileSystem to read from
+ * @throws IOException if there is a problem while parsing the document.
+ */
+ public HSLFSlideShow(DirectoryNode dir, POIFSFileSystem filesystem) throws IOException
+ {
+ super(dir, filesystem);
// First up, grab the "Current User" stream
// We need this before we can detect Encrypted Documents
{
// Get the main document stream
DocumentEntry docProps =
- (DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document");
+ (DocumentEntry)directory.getEntry("PowerPoint Document");
// Grab the document stream
_docstream = new byte[docProps.getSize()];
- filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream);
+ directory.createDocumentInputStream("PowerPoint Document").read(_docstream);
}
/**
*/
private void readCurrentUserStream() {
try {
- currentUser = new CurrentUserAtom(filesystem);
+ currentUser = new CurrentUserAtom(directory);
} catch(IOException ie) {
logger.log(POILogger.ERROR, "Error finding Current User Atom:\n" + ie);
currentUser = new CurrentUserAtom();
byte[] pictstream;
try {
- DocumentEntry entry = (DocumentEntry)filesystem.getRoot().getEntry("Pictures");
+ DocumentEntry entry = (DocumentEntry)directory.getEntry("Pictures");
pictstream = new byte[entry.getSize()];
- DocumentInputStream is = filesystem.createDocumentInputStream("Pictures");
+ DocumentInputStream is = directory.createDocumentInputStream("Pictures");
is.read(pictstream);
} catch (FileNotFoundException e){
// Silently catch exceptions if the presentation doesn't
* Find the Current User in the filesystem, and create from that
*/
public CurrentUserAtom(POIFSFileSystem fs) throws IOException {
+ this(fs.getRoot());
+ }
+ /**
+ * Find the Current User stream in the given directory, and create from that
+ */
+ public CurrentUserAtom(DirectoryNode dir) throws IOException {
// Decide how big it is
DocumentEntry docProps =
- (DocumentEntry)fs.getRoot().getEntry("Current User");
+ (DocumentEntry)dir.getEntry("Current User");
_contents = new byte[docProps.getSize()];
// Check it's big enough - if it's not at least 28 bytes long, then
}
// Grab the contents
- InputStream in = fs.createDocumentInputStream("Current User");
+ InputStream in = dir.createDocumentInputStream("Current User");
in.read(_contents);
// Set everything up
package org.apache.poi.hssf.usermodel;
-import org.apache.poi.hssf.record.*;
-import org.apache.poi.hssf.record.formula.Area3DPtg;
-
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
+import org.apache.poi.hssf.record.AreaFormatRecord;
+import org.apache.poi.hssf.record.AxisLineFormatRecord;
+import org.apache.poi.hssf.record.AxisOptionsRecord;
+import org.apache.poi.hssf.record.AxisParentRecord;
+import org.apache.poi.hssf.record.AxisRecord;
+import org.apache.poi.hssf.record.AxisUsedRecord;
+import org.apache.poi.hssf.record.BOFRecord;
+import org.apache.poi.hssf.record.BarRecord;
+import org.apache.poi.hssf.record.BeginRecord;
+import org.apache.poi.hssf.record.CategorySeriesAxisRecord;
+import org.apache.poi.hssf.record.ChartFormatRecord;
+import org.apache.poi.hssf.record.ChartRecord;
+import org.apache.poi.hssf.record.ChartTitleFormatRecord;
+import org.apache.poi.hssf.record.DataFormatRecord;
+import org.apache.poi.hssf.record.DefaultDataLabelTextPropertiesRecord;
+import org.apache.poi.hssf.record.DimensionsRecord;
+import org.apache.poi.hssf.record.EOFRecord;
+import org.apache.poi.hssf.record.EndRecord;
+import org.apache.poi.hssf.record.FontBasisRecord;
+import org.apache.poi.hssf.record.FontIndexRecord;
+import org.apache.poi.hssf.record.FooterRecord;
+import org.apache.poi.hssf.record.FrameRecord;
+import org.apache.poi.hssf.record.HCenterRecord;
+import org.apache.poi.hssf.record.HeaderRecord;
+import org.apache.poi.hssf.record.LegendRecord;
+import org.apache.poi.hssf.record.LineFormatRecord;
+import org.apache.poi.hssf.record.LinkedDataFormulaField;
+import org.apache.poi.hssf.record.LinkedDataRecord;
+import org.apache.poi.hssf.record.PlotAreaRecord;
+import org.apache.poi.hssf.record.PlotGrowthRecord;
+import org.apache.poi.hssf.record.PrintSetupRecord;
+import org.apache.poi.hssf.record.ProtectRecord;
+import org.apache.poi.hssf.record.Record;
+import org.apache.poi.hssf.record.SCLRecord;
+import org.apache.poi.hssf.record.SeriesIndexRecord;
+import org.apache.poi.hssf.record.SeriesRecord;
+import org.apache.poi.hssf.record.SeriesTextRecord;
+import org.apache.poi.hssf.record.SeriesToChartGroupRecord;
+import org.apache.poi.hssf.record.SheetPropertiesRecord;
+import org.apache.poi.hssf.record.TextRecord;
+import org.apache.poi.hssf.record.TickRecord;
+import org.apache.poi.hssf.record.UnitsRecord;
+import org.apache.poi.hssf.record.UnknownRecord;
+import org.apache.poi.hssf.record.VCenterRecord;
+import org.apache.poi.hssf.record.ValueRangeRecord;
+import org.apache.poi.hssf.record.formula.Area3DPtg;
+
/**
* Has methods for construction of a chart object.
*
public class HSSFChart
{
private ChartRecord chartRecord;
- private SeriesRecord seriesRecord;
+ private LegendRecord legendRecord;
private ChartTitleFormatRecord chartTitleFormat;
private SeriesTextRecord chartTitleText;
+ private List series = new ArrayList();
+
private HSSFChart(ChartRecord chartRecord) {
this.chartRecord = chartRecord;
}
/**
* Returns all the charts for the given sheet.
*
- * NOTE: Does not yet work... checking it in just so others
- * can take a look.
+ * NOTE: You won't be able to do very much with
+ * these charts yet, as this is very limited support
*/
public static HSSFChart[] getSheetCharts(HSSFSheet sheet) {
List charts = new ArrayList();
List records = sheet.getSheet().getRecords();
for(Iterator it = records.iterator(); it.hasNext();) {
Record r = (Record)it.next();
- System.err.println(r);
-
- if(r instanceof DrawingRecord) {
- DrawingRecord dr = (DrawingRecord)r;
- }
if(r instanceof ChartRecord) {
lastChart = new HSSFChart((ChartRecord)r);
charts.add(lastChart);
}
+ if(r instanceof LegendRecord) {
+ lastChart.legendRecord = (LegendRecord)r;
+ }
if(r instanceof SeriesRecord) {
- lastChart.seriesRecord = (SeriesRecord)r;
+ HSSFSeries series = lastChart.new HSSFSeries( (SeriesRecord)r );
+ lastChart.series.add(series);
}
if(r instanceof ChartTitleFormatRecord) {
lastChart.chartTitleFormat =
(ChartTitleFormatRecord)r;
}
if(r instanceof SeriesTextRecord) {
- lastChart.chartTitleText =
- (SeriesTextRecord)r;
+ // Applies to a series, unless we've seen
+ // a legend already
+ SeriesTextRecord str = (SeriesTextRecord)r;
+ if(lastChart.legendRecord == null &&
+ lastChart.series.size() > 0) {
+ HSSFSeries series = (HSSFSeries)
+ lastChart.series.get(lastChart.series.size()-1);
+ series.seriesTitleText = str;
+ } else {
+ lastChart.chartTitleText = str;
+ }
}
}
return (HSSFChart[])
charts.toArray( new HSSFChart[charts.size()] );
}
+
+    /**
+     * Returns the series of the chart, as a newly allocated array
+     */
+    public HSSFSeries[] getSeries() {
+        HSSFSeries[] result = new HSSFSeries[series.size()];
+        series.toArray(result);
+        return result;
+    }
/**
* Returns the chart's title, if there is one,
}
}
-
private EOFRecord createEOFRecord()
{
r.setUnits( (short) 0 );
return r;
}
+
+
+    /**
+     * A series in a chart. Wraps the series record, along with the
+     * series text record that holds its title when one is present.
+     */
+    public class HSSFSeries {
+        private SeriesRecord series;
+        private SeriesTextRecord seriesTitleText;
+
+        private HSSFSeries(SeriesRecord series) {
+            this.series = series;
+        }
+
+        /**
+         * Returns the number of values, as held by the series record
+         */
+        public short getNumValues() {
+            return series.getNumValues();
+        }
+
+        /**
+         * Returns the values data type of the series record.
+         * See {@link SeriesRecord}
+         */
+        public short getValueType() {
+            return series.getValuesDataType();
+        }
+
+        /**
+         * Returns the series' title, if there is one,
+         *  or null if not
+         */
+        public String getSeriesTitle() {
+            return (seriesTitleText == null) ? null : seriesTitleText.getText();
+        }
+
+        /**
+         * Changes the series' title, but only if there
+         *  was one already.
+         * TODO - add in the records if not
+         *
+         * @throws IllegalStateException if the series has no title record
+         */
+        public void setSeriesTitle(String title) {
+            if(seriesTitleText == null) {
+                throw new IllegalStateException("No series title found to change");
+            }
+            seriesTitleText.setText(title);
+        }
+    }
}
import java.util.Iterator;
import org.apache.poi.POIDocument;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.common.POIFSConstants;
protected HWPFDocument()
{
-
+ super(null, null);
}
/**
//do Ole stuff
this( verifyAndBuildPOIFS(istream) );
}
-
+
/**
* This constructor loads a Word document from a POIFSFileSystem
*
* in POIFSFileSystem.
*/
public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException
+ {
+ this(pfilesystem.getRoot(), pfilesystem);
+ }
+
+ /**
+ * This constructor loads a Word document from a specific point
+ * in a POIFSFileSystem, probably not the default.
+ * Used typically to open embedded documents.
+ *
+ * @param directory The directory within the filesystem to read the Word document from.
+ * @param pfilesystem The POIFSFileSystem that contains the Word document.
+ * @throws IOException If there is an unexpected IOException from the passed
+ * in POIFSFileSystem.
+ */
+ public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException
{
// Sort out the hpsf properties
- filesystem = pfilesystem;
+ super(directory, pfilesystem);
readProperties();
// read in the main stream.
- DocumentEntry documentProps =
- (DocumentEntry)filesystem.getRoot().getEntry("WordDocument");
+ DocumentEntry documentProps = (DocumentEntry)
+ directory.getEntry("WordDocument");
_mainStream = new byte[documentProps.getSize()];
- filesystem.createDocumentInputStream("WordDocument").read(_mainStream);
+
+ directory.createDocumentInputStream("WordDocument").read(_mainStream);
// use the fib to determine the name of the table stream.
_fib = new FileInformationBlock(_mainStream);
DocumentEntry tableProps;
try {
tableProps =
- (DocumentEntry)filesystem.getRoot().getEntry(name);
+ (DocumentEntry)directory.getEntry(name);
} catch(FileNotFoundException fnfe) {
throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
}
// read in the table stream.
_tableStream = new byte[tableProps.getSize()];
- filesystem.createDocumentInputStream(name).read(_tableStream);
+ directory.createDocumentInputStream(name).read(_tableStream);
_fib.fillVariableFields(_mainStream, _tableStream);
try
{
DocumentEntry dataProps =
- (DocumentEntry) filesystem.getRoot().getEntry("Data");
+ (DocumentEntry)directory.getEntry("Data");
_dataStream = new byte[dataProps.getSize()];
- filesystem.createDocumentInputStream("Data").read(_dataStream);
+ // Read from the same directory the entry was looked up in, not the
+ // filesystem root, so that embedded documents get their own stream
+ directory.createDocumentInputStream("Data").read(_dataStream);
}
package org.apache.poi.hslf.extractor;
+import java.io.FileInputStream;
+
+import org.apache.poi.hslf.HSLFSlideShow;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
import junit.framework.TestCase;
/**
private PowerPointExtractor ppe2;
/** Where to go looking for our test files */
private String dirname;
+ /** Where our embedded files live */
+ private String pdirname;
public TextExtractor() throws Exception {
dirname = System.getProperty("HSLF.testdata.path");
ppe = new PowerPointExtractor(filename);
String filename2 = dirname + "/with_textbox.ppt";
ppe2 = new PowerPointExtractor(filename2);
+
+ pdirname = System.getProperty("POIFS.testdata.path");
}
public void testReadSheetText() throws Exception {
char[] expC = exp.toCharArray();
char[] actC = act.toCharArray();
for(int i=0; i<expC.length; i++) {
- System.out.println(i + "\t" + expC[i] + " " + actC[i]);
- assertEquals(expC[i],actC[i]);
+ assertEquals("Char " + i, expC[i], actC[i]);
}
assertEquals(exp,act);
}
+
+    /**
+     * Opens the two PowerPoint documents stored as directories inside
+     * an Excel file, and checks the text extracted from each.
+     */
+    public void testExtractFromEmbeded() throws Exception {
+        POIFSFileSystem fs = new POIFSFileSystem(
+            new FileInputStream(pdirname + "/excel_with_embeded.xls")
+        );
+
+        // Each embedded presentation sits in its own directory
+        DirectoryNode firstDir = (DirectoryNode)
+            fs.getRoot().getEntry("MBD0000A3B6");
+        DirectoryNode secondDir = (DirectoryNode)
+            fs.getRoot().getEntry("MBD0000A3B3");
+
+        assertNotNull(firstDir.getEntry("PowerPoint Document"));
+        assertNotNull(secondDir.getEntry("PowerPoint Document"));
+
+        // Check the first file
+        ppe = new PowerPointExtractor(new HSLFSlideShow(firstDir, fs));
+        String firstText = ppe.getText(true, false);
+        assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
+            firstText
+        );
+
+        // And the second
+        ppe = new PowerPointExtractor(new HSLFSlideShow(secondDir, fs));
+        String secondText = ppe.getText(true, false);
+        assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
+            secondText
+        );
+    }
+
+ /**
+ * A PowerPoint file with embedded PowerPoint files.
+ * TODO - figure out how to handle this, as ppt
+ * appears not to embed them as OLE2 streams.
+ * The DISABLED name prefix keeps the JUnit 3 runner, which only
+ * picks up methods whose names start with "test", from running it.
+ */
+ public void DISABLEDtestExtractFromOwnEmbeded() throws Exception {
+ String filename3 = pdirname + "/ppt_with_embeded.ppt";
+ POIFSFileSystem fs = new POIFSFileSystem(
+ new FileInputStream(filename3)
+ );
+ HSLFSlideShow ss;
+
+ DirectoryNode dirA = (DirectoryNode)
+ fs.getRoot().getEntry("MBD0000A3B6");
+ DirectoryNode dirB = (DirectoryNode)
+ fs.getRoot().getEntry("MBD0000A3B3");
+
+ assertNotNull(dirA.getEntry("PowerPoint Document"));
+ assertNotNull(dirB.getEntry("PowerPoint Document"));
+
+ // Check the first file
+ ss = new HSLFSlideShow(dirA, fs);
+ ppe = new PowerPointExtractor(ss);
+ assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
+ ppe.getText(true, false)
+ );
+
+ // And the second
+ ss = new HSLFSlideShow(dirB, fs);
+ ppe = new PowerPointExtractor(ss);
+ assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
+ ppe.getText(true, false)
+ );
+
+
+ // Check the master doc two ways
+ ss = new HSLFSlideShow(fs.getRoot(), fs);
+ ppe = new PowerPointExtractor(ss);
+ assertEquals("I have embeded files in me\n",
+ ppe.getText(true, false)
+ );
+
+ ss = new HSLFSlideShow(fs);
+ ppe = new PowerPointExtractor(ss);
+ assertEquals("I have embeded files in me\n",
+ ppe.getText(true, false)
+ );
+ }
}
import java.io.File;
import java.io.FileInputStream;
+import org.apache.poi.hssf.record.SeriesRecord;
+
import junit.framework.TestCase;
public class TestHSSFChart extends TestCase {
}
public void testSingleChart() throws Exception {
+ HSSFWorkbook wb = new HSSFWorkbook(
+ new FileInputStream(new File(dirName, "WithChart.xls"))
+ );
+
+ HSSFSheet s1 = wb.getSheetAt(0);
+ HSSFSheet s2 = wb.getSheetAt(1);
+ HSSFSheet s3 = wb.getSheetAt(2);
+
+ assertEquals(0, HSSFChart.getSheetCharts(s1).length);
+ assertEquals(1, HSSFChart.getSheetCharts(s2).length);
+ assertEquals(0, HSSFChart.getSheetCharts(s3).length);
+ HSSFChart[] charts;
+
+ // Check the chart on the 2nd sheet
+ charts = HSSFChart.getSheetCharts(s2);
+ assertEquals(1, charts.length);
+
+ assertEquals(2, charts[0].getSeries().length);
+ assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle());
+ assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle());
+ assertEquals(null, charts[0].getChartTitle());
}
public void testTwoCharts() throws Exception { // WithTwoCharts.xls: one chart on each of the 2nd and 3rd sheets
+ HSSFWorkbook wb = new HSSFWorkbook(
+ new FileInputStream(new File(dirName, "WithTwoCharts.xls"))
+ );
+ HSSFSheet s1 = wb.getSheetAt(0);
+ HSSFSheet s2 = wb.getSheetAt(1);
+ HSSFSheet s3 = wb.getSheetAt(2);
+
+ assertEquals(0, HSSFChart.getSheetCharts(s1).length); // only the first sheet has no chart
+ assertEquals(1, HSSFChart.getSheetCharts(s2).length);
+ assertEquals(1, HSSFChart.getSheetCharts(s3).length);
+
+ HSSFChart[] charts;
+
+ // Check the chart on the 2nd sheet: two titled series, no chart title
+ charts = HSSFChart.getSheetCharts(s2);
+ assertEquals(1, charts.length);
+
+ assertEquals(2, charts[0].getSeries().length);
+ assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle());
+ assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle());
+ assertEquals(null, charts[0].getChartTitle());
+
+ // And the third sheet: again two titled series and no chart title
+ charts = HSSFChart.getSheetCharts(s3);
+ assertEquals(1, charts.length);
+
+ assertEquals(2, charts[0].getSeries().length);
+ assertEquals("Squares", charts[0].getSeries()[0].getSeriesTitle());
+ assertEquals("Base Numbers", charts[0].getSeries()[1].getSeriesTitle());
+ assertEquals(null, charts[0].getChartTitle());
}
-
- public void BROKENtestThreeCharts() throws Exception {
+
+ public void testThreeCharts() throws Exception { // renamed from BROKENtestThreeCharts; JUnit 3 only runs methods whose names start with "test"
HSSFWorkbook wb = new HSSFWorkbook(
new FileInputStream(new File(dirName, "WithThreeCharts.xls"))
);
HSSFChart[] charts;
+ // Check the charts on the 2nd sheet: two are expected there
charts = HSSFChart.getSheetCharts(s2);
- assertNull(charts[0].getChartTitle());
+ assertEquals(2, charts.length);
+
+ assertEquals(2, charts[0].getSeries().length); // first chart: two titled numeric series of 6 values each, no chart title
+ assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle());
+ assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle());
+ assertEquals(6, charts[0].getSeries()[0].getNumValues());
+ assertEquals(6, charts[0].getSeries()[1].getNumValues());
+ assertEquals(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC, charts[0].getSeries()[0].getValueType());
+ assertEquals(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC, charts[0].getSeries()[1].getValueType());
+ assertEquals(null, charts[0].getChartTitle());
+
+ assertEquals(1, charts[1].getSeries().length); // second chart: a single untitled series, but the chart itself is titled
+ assertEquals(null, charts[1].getSeries()[0].getSeriesTitle());
assertEquals("Pie Chart Title Thingy", charts[1].getChartTitle());
+ // And the third sheet: a single chart, which carries the title
charts = HSSFChart.getSheetCharts(s3);
- assertEquals("Sheet 3 Chart with Title", charts[1].getChartTitle());
+ assertEquals(1, charts.length);
+
+ assertEquals(2, charts[0].getSeries().length);
+ assertEquals("Squares", charts[0].getSeries()[0].getSeriesTitle());
+ assertEquals("Base Numbers", charts[0].getSeries()[1].getSeriesTitle());
+ assertEquals("Sheet 3 Chart with Title", charts[0].getChartTitle()); // title is asserted on charts[0]; the removed assertion used charts[1]
}
}
package org.apache.poi.hwpf.extractor;
import java.io.FileInputStream;
-import java.util.Iterator;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.TextPiece;
-import org.apache.poi.hwpf.usermodel.Paragraph;
-import org.apache.poi.hwpf.usermodel.Range;
import junit.framework.TestCase;
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
/**
* Test the different routes to extracting text
*
private WordExtractor extractor;
// Corrupted document - can't do paragraph based stuff
private WordExtractor extractor2;
+ // An Excel file containing embedded Word documents
+ private String filename3;
protected void setUp() throws Exception {
String dirname = System.getProperty("HWPF.testdata.path");
+ String pdirname = System.getProperty("POIFS.testdata.path");
String filename = dirname + "/test2.doc";
String filename2 = dirname + "/test.doc";
+ filename3 = pdirname + "/excel_with_embeded.xls";
extractor = new WordExtractor(new FileInputStream(filename));
extractor2 = new WordExtractor(new FileInputStream(filename2));
String text = extractor.getTextFromPieces();
assertEquals(p_text1_block, text);
}
+
+
+ /**
+ * Test that we can get text and summary information from two
+ * different Word documents embedded in another file (filename3)
+ * @throws Exception if the test file cannot be read
+ */
+ public void testExtractFromEmbeded() throws Exception {
+ POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename3));
+ HWPFDocument doc;
+ WordExtractor extractor3;
+
+ DirectoryNode dirA = (DirectoryNode) // the two embedded documents live in directories directly under the root
+ fs.getRoot().getEntry("MBD0000A3B7");
+ DirectoryNode dirB = (DirectoryNode)
+ fs.getRoot().getEntry("MBD0000A3B2");
+
+ // Each embedded directory should have both the WordDocument and 1Table entries
+ assertNotNull(dirA.getEntry("1Table"));
+ assertNotNull(dirA.getEntry("WordDocument"));
+
+ assertNotNull(dirB.getEntry("1Table"));
+ assertNotNull(dirB.getEntry("WordDocument"));
+
+ // Check each in turn: full text, then title and subject from the summary information
+ doc = new HWPFDocument(dirA, fs); // build the document from the embedded directory, not the filesystem root
+ extractor3 = new WordExtractor(doc);
+
+ assertNotNull(extractor3.getText());
+ assertTrue(extractor3.getText().length() > 20);
+ assertEquals("I am a sample document\r\nNot much on me\r\nI am document 1\r\n",
+ extractor3.getText());
+ assertEquals("Sample Doc 1", extractor3.getSummaryInformation().getTitle());
+ assertEquals("Sample Test", extractor3.getSummaryInformation().getSubject());
+
+
+ doc = new HWPFDocument(dirB, fs); // same checks against the second embedded document
+ extractor3 = new WordExtractor(doc);
+
+ assertNotNull(extractor3.getText());
+ assertTrue(extractor3.getText().length() > 20);
+ assertEquals("I am another sample document\r\nNot much on me\r\nI am document 2\r\n",
+ extractor3.getText());
+ assertEquals("Sample Doc 2", extractor3.getSummaryInformation().getTitle());
+ assertEquals("Another Sample Test", extractor3.getSummaryInformation().getSubject());
+ }
}
package org.apache.poi.hssf.extractor;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import junit.framework.TestCase;
import org.apache.poi.hssf.HSSFTestDataSamples;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
*
assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText());
}
+
+ /**
+ * Excel workbooks embedded in a non-Excel file (word_with_embeded.doc)
+ */
+ public void testWithEmbeded() throws Exception {
+ String pdirname = System.getProperty("POIFS.testdata.path");
+ String filename = pdirname + "/word_with_embeded.doc";
+ POIFSFileSystem fs = new POIFSFileSystem(
+ new FileInputStream(filename)
+ );
+
+ DirectoryNode objPool = (DirectoryNode) // here the embedded entries sit under the root's "ObjectPool" directory
+ fs.getRoot().getEntry("ObjectPool");
+ DirectoryNode dirA = (DirectoryNode)
+ objPool.getEntry("_1269427460");
+ DirectoryNode dirB = (DirectoryNode)
+ objPool.getEntry("_1269427461");
+
+ HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true); // NOTE(review): third argument is presumably preserveNodes - confirm against HSSFWorkbook
+ HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
+
+ ExcelExtractor exA = new ExcelExtractor(wbA);
+ ExcelExtractor exB = new ExcelExtractor(wbB);
+
+ assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", // each workbook must yield its own text and its own summary title
+ exA.getText());
+ assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
+
+ assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
+ exB.getText());
+ assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
+ }
+
+ /**
+ * Excel workbooks embedded in an Excel file (excel_with_embeded.xls)
+ */
+ public void testWithEmbededInOwn() throws Exception {
+ String pdirname = System.getProperty("POIFS.testdata.path");
+ String filename = pdirname + "/excel_with_embeded.xls";
+ POIFSFileSystem fs = new POIFSFileSystem(
+ new FileInputStream(filename)
+ );
+
+ DirectoryNode dirA = (DirectoryNode) // here the embedded workbooks are directories directly under the root
+ fs.getRoot().getEntry("MBD0000A3B5");
+ DirectoryNode dirB = (DirectoryNode)
+ fs.getRoot().getEntry("MBD0000A3B4");
+
+ HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true); // NOTE(review): third argument is presumably preserveNodes - confirm against HSSFWorkbook
+ HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
+
+ ExcelExtractor exA = new ExcelExtractor(wbA);
+ ExcelExtractor exB = new ExcelExtractor(wbB);
+
+ assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
+ exA.getText());
+ assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
+
+ assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
+ exB.getText());
+ assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
+
+ // And the base (outer) workbook too, extracted directly from the POIFSFileSystem
+ ExcelExtractor ex = new ExcelExtractor(fs);
+ assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
+ ex.getText());
+ assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
+ }
}