<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+ <sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+ <sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain"/>
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+ <sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<sysproperty key="java.awt.headless" value="true"/>
<zipfileset dir="." prefix="${zipdir}">
<exclude name="build/**"/>
<exclude name="scripts/**"/>
+ <exclude name="TEST*"/>
<exclude name="*.ipr"/>
<exclude name="*.iml"/>
<exclude name="*.iws"/>
<exclude name="*.swp"/>
+ <exclude name=".classpath"/>
+ <exclude name=".project"/>
</zipfileset>
</zip>
<tarfileset dir="." prefix="${zipdir}">
<exclude name="build/**"/>
<exclude name="scripts/**"/>
+ <exclude name="TEST*"/>
<exclude name="*.ipr"/>
<exclude name="*.iml"/>
<exclude name="*.iws"/>
<exclude name="*.swp"/>
+ <exclude name=".classpath"/>
+ <exclude name=".project"/>
</tarfileset>
</tar>
<menu-item label="HSLF" href="slideshow/index.html"/>
<menu-item label="HSMF" href="hsmf/index.html"/>
<menu-item label="HDGF" href="hdgf/index.html"/>
+ <menu-item label="HPBF" href="hpbf/index.html"/>
<menu-item label="POI-Ruby" href="poi-ruby.html"/>
<menu-item label="POI-Utils" href="utils/index.html"/>
<menu-item label="Text Extraction" href="text-extraction.html"/>
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support for additional HSSF header and footer fields, including bold and full file path</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required</action>
<action dev="POI-DEVELOPERS" type="add">45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)</action>
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ====================================================================
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">
+
+<document>
+ <header>
+ <title>POI-HPBF - A Guide to the Publisher File Format</title>
+ <subtitle>Overview</subtitle>
+ <authors>
+ <person name="Nick Burch" email="nick at torchbox dot com"/>
+ </authors>
+ </header>
+
+ <body>
+ <section><title>Document Streams</title>
+ <p>
+ The file is made up of a number of POIFS streams. A typical
+ file will be made up as follows:
+ </p>
+<source>
+Root Entry -
+ Objects -
+ (no children)
+ SummaryInformation &lt;(0x05)SummaryInformation&gt;
+ DocumentSummaryInformation &lt;(0x05)DocumentSummaryInformation&gt;
+ Escher -
+ EscherStm
+ EscherDelayStm
+ Quill -
+ QuillSub -
+ CONTENTS
+ CompObj &lt;(0x01)CompObj&gt;
+ Envelope
+ Contents
+ Internal &lt;(0x03)Internal&gt;
+ CompObj &lt;(0x01)CompObj&gt;
+ VBA -
+ (no children)
+</source>
+ </section>
+ <section><title>Changing Text</title>
+ <p>If you make a change to the text of a file, but do not change
+ how much text there is, then the <em>CONTENTS</em> stream
+ will undergo a small change, and the <em>Contents</em> stream
+ will undergo a large change.</p>
+ <p>If you make a change to the text of a file, and change the
+ amount of text there is, then both the <em>Contents</em> and
+ the <em>CONTENTS</em> streams change.</p>
+ </section>
+ <section><title>Changing Shapes</title>
+ <p>If you alter the size of a textbox, but make no text changes,
+ then both <em>Contents</em> and <em>CONTENTS</em> streams
+ change. There are no changes to the Escher streams.</p>
+ <p>If you set the background colour of a textbox, but make
+ no changes to the text, (to finish off)</p>
+ </section>
+ <section><title>Structure of CONTENTS</title>
+ <p>First we have "CHNKINK ", followed by 24 bytes.</p>
+ <p>Next we have 20 sequences of 24 bytes each. If the first two bytes
+ are 0x1800, then that sequence entry exists, but if it's 0x0000 then
+ the entry doesn't exist. If it does exist, we then have 4 bytes of
+ upper case ASCII text, followed by three little endian shorts.
+ The first of these seems to be the count of that type, the second is
+ usually 1, the third is usually zero. Then we have another 4 bytes of
+ upper case ASCII text, normally but not always the same as the first
+ text. Finally, we have an unsigned little endian 32 bit offset to
+ the start of the data for this, then an unsigned little endian
+ 32 bit length of this section.</p>
+ <p>Normally, the first sequence entry is for TEXT, and the text data
+ will start at 0x200. After that is normally two or three STSH entries
+ (so the first short has values 0, then 1, then 2). After that it
+ seems to vary.</p>
+ <p>At 0x200 we have the text, stored as little endian 16 bit unicode.</p>
+ <p>After the text comes all sorts of other stuff, presumably as
+ described by the sequences.</p>
+ <p>For a contents stream of length 7168 / 0x1c00 bytes, the start
+ looks something like:</p>
+<source>
+CHNKINK // "CHNKINK "
+04 00 07 00 // Normally 04 00 07 00
+13 00 00 03 // Normally ## 00 00 03
+00 02 00 00 // Normally 00 ## 00 00
+00 1c 00 00 // Normally length of the stream
+f8 01 13 00 // Normally f8 01 11/13 00
+ff ff ff ff // Normally seems to be ffffffff
+
+18 00
+TEXT 00 00 01 00 00 00 // TEXT 0 1 0
+TEXT 00 02 00 00 d0 03 00 00 // TEXT from: 200 (512), len: 3d0 (976)
+18 00
+STSH 00 00 01 00 00 00 // STSH 0 1 0
+STSH d0 05 00 00 1e 00 00 00 // STSH from: 5d0 (1488), len: 1e (30)
+18 00
+STSH 01 00 01 00 00 00 // STSH 1 1 0
+STSH ee 05 00 00 b8 01 00 00 // STSH from: 5ee (1518), len: 1b8 (440)
+18 00
+STSH 02 00 01 00 00 00 // STSH 2 1 0
+STSH a6 07 00 00 3c 00 00 00 // STSH from: 7a6 (1958), len: 3c (60)
+18 00
+FDPP 00 00 01 00 00 00 // FDPP 0 1 0
+FDPP 00 08 00 00 00 02 00 00 // FDPP from: 800 (2048), len: 200 (512)
+18 00
+FDPC 00 00 01 00 00 00 // FDPC 0 1 0
+FDPC 00 0a 00 00 00 02 00 00 // FDPC from: a00 (2560), len: 200 (512)
+18 00
+FDPC 01 00 01 00 00 00 // FDPC 1 1 0
+FDPC 00 0c 00 00 00 02 00 00 // FDPC from: c00 (3072), len: 200 (512)
+18 00
+SYID 00 00 01 00 00 00 // SYID 0 1 0
+SYID 00 0e 00 00 20 00 00 00 // SYID from: e00 (3584), len: 20 (32)
+18 00
+SGP 00 00 01 00 00 00 // SGP 0 1 0
+SGP 20 0e 00 00 0a 00 00 00 // SGP from: e20 (3616), len: a (10)
+18 00
+INK 00 00 01 00 00 00 // INK 0 1 0
+INK 2a 0e 00 00 04 00 00 00 // INK from: e2a (3626), len: 4 (4)
+18 00
+BTEP 00 00 01 00 00 00 // BTEP 0 1 0
+PLC 2e 0e 00 00 18 00 00 00 // PLC from: e2e (3630), len: 18 (24)
+18 00
+BTEC 00 00 01 00 00 00 // BTEC 0 1 0
+PLC 46 0e 00 00 20 00 00 00 // PLC from: e46 (3654), len: 20 (32)
+18 00
+FONT 00 00 01 00 00 00 // FONT 0 1 0
+FONT 66 0e 00 00 48 03 00 00 // FONT from: e66 (3686), len: 348 (840)
+18 00
+TCD 03 00 01 00 00 00 // TCD 3 1 0
+PLC ae 11 00 00 24 00 00 00 // PLC from: 11ae (4526), len: 24 (36)
+18 00
+TOKN 04 00 01 00 00 00 // TOKN 4 1 0
+PLC d2 11 00 00 0a 01 00 00 // PLC from: 11d2 (4562), len: 10a (266)
+18 00
+TOKN 05 00 01 00 00 00 // TOKN 5 1 0
+PLC dc 12 00 00 2a 01 00 00 // PLC from: 12dc (4828), len: 12a (298)
+18 00
+STRS 00 00 01 00 00 00 // STRS 0 1 0
+PLC 06 14 00 00 46 00 00 00 // PLC from: 1406 (5126), len: 46 (70)
+18 00
+MCLD 00 00 01 00 00 00 // MCLD 0 1 0
+MCLD 4c 14 00 00 16 06 00 00 // MCLD from: 144c (5196), len: 616 (1558)
+18 00
+PL 00 00 01 00 00 00 // PL 0 1 0
+PL 62 1a 00 00 48 00 00 00 // PL from: 1a62 (6754), len: 48 (72)
+00 00 // Blank entry follows
+00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+
+(the text will then start)
+</source>
+ </section>
+ </body>
+</document>
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ====================================================================
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">
+
+<document>
+ <header>
+ <title>POI-HPBF - Java API To Access Microsoft Publisher Format Files</title>
+ <subtitle>Overview</subtitle>
+ <authors>
+ <person name="Nick Burch" email="nick at apache dot org"/>
+ </authors>
+ </header>
+
+ <body>
+ <section>
+ <title>Overview</title>
+
+ <p>HPBF is the POI Project's pure Java implementation of the Publisher file format.</p>
+ <p>Currently, HPBF is in the experimental stage, while we try
+ to figure out the file format. Our initial aim is to provide
+ a text extractor for the format, with low level code following
+ after that if demand and developer interest warrant it.</p>
+ <p>At this time, there is no <em>usermodel</em> api or similar.</p>
+ <p>Our current understanding of the file format is documented
+ <link href="file-format.html">here</link>.</p>
+ <note>
+ This code currently lives in the
+ <link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link>
+ of the POI SVN repository.
+ Ensure that you have the scratchpad jar or the scratchpad
+ build area in your
+ classpath before experimenting with this code.
+ </note>
+ </section>
+ </body>
+</document>
href="./slideshow/index.html">the HSLF project page for more
information</link>.</p>
</section>
- <section><title>HDGF for Visio Documents</title>
- <p>HDGF is our port of the Microsoft Viso 97(-2003) file format to pure
- Java. It currently only supports reading at a very low level, and
- simple text extraction. Please see <link
- href="./hdgf/index.html">the HDGF project page for more
- information</link>.</p>
- </section>
<section><title>HPSF for Document Properties</title>
<p>HPSF is our port of the OLE 2 property set format to pure
Java. Property sets are mostly use to store a document's properties
(title, author, date of last modification etc.), but they can be used
for application-specific purposes as well.</p>
- <p>HPSF supports reading and writing of properties. However, you will
- need to be using version 3.0 of POI to utilise the write support.</p>
-
+ <p>HPSF supports both reading and writing of properties.</p>
<p>Please see <link href="./hpsf/index.html">the HPSF project
page</link> for more information.</p>
</section>
-
+ <section><title>HDGF for Visio Documents</title>
+ <p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
+ Java. It currently only supports reading at a very low level, and
+ simple text extraction. Please see <link
+ href="./hdgf/index.html">the HDGF project page for more
+ information</link>.</p>
+ </section>
+ <section><title>HPBF for Publisher Documents</title>
+ <p>HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure
+ Java. At the moment, we are still figuring out the file format, but we hope
+ to have simple text extraction shortly. Please see <link
+ href="./hpbf/index.html">the HPBF project page for more
+ information</link>.</p>
+ </section>
</section>
<section><title>Contributing </title>
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1.1-alpha1" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support for additional HSSF header and footer fields, including bold and full file path</action>
<action dev="POI-DEVELOPERS" type="add">45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required</action>
<action dev="POI-DEVELOPERS" type="add">45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)</action>
protected ArrayList records = null;
int preoffset = 0; // offset of the sheet in a new file
- protected int dimsloc = -1; // TODO - is it legal for dims record to be missing?
+ protected int dimsloc = -1; // TODO - remove dimsloc
protected PrintGridlinesRecord printGridlines = null;
protected GridsetRecord gridset = null;
private GutsRecord _gutsRecord;
private MergedCellsTable _mergedCellsTable;
/** always present in this POI object, not always written to Excel file */
/*package*/ColumnInfoRecordsAggregate _columnInfos;
- protected DimensionsRecord dims;
+ /** the DimensionsRecord is always present */
+ private DimensionsRecord _dimensions;
protected RowRecordsAggregate _rowsAggregate = null;
private DataValidityTable _dataValidityTable= null;
private ConditionalFormattingTable condFormatting;
records.add(retval._columnInfos);
}
- retval.dims = ( DimensionsRecord ) rec;
+ retval._dimensions = ( DimensionsRecord ) rec;
retval.dimsloc = records.size();
}
else if (rec.getSid() == DefaultColWidthRecord.sid)
records.add(rec);
}
- if (retval.dimsloc < 0) {
+ if (retval._dimensions == null) {
throw new RuntimeException("DimensionsRecord was not found");
}
retval.records = records;
public static Sheet createSheet()
{
+ // TODO - convert this method to a constructor
+
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "Sheet createsheet from scratch called");
Sheet retval = new Sheet();
records.add( retval.printGridlines );
retval.gridset = createGridset();
records.add( retval.gridset );
- records.add( retval.createGuts() );
+ retval._gutsRecord = createGuts();
+ records.add( retval._gutsRecord );
retval.defaultrowheight = createDefaultRowHeight();
records.add( retval.defaultrowheight );
records.add( retval.createWSBool() );
ColumnInfoRecordsAggregate columns = new ColumnInfoRecordsAggregate();
records.add( columns );
retval._columnInfos = columns;
- retval.dims = createDimensions();
- records.add(retval.dims);
+ retval._dimensions = createDimensions();
+ records.add(retval._dimensions);
retval.dimsloc = records.size()-1;
records.add(retval.windowTwo = retval.createWindowTwo());
retval.selection = createSelection();
if (_rowsAggregate == null)
{
_rowsAggregate = new RowRecordsAggregate();
- records.add(getDimsLoc() + 1, _rowsAggregate);
+ records.add(dimsloc + 1, _rowsAggregate);
}
}
private MergedCellsTable getMergedRecords() {
.append(lastrow).append("lastcol").append(lastcol)
.toString());
}
- dims.setFirstCol(firstcol);
- dims.setFirstRow(firstrow);
- dims.setLastCol(lastcol);
- dims.setLastRow(lastrow);
+ _dimensions.setFirstCol(firstcol);
+ _dimensions.setFirstRow(firstrow);
+ _dimensions.setLastCol(lastcol);
+ _dimensions.setLastRow(lastrow);
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "Sheet.setDimensions exiting");
}
if(log.check(POILogger.DEBUG)) {
log.log(POILogger.DEBUG, "add value record row" + row);
}
- DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc());
+ DimensionsRecord d = _dimensions;
if (col.getColumn() > d.getLastCol())
{
*/
public void removeValueRecord(int row, CellValueRecordInterface col) {
- log.logFormatted(POILogger.DEBUG, "remove value record row,dimsloc %,%",
- new int[]{row, dimsloc} );
+ log.logFormatted(POILogger.DEBUG, "remove value record row %",
+ new int[]{row } );
_rowsAggregate.removeCell(col);
}
checkRows();
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "addRow ");
- DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc());
+ DimensionsRecord d = _dimensions;
if (row.getRowNumber() >= d.getLastRow())
{
}
}
- /**
- * get the location of the DimensionsRecord (which is the last record before the value section)
- * @return location in the array of records of the DimensionsRecord
- */
-
- public int getDimsLoc()
- {
- if (log.check( POILogger.DEBUG ))
- log.log(POILogger.DEBUG, "getDimsLoc dimsloc= " + dimsloc);
- return dimsloc;
- }
-
- /**
- * in the event the record is a dimensions record, resets both the loc index and dimsloc index
- */
- public void checkDimsLoc(Record rec, int recloc) {
- if (rec.getSid() == DimensionsRecord.sid) {
- dimsloc = recloc;
- }
- }
-
/**
* @return the serialized size of this sheet
*/
}
if (width != -1) {
+ width *= 256;
if (width > Short.MAX_VALUE) { //width can be bigger that Short.MAX_VALUE!
width = Short.MAX_VALUE;
}
- sheet.setColumnWidth(column, (short) (width * 256));
+ sheet.setColumnWidth(column, (short) (width));
}
}
System.exit(1);
}
- for (int j = 0; j < args.length; j++)
- {
- viewFile(args[ j ]);
+ boolean withSizes = false;
+ for (int j = 0; j < args.length; j++) {
+ if(args[j].equalsIgnoreCase("-size") ||
+ args[j].equalsIgnoreCase("-sizes")) {
+ withSizes = true;
+ } else {
+ viewFile(args[j], withSizes);
+ }
}
}
- public static void viewFile(final String filename) throws IOException
+ public static void viewFile(final String filename, boolean withSizes) throws IOException
{
POIFSFileSystem fs = new POIFSFileSystem(
new FileInputStream(filename)
);
- displayDirectory(fs.getRoot(), "");
+ displayDirectory(fs.getRoot(), "", withSizes);
}
- public static void displayDirectory(DirectoryNode dir, String indent) {
+ public static void displayDirectory(DirectoryNode dir, String indent, boolean withSizes) {
System.out.println(indent + dir.getName() + " -");
String newIndent = indent + " ";
+ boolean hadChildren = false;
for(Iterator it = dir.getEntries(); it.hasNext(); ) {
+ hadChildren = true;
Object entry = it.next();
if(entry instanceof DirectoryNode) {
- displayDirectory((DirectoryNode)entry, newIndent);
+ displayDirectory((DirectoryNode)entry, newIndent, withSizes);
} else {
DocumentNode doc = (DocumentNode)entry;
String name = doc.getName();
+ String size = "";
if(name.charAt(0) < 10) {
String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1);
name = name.substring(1) + " <" + altname + ">";
}
- System.out.println(newIndent + name);
+ if(withSizes) {
+ size = " [" +
+ doc.getSize() + " / 0x" +
+ Integer.toHexString(doc.getSize()) +
+ "]";
+ }
+ System.out.println(newIndent + name + size);
}
}
+ if(!hadChildren) {
+ System.out.println(newIndent + "(no children)");
+ }
}
}
\ No newline at end of file
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.dev;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.ddf.DefaultEscherRecordFactory;
+import org.apache.poi.ddf.EscherRecord;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
+
+/**
+ * For dumping out the contents of HPBF (Publisher)
+ * files, while we try to figure out how they're
+ * constructed.
+ */
+public class HPBFDumper {
+ /** The POIFS filesystem of the file being dumped */
+ private POIFSFileSystem fs;
+ /**
+  * Creates a dumper over an already opened POIFS filesystem.
+  * @param fs the filesystem whose streams will be dumped
+  */
+ public HPBFDumper(POIFSFileSystem fs) {
+ this.fs = fs;
+ }
+ /**
+  * Creates a dumper by wrapping the given stream in a new
+  *  POIFSFileSystem.
+  * @param inp the stream to build the filesystem from
+  * @throws IOException declared by the POIFSFileSystem constructor
+  */
+ public HPBFDumper(InputStream inp) throws IOException {
+ this(new POIFSFileSystem(inp));
+ }
+
+ /**
+  * Reads the whole of the named document stream into a byte array.
+  *
+  * @param dir the directory that holds the stream
+  * @param name the name of the stream's entry within the directory
+  * @return the complete contents of the stream
+  * @throws IOException if the entry can't be opened or read, or if the
+  *  stream ends before the size recorded on its entry has been read
+  */
+ private static byte[] getData(DirectoryNode dir, String name) throws IOException {
+     DocumentEntry docProps =
+         (DocumentEntry)dir.getEntry(name);
+
+     // Grab the document stream. A single read() call isn't
+     //  guaranteed to fill the buffer, so keep reading until
+     //  it is full, and always close the stream afterwards
+     byte[] d = new byte[docProps.getSize()];
+     InputStream in = dir.createDocumentInputStream(name);
+     try {
+         int done = 0;
+         while(done < d.length) {
+             int got = in.read(d, done, d.length - done);
+             if(got < 0) {
+                 throw new IOException(
+                     "Stream " + name + " ended after " + done +
+                     " bytes, but " + d.length + " were expected"
+                 );
+             }
+             done += got;
+         }
+     } finally {
+         in.close();
+     }
+
+     // All done
+     return d;
+ }
+
+ /**
+  * Renders a run of bytes as lower case hex. Each byte is
+  *  written as exactly two characters, followed by a space.
+  *
+  * @param data the array to read from
+  * @param offset index of the first byte to render
+  * @param len how many bytes to render
+  * @return the hex text, eg "0f ff " for the bytes 0x0f 0xff
+  */
+ private String dumpBytes(byte[] data, int offset, int len) {
+     StringBuffer hex = new StringBuffer();
+     for(int n=0; n<len; n++) {
+         // Mask to treat the byte as unsigned (0-255)
+         int value = data[offset + n] & 0xFF;
+
+         // toHexString gives a single digit below 0x10, so pad it
+         if(value < 0x10) {
+             hex.append('0');
+         }
+         hex.append(Integer.toHexString(value)).append(' ');
+     }
+     return hex.toString();
+ }
+
+ /**
+  * Command line entry point. Dumps the streams of the file
+  *  named by the first argument to standard out, or prints
+  *  a usage message and exits with status 1 if no file is given.
+  *
+  * @param args the command line arguments; args[0] is the file to dump
+  * @throws Exception if the file can't be opened or dumped
+  */
+ public static void main(String[] args) throws Exception {
+     if(args.length < 1) {
+         System.err.println("Use:");
+         System.err.println(" HPBFDumper <filename>");
+         System.exit(1);
+     }
+
+     // Make sure the file handle is released again, even
+     //  if opening or dumping the file fails part way through
+     FileInputStream inp = new FileInputStream(args[0]);
+     try {
+         HPBFDumper dump = new HPBFDumper(inp);
+
+         System.out.println("Dumping " + args[0]);
+         dump.dumpContents();
+         dump.dumpEnvelope();
+         dump.dumpEscher();
+         dump.dump001CompObj(dump.fs.getRoot());
+         dump.dumpQuill();
+
+         // Still to go:
+         // (0x03)Internal
+         // Objects
+     } finally {
+         inp.close();
+     }
+ }
+
+ /**
+  * Dumps both streams found under the Escher directory
+  *  of the file: first EscherStm, then EscherDelayStm.
+  *
+  * @throws IOException if the directory or its streams can't be read
+  */
+ public void dumpEscher() throws IOException {
+     DirectoryNode root = fs.getRoot();
+     DirectoryNode escher = (DirectoryNode)root.getEntry("Escher");
+
+     dumpEscherStm(escher);
+     dumpEscherDelayStm(escher);
+ }
+ private void dumpEscherStream(byte[] data) {
+ DefaultEscherRecordFactory erf =
+ new DefaultEscherRecordFactory();
+
+ // Dump
+ int left = data.length;
+ while(left > 0) {
+ EscherRecord er = erf.createRecord(data, 0);
+ er.fillFields(data, 0, erf);
+ left -= er.getRecordSize();
+
+ System.out.println(er.toString());
+ }
+ }
+ /**
+  * Prints the size of the EscherStm stream, followed
+  *  by its escher records if the stream isn't empty.
+  *
+  * @param escherDir the directory holding the EscherStm entry
+  * @throws IOException if the stream can't be read
+  */
+ protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
+     byte[] stm = getData(escherDir, "EscherStm");
+     System.out.println("");
+     System.out.println("EscherStm - " + stm.length + " bytes long:");
+
+     // Nothing to decode in an empty stream
+     if(stm.length == 0) {
+         return;
+     }
+     dumpEscherStream(stm);
+ }
+ /**
+  * Prints the size of the EscherDelayStm stream, followed
+  *  by its escher records if the stream isn't empty.
+  *
+  * @param escherDir the directory holding the EscherDelayStm entry
+  * @throws IOException if the stream can't be read
+  */
+ protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
+     byte[] delayStm = getData(escherDir, "EscherDelayStm");
+     System.out.println("");
+     System.out.println("EscherDelayStm - " + delayStm.length + " bytes long:");
+
+     // Nothing to decode in an empty stream
+     if(delayStm.length == 0) {
+         return;
+     }
+     dumpEscherStream(delayStm);
+ }
+
+ public void dumpEnvelope() throws IOException {
+ byte[] data = getData(fs.getRoot(), "Envelope");
+
+ System.out.println("");
+ System.out.println("Envelope - " + data.length + " bytes long:");
+ }
+
+ public void dumpContents() throws IOException {
+ byte[] data = getData(fs.getRoot(), "Contents");
+
+ System.out.println("");
+ System.out.println("Contents - " + data.length + " bytes long:");
+
+ // 8 bytes, always seems to be
+ // E8 AC 2C 00 E8 03 05 01
+ // E8 AC 2C 00 E8 03 05 01
+
+ // 4 bytes - size of contents
+ // 13/15 00 00 01
+
+ // ....
+
+ // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
+
+ // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
+
+ // 01 18 30 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+
+ // 01 18 31 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+
+ // 01 18 32 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+ }
+
+ /**
+  * Dumps the CONTENTS stream of the given directory in a mostly
+  *  raw form. Prints the first 0x20 bytes (8 as text, the rest as
+  *  hex), then the bytes up to 0x200 as alternating groups of
+  *  2 hex bytes + 4 text bytes + 6 hex bytes, and 4 text bytes +
+  *  8 hex bytes. Finally prints the text that starts at 0x200,
+  *  up to the first run of three zero bytes, if one is found.
+  *
+  * @param dir the directory holding the CONTENTS entry
+  * @throws IOException if the stream can't be read
+  */
+ public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
+ byte[] data = getData(dir, "CONTENTS");
+
+ System.out.println("");
+ System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+ // Between the start and 0x200 we have
+ // CHNKINK(space) + 24 bytes
+ // 0x1800
+ // TEXT + 6 bytes
+ // TEXT + 8 bytes
+ // 0x1800
+ // STSH + 6 bytes
+ // STSH + 8 bytes
+ // 0x1800
+ // STSH + 6 bytes
+ // STSH + 8 bytes
+ // but towards 0x200 the pattern may
+ // break down a little bit
+
+ // After the second of a given type,
+ // it seems to be 4 bytes giving the start,
+ // then 4 bytes giving the length, then
+ // 18 00
+ // Header: the first 8 bytes as text, the next 24 as hex
+ System.out.println(
+ new String(data, 0, 8) +
+ dumpBytes(data, 8, 0x20-8)
+ );
+
+ // sixNotEight flips on every pass: true for the first half of
+ // an entry (2 byte marker, 4 bytes of text, 6 bytes of hex),
+ // false for the second half (4 bytes of text, 8 bytes of hex)
+ int pos = 0x20;
+ boolean sixNotEight = true;
+ while(pos < 0x200) {
+ if(sixNotEight) {
+ System.out.println(
+ dumpBytes(data, pos, 2)
+ );
+ pos += 2;
+ }
+ String text = new String(data, pos, 4);
+ int blen = 8;
+ if(sixNotEight)
+ blen = 6;
+ System.out.println(
+ text + " " + dumpBytes(data, pos+4, blen)
+ );
+
+ pos += 4 + blen;
+ sixNotEight = ! sixNotEight;
+ }
+
+ // Text from 0x200 onwards until we get
+ // to \r(00)\n(00)(00)(00)
+ // (the search below stops at the first three zero bytes in a row)
+ int textStop = -1;
+ for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
+ if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
+ textStop = i;
+ }
+ }
+ if(textStop > 0) {
+ // Two bytes per character, so halve the byte count
+ int len = (textStop - 0x200) / 2;
+ System.out.println("");
+ System.out.println(
+ StringUtil.getFromUnicodeLE(data, 0x200, len)
+ );
+ }
+
+ // The font list comes slightly later
+
+ // The hyperlinks may come before the fonts,
+ // or slightly in front
+ }
+ /**
+  * Dumps the CONTENTS stream of the given directory using the
+  *  current guess at its layout: 20 entries of 24 bytes each,
+  *  starting at 0x20, where an entry is present if it begins
+  *  with the bytes 18 00. Prints each entry, then the text that
+  *  the first entry points at, then the first 12 bytes of the
+  *  data that each present entry points at.
+  *
+  * @param dir the directory holding the CONTENTS entry
+  * @throws IOException if the stream can't be read
+  */
+ public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
+     final int entryCount = 20;
+     final int entrySize = 24;
+     final int firstEntryAt = 0x20;
+
+     byte[] data = getData(dir, "CONTENTS");
+
+     System.out.println("");
+     System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+     String[] startType = new String[entryCount];
+     String[] endType = new String[entryCount];
+     int[] optA = new int[entryCount];
+     int[] optB = new int[entryCount];
+     int[] optC = new int[entryCount];
+     int[] from = new int[entryCount];
+     int[] len = new int[entryCount];
+
+     // Decode every entry that is flagged as being present
+     for(int n=0; n<entryCount; n++) {
+         int base = firstEntryAt + n*entrySize;
+         if(data[base] != 0x18 || data[base+1] != 0x00) {
+             // Doesn't have data
+             continue;
+         }
+
+         // Has data
+         startType[n] = new String(data, base+2, 4);
+         optA[n] = LittleEndian.getUShort(data, base+6);
+         optB[n] = LittleEndian.getUShort(data, base+8);
+         optC[n] = LittleEndian.getUShort(data, base+10);
+         endType[n] = new String(data, base+12, 4);
+         from[n] = (int)LittleEndian.getUInt(data, base+16);
+         len[n] = (int)LittleEndian.getUInt(data, base+20);
+     }
+
+     // The text is taken from wherever the first entry points,
+     //  with the byte length halved to give a character count
+     int textChars = len[0]/2;
+     String text = StringUtil.getFromUnicodeLE(data, from[0], textChars);
+
+     // Dump
+     for(int n=0; n<entryCount; n++) {
+         String num = (n < 10) ? ("0" + n) : Integer.toString(n);
+         System.out.print(num + " ");
+
+         if(startType[n] == null) {
+             System.out.println("(not present)");
+             continue;
+         }
+
+         String firstHalf =
+             "\t" + startType[n] + " " + optA[n] + " " + optB[n] + " " + optC[n];
+         System.out.println(firstHalf);
+
+         String secondHalf =
+             "\t" + endType[n] + " " +
+             "from: " + Integer.toHexString(from[n]) + " (" + from[n] + ")" +
+             ", len: " + Integer.toHexString(len[n]) + " (" + len[n] + ")";
+         System.out.println(secondHalf);
+     }
+
+     // Text
+     System.out.println("");
+     System.out.println("TEXT:");
+     System.out.println(text);
+     System.out.println("");
+
+     // All the others
+     for(int n=0; n<entryCount; n++) {
+         if(startType[n] != null) {
+             int start = from[n];
+
+             System.out.println(
+                 startType[n] + " -> " + endType[n] +
+                 " @ " + Integer.toHexString(start) +
+                 " (" + start + ")"
+             );
+             // First three groups of 4 bytes at the entry's data
+             for(int group=0; group<3; group++) {
+                 System.out.println("\t" + dumpBytes(data, start + group*4, 4));
+             }
+             System.out.println("\t(etc)");
+         }
+     }
+ }
+
+ /**
+  * Placeholder, currently does nothing. Presumably intended to
+  *  dump the (0x01)CompObj stream of the given directory - TODO confirm
+  *
+  * @param dir the directory that would hold the stream
+  */
+ protected void dump001CompObj(DirectoryNode dir) {
+ // TODO
+ }
+
+ /**
+  * Dumps the Quill part of the file, which lives in the
+  *  QuillSub directory under the Quill directory: the CompObj
+  *  stream, then the CONTENTS stream both raw and as guessed.
+  *
+  * @throws IOException if the directories or streams can't be read
+  */
+ public void dumpQuill() throws IOException {
+     DirectoryNode root = fs.getRoot();
+     DirectoryNode quill = (DirectoryNode)root.getEntry("Quill");
+     DirectoryNode quillSub = (DirectoryNode)quill.getEntry("QuillSub");
+
+     dump001CompObj(quillSub);
+     dumpCONTENTSraw(quillSub);
+     dumpCONTENTSguessed(quillSub);
+ }
+}
--- /dev/null
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
--- /dev/null
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+We’ve added some more text in here, to push all the offsets about a bit.
+
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+Ditto with more text in here.
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
--- /dev/null
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page12345678
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
--- /dev/null
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
import org.apache.poi.hssf.record.ColumnInfoRecord;
import org.apache.poi.hssf.record.DimensionsRecord;
import org.apache.poi.hssf.record.EOFRecord;
+import org.apache.poi.hssf.record.GutsRecord;
import org.apache.poi.hssf.record.IndexRecord;
import org.apache.poi.hssf.record.MergeCellsRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.aggregates.ColumnInfoRecordsAggregate;
import org.apache.poi.hssf.record.aggregates.PageSettingsBlock;
import org.apache.poi.hssf.record.aggregates.RowRecordsAggregate;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.CellRangeAddress;
/**
if (false) {
// make sure that RRA and VRA are in the right place
// (Aug 2008) since the VRA is now part of the RRA, there is much less chance that
- // they could get out of order. Still, one could write serialize the sheet here,
- // and read back with EventRecordFactory to make sure...
+ // they could get out of order. Still, one could write serialize the sheet here,
+ // and read back with EventRecordFactory to make sure...
}
assertEquals(242, dbCellRecordPos);
}
return _indexRecord;
}
}
+
+ /**
+  * Checks for a bug introduced around r682282-r683880 that caused a second
+  *  GUTS record to be added, which in turn got the dimensions record out
+  *  of alignment
+  */
+ public void testGutsRecord_bug45640() {
+
+     Sheet sheet = Sheet.createSheet();
+     sheet.addRow(new RowRecord(0));
+     sheet.addRow(new RowRecord(1));
+     sheet.groupRowRange( 0, 1, true );
+     sheet.toString();
+
+     // Count how many GUTS records the sheet now holds
+     List sheetRecords = sheet.getRecords();
+     int gutsCount = 0;
+     for(int n=0; n<sheetRecords.size(); n++) {
+         Object rec = sheetRecords.get(n);
+         if (rec instanceof GutsRecord) {
+             gutsCount++;
+         }
+     }
+
+     // Exactly two is the signature of the bug, so report it by name
+     if (gutsCount == 2) {
+         throw new AssertionFailedError("Identified bug 45640");
+     }
+     assertEquals(1, gutsCount);
+ }
}