https://svn.apache.org/repos/asf/poi/trunk ........ r686207 | nick | 2008-08-15 13:43:02 +0100 (Fri, 15 Aug 2008) | 1 line Add sample publisher files from bug #45602 to svn ........ r686216 | nick | 2008-08-15 15:05:30 +0100 (Fri, 15 Aug 2008) | 1 line Add a few more source package excludes ........ r686278 | nick | 2008-08-15 17:57:30 +0100 (Fri, 15 Aug 2008) | 1 line More sample hpbf docs, with a description ........ r686290 | nick | 2008-08-15 18:42:25 +0100 (Fri, 15 Aug 2008) | 1 line Start on a HPBF dumper ........ r686621 | nick | 2008-08-17 17:36:40 +0100 (Sun, 17 Aug 2008) | 1 line Few little tweaks to dev helpers ........ r686624 | nick | 2008-08-17 18:39:10 +0100 (Sun, 17 Aug 2008) | 1 line More work understanding hpbf ........ r686625 | nick | 2008-08-17 19:02:31 +0100 (Sun, 17 Aug 2008) | 1 line More work understanding hpbf ........ r686628 | nick | 2008-08-17 19:21:34 +0100 (Sun, 17 Aug 2008) | 1 line More work understanding hpbf ........ r686640 | nick | 2008-08-17 21:15:51 +0100 (Sun, 17 Aug 2008) | 1 line Further HPBF documentation, and some more sample files used ........ r686844 | yegor | 2008-08-18 19:33:58 +0100 (Mon, 18 Aug 2008) | 1 line fixed bug #45645: Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE ........ r686977 | josh | 2008-08-19 08:44:57 +0100 (Tue, 19 Aug 2008) | 1 line Fix for bug 45640 - avoid creating multiple GUTS records ........ git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@687333 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_5_BETA3
@@ -649,6 +649,7 @@ under the License. | |||
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/> | |||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/> | |||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/> | |||
<sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/> | |||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
@@ -707,6 +708,7 @@ under the License. | |||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/> | |||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/> | |||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/> | |||
<sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/> | |||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<formatter type="plain"/> | |||
@@ -742,6 +744,7 @@ under the License. | |||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/> | |||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/> | |||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/> | |||
<sysproperty key="HPBF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hpbf/data"/> | |||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
<sysproperty key="java.awt.headless" value="true"/> | |||
@@ -1284,10 +1287,13 @@ FORREST_HOME environment variable!</echo> | |||
<zipfileset dir="." prefix="${zipdir}"> | |||
<exclude name="build/**"/> | |||
<exclude name="scripts/**"/> | |||
<exclude name="TEST*"/> | |||
<exclude name="*.ipr"/> | |||
<exclude name="*.iml"/> | |||
<exclude name="*.iws"/> | |||
<exclude name="*.swp"/> | |||
<exclude name=".classpath"/> | |||
<exclude name=".project"/> | |||
</zipfileset> | |||
</zip> | |||
@@ -1313,10 +1319,13 @@ FORREST_HOME environment variable!</echo> | |||
<tarfileset dir="." prefix="${zipdir}"> | |||
<exclude name="build/**"/> | |||
<exclude name="scripts/**"/> | |||
<exclude name="TEST*"/> | |||
<exclude name="*.ipr"/> | |||
<exclude name="*.iml"/> | |||
<exclude name="*.iws"/> | |||
<exclude name="*.swp"/> | |||
<exclude name=".classpath"/> | |||
<exclude name=".project"/> | |||
</tarfileset> | |||
</tar> | |||
@@ -41,6 +41,7 @@ | |||
<menu-item label="HSLF" href="slideshow/index.html"/> | |||
<menu-item label="HSMF" href="hsmf/index.html"/> | |||
<menu-item label="HDGF" href="hdgf/index.html"/> | |||
<menu-item label="HPBF" href="hpbf/index.html"/> | |||
<menu-item label="POI-Ruby" href="poi-ruby.html"/> | |||
<menu-item label="POI-Utils" href="utils/index.html"/> | |||
<menu-item label="Text Extraction" href="text-extraction.html"/> |
@@ -64,6 +64,7 @@ | |||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action> | |||
</release> | |||
<release version="3.1.1-alpha1" date="2008-??-??"> | |||
<action dev="POI-DEVELOPERS" type="fix">45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE</action> | |||
<action dev="POI-DEVELOPERS" type="add">45623 - Support for additional HSSF header and footer fields, including bold and full file path</action> | |||
<action dev="POI-DEVELOPERS" type="add">45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required</action> | |||
<action dev="POI-DEVELOPERS" type="add">45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)</action> |
@@ -0,0 +1,170 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<!-- | |||
==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== | |||
--> | |||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd"> | |||
<document> | |||
<header> | |||
<title>POI-HPBF - A Guide to the Publisher File Format</title> | |||
<subtitle>Overview</subtitle> | |||
<authors> | |||
<person name="Nick Burch" email="nick at torchbox dot com"/> | |||
</authors> | |||
</header> | |||
<body> | |||
<section><title>Document Streams</title> | |||
<p> | |||
The file is made up of a number of POIFS streams. A typical | |||
file will be made up as follows: | |||
</p> | |||
<source> | |||
Root Entry - | |||
Objects - | |||
(no children) | |||
SummaryInformation <(0x05)SummaryInformation> | |||
DocumentSummaryInformation <(0x05)DocumentSummaryInformation> | |||
Escher - | |||
EscherStm | |||
EscherDelayStm | |||
Quill - | |||
QuillSub - | |||
CONTENTS | |||
CompObj <(0x01)CompObj> | |||
Envelope | |||
Contents | |||
Internal <(0x03)Internal> | |||
CompObj <(0x01)CompObj> | |||
VBA - | |||
(no children) | |||
</source> | |||
</section> | |||
<section><title>Changing Text</title> | |||
<p>If you make a change to the text of a file, but do not change | |||
how much text there is, then the <em>CONTENTS</em> stream | |||
will undergo a small change, and the <em>Contents</em> stream | |||
will undergo a large change.</p> | |||
<p>If you make a change to the text of a file, and change the | |||
amount of text there is, then both the <em>Contents</em> and | |||
the <em>CONTENTS</em> streams change.</p> | |||
</section> | |||
<section><title>Changing Shapes</title> | |||
<p>If you alter the size of a textbox, but make no text changes, | |||
then both <em>Contents</em> and <em>CONTENTS</em> streams | |||
change. There are no changes to the Escher streams.</p> | |||
<p>If you set the background colour of a textbox, but make | |||
no changes to the text, (to finish off)</p> | |||
</section> | |||
<section><title>Structure of CONTENTS</title> | |||
<p>First we have "CHNKINK ", followed by 24 bytes.</p> | |||
<p>Next we have 20 sequences of 24 bytes each. If the first two bytes | |||
are 0x1800, then that sequence entry exists, but if they are 0x0000 then | |||
the entry doesn't exist. If it does exist, we then have 4 bytes of | |||
upper case ASCII text, followed by three little endian shorts. | |||
The first of these seems to be the count of that type, the second is | |||
usually 1, the third is usually zero. Then we have another 4 bytes of | |||
upper case ASCII text, normally but not always the same as the first | |||
text. Finally, we have an unsigned little endian 32 bit offset to | |||
the start of the data for this, then an unsigned little endian | |||
32 bit length of this section.</p> | |||
<p>Normally, the first sequence entry is for TEXT, and the text data | |||
will start at 0x200. After that is normally two or three STSH entries | |||
(so the first short has values 0, then 1, then 2). After that it | |||
seems to vary.</p> | |||
<p>At 0x200 we have the text, stored as little endian 16 bit unicode.</p> | |||
<p>After the text comes all sorts of other stuff, presumably as | |||
described by the sequences.</p> | |||
<p>For a contents stream of length 7168 / 0x1c00 bytes, the start | |||
looks something like:</p> | |||
<source> | |||
CHNKINK // "CHNKINK " | |||
04 00 07 00 // Normally 04 00 07 00 | |||
13 00 00 03 // Normally ## 00 00 03 | |||
00 02 00 00 // Normally 00 ## 00 00 | |||
00 1c 00 00 // Normally length of the stream | |||
f8 01 13 00 // Normally f8 01 11/13 00 | |||
ff ff ff ff // Normally seems to be ffffffff | |||
18 00 | |||
TEXT 00 00 01 00 00 00 // TEXT 0 1 0 | |||
TEXT 00 02 00 00 d0 03 00 00 // TEXT from: 200 (512), len: 3d0 (976) | |||
18 00 | |||
STSH 00 00 01 00 00 00 // STSH 0 1 0 | |||
STSH d0 05 00 00 1e 00 00 00 // STSH from: 5d0 (1488), len: 1e (30) | |||
18 00 | |||
STSH 01 00 01 00 00 00 // STSH 1 1 0 | |||
STSH ee 05 00 00 b8 01 00 00 // STSH from: 5ee (1518), len: 1b8 (440) | |||
18 00 | |||
STSH 02 00 01 00 00 00 // STSH 2 1 0 | |||
STSH a6 07 00 00 3c 00 00 00 // STSH from: 7a6 (1958), len: 3c (60) | |||
18 00 | |||
FDPP 00 00 01 00 00 00 // FDPP 0 1 0 | |||
FDPP 00 08 00 00 00 02 00 00 // FDPP from: 800 (2048), len: 200 (512) | |||
18 00 | |||
FDPC 00 00 01 00 00 00 // FDPC 0 1 0 | |||
FDPC 00 0a 00 00 00 02 00 00 // FDPC from: a00 (2560), len: 200 (512) | |||
18 00 | |||
FDPC 01 00 01 00 00 00 // FDPC 1 1 0 | |||
FDPC 00 0c 00 00 00 02 00 00 // FDPC from: c00 (3072), len: 200 (512) | |||
18 00 | |||
SYID 00 00 01 00 00 00 // SYID 0 1 0 | |||
SYID 00 0e 00 00 20 00 00 00 // SYID from: e00 (3584), len: 20 (32) | |||
18 00 | |||
SGP 00 00 01 00 00 00 // SGP 0 1 0 | |||
SGP 20 0e 00 00 0a 00 00 00 // SGP from: e20 (3616), len: a (10) | |||
18 00 | |||
INK 00 00 01 00 00 00 // INK 0 1 0 | |||
INK 2a 0e 00 00 04 00 00 00 // INK from: e2a (3626), len: 4 (4) | |||
18 00 | |||
BTEP 00 00 01 00 00 00 // BTEP 0 1 0 | |||
PLC 2e 0e 00 00 18 00 00 00 // PLC from: e2e (3630), len: 18 (24) | |||
18 00 | |||
BTEC 00 00 01 00 00 00 // BTEC 0 1 0 | |||
PLC 46 0e 00 00 20 00 00 00 // PLC from: e46 (3654), len: 20 (32) | |||
18 00 | |||
FONT 00 00 01 00 00 00 // FONT 0 1 0 | |||
FONT 66 0e 00 00 48 03 00 00 // FONT from: e66 (3686), len: 348 (840) | |||
18 00 | |||
TCD 03 00 01 00 00 00 // TCD 3 1 0 | |||
PLC ae 11 00 00 24 00 00 00 // PLC from: 11ae (4526), len: 24 (36) | |||
18 00 | |||
TOKN 04 00 01 00 00 00 // TOKN 4 1 0 | |||
PLC d2 11 00 00 0a 01 00 00 // PLC from: 11d2 (4562), len: 10a (266) | |||
18 00 | |||
TOKN 05 00 01 00 00 00 // TOKN 5 1 0 | |||
PLC dc 12 00 00 2a 01 00 00 // PLC from: 12dc (4828), len: 12a (298) | |||
18 00 | |||
STRS 00 00 01 00 00 00 // STRS 0 1 0 | |||
PLC 06 14 00 00 46 00 00 00 // PLC from: 1406 (5126), len: 46 (70) | |||
18 00 | |||
MCLD 00 00 01 00 00 00 // MCLD 0 1 0 | |||
MCLD 4c 14 00 00 16 06 00 00 // MCLD from: 144c (5196), len: 616 (1558) | |||
18 00 | |||
PL 00 00 01 00 00 00 // PL 0 1 0 | |||
PL 62 1a 00 00 48 00 00 00 // PL from: 1a62 (6754), len: 48 (72) | |||
00 00 // Blank entry follows | |||
00 00 00 00 00 00 | |||
00 00 00 00 00 00 00 00 | |||
00 00 00 00 00 00 00 00 | |||
(the text will then start) | |||
</source> | |||
</section> | |||
</body> | |||
</document> |
@@ -0,0 +1,53 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<!-- | |||
==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== | |||
--> | |||
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd"> | |||
<document> | |||
<header> | |||
<title>POI-HPBF - Java API To Access Microsoft Publisher Format Files</title> | |||
<subtitle>Overview</subtitle> | |||
<authors> | |||
<person name="Nick Burch" email="nick at apache dot org"/> | |||
</authors> | |||
</header> | |||
<body> | |||
<section> | |||
<title>Overview</title> | |||
<p>HPBF is the POI Project's pure Java implementation of the Publisher file format.</p> | |||
<p>Currently, HPBF is in the experimental stage, while we try | |||
to figure out the file format. Our initial aim is to provide | |||
a text extractor for the format, with low level code following | |||
after that if demand and developer interest warrant it.</p> | |||
<p>At this time, there is no <em>usermodel</em> api or similar.</p> | |||
<p>Our current understanding of the file format is documented | |||
<link href="file-format.html">here</link>.</p> | |||
<note> | |||
This code currently lives in the | |||
<link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link> | |||
of the POI SVN repository. | |||
Ensure that you have the scratchpad jar or the scratchpad | |||
build area in your | |||
classpath before experimenting with this code. | |||
</note> | |||
</section> | |||
</body> | |||
</document> |
@@ -141,26 +141,30 @@ | |||
href="./slideshow/index.html">the HSLF project page for more | |||
information</link>.</p> | |||
</section> | |||
<section><title>HDGF for Visio Documents</title> | |||
<p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure | |||
Java. It currently only supports reading at a very low level, and | |||
simple text extraction. Please see <link | |||
href="./hdgf/index.html">the HDGF project page for more | |||
information</link>.</p> | |||
</section> | |||
<section><title>HPSF for Document Properties</title> | |||
<p>HPSF is our port of the OLE 2 property set format to pure | |||
Java. Property sets are mostly used to store a document's properties | |||
(title, author, date of last modification etc.), but they can be used | |||
for application-specific purposes as well.</p> | |||
<p>HPSF supports reading and writing of properties. However, you will | |||
need to be using version 3.0 of POI to utilise the write support.</p> | |||
<p>HPSF supports both reading and writing of properties.</p> | |||
<p>Please see <link href="./hpsf/index.html">the HPSF project | |||
page</link> for more information.</p> | |||
</section> | |||
<section><title>HDGF for Visio Documents</title> | |||
<p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure | |||
Java. It currently only supports reading at a very low level, and | |||
simple text extraction. Please see <link | |||
href="./hdgf/index.html">the HDGF project page for more | |||
information</link>.</p> | |||
</section> | |||
<section><title>HPBF for Publisher Documents</title> | |||
<p>HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure | |||
Java. At the moment, we are still figuring out the file format, but we hope | |||
to have simple text extraction shortly. Please see <link | |||
href="./hpbf/index.html">the HPBF project page for more | |||
information</link>.</p> | |||
</section> | |||
</section> | |||
<section><title>Contributing </title> |
@@ -61,6 +61,7 @@ | |||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action> | |||
</release> | |||
<release version="3.1.1-alpha1" date="2008-??-??"> | |||
<action dev="POI-DEVELOPERS" type="fix">45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE</action> | |||
<action dev="POI-DEVELOPERS" type="add">45623 - Support for additional HSSF header and footer fields, including bold and full file path</action> | |||
<action dev="POI-DEVELOPERS" type="add">45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required</action> | |||
<action dev="POI-DEVELOPERS" type="add">45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)</action> |
@@ -106,7 +106,7 @@ public final class Sheet implements Model { | |||
protected ArrayList records = null; | |||
int preoffset = 0; // offset of the sheet in a new file | |||
protected int dimsloc = -1; // TODO - is it legal for dims record to be missing? | |||
protected int dimsloc = -1; // TODO - remove dimsloc | |||
protected PrintGridlinesRecord printGridlines = null; | |||
protected GridsetRecord gridset = null; | |||
private GutsRecord _gutsRecord; | |||
@@ -125,7 +125,8 @@ public final class Sheet implements Model { | |||
private MergedCellsTable _mergedCellsTable; | |||
/** always present in this POI object, not always written to Excel file */ | |||
/*package*/ColumnInfoRecordsAggregate _columnInfos; | |||
protected DimensionsRecord dims; | |||
/** the DimensionsRecord is always present */ | |||
private DimensionsRecord _dimensions; | |||
protected RowRecordsAggregate _rowsAggregate = null; | |||
private DataValidityTable _dataValidityTable= null; | |||
private ConditionalFormattingTable condFormatting; | |||
@@ -287,7 +288,7 @@ public final class Sheet implements Model { | |||
records.add(retval._columnInfos); | |||
} | |||
retval.dims = ( DimensionsRecord ) rec; | |||
retval._dimensions = ( DimensionsRecord ) rec; | |||
retval.dimsloc = records.size(); | |||
} | |||
else if (rec.getSid() == DefaultColWidthRecord.sid) | |||
@@ -333,7 +334,7 @@ public final class Sheet implements Model { | |||
records.add(rec); | |||
} | |||
if (retval.dimsloc < 0) { | |||
if (retval._dimensions == null) { | |||
throw new RuntimeException("DimensionsRecord was not found"); | |||
} | |||
retval.records = records; | |||
@@ -404,6 +405,8 @@ public final class Sheet implements Model { | |||
public static Sheet createSheet() | |||
{ | |||
// TODO - convert this method to a constructor | |||
if (log.check( POILogger.DEBUG )) | |||
log.log(POILogger.DEBUG, "Sheet createsheet from scratch called"); | |||
Sheet retval = new Sheet(); | |||
@@ -423,7 +426,8 @@ public final class Sheet implements Model { | |||
records.add( retval.printGridlines ); | |||
retval.gridset = createGridset(); | |||
records.add( retval.gridset ); | |||
records.add( retval.createGuts() ); | |||
retval._gutsRecord = createGuts(); | |||
records.add( retval._gutsRecord ); | |||
retval.defaultrowheight = createDefaultRowHeight(); | |||
records.add( retval.defaultrowheight ); | |||
records.add( retval.createWSBool() ); | |||
@@ -440,8 +444,8 @@ public final class Sheet implements Model { | |||
ColumnInfoRecordsAggregate columns = new ColumnInfoRecordsAggregate(); | |||
records.add( columns ); | |||
retval._columnInfos = columns; | |||
retval.dims = createDimensions(); | |||
records.add(retval.dims); | |||
retval._dimensions = createDimensions(); | |||
records.add(retval._dimensions); | |||
retval.dimsloc = records.size()-1; | |||
records.add(retval.windowTwo = retval.createWindowTwo()); | |||
retval.selection = createSelection(); | |||
@@ -460,7 +464,7 @@ public final class Sheet implements Model { | |||
if (_rowsAggregate == null) | |||
{ | |||
_rowsAggregate = new RowRecordsAggregate(); | |||
records.add(getDimsLoc() + 1, _rowsAggregate); | |||
records.add(dimsloc + 1, _rowsAggregate); | |||
} | |||
} | |||
private MergedCellsTable getMergedRecords() { | |||
@@ -556,10 +560,10 @@ public final class Sheet implements Model { | |||
.append(lastrow).append("lastcol").append(lastcol) | |||
.toString()); | |||
} | |||
dims.setFirstCol(firstcol); | |||
dims.setFirstRow(firstrow); | |||
dims.setLastCol(lastcol); | |||
dims.setLastRow(lastrow); | |||
_dimensions.setFirstCol(firstcol); | |||
_dimensions.setFirstRow(firstrow); | |||
_dimensions.setLastCol(lastcol); | |||
_dimensions.setLastRow(lastrow); | |||
if (log.check( POILogger.DEBUG )) | |||
log.log(POILogger.DEBUG, "Sheet.setDimensions exiting"); | |||
} | |||
@@ -696,7 +700,7 @@ public final class Sheet implements Model { | |||
if(log.check(POILogger.DEBUG)) { | |||
log.log(POILogger.DEBUG, "add value record row" + row); | |||
} | |||
DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc()); | |||
DimensionsRecord d = _dimensions; | |||
if (col.getColumn() > d.getLastCol()) | |||
{ | |||
@@ -720,8 +724,8 @@ public final class Sheet implements Model { | |||
*/ | |||
public void removeValueRecord(int row, CellValueRecordInterface col) { | |||
log.logFormatted(POILogger.DEBUG, "remove value record row,dimsloc %,%", | |||
new int[]{row, dimsloc} ); | |||
log.logFormatted(POILogger.DEBUG, "remove value record row %", | |||
new int[]{row } ); | |||
_rowsAggregate.removeCell(col); | |||
} | |||
@@ -766,7 +770,7 @@ public final class Sheet implements Model { | |||
checkRows(); | |||
if (log.check( POILogger.DEBUG )) | |||
log.log(POILogger.DEBUG, "addRow "); | |||
DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc()); | |||
DimensionsRecord d = _dimensions; | |||
if (row.getRowNumber() >= d.getLastRow()) | |||
{ | |||
@@ -1330,27 +1334,6 @@ public final class Sheet implements Model { | |||
} | |||
} | |||
/** | |||
* get the location of the DimensionsRecord (which is the last record before the value section) | |||
* @return location in the array of records of the DimensionsRecord | |||
*/ | |||
public int getDimsLoc() | |||
{ | |||
if (log.check( POILogger.DEBUG )) | |||
log.log(POILogger.DEBUG, "getDimsLoc dimsloc= " + dimsloc); | |||
return dimsloc; | |||
} | |||
/** | |||
* in the event the record is a dimensions record, resets both the loc index and dimsloc index | |||
*/ | |||
public void checkDimsLoc(Record rec, int recloc) { | |||
if (rec.getSid() == DimensionsRecord.sid) { | |||
dimsloc = recloc; | |||
} | |||
} | |||
/** | |||
* @return the serialized size of this sheet | |||
*/ |
@@ -1806,10 +1806,11 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet | |||
} | |||
if (width != -1) { | |||
width *= 256; | |||
if (width > Short.MAX_VALUE) { //width can be bigger that Short.MAX_VALUE! | |||
width = Short.MAX_VALUE; | |||
} | |||
sheet.setColumnWidth(column, (short) (width * 256)); | |||
sheet.setColumnWidth(column, (short) (width)); | |||
} | |||
} | |||
@@ -45,37 +45,54 @@ public class POIFSLister { | |||
System.exit(1); | |||
} | |||
for (int j = 0; j < args.length; j++) | |||
{ | |||
viewFile(args[ j ]); | |||
boolean withSizes = false; | |||
for (int j = 0; j < args.length; j++) { | |||
if(args[j].equalsIgnoreCase("-size") || | |||
args[j].equalsIgnoreCase("-sizes")) { | |||
withSizes = true; | |||
} else { | |||
viewFile(args[j], withSizes); | |||
} | |||
} | |||
} | |||
public static void viewFile(final String filename) throws IOException | |||
public static void viewFile(final String filename, boolean withSizes) throws IOException | |||
{ | |||
POIFSFileSystem fs = new POIFSFileSystem( | |||
new FileInputStream(filename) | |||
); | |||
displayDirectory(fs.getRoot(), ""); | |||
displayDirectory(fs.getRoot(), "", withSizes); | |||
} | |||
public static void displayDirectory(DirectoryNode dir, String indent) { | |||
public static void displayDirectory(DirectoryNode dir, String indent, boolean withSizes) { | |||
System.out.println(indent + dir.getName() + " -"); | |||
String newIndent = indent + " "; | |||
boolean hadChildren = false; | |||
for(Iterator it = dir.getEntries(); it.hasNext(); ) { | |||
hadChildren = true; | |||
Object entry = it.next(); | |||
if(entry instanceof DirectoryNode) { | |||
displayDirectory((DirectoryNode)entry, newIndent); | |||
displayDirectory((DirectoryNode)entry, newIndent, withSizes); | |||
} else { | |||
DocumentNode doc = (DocumentNode)entry; | |||
String name = doc.getName(); | |||
String size = ""; | |||
if(name.charAt(0) < 10) { | |||
String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1); | |||
name = name.substring(1) + " <" + altname + ">"; | |||
} | |||
System.out.println(newIndent + name); | |||
if(withSizes) { | |||
size = " [" + | |||
doc.getSize() + " / 0x" + | |||
Integer.toHexString(doc.getSize()) + | |||
"]"; | |||
} | |||
System.out.println(newIndent + name + size); | |||
} | |||
} | |||
if(!hadChildren) { | |||
System.out.println(newIndent + "(no children)"); | |||
} | |||
} | |||
} |
@@ -0,0 +1,353 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.hpbf.dev; | |||
import java.io.FileInputStream; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import org.apache.poi.ddf.DefaultEscherRecordFactory; | |||
import org.apache.poi.ddf.EscherRecord; | |||
import org.apache.poi.poifs.filesystem.DirectoryNode; | |||
import org.apache.poi.poifs.filesystem.DocumentEntry; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.util.StringUtil; | |||
/** | |||
* For dumping out the contents of HPBF (Publisher) | |||
* files, while we try to figure out how they're | |||
* constructed. | |||
*/ | |||
public class HPBFDumper { | |||
private POIFSFileSystem fs; | |||
/**
 * Creates a dumper that reads from an already opened POIFS filesystem.
 *
 * @param fs the filesystem whose streams will be dumped
 */
public HPBFDumper(POIFSFileSystem fs) {
    this.fs = fs;
}
/**
 * Creates a dumper by wrapping the given stream in a new
 * {@link POIFSFileSystem} and delegating to the other constructor.
 *
 * @param inp stream to build the POIFS filesystem from
 * @throws IOException if the filesystem cannot be built from the stream
 */
public HPBFDumper(InputStream inp) throws IOException {
    this(new POIFSFileSystem(inp));
}
private static byte[] getData(DirectoryNode dir, String name) throws IOException { | |||
DocumentEntry docProps = | |||
(DocumentEntry)dir.getEntry(name); | |||
// Grab the document stream | |||
byte[] d = new byte[docProps.getSize()]; | |||
dir.createDocumentInputStream(name).read(d); | |||
// All done | |||
return d; | |||
} | |||
/** | |||
* Dumps out the given number of bytes as hex, | |||
* two chars | |||
*/ | |||
private String dumpBytes(byte[] data, int offset, int len) { | |||
StringBuffer ret = new StringBuffer(); | |||
for(int i=0; i<len; i++) { | |||
int j = i + offset; | |||
int b = data[j]; | |||
if(b < 0) { b += 256; } | |||
String bs = Integer.toHexString(b); | |||
if(bs.length() == 1) | |||
ret.append('0'); | |||
ret.append(bs); | |||
ret.append(' '); | |||
} | |||
return ret.toString(); | |||
} | |||
public static void main(String[] args) throws Exception { | |||
if(args.length < 1) { | |||
System.err.println("Use:"); | |||
System.err.println(" HPBFDumper <filename>"); | |||
System.exit(1); | |||
} | |||
HPBFDumper dump = new HPBFDumper( | |||
new FileInputStream(args[0]) | |||
); | |||
System.out.println("Dumping " + args[0]); | |||
dump.dumpContents(); | |||
dump.dumpEnvelope(); | |||
dump.dumpEscher(); | |||
dump.dump001CompObj(dump.fs.getRoot()); | |||
dump.dumpQuill(); | |||
// Still to go: | |||
// (0x03)Internal | |||
// Objects | |||
} | |||
/** | |||
* Dump out the escher parts of the file. | |||
* Escher -> EscherStm and EscherDelayStm | |||
*/ | |||
public void dumpEscher() throws IOException { | |||
DirectoryNode escherDir = (DirectoryNode) | |||
fs.getRoot().getEntry("Escher"); | |||
dumpEscherStm(escherDir); | |||
dumpEscherDelayStm(escherDir); | |||
} | |||
private void dumpEscherStream(byte[] data) { | |||
DefaultEscherRecordFactory erf = | |||
new DefaultEscherRecordFactory(); | |||
// Dump | |||
int left = data.length; | |||
while(left > 0) { | |||
EscherRecord er = erf.createRecord(data, 0); | |||
er.fillFields(data, 0, erf); | |||
left -= er.getRecordSize(); | |||
System.out.println(er.toString()); | |||
} | |||
} | |||
protected void dumpEscherStm(DirectoryNode escherDir) throws IOException { | |||
byte[] data = getData(escherDir, "EscherStm"); | |||
System.out.println(""); | |||
System.out.println("EscherStm - " + data.length + " bytes long:"); | |||
if(data.length > 0) | |||
dumpEscherStream(data); | |||
} | |||
protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException { | |||
byte[] data = getData(escherDir, "EscherDelayStm"); | |||
System.out.println(""); | |||
System.out.println("EscherDelayStm - " + data.length + " bytes long:"); | |||
if(data.length > 0) | |||
dumpEscherStream(data); | |||
} | |||
public void dumpEnvelope() throws IOException { | |||
byte[] data = getData(fs.getRoot(), "Envelope"); | |||
System.out.println(""); | |||
System.out.println("Envelope - " + data.length + " bytes long:"); | |||
} | |||
public void dumpContents() throws IOException { | |||
byte[] data = getData(fs.getRoot(), "Contents"); | |||
System.out.println(""); | |||
System.out.println("Contents - " + data.length + " bytes long:"); | |||
// 8 bytes, always seems to be | |||
// E8 AC 2C 00 E8 03 05 01 | |||
// E8 AC 2C 00 E8 03 05 01 | |||
// 4 bytes - size of contents | |||
// 13/15 00 00 01 | |||
// .... | |||
// E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... .......... | |||
// 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... .. | |||
// 01 18 30 00 03 20 00 00 | |||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 | |||
// 00 00 00 88 1E 00 00 00 | |||
// 01 18 31 00 03 20 00 00 | |||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 | |||
// 00 00 00 88 1E 00 00 00 | |||
// 01 18 32 00 03 20 00 00 | |||
// E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 | |||
// 00 00 00 88 1E 00 00 00 | |||
} | |||
public void dumpCONTENTSraw(DirectoryNode dir) throws IOException { | |||
byte[] data = getData(dir, "CONTENTS"); | |||
System.out.println(""); | |||
System.out.println("CONTENTS - " + data.length + " bytes long:"); | |||
// Between the start and 0x200 we have | |||
// CHNKINK(space) + 24 bytes | |||
// 0x1800 | |||
// TEXT + 6 bytes | |||
// TEXT + 8 bytes | |||
// 0x1800 | |||
// STSH + 6 bytes | |||
// STSH + 8 bytes | |||
// 0x1800 | |||
// STSH + 6 bytes | |||
// STSH + 8 bytes | |||
// but towards 0x200 the pattern may | |||
// break down a little bit | |||
// After the second of a given type, | |||
// it seems to be 4 bytes giving the start, | |||
// then 4 bytes giving the length, then | |||
// 18 00 | |||
System.out.println( | |||
new String(data, 0, 8) + | |||
dumpBytes(data, 8, 0x20-8) | |||
); | |||
int pos = 0x20; | |||
boolean sixNotEight = true; | |||
while(pos < 0x200) { | |||
if(sixNotEight) { | |||
System.out.println( | |||
dumpBytes(data, pos, 2) | |||
); | |||
pos += 2; | |||
} | |||
String text = new String(data, pos, 4); | |||
int blen = 8; | |||
if(sixNotEight) | |||
blen = 6; | |||
System.out.println( | |||
text + " " + dumpBytes(data, pos+4, blen) | |||
); | |||
pos += 4 + blen; | |||
sixNotEight = ! sixNotEight; | |||
} | |||
// Text from 0x200 onwards until we get | |||
// to \r(00)\n(00)(00)(00) | |||
int textStop = -1; | |||
for(int i=0x200; i<data.length-2 && textStop == -1; i++) { | |||
if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) { | |||
textStop = i; | |||
} | |||
} | |||
if(textStop > 0) { | |||
int len = (textStop - 0x200) / 2; | |||
System.out.println(""); | |||
System.out.println( | |||
StringUtil.getFromUnicodeLE(data, 0x200, len) | |||
); | |||
} | |||
// The font list comes slightly later | |||
// The hyperlinks may come before the fonts, | |||
// or slightly in front | |||
} | |||
public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException { | |||
byte[] data = getData(dir, "CONTENTS"); | |||
System.out.println(""); | |||
System.out.println("CONTENTS - " + data.length + " bytes long:"); | |||
String[] startType = new String[20]; | |||
String[] endType = new String[20]; | |||
int[] optA = new int[20]; | |||
int[] optB = new int[20]; | |||
int[] optC = new int[20]; | |||
int[] from = new int[20]; | |||
int[] len = new int[20]; | |||
for(int i=0; i<20; i++) { | |||
int offset = 0x20 + i*24; | |||
if(data[offset] == 0x18 && data[offset+1] == 0x00) { | |||
// Has data | |||
startType[i] = new String(data, offset+2, 4); | |||
optA[i] = LittleEndian.getUShort(data, offset+6); | |||
optB[i] = LittleEndian.getUShort(data, offset+8); | |||
optC[i] = LittleEndian.getUShort(data, offset+10); | |||
endType[i] = new String(data, offset+12, 4); | |||
from[i] = (int)LittleEndian.getUInt(data, offset+16); | |||
len[i] = (int)LittleEndian.getUInt(data, offset+20); | |||
} else { | |||
// Doesn't have data | |||
} | |||
} | |||
String text = StringUtil.getFromUnicodeLE( | |||
data, from[0], len[0]/2 | |||
); | |||
// Dump | |||
for(int i=0; i<20; i++) { | |||
String num = Integer.toString(i); | |||
if(i < 10) { | |||
num = "0" + i; | |||
} | |||
System.out.print(num + " "); | |||
if(startType[i] == null) { | |||
System.out.println("(not present)"); | |||
} else { | |||
System.out.println( | |||
"\t" + | |||
startType[i] + " " + | |||
optA[i] + " " + | |||
optB[i] + " " + | |||
optC[i] | |||
); | |||
System.out.println( | |||
"\t" + | |||
endType[i] + " " + | |||
"from: " + | |||
Integer.toHexString(from[i]) + | |||
" (" + from[i] + ")" + | |||
", len: " + | |||
Integer.toHexString(len[i]) + | |||
" (" + len[i] + ")" | |||
); | |||
} | |||
} | |||
// Text | |||
System.out.println(""); | |||
System.out.println("TEXT:"); | |||
System.out.println(text); | |||
System.out.println(""); | |||
// All the others | |||
for(int i=0; i<20; i++) { | |||
if(startType[i] == null) { | |||
continue; | |||
} | |||
int start = from[i]; | |||
System.out.println( | |||
startType[i] + " -> " + endType[i] + | |||
" @ " + Integer.toHexString(start) + | |||
" (" + start + ")" | |||
); | |||
System.out.println("\t" + dumpBytes(data, start, 4)); | |||
System.out.println("\t" + dumpBytes(data, start+4, 4)); | |||
System.out.println("\t" + dumpBytes(data, start+8, 4)); | |||
System.out.println("\t(etc)"); | |||
} | |||
} | |||
protected void dump001CompObj(DirectoryNode dir) { | |||
// TODO | |||
} | |||
public void dumpQuill() throws IOException { | |||
DirectoryNode quillDir = (DirectoryNode) | |||
fs.getRoot().getEntry("Quill"); | |||
DirectoryNode quillSubDir = (DirectoryNode) | |||
quillDir.getEntry("QuillSub"); | |||
dump001CompObj(quillSubDir); | |||
dumpCONTENTSraw(quillSubDir); | |||
dumpCONTENTSguessed(quillSubDir); | |||
} | |||
} |
@@ -0,0 +1,29 @@ | |||
This is some text on the first page | |||
It’s in times new roman, font size 10, all normal | |||
This is in bold and italic | |||
It’s Arial, 20 point font | |||
It’s in the second textbox on the first page | |||
This is the second page | |||
It is also times new roman, 10 point | |||
Table on page 2 Top right | |||
P2 table left P2 table right | |||
Bottom Left Bottom Right | |||
This text is on page two | |||
This is a link to Apache POI | |||
More normal text | |||
Link to a file | |||
More text, more hyperlinks | |||
email link | |||
Final hyperlink | |||
Within doc to page 1 |
@@ -0,0 +1,34 @@ | |||
This is some text on the first page | |||
It’s in times new roman, font size 10, all normal | |||
We’ve added some more text in here, to push all the offsets about a bit. | |||
This is in bold and italic | |||
It’s Arial, 20 point font | |||
It’s in the second textbox on the first page | |||
Ditto with more text in here. | |||
This is the second page | |||
It is also times new roman, 10 point | |||
Table on page 2 Top right | |||
P2 table left P2 table right | |||
Bottom Left Bottom Right | |||
This text is on page two | |||
This is a link to Apache POI | |||
More normal text | |||
Link to a file | |||
More text, more hyperlinks | |||
email link | |||
Final hyperlink | |||
Within doc to page 1 |
@@ -0,0 +1,29 @@ | |||
This is some text on the first page | |||
It’s in times new roman, font size 10, all normal | |||
This is in bold and italic | |||
It’s Arial, 20 point font | |||
It’s in the second textbox on the first page | |||
This is the second page12345678 | |||
It is also times new roman, 10 point | |||
Table on page 2 Top right | |||
P2 table left P2 table right | |||
Bottom Left Bottom Right | |||
This text is on page two | |||
This is a link to Apache POI | |||
More normal text | |||
Link to a file | |||
More text, more hyperlinks | |||
email link | |||
Final hyperlink | |||
Within doc to page 1 |
@@ -0,0 +1,29 @@ | |||
This is some text on the first page | |||
It’s in times new roman, font size 10, all normal | |||
This is in bold and italic | |||
It’s Arial, 20 point font | |||
It’s in the second textbox on the first page | |||
This is the second page | |||
It is also times new roman, 10 point | |||
Table on page 2 Top right | |||
P2 table left P2 table right | |||
Bottom Left Bottom Right | |||
This text is on page two | |||
This is a link to Apache POI | |||
More normal text | |||
Link to a file | |||
More text, more hyperlinks | |||
email link | |||
Final hyperlink | |||
Within doc to page 1 |
@@ -32,6 +32,7 @@ import org.apache.poi.hssf.record.CellValueRecordInterface; | |||
import org.apache.poi.hssf.record.ColumnInfoRecord; | |||
import org.apache.poi.hssf.record.DimensionsRecord; | |||
import org.apache.poi.hssf.record.EOFRecord; | |||
import org.apache.poi.hssf.record.GutsRecord; | |||
import org.apache.poi.hssf.record.IndexRecord; | |||
import org.apache.poi.hssf.record.MergeCellsRecord; | |||
import org.apache.poi.hssf.record.Record; | |||
@@ -41,6 +42,8 @@ import org.apache.poi.hssf.record.UncalcedRecord; | |||
import org.apache.poi.hssf.record.aggregates.ColumnInfoRecordsAggregate; | |||
import org.apache.poi.hssf.record.aggregates.PageSettingsBlock; | |||
import org.apache.poi.hssf.record.aggregates.RowRecordsAggregate; | |||
import org.apache.poi.hssf.usermodel.HSSFSheet; | |||
import org.apache.poi.hssf.usermodel.HSSFWorkbook; | |||
import org.apache.poi.hssf.util.CellRangeAddress; | |||
/** | |||
@@ -438,8 +441,8 @@ public final class TestSheet extends TestCase { | |||
if (false) { | |||
// make sure that RRA and VRA are in the right place | |||
// (Aug 2008) since the VRA is now part of the RRA, there is much less chance that | |||
// they could get out of order. Still, one could write serialize the sheet here, | |||
// and read back with EventRecordFactory to make sure... | |||
// they could get out of order. Still, one could write serialize the sheet here, | |||
// and read back with EventRecordFactory to make sure... | |||
} | |||
assertEquals(242, dbCellRecordPos); | |||
} | |||
@@ -475,5 +478,29 @@ public final class TestSheet extends TestCase { | |||
return _indexRecord; | |||
} | |||
} | |||
/** | |||
* Checks for bug introduced around r682282-r683880 that caused a second GUTS records | |||
* which in turn got the dimensions record out of alignment | |||
*/ | |||
public void testGutsRecord_bug45640() { | |||
Sheet sheet = Sheet.createSheet(); | |||
sheet.addRow(new RowRecord(0)); | |||
sheet.addRow(new RowRecord(1)); | |||
sheet.groupRowRange( 0, 1, true ); | |||
sheet.toString(); | |||
List recs = sheet.getRecords(); | |||
int count=0; | |||
for(int i=0; i< recs.size(); i++) { | |||
if (recs.get(i) instanceof GutsRecord) { | |||
count++; | |||
} | |||
} | |||
if (count == 2) { | |||
throw new AssertionFailedError("Identified bug 45640"); | |||
} | |||
assertEquals(1, count); | |||
} | |||
} | |||