From: Nick Burch
Date: Wed, 20 Aug 2008 13:47:57 +0000 (+0000)
Subject: Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-63924...
X-Git-Tag: REL_3_5_BETA3~41
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=48ef5fb2d3523381ad6ef2c135b7625860a5d9f3;p=poi.git
Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-66
0255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677027,677030-677040,677042-677056,677058-677375,677377-677968,677970-677971,677973,677975-677994,677996-678286,678288-678538,678540-680393,680395-680469,680471-680529,680531-680852,680854-681529,681531-681571,681573-682224,682226,682228,682231-682281,682283-682335,682337-682507,682509,682512-682517,682519-682532,682534-682619,682622-682777,682779-682998,683000-683019,683021-683022,683024-683080,683082-683092,683094-683095,683097-683127,683129-683131,683133-683166,683168-683698,683700-683705,683707-683757,683759-683787,683789-683870,683872-683879,683881-683900,683902-684066,684068-684074,684076-684222,684224-684254,684257-684281,684283-684286,684288-684292,684294-684298,684300-684301,684303-684308,684310-684317,684320,684323-684335,684337-684348,684350-684354,684356-684361,684363-684369,684371-684453,684455-684883,684885-684937,684940-684958,684960-684970,684972-684985,684987-685053,685055-685063,685065-685259,685261-685262,685264-685266,685268-685282,685285-686035,686037-686045,686047-686052,686054-687331 via svnmerge from
https://svn.apache.org/repos/asf/poi/trunk
........
r686207 | nick | 2008-08-15 13:43:02 +0100 (Fri, 15 Aug 2008) | 1 line
Add sample publisher files from bug #45602 to svn
........
r686216 | nick | 2008-08-15 15:05:30 +0100 (Fri, 15 Aug 2008) | 1 line
Add a few more source package excludes
........
r686278 | nick | 2008-08-15 17:57:30 +0100 (Fri, 15 Aug 2008) | 1 line
More sample hpbf docs, with a description
........
r686290 | nick | 2008-08-15 18:42:25 +0100 (Fri, 15 Aug 2008) | 1 line
Start on a HPBF dumper
........
r686621 | nick | 2008-08-17 17:36:40 +0100 (Sun, 17 Aug 2008) | 1 line
Few little tweaks to dev helpers
........
r686624 | nick | 2008-08-17 18:39:10 +0100 (Sun, 17 Aug 2008) | 1 line
More work understanding hpbf
........
r686625 | nick | 2008-08-17 19:02:31 +0100 (Sun, 17 Aug 2008) | 1 line
More work understanding hpbf
........
r686628 | nick | 2008-08-17 19:21:34 +0100 (Sun, 17 Aug 2008) | 1 line
More work understanding hpbf
........
r686640 | nick | 2008-08-17 21:15:51 +0100 (Sun, 17 Aug 2008) | 1 line
Further HPBF documentation, and some more sample files used
........
r686844 | yegor | 2008-08-18 19:33:58 +0100 (Mon, 18 Aug 2008) | 1 line
fixed bug #45645: Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE
........
r686977 | josh | 2008-08-19 08:44:57 +0100 (Tue, 19 Aug 2008) | 1 line
Fix for bug 45640 - avoid creating multiple GUTS records
........
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@687333 13f79535-47bb-0310-9956-ffa450edef68
---
diff --git a/build.xml b/build.xml
index cbbfde883d..3eed344cb2 100644
--- a/build.xml
+++ b/build.xml
@@ -649,6 +649,7 @@ under the License.
+
@@ -707,6 +708,7 @@ under the License.
+
@@ -742,6 +744,7 @@ under the License.
+
@@ -1284,10 +1287,13 @@ FORREST_HOME environment variable!
+
+
+
@@ -1313,10 +1319,13 @@ FORREST_HOME environment variable!
+
+
+
diff --git a/src/documentation/content/xdocs/book.xml b/src/documentation/content/xdocs/book.xml
index a55b67bad0..d9bc0e91df 100644
--- a/src/documentation/content/xdocs/book.xml
+++ b/src/documentation/content/xdocs/book.xml
@@ -41,6 +41,7 @@
+
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index 269a13fd4f..62fee5b1c2 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -64,6 +64,7 @@
Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx
+ 45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE
45623 - Support for additional HSSF header and footer fields, including bold and full file path
45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required
45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)
diff --git a/src/documentation/content/xdocs/hpbf/file-format.xml b/src/documentation/content/xdocs/hpbf/file-format.xml
new file mode 100644
index 0000000000..97d5a33d7c
--- /dev/null
+++ b/src/documentation/content/xdocs/hpbf/file-format.xml
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+ POI-HPBF - A Guide to the Publisher File Format
+ Overview
+
+
+
+
+
+
+ Document Streams
+
+ The file is made up of a number of POIFS streams. A typical
+ file will be made up as follows:
+
+
+
+ Changing Text
+
If you make a change to the text of a file, but do not change
+ how much text there is, then the CONTENTS stream
+ will undergo a small change, and the Contents stream
+ will undergo a large change.
+
If you make a change to the text of a file, and change the
+ amount of text there is, then both the Contents and
+ the CONTENTS streams change.
+
+ Changing Shapes
+
If you alter the size of a textbox, but make no text changes,
+ then both Contents and CONTENTS streams
+ change. There are no changes to the Escher streams.
+
If you set the background colour of a textbox, but make
+ no changes to the text, (to finish off)
+
+ Structure of CONTENTS
+
First we have "CHNKINK ", followed by 24 bytes.
+
Next we have 20 sequences of 24 bytes each. If the first two bytes
+ are 0x1800, then that sequence entry exists, but if it's 0x0000 then
+ the entry doesn't exist. If it does exist, we then have 4 bytes of
+ upper case ASCII text, followed by three little endian shorts.
+ The first of these seems to be the count of that type, the second is
+ usually 1, the third is usually zero. Then we have another 4 bytes of
+ upper case ASCII text, normally but not always the same as the first
+ text. Finally, we have an unsigned little endian 32 bit offset to
+ the start of the data for this, then an unsigned little endian
+ 32 bit length of this section.
+
Normally, the first sequence entry is for TEXT, and the text data
+ will start at 0x200. After that is normally two or three STSH entries
+ (so the first short has values 0, then 1, then 2). After that it
+ seems to vary.
+
At 0x200 we have the text, stored as little endian 16 bit unicode.
+
After the text comes all sorts of other stuff, presumably as
+ described by the sequences.
+
For a contents stream of length 7168 / 0x1c00 bytes, the start
+ looks something like:
+
+
+
+
diff --git a/src/documentation/content/xdocs/hpbf/index.xml b/src/documentation/content/xdocs/hpbf/index.xml
new file mode 100755
index 0000000000..c74dc23621
--- /dev/null
+++ b/src/documentation/content/xdocs/hpbf/index.xml
@@ -0,0 +1,53 @@
+
+
+
+
+
+
+ POI-HPBF - Java API To Access Microsoft Publisher Format Files
+ Overview
+
+
+
+
+
+
+
+ Overview
+
+
HPBF is the POI Project's pure Java implementation of the Publisher file format.
+
Currently, HPBF is in the experimental stage, while we try
+ to figure out the file format. Our initial aim is to provide
+ a text extractor for the format, with low level code following
+ after that if demand and developer interest warrant it.
+
At this time, there is no usermodel api or similar.
+
Our current understanding of the file format is documented
+ here.
+
+ This code currently lives in the
+ scratchpad area
+ of the POI SVN repository.
+ Ensure that you have the scratchpad jar or the scratchpad
+ build area in your
+ classpath before experimenting with this code.
+
+
+
+
diff --git a/src/documentation/content/xdocs/index.xml b/src/documentation/content/xdocs/index.xml
index d112140c71..f8336be0b5 100644
--- a/src/documentation/content/xdocs/index.xml
+++ b/src/documentation/content/xdocs/index.xml
@@ -141,26 +141,30 @@
href="./slideshow/index.html">the HSLF project page for more
information.
- HDGF for Visio Documents
-
HDGF is our port of the Microsoft Viso 97(-2003) file format to pure
- Java. It currently only supports reading at a very low level, and
- simple text extraction. Please see the HDGF project page for more
- information.
- HPSF for Document Properties
HPSF is our port of the OLE 2 property set format to pure
Java. Property sets are mostly use to store a document's properties
(title, author, date of last modification etc.), but they can be used
for application-specific purposes as well.
-
HPSF supports reading and writing of properties. However, you will
- need to be using version 3.0 of POI to utilise the write support.
-
+
HPSF supports both reading and writing of properties.
Please see the HPSF project
page for more information.
-
+ HDGF for Visio Documents
+
HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
+ Java. It currently only supports reading at a very low level, and
+ simple text extraction. Please see the HDGF project page for more
+ information.
+
+ HPBF for Publisher Documents
+
HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure
+ Java. At the moment, we are still figuring out the file format, but we hope
+ to have simple text extraction shortly. Please see the HPBF project page for more
+ information.
+ Contributing
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index b59455ebf1..96272b165c 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -61,6 +61,7 @@
Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx
+ 45645 - Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE
45623 - Support for additional HSSF header and footer fields, including bold and full file path
45623 - Support stripping HSSF header and footer fields (eg page number) out of header and footer text if required
45622 - Support stripping HWPF fields (eg macros) out of text, via Range.stripFields(text)
diff --git a/src/java/org/apache/poi/hssf/model/Sheet.java b/src/java/org/apache/poi/hssf/model/Sheet.java
index 710d57118d..336b1dbd46 100644
--- a/src/java/org/apache/poi/hssf/model/Sheet.java
+++ b/src/java/org/apache/poi/hssf/model/Sheet.java
@@ -106,7 +106,7 @@ public final class Sheet implements Model {
protected ArrayList records = null;
int preoffset = 0; // offset of the sheet in a new file
- protected int dimsloc = -1; // TODO - is it legal for dims record to be missing?
+ protected int dimsloc = -1; // TODO - remove dimsloc
protected PrintGridlinesRecord printGridlines = null;
protected GridsetRecord gridset = null;
private GutsRecord _gutsRecord;
@@ -125,7 +125,8 @@ public final class Sheet implements Model {
private MergedCellsTable _mergedCellsTable;
/** always present in this POI object, not always written to Excel file */
/*package*/ColumnInfoRecordsAggregate _columnInfos;
- protected DimensionsRecord dims;
+ /** the DimensionsRecord is always present */
+ private DimensionsRecord _dimensions;
protected RowRecordsAggregate _rowsAggregate = null;
private DataValidityTable _dataValidityTable= null;
private ConditionalFormattingTable condFormatting;
@@ -287,7 +288,7 @@ public final class Sheet implements Model {
records.add(retval._columnInfos);
}
- retval.dims = ( DimensionsRecord ) rec;
+ retval._dimensions = ( DimensionsRecord ) rec;
retval.dimsloc = records.size();
}
else if (rec.getSid() == DefaultColWidthRecord.sid)
@@ -333,7 +334,7 @@ public final class Sheet implements Model {
records.add(rec);
}
- if (retval.dimsloc < 0) {
+ if (retval._dimensions == null) {
throw new RuntimeException("DimensionsRecord was not found");
}
retval.records = records;
@@ -404,6 +405,8 @@ public final class Sheet implements Model {
public static Sheet createSheet()
{
+ // TODO - convert this method to a constructor
+
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "Sheet createsheet from scratch called");
Sheet retval = new Sheet();
@@ -423,7 +426,8 @@ public final class Sheet implements Model {
records.add( retval.printGridlines );
retval.gridset = createGridset();
records.add( retval.gridset );
- records.add( retval.createGuts() );
+ retval._gutsRecord = createGuts();
+ records.add( retval._gutsRecord );
retval.defaultrowheight = createDefaultRowHeight();
records.add( retval.defaultrowheight );
records.add( retval.createWSBool() );
@@ -440,8 +444,8 @@ public final class Sheet implements Model {
ColumnInfoRecordsAggregate columns = new ColumnInfoRecordsAggregate();
records.add( columns );
retval._columnInfos = columns;
- retval.dims = createDimensions();
- records.add(retval.dims);
+ retval._dimensions = createDimensions();
+ records.add(retval._dimensions);
retval.dimsloc = records.size()-1;
records.add(retval.windowTwo = retval.createWindowTwo());
retval.selection = createSelection();
@@ -460,7 +464,7 @@ public final class Sheet implements Model {
if (_rowsAggregate == null)
{
_rowsAggregate = new RowRecordsAggregate();
- records.add(getDimsLoc() + 1, _rowsAggregate);
+ records.add(dimsloc + 1, _rowsAggregate);
}
}
private MergedCellsTable getMergedRecords() {
@@ -556,10 +560,10 @@ public final class Sheet implements Model {
.append(lastrow).append("lastcol").append(lastcol)
.toString());
}
- dims.setFirstCol(firstcol);
- dims.setFirstRow(firstrow);
- dims.setLastCol(lastcol);
- dims.setLastRow(lastrow);
+ _dimensions.setFirstCol(firstcol);
+ _dimensions.setFirstRow(firstrow);
+ _dimensions.setLastCol(lastcol);
+ _dimensions.setLastRow(lastrow);
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "Sheet.setDimensions exiting");
}
@@ -696,7 +700,7 @@ public final class Sheet implements Model {
if(log.check(POILogger.DEBUG)) {
log.log(POILogger.DEBUG, "add value record row" + row);
}
- DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc());
+ DimensionsRecord d = _dimensions;
if (col.getColumn() > d.getLastCol())
{
@@ -720,8 +724,8 @@ public final class Sheet implements Model {
*/
public void removeValueRecord(int row, CellValueRecordInterface col) {
- log.logFormatted(POILogger.DEBUG, "remove value record row,dimsloc %,%",
- new int[]{row, dimsloc} );
+ log.logFormatted(POILogger.DEBUG, "remove value record row %",
+ new int[]{row } );
_rowsAggregate.removeCell(col);
}
@@ -766,7 +770,7 @@ public final class Sheet implements Model {
checkRows();
if (log.check( POILogger.DEBUG ))
log.log(POILogger.DEBUG, "addRow ");
- DimensionsRecord d = ( DimensionsRecord ) records.get(getDimsLoc());
+ DimensionsRecord d = _dimensions;
if (row.getRowNumber() >= d.getLastRow())
{
@@ -1330,27 +1334,6 @@ public final class Sheet implements Model {
}
}
- /**
- * get the location of the DimensionsRecord (which is the last record before the value section)
- * @return location in the array of records of the DimensionsRecord
- */
-
- public int getDimsLoc()
- {
- if (log.check( POILogger.DEBUG ))
- log.log(POILogger.DEBUG, "getDimsLoc dimsloc= " + dimsloc);
- return dimsloc;
- }
-
- /**
- * in the event the record is a dimensions record, resets both the loc index and dimsloc index
- */
- public void checkDimsLoc(Record rec, int recloc) {
- if (rec.getSid() == DimensionsRecord.sid) {
- dimsloc = recloc;
- }
- }
-
/**
* @return the serialized size of this sheet
*/
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java
index e61d7a1cc8..3e51fe8317 100644
--- a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java
@@ -1806,10 +1806,11 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet
}
if (width != -1) {
+ width *= 256;
if (width > Short.MAX_VALUE) { //width can be bigger that Short.MAX_VALUE!
width = Short.MAX_VALUE;
}
- sheet.setColumnWidth(column, (short) (width * 256));
+ sheet.setColumnWidth(column, (short) (width));
}
}
diff --git a/src/java/org/apache/poi/poifs/dev/POIFSLister.java b/src/java/org/apache/poi/poifs/dev/POIFSLister.java
index c9fa349d66..cdd9902c40 100644
--- a/src/java/org/apache/poi/poifs/dev/POIFSLister.java
+++ b/src/java/org/apache/poi/poifs/dev/POIFSLister.java
@@ -45,37 +45,54 @@ public class POIFSLister {
System.exit(1);
}
- for (int j = 0; j < args.length; j++)
- {
- viewFile(args[ j ]);
+ boolean withSizes = false;
+ for (int j = 0; j < args.length; j++) {
+ if(args[j].equalsIgnoreCase("-size") ||
+ args[j].equalsIgnoreCase("-sizes")) {
+ withSizes = true;
+ } else {
+ viewFile(args[j], withSizes);
+ }
}
}
- public static void viewFile(final String filename) throws IOException
+ public static void viewFile(final String filename, boolean withSizes) throws IOException
{
POIFSFileSystem fs = new POIFSFileSystem(
new FileInputStream(filename)
);
- displayDirectory(fs.getRoot(), "");
+ displayDirectory(fs.getRoot(), "", withSizes);
}
- public static void displayDirectory(DirectoryNode dir, String indent) {
+ public static void displayDirectory(DirectoryNode dir, String indent, boolean withSizes) {
System.out.println(indent + dir.getName() + " -");
String newIndent = indent + " ";
+ boolean hadChildren = false;
for(Iterator it = dir.getEntries(); it.hasNext(); ) {
+ hadChildren = true;
Object entry = it.next();
if(entry instanceof DirectoryNode) {
- displayDirectory((DirectoryNode)entry, newIndent);
+ displayDirectory((DirectoryNode)entry, newIndent, withSizes);
} else {
DocumentNode doc = (DocumentNode)entry;
String name = doc.getName();
+ String size = "";
if(name.charAt(0) < 10) {
String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1);
name = name.substring(1) + " <" + altname + ">";
}
- System.out.println(newIndent + name);
+ if(withSizes) {
+ size = " [" +
+ doc.getSize() + " / 0x" +
+ Integer.toHexString(doc.getSize()) +
+ "]";
+ }
+ System.out.println(newIndent + name + size);
}
}
+ if(!hadChildren) {
+ System.out.println(newIndent + "(no children)");
+ }
}
}
\ No newline at end of file
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
new file mode 100644
index 0000000000..6c52bbb04c
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
@@ -0,0 +1,353 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.dev;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.ddf.DefaultEscherRecordFactory;
+import org.apache.poi.ddf.EscherRecord;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
+
+/**
+ * For dumping out the contents of HPBF (Publisher)
+ * files, while we try to figure out how they're
+ * constructed.
+ */
+public class HPBFDumper {
+ private POIFSFileSystem fs;
+ public HPBFDumper(POIFSFileSystem fs) {
+ this.fs = fs;
+ }
+ public HPBFDumper(InputStream inp) throws IOException {
+ this(new POIFSFileSystem(inp));
+ }
+
+ private static byte[] getData(DirectoryNode dir, String name) throws IOException {
+ DocumentEntry docProps =
+ (DocumentEntry)dir.getEntry(name);
+
+ // Grab the document stream
+ byte[] d = new byte[docProps.getSize()];
+ dir.createDocumentInputStream(name).read(d);
+
+ // All done
+ return d;
+ }
+
+ /**
+ * Dumps out the given number of bytes as hex,
+ * two chars
+ */
+ private String dumpBytes(byte[] data, int offset, int len) {
+ StringBuffer ret = new StringBuffer();
+ for(int i=0; i");
+ System.exit(1);
+ }
+ HPBFDumper dump = new HPBFDumper(
+ new FileInputStream(args[0])
+ );
+
+ System.out.println("Dumping " + args[0]);
+ dump.dumpContents();
+ dump.dumpEnvelope();
+ dump.dumpEscher();
+ dump.dump001CompObj(dump.fs.getRoot());
+ dump.dumpQuill();
+
+ // Still to go:
+ // (0x03)Internal
+ // Objects
+ }
+
+ /**
+ * Dump out the escher parts of the file.
+ * Escher -> EscherStm and EscherDelayStm
+ */
+ public void dumpEscher() throws IOException {
+ DirectoryNode escherDir = (DirectoryNode)
+ fs.getRoot().getEntry("Escher");
+
+ dumpEscherStm(escherDir);
+ dumpEscherDelayStm(escherDir);
+ }
+ private void dumpEscherStream(byte[] data) {
+ DefaultEscherRecordFactory erf =
+ new DefaultEscherRecordFactory();
+
+ // Dump
+ int left = data.length;
+ while(left > 0) {
+ EscherRecord er = erf.createRecord(data, 0);
+ er.fillFields(data, 0, erf);
+ left -= er.getRecordSize();
+
+ System.out.println(er.toString());
+ }
+ }
+ protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
+ byte[] data = getData(escherDir, "EscherStm");
+ System.out.println("");
+ System.out.println("EscherStm - " + data.length + " bytes long:");
+ if(data.length > 0)
+ dumpEscherStream(data);
+ }
+ protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
+ byte[] data = getData(escherDir, "EscherDelayStm");
+ System.out.println("");
+ System.out.println("EscherDelayStm - " + data.length + " bytes long:");
+ if(data.length > 0)
+ dumpEscherStream(data);
+ }
+
+ public void dumpEnvelope() throws IOException {
+ byte[] data = getData(fs.getRoot(), "Envelope");
+
+ System.out.println("");
+ System.out.println("Envelope - " + data.length + " bytes long:");
+ }
+
+ public void dumpContents() throws IOException {
+ byte[] data = getData(fs.getRoot(), "Contents");
+
+ System.out.println("");
+ System.out.println("Contents - " + data.length + " bytes long:");
+
+ // 8 bytes, always seems to be
+ // E8 AC 2C 00 E8 03 05 01
+ // E8 AC 2C 00 E8 03 05 01
+
+ // 4 bytes - size of contents
+ // 13/15 00 00 01
+
+ // ....
+
+ // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
+
+ // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
+
+ // 01 18 30 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+
+ // 01 18 31 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+
+ // 01 18 32 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+ }
+
+ public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
+ byte[] data = getData(dir, "CONTENTS");
+
+ System.out.println("");
+ System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+ // Between the start and 0x200 we have
+ // CHNKINK(space) + 24 bytes
+ // 0x1800
+ // TEXT + 6 bytes
+ // TEXT + 8 bytes
+ // 0x1800
+ // STSH + 6 bytes
+ // STSH + 8 bytes
+ // 0x1800
+ // STSH + 6 bytes
+ // STSH + 8 bytes
+ // but towards 0x200 the pattern may
+ // break down a little bit
+
+ // After the second of a given type,
+ // it seems to be 4 bytes giving the start,
+ // then 4 bytes giving the length, then
+ // 18 00
+ System.out.println(
+ new String(data, 0, 8) +
+ dumpBytes(data, 8, 0x20-8)
+ );
+
+ int pos = 0x20;
+ boolean sixNotEight = true;
+ while(pos < 0x200) {
+ if(sixNotEight) {
+ System.out.println(
+ dumpBytes(data, pos, 2)
+ );
+ pos += 2;
+ }
+ String text = new String(data, pos, 4);
+ int blen = 8;
+ if(sixNotEight)
+ blen = 6;
+ System.out.println(
+ text + " " + dumpBytes(data, pos+4, blen)
+ );
+
+ pos += 4 + blen;
+ sixNotEight = ! sixNotEight;
+ }
+
+ // Text from 0x200 onwards until we get
+ // to \r(00)\n(00)(00)(00)
+ int textStop = -1;
+ for(int i=0x200; i 0) {
+ int len = (textStop - 0x200) / 2;
+ System.out.println("");
+ System.out.println(
+ StringUtil.getFromUnicodeLE(data, 0x200, len)
+ );
+ }
+
+ // The font list comes slightly later
+
+ // The hyperlinks may come before the fonts,
+ // or slightly in front
+ }
+ public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
+ byte[] data = getData(dir, "CONTENTS");
+
+ System.out.println("");
+ System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+ String[] startType = new String[20];
+ String[] endType = new String[20];
+ int[] optA = new int[20];
+ int[] optB = new int[20];
+ int[] optC = new int[20];
+ int[] from = new int[20];
+ int[] len = new int[20];
+
+ for(int i=0; i<20; i++) {
+ int offset = 0x20 + i*24;
+ if(data[offset] == 0x18 && data[offset+1] == 0x00) {
+ // Has data
+ startType[i] = new String(data, offset+2, 4);
+ optA[i] = LittleEndian.getUShort(data, offset+6);
+ optB[i] = LittleEndian.getUShort(data, offset+8);
+ optC[i] = LittleEndian.getUShort(data, offset+10);
+ endType[i] = new String(data, offset+12, 4);
+ from[i] = (int)LittleEndian.getUInt(data, offset+16);
+ len[i] = (int)LittleEndian.getUInt(data, offset+20);
+ } else {
+ // Doesn't have data
+ }
+ }
+
+ String text = StringUtil.getFromUnicodeLE(
+ data, from[0], len[0]/2
+ );
+
+ // Dump
+ for(int i=0; i<20; i++) {
+ String num = Integer.toString(i);
+ if(i < 10) {
+ num = "0" + i;
+ }
+ System.out.print(num + " ");
+
+ if(startType[i] == null) {
+ System.out.println("(not present)");
+ } else {
+ System.out.println(
+ "\t" +
+ startType[i] + " " +
+ optA[i] + " " +
+ optB[i] + " " +
+ optC[i]
+ );
+ System.out.println(
+ "\t" +
+ endType[i] + " " +
+ "from: " +
+ Integer.toHexString(from[i]) +
+ " (" + from[i] + ")" +
+ ", len: " +
+ Integer.toHexString(len[i]) +
+ " (" + len[i] + ")"
+ );
+ }
+ }
+
+ // Text
+ System.out.println("");
+ System.out.println("TEXT:");
+ System.out.println(text);
+ System.out.println("");
+
+ // All the others
+ for(int i=0; i<20; i++) {
+ if(startType[i] == null) {
+ continue;
+ }
+ int start = from[i];
+
+ System.out.println(
+ startType[i] + " -> " + endType[i] +
+ " @ " + Integer.toHexString(start) +
+ " (" + start + ")"
+ );
+ System.out.println("\t" + dumpBytes(data, start, 4));
+ System.out.println("\t" + dumpBytes(data, start+4, 4));
+ System.out.println("\t" + dumpBytes(data, start+8, 4));
+ System.out.println("\t(etc)");
+ }
+ }
+
+ protected void dump001CompObj(DirectoryNode dir) {
+ // TODO
+ }
+
+ public void dumpQuill() throws IOException {
+ DirectoryNode quillDir = (DirectoryNode)
+ fs.getRoot().getEntry("Quill");
+ DirectoryNode quillSubDir = (DirectoryNode)
+ quillDir.getEntry("QuillSub");
+
+ dump001CompObj(quillSubDir);
+ dumpCONTENTSraw(quillSubDir);
+ dumpCONTENTSguessed(quillSubDir);
+ }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub
new file mode 100755
index 0000000000..b870168319
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt
new file mode 100644
index 0000000000..279395e5de
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt
@@ -0,0 +1,29 @@
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub
new file mode 100755
index 0000000000..610362c471
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt
new file mode 100644
index 0000000000..f8a68bb649
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt
@@ -0,0 +1,34 @@
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+We’ve added some more text in here, to push all the offsets about a bit.
+
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+Ditto with more text in here.
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub
new file mode 100755
index 0000000000..2c6174e5e8
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub
new file mode 100755
index 0000000000..4f19bec93d
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt
new file mode 100644
index 0000000000..c2d791b9af
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt
@@ -0,0 +1,29 @@
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page12345678
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub
new file mode 100755
index 0000000000..445df85f09
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt
new file mode 100644
index 0000000000..279395e5de
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt
@@ -0,0 +1,29 @@
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2 Top right
+P2 table left P2 table right
+Bottom Left Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub
new file mode 100755
index 0000000000..8adffda77a
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub
new file mode 100644
index 0000000000..00deec14dc
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub
new file mode 100644
index 0000000000..94900925af
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub differ
diff --git a/src/testcases/org/apache/poi/hssf/model/TestSheet.java b/src/testcases/org/apache/poi/hssf/model/TestSheet.java
index 0b245c0a78..6c6dd9fb25 100644
--- a/src/testcases/org/apache/poi/hssf/model/TestSheet.java
+++ b/src/testcases/org/apache/poi/hssf/model/TestSheet.java
@@ -32,6 +32,7 @@ import org.apache.poi.hssf.record.CellValueRecordInterface;
import org.apache.poi.hssf.record.ColumnInfoRecord;
import org.apache.poi.hssf.record.DimensionsRecord;
import org.apache.poi.hssf.record.EOFRecord;
+import org.apache.poi.hssf.record.GutsRecord;
import org.apache.poi.hssf.record.IndexRecord;
import org.apache.poi.hssf.record.MergeCellsRecord;
import org.apache.poi.hssf.record.Record;
@@ -41,6 +42,8 @@ import org.apache.poi.hssf.record.UncalcedRecord;
import org.apache.poi.hssf.record.aggregates.ColumnInfoRecordsAggregate;
import org.apache.poi.hssf.record.aggregates.PageSettingsBlock;
import org.apache.poi.hssf.record.aggregates.RowRecordsAggregate;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.CellRangeAddress;
/**
@@ -438,8 +441,8 @@ public final class TestSheet extends TestCase {
if (false) {
// make sure that RRA and VRA are in the right place
// (Aug 2008) since the VRA is now part of the RRA, there is much less chance that
- // they could get out of order. Still, one could write serialize the sheet here,
- // and read back with EventRecordFactory to make sure...
+ // they could get out of order. Still, one could write serialize the sheet here,
+ // and read back with EventRecordFactory to make sure...
}
assertEquals(242, dbCellRecordPos);
}
@@ -475,5 +478,29 @@ public final class TestSheet extends TestCase {
return _indexRecord;
}
}
+
+ /**
+ * Regression test for bug 45640.
+ * Checks for a bug introduced around r682282-r683880 that caused a second GUTS record
+ * to appear, which in turn got the dimensions record out of alignment.
+ * <p>
+ * Creates a sheet, adds two row records (0 and 1), calls
+ * <code>groupRowRange(0, 1, true)</code>, and then counts the {@link GutsRecord}
+ * instances in the list returned by <code>getRecords()</code>. Exactly one is expected.
+ */
+ public void testGutsRecord_bug45640() {
+
+ Sheet sheet = Sheet.createSheet();
+ sheet.addRow(new RowRecord(0));
+ sheet.addRow(new RowRecord(1));
+ sheet.groupRowRange( 0, 1, true );
+ sheet.toString(); // NOTE(review): result is discarded; presumably invoked for a side effect - confirm
+ List recs = sheet.getRecords();
+ int count=0;
+ for(int i=0; i< recs.size(); i++) {
+ if (recs.get(i) instanceof GutsRecord) {
+ count++;
+ }
+ }
+ if (count == 2) { // the known symptom of the bug gets its own explicit failure message
+ throw new AssertionFailedError("Identified bug 45640");
+ }
+ assertEquals(1, count); // any other count than one is reported by the general assertion
+ }
}