From: Nick Burch Date: Sun, 17 Aug 2008 20:15:51 +0000 (+0000) Subject: Further HPBF documentation, and some more sample files used X-Git-Tag: REL_3_2_FINAL~147 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=2118d52949c2e571c00dab9263740d635b83908b;p=poi.git Further HPBF documentation, and some more sample files used git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@686640 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/book.xml b/src/documentation/content/xdocs/book.xml index ab1af452e8..39424524f9 100644 --- a/src/documentation/content/xdocs/book.xml +++ b/src/documentation/content/xdocs/book.xml @@ -41,6 +41,7 @@ + diff --git a/src/documentation/content/xdocs/hpbf/file-format.xml b/src/documentation/content/xdocs/hpbf/file-format.xml index 591204951e..97d5a33d7c 100644 --- a/src/documentation/content/xdocs/hpbf/file-format.xml +++ b/src/documentation/content/xdocs/hpbf/file-format.xml @@ -38,19 +38,19 @@ Root Entry - Objects - (no children) - SummaryInformation <(0x05)SummaryInformation> - DocumentSummaryInformation <(0x05)DocumentSummaryInformation> + SummaryInformation <(0x05)SummaryInformation> + DocumentSummaryInformation <(0x05)DocumentSummaryInformation> Escher - EscherStm EscherDelayStm Quill - QuillSub - CONTENTS - CompObj <(0x01)CompObj> + CompObj <(0x01)CompObj> Envelope Contents - Internal <(0x03)Internal> - CompObj <(0x01)CompObj> + Internal <(0x03)Internal> + CompObj <(0x01)CompObj> VBA - (no children) @@ -69,7 +69,7 @@ Root Entry - then both Contents and CONTENTS streams change. There are no changes to the Escher streams.

If you set the background colour of a textbox, but make - no changes to the text, + no changes to the text, (to finish off)

Structure of CONTENTS

First we have "CHNKINK ", followed by 24 bytes.

@@ -162,6 +162,8 @@ PL 62 1a 00 00 48 00 00 00 // PL from: 1a62 (6754), len: 48 (72) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + +(the text will then start)
diff --git a/src/documentation/content/xdocs/hpbf/index.xml b/src/documentation/content/xdocs/hpbf/index.xml index 2601a4174d..c74dc23621 100755 --- a/src/documentation/content/xdocs/hpbf/index.xml +++ b/src/documentation/content/xdocs/hpbf/index.xml @@ -39,7 +39,7 @@ after that if demand and developer interest warrant it.

At this time, there is no usermodel API or similar.

Our current understanding of the file format is documented - here.

+ here.

This code currently lives in the scratchpad area diff --git a/src/documentation/content/xdocs/index.xml b/src/documentation/content/xdocs/index.xml index 3cf305954d..17e4336d02 100644 --- a/src/documentation/content/xdocs/index.xml +++ b/src/documentation/content/xdocs/index.xml @@ -146,26 +146,30 @@ href="./hslf/index.html">the HSLF project page for more information.

-
HDGF for Visio Documents -

HDGF is our port of the Microsoft Visio 97(-2003) file format to pure - Java. It currently only supports reading at a very low level, and - simple text extraction. Please see the HDGF project page for more - information.

-
HPSF for Document Properties

HPSF is our port of the OLE 2 property set format to pure Java. Property sets are mostly used to store a document's properties (title, author, date of last modification etc.), but they can be used for application-specific purposes as well.

-

HPSF supports reading and writing of properties. However, you will - need to be using version 3.0 of POI to utilise the write support.

- +

HPSF supports both reading and writing of properties.

Please see the HPSF project page for more information.

- +
HDGF for Visio Documents +

HDGF is our port of the Microsoft Visio 97(-2003) file format to pure + Java. It currently only supports reading at a very low level, and + simple text extraction. Please see the HDGF project page for more + information.

+
+
HPBF for Publisher Documents +

HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure + Java. At the moment, we are still figuring out the file format, but we hope + to have simple text extraction shortly. Please see the HPBF project page for more + information.

+
Contributing diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java index ae3cb56d56..6c52bbb04c 100644 --- a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java +++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java @@ -310,8 +310,30 @@ public class HPBFDumper { ); } } + + // Text System.out.println(""); + System.out.println("TEXT:"); System.out.println(text); + System.out.println(""); + + // All the others + for(int i=0; i<20; i++) { + if(startType[i] == null) { + continue; + } + int start = from[i]; + + System.out.println( + startType[i] + " -> " + endType[i] + + " @ " + Integer.toHexString(start) + + " (" + start + ")" + ); + System.out.println("\t" + dumpBytes(data, start, 4)); + System.out.println("\t" + dumpBytes(data, start+4, 4)); + System.out.println("\t" + dumpBytes(data, start+8, 4)); + System.out.println("\t(etc)"); + } } protected void dump001CompObj(DirectoryNode dir) { diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub new file mode 100755 index 0000000000..610362c471 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt new file mode 100644 index 0000000000..f8a68bb649 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt @@ -0,0 +1,34 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + +We’ve added some more text in here, to push all the offsets about a bit. + + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + +Ditto with more text in here. 
+ + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub new file mode 100755 index 0000000000..4f19bec93d Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt new file mode 100644 index 0000000000..c2d791b9af --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt @@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page12345678 + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub new file mode 100755 index 0000000000..445df85f09 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub differ diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt new file mode 100644 index 0000000000..279395e5de --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt 
@@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1