From e9a647c3f4a5394b57fffbba5a648142486461d1 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sun, 17 Aug 2008 18:02:31 +0000 Subject: [PATCH] More work understanding hpbf git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@686625 13f79535-47bb-0310-9956-ffa450edef68 --- .../content/xdocs/hpbf/file-format.xml | 20 +++++ .../org/apache/poi/hpbf/dev/HPBFDumper.java | 83 +++++++++++++++++-- 2 files changed, 97 insertions(+), 6 deletions(-) diff --git a/src/documentation/content/xdocs/hpbf/file-format.xml b/src/documentation/content/xdocs/hpbf/file-format.xml index e130f7ba13..088ecf651d 100644 --- a/src/documentation/content/xdocs/hpbf/file-format.xml +++ b/src/documentation/content/xdocs/hpbf/file-format.xml @@ -71,5 +71,25 @@ Root Entry -

If you set the background colour of a textbox, but make no changes to the text, +

Structure of CONTENTS +

First we have "CHNKINK ", followed by 24 bytes.

+

Next we have 20 sequences of 24 bytes each. If the first two bytes + are 0x1800, then that sequence entry exists, but if it's 0x0000 then + the entry doesn't exist. If it does exist, we then have 4 bytes of + upper case ASCII text, followed by three little endian shorts. + The first of these seems to be the count of that type, the second is + usually 1, the third is usually zero. Then we have another 4 bytes of + upper case ASCII text, normally but not always the same as the first + text. Finally, we have an unsigned little endian 32 bit offset to + the start of the data for this, then an unsigned little endian + 32 bit length of this section.

+

Normally, the first sequence entry is for TEXT, and the text data + will start at 0x200. After that there are normally two or three STSH entries + (so the first short has values 0, then 1, then 2). After that it + seems to vary.

+

At 0x200 we have the text, stored as little endian 16 bit Unicode (UTF-16LE).

+

After the text comes all sorts of other stuff, presumably as + described by the sequences.

+
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java index 47ee17ecea..e2472b47c7 100644 --- a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java +++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java @@ -175,20 +175,23 @@ public class HPBFDumper { // 00 00 00 88 1E 00 00 00 } - public void dumpCONTENTS(DirectoryNode dir) throws IOException { + public void dumpCONTENTSraw(DirectoryNode dir) throws IOException { byte[] data = getData(dir, "CONTENTS"); System.out.println(""); System.out.println("CONTENTS - " + data.length + " bytes long:"); // Between the start and 0x200 we have - // CHNKINK(space) + 24 bytes + 0x1800 + // CHNKINK(space) + 24 bytes + // 0x1800 // TEXT + 6 bytes - // TEXT + 8 bytes + 0x1800 + // TEXT + 8 bytes + // 0x1800 // STSH + 6 bytes - // STSH + 8 bytes + 0x1800 + // STSH + 8 bytes + // 0x1800 // STSH + 6 bytes - // STSH + 8 bytes + 0x1800 + // STSH + 8 bytes // but towards 0x200 the pattern may // break down a little bit @@ -237,6 +240,73 @@ public class HPBFDumper { // The hyperlinks may come before the fonts, // or slightly in front } + public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException { + byte[] data = getData(dir, "CONTENTS"); + + System.out.println(""); + System.out.println("CONTENTS - " + data.length + " bytes long:"); + + String[] startType = new String[20]; + String[] endType = new String[20]; + int[] optA = new int[20]; + int[] optB = new int[20]; + int[] optC = new int[20]; + int[] from = new int[20]; + int[] len = new int[20]; + + for(int i=0; i<20; i++) { + int offset = 0x20 + i*24; + if(data[offset] == 0x18 && data[offset+1] == 0x00) { + // Has data + startType[i] = new String(data, offset+2, 4); + optA[i] = LittleEndian.getUShort(data, offset+6); + optB[i] = LittleEndian.getUShort(data, offset+8); + optC[i] = LittleEndian.getUShort(data, offset+10); + endType[i] = new String(data, offset+12, 4); + from[i] = 
(int)LittleEndian.getUInt(data, offset+16); + len[i] = (int)LittleEndian.getUInt(data, offset+20); + } else { + // Doesn't have data + } + } + + String text = StringUtil.getFromUnicodeLE( + data, from[0], len[0]/2 + ); + + // Dump + for(int i=0; i<20; i++) { + String num = Integer.toString(i); + if(i < 10) { + num = "0" + i; + } + System.out.print(num + " "); + + if(startType[i] == null) { + System.out.println("(not present)"); + } else { + System.out.println( + "\t" + + startType[i] + " " + + optA[i] + " " + + optB[i] + " " + + optC[i] + ); + System.out.println( + "\t" + + endType[i] + " " + + "from: " + + Integer.toHexString(from[i]) + + " (" + from[i] + ")" + + ", len: " + + Integer.toHexString(len[i]) + + " (" + len[i] + ")" + ); + } + } + System.out.println(""); + System.out.println(text); + } protected void dump001CompObj(DirectoryNode dir) { // TODO @@ -249,6 +319,7 @@ public class HPBFDumper { quillDir.getEntry("QuillSub"); dump001CompObj(quillSubDir); - dumpCONTENTS(quillSubDir); + dumpCONTENTSraw(quillSubDir); + dumpCONTENTSguessed(quillSubDir); } } -- 2.39.5