More work understanding hpbf

author Nick Burch <nick@apache.org>

Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)

committer Nick Burch <nick@apache.org>

Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)
author Nick Burch <nick@apache.org>
Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)
committer Nick Burch <nick@apache.org>
Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)
diff --git a/src/documentation/content/xdocs/hpbf/file-format.xml b/src/documentation/content/xdocs/hpbf/file-format.xml

index e130f7ba137f8bc38d9428b5627cd81378add16e..088ecf651d4069d87ca69b10360982e756ef20ee 100644 (file)
--- a/src/documentation/content/xdocs/hpbf/file-format.xml
+++ b/src/documentation/content/xdocs/hpbf/file-format.xml
@@ -71,5 +71,25 @@ Root Entry -
          <p>If you set the background colour of a textbox, but make
            no changes to the text,
                 </section>
+        <section><title>Structure of CONTENTS</title>
+        <p>First we have "CHNKINK ", followed by 24 bytes.</p>
+        <p>Next we have 20 sequences of 24 bytes each. If the first two bytes
+         are 0x1800, then that sequence entry exists, but if they're 0x0000 then
+         the entry doesn't exist. If it does exist, we then have 4 bytes of
+         upper case ASCII text, followed by three little endian shorts.
+         The first of these seems to be the count of that type, the second is
+         usually 1, the third is usually zero. Then we have another 4 bytes of
+         upper case ASCII text, normally but not always the same as the first
+         text. Finally, we have an unsigned little endian 32 bit offset to
+         the start of the data for this, then an unsigned little endian
+         32 bit length of this section.</p>
+        <p>Normally, the first sequence entry is for TEXT, and the text data
+         will start at 0x200. After that is normally two or three STSH entries
+         (so the first short has values 0, then 1, then 2). After that it
+         seems to vary.</p>
+        <p>At 0x200 we have the text, stored as little endian 16 bit unicode.</p>
+        <p>After the text comes all sorts of other stuff, presumably as 
+         described by the sequences.</p>
+               </section>
         </body>
  </document>
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java

index 47ee17ecea0d9f6d7aeee0d4cfbc57f7d978471d..e2472b47c7880bc3047691d736ffb66b6fe43358 100644 (file)
--- a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
@@ -175,20 +175,23 @@ public class HPBFDumper {
                 // 00 00 00 88 1E 00 00 00
         }
         
-       public void dumpCONTENTS(DirectoryNode dir) throws IOException {
+       public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
                 byte[] data = getData(dir, "CONTENTS");
                 
                 System.out.println("");
                 System.out.println("CONTENTS - " + data.length + " bytes long:");
                 
                 // Between the start and 0x200 we have
-               //  CHNKINK(space) + 24 bytes + 0x1800
+               //  CHNKINK(space) + 24 bytes
+               //  0x1800
                 //  TEXT + 6 bytes
-               //  TEXT + 8 bytes + 0x1800
+               //  TEXT + 8 bytes
+               //  0x1800
                 //  STSH + 6 bytes
-               //  STSH + 8 bytes + 0x1800
+               //  STSH + 8 bytes
+               //  0x1800
                 //  STSH + 6 bytes
-               //  STSH + 8 bytes + 0x1800
+               //  STSH + 8 bytes
                 // but towards 0x200 the pattern may
                 //  break down a little bit
                 
@@ -237,6 +240,73 @@ public class HPBFDumper {
                 // The hyperlinks may come before the fonts,
                 //  or slightly in front
         }
+       public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
+               byte[] data = getData(dir, "CONTENTS");
+               // Dumps the CONTENTS stream using the guessed layout: a table of 20 entries, 24 bytes each, starting at 0x20
+               System.out.println("");
+               System.out.println("CONTENTS - " + data.length + " bytes long:");
+               // Parallel arrays, one slot per table entry; slots of absent entries keep their defaults (null / 0)
+               String[] startType = new String[20];
+               String[] endType = new String[20];
+               int[] optA = new int[20];
+               int[] optB = new int[20];
+               int[] optC = new int[20];
+               int[] from = new int[20];
+               int[] len = new int[20];
+               // Entry layout: 2 byte marker (0x18 0x00), 4 byte type, 3 ushorts, 4 byte type, uint offset, uint length
+               for(int i=0; i<20; i++) {
+                       int offset = 0x20 + i*24;
+                       if(data[offset] == 0x18 && data[offset+1] == 0x00) {
+                               // Marker bytes found, so this entry is present
+                               startType[i] = new String(data, offset+2, 4); // NOTE(review): no explicit charset given, so the default one is used
+                               optA[i] = LittleEndian.getUShort(data, offset+6);
+                               optB[i] = LittleEndian.getUShort(data, offset+8);
+                               optC[i] = LittleEndian.getUShort(data, offset+10);
+                               endType[i] = new String(data, offset+12, 4);
+                               from[i] = (int)LittleEndian.getUInt(data, offset+16); // start of this entry's data; getUInt result narrowed to int
+                               len[i] = (int)LittleEndian.getUInt(data, offset+20); // length of this entry's data; getUInt result narrowed to int
+                       } else {
+                               // No marker: entry treated as absent, so startType[i] stays null
+                       }
+               }
+               // NOTE(review): entry 0 is read unconditionally; if it is absent, from[0] and len[0] are both 0 - confirm entry 0 is always the text
+               String text = StringUtil.getFromUnicodeLE(
+                               data, from[0], len[0]/2 // presumably a length in 16 bit chars, hence bytes/2 - TODO confirm
+               );
+               
+               // Print every slot (index zero-padded to two digits), then the decoded text
+               for(int i=0; i<20; i++) {
+                       String num = Integer.toString(i);
+                       if(i < 10) {
+                               num = "0" + i;
+                       }
+                       System.out.print(num + " ");
+                       
+                       if(startType[i] == null) { // null means the marker was not seen for this slot
+                               System.out.println("(not present)");
+                       } else {
+                               System.out.println(
+                                               "\t" +
+                                               startType[i] + " " + 
+                                               optA[i] + " " + 
+                                               optB[i] + " " +
+                                               optC[i]
+                               );
+                               System.out.println(
+                                               "\t" +
+                                               endType[i] + " " +
+                                               "from: " +
+                                               Integer.toHexString(from[i]) +
+                                               " (" + from[i] + ")" + 
+                                               ", len: " +
+                                               Integer.toHexString(len[i]) +
+                                               " (" + len[i] + ")"
+                               );
+                       }
+               }
+               System.out.println("");
+               System.out.println(text);
+       }
         
         protected void dump001CompObj(DirectoryNode dir) {
                 // TODO
@@ -249,6 +319,7 @@ public class HPBFDumper {
                         quillDir.getEntry("QuillSub");
  
                 dump001CompObj(quillSubDir);
-               dumpCONTENTS(quillSubDir);
+               dumpCONTENTSraw(quillSubDir);
+               dumpCONTENTSguessed(quillSubDir);
         }
  }
author	Nick Burch <nick@apache.org>
	Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)
committer	Nick Burch <nick@apache.org>
	Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)
src/documentation/content/xdocs/hpbf/file-format.xml		patch \| blob \| history
src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java		patch \| blob \| history