]> source.dussan.org Git - poi.git/commitdiff
More work understanding hpbf
authorNick Burch <nick@apache.org>
Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)
committerNick Burch <nick@apache.org>
Sun, 17 Aug 2008 18:02:31 +0000 (18:02 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@686625 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/hpbf/file-format.xml
src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java

index e130f7ba137f8bc38d9428b5627cd81378add16e..088ecf651d4069d87ca69b10360982e756ef20ee 100644 (file)
@@ -71,5 +71,25 @@ Root Entry -
         <p>If you set the background colour of a textbox, but make
           no changes to the text,
                </section>
+        <section><title>Structure of CONTENTS</title>
+        <p>First we have "CHNKINK ", followed by 24 bytes.</p>
+        <p>Next we have 20 sequences of 24 bytes each. If the first two bytes
+         are 0x1800, then that sequence entry exists, but if it's 0x0000 then
+         the entry doesn't exist. If it does exist, we then have 4 bytes of
+         upper case ASCII text, followed by three little endian shorts.
+         The first of these seems to be the count of that type, the second is
+         usually 1, the third is usually zero. Then we have another 4 bytes of
+         upper case ASCII text, normally but not always the same as the first
+         text. Finally, we have an unsigned little endian 32 bit offset to
+         the start of the data for this, then an unsigned little endian
+         32 bit length of this section.</p>
+        <p>Normally, the first sequence entry is for TEXT, and the text data
+         will start at 0x200. After that is normally two or three STSH entries
+         (so the first short has values 0, then 1, then 2). After that it
+         seems to vary.</p>
+        <p>At 0x200 we have the text, stored as little endian 16 bit unicode.</p>
+        <p>After the text comes all sorts of other stuff, presumably as 
+         described by the sequences.</p>
+               </section>
        </body>
 </document>
index 47ee17ecea0d9f6d7aeee0d4cfbc57f7d978471d..e2472b47c7880bc3047691d736ffb66b6fe43358 100644 (file)
@@ -175,20 +175,23 @@ public class HPBFDumper {
                // 00 00 00 88 1E 00 00 00
        }
        
-       public void dumpCONTENTS(DirectoryNode dir) throws IOException {
+       public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
                byte[] data = getData(dir, "CONTENTS");
                
                System.out.println("");
                System.out.println("CONTENTS - " + data.length + " bytes long:");
                
                // Between the start and 0x200 we have
-               //  CHNKINK(space) + 24 bytes + 0x1800
+               //  CHNKINK(space) + 24 bytes
+               //  0x1800
                //  TEXT + 6 bytes
-               //  TEXT + 8 bytes + 0x1800
+               //  TEXT + 8 bytes
+               //  0x1800
                //  STSH + 6 bytes
-               //  STSH + 8 bytes + 0x1800
+               //  STSH + 8 bytes
+               //  0x1800
                //  STSH + 6 bytes
-               //  STSH + 8 bytes + 0x1800
+               //  STSH + 8 bytes
                // but towards 0x200 the pattern may
                //  break down a little bit
                
@@ -237,6 +240,73 @@ public class HPBFDumper {
                // The hyperlinks may come before the fonts,
                //  or slightly in front
        }
+       public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException { // Parses the 20-slot entry table of the CONTENTS stream, prints each slot, then prints the first slot's data as text
+               byte[] data = getData(dir, "CONTENTS");
+               
+               System.out.println("");
+               System.out.println("CONTENTS - " + data.length + " bytes long:");
+               
+               String[] startType = new String[20]; // first 4-char type tag of each slot; stays null when the slot is absent
+               String[] endType = new String[20]; // second 4-char type tag of each slot
+               int[] optA = new int[20]; // first of the three unsigned shorts between the two tags
+               int[] optB = new int[20]; // second unsigned short
+               int[] optC = new int[20]; // third unsigned short
+               int[] from = new int[20]; // offset field of each slot; stays 0 when the slot is absent
+               int[] len = new int[20]; // length field of each slot; stays 0 when the slot is absent
+               
+               for(int i=0; i<20; i++) {
+                       int offset = 0x20 + i*24; // slots are 24 bytes each, the first one starting at 0x20
+                       if(data[offset] == 0x18 && data[offset+1] == 0x00) {
+                               // Has data: the slot starts with the marker bytes 0x18 0x00
+                               startType[i] = new String(data, offset+2, 4); // decoded with the platform default charset
+                               optA[i] = LittleEndian.getUShort(data, offset+6);
+                               optB[i] = LittleEndian.getUShort(data, offset+8);
+                               optC[i] = LittleEndian.getUShort(data, offset+10);
+                               endType[i] = new String(data, offset+12, 4); // decoded with the platform default charset
+                               from[i] = (int)LittleEndian.getUInt(data, offset+16); // unsigned 32 bit value narrowed to int
+                               len[i] = (int)LittleEndian.getUInt(data, offset+20); // unsigned 32 bit value narrowed to int
+                       } else {
+                               // Doesn't have data: leave this slot's array elements at their defaults (null / 0)
+                       }
+               }
+               
+               String text = StringUtil.getFromUnicodeLE( // always uses slot 0, whether or not it was marked present
+                               data, from[0], len[0]/2 // NOTE(review): presumably the last argument is a character count, hence byte length / 2 - confirm
+               );
+               
+               // Dump one line group per slot, then the text
+               for(int i=0; i<20; i++) {
+                       String num = Integer.toString(i);
+                       if(i < 10) {
+                               num = "0" + i; // zero-pad the slot index to two digits
+                       }
+                       System.out.print(num + " ");
+                       
+                       if(startType[i] == null) { // never assigned above, so the marker check failed for this slot
+                               System.out.println("(not present)");
+                       } else {
+                               System.out.println(
+                                               "\t" +
+                                               startType[i] + " " + 
+                                               optA[i] + " " + 
+                                               optB[i] + " " +
+                                               optC[i]
+                               );
+                               System.out.println(
+                                               "\t" +
+                                               endType[i] + " " +
+                                               "from: " +
+                                               Integer.toHexString(from[i]) + // offset shown in hex, then in decimal
+                                               " (" + from[i] + ")" + 
+                                               ", len: " +
+                                               Integer.toHexString(len[i]) + // length shown in hex, then in decimal
+                                               " (" + len[i] + ")"
+                               );
+                       }
+               }
+               System.out.println("");
+               System.out.println(text);
+       }
        
        protected void dump001CompObj(DirectoryNode dir) {
                // TODO
@@ -249,6 +319,7 @@ public class HPBFDumper {
                        quillDir.getEntry("QuillSub");
 
                dump001CompObj(quillSubDir);
-               dumpCONTENTS(quillSubDir);
+               dumpCONTENTSraw(quillSubDir);
+               dumpCONTENTSguessed(quillSubDir);
        }
 }