]> source.dussan.org Git - poi.git/commitdiff
Further HPBF documentation, and some more sample files used
authorNick Burch <nick@apache.org>
Sun, 17 Aug 2008 20:15:51 +0000 (20:15 +0000)
committerNick Burch <nick@apache.org>
Sun, 17 Aug 2008 20:15:51 +0000 (20:15 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@686640 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/book.xml
src/documentation/content/xdocs/hpbf/file-format.xml
src/documentation/content/xdocs/hpbf/index.xml
src/documentation/content/xdocs/index.xml
src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub [new file with mode: 0755]
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub [new file with mode: 0755]
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt [new file with mode: 0644]
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub [new file with mode: 0755]
src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt [new file with mode: 0644]

index ab1af452e8cc9ff0a491d604a75d631559c8bd7a..39424524f9efd1c533ab440d12076139e6fc6f93 100644 (file)
@@ -41,6 +41,7 @@
         <menu-item label="HSLF" href="hslf/index.html"/>
         <menu-item label="HSMF" href="hsmf/index.html"/>
         <menu-item label="HDGF" href="hdgf/index.html"/>
+        <menu-item label="HPBF" href="hpbf/index.html"/>
                <menu-item label="POI-Ruby" href="poi-ruby.html"/>
         <menu-item label="POI-Utils" href="utils/index.html"/>
         <menu-item label="Text Extraction" href="text-extraction.html"/>
index 591204951e6104caec38917b73a177ba06b42eb6..97d5a33d7c9c2cbf3c8f3cd0417a6bde508860fc 100644 (file)
 Root Entry -
   Objects -
     (no children)
-  SummaryInformation <(0x05)SummaryInformation>
-  DocumentSummaryInformation <(0x05)DocumentSummaryInformation>
+  SummaryInformation &lt;(0x05)SummaryInformation&gt;
+  DocumentSummaryInformation &lt;(0x05)DocumentSummaryInformation&gt;
   Escher -
     EscherStm
     EscherDelayStm
   Quill -
     QuillSub -
       CONTENTS
-      CompObj <(0x01)CompObj>
+      CompObj &lt;(0x01)CompObj&gt;
   Envelope
   Contents
-  Internal <(0x03)Internal>
-  CompObj <(0x01)CompObj>
+  Internal &lt;(0x03)Internal&gt;
+  CompObj &lt;(0x01)CompObj&gt;
   VBA -
     (no children)
 </source>
@@ -69,7 +69,7 @@ Root Entry -
           then both <em>Contents</em> and <em>CONTENTS</em> streams
           change. There are no changes to the Escher streams.</p>
         <p>If you set the background colour of a textbox, but make
-          no changes to the text,
+          no changes to the text, (to finish off)</p>
                </section>
         <section><title>Structure of CONTENTS</title>
         <p>First we have "CHNKINK ", followed by 24 bytes.</p>
@@ -162,6 +162,8 @@ PL   62 1a 00 00 48 00 00 00 // PL   from: 1a62 (6754), len: 48 (72)
 00 00 00 00 00 00
 00 00 00 00 00 00 00 00 
 00 00 00 00 00 00 00 00
+
+(the text will then start)
 </source>
                </section>
        </body>
index 2601a4174df6cc798fa19e18d5216f96bf1ccec5..c74dc23621a9c469fba35855b3b4cea078997207 100755 (executable)
@@ -39,7 +39,7 @@
               after that if demand and developer interest warrant it.</p>
                        <p>At this time, there is no <em>usermodel</em> api or similar.</p>
             <p>Our current understanding of the file format is documented
-              <link href="file-format.html">here</a>.</p>
+              <link href="file-format.html">here</link>.</p>
             <note> 
                 This code currently lives in the 
                 <link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link> 
index 3cf305954d7a99bfb0cf1c95b50587825c5a9907..17e4336d0231d6328f12713ea047c86521e25f22 100644 (file)
            href="./hslf/index.html">the HSLF project page for more
            information</link>.</p>
         </section>
-        <section><title>HDGF for Visio Documents</title>
-       <p>HDGF is our port of the Microsoft Viso 97(-2003) file format to pure
-         Java. It currently only supports reading at a very low level, and
-      simple text extraction. Please see <link
-           href="./hdgf/index.html">the HDGF project page for more
-           information</link>.</p>
-        </section>
         <section><title>HPSF for Document Properties</title>
        <p>HPSF is our port of the OLE 2 property set format to pure
          Java. Property sets are mostly used to store a document's properties
          (title, author, date of last modification etc.), but they can be used
          for application-specific purposes as well.</p>
 
-       <p>HPSF supports reading and writing of properties. However, you will
-      need to be using version 3.0 of POI to utilise the write support.</p>
-
+       <p>HPSF supports both reading and writing of properties.</p>
        <p>Please see <link href="./hpsf/index.html">the HPSF project
            page</link> for more information.</p>
         </section>
-
+        <section><title>HDGF for Visio Documents</title>
+       <p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
+         Java. It currently only supports reading at a very low level, and
+      simple text extraction. Please see <link
+           href="./hdgf/index.html">the HDGF project page for more
+           information</link>.</p>
+        </section>
+        <section><title>HPBF for Publisher Documents</title>
+       <p>HPBF is our port of the Microsoft Publisher 98(-2007) file format to pure
+         Java. At the moment, we are still figuring out the file format, but we hope
+      to have simple text extraction shortly. Please see <link
+           href="./hpbf/index.html">the HPBF project page for more
+           information</link>.</p>
+        </section>
      </section>
 
     <section><title>Contributing </title>
index ae3cb56d568d671b620d0eb632e6777d12b3cd74..6c52bbb04c9c38324f60106a7378481a53814630 100644 (file)
@@ -310,8 +310,30 @@ public class HPBFDumper {
                                );
                        }
                }
+               
+               // Text
                System.out.println("");
+               System.out.println("TEXT:");
                System.out.println(text);
+               System.out.println("");
+               
+               // All the others
+               for(int i=0; i<20; i++) {
+                       if(startType[i] == null) {
+                               continue;
+                       }
+                       int start = from[i];
+                       
+                       System.out.println(
+                                       startType[i] + " -> " + endType[i] +
+                                       " @ " + Integer.toHexString(start) + 
+                                       " (" + start + ")"
+                       );
+                       System.out.println("\t" + dumpBytes(data, start, 4));
+                       System.out.println("\t" + dumpBytes(data, start+4, 4));
+                       System.out.println("\t" + dumpBytes(data, start+8, 4));
+                       System.out.println("\t(etc)");
+               }
        }
        
        protected void dump001CompObj(DirectoryNode dir) {
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub
new file mode 100755 (executable)
index 0000000..610362c
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt
new file mode 100644 (file)
index 0000000..f8a68bb
--- /dev/null
@@ -0,0 +1,34 @@
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+We’ve added some more text in here, to push all the offsets about a bit.
+
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+Ditto with more text in here.
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2    Top right
+P2 table left      P2 table right
+Bottom Left        Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub
new file mode 100755 (executable)
index 0000000..4f19bec
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt
new file mode 100644 (file)
index 0000000..c2d791b
--- /dev/null
@@ -0,0 +1,29 @@
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page12345678
+
+It is also times new roman, 10 point
+
+
+Table on page 2    Top right
+P2 table left      P2 table right
+Bottom Left        Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub
new file mode 100755 (executable)
index 0000000..445df85
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt
new file mode 100644 (file)
index 0000000..279395e
--- /dev/null
@@ -0,0 +1,29 @@
+This is some text on the first page
+It’s in times new roman, font size 10, all normal
+
+
+This is in bold and italic
+It’s Arial, 20 point font
+It’s in the second textbox on the first page
+
+
+This is the second page
+
+It is also times new roman, 10 point
+
+
+Table on page 2    Top right
+P2 table left      P2 table right
+Bottom Left        Bottom Right
+
+
+This text is on page two
+This is a link to Apache POI
+More normal text
+Link to a file
+
+
+More text, more hyperlinks
+email link
+Final hyperlink
+Within doc to page 1