diff options
author | Nick Burch <nick@apache.org> | 2008-08-20 13:47:57 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2008-08-20 13:47:57 +0000 |
commit | 48ef5fb2d3523381ad6ef2c135b7625860a5d9f3 (patch) | |
tree | 2c6e28943e7333f211b688b141fe83f3fab86fde /src/scratchpad | |
parent | 097c9e92dfc1fc4875cbe9278d64a577983bc412 (diff) | |
download | poi-48ef5fb2d3523381ad6ef2c135b7625860a5d9f3.tar.gz poi-48ef5fb2d3523381ad6ef2c135b7625860a5d9f3.zip |
Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-660255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677027,677030-677040,677042-677056,677058-677375,677377-677968,677970-677971,677973,677975-677994,677996-678286,678288-678538,678540-680393,680395-680469,680471-680529,680531-680852,680854-681529,681531-681571,681573-682224,682226,682228,682231-682281,682283-682335,682337-682507,682509,682512-682517,682519-682532,682534-682619,682622-682777,682779-682998,683000-683019,683021-683022,683024-683080,683082-683092,683094-683095,683097-683127,683129-683131,683133-683166,683168-683698,683700-683705,683707-683757,683759-683787,683789-683870,683872-683879,683881-683900,683902-684066,684068-684074,684076-684222,684224-684254,684257-684281,684283-684286,684288-684292,684294-684298,684300-684301,684303-684308,684310-684317,684320,684323-684335,684337-684348,684350-684354,684356-684361,684363-684369,684371-684453,684455-684883,684885-684937,684940-684958,684960-684970,684972-684985,684987-685053,685055-685063,685065-685259,685261-685262,685264-685266,685268-685282,685285-686035,686037-686045,686047-686052,686054-687331 via svnmerge from
https://svn.apache.org/repos/asf/poi/trunk
........
r686207 | nick | 2008-08-15 13:43:02 +0100 (Fri, 15 Aug 2008) | 1 line
Add sample publisher files from bug #45602 to svn
........
r686216 | nick | 2008-08-15 15:05:30 +0100 (Fri, 15 Aug 2008) | 1 line
Add a few more source package excludes
........
r686278 | nick | 2008-08-15 17:57:30 +0100 (Fri, 15 Aug 2008) | 1 line
More sample hpbf docs, with a description
........
r686290 | nick | 2008-08-15 18:42:25 +0100 (Fri, 15 Aug 2008) | 1 line
Start on a HPBF dumper
........
r686621 | nick | 2008-08-17 17:36:40 +0100 (Sun, 17 Aug 2008) | 1 line
Few little tweaks to dev helpers
........
r686624 | nick | 2008-08-17 18:39:10 +0100 (Sun, 17 Aug 2008) | 1 line
More work understanding hpbf
........
r686625 | nick | 2008-08-17 19:02:31 +0100 (Sun, 17 Aug 2008) | 1 line
More work understanding hpbf
........
r686628 | nick | 2008-08-17 19:21:34 +0100 (Sun, 17 Aug 2008) | 1 line
More work understanding hpbf
........
r686640 | nick | 2008-08-17 21:15:51 +0100 (Sun, 17 Aug 2008) | 1 line
Further HPBF documentation, and some more sample files used
........
r686844 | yegor | 2008-08-18 19:33:58 +0100 (Mon, 18 Aug 2008) | 1 line
fixed bug #45645: Fix for HSSFSheet.autoSizeColumn() for widths exceeding Short.MAX_VALUE
........
r686977 | josh | 2008-08-19 08:44:57 +0100 (Tue, 19 Aug 2008) | 1 line
Fix for bug 45640 - avoid creating multiple GUTS records
........
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@687333 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
13 files changed, 474 insertions, 0 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java new file mode 100644 index 0000000000..6c52bbb04c --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java @@ -0,0 +1,353 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hpbf.dev; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.ddf.DefaultEscherRecordFactory; +import org.apache.poi.ddf.EscherRecord; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.StringUtil; + +/** + * For dumping out the contents of HPBF (Publisher) + * files, while we try to figure out how they're + * constructed. + */ +public class HPBFDumper { + private POIFSFileSystem fs; + public HPBFDumper(POIFSFileSystem fs) { + this.fs = fs; + } + public HPBFDumper(InputStream inp) throws IOException { + this(new POIFSFileSystem(inp)); + } + + private static byte[] getData(DirectoryNode dir, String name) throws IOException { + DocumentEntry docProps = + (DocumentEntry)dir.getEntry(name); + + // Grab the document stream + byte[] d = new byte[docProps.getSize()]; + dir.createDocumentInputStream(name).read(d); + + // All done + return d; + } + + /** + * Dumps out the given number of bytes as hex, + * two chars + */ + private String dumpBytes(byte[] data, int offset, int len) { + StringBuffer ret = new StringBuffer(); + for(int i=0; i<len; i++) { + int j = i + offset; + int b = data[j]; + if(b < 0) { b += 256; } + + String bs = Integer.toHexString(b); + if(bs.length() == 1) + ret.append('0'); + ret.append(bs); + ret.append(' '); + } + return ret.toString(); + } + + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" HPBFDumper <filename>"); + System.exit(1); + } + HPBFDumper dump = new HPBFDumper( + new FileInputStream(args[0]) + ); + + System.out.println("Dumping " + args[0]); + dump.dumpContents(); + dump.dumpEnvelope(); + dump.dumpEscher(); + dump.dump001CompObj(dump.fs.getRoot()); + dump.dumpQuill(); + + // Still to go: + // (0x03)Internal + // Objects + } + + /** + * Dump out the escher parts of the file. + * Escher -> EscherStm and EscherDelayStm + */ + public void dumpEscher() throws IOException { + DirectoryNode escherDir = (DirectoryNode) + fs.getRoot().getEntry("Escher"); + + dumpEscherStm(escherDir); + dumpEscherDelayStm(escherDir); + } + private void dumpEscherStream(byte[] data) { + DefaultEscherRecordFactory erf = + new DefaultEscherRecordFactory(); + + // Dump + int left = data.length; + while(left > 0) { + EscherRecord er = erf.createRecord(data, 0); + er.fillFields(data, 0, erf); + left -= er.getRecordSize(); + + System.out.println(er.toString()); + } + } + protected void dumpEscherStm(DirectoryNode escherDir) throws IOException { + byte[] data = getData(escherDir, "EscherStm"); + System.out.println(""); + System.out.println("EscherStm - " + data.length + " bytes long:"); + if(data.length > 0) + dumpEscherStream(data); + } + protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException { + byte[] data = getData(escherDir, "EscherDelayStm"); + System.out.println(""); + System.out.println("EscherDelayStm - " + data.length + " bytes long:"); + if(data.length > 0) + dumpEscherStream(data); + } + + public void dumpEnvelope() throws IOException { + byte[] data = getData(fs.getRoot(), "Envelope"); + + System.out.println(""); + System.out.println("Envelope - " + data.length + " bytes long:"); + } + + public void dumpContents() throws IOException { + byte[] data = getData(fs.getRoot(), "Contents"); + + System.out.println(""); + System.out.println("Contents - " + data.length + " bytes long:"); + + // 8 bytes, always seems to be + // E8 AC 2C 00 E8 03 05 01 + // E8 AC 2C 00 E8 03 05 01 + + // 4 bytes - size of contents + // 13/15 00 00 01 + + // .... + + // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... .......... + + // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... .. + + // 01 18 30 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + + // 01 18 31 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + + // 01 18 32 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + } + + public void dumpCONTENTSraw(DirectoryNode dir) throws IOException { + byte[] data = getData(dir, "CONTENTS"); + + System.out.println(""); + System.out.println("CONTENTS - " + data.length + " bytes long:"); + + // Between the start and 0x200 we have + // CHNKINK(space) + 24 bytes + // 0x1800 + // TEXT + 6 bytes + // TEXT + 8 bytes + // 0x1800 + // STSH + 6 bytes + // STSH + 8 bytes + // 0x1800 + // STSH + 6 bytes + // STSH + 8 bytes + // but towards 0x200 the pattern may + // break down a little bit + + // After the second of a given type, + // it seems to be 4 bytes giving the start, + // then 4 bytes giving the length, then + // 18 00 + System.out.println( + new String(data, 0, 8) + + dumpBytes(data, 8, 0x20-8) + ); + + int pos = 0x20; + boolean sixNotEight = true; + while(pos < 0x200) { + if(sixNotEight) { + System.out.println( + dumpBytes(data, pos, 2) + ); + pos += 2; + } + String text = new String(data, pos, 4); + int blen = 8; + if(sixNotEight) + blen = 6; + System.out.println( + text + " " + dumpBytes(data, pos+4, blen) + ); + + pos += 4 + blen; + sixNotEight = ! sixNotEight; + } + + // Text from 0x200 onwards until we get + // to \r(00)\n(00)(00)(00) + int textStop = -1; + for(int i=0x200; i<data.length-2 && textStop == -1; i++) { + if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) { + textStop = i; + } + } + if(textStop > 0) { + int len = (textStop - 0x200) / 2; + System.out.println(""); + System.out.println( + StringUtil.getFromUnicodeLE(data, 0x200, len) + ); + } + + // The font list comes slightly later + + // The hyperlinks may come before the fonts, + // or slightly in front + } + public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException { + byte[] data = getData(dir, "CONTENTS"); + + System.out.println(""); + System.out.println("CONTENTS - " + data.length + " bytes long:"); + + String[] startType = new String[20]; + String[] endType = new String[20]; + int[] optA = new int[20]; + int[] optB = new int[20]; + int[] optC = new int[20]; + int[] from = new int[20]; + int[] len = new int[20]; + + for(int i=0; i<20; i++) { + int offset = 0x20 + i*24; + if(data[offset] == 0x18 && data[offset+1] == 0x00) { + // Has data + startType[i] = new String(data, offset+2, 4); + optA[i] = LittleEndian.getUShort(data, offset+6); + optB[i] = LittleEndian.getUShort(data, offset+8); + optC[i] = LittleEndian.getUShort(data, offset+10); + endType[i] = new String(data, offset+12, 4); + from[i] = (int)LittleEndian.getUInt(data, offset+16); + len[i] = (int)LittleEndian.getUInt(data, offset+20); + } else { + // Doesn't have data + } + } + + String text = StringUtil.getFromUnicodeLE( + data, from[0], len[0]/2 + ); + + // Dump + for(int i=0; i<20; i++) { + String num = Integer.toString(i); + if(i < 10) { + num = "0" + i; + } + System.out.print(num + " "); + + if(startType[i] == null) { + System.out.println("(not present)"); + } else { + System.out.println( + "\t" + + startType[i] + " " + + optA[i] + " " + + optB[i] + " " + + optC[i] + ); + System.out.println( + "\t" + + endType[i] + " " + + "from: " + + Integer.toHexString(from[i]) + + " (" + from[i] + ")" + + ", len: " + + Integer.toHexString(len[i]) + + " (" + len[i] + ")" + ); + } + } + + // Text + System.out.println(""); + System.out.println("TEXT:"); + System.out.println(text); + System.out.println(""); + + // All the others + for(int i=0; i<20; i++) { + if(startType[i] == null) { + continue; + } + int start = from[i]; + + System.out.println( + startType[i] + " -> " + endType[i] + + " @ " + Integer.toHexString(start) + + " (" + start + ")" + ); + System.out.println("\t" + dumpBytes(data, start, 4)); + System.out.println("\t" + dumpBytes(data, start+4, 4)); + System.out.println("\t" + dumpBytes(data, start+8, 4)); + System.out.println("\t(etc)"); + } + } + + protected void dump001CompObj(DirectoryNode dir) { + // TODO + } + + public void dumpQuill() throws IOException { + DirectoryNode quillDir = (DirectoryNode) + fs.getRoot().getEntry("Quill"); + DirectoryNode quillSubDir = (DirectoryNode) + quillDir.getEntry("QuillSub"); + + dump001CompObj(quillSubDir); + dumpCONTENTSraw(quillSubDir); + dumpCONTENTSguessed(quillSubDir); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub Binary files differnew file mode 100755 index 0000000000..b870168319 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.pub diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt new file mode 100644 index 0000000000..279395e5de --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample.txt @@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub Binary files differnew file mode 100755 index 0000000000..610362c471 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.pub diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt new file mode 100644 index 0000000000..f8a68bb649 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2.txt @@ -0,0 +1,34 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + +We’ve added some more text in here, to push all the offsets about a bit. + + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + +Ditto with more text in here. + + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub Binary files differnew file mode 100755 index 0000000000..2c6174e5e8 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample2000.pub diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub Binary files differnew file mode 100755 index 0000000000..4f19bec93d --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.pub diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt new file mode 100644 index 0000000000..c2d791b9af --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample3.txt @@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page12345678 + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub Binary files differnew file mode 100755 index 0000000000..445df85f09 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.pub diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt new file mode 100644 index 0000000000..279395e5de --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample4.txt @@ -0,0 +1,29 @@ +This is some text on the first page +It’s in times new roman, font size 10, all normal + + +This is in bold and italic +It’s Arial, 20 point font +It’s in the second textbox on the first page + + +This is the second page + +It is also times new roman, 10 point + + +Table on page 2 Top right +P2 table left P2 table right +Bottom Left Bottom Right + + +This text is on page two +This is a link to Apache POI +More normal text +Link to a file + + +More text, more hyperlinks +email link +Final hyperlink +Within doc to page 1 diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub Binary files differnew file mode 100755 index 0000000000..8adffda77a --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Sample98.pub diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub Binary files differnew file mode 100644 index 0000000000..00deec14dc --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleBrochure.pub diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub Binary files differnew file mode 100644 index 0000000000..94900925af --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/SampleNewsletter.pub |