aboutsummaryrefslogtreecommitdiffstats
path: root/src/scratchpad
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2008-08-27 10:34:33 +0000
committerNick Burch <nick@apache.org>2008-08-27 10:34:33 +0000
commitab3de58257edddff10886d2f0830c07fbdf9abc8 (patch)
tree8ac7344f0b825d722aba854b57566f8d584d66ca /src/scratchpad
parent48ef5fb2d3523381ad6ef2c135b7625860a5d9f3 (diff)
downloadpoi-ab3de58257edddff10886d2f0830c07fbdf9abc8.tar.gz
poi-ab3de58257edddff10886d2f0830c07fbdf9abc8.zip
Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-66
0255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677027,677030-677040,677042-677056,677058-677375,677377-677968,677970-677971,677973,677975-677994,677996-678286,678288-678538,678540-680393,680395-680469,680471-680529,680531-680852,680854-681529,681531-681571,681573-682224,682226,682228,682231-682281,682283-682335,682337-682507,682509,682512-682517,682519-682532,682534-682619,682622-682777,682779-682998,683000-683019,683021-683022,683024-683080,683082-683092,683094-683095,683097-683127,683129-683131,683133-683166,683168-683698,683700-683705,683707-683757,683759-683787,683789-683870,683872-683879,683881-683900,683902-684066,684068-684074,684076-684222,684224-684254,684257-684281,684283-684286,684288-684292,684294-684298,684300-684301,684303-684308,684310-684317,684320,684323-684335,684337-684348,684350-684354,684356-684361,684363-684369,684371-684453,684455-684883,684885-684937,684940-684958,684960-684970,684972-684985,684987-685053,685055-685063,685065-685259,685261-685262,685264-685266,685268-685282,685285-686035,686037-686045,686047-686052,686054-686206,686208-686215,686217-686277,686279-686289,686291-686620,686622-686623,686626-686627,686629-686639,686641-686843,686845-686976,686978-689430 
via svnmerge from
https://svn.apache.org/repos/asf/poi/trunk ........ r687403 | nick | 2008-08-20 19:14:11 +0100 (Wed, 20 Aug 2008) | 1 line Make an initial start on hpbf code ........ r687423 | nick | 2008-08-20 19:50:15 +0100 (Wed, 20 Aug 2008) | 1 line More HPBF stuff, and some tests ........ r687429 | nick | 2008-08-20 20:40:05 +0100 (Wed, 20 Aug 2008) | 1 line Quill CONTENTS bits, and tests ........ r687443 | nick | 2008-08-20 21:13:08 +0100 (Wed, 20 Aug 2008) | 1 line HPBF text extractor and unit tests ........ r688426 | josh | 2008-08-23 23:47:51 +0100 (Sat, 23 Aug 2008) | 1 line Fix for bug 45672 - prevent MissingRecordAwareHSSFListener generating multiple LastCellOfRowDummyRecords when shared formulas are present ........ r688642 | josh | 2008-08-25 08:56:37 +0100 (Mon, 25 Aug 2008) | 1 line JDK 1.4 fixes for new hpbf stuff. Some clean-up ........ r688650 | josh | 2008-08-25 09:09:02 +0100 (Mon, 25 Aug 2008) | 1 line Initial support for evaluating external add-in functions like YEARFRAC ........ r688655 | josh | 2008-08-25 09:30:54 +0100 (Mon, 25 Aug 2008) | 1 line Fix for bug 45682 - allow cloning of sheets with conditional formatting ........ r688825 | josh | 2008-08-25 19:57:14 +0100 (Mon, 25 Aug 2008) | 1 line Fix for small bug introduced in c688655 - keep header field in sync with number of rules ........ r688910 | josh | 2008-08-25 23:41:08 +0100 (Mon, 25 Aug 2008) | 1 line Replaced calls to deprecated versions of createCell(), getCell(), createRow(), and getRow(). (Changing short to int) ........ git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@689435 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java86
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java78
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/EscherDelayStm.java30
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/EscherPart.java80
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/EscherStm.java29
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/HPBFPart.java104
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/MainContents.java38
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java87
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java69
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCTextBit.java43
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/UnknownQCBit.java27
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hpbf/TestHPBFDocument.java60
-rwxr-xr-xsrc/scratchpad/testcases/org/apache/poi/hpbf/data/Simple.pubbin0 -> 65536 bytes
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java105
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java50
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hpbf/model/TestQuillContents.java80
16 files changed, 966 insertions, 0 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java b/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
new file mode 100644
index 0000000000..49de0d0876
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
@@ -0,0 +1,86 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hpbf;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.poi.POIDocument;
+import org.apache.poi.hpbf.model.EscherDelayStm;
+import org.apache.poi.hpbf.model.EscherStm;
+import org.apache.poi.hpbf.model.MainContents;
+import org.apache.poi.hpbf.model.QuillContents;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Entry point for HPBF, our implementation of the
+ *  publisher file format. On construction it loads
+ *  the main contents, the quill contents and the two
+ *  escher streams, and exposes each through a getter.
+ */
+public final class HPBFDocument extends POIDocument {
+    private MainContents mainContents;
+    private QuillContents quillContents;
+    private EscherStm escherStm;
+    private EscherDelayStm escherDelayStm;
+
+    /**
+     * Opens a new publisher document, read in full
+     *  from the supplied stream
+     */
+    public HPBFDocument(InputStream inp) throws IOException {
+        this(new POIFSFileSystem(inp));
+    }
+
+    /**
+     * Opens a new publisher document, which lives at
+     *  the root of the supplied filesystem
+     */
+    public HPBFDocument(POIFSFileSystem fs) throws IOException {
+        this(fs.getRoot(), fs);
+    }
+
+    /**
+     * Opens an embedded publisher document,
+     *  at the given directory.
+     */
+    public HPBFDocument(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
+        super(dir, fs);
+
+        // The two content streams come first ...
+        this.mainContents = new MainContents(dir);
+        this.quillContents = new QuillContents(dir);
+
+        // ... followed by the two escher streams
+        this.escherStm = new EscherStm(dir);
+        this.escherDelayStm = new EscherDelayStm(dir);
+    }
+
+    /** Returns the main contents part loaded at construction */
+    public MainContents getMainContents() {
+        return this.mainContents;
+    }
+
+    /** Returns the quill contents part loaded at construction */
+    public QuillContents getQuillContents() {
+        return this.quillContents;
+    }
+
+    /** Returns the escher stream part loaded at construction */
+    public EscherStm getEscherStm() {
+        return this.escherStm;
+    }
+
+    /** Returns the escher delay stream part loaded at construction */
+    public EscherDelayStm getEscherDelayStm() {
+        return this.escherDelayStm;
+    }
+
+    /**
+     * Writing is not supported yet, so this
+     *  always throws an IllegalStateException
+     */
+    public void write(OutputStream out) throws IOException {
+        throw new IllegalStateException("Writing is not yet implemented, see http://poi.apache.org/hpbf/");
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
new file mode 100644
index 0000000000..2257283a0f
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
@@ -0,0 +1,78 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.extractor;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.hpbf.HPBFDocument;
+import org.apache.poi.hpbf.model.qcbits.QCBit;
+import org.apache.poi.hpbf.model.qcbits.QCTextBit;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Extract text from HPBF Publisher files.
+ * Currently only the text bits of the Quill
+ *  CONTENTS stream are returned.
+ */
+public class PublisherTextExtractor extends POIOLE2TextExtractor {
+    private HPBFDocument doc;
+
+    /**
+     * Creates an extractor over an already opened document
+     */
+    public PublisherTextExtractor(HPBFDocument doc) {
+        super(doc);
+        this.doc = doc;
+    }
+    /**
+     * Creates an extractor for the publisher document
+     *  at the root of the given filesystem
+     */
+    public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
+        this(new HPBFDocument(fs));
+    }
+    /**
+     * Creates an extractor for the publisher document
+     *  read from the given stream. The stream is not
+     *  closed by this class, that is up to the caller.
+     */
+    public PublisherTextExtractor(InputStream is) throws IOException {
+        this(new POIFSFileSystem(is));
+    }
+
+    /**
+     * Returns the text of every text bit in the Quill
+     *  CONTENTS, in slot order, with \r line endings
+     *  turned into \n
+     */
+    public String getText() {
+        StringBuffer text = new StringBuffer();
+
+        // Get the text from the Quill Contents
+        QCBit[] bits = doc.getQuillContents().getBits();
+        for(int i=0; i<bits.length; i++) {
+            // instanceof is false for null, so unused
+            //  slots need no separate check
+            if(bits[i] instanceof QCTextBit) {
+                QCTextBit t = (QCTextBit)bits[i];
+                text.append( t.getText().replace('\r', '\n') );
+            }
+        }
+
+        // Get more text
+        // TODO
+
+        return text.toString();
+    }
+
+
+    /**
+     * Prints the text of each publisher file named
+     *  on the command line to standard out
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length == 0) {
+            System.err.println("Use:");
+            System.err.println(" PublisherTextExtractor <file.pub>");
+            return;
+        }
+
+        for(int i=0; i<args.length; i++) {
+            FileInputStream in = new FileInputStream(args[i]);
+            try {
+                PublisherTextExtractor te = new PublisherTextExtractor(in);
+                System.out.println(te.getText());
+            } finally {
+                // Always release the file handle, even if
+                //  the file turned out not to be parseable
+                in.close();
+            }
+        }
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/EscherDelayStm.java b/src/scratchpad/src/org/apache/poi/hpbf/model/EscherDelayStm.java
new file mode 100644
index 0000000000..032484626e
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/EscherDelayStm.java
@@ -0,0 +1,30 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hpbf.model;
+
+import java.io.IOException;
+
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+
+/**
+ * The escher part stored in the EscherDelayStm
+ *  entry of the Escher directory
+ */
+public final class EscherDelayStm extends EscherPart {
+    /** Directory name followed by the name of the stream within it */
+    private static final String[] STREAM_PATH = new String[] {
+        "Escher",
+        "EscherDelayStm"
+    };
+
+    /**
+     * Reads the part, looking it up relative
+     *  to the given base directory
+     */
+    public EscherDelayStm(DirectoryNode baseDir) throws IOException {
+        super(baseDir, STREAM_PATH);
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/EscherPart.java b/src/scratchpad/src/org/apache/poi/hpbf/model/EscherPart.java
new file mode 100644
index 0000000000..00e685f8f2
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/EscherPart.java
@@ -0,0 +1,80 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hpbf.model;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.poi.ddf.DefaultEscherRecordFactory;
+import org.apache.poi.ddf.EscherRecord;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+
+/**
+ * Parent class of all Escher parts. Parses the raw
+ *  bytes of the part into escher records on load, and
+ *  serialises those records back when writing out.
+ */
+public abstract class EscherPart extends HPBFPart {
+    /** Every escher record starts with a header of this many bytes */
+    private static final int RECORD_HEADER_SIZE = 8;
+
+    private EscherRecord[] records;
+
+    /**
+     * Creates the Escher Part, and finds our child
+     *  escher records
+     *
+     * @param baseDir the directory the path is relative to
+     * @param parts the path to the part
+     * @throws IOException if the part can't be read
+     * @throws IllegalArgumentException if a record claims
+     *  a size that would stop the parse from advancing
+     */
+    public EscherPart(DirectoryNode baseDir, String[] parts) throws IOException {
+        super(baseDir, parts);
+
+        // Now create our Escher children
+        DefaultEscherRecordFactory erf =
+            new DefaultEscherRecordFactory();
+
+        ArrayList ec = new ArrayList();
+        int offset = 0;
+        // Stop once there's no room left for another
+        //  record header, so trailing padding is skipped
+        while(data.length - offset >= RECORD_HEADER_SIZE) {
+            // Read at the current position, not at 0, so
+            //  that each pass picks up the next record
+            //  rather than the first one over and over
+            EscherRecord er = erf.createRecord(data, offset);
+            er.fillFields(data, offset, erf);
+
+            int size = er.getRecordSize();
+            if(size <= 0) {
+                // Without this we would never advance
+                throw new IllegalArgumentException("File invalid - escher record at offset "
+                        + offset + " has size " + size);
+            }
+            offset += size;
+
+            ec.add(er);
+        }
+
+        records = (EscherRecord[])
+            ec.toArray(new EscherRecord[ec.size()]);
+    }
+
+    /**
+     * Returns the escher records found in this part,
+     *  in the order they occur in the data
+     */
+    public EscherRecord[] getEscherRecords() {
+        return records;
+    }
+
+    /**
+     * Serialises our Escher children back
+     *  into bytes.
+     */
+    protected void generateData() {
+        // First pass - work out how much space we need
+        int size = 0;
+        for(int i=0; i<records.length; i++) {
+            size += records[i].getRecordSize();
+        }
+
+        // Second pass - write each record directly
+        //  after the one before it
+        data = new byte[size];
+        size = 0;
+        for(int i=0; i<records.length; i++) {
+            int thisSize =
+                records[i].serialize(size, data);
+            size += thisSize;
+        }
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/EscherStm.java b/src/scratchpad/src/org/apache/poi/hpbf/model/EscherStm.java
new file mode 100644
index 0000000000..2ed79ea341
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/EscherStm.java
@@ -0,0 +1,29 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hpbf.model;
+
+import java.io.IOException;
+
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+
+/**
+ * The escher part stored in the EscherStm
+ *  entry of the Escher directory
+ */
+public final class EscherStm extends EscherPart {
+    /** Directory name followed by the name of the stream within it */
+    private static final String[] STREAM_PATH = new String[] {
+        "Escher",
+        "EscherStm"
+    };
+
+    /**
+     * Reads the part, looking it up relative
+     *  to the given base directory
+     */
+    public EscherStm(DirectoryNode baseDir) throws IOException {
+        super(baseDir, STREAM_PATH);
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/HPBFPart.java b/src/scratchpad/src/org/apache/poi/hpbf/model/HPBFPart.java
new file mode 100644
index 0000000000..9db15c6642
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/HPBFPart.java
@@ -0,0 +1,104 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.model;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+
+/**
+ * Parent class of all HPBF sub-parts, handling
+ *  the fiddly reading in / writing out bits
+ *  for all of them.
+ */
+public abstract class HPBFPart {
+    protected byte[] data;
+    /** Where this part lives, kept so that writeOut can find it again */
+    private final String[] path;
+
+    /**
+     * Reads the whole of the part into memory.
+     *
+     * @param baseDir the directory the path is relative to
+     * @param path the path to the part, eg Contents or Quill, QuillSub, CONTENTS
+     * @throws IOException if the part can't be read in full
+     * @throws IllegalArgumentException if a directory or the
+     *  document entry named in the path doesn't exist
+     */
+    public HPBFPart(DirectoryNode baseDir, String[] path) throws IOException {
+        // Take a copy, so later changes to the caller's
+        //  array can't alter where we write to
+        this.path = (String[])path.clone();
+
+        DirectoryNode dir = getDir(path, baseDir);
+        String name = path[path.length-1];
+
+        DocumentEntry docProps;
+        try {
+            docProps = (DocumentEntry)dir.getEntry(name);
+        } catch (FileNotFoundException e) {
+            throw invalidFile("File invalid - failed to find document entry '"
+                    + name + "'", e);
+        }
+
+        // Grab the data from the part stream. One call to
+        //  read() may return fewer bytes than asked for, so
+        //  keep going until the buffer is full
+        data = new byte[docProps.getSize()];
+        java.io.InputStream in = dir.createDocumentInputStream(name);
+        try {
+            int total = 0;
+            while(total < data.length) {
+                int got = in.read(data, total, data.length - total);
+                if(got < 0) {
+                    throw new IOException("Document entry '" + name + "' ended after "
+                            + total + " of " + data.length + " bytes");
+                }
+                total += got;
+            }
+        } finally {
+            in.close();
+        }
+    }
+
+    /**
+     * Walks down every element of the path except the
+     *  last, returning the directory that holds the part
+     */
+    private DirectoryNode getDir(String[] path, DirectoryNode baseDir) {
+        DirectoryNode dir = baseDir;
+        for(int i=0; i<path.length-1; i++) {
+            try {
+                dir = (DirectoryNode)dir.getEntry(path[i]);
+            } catch (FileNotFoundException e) {
+                throw invalidFile("File invalid - failed to find directory entry '"
+                        + path[i] + "'", e);
+            }
+        }
+        return dir;
+    }
+
+    /**
+     * Builds the exception used for a missing entry, keeping
+     *  hold of the underlying cause. Uses initCause as the
+     *  two argument constructor isn't in older JDKs.
+     */
+    private static IllegalArgumentException invalidFile(String message, Throwable cause) {
+        IllegalArgumentException iae = new IllegalArgumentException(message);
+        iae.initCause(cause);
+        return iae;
+    }
+
+    /**
+     * Writes this part out below the given directory, at
+     *  the same path it was read from, creating any parent
+     *  directories that are missing.
+     */
+    public void writeOut(DirectoryNode baseDir) throws IOException {
+        String[] parts = getPath();
+
+        // Ensure that all parent directories exist
+        DirectoryNode dir = baseDir;
+        for(int i=0; i<parts.length-1; i++) {
+            try {
+                dir = (DirectoryNode)dir.getEntry(parts[i]);
+            } catch(FileNotFoundException e) {
+                // Step into the new directory, so that what
+                //  follows is created inside of it and not
+                //  alongside it
+                dir = (DirectoryNode)dir.createDirectory(parts[i]);
+            }
+        }
+
+        // Update the byte array with the latest data
+        generateData();
+
+        // Write out
+        ByteArrayInputStream bais = new ByteArrayInputStream(data);
+        dir.createDocument(parts[parts.length-1], bais);
+    }
+
+    /**
+     * Called just before writing out, to trigger
+     *  the data byte array to be updated with the
+     *  latest contents.
+     */
+    protected abstract void generateData();
+
+    /**
+     * Returns the raw data that makes up
+     *  this document part.
+     */
+    public byte[] getData() { return data; }
+
+    /**
+     * Returns the path this part was loaded from, as
+     *  given to the constructor. The array is a copy.
+     */
+    public final String[] getPath() { return (String[])path.clone(); }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/MainContents.java b/src/scratchpad/src/org/apache/poi/hpbf/model/MainContents.java
new file mode 100644
index 0000000000..9783ebdc54
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/MainContents.java
@@ -0,0 +1,38 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hpbf.model;
+
+import java.io.IOException;
+
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+
+/**
+ * The main Contents stream. Its format is not yet
+ *  understood, so the bytes are only held, not parsed.
+ */
+public final class MainContents extends HPBFPart {
+    /** The part is a single entry directly below the base directory */
+    private static final String[] STREAM_PATH = new String[] { "Contents" };
+
+    /**
+     * Reads the part, looking it up relative
+     *  to the given base directory
+     */
+    public MainContents(DirectoryNode baseDir) throws IOException {
+        super(baseDir, STREAM_PATH);
+    }
+
+    /**
+     * Does nothing - the data is never parsed, so the
+     *  bytes that were read in are still current
+     */
+    protected void generateData() {
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java b/src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java
new file mode 100644
index 0000000000..ae626fcae3
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java
@@ -0,0 +1,87 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.model;
+
+import java.io.IOException;
+
+import org.apache.poi.hpbf.model.qcbits.QCBit;
+import org.apache.poi.hpbf.model.qcbits.QCTextBit;
+import org.apache.poi.hpbf.model.qcbits.UnknownQCBit;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * Quill -> QuillSub -> CONTENTS
+ *
+ * Holds a signature, followed by a fixed size table
+ *  of slots, each of which may point to one bit of
+ *  data held elsewhere in the stream.
+ */
+public final class QuillContents extends HPBFPart {
+    private static final String[] PATH = { "Quill", "QuillSub", "CONTENTS", };
+
+    /** The first 8 bytes of the stream must be this */
+    private static final String SIGNATURE = "CHNKINK ";
+    /**
+     * Maps each byte to exactly one char, so decoding the
+     *  signature and type tags doesn't depend on the
+     *  default charset of the platform
+     */
+    private static final String TAG_CHARSET = "ISO-8859-1";
+    /** Where the first slot of the table starts */
+    private static final int TABLE_OFFSET = 0x20;
+    /** Size of a single slot of the table */
+    private static final int SLOT_SIZE = 24;
+    /** Number of slots in the table */
+    private static final int SLOT_COUNT = 20;
+
+    private QCBit[] bits;
+
+    /**
+     * Reads the part, and parses the table at its
+     *  start into bits.
+     *
+     * @param baseDir the directory the part's path is relative to
+     * @throws IOException if the part can't be read
+     * @throws IllegalArgumentException if the signature is wrong,
+     *  the stream is too short to hold the table, or a slot
+     *  points outside of the stream
+     */
+    public QuillContents(DirectoryNode baseDir) throws IOException {
+        super(baseDir, PATH);
+
+        // Now parse the first 512 bytes, and produce
+        //  all our bits
+
+        // Check first 8 bytes
+        if(data.length < SIGNATURE.length()) {
+            throw new IllegalArgumentException("File invalid - Quill CONTENTS is only "
+                    + data.length + " bytes long");
+        }
+        String f8 = new String(data, 0, 8, TAG_CHARSET);
+        if(! f8.equals(SIGNATURE)) {
+            throw new IllegalArgumentException("Expecting 'CHNKINK ' but was '"+f8+"'");
+        }
+        // Ignore the next 24, for now at least
+
+        // Make sure the whole table is there before
+        //  we start indexing into it
+        int tableEnd = TABLE_OFFSET + SLOT_COUNT*SLOT_SIZE;
+        if(data.length < tableEnd) {
+            throw new IllegalArgumentException("File invalid - Quill CONTENTS is only "
+                    + data.length + " bytes long, but the table needs " + tableEnd);
+        }
+
+        // Now, parse all our QC Bits
+        bits = new QCBit[SLOT_COUNT];
+        for(int i=0; i<SLOT_COUNT; i++) {
+            bits[i] = readBit(TABLE_OFFSET + i*SLOT_SIZE);
+        }
+    }
+
+    /**
+     * Builds the bit described by the slot at the given
+     *  offset, or returns null if the slot has no data
+     */
+    private QCBit readBit(int offset) throws IOException {
+        if(data[offset] != 0x18 || data[offset+1] != 0x00) {
+            // Doesn't have data
+            return null;
+        }
+
+        // Has some data
+        String thingType = new String(data, offset+2, 4, TAG_CHARSET);
+        int optA = LittleEndian.getUShort(data, offset+6);
+        int optB = LittleEndian.getUShort(data, offset+8);
+        int optC = LittleEndian.getUShort(data, offset+10);
+        String bitType = new String(data, offset+12, 4, TAG_CHARSET);
+
+        // Kept as longs until checked, as the values come
+        //  straight from the file, and may not fit an int
+        long from = LittleEndian.getUInt(data, offset+16);
+        long len = LittleEndian.getUInt(data, offset+20);
+        if(from + len > data.length) {
+            throw new IllegalArgumentException("File invalid - the " + thingType
+                    + " bit claims " + len + " bytes from " + from
+                    + ", but there are only " + data.length);
+        }
+
+        byte[] bitData = new byte[(int)len];
+        System.arraycopy(data, (int)from, bitData, 0, (int)len);
+
+        // Create
+        QCBit bit;
+        if(bitType.equals("TEXT")) {
+            bit = new QCTextBit(thingType, bitType, bitData);
+        } else {
+            bit = new UnknownQCBit(thingType, bitType, bitData);
+        }
+        bit.setOptA(optA);
+        bit.setOptB(optB);
+        bit.setOptC(optC);
+        return bit;
+    }
+
+    /**
+     * Returns one entry per slot of the table. The
+     *  entry is null where the slot had no data.
+     */
+    public QCBit[] getBits() {
+        return bits;
+    }
+
+    /**
+     * Not yet implemented, always throws
+     *  an IllegalStateException
+     */
+    protected void generateData() {
+        // TODO
+        throw new IllegalStateException("Not done yet!");
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java
new file mode 100644
index 0000000000..61c7955f66
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java
@@ -0,0 +1,69 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.model.qcbits;
+
+/**
+ * Parent of all Quill CONTENTS bits. Holds the two
+ *  type names and the raw bytes of the bit, along with
+ *  three option values set after construction.
+ */
+public abstract class QCBit {
+    protected String thingType;
+    protected String bitType;
+    protected byte[] data;
+
+    protected int optA;
+    protected int optB;
+    protected int optC;
+
+    /**
+     * Creates the bit. The data array is held
+     *  as-is, and is not copied.
+     */
+    public QCBit(String thingType, String bitType, byte[] data) {
+        this.thingType = thingType;
+        this.bitType = bitType;
+        this.data = data;
+    }
+
+    /**
+     * Returns the type of the thing, eg TEXT, FONT
+     *  or TOKN
+     */
+    public String getThingType() {
+        return this.thingType;
+    }
+
+    /**
+     * Returns the type of the bit data, eg TEXT
+     *  or PLC
+     */
+    public String getBitType() {
+        return this.bitType;
+    }
+
+    /**
+     * Returns the raw bytes of the bit. This is the
+     *  array that is held, not a copy of it.
+     */
+    public byte[] getData() {
+        return this.data;
+    }
+
+    /** Returns the first option value */
+    public int getOptA() { return this.optA; }
+    /** Sets the first option value */
+    public void setOptA(int optA) { this.optA = optA; }
+
+    /** Returns the second option value */
+    public int getOptB() { return this.optB; }
+    /** Sets the second option value */
+    public void setOptB(int optB) { this.optB = optB; }
+
+    /** Returns the third option value */
+    public int getOptC() { return this.optC; }
+    /** Sets the third option value */
+    public void setOptC(int optC) { this.optC = optC; }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCTextBit.java b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCTextBit.java
new file mode 100644
index 0000000000..e3c8dcb589
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCTextBit.java
@@ -0,0 +1,43 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.model.qcbits;
+
+import org.apache.poi.util.StringUtil;
+
+/**
+ * A Text based bit of Quill Contents. The text is
+ *  held in the data array at two bytes per character.
+ */
+public class QCTextBit extends QCBit {
+    public QCTextBit(String thingType, String bitType, byte[] data) {
+        super(thingType, bitType, data);
+    }
+
+    /**
+     * Returns the text. Note that line endings
+     *  are \r and not \n
+     */
+    public String getText() {
+        // Two bytes make up each character
+        final int charCount = data.length / 2;
+        return StringUtil.getFromUnicodeLE(data, 0, charCount);
+    }
+
+    /**
+     * Replaces the data of this bit with
+     *  the encoded form of the given text
+     */
+    public void setText(String text) {
+        final int byteCount = text.length() * 2;
+        data = new byte[byteCount];
+        StringUtil.putUnicodeLE(text, data, 0);
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/UnknownQCBit.java b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/UnknownQCBit.java
new file mode 100644
index 0000000000..2548b4fca6
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/UnknownQCBit.java
@@ -0,0 +1,27 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.model.qcbits;
+
+/**
+ * Any Quill Contents bits we don't know
+ * how to handle explicitly. Adds nothing of its own beyond the QCBit constructor pass-through.
+ */
+public class UnknownQCBit extends QCBit {
+ public UnknownQCBit(String thingType, String bitType, byte[] data) { // hands all three arguments straight to the QCBit constructor
+ super(thingType, bitType, data);
+ }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/TestHPBFDocument.java b/src/scratchpad/testcases/org/apache/poi/hpbf/TestHPBFDocument.java
new file mode 100644
index 0000000000..c0186dd8d1
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/TestHPBFDocument.java
@@ -0,0 +1,60 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+import junit.framework.TestCase;
+
+public class TestHPBFDocument extends TestCase { // checks that Sample.pub opens as an HPBFDocument and exposes its four parts
+ private String dir; // directory holding the test .pub files; assigned in setUp()
+
+ protected void setUp() throws Exception {
+ dir = System.getProperty("HPBF.testdata.path"); // null when the property is not set
+ }
+
+ public void testOpen() throws Exception { // smoke test: constructing the document must not throw
+ File f = new File(dir, "Sample.pub");
+ HPBFDocument doc = new HPBFDocument(
+ new FileInputStream(f) // NOTE(review): this stream is never closed by the test
+ );
+
+ assertNotNull(doc);
+ }
+
+ public void testBits() throws Exception { // all four parts exist; only the Escher delay stream is expected to hold no data
+ File f = new File(dir, "Sample.pub");
+ HPBFDocument doc = new HPBFDocument(
+ new FileInputStream(f) // NOTE(review): this stream is never closed by the test
+ );
+
+ assertNotNull(doc.getMainContents());
+ assertNotNull(doc.getQuillContents());
+ assertNotNull(doc.getEscherStm());
+ assertNotNull(doc.getEscherDelayStm());
+
+ assertTrue(doc.getMainContents().getData().length > 0);
+ assertTrue(doc.getQuillContents().getData().length > 0);
+ assertTrue(doc.getEscherStm().getData().length > 0);
+ assertTrue(doc.getEscherDelayStm().getData().length == 0); // the delay stream is present but empty in Sample.pub
+ }
+
+ // TODO - no write test yet; the commented-out stub below is a placeholder
+// public void testWrite() throws Exception {
+// }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/data/Simple.pub b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Simple.pub
new file mode 100755
index 0000000000..2397b9d01c
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/data/Simple.pub
Binary files differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java
new file mode 100644
index 0000000000..96396e1073
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java
@@ -0,0 +1,105 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+import org.apache.poi.hpbf.HPBFDocument;
+
+import junit.framework.TestCase;
+
+public class TextPublisherTextExtractor extends TestCase { // tests for PublisherTextExtractor. NOTE(review): the name starts with "Text" where the sibling test classes start with "Test" - presumably a typo; confirm the test runner still picks this class up
+ private String dir; // directory holding the test .pub files; assigned in setUp()
+
+ protected void setUp() throws Exception {
+ dir = System.getProperty("HPBF.testdata.path"); // null when the property is not set
+ }
+
+ public void testBasics() throws Exception { // smoke test: getText() must not throw via either constructor
+ File f = new File(dir, "Sample.pub");
+ HPBFDocument doc = new HPBFDocument(
+ new FileInputStream(f) // NOTE(review): this stream is never closed by the test
+ );
+
+ PublisherTextExtractor ext =
+ new PublisherTextExtractor(doc); // construct from an already-opened document
+ ext.getText();
+
+ f = new File(dir, "Simple.pub");
+ ext = new PublisherTextExtractor(
+ new FileInputStream(f) // construct directly from a stream; also never closed by the test
+ );
+ ext.getText();
+ }
+
+ public void testContents() throws Exception { // pins the exact extracted text of Sample.pub and Simple.pub
+ File f = new File(dir, "Sample.pub");
+ HPBFDocument doc = new HPBFDocument(
+ new FileInputStream(f)
+ );
+
+ PublisherTextExtractor ext =
+ new PublisherTextExtractor(doc);
+ String text = ext.getText();
+
+ assertEquals( // expected value comes first; every expected line ends in \n
+"This is some text on the first page\n" +
+"It’s in times new roman, font size 10, all normal\n" +
+"" + // the empty literals only group the expectation visually; they add no characters
+"This is in bold and italic\n" +
+"It’s Arial, 20 point font\n" +
+"It’s in the second textbox on the first page\n" +
+"" +
+"This is the second page\n\n" +
+"" +
+"It is also times new roman, 10 point\n" +
+"" +
+"Table on page 2\nTop right\n" +
+"P2 table left\nP2 table right\n" +
+"Bottom Left\nBottom Right\n" +
+"" +
+"This text is on page two\n" +
+"#This is a link to Apache POI\n" +
+"More normal text\n" +
+"Link to a file\n" +
+"" +
+"More text, more hyperlinks\n" +
+"email link\n" +
+"Final hyperlink\n" +
+"Within doc to page 1\n"
+ , text
+ );
+
+ // Now a simpler one, read through the InputStream constructor
+ f = new File(dir, "Simple.pub");
+ ext = new PublisherTextExtractor(
+ new FileInputStream(f)
+ );
+ text = ext.getText();
+ assertEquals(
+"0123456789\n" +
+"0123456789abcdef\n" +
+"0123456789abcdef0123456789abcdef\n" +
+"0123456789\n" +
+"0123456789abcdef\n" +
+"0123456789abcdef0123456789abcdef\n" +
+"0123456789abcdef0123456789abcdef0123456789abcdef\n"
+ , text
+ );
+ }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java b/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java
new file mode 100644
index 0000000000..dbaf46c649
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java
@@ -0,0 +1,50 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.model;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+import org.apache.poi.hpbf.HPBFDocument;
+
+import junit.framework.TestCase;
+
+public class TestEscherParts extends TestCase { // tests for the two Escher streams exposed by HPBFDocument
+ private String dir; // directory holding the test .pub files; assigned in setUp()
+
+ protected void setUp() throws Exception {
+ dir = System.getProperty("HPBF.testdata.path"); // null when the property is not set
+ }
+
+ public void testBasics() throws Exception { // Sample.pub: both streams exist; record counts are pinned below
+ File f = new File(dir, "Sample.pub");
+ HPBFDocument doc = new HPBFDocument(
+ new FileInputStream(f) // NOTE(review): this stream is never closed by the test
+ );
+
+ EscherStm es = doc.getEscherStm();
+ EscherDelayStm eds = doc.getEscherDelayStm();
+
+ assertNotNull(es);
+ assertNotNull(eds);
+
+ assertEquals(13, es.getEscherRecords().length); // count expected for Sample.pub
+ assertEquals(0, eds.getEscherRecords().length); // the delay stream yields no records
+
+ // TODO - check the contents of the records, not just how many there are
+ }
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestQuillContents.java b/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestQuillContents.java
new file mode 100644
index 0000000000..ce6ddf83ef
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestQuillContents.java
@@ -0,0 +1,80 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.model;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+import org.apache.poi.hpbf.HPBFDocument;
+import org.apache.poi.hpbf.model.qcbits.QCTextBit;
+
+import junit.framework.TestCase;
+
+public class TestQuillContents extends TestCase { // tests for the QuillContents part of Sample.pub
+ private String dir; // directory holding the test .pub files; assigned in setUp()
+
+ protected void setUp() throws Exception {
+ dir = System.getProperty("HPBF.testdata.path"); // null when the property is not set
+ }
+
+ public void testBasics() throws Exception { // checks the number of bit slots and the type and optA of the first four
+ File f = new File(dir, "Sample.pub");
+ HPBFDocument doc = new HPBFDocument(
+ new FileInputStream(f) // NOTE(review): this stream is never closed by the test
+ );
+
+ QuillContents qc = doc.getQuillContents();
+ assertEquals(20, qc.getBits().length);
+ for(int i=0; i<19; i++) { // the first 19 slots must be populated
+ assertNotNull(qc.getBits()[i]);
+ }
+ // Last one is blank, i.e. the array slot is null
+ assertNull(qc.getBits()[19]);
+
+ // Should be text, then three STSHs with optA 0, 1 and 2
+ assertEquals("TEXT", qc.getBits()[0].getThingType());
+ assertEquals("TEXT", qc.getBits()[0].getBitType());
+ assertEquals(0, qc.getBits()[0].getOptA());
+
+ assertEquals("STSH", qc.getBits()[1].getThingType());
+ assertEquals("STSH", qc.getBits()[1].getBitType());
+ assertEquals(0, qc.getBits()[1].getOptA());
+
+ assertEquals("STSH", qc.getBits()[2].getThingType());
+ assertEquals("STSH", qc.getBits()[2].getBitType());
+ assertEquals(1, qc.getBits()[2].getOptA());
+
+ assertEquals("STSH", qc.getBits()[3].getThingType());
+ assertEquals("STSH", qc.getBits()[3].getBitType());
+ assertEquals(2, qc.getBits()[3].getOptA());
+ }
+
+ public void testText() throws Exception { // the first bit is a QCTextBit; checks how its text starts and ends
+ File f = new File(dir, "Sample.pub");
+ HPBFDocument doc = new HPBFDocument(
+ new FileInputStream(f) // NOTE(review): this stream is never closed by the test
+ );
+
+ QuillContents qc = doc.getQuillContents();
+ assertEquals(20, qc.getBits().length);
+
+ QCTextBit text = (QCTextBit)qc.getBits()[0]; // the cast fails the test if the first bit is not a QCTextBit
+ String t = text.getText();
+ assertTrue(t.startsWith("This is some text on the first page"));
+ assertTrue(t.endsWith("Within doc to page 1\r")); // the raw text ends in \r rather than \n
+ }
+}