aboutsummaryrefslogtreecommitdiffstats
path: root/src/scratchpad
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2008-08-30 16:49:07 +0000
committerNick Burch <nick@apache.org>2008-08-30 16:49:07 +0000
commit64ca0135ff942b4a6b68fd6260a595578248ef41 (patch)
tree716e266019eaabbf42f1f5063610882b3a0f2b7c /src/scratchpad
parentadd15697500f68770362f26ee5380397b8e8ddca (diff)
downloadpoi-64ca0135ff942b4a6b68fd6260a595578248ef41.tar.gz
poi-64ca0135ff942b4a6b68fd6260a595578248ef41.zip
Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-66
0255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677027,677030-677040,677042-677056,677058-677375,677377-677968,677970-677971,677973,677975-677994,677996-678286,678288-678538,678540-680393,680395-680469,680471-680529,680531-680852,680854-681529,681531-681571,681573-682224,682226,682228,682231-682281,682283-682335,682337-682507,682509,682512-682517,682519-682532,682534-682619,682622-682777,682779-682998,683000-683019,683021-683022,683024-683080,683082-683092,683094-683095,683097-683127,683129-683131,683133-683166,683168-683698,683700-683705,683707-683757,683759-683787,683789-683870,683872-683879,683881-683900,683902-684066,684068-684074,684076-684222,684224-684254,684257-684281,684283-684286,684288-684292,684294-684298,684300-684301,684303-684308,684310-684317,684320,684323-684335,684337-684348,684350-684354,684356-684361,684363-684369,684371-684453,684455-684883,684885-684937,684940-684958,684960-684970,684972-684985,684987-685053,685055-685063,685065-685259,685261-685262,685264-685266,685268-685282,685285-686035,686037-686045,686047-686052,686054-686206,686208-686215,686217-686277,686279-686289,686291-686620,686622-686623,686626-686627,686629-686639,686641-686843,686845-686976,686978-687402,
687404-687422,687424-687428,687430-687442,687444-688425,688427-688641,688643-688649,688651-688654,688656-688824,688826-688909,688911-689543,689545-689558,689560-689635,689637-689703,689705-689715,689717-689718,689720,689722-689972,689974-690090,690092-690093,690095-690111,690113-690258,690260-690261,690263-690517 via svnmerge from
https://svn.apache.org/repos/asf/poi/trunk ........ r690404 | josh | 2008-08-29 23:08:42 +0100 (Fri, 29 Aug 2008) | 1 line Clean-up toString() and inner class ........ r690411 | josh | 2008-08-29 23:21:10 +0100 (Fri, 29 Aug 2008) | 1 line Added ArrayRecord and CellRangeAddress8Bit ........ r690461 | josh | 2008-08-30 05:34:01 +0100 (Sat, 30 Aug 2008) | 1 line Fixed decoding of operand class for ArrayPtg ........ r690517 | nick | 2008-08-30 15:47:33 +0100 (Sat, 30 Aug 2008) | 1 line Various bug fixes, and hpbf updates ........ git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@690533 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java90
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java1
-rw-r--r--src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java13
-rw-r--r--src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java7
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java38
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java34
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java4
-rw-r--r--src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java10
8 files changed, 187 insertions, 10 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
new file mode 100644
index 0000000000..368755efc0
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
@@ -0,0 +1,90 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpbf.dev;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.ddf.DefaultEscherRecordFactory;
+import org.apache.poi.ddf.EscherRecord;
+import org.apache.poi.hpbf.HPBFDocument;
+import org.apache.poi.hpbf.model.QuillContents;
+import org.apache.poi.hpbf.model.qcbits.QCBit;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.HexDump;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.StringUtil;
+
+/**
+ * Developer tool for dumping out the PLC contents of the QC Bits
+ * of an HPBF (Publisher) file, while we try to figure out
+ * what the format of them is.
+ *
+ * Usage: <code>PLCDumper &lt;filename&gt;</code>
+ */
+public class PLCDumper {
+    private final HPBFDocument doc;
+    private final QuillContents qc;
+
+    /**
+     * Creates a dumper for an already loaded document, and
+     * fetches its Quill contents ready for dumping.
+     */
+    public PLCDumper(HPBFDocument doc) {
+        this.doc = doc;
+        qc = doc.getQuillContents();
+    }
+    /**
+     * Creates a dumper for the document held in the given filesystem.
+     * @throws IOException if building the document fails
+     */
+    public PLCDumper(POIFSFileSystem fs) throws IOException {
+        this(new HPBFDocument(fs));
+    }
+    /**
+     * Creates a dumper for the document read from the given stream.
+     * The stream is not closed here; that is left to the caller.
+     * @throws IOException if building the document fails
+     */
+    public PLCDumper(InputStream inp) throws IOException {
+        this(new POIFSFileSystem(inp));
+    }
+
+    /**
+     * Command line entry point. Expects the file to dump as the first
+     * argument, and exits with status 1 after printing usage otherwise.
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Use:");
+            System.err.println(" PLCDumper <filename>");
+            System.exit(1);
+        }
+
+        InputStream inp = new FileInputStream(args[0]);
+        try {
+            PLCDumper dump = new PLCDumper(inp);
+
+            System.out.println("Dumping " + args[0]);
+            dump.dumpPLC();
+        } finally {
+            // Always release the file handle, even if loading or dumping fails
+            inp.close();
+        }
+    }
+
+    /**
+     * Dumps every non-null bit whose bit type is "PLC ".
+     */
+    private void dumpPLC() {
+        // Use the contents fetched by the constructor, rather than
+        // fetching them again into a local that hides the field
+        QCBit[] bits = qc.getBits();
+
+        for(int i=0; i<bits.length; i++) {
+            if(bits[i] == null) continue;
+            if(bits[i].getBitType().equals("PLC ")) {
+                dumpBit(bits[i], i);
+            }
+        }
+    }
+
+    /**
+     * Prints the types, option A value, data offset and length of
+     * one bit (the latter two in decimal and hex), then a hex dump
+     * of the bit's data.
+     *
+     * @param bit the bit to dump
+     * @param index position of the bit within the array of bits
+     */
+    private void dumpBit(QCBit bit, int index) {
+        System.out.println("");
+        System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
+        System.out.println(" Is a " + bit.getThingType() + ", number is " + bit.getOptA());
+        System.out.println(" Starts at " + bit.getDataOffset() + " (" + Integer.toHexString(bit.getDataOffset()) + ")");
+        System.out.println(" Runs for " + bit.getLength() + " (" + Integer.toHexString(bit.getLength()) + ")");
+
+        System.out.println(HexDump.dump(bit.getData(), 0, 0));
+    }
+}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java b/src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java
index ae626fcae3..b8d4ad298a 100644
--- a/src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/QuillContents.java
@@ -70,6 +70,7 @@ public final class QuillContents extends HPBFPart {
bits[i].setOptA(optA);
bits[i].setOptB(optB);
bits[i].setOptC(optC);
+ bits[i].setDataOffset(from);
} else {
// Doesn't have data
}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java
index 61c7955f66..d6a5608bb4 100644
--- a/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/model/qcbits/QCBit.java
@@ -28,6 +28,8 @@ public abstract class QCBit {
protected int optB;
protected int optC;
+ protected int dataOffset;
+
public QCBit(String thingType, String bitType, byte[] data) {
this.thingType = thingType;
this.bitType = bitType;
@@ -66,4 +68,15 @@ public abstract class QCBit {
public void setOptC(int optC) {
this.optC = optC;
}
+
+ /**
+  * Returns the data offset most recently recorded
+  * for this bit via setDataOffset.
+  */
+ public int getDataOffset() {
+ return this.dataOffset;
+ }
+ /**
+  * Records the data offset for this bit.
+  * NOTE(review): presumably the position at which this bit's
+  * data starts within the Quill contents - confirm against the caller.
+  */
+ public void setDataOffset(int offset) {
+ this.dataOffset = offset;
+ }
+
+ /**
+  * Returns the length of this bit's data array.
+  */
+ public int getLength() {
+ return this.data.length;
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java
index 8300361720..574c065ec3 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java
@@ -167,6 +167,13 @@ public class HeaderStories {
if(stripFields) {
return Range.stripFields(text);
}
+ // If you create a header/footer, then remove it again, Word
+ // will leave \r\r. Turn these back into an empty string,
+ // which is more what you'd expect
+ if(text.equals("\r\r")) {
+ return "";
+ }
+
return text;
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java
index 96396e1073..d5b4712227 100644
--- a/src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/extractor/TextPublisherTextExtractor.java
@@ -59,11 +59,11 @@ public class TextPublisherTextExtractor extends TestCase {
assertEquals(
"This is some text on the first page\n" +
-"It’s in times new roman, font size 10, all normal\n" +
+"It\u2019s in times new roman, font size 10, all normal\n" +
"" +
"This is in bold and italic\n" +
-"It’s Arial, 20 point font\n" +
-"It’s in the second textbox on the first page\n" +
+"It\u2019s Arial, 20 point font\n" +
+"It\u2019s in the second textbox on the first page\n" +
"" +
"This is the second page\n\n" +
"" +
@@ -102,4 +102,36 @@ public class TextPublisherTextExtractor extends TestCase {
, text
);
}
+
+    /**
+     * We have the same file saved for Publisher 98, Publisher
+     * 2000 and Publisher 2007. Check that the text extracted
+     * from all three agrees.
+     * @throws Exception if a sample file can't be opened or read
+     */
+    public void testMultipleVersions() throws Exception {
+        String s2007 = extractSampleText("Sample.pub");
+        String s2000 = extractSampleText("Sample2000.pub");
+        String s98 = extractSampleText("Sample98.pub");
+
+        // Check they all agree
+        assertEquals(s2007, s2000);
+        assertEquals(s2007, s98);
+    }
+
+    /**
+     * Opens the named file from the test data directory, extracts
+     * its text, and closes the file again whether or not that worked.
+     */
+    private String extractSampleText(String filename) throws Exception {
+        FileInputStream in = new FileInputStream(new File(dir, filename));
+        try {
+            HPBFDocument doc = new HPBFDocument(in);
+            return (new PublisherTextExtractor(doc)).getText();
+        } finally {
+            // Don't leak a file handle per sample file
+            in.close();
+        }
+    }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java b/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java
index dbaf46c649..631095007d 100644
--- a/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java
+++ b/src/scratchpad/testcases/org/apache/poi/hpbf/model/TestEscherParts.java
@@ -47,4 +47,38 @@ public class TestEscherParts extends TestCase {
// TODO - check the contents
}
+
+    /**
+     * Checks that both Escher parts of two more complex sample
+     * files are found, and hold the expected number of records.
+     */
+    public void testComplex() throws Exception {
+        assertEscherRecordCounts("SampleBrochure.pub", 30, 19);
+
+        // TODO - check contents
+
+
+        // Now do another complex file
+        assertEscherRecordCounts("SampleNewsletter.pub", 51, 92);
+    }
+
+    /**
+     * Opens the named file from the test data directory, asserts that
+     * its EscherStm and EscherDelayStm parts are both present and hold
+     * the given number of records, then closes the file again.
+     *
+     * @param filename sample file to open
+     * @param stmCount expected number of records in the EscherStm
+     * @param delayStmCount expected number of records in the EscherDelayStm
+     */
+    private void assertEscherRecordCounts(String filename, int stmCount, int delayStmCount) throws Exception {
+        FileInputStream in = new FileInputStream(new File(dir, filename));
+        try {
+            HPBFDocument doc = new HPBFDocument(in);
+
+            EscherStm es = doc.getEscherStm();
+            EscherDelayStm eds = doc.getEscherDelayStm();
+
+            assertNotNull(es);
+            assertNotNull(eds);
+
+            assertEquals(stmCount, es.getEscherRecords().length);
+            assertEquals(delayStmCount, eds.getEscherRecords().length);
+        } finally {
+            // Release the file handle even when an assertion fails
+            in.close();
+        }
+    }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
index 704b4d4dd2..a1b78752f8 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
@@ -183,7 +183,7 @@ public class TestWordExtractor extends TestCase {
extractor = new WordExtractor(doc);
assertEquals(
- "\n\nThis is a simple header, with a \u20ac euro symbol in it.\n\n",
+ "This is a simple header, with a \u20ac euro symbol in it.\n\n",
extractor.getHeaderText()
);
text = extractor.getText();
@@ -217,7 +217,7 @@ public class TestWordExtractor extends TestCase {
extractor = new WordExtractor(doc);
assertEquals(
- "\n\nThe footer, with Moli\u00e8re, has Unicode in it.\n",
+ "The footer, with Moli\u00e8re, has Unicode in it.\n",
extractor.getFooterText()
);
text = extractor.getText();
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java
index 404f6e47a4..e68352b508 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHeaderStories.java
@@ -123,7 +123,7 @@ public class TestHeaderStories extends TestCase {
assertEquals("", hs.getFirstHeader());
assertEquals("", hs.getEvenHeader());
- assertEquals("\r\r", hs.getOddHeader());
+ assertEquals("", hs.getOddHeader()); // Was \r\r but gets emptied
assertEquals("", hs.getFirstFooter());
@@ -181,13 +181,13 @@ public class TestHeaderStories extends TestCase {
public void testUnicode() throws Exception {
HeaderStories hs = new HeaderStories(unicode);
- assertEquals("\r\r", hs.getFirstHeader());
- assertEquals("\r\r", hs.getEvenHeader());
+ assertEquals("", hs.getFirstHeader());
+ assertEquals("", hs.getEvenHeader());
assertEquals("This is a simple header, with a \u20ac euro symbol in it.\r\r\r", hs.getOddHeader());
- assertEquals("\r\r", hs.getFirstFooter());
- assertEquals("\r\r", hs.getEvenFooter());
+ assertEquals("", hs.getFirstFooter());
+ assertEquals("", hs.getEvenFooter());
assertEquals("The footer, with Moli\u00e8re, has Unicode in it.\r\r", hs.getOddFooter());
}