about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author PJ Fanning <fanningpj@apache.org> 2019-12-23 09:18:38 +0000
committer PJ Fanning <fanningpj@apache.org> 2019-12-23 09:18:38 +0000
commit 93a7b81ed97adfcc2c50c87b81b1118791bc7d16 (patch)
tree 03ae1bc72f86b47fd2756989874e2c340f5a4ad1
parent 66471836f584d5c73be18367e1db4c4783b0cb48 (diff)
download poi-93a7b81ed97adfcc2c50c87b81b1118791bc7d16.tar.gz
poi-93a7b81ed97adfcc2c50c87b81b1118791bc7d16.zip
convert some tabs to spaces
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1871921 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java 78
-rw-r--r-- src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java 626
-rw-r--r-- src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java 82
-rw-r--r-- src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java 122
-rw-r--r-- src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java 296
5 files changed, 602 insertions, 602 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java b/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
index 47a39f5588..1f600977c3 100644
--- a/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java
@@ -34,49 +34,49 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* file format.
*/
public final class HPBFDocument extends POIReadOnlyDocument {
- private MainContents mainContents;
- private QuillContents quillContents;
- private EscherStm escherStm;
- private EscherDelayStm escherDelayStm;
+ private MainContents mainContents;
+ private QuillContents quillContents;
+ private EscherStm escherStm;
+ private EscherDelayStm escherDelayStm;
- /**
- * Opens a new publisher document
- */
- public HPBFDocument(POIFSFileSystem fs) throws IOException {
- this(fs.getRoot());
- }
+ /**
+ * Opens a new publisher document
+ */
+ public HPBFDocument(POIFSFileSystem fs) throws IOException {
+ this(fs.getRoot());
+ }
- public HPBFDocument(InputStream inp) throws IOException {
- this(new POIFSFileSystem(inp));
- }
+ public HPBFDocument(InputStream inp) throws IOException {
+ this(new POIFSFileSystem(inp));
+ }
- /**
- * Opens an embedded publisher document,
- * at the given directory.
- */
- public HPBFDocument(DirectoryNode dir) throws IOException {
- super(dir);
+ /**
+ * Opens an embedded publisher document,
+ * at the given directory.
+ */
+ public HPBFDocument(DirectoryNode dir) throws IOException {
+ super(dir);
- // Go looking for our interesting child
- // streams
- mainContents = new MainContents(dir);
- quillContents = new QuillContents(dir);
+ // Go looking for our interesting child
+ // streams
+ mainContents = new MainContents(dir);
+ quillContents = new QuillContents(dir);
- // Now the Escher bits
- escherStm = new EscherStm(dir);
- escherDelayStm = new EscherDelayStm(dir);
- }
+ // Now the Escher bits
+ escherStm = new EscherStm(dir);
+ escherDelayStm = new EscherDelayStm(dir);
+ }
- public MainContents getMainContents() {
- return mainContents;
- }
- public QuillContents getQuillContents() {
- return quillContents;
- }
- public EscherStm getEscherStm() {
- return escherStm;
- }
- public EscherDelayStm getEscherDelayStm() {
- return escherDelayStm;
- }
+ public MainContents getMainContents() {
+ return mainContents;
+ }
+ public QuillContents getQuillContents() {
+ return quillContents;
+ }
+ public EscherStm getEscherStm() {
+ return escherStm;
+ }
+ public EscherDelayStm getEscherDelayStm() {
+ return escherDelayStm;
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
index c31ee059be..a638ba9e77 100644
--- a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java
@@ -36,319 +36,319 @@ import org.apache.poi.util.StringUtil;
* constructed.
*/
public final class HPBFDumper {
- private POIFSFileSystem fs;
- public HPBFDumper(POIFSFileSystem fs) {
- this.fs = fs;
- }
-
- @SuppressWarnings("resource")
+ private POIFSFileSystem fs;
+ public HPBFDumper(POIFSFileSystem fs) {
+ this.fs = fs;
+ }
+
+ @SuppressWarnings("resource")
public HPBFDumper(InputStream inp) throws IOException {
- this(new POIFSFileSystem(inp));
- }
-
- private static byte[] getData(DirectoryNode dir, String name) throws IOException {
- // Grab the document stream
- InputStream is = dir.createDocumentInputStream(name);
- byte[] d = IOUtils.toByteArray(is);
- is.close();
-
- // All done
- return d;
- }
-
- /**
- * Dumps out the given number of bytes as hex,
- * two chars
- */
- private String dumpBytes(byte[] data, int offset, int len) {
- StringBuilder ret = new StringBuilder();
- for(int i=0; i<len; i++) {
- int j = i + offset;
- int b = data[j];
- if(b < 0) { b += 256; }
-
- String bs = Integer.toHexString(b);
- if(bs.length() == 1)
- ret.append('0');
- ret.append(bs);
- ret.append(' ');
- }
- return ret.toString();
- }
-
- @SuppressWarnings("resource")
+ this(new POIFSFileSystem(inp));
+ }
+
+ private static byte[] getData(DirectoryNode dir, String name) throws IOException {
+ // Grab the document stream
+ InputStream is = dir.createDocumentInputStream(name);
+ byte[] d = IOUtils.toByteArray(is);
+ is.close();
+
+ // All done
+ return d;
+ }
+
+ /**
+ * Dumps out the given number of bytes as hex,
+ * two chars
+ */
+ private String dumpBytes(byte[] data, int offset, int len) {
+ StringBuilder ret = new StringBuilder();
+ for(int i=0; i<len; i++) {
+ int j = i + offset;
+ int b = data[j];
+ if(b < 0) { b += 256; }
+
+ String bs = Integer.toHexString(b);
+ if(bs.length() == 1)
+ ret.append('0');
+ ret.append(bs);
+ ret.append(' ');
+ }
+ return ret.toString();
+ }
+
+ @SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" HPBFDumper <filename>");
- System.exit(1);
- }
- HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
-
- System.out.println("Dumping " + args[0]);
- dump.dumpContents();
- dump.dumpEnvelope();
- dump.dumpEscher();
- dump.dump001CompObj(dump.fs.getRoot());
- dump.dumpQuill();
-
- // Still to go:
- // (0x03)Internal
- // Objects
- }
-
- /**
- * Dump out the escher parts of the file.
- * Escher -> EscherStm and EscherDelayStm
- */
- public void dumpEscher() throws IOException {
- DirectoryNode escherDir = (DirectoryNode)
- fs.getRoot().getEntry("Escher");
-
- dumpEscherStm(escherDir);
- dumpEscherDelayStm(escherDir);
- }
- private void dumpEscherStream(byte[] data) {
- DefaultEscherRecordFactory erf =
- new DefaultEscherRecordFactory();
-
- // Dump
- int left = data.length;
- while(left > 0) {
- EscherRecord er = erf.createRecord(data, 0);
- er.fillFields(data, 0, erf);
- left -= er.getRecordSize();
-
- System.out.println(er);
- }
- }
- protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
- byte[] data = getData(escherDir, "EscherStm");
- System.out.println();
- System.out.println("EscherStm - " + data.length + " bytes long:");
- if(data.length > 0)
- dumpEscherStream(data);
- }
- protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
- byte[] data = getData(escherDir, "EscherDelayStm");
- System.out.println();
- System.out.println("EscherDelayStm - " + data.length + " bytes long:");
- if(data.length > 0)
- dumpEscherStream(data);
- }
-
- public void dumpEnvelope() throws IOException {
- byte[] data = getData(fs.getRoot(), "Envelope");
-
- System.out.println();
- System.out.println("Envelope - " + data.length + " bytes long:");
- }
-
- public void dumpContents() throws IOException {
- byte[] data = getData(fs.getRoot(), "Contents");
-
- System.out.println();
- System.out.println("Contents - " + data.length + " bytes long:");
-
- // 8 bytes, always seems to be
- // E8 AC 2C 00 E8 03 05 01
- // E8 AC 2C 00 E8 03 05 01
-
- // 4 bytes - size of contents
- // 13/15 00 00 01
-
- // ....
-
- // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
-
- // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
-
- // 01 18 30 00 03 20 00 00
- // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
- // 00 00 00 88 1E 00 00 00
-
- // 01 18 31 00 03 20 00 00
- // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
- // 00 00 00 88 1E 00 00 00
-
- // 01 18 32 00 03 20 00 00
- // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
- // 00 00 00 88 1E 00 00 00
- }
-
- public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
- byte[] data = getData(dir, "CONTENTS");
-
- System.out.println();
- System.out.println("CONTENTS - " + data.length + " bytes long:");
-
- // Between the start and 0x200 we have
- // CHNKINK(space) + 24 bytes
- // 0x1800
- // TEXT + 6 bytes
- // TEXT + 8 bytes
- // 0x1800
- // STSH + 6 bytes
- // STSH + 8 bytes
- // 0x1800
- // STSH + 6 bytes
- // STSH + 8 bytes
- // but towards 0x200 the pattern may
- // break down a little bit
-
- // After the second of a given type,
- // it seems to be 4 bytes giving the start,
- // then 4 bytes giving the length, then
- // 18 00
- System.out.println(
- new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
- dumpBytes(data, 8, 0x20-8)
- );
-
- int pos = 0x20;
- boolean sixNotEight = true;
- while(pos < 0x200) {
- if(sixNotEight) {
- System.out.println(
- dumpBytes(data, pos, 2)
- );
- pos += 2;
- }
- String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
- int blen = 8;
- if(sixNotEight)
- blen = 6;
- System.out.println(
- text + " " + dumpBytes(data, pos+4, blen)
- );
-
- pos += 4 + blen;
- sixNotEight = ! sixNotEight;
- }
-
- // Text from 0x200 onwards until we get
- // to \r(00)\n(00)(00)(00)
- int textStop = -1;
- for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
- if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
- textStop = i;
- }
- }
- if(textStop > 0) {
- int len = (textStop - 0x200) / 2;
- System.out.println();
- System.out.println(
- StringUtil.getFromUnicodeLE(data, 0x200, len)
- );
- }
-
- // The font list comes slightly later
-
- // The hyperlinks may come before the fonts,
- // or slightly in front
- }
- public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
- byte[] data = getData(dir, "CONTENTS");
-
- System.out.println();
- System.out.println("CONTENTS - " + data.length + " bytes long:");
-
- String[] startType = new String[20];
- String[] endType = new String[20];
- int[] optA = new int[20];
- int[] optB = new int[20];
- int[] optC = new int[20];
- int[] from = new int[20];
- int[] len = new int[20];
-
- for(int i=0; i<20; i++) {
- int offset = 0x20 + i*24;
- if(data[offset] == 0x18 && data[offset+1] == 0x00) {
- // Has data
- startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
- optA[i] = LittleEndian.getUShort(data, offset+6);
- optB[i] = LittleEndian.getUShort(data, offset+8);
- optC[i] = LittleEndian.getUShort(data, offset+10);
- endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
- from[i] = (int)LittleEndian.getUInt(data, offset+16);
- len[i] = (int)LittleEndian.getUInt(data, offset+20);
- } else {
- // Doesn't have data
- }
- }
-
- String text = StringUtil.getFromUnicodeLE(
- data, from[0], len[0]/2
- );
-
- // Dump
- for(int i=0; i<20; i++) {
- String num = Integer.toString(i);
- if(i < 10) {
- num = "0" + i;
- }
- System.out.print(num + " ");
-
- if(startType[i] == null) {
- System.out.println("(not present)");
- } else {
- System.out.println(
- "\t" +
- startType[i] + " " +
- optA[i] + " " +
- optB[i] + " " +
- optC[i]
- );
- System.out.println(
- "\t" +
- endType[i] + " " +
- "from: " +
- Integer.toHexString(from[i]) +
- " (" + from[i] + ")" +
- ", len: " +
- Integer.toHexString(len[i]) +
- " (" + len[i] + ")"
- );
- }
- }
-
- // Text
- System.out.println();
- System.out.println("TEXT:");
- System.out.println(text);
- System.out.println();
-
- // All the others
- for(int i=0; i<20; i++) {
- if(startType[i] == null) {
- continue;
- }
- int start = from[i];
-
- System.out.println(
- startType[i] + " -> " + endType[i] +
- " @ " + Integer.toHexString(start) +
- " (" + start + ")"
- );
- System.out.println("\t" + dumpBytes(data, start, 4));
- System.out.println("\t" + dumpBytes(data, start+4, 4));
- System.out.println("\t" + dumpBytes(data, start+8, 4));
- System.out.println("\t(etc)");
- }
- }
-
- protected void dump001CompObj(DirectoryNode dir) {
- // TODO
- }
-
- public void dumpQuill() throws IOException {
- DirectoryNode quillDir = (DirectoryNode)
- fs.getRoot().getEntry("Quill");
- DirectoryNode quillSubDir = (DirectoryNode)
- quillDir.getEntry("QuillSub");
-
- dump001CompObj(quillSubDir);
- dumpCONTENTSraw(quillSubDir);
- dumpCONTENTSguessed(quillSubDir);
- }
+ if(args.length < 1) {
+ System.err.println("Use:");
+ System.err.println(" HPBFDumper <filename>");
+ System.exit(1);
+ }
+ HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0])));
+
+ System.out.println("Dumping " + args[0]);
+ dump.dumpContents();
+ dump.dumpEnvelope();
+ dump.dumpEscher();
+ dump.dump001CompObj(dump.fs.getRoot());
+ dump.dumpQuill();
+
+ // Still to go:
+ // (0x03)Internal
+ // Objects
+ }
+
+ /**
+ * Dump out the escher parts of the file.
+ * Escher -> EscherStm and EscherDelayStm
+ */
+ public void dumpEscher() throws IOException {
+ DirectoryNode escherDir = (DirectoryNode)
+ fs.getRoot().getEntry("Escher");
+
+ dumpEscherStm(escherDir);
+ dumpEscherDelayStm(escherDir);
+ }
+ private void dumpEscherStream(byte[] data) {
+ DefaultEscherRecordFactory erf =
+ new DefaultEscherRecordFactory();
+
+ // Dump
+ int left = data.length;
+ while(left > 0) {
+ EscherRecord er = erf.createRecord(data, 0);
+ er.fillFields(data, 0, erf);
+ left -= er.getRecordSize();
+
+ System.out.println(er);
+ }
+ }
+ protected void dumpEscherStm(DirectoryNode escherDir) throws IOException {
+ byte[] data = getData(escherDir, "EscherStm");
+ System.out.println();
+ System.out.println("EscherStm - " + data.length + " bytes long:");
+ if(data.length > 0)
+ dumpEscherStream(data);
+ }
+ protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException {
+ byte[] data = getData(escherDir, "EscherDelayStm");
+ System.out.println();
+ System.out.println("EscherDelayStm - " + data.length + " bytes long:");
+ if(data.length > 0)
+ dumpEscherStream(data);
+ }
+
+ public void dumpEnvelope() throws IOException {
+ byte[] data = getData(fs.getRoot(), "Envelope");
+
+ System.out.println();
+ System.out.println("Envelope - " + data.length + " bytes long:");
+ }
+
+ public void dumpContents() throws IOException {
+ byte[] data = getData(fs.getRoot(), "Contents");
+
+ System.out.println();
+ System.out.println("Contents - " + data.length + " bytes long:");
+
+ // 8 bytes, always seems to be
+ // E8 AC 2C 00 E8 03 05 01
+ // E8 AC 2C 00 E8 03 05 01
+
+ // 4 bytes - size of contents
+ // 13/15 00 00 01
+
+ // ....
+
+ // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... ..........
+
+ // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... ..
+
+ // 01 18 30 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+
+ // 01 18 31 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+
+ // 01 18 32 00 03 20 00 00
+ // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00
+ // 00 00 00 88 1E 00 00 00
+ }
+
+ public void dumpCONTENTSraw(DirectoryNode dir) throws IOException {
+ byte[] data = getData(dir, "CONTENTS");
+
+ System.out.println();
+ System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+ // Between the start and 0x200 we have
+ // CHNKINK(space) + 24 bytes
+ // 0x1800
+ // TEXT + 6 bytes
+ // TEXT + 8 bytes
+ // 0x1800
+ // STSH + 6 bytes
+ // STSH + 8 bytes
+ // 0x1800
+ // STSH + 6 bytes
+ // STSH + 8 bytes
+ // but towards 0x200 the pattern may
+ // break down a little bit
+
+ // After the second of a given type,
+ // it seems to be 4 bytes giving the start,
+ // then 4 bytes giving the length, then
+ // 18 00
+ System.out.println(
+ new String(data, 0, 8, LocaleUtil.CHARSET_1252) +
+ dumpBytes(data, 8, 0x20-8)
+ );
+
+ int pos = 0x20;
+ boolean sixNotEight = true;
+ while(pos < 0x200) {
+ if(sixNotEight) {
+ System.out.println(
+ dumpBytes(data, pos, 2)
+ );
+ pos += 2;
+ }
+ String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252);
+ int blen = 8;
+ if(sixNotEight)
+ blen = 6;
+ System.out.println(
+ text + " " + dumpBytes(data, pos+4, blen)
+ );
+
+ pos += 4 + blen;
+ sixNotEight = ! sixNotEight;
+ }
+
+ // Text from 0x200 onwards until we get
+ // to \r(00)\n(00)(00)(00)
+ int textStop = -1;
+ for(int i=0x200; i<data.length-2 && textStop == -1; i++) {
+ if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) {
+ textStop = i;
+ }
+ }
+ if(textStop > 0) {
+ int len = (textStop - 0x200) / 2;
+ System.out.println();
+ System.out.println(
+ StringUtil.getFromUnicodeLE(data, 0x200, len)
+ );
+ }
+
+ // The font list comes slightly later
+
+ // The hyperlinks may come before the fonts,
+ // or slightly in front
+ }
+ public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException {
+ byte[] data = getData(dir, "CONTENTS");
+
+ System.out.println();
+ System.out.println("CONTENTS - " + data.length + " bytes long:");
+
+ String[] startType = new String[20];
+ String[] endType = new String[20];
+ int[] optA = new int[20];
+ int[] optB = new int[20];
+ int[] optC = new int[20];
+ int[] from = new int[20];
+ int[] len = new int[20];
+
+ for(int i=0; i<20; i++) {
+ int offset = 0x20 + i*24;
+ if(data[offset] == 0x18 && data[offset+1] == 0x00) {
+ // Has data
+ startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252);
+ optA[i] = LittleEndian.getUShort(data, offset+6);
+ optB[i] = LittleEndian.getUShort(data, offset+8);
+ optC[i] = LittleEndian.getUShort(data, offset+10);
+ endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252);
+ from[i] = (int)LittleEndian.getUInt(data, offset+16);
+ len[i] = (int)LittleEndian.getUInt(data, offset+20);
+ } else {
+ // Doesn't have data
+ }
+ }
+
+ String text = StringUtil.getFromUnicodeLE(
+ data, from[0], len[0]/2
+ );
+
+ // Dump
+ for(int i=0; i<20; i++) {
+ String num = Integer.toString(i);
+ if(i < 10) {
+ num = "0" + i;
+ }
+ System.out.print(num + " ");
+
+ if(startType[i] == null) {
+ System.out.println("(not present)");
+ } else {
+ System.out.println(
+ "\t" +
+ startType[i] + " " +
+ optA[i] + " " +
+ optB[i] + " " +
+ optC[i]
+ );
+ System.out.println(
+ "\t" +
+ endType[i] + " " +
+ "from: " +
+ Integer.toHexString(from[i]) +
+ " (" + from[i] + ")" +
+ ", len: " +
+ Integer.toHexString(len[i]) +
+ " (" + len[i] + ")"
+ );
+ }
+ }
+
+ // Text
+ System.out.println();
+ System.out.println("TEXT:");
+ System.out.println(text);
+ System.out.println();
+
+ // All the others
+ for(int i=0; i<20; i++) {
+ if(startType[i] == null) {
+ continue;
+ }
+ int start = from[i];
+
+ System.out.println(
+ startType[i] + " -> " + endType[i] +
+ " @ " + Integer.toHexString(start) +
+ " (" + start + ")"
+ );
+ System.out.println("\t" + dumpBytes(data, start, 4));
+ System.out.println("\t" + dumpBytes(data, start+4, 4));
+ System.out.println("\t" + dumpBytes(data, start+8, 4));
+ System.out.println("\t(etc)");
+ }
+ }
+
+ protected void dump001CompObj(DirectoryNode dir) {
+ // TODO
+ }
+
+ public void dumpQuill() throws IOException {
+ DirectoryNode quillDir = (DirectoryNode)
+ fs.getRoot().getEntry("Quill");
+ DirectoryNode quillSubDir = (DirectoryNode)
+ quillDir.getEntry("QuillSub");
+
+ dump001CompObj(quillSubDir);
+ dumpCONTENTSraw(quillSubDir);
+ dumpCONTENTSguessed(quillSubDir);
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
index 4438111e4e..3c222d51b2 100644
--- a/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java
@@ -33,53 +33,53 @@ import org.apache.poi.util.HexDump;
* what the format of them is.
*/
public final class PLCDumper {
- private HPBFDocument doc;
- private QuillContents qc;
+ private HPBFDocument doc;
+ private QuillContents qc;
- public PLCDumper(HPBFDocument hpbfDoc) {
- doc = hpbfDoc;
- qc = doc.getQuillContents();
- }
- public PLCDumper(POIFSFileSystem fs) throws IOException {
- this(new HPBFDocument(fs));
- }
- public PLCDumper(InputStream inp) throws IOException {
- this(new POIFSFileSystem(inp));
- }
+ public PLCDumper(HPBFDocument hpbfDoc) {
+ doc = hpbfDoc;
+ qc = doc.getQuillContents();
+ }
+ public PLCDumper(POIFSFileSystem fs) throws IOException {
+ this(new HPBFDocument(fs));
+ }
+ public PLCDumper(InputStream inp) throws IOException {
+ this(new POIFSFileSystem(inp));
+ }
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" PLCDumper <filename>");
- System.exit(1);
- }
+ public static void main(String[] args) throws Exception {
+ if(args.length < 1) {
+ System.err.println("Use:");
+ System.err.println(" PLCDumper <filename>");
+ System.exit(1);
+ }
- try (FileInputStream fis = new FileInputStream(args[0])) {
- PLCDumper dump = new PLCDumper(fis);
+ try (FileInputStream fis = new FileInputStream(args[0])) {
+ PLCDumper dump = new PLCDumper(fis);
- System.out.println("Dumping " + args[0]);
- dump.dumpPLC();
- }
- }
+ System.out.println("Dumping " + args[0]);
+ dump.dumpPLC();
+ }
+ }
- private void dumpPLC() {
- QCBit[] bits = qc.getBits();
+ private void dumpPLC() {
+ QCBit[] bits = qc.getBits();
- for(int i=0; i<bits.length; i++) {
- if(bits[i] == null) continue;
- if(bits[i].getBitType().equals("PLC ")) {
- dumpBit(bits[i], i);
- }
- }
- }
+ for(int i=0; i<bits.length; i++) {
+ if(bits[i] == null) continue;
+ if(bits[i].getBitType().equals("PLC ")) {
+ dumpBit(bits[i], i);
+ }
+ }
+ }
- private void dumpBit(QCBit bit, int index) {
- System.out.println();
- System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
- System.out.println(" Is a " + bit.getThingType() + ", number is " + bit.getOptA());
- System.out.println(" Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
- System.out.println(" Runs for " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
+ private void dumpBit(QCBit bit, int index) {
+ System.out.println();
+ System.out.println("Dumping " + bit.getBitType() + " bit at " + index);
+ System.out.println(" Is a " + bit.getThingType() + ", number is " + bit.getOptA());
+ System.out.println(" Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")");
+ System.out.println(" Runs for " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")");
- System.out.println(HexDump.dump(bit.getData(), 0, 0));
- }
+ System.out.println(HexDump.dump(bit.getData(), 0, 0));
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
index 27f62aeeec..bd442b8da6 100644
--- a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
@@ -50,65 +50,65 @@ public final class PublisherTextExtractor extends POIOLE2TextExtractor {
this(new POIFSFileSystem(is));
}
- /**
- * Should a call to getText() return hyperlinks inline
- * with the text?
- * Default is no
- */
- public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
- this.hyperlinksByDefault = hyperlinksByDefault;
- }
-
-
- public String getText() {
- StringBuilder text = new StringBuilder();
-
- // Get the text from the Quill Contents
- QCBit[] bits = doc.getQuillContents().getBits();
- for (QCBit bit1 : bits) {
- if (bit1 != null && bit1 instanceof QCTextBit) {
- QCTextBit t = (QCTextBit) bit1;
- text.append(t.getText().replace('\r', '\n'));
- }
- }
-
- // If requested, add in the hyperlinks
- // Ideally, we'd do these inline, but the hyperlink
- // positions are relative to the text area the
- // hyperlink is in, and we have yet to figure out
- // how to tie that together.
- if(hyperlinksByDefault) {
- for (QCBit bit : bits) {
- if (bit != null && bit instanceof Type12) {
- Type12 hyperlinks = (Type12) bit;
- for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
- text.append("<");
- text.append(hyperlinks.getHyperlink(j));
- text.append(">\n");
- }
- }
- }
- }
-
- // Get more text
- // TODO
-
- return text.toString();
- }
-
-
- public static void main(String[] args) throws Exception {
- if(args.length == 0) {
- System.err.println("Use:");
- System.err.println(" PublisherTextExtractor <file.pub>");
- }
-
- for (String arg : args) {
- try (FileInputStream fis = new FileInputStream(arg)) {
- PublisherTextExtractor te = new PublisherTextExtractor(fis);
- System.out.println(te.getText());
- te.close();
- }
- }
- }
+ /**
+ * Should a call to getText() return hyperlinks inline
+ * with the text?
+ * Default is no
+ */
+ public void setHyperlinksByDefault(boolean hyperlinksByDefault) {
+ this.hyperlinksByDefault = hyperlinksByDefault;
+ }
+
+
+ public String getText() {
+ StringBuilder text = new StringBuilder();
+
+ // Get the text from the Quill Contents
+ QCBit[] bits = doc.getQuillContents().getBits();
+ for (QCBit bit1 : bits) {
+ if (bit1 != null && bit1 instanceof QCTextBit) {
+ QCTextBit t = (QCTextBit) bit1;
+ text.append(t.getText().replace('\r', '\n'));
+ }
+ }
+
+ // If requested, add in the hyperlinks
+ // Ideally, we'd do these inline, but the hyperlink
+ // positions are relative to the text area the
+ // hyperlink is in, and we have yet to figure out
+ // how to tie that together.
+ if(hyperlinksByDefault) {
+ for (QCBit bit : bits) {
+ if (bit != null && bit instanceof Type12) {
+ Type12 hyperlinks = (Type12) bit;
+ for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
+ text.append("<");
+ text.append(hyperlinks.getHyperlink(j));
+ text.append(">\n");
+ }
+ }
+ }
+ }
+
+ // Get more text
+ // TODO
+
+ return text.toString();
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ if(args.length == 0) {
+ System.err.println("Use:");
+ System.err.println(" PublisherTextExtractor <file.pub>");
+ }
+
+ for (String arg : args) {
+ try (FileInputStream fis = new FileInputStream(arg)) {
+ PublisherTextExtractor te = new PublisherTextExtractor(fis);
+ System.out.println(te.getText());
+ te.close();
+ }
+ }
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java
index 9005e65fbd..3c92da1a3c 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java
@@ -53,158 +53,158 @@ import org.apache.poi.util.LittleEndian;
* lucene indexers) that would ever want to use this!
*/
public final class QuickButCruddyTextExtractor {
- private POIFSFileSystem fs;
- private InputStream is;
- private byte[] pptContents;
-
- /**
- * Really basic text extractor, that will also return lots of crud text.
- * Takes a single argument, the file to extract from
- */
- public static void main(String[] args) throws IOException
- {
- if(args.length < 1) {
- System.err.println("Useage:");
- System.err.println("\tQuickButCruddyTextExtractor <file>");
- System.exit(1);
- }
-
- String file = args[0];
-
- QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
- System.out.println(ppe.getTextAsString());
- ppe.close();
- }
-
- /**
- * Creates an extractor from a given file name
- * @param fileName
- */
- @SuppressWarnings("resource")
+ private POIFSFileSystem fs;
+ private InputStream is;
+ private byte[] pptContents;
+
+ /**
+ * Really basic text extractor, that will also return lots of crud text.
+ * Takes a single argument, the file to extract from
+ */
+ public static void main(String[] args) throws IOException
+ {
+ if(args.length < 1) {
+ System.err.println("Useage:");
+ System.err.println("\tQuickButCruddyTextExtractor <file>");
+ System.exit(1);
+ }
+
+ String file = args[0];
+
+ QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file);
+ System.out.println(ppe.getTextAsString());
+ ppe.close();
+ }
+
+ /**
+ * Creates an extractor from a given file name
+ * @param fileName
+ */
+ @SuppressWarnings("resource")
public QuickButCruddyTextExtractor(String fileName) throws IOException {
- this(new POIFSFileSystem(new File(fileName)));
- }
+ this(new POIFSFileSystem(new File(fileName)));
+ }
- /**
- * Creates an extractor from a given input stream
- * @param iStream
- */
+ /**
+ * Creates an extractor from a given input stream
+ * @param iStream
+ */
@SuppressWarnings("resource")
- public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
- this(new POIFSFileSystem(iStream));
- is = iStream;
- }
-
- /**
- * Creates an extractor from a POIFS Filesystem
- * @param poifs
- */
- public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
- fs = poifs;
-
- // Find the PowerPoint bit, and get out the bytes
- InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
- pptContents = IOUtils.toByteArray(pptIs);
- pptIs.close();
- }
-
-
- /**
- * Shuts down the underlying streams
- */
- public void close() throws IOException {
- if(is != null) { is.close(); }
- fs = null;
- }
-
- /**
- * Fetches the ALL the text of the powerpoint file, as a single string
- */
- public String getTextAsString() {
- StringBuilder ret = new StringBuilder();
- List<String> textV = getTextAsVector();
- for(String text : textV) {
- ret.append(text);
- if(! text.endsWith("\n")) {
- ret.append('\n');
- }
- }
- return ret.toString();
- }
-
- /**
- * Fetches the ALL the text of the powerpoint file, in a List of
- * strings, one per text record
- */
- public List<String> getTextAsVector() {
- List<String> textV = new ArrayList<>();
-
- // Set to the start of the file
- int walkPos = 0;
-
- // Start walking the file, looking for the records
- while(walkPos != -1) {
+ public QuickButCruddyTextExtractor(InputStream iStream) throws IOException {
+ this(new POIFSFileSystem(iStream));
+ is = iStream;
+ }
+
+ /**
+ * Creates an extractor from a POIFS Filesystem
+ * @param poifs
+ */
+ public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException {
+ fs = poifs;
+
+ // Find the PowerPoint bit, and get out the bytes
+ InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT);
+ pptContents = IOUtils.toByteArray(pptIs);
+ pptIs.close();
+ }
+
+
+ /**
+ * Shuts down the underlying streams
+ */
+ public void close() throws IOException {
+ if(is != null) { is.close(); }
+ fs = null;
+ }
+
+ /**
+ * Fetches the ALL the text of the powerpoint file, as a single string
+ */
+ public String getTextAsString() {
+ StringBuilder ret = new StringBuilder();
+ List<String> textV = getTextAsVector();
+ for(String text : textV) {
+ ret.append(text);
+ if(! text.endsWith("\n")) {
+ ret.append('\n');
+ }
+ }
+ return ret.toString();
+ }
+
+ /**
+ * Fetches the ALL the text of the powerpoint file, in a List of
+ * strings, one per text record
+ */
+ public List<String> getTextAsVector() {
+ List<String> textV = new ArrayList<>();
+
+ // Set to the start of the file
+ int walkPos = 0;
+
+ // Start walking the file, looking for the records
+ while(walkPos != -1) {
walkPos = findTextRecords(walkPos,textV);
- }
-
- // Return what we find
- return textV;
- }
-
- /**
- * For the given position, look if the record is a text record, and wind
- * on after.
- * If it is a text record, grabs out the text. Whatever happens, returns
- * the position of the next record, or -1 if no more.
- */
- public int findTextRecords(int startPos, List<String> textV) {
- // Grab the length, and the first option byte
- // Note that the length doesn't include the 8 byte atom header
- int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
- byte opt = pptContents[startPos];
-
- // If it's a container, step into it and return
- // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
- int container = opt & 0x0f;
- if(container == 0x0f) {
- return (startPos+8);
- }
-
- // Otherwise, check the type to see if it's text
- int type = LittleEndian.getUShort(pptContents,startPos+2);
-
- // TextBytesAtom
- if(type == RecordTypes.TextBytesAtom.typeID) {
- TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
- String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
- textV.add(text);
- }
- // TextCharsAtom
- if(type == RecordTypes.TextCharsAtom.typeID) {
- TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
+ }
+
+ // Return what we find
+ return textV;
+ }
+
+ /**
+ * For the given position, look if the record is a text record, and wind
+ * on after.
+ * If it is a text record, grabs out the text. Whatever happens, returns
+ * the position of the next record, or -1 if no more.
+ */
+ public int findTextRecords(int startPos, List<String> textV) {
+ // Grab the length, and the first option byte
+ // Note that the length doesn't include the 8 byte atom header
+ int len = (int)LittleEndian.getUInt(pptContents,startPos+4);
+ byte opt = pptContents[startPos];
+
+ // If it's a container, step into it and return
+ // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f)
+ int container = opt & 0x0f;
+ if(container == 0x0f) {
+ return (startPos+8);
+ }
+
+ // Otherwise, check the type to see if it's text
+ int type = LittleEndian.getUShort(pptContents,startPos+2);
+
+ // TextBytesAtom
+ if(type == RecordTypes.TextBytesAtom.typeID) {
+ TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
+ String text = HSLFTextParagraph.toExternalString(tba.getText(), -1);
+ textV.add(text);
+ }
+ // TextCharsAtom
+ if(type == RecordTypes.TextCharsAtom.typeID) {
+ TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8);
String text = HSLFTextParagraph.toExternalString(tca.getText(), -1);
textV.add(text);
- }
-
- // CString (doesn't go via a TextRun)
- if(type == RecordTypes.CString.typeID) {
- CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
- String text = cs.getText();
-
- // Ignore the ones we know to be rubbish
- if(text.equals("___PPT10")) {
- } else if(text.equals("Default Design")) {
- } else {
- textV.add(text);
- }
- }
-
-
- // Wind on by the atom length, and check we're not at the end
- int newPos = (startPos + 8 + len);
- if(newPos > (pptContents.length - 8)) {
- newPos = -1;
- }
- return newPos;
- }
+ }
+
+ // CString (doesn't go via a TextRun)
+ if(type == RecordTypes.CString.typeID) {
+ CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8);
+ String text = cs.getText();
+
+ // Ignore the ones we know to be rubbish
+ if(text.equals("___PPT10")) {
+ } else if(text.equals("Default Design")) {
+ } else {
+ textV.add(text);
+ }
+ }
+
+
+ // Wind on by the atom length, and check we're not at the end
+ int newPos = (startPos + 8 + len);
+ if(newPos > (pptContents.length - 8)) {
+ newPos = -1;
+ }
+ return newPos;
+ }
}