diff options
author | PJ Fanning <fanningpj@apache.org> | 2019-12-23 09:18:38 +0000 |
---|---|---|
committer | PJ Fanning <fanningpj@apache.org> | 2019-12-23 09:18:38 +0000 |
commit | 93a7b81ed97adfcc2c50c87b81b1118791bc7d16 (patch) | |
tree | 03ae1bc72f86b47fd2756989874e2c340f5a4ad1 | |
parent | 66471836f584d5c73be18367e1db4c4783b0cb48 (diff) | |
download | poi-93a7b81ed97adfcc2c50c87b81b1118791bc7d16.tar.gz poi-93a7b81ed97adfcc2c50c87b81b1118791bc7d16.zip |
convert some tabs to spaces
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1871921 13f79535-47bb-0310-9956-ffa450edef68
5 files changed, 602 insertions, 602 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java b/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java index 47a39f5588..1f600977c3 100644 --- a/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hpbf/HPBFDocument.java @@ -34,49 +34,49 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * file format. */ public final class HPBFDocument extends POIReadOnlyDocument { - private MainContents mainContents; - private QuillContents quillContents; - private EscherStm escherStm; - private EscherDelayStm escherDelayStm; + private MainContents mainContents; + private QuillContents quillContents; + private EscherStm escherStm; + private EscherDelayStm escherDelayStm; - /** - * Opens a new publisher document - */ - public HPBFDocument(POIFSFileSystem fs) throws IOException { - this(fs.getRoot()); - } + /** + * Opens a new publisher document + */ + public HPBFDocument(POIFSFileSystem fs) throws IOException { + this(fs.getRoot()); + } - public HPBFDocument(InputStream inp) throws IOException { - this(new POIFSFileSystem(inp)); - } + public HPBFDocument(InputStream inp) throws IOException { + this(new POIFSFileSystem(inp)); + } - /** - * Opens an embedded publisher document, - * at the given directory. - */ - public HPBFDocument(DirectoryNode dir) throws IOException { - super(dir); + /** + * Opens an embedded publisher document, + * at the given directory. + */ + public HPBFDocument(DirectoryNode dir) throws IOException { + super(dir); - // Go looking for our interesting child - // streams - mainContents = new MainContents(dir); - quillContents = new QuillContents(dir); + // Go looking for our interesting child + // streams + mainContents = new MainContents(dir); + quillContents = new QuillContents(dir); - // Now the Escher bits - escherStm = new EscherStm(dir); - escherDelayStm = new EscherDelayStm(dir); - } + // Now the Escher bits + escherStm = new EscherStm(dir); + escherDelayStm = new EscherDelayStm(dir); + } - public MainContents getMainContents() { - return mainContents; - } - public QuillContents getQuillContents() { - return quillContents; - } - public EscherStm getEscherStm() { - return escherStm; - } - public EscherDelayStm getEscherDelayStm() { - return escherDelayStm; - } + public MainContents getMainContents() { + return mainContents; + } + public QuillContents getQuillContents() { + return quillContents; + } + public EscherStm getEscherStm() { + return escherStm; + } + public EscherDelayStm getEscherDelayStm() { + return escherDelayStm; + } } diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java index c31ee059be..a638ba9e77 100644 --- a/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java +++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/HPBFDumper.java @@ -36,319 +36,319 @@ import org.apache.poi.util.StringUtil; * constructed. */ public final class HPBFDumper { - private POIFSFileSystem fs; - public HPBFDumper(POIFSFileSystem fs) { - this.fs = fs; - } - - @SuppressWarnings("resource") + private POIFSFileSystem fs; + public HPBFDumper(POIFSFileSystem fs) { + this.fs = fs; + } + + @SuppressWarnings("resource") public HPBFDumper(InputStream inp) throws IOException { - this(new POIFSFileSystem(inp)); - } - - private static byte[] getData(DirectoryNode dir, String name) throws IOException { - // Grab the document stream - InputStream is = dir.createDocumentInputStream(name); - byte[] d = IOUtils.toByteArray(is); - is.close(); - - // All done - return d; - } - - /** - * Dumps out the given number of bytes as hex, - * two chars - */ - private String dumpBytes(byte[] data, int offset, int len) { - StringBuilder ret = new StringBuilder(); - for(int i=0; i<len; i++) { - int j = i + offset; - int b = data[j]; - if(b < 0) { b += 256; } - - String bs = Integer.toHexString(b); - if(bs.length() == 1) - ret.append('0'); - ret.append(bs); - ret.append(' '); - } - return ret.toString(); - } - - @SuppressWarnings("resource") + this(new POIFSFileSystem(inp)); + } + + private static byte[] getData(DirectoryNode dir, String name) throws IOException { + // Grab the document stream + InputStream is = dir.createDocumentInputStream(name); + byte[] d = IOUtils.toByteArray(is); + is.close(); + + // All done + return d; + } + + /** + * Dumps out the given number of bytes as hex, + * two chars + */ + private String dumpBytes(byte[] data, int offset, int len) { + StringBuilder ret = new StringBuilder(); + for(int i=0; i<len; i++) { + int j = i + offset; + int b = data[j]; + if(b < 0) { b += 256; } + + String bs = Integer.toHexString(b); + if(bs.length() == 1) + ret.append('0'); + ret.append(bs); + ret.append(' '); + } + return ret.toString(); + } + + @SuppressWarnings("resource") public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" HPBFDumper <filename>"); - System.exit(1); - } - HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0]))); - - System.out.println("Dumping " + args[0]); - dump.dumpContents(); - dump.dumpEnvelope(); - dump.dumpEscher(); - dump.dump001CompObj(dump.fs.getRoot()); - dump.dumpQuill(); - - // Still to go: - // (0x03)Internal - // Objects - } - - /** - * Dump out the escher parts of the file. - * Escher -> EscherStm and EscherDelayStm - */ - public void dumpEscher() throws IOException { - DirectoryNode escherDir = (DirectoryNode) - fs.getRoot().getEntry("Escher"); - - dumpEscherStm(escherDir); - dumpEscherDelayStm(escherDir); - } - private void dumpEscherStream(byte[] data) { - DefaultEscherRecordFactory erf = - new DefaultEscherRecordFactory(); - - // Dump - int left = data.length; - while(left > 0) { - EscherRecord er = erf.createRecord(data, 0); - er.fillFields(data, 0, erf); - left -= er.getRecordSize(); - - System.out.println(er); - } - } - protected void dumpEscherStm(DirectoryNode escherDir) throws IOException { - byte[] data = getData(escherDir, "EscherStm"); - System.out.println(); - System.out.println("EscherStm - " + data.length + " bytes long:"); - if(data.length > 0) - dumpEscherStream(data); - } - protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException { - byte[] data = getData(escherDir, "EscherDelayStm"); - System.out.println(); - System.out.println("EscherDelayStm - " + data.length + " bytes long:"); - if(data.length > 0) - dumpEscherStream(data); - } - - public void dumpEnvelope() throws IOException { - byte[] data = getData(fs.getRoot(), "Envelope"); - - System.out.println(); - System.out.println("Envelope - " + data.length + " bytes long:"); - } - - public void dumpContents() throws IOException { - byte[] data = getData(fs.getRoot(), "Contents"); - - System.out.println(); - System.out.println("Contents - " + data.length + " bytes long:"); - - // 8 bytes, always seems to be - // E8 AC 2C 00 E8 03 05 01 - // E8 AC 2C 00 E8 03 05 01 - - // 4 bytes - size of contents - // 13/15 00 00 01 - - // .... - - // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... .......... - - // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... .. - - // 01 18 30 00 03 20 00 00 - // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 - // 00 00 00 88 1E 00 00 00 - - // 01 18 31 00 03 20 00 00 - // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 - // 00 00 00 88 1E 00 00 00 - - // 01 18 32 00 03 20 00 00 - // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 - // 00 00 00 88 1E 00 00 00 - } - - public void dumpCONTENTSraw(DirectoryNode dir) throws IOException { - byte[] data = getData(dir, "CONTENTS"); - - System.out.println(); - System.out.println("CONTENTS - " + data.length + " bytes long:"); - - // Between the start and 0x200 we have - // CHNKINK(space) + 24 bytes - // 0x1800 - // TEXT + 6 bytes - // TEXT + 8 bytes - // 0x1800 - // STSH + 6 bytes - // STSH + 8 bytes - // 0x1800 - // STSH + 6 bytes - // STSH + 8 bytes - // but towards 0x200 the pattern may - // break down a little bit - - // After the second of a given type, - // it seems to be 4 bytes giving the start, - // then 4 bytes giving the length, then - // 18 00 - System.out.println( - new String(data, 0, 8, LocaleUtil.CHARSET_1252) + - dumpBytes(data, 8, 0x20-8) - ); - - int pos = 0x20; - boolean sixNotEight = true; - while(pos < 0x200) { - if(sixNotEight) { - System.out.println( - dumpBytes(data, pos, 2) - ); - pos += 2; - } - String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252); - int blen = 8; - if(sixNotEight) - blen = 6; - System.out.println( - text + " " + dumpBytes(data, pos+4, blen) - ); - - pos += 4 + blen; - sixNotEight = ! sixNotEight; - } - - // Text from 0x200 onwards until we get - // to \r(00)\n(00)(00)(00) - int textStop = -1; - for(int i=0x200; i<data.length-2 && textStop == -1; i++) { - if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) { - textStop = i; - } - } - if(textStop > 0) { - int len = (textStop - 0x200) / 2; - System.out.println(); - System.out.println( - StringUtil.getFromUnicodeLE(data, 0x200, len) - ); - } - - // The font list comes slightly later - - // The hyperlinks may come before the fonts, - // or slightly in front - } - public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException { - byte[] data = getData(dir, "CONTENTS"); - - System.out.println(); - System.out.println("CONTENTS - " + data.length + " bytes long:"); - - String[] startType = new String[20]; - String[] endType = new String[20]; - int[] optA = new int[20]; - int[] optB = new int[20]; - int[] optC = new int[20]; - int[] from = new int[20]; - int[] len = new int[20]; - - for(int i=0; i<20; i++) { - int offset = 0x20 + i*24; - if(data[offset] == 0x18 && data[offset+1] == 0x00) { - // Has data - startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252); - optA[i] = LittleEndian.getUShort(data, offset+6); - optB[i] = LittleEndian.getUShort(data, offset+8); - optC[i] = LittleEndian.getUShort(data, offset+10); - endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252); - from[i] = (int)LittleEndian.getUInt(data, offset+16); - len[i] = (int)LittleEndian.getUInt(data, offset+20); - } else { - // Doesn't have data - } - } - - String text = StringUtil.getFromUnicodeLE( - data, from[0], len[0]/2 - ); - - // Dump - for(int i=0; i<20; i++) { - String num = Integer.toString(i); - if(i < 10) { - num = "0" + i; - } - System.out.print(num + " "); - - if(startType[i] == null) { - System.out.println("(not present)"); - } else { - System.out.println( - "\t" + - startType[i] + " " + - optA[i] + " " + - optB[i] + " " + - optC[i] - ); - System.out.println( - "\t" + - endType[i] + " " + - "from: " + - Integer.toHexString(from[i]) + - " (" + from[i] + ")" + - ", len: " + - Integer.toHexString(len[i]) + - " (" + len[i] + ")" - ); - } - } - - // Text - System.out.println(); - System.out.println("TEXT:"); - System.out.println(text); - System.out.println(); - - // All the others - for(int i=0; i<20; i++) { - if(startType[i] == null) { - continue; - } - int start = from[i]; - - System.out.println( - startType[i] + " -> " + endType[i] + - " @ " + Integer.toHexString(start) + - " (" + start + ")" - ); - System.out.println("\t" + dumpBytes(data, start, 4)); - System.out.println("\t" + dumpBytes(data, start+4, 4)); - System.out.println("\t" + dumpBytes(data, start+8, 4)); - System.out.println("\t(etc)"); - } - } - - protected void dump001CompObj(DirectoryNode dir) { - // TODO - } - - public void dumpQuill() throws IOException { - DirectoryNode quillDir = (DirectoryNode) - fs.getRoot().getEntry("Quill"); - DirectoryNode quillSubDir = (DirectoryNode) - quillDir.getEntry("QuillSub"); - - dump001CompObj(quillSubDir); - dumpCONTENTSraw(quillSubDir); - dumpCONTENTSguessed(quillSubDir); - } + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" HPBFDumper <filename>"); + System.exit(1); + } + HPBFDumper dump = new HPBFDumper(new POIFSFileSystem(new File(args[0]))); + + System.out.println("Dumping " + args[0]); + dump.dumpContents(); + dump.dumpEnvelope(); + dump.dumpEscher(); + dump.dump001CompObj(dump.fs.getRoot()); + dump.dumpQuill(); + + // Still to go: + // (0x03)Internal + // Objects + } + + /** + * Dump out the escher parts of the file. + * Escher -> EscherStm and EscherDelayStm + */ + public void dumpEscher() throws IOException { + DirectoryNode escherDir = (DirectoryNode) + fs.getRoot().getEntry("Escher"); + + dumpEscherStm(escherDir); + dumpEscherDelayStm(escherDir); + } + private void dumpEscherStream(byte[] data) { + DefaultEscherRecordFactory erf = + new DefaultEscherRecordFactory(); + + // Dump + int left = data.length; + while(left > 0) { + EscherRecord er = erf.createRecord(data, 0); + er.fillFields(data, 0, erf); + left -= er.getRecordSize(); + + System.out.println(er); + } + } + protected void dumpEscherStm(DirectoryNode escherDir) throws IOException { + byte[] data = getData(escherDir, "EscherStm"); + System.out.println(); + System.out.println("EscherStm - " + data.length + " bytes long:"); + if(data.length > 0) + dumpEscherStream(data); + } + protected void dumpEscherDelayStm(DirectoryNode escherDir) throws IOException { + byte[] data = getData(escherDir, "EscherDelayStm"); + System.out.println(); + System.out.println("EscherDelayStm - " + data.length + " bytes long:"); + if(data.length > 0) + dumpEscherStream(data); + } + + public void dumpEnvelope() throws IOException { + byte[] data = getData(fs.getRoot(), "Envelope"); + + System.out.println(); + System.out.println("Envelope - " + data.length + " bytes long:"); + } + + public void dumpContents() throws IOException { + byte[] data = getData(fs.getRoot(), "Contents"); + + System.out.println(); + System.out.println("Contents - " + data.length + " bytes long:"); + + // 8 bytes, always seems to be + // E8 AC 2C 00 E8 03 05 01 + // E8 AC 2C 00 E8 03 05 01 + + // 4 bytes - size of contents + // 13/15 00 00 01 + + // .... + + // E8 03 08 08 0C 20 03 00 00 00 00 88 16 00 00 00 ..... .......... + + // 01 18 27 00 03 20 00 00 E8 03 08 08 0C 20 03 00 ..'.. ....... .. + + // 01 18 30 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + + // 01 18 31 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + + // 01 18 32 00 03 20 00 00 + // E8 03 06 08 07 08 08 08 09 10 01 00 0C 20 04 00 + // 00 00 00 88 1E 00 00 00 + } + + public void dumpCONTENTSraw(DirectoryNode dir) throws IOException { + byte[] data = getData(dir, "CONTENTS"); + + System.out.println(); + System.out.println("CONTENTS - " + data.length + " bytes long:"); + + // Between the start and 0x200 we have + // CHNKINK(space) + 24 bytes + // 0x1800 + // TEXT + 6 bytes + // TEXT + 8 bytes + // 0x1800 + // STSH + 6 bytes + // STSH + 8 bytes + // 0x1800 + // STSH + 6 bytes + // STSH + 8 bytes + // but towards 0x200 the pattern may + // break down a little bit + + // After the second of a given type, + // it seems to be 4 bytes giving the start, + // then 4 bytes giving the length, then + // 18 00 + System.out.println( + new String(data, 0, 8, LocaleUtil.CHARSET_1252) + + dumpBytes(data, 8, 0x20-8) + ); + + int pos = 0x20; + boolean sixNotEight = true; + while(pos < 0x200) { + if(sixNotEight) { + System.out.println( + dumpBytes(data, pos, 2) + ); + pos += 2; + } + String text = new String(data, pos, 4, LocaleUtil.CHARSET_1252); + int blen = 8; + if(sixNotEight) + blen = 6; + System.out.println( + text + " " + dumpBytes(data, pos+4, blen) + ); + + pos += 4 + blen; + sixNotEight = ! sixNotEight; + } + + // Text from 0x200 onwards until we get + // to \r(00)\n(00)(00)(00) + int textStop = -1; + for(int i=0x200; i<data.length-2 && textStop == -1; i++) { + if(data[i] == 0 && data[i+1] == 0 && data[i+2] == 0) { + textStop = i; + } + } + if(textStop > 0) { + int len = (textStop - 0x200) / 2; + System.out.println(); + System.out.println( + StringUtil.getFromUnicodeLE(data, 0x200, len) + ); + } + + // The font list comes slightly later + + // The hyperlinks may come before the fonts, + // or slightly in front + } + public void dumpCONTENTSguessed(DirectoryNode dir) throws IOException { + byte[] data = getData(dir, "CONTENTS"); + + System.out.println(); + System.out.println("CONTENTS - " + data.length + " bytes long:"); + + String[] startType = new String[20]; + String[] endType = new String[20]; + int[] optA = new int[20]; + int[] optB = new int[20]; + int[] optC = new int[20]; + int[] from = new int[20]; + int[] len = new int[20]; + + for(int i=0; i<20; i++) { + int offset = 0x20 + i*24; + if(data[offset] == 0x18 && data[offset+1] == 0x00) { + // Has data + startType[i] = new String(data, offset+2, 4, LocaleUtil.CHARSET_1252); + optA[i] = LittleEndian.getUShort(data, offset+6); + optB[i] = LittleEndian.getUShort(data, offset+8); + optC[i] = LittleEndian.getUShort(data, offset+10); + endType[i] = new String(data, offset+12, 4, LocaleUtil.CHARSET_1252); + from[i] = (int)LittleEndian.getUInt(data, offset+16); + len[i] = (int)LittleEndian.getUInt(data, offset+20); + } else { + // Doesn't have data + } + } + + String text = StringUtil.getFromUnicodeLE( + data, from[0], len[0]/2 + ); + + // Dump + for(int i=0; i<20; i++) { + String num = Integer.toString(i); + if(i < 10) { + num = "0" + i; + } + System.out.print(num + " "); + + if(startType[i] == null) { + System.out.println("(not present)"); + } else { + System.out.println( + "\t" + + startType[i] + " " + + optA[i] + " " + + optB[i] + " " + + optC[i] + ); + System.out.println( + "\t" + + endType[i] + " " + + "from: " + + Integer.toHexString(from[i]) + + " (" + from[i] + ")" + + ", len: " + + Integer.toHexString(len[i]) + + " (" + len[i] + ")" + ); + } + } + + // Text + System.out.println(); + System.out.println("TEXT:"); + System.out.println(text); + System.out.println(); + + // All the others + for(int i=0; i<20; i++) { + if(startType[i] == null) { + continue; + } + int start = from[i]; + + System.out.println( + startType[i] + " -> " + endType[i] + + " @ " + Integer.toHexString(start) + + " (" + start + ")" + ); + System.out.println("\t" + dumpBytes(data, start, 4)); + System.out.println("\t" + dumpBytes(data, start+4, 4)); + System.out.println("\t" + dumpBytes(data, start+8, 4)); + System.out.println("\t(etc)"); + } + } + + protected void dump001CompObj(DirectoryNode dir) { + // TODO + } + + public void dumpQuill() throws IOException { + DirectoryNode quillDir = (DirectoryNode) + fs.getRoot().getEntry("Quill"); + DirectoryNode quillSubDir = (DirectoryNode) + quillDir.getEntry("QuillSub"); + + dump001CompObj(quillSubDir); + dumpCONTENTSraw(quillSubDir); + dumpCONTENTSguessed(quillSubDir); + } } diff --git a/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java b/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java index 4438111e4e..3c222d51b2 100644 --- a/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java +++ b/src/scratchpad/src/org/apache/poi/hpbf/dev/PLCDumper.java @@ -33,53 +33,53 @@ import org.apache.poi.util.HexDump; * what the format of them is. */ public final class PLCDumper { - private HPBFDocument doc; - private QuillContents qc; + private HPBFDocument doc; + private QuillContents qc; - public PLCDumper(HPBFDocument hpbfDoc) { - doc = hpbfDoc; - qc = doc.getQuillContents(); - } - public PLCDumper(POIFSFileSystem fs) throws IOException { - this(new HPBFDocument(fs)); - } - public PLCDumper(InputStream inp) throws IOException { - this(new POIFSFileSystem(inp)); - } + public PLCDumper(HPBFDocument hpbfDoc) { + doc = hpbfDoc; + qc = doc.getQuillContents(); + } + public PLCDumper(POIFSFileSystem fs) throws IOException { + this(new HPBFDocument(fs)); + } + public PLCDumper(InputStream inp) throws IOException { + this(new POIFSFileSystem(inp)); + } - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" PLCDumper <filename>"); - System.exit(1); - } + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" PLCDumper <filename>"); + System.exit(1); + } - try (FileInputStream fis = new FileInputStream(args[0])) { - PLCDumper dump = new PLCDumper(fis); + try (FileInputStream fis = new FileInputStream(args[0])) { + PLCDumper dump = new PLCDumper(fis); - System.out.println("Dumping " + args[0]); - dump.dumpPLC(); - } - } + System.out.println("Dumping " + args[0]); + dump.dumpPLC(); + } + } - private void dumpPLC() { - QCBit[] bits = qc.getBits(); + private void dumpPLC() { + QCBit[] bits = qc.getBits(); - for(int i=0; i<bits.length; i++) { - if(bits[i] == null) continue; - if(bits[i].getBitType().equals("PLC ")) { - dumpBit(bits[i], i); - } - } - } + for(int i=0; i<bits.length; i++) { + if(bits[i] == null) continue; + if(bits[i].getBitType().equals("PLC ")) { + dumpBit(bits[i], i); + } + } + } - private void dumpBit(QCBit bit, int index) { - System.out.println(); - System.out.println("Dumping " + bit.getBitType() + " bit at " + index); - System.out.println(" Is a " + bit.getThingType() + ", number is " + bit.getOptA()); - System.out.println(" Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")"); - System.out.println(" Runs for " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")"); + private void dumpBit(QCBit bit, int index) { + System.out.println(); + System.out.println("Dumping " + bit.getBitType() + " bit at " + index); + System.out.println(" Is a " + bit.getThingType() + ", number is " + bit.getOptA()); + System.out.println(" Starts at " + bit.getDataOffset() + " (0x" + Integer.toHexString(bit.getDataOffset()) + ")"); + System.out.println(" Runs for " + bit.getLength() + " (0x" + Integer.toHexString(bit.getLength()) + ")"); - System.out.println(HexDump.dump(bit.getData(), 0, 0)); - } + System.out.println(HexDump.dump(bit.getData(), 0, 0)); + } } diff --git a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java index 27f62aeeec..bd442b8da6 100644 --- a/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java @@ -50,65 +50,65 @@ public final class PublisherTextExtractor extends POIOLE2TextExtractor { this(new POIFSFileSystem(is)); } - /** - * Should a call to getText() return hyperlinks inline - * with the text? - * Default is no - */ - public void setHyperlinksByDefault(boolean hyperlinksByDefault) { - this.hyperlinksByDefault = hyperlinksByDefault; - } - - - public String getText() { - StringBuilder text = new StringBuilder(); - - // Get the text from the Quill Contents - QCBit[] bits = doc.getQuillContents().getBits(); - for (QCBit bit1 : bits) { - if (bit1 != null && bit1 instanceof QCTextBit) { - QCTextBit t = (QCTextBit) bit1; - text.append(t.getText().replace('\r', '\n')); - } - } - - // If requested, add in the hyperlinks - // Ideally, we'd do these inline, but the hyperlink - // positions are relative to the text area the - // hyperlink is in, and we have yet to figure out - // how to tie that together. - if(hyperlinksByDefault) { - for (QCBit bit : bits) { - if (bit != null && bit instanceof Type12) { - Type12 hyperlinks = (Type12) bit; - for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) { - text.append("<"); - text.append(hyperlinks.getHyperlink(j)); - text.append(">\n"); - } - } - } - } - - // Get more text - // TODO - - return text.toString(); - } - - - public static void main(String[] args) throws Exception { - if(args.length == 0) { - System.err.println("Use:"); - System.err.println(" PublisherTextExtractor <file.pub>"); - } - - for (String arg : args) { - try (FileInputStream fis = new FileInputStream(arg)) { - PublisherTextExtractor te = new PublisherTextExtractor(fis); - System.out.println(te.getText()); - te.close(); - } - } - } + /** + * Should a call to getText() return hyperlinks inline + * with the text? + * Default is no + */ + public void setHyperlinksByDefault(boolean hyperlinksByDefault) { + this.hyperlinksByDefault = hyperlinksByDefault; + } + + + public String getText() { + StringBuilder text = new StringBuilder(); + + // Get the text from the Quill Contents + QCBit[] bits = doc.getQuillContents().getBits(); + for (QCBit bit1 : bits) { + if (bit1 != null && bit1 instanceof QCTextBit) { + QCTextBit t = (QCTextBit) bit1; + text.append(t.getText().replace('\r', '\n')); + } + } + + // If requested, add in the hyperlinks + // Ideally, we'd do these inline, but the hyperlink + // positions are relative to the text area the + // hyperlink is in, and we have yet to figure out + // how to tie that together. + if(hyperlinksByDefault) { + for (QCBit bit : bits) { + if (bit != null && bit instanceof Type12) { + Type12 hyperlinks = (Type12) bit; + for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) { + text.append("<"); + text.append(hyperlinks.getHyperlink(j)); + text.append(">\n"); + } + } + } + } + + // Get more text + // TODO + + return text.toString(); + } + + + public static void main(String[] args) throws Exception { + if(args.length == 0) { + System.err.println("Use:"); + System.err.println(" PublisherTextExtractor <file.pub>"); + } + + for (String arg : args) { + try (FileInputStream fis = new FileInputStream(arg)) { + PublisherTextExtractor te = new PublisherTextExtractor(fis); + System.out.println(te.getText()); + te.close(); + } + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java index 9005e65fbd..3c92da1a3c 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/QuickButCruddyTextExtractor.java @@ -53,158 +53,158 @@ import org.apache.poi.util.LittleEndian; * lucene indexers) that would ever want to use this! */ public final class QuickButCruddyTextExtractor { - private POIFSFileSystem fs; - private InputStream is; - private byte[] pptContents; - - /** - * Really basic text extractor, that will also return lots of crud text. - * Takes a single argument, the file to extract from - */ - public static void main(String[] args) throws IOException - { - if(args.length < 1) { - System.err.println("Useage:"); - System.err.println("\tQuickButCruddyTextExtractor <file>"); - System.exit(1); - } - - String file = args[0]; - - QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file); - System.out.println(ppe.getTextAsString()); - ppe.close(); - } - - /** - * Creates an extractor from a given file name - * @param fileName - */ - @SuppressWarnings("resource") + private POIFSFileSystem fs; + private InputStream is; + private byte[] pptContents; + + /** + * Really basic text extractor, that will also return lots of crud text. + * Takes a single argument, the file to extract from + */ + public static void main(String[] args) throws IOException + { + if(args.length < 1) { + System.err.println("Useage:"); + System.err.println("\tQuickButCruddyTextExtractor <file>"); + System.exit(1); + } + + String file = args[0]; + + QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file); + System.out.println(ppe.getTextAsString()); + ppe.close(); + } + + /** + * Creates an extractor from a given file name + * @param fileName + */ + @SuppressWarnings("resource") public QuickButCruddyTextExtractor(String fileName) throws IOException { - this(new POIFSFileSystem(new File(fileName))); - } + this(new POIFSFileSystem(new File(fileName))); + } - /** - * Creates an extractor from a given input stream - * @param iStream - */ + /** + * Creates an extractor from a given input stream + * @param iStream + */ @SuppressWarnings("resource") - public QuickButCruddyTextExtractor(InputStream iStream) throws IOException { - this(new POIFSFileSystem(iStream)); - is = iStream; - } - - /** - * Creates an extractor from a POIFS Filesystem - * @param poifs - */ - public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException { - fs = poifs; - - // Find the PowerPoint bit, and get out the bytes - InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT); - pptContents = IOUtils.toByteArray(pptIs); - pptIs.close(); - } - - - /** - * Shuts down the underlying streams - */ - public void close() throws IOException { - if(is != null) { is.close(); } - fs = null; - } - - /** - * Fetches the ALL the text of the powerpoint file, as a single string - */ - public String getTextAsString() { - StringBuilder ret = new StringBuilder(); - List<String> textV = getTextAsVector(); - for(String text : textV) { - ret.append(text); - if(! text.endsWith("\n")) { - ret.append('\n'); - } - } - return ret.toString(); - } - - /** - * Fetches the ALL the text of the powerpoint file, in a List of - * strings, one per text record - */ - public List<String> getTextAsVector() { - List<String> textV = new ArrayList<>(); - - // Set to the start of the file - int walkPos = 0; - - // Start walking the file, looking for the records - while(walkPos != -1) { + public QuickButCruddyTextExtractor(InputStream iStream) throws IOException { + this(new POIFSFileSystem(iStream)); + is = iStream; + } + + /** + * Creates an extractor from a POIFS Filesystem + * @param poifs + */ + public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException { + fs = poifs; + + // Find the PowerPoint bit, and get out the bytes + InputStream pptIs = fs.createDocumentInputStream(HSLFSlideShow.POWERPOINT_DOCUMENT); + pptContents = IOUtils.toByteArray(pptIs); + pptIs.close(); + } + + + /** + * Shuts down the underlying streams + */ + public void close() throws IOException { + if(is != null) { is.close(); } + fs = null; + } + + /** + * Fetches the ALL the text of the powerpoint file, as a single string + */ + public String getTextAsString() { + StringBuilder ret = new StringBuilder(); + List<String> textV = getTextAsVector(); + for(String text : textV) { + ret.append(text); + if(! text.endsWith("\n")) { + ret.append('\n'); + } + } + return ret.toString(); + } + + /** + * Fetches the ALL the text of the powerpoint file, in a List of + * strings, one per text record + */ + public List<String> getTextAsVector() { + List<String> textV = new ArrayList<>(); + + // Set to the start of the file + int walkPos = 0; + + // Start walking the file, looking for the records + while(walkPos != -1) { walkPos = findTextRecords(walkPos,textV); - } - - // Return what we find - return textV; - } - - /** - * For the given position, look if the record is a text record, and wind - * on after. - * If it is a text record, grabs out the text. Whatever happens, returns - * the position of the next record, or -1 if no more. - */ - public int findTextRecords(int startPos, List<String> textV) { - // Grab the length, and the first option byte - // Note that the length doesn't include the 8 byte atom header - int len = (int)LittleEndian.getUInt(pptContents,startPos+4); - byte opt = pptContents[startPos]; - - // If it's a container, step into it and return - // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f) - int container = opt & 0x0f; - if(container == 0x0f) { - return (startPos+8); - } - - // Otherwise, check the type to see if it's text - int type = LittleEndian.getUShort(pptContents,startPos+2); - - // TextBytesAtom - if(type == RecordTypes.TextBytesAtom.typeID) { - TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8); - String text = HSLFTextParagraph.toExternalString(tba.getText(), -1); - textV.add(text); - } - // TextCharsAtom - if(type == RecordTypes.TextCharsAtom.typeID) { - TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8); + } + + // Return what we find + return textV; + } + + /** + * For the given position, look if the record is a text record, and wind + * on after. + * If it is a text record, grabs out the text. Whatever happens, returns + * the position of the next record, or -1 if no more. + */ + public int findTextRecords(int startPos, List<String> textV) { + // Grab the length, and the first option byte + // Note that the length doesn't include the 8 byte atom header + int len = (int)LittleEndian.getUInt(pptContents,startPos+4); + byte opt = pptContents[startPos]; + + // If it's a container, step into it and return + // (If it's a container, option byte 1 BINARY_AND 0x0f will be 0x0f) + int container = opt & 0x0f; + if(container == 0x0f) { + return (startPos+8); + } + + // Otherwise, check the type to see if it's text + int type = LittleEndian.getUShort(pptContents,startPos+2); + + // TextBytesAtom + if(type == RecordTypes.TextBytesAtom.typeID) { + TextBytesAtom tba = (TextBytesAtom)Record.createRecordForType(type, pptContents, startPos, len+8); + String text = HSLFTextParagraph.toExternalString(tba.getText(), -1); + textV.add(text); + } + // TextCharsAtom + if(type == RecordTypes.TextCharsAtom.typeID) { + TextCharsAtom tca = (TextCharsAtom)Record.createRecordForType(type, pptContents, startPos, len+8); String text = HSLFTextParagraph.toExternalString(tca.getText(), -1); textV.add(text); - } - - // CString (doesn't go via a TextRun) - if(type == RecordTypes.CString.typeID) { - CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8); - String text = cs.getText(); - - // Ignore the ones we know to be rubbish - if(text.equals("___PPT10")) { - } else if(text.equals("Default Design")) { - } else { - textV.add(text); - } - } - - - // Wind on by the atom length, and check we're not at the end - int newPos = (startPos + 8 + len); - if(newPos > (pptContents.length - 8)) { - newPos = -1; - } - return newPos; - } + } + + // CString (doesn't go via a TextRun) + if(type == RecordTypes.CString.typeID) { + CString cs = (CString)Record.createRecordForType(type, pptContents, startPos, len+8); + String text = cs.getText(); + + // Ignore the ones we know to be rubbish + if(text.equals("___PPT10")) { + } else if(text.equals("Default Design")) { + } else { + textV.add(text); + } + } + + + // Wind on by the atom length, and check we're not at the end + int newPos = (startPos + 8 + len); + if(newPos > (pptContents.length - 8)) { + newPos = -1; + } + return newPos; + } } |