You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

BiffViewer.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hssf.dev;
  16. import static org.apache.logging.log4j.util.Unbox.box;
  17. import java.io.File;
  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.io.OutputStream;
  21. import java.io.OutputStreamWriter;
  22. import java.io.PrintWriter;
  23. import java.io.Writer;
  24. import java.nio.charset.Charset;
  25. import java.util.ArrayList;
  26. import java.util.List;
  27. import org.apache.commons.io.output.CloseShieldOutputStream;
  28. import org.apache.commons.io.output.NullOutputStream;
  29. import org.apache.logging.log4j.LogManager;
  30. import org.apache.logging.log4j.Logger;
  31. import org.apache.poi.hssf.dev.BiffDumpingStream.IBiffRecordListener;
  32. import org.apache.poi.hssf.record.ContinueRecord;
  33. import org.apache.poi.hssf.record.HSSFRecordTypes;
  34. import org.apache.poi.hssf.record.Record;
  35. import org.apache.poi.hssf.record.RecordInputStream;
  36. import org.apache.poi.hssf.record.RecordInputStream.LeftoverDataException;
  37. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  38. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  39. import org.apache.poi.util.HexDump;
  40. import org.apache.poi.util.IOUtils;
  41. import org.apache.poi.util.RecordFormatException;
  42. import org.apache.poi.util.StringUtil;
  43. /**
  44. * Utility for reading in BIFF8 records and displaying data from them.
  45. */
  46. public final class BiffViewer {
  47. private static final char[] NEW_LINE_CHARS = System.getProperty("line.separator").toCharArray();
  48. private static final Logger LOG = LogManager.getLogger(BiffViewer.class);
  49. private static final String ESCHER_SERIALIZE = "poi.deserialize.escher";
  50. private static final int DUMP_LINE_LEN = 16;
  51. private static final char[] COLUMN_SEPARATOR = " | ".toCharArray();
  52. private boolean biffHex;
  53. private boolean interpretRecords = true;
  54. private boolean rawHexOnly;
  55. private boolean noHeader = true;
  56. private boolean zeroAlignRecord = true;
  57. private final List<String> _headers = new ArrayList<>();
  58. /**
  59. * show hex dump of each BIFF record
  60. */
  61. public void setDumpBiffHex(boolean biffhex) {
  62. this.biffHex = biffhex;
  63. }
  64. /**
  65. * output interpretation of BIFF records
  66. */
  67. public void setInterpretRecords(boolean interpretRecords) {
  68. this.interpretRecords = interpretRecords;
  69. }
  70. /**
  71. * output raw hex dump of whole workbook stream
  72. */
  73. public void setOutputRawHexOnly(boolean rawhex) {
  74. this.rawHexOnly = rawhex;
  75. }
  76. /**
  77. * do not print record header - default is on
  78. */
  79. public void setSuppressHeader(boolean noHeader) {
  80. this.noHeader = noHeader;
  81. }
  82. /**
  83. * turn on deserialization of escher records (default is off)
  84. */
  85. public void setSerializeEscher(boolean serialize) {
  86. if (serialize) {
  87. System.setProperty(ESCHER_SERIALIZE, "true");
  88. } else {
  89. System.clearProperty(ESCHER_SERIALIZE);
  90. }
  91. }
  92. public void setZeroAlignRecord(boolean zeroAlignRecord) {
  93. this.zeroAlignRecord = zeroAlignRecord;
  94. }
  95. public void parse(File file) throws IOException {
  96. parse(file, System.out);
  97. }
  98. public void parse(File file, OutputStream os) throws IOException {
  99. try (POIFSFileSystem fs = new POIFSFileSystem(file, true);
  100. InputStream is = getPOIFSInputStream(fs);
  101. PrintWriter pw = wrap(os)
  102. ) {
  103. if (rawHexOnly) {
  104. byte[] data = IOUtils.toByteArray(is);
  105. HexDump.dump(data, 0, System.out, 0);
  106. } else {
  107. IBiffRecordListener recListener = (globalOffset, recordCounter, sid, dataSize, data) -> {
  108. String header = formatRecordDetails(globalOffset, sid, dataSize, recordCounter);
  109. if (!noHeader) {
  110. _headers.add(header);
  111. }
  112. if (biffHex) {
  113. pw.write(header);
  114. pw.write(NEW_LINE_CHARS);
  115. hexDumpAligned(pw, data, dataSize+4, globalOffset);
  116. pw.flush();
  117. }
  118. };
  119. try (InputStream is2 = new BiffDumpingStream(is, recListener)) {
  120. createRecords(is2, pw);
  121. }
  122. }
  123. }
  124. }
  125. private static String formatRecordDetails(int globalOffset, int sid, int size, int recordCounter) {
  126. return "Offset=" + HexDump.intToHex(globalOffset) + "(" + globalOffset + ")" +
  127. " recno=" + recordCounter +
  128. " sid=" + HexDump.shortToHex(sid) +
  129. " size=" + HexDump.shortToHex(size) + "(" + size + ")";
  130. }
  131. /**
  132. * Create an array of records from an input stream
  133. *
  134. * @param is the InputStream from which the records will be obtained
  135. * @param ps the PrintWriter to output the record data
  136. *
  137. * @throws RecordFormatException on error processing the InputStream
  138. */
  139. private void createRecords(InputStream is, PrintWriter ps) throws RecordFormatException {
  140. RecordInputStream recStream = new RecordInputStream(is);
  141. while (true) {
  142. _headers.clear();
  143. boolean hasNext;
  144. try {
  145. hasNext = recStream.hasNextRecord();
  146. } catch (LeftoverDataException e) {
  147. LOG.atError().withThrowable(e).log("Discarding {} bytes and continuing", box(recStream.remaining()));
  148. recStream.readRemainder();
  149. hasNext = recStream.hasNextRecord();
  150. }
  151. if (!hasNext) {
  152. break;
  153. }
  154. recStream.nextRecord();
  155. if (recStream.getSid() == 0) {
  156. continue;
  157. }
  158. Record record;
  159. if (interpretRecords) {
  160. record = HSSFRecordTypes.forSID(recStream.getSid()).getRecordConstructor().apply(recStream);
  161. if (record.getSid() == ContinueRecord.sid) {
  162. continue;
  163. }
  164. _headers.forEach(ps::println);
  165. ps.print(record);
  166. } else {
  167. recStream.readRemainder();
  168. }
  169. ps.println();
  170. }
  171. }
  172. private static PrintWriter wrap(OutputStream os) {
  173. final OutputStream osOut;
  174. final Charset cs;
  175. if (os == null) {
  176. cs = Charset.defaultCharset();
  177. osOut = NullOutputStream.NULL_OUTPUT_STREAM;
  178. } else if (os == System.out) {
  179. // Use the system default encoding when sending to System Out
  180. cs = Charset.defaultCharset();
  181. osOut = CloseShieldOutputStream.wrap(System.out);
  182. } else {
  183. cs = StringUtil.UTF8;
  184. osOut = os;
  185. }
  186. return new PrintWriter(new OutputStreamWriter(osOut, cs));
  187. }
  188. static InputStream getPOIFSInputStream(POIFSFileSystem fs) throws IOException {
  189. String workbookName = HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
  190. return fs.createDocumentInputStream(workbookName);
  191. }
  192. /**
  193. * Hex-dumps a portion of a byte array in typical format, also preserving dump-line alignment
  194. * @param globalOffset (somewhat arbitrary) used to calculate the addresses printed at the
  195. * start of each line
  196. */
  197. private void hexDumpAligned(Writer w, byte[] data, int dumpLen, int globalOffset) {
  198. int baseDataOffset = 0;
  199. // perhaps this code should be moved to HexDump
  200. int globalStart = globalOffset + baseDataOffset;
  201. int globalEnd = globalOffset + baseDataOffset + dumpLen;
  202. int startDelta = globalStart % DUMP_LINE_LEN;
  203. int endDelta = globalEnd % DUMP_LINE_LEN;
  204. if (zeroAlignRecord) {
  205. endDelta -= startDelta;
  206. if (endDelta < 0) {
  207. endDelta += DUMP_LINE_LEN;
  208. }
  209. startDelta = 0;
  210. }
  211. int startLineAddr;
  212. int endLineAddr;
  213. if (zeroAlignRecord) {
  214. endLineAddr = globalEnd - endDelta - (globalStart - startDelta);
  215. startLineAddr = 0;
  216. } else {
  217. startLineAddr = globalStart - startDelta;
  218. endLineAddr = globalEnd - endDelta;
  219. }
  220. int lineDataOffset = baseDataOffset - startDelta;
  221. int lineAddr = startLineAddr;
  222. // output (possibly incomplete) first line
  223. if (startLineAddr == endLineAddr) {
  224. hexDumpLine(w, data, lineAddr, lineDataOffset, startDelta, endDelta);
  225. return;
  226. }
  227. hexDumpLine(w, data, lineAddr, lineDataOffset, startDelta, DUMP_LINE_LEN);
  228. // output all full lines in the middle
  229. while (true) {
  230. lineAddr += DUMP_LINE_LEN;
  231. lineDataOffset += DUMP_LINE_LEN;
  232. if (lineAddr >= endLineAddr) {
  233. break;
  234. }
  235. hexDumpLine(w, data, lineAddr, lineDataOffset, 0, DUMP_LINE_LEN);
  236. }
  237. // output (possibly incomplete) last line
  238. if (endDelta != 0) {
  239. hexDumpLine(w, data, lineAddr, lineDataOffset, 0, endDelta);
  240. }
  241. }
  242. private static void hexDumpLine(Writer w, byte[] data, int lineStartAddress, int lineDataOffset, int startDelta, int endDelta) {
  243. final char[] buf = new char[8+2*COLUMN_SEPARATOR.length+DUMP_LINE_LEN*3-1+DUMP_LINE_LEN+NEW_LINE_CHARS.length];
  244. if (startDelta >= endDelta) {
  245. throw new IllegalArgumentException("Bad start/end delta");
  246. }
  247. int idx=0;
  248. try {
  249. writeHex(buf, idx, lineStartAddress, 8);
  250. idx = arraycopy(COLUMN_SEPARATOR, buf, idx+8);
  251. // raw hex data
  252. for (int i=0; i< DUMP_LINE_LEN; i++) {
  253. if (i>0) {
  254. buf[idx++] = ' ';
  255. }
  256. if (i >= startDelta && i < endDelta) {
  257. writeHex(buf, idx, data[lineDataOffset+i], 2);
  258. } else {
  259. buf[idx] = ' ';
  260. buf[idx+1] = ' ';
  261. }
  262. idx += 2;
  263. }
  264. idx = arraycopy(COLUMN_SEPARATOR, buf, idx);
  265. // interpreted ascii
  266. for (int i=0; i< DUMP_LINE_LEN; i++) {
  267. char ch = ' ';
  268. if (i >= startDelta && i < endDelta) {
  269. ch = getPrintableChar(data[lineDataOffset+i]);
  270. }
  271. buf[idx++] = ch;
  272. }
  273. idx = arraycopy(NEW_LINE_CHARS, buf, idx);
  274. w.write(buf, 0, idx);
  275. } catch (IOException e) {
  276. throw new RuntimeException(e);
  277. }
  278. }
  279. private static int arraycopy(char[] in, char[] out, int pos) {
  280. int idx = pos;
  281. for (char c : in) {
  282. out[idx++] = c;
  283. }
  284. return idx;
  285. }
  286. private static char getPrintableChar(byte b) {
  287. char ib = (char) (b & 0x00FF);
  288. if (ib < 32 || ib > 126) {
  289. return '.';
  290. }
  291. return ib;
  292. }
  293. private static void writeHex(char[] buf, int startInBuf, int value, int nDigits) {
  294. int acc = value;
  295. for(int i=nDigits-1; i>=0; i--) {
  296. int digit = acc & 0x0F;
  297. buf[startInBuf+i] = (char) (digit < 10 ? ('0' + digit) : ('A' + digit - 10));
  298. acc >>>= 4;
  299. }
  300. }
  301. }