123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.hssf.dev;
-
- import static org.apache.logging.log4j.util.Unbox.box;
-
- import java.io.File;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import java.io.OutputStreamWriter;
- import java.io.PrintWriter;
- import java.io.Writer;
- import java.nio.charset.Charset;
- import java.util.ArrayList;
- import java.util.List;
-
- import org.apache.commons.io.output.CloseShieldOutputStream;
- import org.apache.commons.io.output.NullOutputStream;
- import org.apache.logging.log4j.LogManager;
- import org.apache.logging.log4j.Logger;
- import org.apache.poi.hssf.dev.BiffDumpingStream.IBiffRecordListener;
- import org.apache.poi.hssf.record.ContinueRecord;
- import org.apache.poi.hssf.record.HSSFRecordTypes;
- import org.apache.poi.hssf.record.Record;
- import org.apache.poi.hssf.record.RecordInputStream;
- import org.apache.poi.hssf.record.RecordInputStream.LeftoverDataException;
- import org.apache.poi.hssf.usermodel.HSSFWorkbook;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
- import org.apache.poi.util.HexDump;
- import org.apache.poi.util.IOUtils;
- import org.apache.poi.util.RecordFormatException;
- import org.apache.poi.util.StringUtil;
-
- /**
- * Utility for reading in BIFF8 records and displaying data from them.
- */
- public final class BiffViewer {
- private static final char[] NEW_LINE_CHARS = System.getProperty("line.separator").toCharArray();
- private static final Logger LOG = LogManager.getLogger(BiffViewer.class);
- private static final String ESCHER_SERIALIZE = "poi.deserialize.escher";
- private static final int DUMP_LINE_LEN = 16;
- private static final char[] COLUMN_SEPARATOR = " | ".toCharArray();
-
- private boolean biffHex;
- private boolean interpretRecords = true;
- private boolean rawHexOnly;
- private boolean noHeader = true;
- private boolean zeroAlignRecord = true;
- private final List<String> _headers = new ArrayList<>();
-
-
- /**
- * show hex dump of each BIFF record
- */
- public void setDumpBiffHex(boolean biffhex) {
- this.biffHex = biffhex;
- }
-
- /**
- * output interpretation of BIFF records
- */
- public void setInterpretRecords(boolean interpretRecords) {
- this.interpretRecords = interpretRecords;
- }
-
- /**
- * output raw hex dump of whole workbook stream
- */
- public void setOutputRawHexOnly(boolean rawhex) {
- this.rawHexOnly = rawhex;
- }
-
- /**
- * do not print record header - default is on
- */
- public void setSuppressHeader(boolean noHeader) {
- this.noHeader = noHeader;
- }
-
- /**
- * turn on deserialization of escher records (default is off)
- */
- public void setSerializeEscher(boolean serialize) {
- if (serialize) {
- System.setProperty(ESCHER_SERIALIZE, "true");
- } else {
- System.clearProperty(ESCHER_SERIALIZE);
- }
- }
-
- public void setZeroAlignRecord(boolean zeroAlignRecord) {
- this.zeroAlignRecord = zeroAlignRecord;
- }
-
- public void parse(File file) throws IOException {
- parse(file, System.out);
- }
-
- public void parse(File file, OutputStream os) throws IOException {
- try (POIFSFileSystem fs = new POIFSFileSystem(file, true);
- InputStream is = getPOIFSInputStream(fs);
- PrintWriter pw = wrap(os)
- ) {
- if (rawHexOnly) {
- byte[] data = IOUtils.toByteArray(is);
- HexDump.dump(data, 0, System.out, 0);
- } else {
- IBiffRecordListener recListener = (globalOffset, recordCounter, sid, dataSize, data) -> {
- String header = formatRecordDetails(globalOffset, sid, dataSize, recordCounter);
- if (!noHeader) {
- _headers.add(header);
- }
- if (biffHex) {
- pw.write(header);
- pw.write(NEW_LINE_CHARS);
- hexDumpAligned(pw, data, dataSize+4, globalOffset);
- pw.flush();
- }
- };
-
- try (InputStream is2 = new BiffDumpingStream(is, recListener)) {
- createRecords(is2, pw);
- }
- }
- }
- }
-
- private static String formatRecordDetails(int globalOffset, int sid, int size, int recordCounter) {
- return "Offset=" + HexDump.intToHex(globalOffset) + "(" + globalOffset + ")" +
- " recno=" + recordCounter +
- " sid=" + HexDump.shortToHex(sid) +
- " size=" + HexDump.shortToHex(size) + "(" + size + ")";
- }
-
- /**
- * Create an array of records from an input stream
- *
- * @param is the InputStream from which the records will be obtained
- * @param ps the PrintWriter to output the record data
- *
- * @throws RecordFormatException on error processing the InputStream
- */
- private void createRecords(InputStream is, PrintWriter ps) throws RecordFormatException {
- RecordInputStream recStream = new RecordInputStream(is);
- while (true) {
- _headers.clear();
- boolean hasNext;
- try {
- hasNext = recStream.hasNextRecord();
- } catch (LeftoverDataException e) {
- LOG.atError().withThrowable(e).log("Discarding {} bytes and continuing", box(recStream.remaining()));
- recStream.readRemainder();
- hasNext = recStream.hasNextRecord();
- }
- if (!hasNext) {
- break;
- }
- recStream.nextRecord();
- if (recStream.getSid() == 0) {
- continue;
- }
- Record record;
- if (interpretRecords) {
- record = HSSFRecordTypes.forSID(recStream.getSid()).getRecordConstructor().apply(recStream);
- if (record.getSid() == ContinueRecord.sid) {
- continue;
- }
-
- _headers.forEach(ps::println);
- ps.print(record);
- } else {
- recStream.readRemainder();
- }
- ps.println();
- }
- }
-
- private static PrintWriter wrap(OutputStream os) {
- final OutputStream osOut;
- final Charset cs;
-
- if (os == null) {
- cs = Charset.defaultCharset();
- osOut = NullOutputStream.NULL_OUTPUT_STREAM;
- } else if (os == System.out) {
- // Use the system default encoding when sending to System Out
- cs = Charset.defaultCharset();
- osOut = CloseShieldOutputStream.wrap(System.out);
- } else {
- cs = StringUtil.UTF8;
- osOut = os;
- }
- return new PrintWriter(new OutputStreamWriter(osOut, cs));
- }
-
-
- static InputStream getPOIFSInputStream(POIFSFileSystem fs) throws IOException {
- String workbookName = HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
- return fs.createDocumentInputStream(workbookName);
- }
-
-
- /**
- * Hex-dumps a portion of a byte array in typical format, also preserving dump-line alignment
- * @param globalOffset (somewhat arbitrary) used to calculate the addresses printed at the
- * start of each line
- */
- private void hexDumpAligned(Writer w, byte[] data, int dumpLen, int globalOffset) {
- int baseDataOffset = 0;
-
- // perhaps this code should be moved to HexDump
- int globalStart = globalOffset + baseDataOffset;
- int globalEnd = globalOffset + baseDataOffset + dumpLen;
- int startDelta = globalStart % DUMP_LINE_LEN;
- int endDelta = globalEnd % DUMP_LINE_LEN;
- if (zeroAlignRecord) {
- endDelta -= startDelta;
- if (endDelta < 0) {
- endDelta += DUMP_LINE_LEN;
- }
- startDelta = 0;
- }
- int startLineAddr;
- int endLineAddr;
- if (zeroAlignRecord) {
- endLineAddr = globalEnd - endDelta - (globalStart - startDelta);
- startLineAddr = 0;
- } else {
- startLineAddr = globalStart - startDelta;
- endLineAddr = globalEnd - endDelta;
- }
-
- int lineDataOffset = baseDataOffset - startDelta;
- int lineAddr = startLineAddr;
-
- // output (possibly incomplete) first line
- if (startLineAddr == endLineAddr) {
- hexDumpLine(w, data, lineAddr, lineDataOffset, startDelta, endDelta);
- return;
- }
- hexDumpLine(w, data, lineAddr, lineDataOffset, startDelta, DUMP_LINE_LEN);
-
- // output all full lines in the middle
- while (true) {
- lineAddr += DUMP_LINE_LEN;
- lineDataOffset += DUMP_LINE_LEN;
- if (lineAddr >= endLineAddr) {
- break;
- }
- hexDumpLine(w, data, lineAddr, lineDataOffset, 0, DUMP_LINE_LEN);
- }
-
-
- // output (possibly incomplete) last line
- if (endDelta != 0) {
- hexDumpLine(w, data, lineAddr, lineDataOffset, 0, endDelta);
- }
- }
-
- private static void hexDumpLine(Writer w, byte[] data, int lineStartAddress, int lineDataOffset, int startDelta, int endDelta) {
- final char[] buf = new char[8+2*COLUMN_SEPARATOR.length+DUMP_LINE_LEN*3-1+DUMP_LINE_LEN+NEW_LINE_CHARS.length];
-
- if (startDelta >= endDelta) {
- throw new IllegalArgumentException("Bad start/end delta");
- }
- int idx=0;
- try {
- writeHex(buf, idx, lineStartAddress, 8);
- idx = arraycopy(COLUMN_SEPARATOR, buf, idx+8);
- // raw hex data
- for (int i=0; i< DUMP_LINE_LEN; i++) {
- if (i>0) {
- buf[idx++] = ' ';
- }
- if (i >= startDelta && i < endDelta) {
- writeHex(buf, idx, data[lineDataOffset+i], 2);
- } else {
- buf[idx] = ' ';
- buf[idx+1] = ' ';
- }
- idx += 2;
- }
- idx = arraycopy(COLUMN_SEPARATOR, buf, idx);
-
- // interpreted ascii
- for (int i=0; i< DUMP_LINE_LEN; i++) {
- char ch = ' ';
- if (i >= startDelta && i < endDelta) {
- ch = getPrintableChar(data[lineDataOffset+i]);
- }
- buf[idx++] = ch;
- }
-
- idx = arraycopy(NEW_LINE_CHARS, buf, idx);
-
- w.write(buf, 0, idx);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- private static int arraycopy(char[] in, char[] out, int pos) {
- int idx = pos;
- for (char c : in) {
- out[idx++] = c;
- }
- return idx;
- }
-
- private static char getPrintableChar(byte b) {
- char ib = (char) (b & 0x00FF);
- if (ib < 32 || ib > 126) {
- return '.';
- }
- return ib;
- }
-
- private static void writeHex(char[] buf, int startInBuf, int value, int nDigits) {
- int acc = value;
- for(int i=nDigits-1; i>=0; i--) {
- int digit = acc & 0x0F;
- buf[startInBuf+i] = (char) (digit < 10 ? ('0' + digit) : ('A' + digit - 10));
- acc >>>= 4;
- }
- }
- }
|