123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.hssf.record;
-
- import java.io.ByteArrayOutputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.util.Locale;
-
- import org.apache.poi.hssf.dev.BiffViewer;
- import org.apache.poi.hssf.record.crypto.Biff8DecryptingStream;
- import org.apache.poi.poifs.crypt.EncryptionInfo;
- import org.apache.poi.util.IOUtils;
- import org.apache.poi.util.Internal;
- import org.apache.poi.util.LittleEndianConsts;
- import org.apache.poi.util.LittleEndianInput;
- import org.apache.poi.util.LittleEndianInputStream;
- import org.apache.poi.util.RecordFormatException;
-
- /**
- * Title: Record Input Stream
- *
- * Description: Wraps a stream and provides helper methods for the construction of records.
- */
- public final class RecordInputStream implements LittleEndianInput {
-
-
- /** Maximum size of a single record (minus the 4 byte header) without a continue*/
- public final static short MAX_RECORD_DATA_SIZE = 8224;
- private static final int INVALID_SID_VALUE = -1;
- //arbitrarily selected; may need to increase
- private static final int MAX_RECORD_LENGTH = 100_000;
- /**
- * When {@link #_currentDataLength} has this value, it means that the previous BIFF record is
- * finished, the next sid has been properly read, but the data size field has not been read yet.
- */
- private static final int DATA_LEN_NEEDS_TO_BE_READ = -1;
- private static final byte[] EMPTY_BYTE_ARRAY = { };
-
- /**
- * For use in {@link BiffViewer} which may construct {@link Record}s that don't completely
- * read all available data. This exception should never be thrown otherwise.
- */
- @SuppressWarnings("serial")
- public static final class LeftoverDataException extends RuntimeException {
- public LeftoverDataException(int sid, int remainingByteCount) {
- super("Initialisation of record 0x" + Integer.toHexString(sid).toUpperCase(Locale.ROOT)
- + "(" + getRecordName(sid) + ") left " + remainingByteCount
- + " bytes remaining still to be read.");
- }
-
- private static String getRecordName(int sid) {
- Class<? extends Record> recordClass = RecordFactory.getRecordClass(sid);
- if(recordClass == null) {
- return null;
- }
- return recordClass.getSimpleName();
- }
- }
-
- /** Header {@link LittleEndianInput} facet of the wrapped {@link InputStream} */
- private final BiffHeaderInput _bhi;
- /** Data {@link LittleEndianInput} facet of the wrapped {@link InputStream} */
- private final LittleEndianInput _dataInput;
- /** the record identifier of the BIFF record currently being read */
- private int _currentSid;
- /**
- * Length of the data section of the current BIFF record (always 4 less than the total record size).
- * When uninitialised, this field is set to {@link #DATA_LEN_NEEDS_TO_BE_READ}.
- */
- private int _currentDataLength;
- /**
- * The BIFF record identifier for the next record is read just as the current record
- * is finished.
- * This field is only really valid during the time that ({@link #_currentDataLength} ==
- * {@link #DATA_LEN_NEEDS_TO_BE_READ}). At most other times its value is not really the
- * 'sid of the next record'. While mid-record, this field coincidentally holds the sid
- * of the current record.
- */
- private int _nextSid;
- /**
- * index within the data section of the current BIFF record
- */
- private int _currentDataOffset;
- /**
- * index within the data section when mark() was called
- */
- private int _markedDataOffset;
-
- private static final class SimpleHeaderInput implements BiffHeaderInput {
-
- private final LittleEndianInput _lei;
-
- private SimpleHeaderInput(LittleEndianInput lei) {
- _lei = lei;
- }
- @Override
- public int available() {
- return _lei.available();
- }
- @Override
- public int readDataSize() {
- return _lei.readUShort();
- }
- @Override
- public int readRecordSID() {
- return _lei.readUShort();
- }
- }
-
/**
 * Creates a record stream over unencrypted data; equivalent to
 * {@code RecordInputStream(in, null, 0)}.
 *
 * @param in the stream to read BIFF records from
 * @throws RecordFormatException if the sid of the first record cannot be read
 */
public RecordInputStream(InputStream in) throws RecordFormatException {
    this(in, null, 0);
}

/**
 * Creates a record stream, optionally decrypting the data, and pre-reads the sid of
 * the first record.
 *
 * @param in the stream to read BIFF records from
 * @param key the encryption info; when {@code null} the stream is read as-is
 * @param initialOffset handed to {@link Biff8DecryptingStream}; ignored when {@code key} is {@code null}
 * @throws RecordFormatException if the sid of the first record cannot be read
 */
public RecordInputStream(InputStream in, EncryptionInfo key, int initialOffset) throws RecordFormatException {
    if (key == null) {
        // reuse the shared helper instead of repeating its instanceof/wrap logic here
        _dataInput = getLEI(in);
        _bhi = new SimpleHeaderInput(_dataInput);
    } else {
        // the decrypting stream serves as both the header and the data facet
        Biff8DecryptingStream bds = new Biff8DecryptingStream(in, initialOffset, key);
        _dataInput = bds;
        _bhi = bds;
    }
    _nextSid = readNextSid();
}
-
- static LittleEndianInput getLEI(InputStream is) {
- if (is instanceof LittleEndianInput) {
- // accessing directly is an optimisation
- return (LittleEndianInput) is;
- }
- // less optimal, but should work OK just the same. Often occurs in junit tests.
- return new LittleEndianInputStream(is);
- }
-
- /**
- * @return the number of bytes available in the current BIFF record
- * @see #remaining()
- */
- @Override
- public int available() {
- return remaining();
- }
-
- public int read(byte[] b, int off, int len) {
- int limit = Math.min(len, remaining());
- if (limit == 0) {
- return 0;
- }
- readFully(b, off,limit);
- return limit;
- }
-
- public short getSid() {
- return (short) _currentSid;
- }
-
- /**
- * Note - this method is expected to be called only when completed reading the current BIFF
- * record.
- *
- * @return true, if there's another record in the stream
- *
- * @throws LeftoverDataException if this method is called before reaching the end of the
- * current record.
- */
- public boolean hasNextRecord() throws LeftoverDataException {
- if (_currentDataLength != -1 && _currentDataLength != _currentDataOffset) {
- throw new LeftoverDataException(_currentSid, remaining());
- }
- if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
- _nextSid = readNextSid();
- }
- return _nextSid != INVALID_SID_VALUE;
- }
-
- /**
- * @return the sid of the next record or {@link #INVALID_SID_VALUE} if at end of stream
- */
- private int readNextSid() {
- int nAvailable = _bhi.available();
- if (nAvailable < EOFRecord.ENCODED_SIZE) {
- // some scrap left over, if nAvailable > 0?
- // ex45582-22397.xls has one extra byte after the last record
- // Excel reads that file OK
- return INVALID_SID_VALUE;
- }
- int result = _bhi.readRecordSID();
- if (result == INVALID_SID_VALUE) {
- throw new RecordFormatException("Found invalid sid (" + result + ")");
- }
- _currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
- return result;
- }
-
- /** Moves to the next record in the stream.
- *
- * <i>Note: The auto continue flag is reset to true</i>
- */
- public void nextRecord() throws RecordFormatException {
- if (_nextSid == INVALID_SID_VALUE) {
- throw new IllegalStateException("EOF - next record not available");
- }
- if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
- throw new IllegalStateException("Cannot call nextRecord() without checking hasNextRecord() first");
- }
- _currentSid = _nextSid;
- _currentDataOffset = 0;
- _currentDataLength = _bhi.readDataSize();
- if (_currentDataLength > MAX_RECORD_DATA_SIZE) {
- throw new RecordFormatException("The content of an excel record cannot exceed "
- + MAX_RECORD_DATA_SIZE + " bytes");
- }
- }
-
- private void checkRecordPosition(int requiredByteCount) {
-
- int nAvailable = remaining();
- if (nAvailable >= requiredByteCount) {
- // all OK
- return;
- }
- if (nAvailable == 0 && isContinueNext()) {
- nextRecord();
- return;
- }
- throw new RecordFormatException("Not enough data (" + nAvailable
- + ") to read requested (" + requiredByteCount +") bytes");
- }
-
- /**
- * Reads an 8 bit, signed value
- */
- @Override
- public byte readByte() {
- checkRecordPosition(LittleEndianConsts.BYTE_SIZE);
- _currentDataOffset += LittleEndianConsts.BYTE_SIZE;
- return _dataInput.readByte();
- }
-
- /**
- * Reads a 16 bit, signed value
- */
- @Override
- public short readShort() {
- checkRecordPosition(LittleEndianConsts.SHORT_SIZE);
- _currentDataOffset += LittleEndianConsts.SHORT_SIZE;
- return _dataInput.readShort();
- }
-
- /**
- * Reads a 32 bit, signed value
- */
- @Override
- public int readInt() {
- checkRecordPosition(LittleEndianConsts.INT_SIZE);
- _currentDataOffset += LittleEndianConsts.INT_SIZE;
- return _dataInput.readInt();
- }
-
- /**
- * Reads a 64 bit, signed value
- */
- @Override
- public long readLong() {
- checkRecordPosition(LittleEndianConsts.LONG_SIZE);
- _currentDataOffset += LittleEndianConsts.LONG_SIZE;
- return _dataInput.readLong();
- }
-
- /**
- * Reads an 8 bit, unsigned value
- */
- @Override
- public int readUByte() {
- return readByte() & 0x00FF;
- }
-
- /**
- * Reads a 16 bit, unsigned value.
- */
- @Override
- public int readUShort() {
- checkRecordPosition(LittleEndianConsts.SHORT_SIZE);
- _currentDataOffset += LittleEndianConsts.SHORT_SIZE;
- return _dataInput.readUShort();
- }
-
- @Override
- public double readDouble() {
- // YK: Excel doesn't write NaN but instead converts the cell type into {@link CellType#ERROR}.
- return Double.longBitsToDouble(readLong());
- }
-
- public void readPlain(byte[] buf, int off, int len) {
- readFully(buf, 0, buf.length, true);
- }
-
- @Override
- public void readFully(byte[] buf) {
- readFully(buf, 0, buf.length, false);
- }
-
- @Override
- public void readFully(byte[] buf, int off, int len) {
- readFully(buf, off, len, false);
- }
-
- private void readFully(byte[] buf, int off, int len, boolean isPlain) {
- int origLen = len;
- if (buf == null) {
- throw new NullPointerException();
- } else if (off < 0 || len < 0 || len > buf.length - off) {
- throw new IndexOutOfBoundsException();
- }
-
- while (len > 0) {
- int nextChunk = Math.min(available(),len);
- if (nextChunk == 0) {
- if (!hasNextRecord()) {
- throw new RecordFormatException("Can't read the remaining "+len+" bytes of the requested "+origLen+" bytes. No further record exists.");
- } else {
- nextRecord();
- nextChunk = Math.min(available(),len);
- assert(nextChunk > 0);
- }
- }
- checkRecordPosition(nextChunk);
- if (isPlain) {
- _dataInput.readPlain(buf, off, nextChunk);
- } else {
- _dataInput.readFully(buf, off, nextChunk);
- }
- _currentDataOffset+=nextChunk;
- off += nextChunk;
- len -= nextChunk;
- }
- }
-
- public String readString() {
- int requestedLength = readUShort();
- byte compressFlag = readByte();
- return readStringCommon(requestedLength, compressFlag == 0);
- }
- /**
- * given a byte array of 16-bit unicode characters, compress to 8-bit and
- * return a string
- *
- * { 0x16, 0x00 } -0x16
- *
- * @param requestedLength the length of the final string
- * @return the converted string
- * @exception IllegalArgumentException if len is too large (i.e.,
- * there is not enough data in string to create a String of that
- * length)
- */
- public String readUnicodeLEString(int requestedLength) {
- return readStringCommon(requestedLength, false);
- }
-
- public String readCompressedUnicode(int requestedLength) {
- return readStringCommon(requestedLength, true);
- }
-
- private String readStringCommon(int requestedLength, boolean pIsCompressedEncoding) {
- // Sanity check to detect garbage string lengths
- if (requestedLength < 0 || requestedLength > 0x100000) { // 16 million chars?
- throw new IllegalArgumentException("Bad requested string length (" + requestedLength + ")");
- }
- char[] buf = new char[requestedLength];
- boolean isCompressedEncoding = pIsCompressedEncoding;
- int curLen = 0;
- while(true) {
- int availableChars =isCompressedEncoding ? remaining() : remaining() / LittleEndianConsts.SHORT_SIZE;
- if (requestedLength - curLen <= availableChars) {
- // enough space in current record, so just read it out
- while(curLen < requestedLength) {
- char ch;
- if (isCompressedEncoding) {
- ch = (char)readUByte();
- } else {
- ch = (char)readShort();
- }
- buf[curLen] = ch;
- curLen++;
- }
- return new String(buf);
- }
- // else string has been spilled into next continue record
- // so read what's left of the current record
- while(availableChars > 0) {
- char ch;
- if (isCompressedEncoding) {
- ch = (char)readUByte();
- } else {
- ch = (char)readShort();
- }
- buf[curLen] = ch;
- curLen++;
- availableChars--;
- }
- if (!isContinueNext()) {
- throw new RecordFormatException("Expected to find a ContinueRecord in order to read remaining "
- + (requestedLength-curLen) + " of " + requestedLength + " chars");
- }
- if(remaining() != 0) {
- throw new RecordFormatException("Odd number of bytes(" + remaining() + ") left behind");
- }
- nextRecord();
- // note - the compressed flag may change on the fly
- byte compressFlag = readByte();
- assert(compressFlag == 0 || compressFlag == 1);
- isCompressedEncoding = (compressFlag == 0);
- }
- }
-
- /** Returns the remaining bytes for the current record.
- *
- * @return The remaining bytes of the current record.
- */
- public byte[] readRemainder() {
- int size = remaining();
- if (size ==0) {
- return EMPTY_BYTE_ARRAY;
- }
- byte[] result = IOUtils.safelyAllocate(size, MAX_RECORD_LENGTH);
- readFully(result);
- return result;
- }
-
- /**
- * Reads all byte data for the current record, including any that overlaps
- * into any following continue records.
- *
- * @return all byte data for the current record
- *
- * @deprecated POI 2.0 Best to write a input stream that wraps this one
- * where there is special sub record that may overlap continue
- * records.
- */
- @Deprecated
- public byte[] readAllContinuedRemainder() {
- ByteArrayOutputStream out = new ByteArrayOutputStream(2 * MAX_RECORD_DATA_SIZE);
-
- while (true) {
- byte[] b = readRemainder();
- out.write(b, 0, b.length);
- if (!isContinueNext()) {
- break;
- }
- nextRecord();
- }
- return out.toByteArray();
- }
-
- /** The remaining number of bytes in the <i>current</i> record.
- *
- * @return The number of bytes remaining in the current record
- */
- public int remaining() {
- if (_currentDataLength == DATA_LEN_NEEDS_TO_BE_READ) {
- // already read sid of next record. so current one is finished
- return 0;
- }
- return _currentDataLength - _currentDataOffset;
- }
-
- /**
- *
- * @return <code>true</code> when a {@link ContinueRecord} is next.
- */
- private boolean isContinueNext() {
- if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ && _currentDataOffset != _currentDataLength) {
- throw new IllegalStateException("Should never be called before end of current record");
- }
- if (!hasNextRecord()) {
- return false;
- }
- // At what point are records continued?
- // - Often from within the char data of long strings (caller is within readStringCommon()).
- // - From UnicodeString construction (many different points - call via checkRecordPosition)
- // - During TextObjectRecord construction (just before the text, perhaps within the text,
- // and before the formatting run data)
- return _nextSid == ContinueRecord.sid;
- }
-
- /**
- @return sid of next record. Can be called after hasNextRecord()
- */
- public int getNextSid() {
- return _nextSid;
- }
-
- /**
- * Mark the stream position - experimental function
- *
- * @param readlimit the read ahead limit
- *
- * @see InputStream#mark(int)
- */
- @Internal
- public void mark(int readlimit) {
- ((InputStream)_dataInput).mark(readlimit);
- _markedDataOffset = _currentDataOffset;
- }
-
- /**
- * Resets the stream position to the previously marked position.
- * Experimental function - this only works, when nextRecord() wasn't called in the meantime.
- *
- * @throws IOException if marking is not supported
- *
- * @see InputStream#reset()
- */
- @Internal
- public void reset() throws IOException {
- ((InputStream)_dataInput).reset();
- _currentDataOffset = _markedDataOffset;
- }
- }
|