package org.apache.poi.hssf.record;
import java.io.ByteArrayOutputStream;
-import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.util.LittleEndian;
/** Maximum size of a single record (minus the 4 byte header) without a continue*/
public final static short MAX_RECORD_DATA_SIZE = 8224;
private static final int INVALID_SID_VALUE = -1;
+ /**
+ * When {@link #_currentDataLength} has this value, it means that the previous BIFF record is
+ * finished, the next sid has been properly read, but the data size field has not been read yet.
+ */
private static final int DATA_LEN_NEEDS_TO_BE_READ = -1;
private static final byte[] EMPTY_BYTE_ARRAY = { };
- private final InputStream _in;
- /** {@link LittleEndianInput} facet of field {@link #_in} */
+ /** {@link LittleEndianInput} facet of the wrapped {@link InputStream} */
private final LittleEndianInput _le;
- private int currentSid;
+ /** the record identifier of the BIFF record currently being read */
+ private int _currentSid;
+ /**
+ * Length of the data section of the current BIFF record (always 4 less than the total record size).
+ * When uninitialised, this field is set to {@link #DATA_LEN_NEEDS_TO_BE_READ}.
+ */
private int _currentDataLength;
- private int nextSid;
- private int recordOffset;
- private boolean autoContinue; // TODO - remove this
+ /**
+ * The BIFF record identifier for the next record is read just as the current record
+ * is finished.
+ * This field is only really valid during the time that ({@link #_currentDataLength} ==
+ * {@link #DATA_LEN_NEEDS_TO_BE_READ}). At most other times its value is not really the
+ * 'sid of the next record'. While mid-record, this field coincidentally holds the sid
+ * of the current record.
+ */
+ private int _nextSid;
+ /**
+ * index within the data section of the current BIFF record
+ */
+ private int _currentDataOffset;
public RecordInputStream(InputStream in) throws RecordFormatException {
- _in = in;
if (in instanceof LittleEndianInput) {
// accessing directly is an optimisation
_le = (LittleEndianInput) in;
// less optimal, but should work OK just the same. Often occurs in junit tests.
_le = new LittleEndianInputStream(in);
}
- try {
- if (_in.available() < LittleEndian.SHORT_SIZE) {
- nextSid = INVALID_SID_VALUE;
- } else {
- nextSid = LittleEndian.readShort(in);
- }
- } catch (IOException ex) {
- throw new RecordFormatException("Error reading bytes", ex);
- }
- _currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
- autoContinue = true;
+ _nextSid = readNextSid();
+ }
+
+ /**
+ * @return the number of bytes available in the current BIFF record
+ * @see #remaining()
+ */
+ public int available() {
+ return remaining();
}
public int read() {
checkRecordPosition(LittleEndian.BYTE_SIZE);
- recordOffset += LittleEndian.BYTE_SIZE;
+ _currentDataOffset += LittleEndian.BYTE_SIZE;
return _le.readUByte();
}
public int read(byte[] b, int off, int len) {
return limit;
}
- public short getSid() {
- return (short) currentSid;
- }
-
- public short getLength() { // TODO - remove
- return (short) _currentDataLength;
- }
-
+ public short getSid() {
+ return (short) _currentSid;
+ }
/**
* Note - this method is expected to be called only when completed reading the current BIFF record.
* discarded
*/
public boolean hasNextRecord() {
- if (_currentDataLength != -1 && _currentDataLength != recordOffset) {
- System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(currentSid));
+ if (_currentDataLength != -1 && _currentDataLength != _currentDataOffset) {
+ System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(_currentSid));
// discard unread data
- while (recordOffset < _currentDataLength) {
+ while (_currentDataOffset < _currentDataLength) {
readByte();
}
}
if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
- nextSid = readNextSid();
- _currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
+ _nextSid = readNextSid();
}
- return nextSid != INVALID_SID_VALUE;
+ return _nextSid != INVALID_SID_VALUE;
}
/**
* @return the sid of the next record or {@link #INVALID_SID_VALUE} if at end of stream
*/
private int readNextSid() {
- int nAvailable;
- try {
- nAvailable = _in.available();
- } catch (IOException e) {
- throw new RecordFormatException("Error checking stream available bytes", e);
- }
+ int nAvailable = _le.available();
if (nAvailable < EOFRecord.ENCODED_SIZE) {
if (nAvailable > 0) {
// some scrap left over?
if (result == INVALID_SID_VALUE) {
throw new RecordFormatException("Found invalid sid (" + result + ")");
}
+ _currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
return result;
}
* <i>Note: The auto continue flag is reset to true</i>
*/
public void nextRecord() throws RecordFormatException {
- if (nextSid == INVALID_SID_VALUE) {
+ if (_nextSid == INVALID_SID_VALUE) {
throw new IllegalStateException("EOF - next record not available");
}
- currentSid = nextSid;
- autoContinue = true;
- recordOffset = 0;
+ if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
+ throw new IllegalStateException("Cannot call nextRecord() without checking hasNextRecord() first");
+ }
+ _currentSid = _nextSid;
+ _currentDataOffset = 0;
_currentDataLength = _le.readUShort();
if (_currentDataLength > MAX_RECORD_DATA_SIZE) {
throw new RecordFormatException("The content of an excel record cannot exceed "
}
}
- public void setAutoContinue(boolean enable) {
- this.autoContinue = enable;
- }
-
private void checkRecordPosition(int requiredByteCount) {
- if (remaining() < requiredByteCount) {
- if (isContinueNext() && autoContinue) {
- nextRecord();
- } else {
- throw new ArrayIndexOutOfBoundsException();
- }
+ int nAvailable = remaining();
+ if (nAvailable >= requiredByteCount) {
+ // all OK
+ return;
+ }
+ if (nAvailable == 0 && isContinueNext()) {
+ nextRecord();
+ return;
}
+ throw new RecordFormatException("Not enough data (" + nAvailable
+ + ") to read requested (" + requiredByteCount +") bytes");
}
/**
*/
public byte readByte() {
checkRecordPosition(LittleEndian.BYTE_SIZE);
- recordOffset += LittleEndian.BYTE_SIZE;
+ _currentDataOffset += LittleEndian.BYTE_SIZE;
return _le.readByte();
}
*/
public short readShort() {
checkRecordPosition(LittleEndian.SHORT_SIZE);
- recordOffset += LittleEndian.SHORT_SIZE;
+ _currentDataOffset += LittleEndian.SHORT_SIZE;
return _le.readShort();
}
public int readInt() {
checkRecordPosition(LittleEndian.INT_SIZE);
- recordOffset += LittleEndian.INT_SIZE;
+ _currentDataOffset += LittleEndian.INT_SIZE;
return _le.readInt();
}
public long readLong() {
checkRecordPosition(LittleEndian.LONG_SIZE);
- recordOffset += LittleEndian.LONG_SIZE;
+ _currentDataOffset += LittleEndian.LONG_SIZE;
return _le.readLong();
}
*/
public int readUShort() {
checkRecordPosition(LittleEndian.SHORT_SIZE);
- recordOffset += LittleEndian.SHORT_SIZE;
+ _currentDataOffset += LittleEndian.SHORT_SIZE;
return _le.readUShort();
}
public double readDouble() {
checkRecordPosition(LittleEndian.DOUBLE_SIZE);
- recordOffset += LittleEndian.DOUBLE_SIZE;
+ _currentDataOffset += LittleEndian.DOUBLE_SIZE;
long valueLongBits = _le.readLong();
double result = Double.longBitsToDouble(valueLongBits);
if (Double.isNaN(result)) {
public void readFully(byte[] buf, int off, int len) {
checkRecordPosition(len);
_le.readFully(buf, off, len);
- recordOffset+=len;
+ _currentDataOffset+=len;
}
public String readString() {
// already read sid of next record. so current one is finished
return 0;
}
- return (_currentDataLength - recordOffset);
+ return _currentDataLength - _currentDataOffset;
}
/**
*
* @return <code>true</code> when a {@link ContinueRecord} is next.
*/
- public boolean isContinueNext() {
- if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ && recordOffset != _currentDataLength) {
+ private boolean isContinueNext() {
+ if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ && _currentDataOffset != _currentDataLength) {
throw new IllegalStateException("Should never be called before end of current record");
}
if (!hasNextRecord()) {
return false;
}
- return nextSid == ContinueRecord.sid;
+ // At what point are records continued?
+ // - Often from within the char data of long strings (caller is within readStringCommon()).
+ // - From UnicodeString construction (many different points - call via checkRecordPosition)
+ // - During TextObjectRecord construction (just before the text, perhaps within the text,
+ // and before the formatting run data)
+ return _nextSid == ContinueRecord.sid;
}
}
* @param offset of the record's data (provided a big array of the file)
*/
public SupBookRecord(RecordInputStream in) {
+ int recLen = in.remaining();
+
field_1_number_of_sheets = in.readShort();
- if(in.getLength() > SMALL_RECORD_SIZE) {
+ if(recLen > SMALL_RECORD_SIZE) {
// 5.38.1 External References
_isAddInFunctions = false;
_text = new HSSFRichTextString(text);
if (field_7_formattingDataLength > 0) {
- if (in.isContinueNext() && in.remaining() == 0) {
- in.nextRecord();
- processFontRuns(in, _text, field_7_formattingDataLength);
- } else {
- throw new RecordFormatException(
- "Expected Continue Record to hold font runs for TextObjectRecord");
- }
+ processFontRuns(in, _text, field_7_formattingDataLength);
}
}
throw new RecordFormatException("Bad format run data length " + formattingRunDataLength
+ ")");
}
- if (in.remaining() != formattingRunDataLength) {
- throw new RecordFormatException("Expected " + formattingRunDataLength
- + " bytes but got " + in.remaining());
- }
int nRuns = formattingRunDataLength / FORMAT_RUN_ENCODED_SIZE;
for (int i = 0; i < nRuns; i++) {
short index = in.readShort();
* @author Andrew C. Oliver
* @author Marc Johnson (mjohnson at apache dot org)
* @author Glen Stampoultzis (glens at apache.org)
- * @version 2.0-pre
*/
-
-public class UnicodeString
- implements Comparable
-{
- public final static short sid = 0xFFF;
+public final class UnicodeString implements Comparable {
private short field_1_charCount; // = 0;
private byte field_2_optionflags; // = 0;
private String field_3_string; // = null;
private static final BitField richText = BitFieldFactory.getInstance(0x8);
public static class FormatRun implements Comparable {
- private short character;
- private short fontIndex;
+ short character;
+ short fontIndex;
public FormatRun(short character, short fontIndex) {
this.character = character;
setString(str);
}
- /**
- * construct a unicode string record and fill its fields, ID is ignored
- * @param in the RecordInputstream to read the record from
- */
-
- public UnicodeString(RecordInputStream in)
- {
- fillFields(in); // TODO - inline
- }
public int hashCode()
&& field_3_string.equals(other.field_3_string));
if (!eq) return false;
- //Ok string appears to be equal but now lets compare formatting runs
+ //OK string appears to be equal but now lets compare formatting runs
if ((field_4_format_runs == null) && (other.field_4_format_runs == null))
- //Strings are equal, and there are not formtting runs.
+ //Strings are equal, and there are not formatting runs.
return true;
if (((field_4_format_runs == null) && (other.field_4_format_runs != null)) ||
(field_4_format_runs != null) && (other.field_4_format_runs == null))
}
/**
+ * construct a unicode string record and fill its fields, ID is ignored
* @param in the RecordInputstream to read the record from
*/
- protected void fillFields(RecordInputStream in)
- {
+ public UnicodeString(RecordInputStream in) {
field_1_charCount = in.readShort();
field_2_optionflags = in.readByte();
extensionLength = in.readInt();
}
- //Now need to get the string data.
- //Turn off autocontinuation so that we can catch the continue boundary
- in.setAutoContinue(false);
- StringBuffer tmpString = new StringBuffer(field_1_charCount);
- int stringCharCount = field_1_charCount;
boolean isCompressed = ((field_2_optionflags & 1) == 0);
- while (stringCharCount != 0) {
- if (in.remaining() == 0) {
- if (in.isContinueNext()) {
- in.nextRecord();
- //Check if we are now reading, compressed or uncompressed unicode.
- byte optionflags = in.readByte();
- isCompressed = ((optionflags & 1) == 0);
- } else
- throw new RecordFormatException("Expected continue record.");
- }
- if (isCompressed) {
- char ch = (char)in.readUByte(); // avoid sex
- tmpString.append(ch);
- } else {
- char ch = (char) in.readShort();
- tmpString.append(ch);
- }
- stringCharCount --;
+ if (isCompressed) {
+ field_3_string = in.readCompressedUnicode(field_1_charCount);
+ } else {
+ field_3_string = in.readUnicodeLEString(field_1_charCount);
}
- field_3_string = tmpString.toString();
- //Turn back on autocontinuation
- in.setAutoContinue(true);
-
+
if (isRichText() && (runCount > 0)) {
field_4_format_runs = new ArrayList(runCount);
}
/**
- * get the actual string this contains as a java String object
- *
- *
- * @return String
- *
+ * @return the actual string this contains as a java String object
*/
-
public String getString()
{
return field_3_string;
}
}
if (useUTF16)
- //Set the uncomressed bit
+ //Set the uncompressed bit
field_2_optionflags = highByte.setByte(field_2_optionflags);
else field_2_optionflags = highByte.clearByte(field_2_optionflags);
}
//Make sure that we now say that we are a rich string
field_2_optionflags = richText.setByte(field_2_optionflags);
- }
+ }
public Iterator formatIterator() {
if (field_4_format_runs != null)
LittleEndian.putShort(data, offset, ContinueRecord.sid);
offset+=2;
- //Record the location of the last continue legnth position, but dont write
- //anything there yet (since we dont know what it will be!)
+ //Record the location of the last continue length position, but don't write
+ //anything there yet (since we don't know what it will be!)
stats.lastLengthPos = offset;
offset += 2;
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
}
return offset;
- }
+ }
public int serialize(UnicodeRecordStats stats, final int offset, byte [] data)
{
//Basic string overhead
pos = writeContinueIfRequired(stats, 3, pos, data);
- // byte[] retval = new byte[ 3 + (getString().length() * charsize)];
LittleEndian.putShort(data, pos, getCharCount());
pos += 2;
data[ pos ] = getOptionFlags();
//Check to see if the offset occurs mid string, if so then we need to add
//the byte to start with that represents the first byte of the continue record.
if (strSize > stats.remainingSize) {
- //Ok the offset occurs half way through the string, that means that
+ //OK the offset occurs half way through the string, that means that
//we need an extra byte after the continue record ie we didnt finish
//writing out the string the 1st time through
//But hang on, how many continue records did we span? What if this is
//a REALLY long string. We need to work this all out.
- int ammountThatCantFit = strSize;
+ int amountThatCantFit = strSize;
int strPos = 0;
- while (ammountThatCantFit > 0) {
- int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
- //Make sure that the ammount that cant fit takes into account
+ while (amountThatCantFit > 0) {
+ int amountWritten = Math.min(stats.remainingSize, amountThatCantFit);
+ //Make sure that the amount that can't fit takes into account
//whether we are writing double byte unicode
if (isUncompressedUnicode()) {
//We have the '-1' here because whether this is the first record or
//subsequent continue records, there is always the case that the
- //number of bytes in a string on doube byte boundaries is actually odd.
- if ( ( (ammountWritten ) % 2) == 1)
- ammountWritten--;
+ //number of bytes in a string on double byte boundaries is actually odd.
+ if ( ( (amountWritten ) % 2) == 1)
+ amountWritten--;
}
- System.arraycopy(strBytes, strPos, data, pos, ammountWritten);
- pos += ammountWritten;
- strPos += ammountWritten;
- stats.recordSize += ammountWritten;
- stats.remainingSize -= ammountWritten;
+ System.arraycopy(strBytes, strPos, data, pos, amountWritten);
+ pos += amountWritten;
+ strPos += amountWritten;
+ stats.recordSize += amountWritten;
+ stats.remainingSize -= amountWritten;
//Ok lets subtract what we can write
- ammountThatCantFit -= ammountWritten;
+ amountThatCantFit -= amountWritten;
//Each iteration of this while loop is another continue record, unless
//everything now fits.
- if (ammountThatCantFit > 0) {
+ if (amountThatCantFit > 0) {
//We know that a continue WILL be requied, but use this common method
- pos = writeContinueIfRequired(stats, ammountThatCantFit, pos, data);
+ pos = writeContinueIfRequired(stats, amountThatCantFit, pos, data);
//The first byte after a continue mid string is the extra byte to
//indicate if this run is compressed or not.
return highByte.isSet(getOptionFlags());
}
- /** Returns the size of this record, given the ammount of record space
+ /** Returns the size of this record, given the amount of record space
* remaining, it will also include the size of writing a continue record.
*/
}
}
-
-
- public short getSid()
- {
- return sid;
- }
-
public int compareTo(Object obj)
{
UnicodeString str = ( UnicodeString ) obj;
}
//Well the format runs are equal as well!, better check the ExtRst data
- //Which by the way we dont know how to decode!
+ //Which by the way we don't know how to decode!
if ((field_5_ext_rst == null) && (str.field_5_ext_rst == null))
return 0;
if ((field_5_ext_rst == null) && (str.field_5_ext_rst != null))
_currentBlock = getDataInputBlock(0);
}
- public int available() throws IOException {
- dieIfClosed();
+ public int available() {
+ if (_closed) {
+ throw new IllegalStateException("cannot perform requested operation on a closed stream");
+ }
return _document_size - _current_offset;
}
private void checkAvaliable(int requestedSize) {
if (_closed) {
- throw new RuntimeException("cannot perform requested operation on a closed stream");
+ throw new IllegalStateException("cannot perform requested operation on a closed stream");
}
if (requestedSize > _document_size - _current_offset) {
throw new RuntimeException("Buffer underrun - requested " + requestedSize
this(buf, 0, buf.length);
}
+ public int available() {
+ return _endIndex - _readIndex;
+ }
private void checkPosition(int i) {
if (i > _endIndex - _readIndex) {
throw new RuntimeException("Buffer overrun");
* @author Josh Micich\r
*/\r
public interface LittleEndianInput {\r
+ int available();\r
byte readByte();\r
int readUByte();\r
short readShort();\r
import java.io.InputStream;\r
\r
/**\r
+ * Wraps an {@link InputStream} providing {@link LittleEndianInput}<p/>\r
+ * \r
+ * This class does not buffer any input, so the stream read position maintained \r
+ * by this class is consistent with that of the inner stream.\r
* \r
* @author Josh Micich\r
*/\r
public LittleEndianInputStream(InputStream is) {\r
super(is);\r
}\r
-\r
+ public int available() {\r
+ try {\r
+ return super.available();\r
+ } catch (IOException e) {\r
+ throw new RuntimeException(e);\r
+ }\r
+ }\r
public byte readByte() {\r
return (byte)readUByte();\r
}\r
-
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
-
package org.apache.poi.poifs.filesystem;
-import java.io.*;
-
-import java.util.*;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.Arrays;
-import junit.framework.*;
+import junit.framework.TestCase;
import org.apache.poi.poifs.property.DirectoryProperty;
-import org.apache.poi.poifs.property.DocumentProperty;
import org.apache.poi.poifs.storage.RawDataBlock;
/**
* @author Marc Johnson
*/
-public class TestDocumentInputStream
- extends TestCase
-{
-
- /**
- * Constructor TestDocumentInputStream
- *
- * @param name
- *
- * @exception IOException
- */
+public final class TestDocumentInputStream extends TestCase {
- public TestDocumentInputStream(String name)
- throws IOException
- {
- super(name);
+ protected void setUp() throws Exception {
int blocks = (_workbook_size + 511) / 512;
_workbook_data = new byte[ 512 * blocks ];
/**
* test constructor
- *
- * @exception IOException
*/
-
- public void testConstructor()
- throws IOException
- {
+ public void testConstructor() throws IOException {
DocumentInputStream stream = new DocumentInputStream(_workbook);
assertEquals(_workbook_size, stream.available());
/**
* test available() behavior
- *
- * @exception IOException
*/
-
- public void testAvailable()
- throws IOException
- {
+ public void testAvailable() throws IOException {
DocumentInputStream stream = new DocumentInputStream(_workbook);
assertEquals(_workbook_size, stream.available());
{
stream.available();
fail("Should have caught IOException");
- }
- catch (IOException ignored)
- {
+ } catch (IllegalStateException ignored) {
// as expected
}
/**
* test mark/reset/markSupported.
- *
- * @exception IOException
*/
-
- public void testMarkFunctions()
- throws IOException
- {
+ public void testMarkFunctions() throws IOException {
DocumentInputStream stream = new DocumentInputStream(_workbook);
byte[] buffer = new byte[ _workbook_size / 5 ];
/**
* test simple read method
- *
- * @exception IOException
*/
-
- public void testReadSingleByte()
- throws IOException
- {
+ public void testReadSingleByte() throws IOException {
DocumentInputStream stream = new DocumentInputStream(_workbook);
int remaining = _workbook_size;
/**
* Test buffered read
- *
- * @exception IOException
*/
-
- public void testBufferRead()
- throws IOException
- {
+ public void testBufferRead() throws IOException {
DocumentInputStream stream = new DocumentInputStream(_workbook);
try
/**
* Test complex buffered read
- *
- * @exception IOException
*/
-
- public void testComplexBufferRead()
- throws IOException
- {
+ public void testComplexBufferRead() throws IOException {
DocumentInputStream stream = new DocumentInputStream(_workbook);
try {
/**
* test skip
- *
- * @exception IOException
*/
-
- public void testSkip()
- throws IOException
- {
+ public void testSkip() throws IOException {
DocumentInputStream stream = new DocumentInputStream(_workbook);
assertEquals(_workbook_size, stream.available());
stream.skip(2 + ( long ) Integer.MAX_VALUE));
assertEquals(0, stream.available());
}
-
- /**
- * main method to run the unit tests
- *
- * @param ignored_args
- */
-
- public static void main(String [] ignored_args)
- {
- System.out.println(
- "Testing org.apache.poi.poifs.filesystem.DocumentInputStream");
- junit.textui.TestRunner.run(TestDocumentInputStream.class);
- }
}