import java.io.IOException;
import java.io.InputStream;
+import org.apache.poi.hssf.record.BOFRecord;
+import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.OldFormulaRecord;
import org.apache.poi.hssf.record.OldLabelRecord;
public class OldExcelExtractor {
private RecordInputStream ris;
private Closeable input;
+ private int biffVersion;
public OldExcelExtractor(InputStream input) throws IOException {
BufferedInputStream bstream = new BufferedInputStream(input, 8);
* for these old file formats
*/
public String getText() {
- StringBuffer text = new StringBuffer();
+ // Work out what version we're dealing with
+ int bofSid = ris.getNextSid();
+ switch (bofSid) {
+ case BOFRecord.biff2_sid:
+ biffVersion = 2;
+ break;
+ case BOFRecord.biff3_sid:
+ biffVersion = 3;
+ break;
+ case BOFRecord.biff4_sid:
+ biffVersion = 4;
+ break;
+ case BOFRecord.biff5_sid:
+ biffVersion = 5;
+ break;
+ default:
+ throw new IllegalArgumentException("File does not begin with a BOF, found sid of " + bofSid);
+ }
+ StringBuffer text = new StringBuffer();
while (ris.hasNextRecord()) {
int sid = ris.getNextSid();
ris.nextRecord();
case OldFormulaRecord.biff2_sid:
case OldFormulaRecord.biff3_sid:
case OldFormulaRecord.biff4_sid:
- OldFormulaRecord fr = new OldFormulaRecord(ris);
- if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
- handleNumericCell(text, fr.getValue());
+ // BIFF 2 and BIFF 5+ use the same SID for formula records (due to a bug), so the record type must be picked from the BIFF version detected above
+ if (biffVersion == 5) {
+ FormulaRecord fr = new FormulaRecord(ris);
+ if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
+ handleNumericCell(text, fr.getValue());
+ }
+ } else {
+ OldFormulaRecord fr = new OldFormulaRecord(ris);
+ if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
+ handleNumericCell(text, fr.getValue());
+ }
}
break;
case RKRecord.sid:
*/
public final class BOFRecord extends StandardRecord {
/**
- * for BIFF8 files the BOF is 0x809. For earlier versions it was 0x09 or 0x(biffversion)09
+ * For BIFF8 files the BOF sid is 0x809 (BIFF5 uses the same value).
+ * For earlier versions see {@link #biff2_sid} {@link #biff3_sid}
+ * {@link #biff4_sid} {@link #biff5_sid}
*/
public final static short sid = 0x809;
+ // SIDs from earlier BIFF versions
+ public final static short biff2_sid = 0x009;
+ public final static short biff3_sid = 0x209;
+ public final static short biff4_sid = 0x409;
+ public final static short biff5_sid = 0x809;
/** suggested default (0x0600 - BIFF8) */
public final static int VERSION = 0x0600;
assertContains(text, "11");
assertContains(text, "784");
}
- public void DISABLEDtestSimpleExcel5() {
+ public void testSimpleExcel5() {
for (String ver : new String[] {"5", "95"}) {
OldExcelExtractor extractor = createExtractor("testEXCEL_"+ver+".xls");
// assertContains(text, "55,624");
// assertContains(text, "11,743,477");
}
- public void DISABLEDtestFormattedNumbersExcel5() {
+ public void testFormattedNumbersExcel5() {
for (String ver : new String[] {"5", "95"}) {
OldExcelExtractor extractor = createExtractor("testEXCEL_"+ver+".xls");
String text = extractor.getText();
// Numbers which come from formulas
assertContains(text, "13");
assertContains(text, "169");
+
+ // Formatted numbers
+ // TODO: add assertions for the formatted numbers
}
}