import java.io.IOException;
import java.io.InputStream;
-import org.apache.poi.hssf.record.LabelRecord;
+import org.apache.poi.hssf.record.FormulaRecord;
+import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.OldLabelRecord;
+import org.apache.poi.hssf.record.OldStringRecord;
+import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.RecordInputStream;
+import org.apache.poi.ss.usermodel.Cell;
/**
* A text extractor for very old (pre-OLE2) Excel files,
ris.nextRecord();
switch (sid) {
- case LabelRecord.sid:
+ // label - 5.63 - TODO Needs codepages
+ case OldLabelRecord.biff2_sid:
+ case OldLabelRecord.biff345_sid:
OldLabelRecord lr = new OldLabelRecord(ris);
text.append(lr.getValue());
text.append('\n');
break;
+ // string - 5.102 - TODO Needs codepages
+ case OldStringRecord.biff2_sid:
+ case OldStringRecord.biff345_sid:
+ OldStringRecord sr = new OldStringRecord(ris);
+ text.append(sr.getString());
+ text.append('\n');
+ break;
+ // number - 5.71 - TODO Needs format strings
+ case NumberRecord.sid:
+ NumberRecord nr = new NumberRecord(ris);
+ text.append(nr.getValue());
+ text.append('\n');
+ break;
+/*
+ case OldFormulaRecord.sid:
+ FormulaRecord fr = new FormulaRecord(ris);
+System.out.println(fr.getCachedResultType());
+ if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
+ text.append(fr.getValue());
+ text.append('\n');
+ }
+*/
+ case RKRecord.sid:
+ RKRecord rr = new RKRecord(ris);
+ text.append(rr.getRKNumber());
+ text.append('\n');
+ break;
default:
ris.readFully(new byte[ris.remaining()]);
+ // text.append(" = " + ris.getSid() + " = \n");
}
-
- // label - 5.63 - TODO Needs codepages
- // number - 5.71
- // rk - 5.87
- // string - 5.102
-
}
return text.toString();
public final class FormulaRecord extends CellRecord {
public static final short sid = 0x0006; // docs say 406...because of a bug Microsoft support site article #Q184647)
+ public static final short olderSid = 0x0406; // older biff versions do manage 406!
private static int FIXED_SIZE = 14; // double + short + int
private static final BitField alwaysCalc = BitFieldFactory.getInstance(0x0001);
private short field_3_xf_index; // Biff 3+
private short field_4_string_len;
private byte[] field_5_bytes;
- //private XXXXX codepage; // TODO
+ //private XXXXX codepage; // TODO Implement for this and OldStringRecord
/**
* @param in the RecordInputstream to read the record from
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.record;
+
+
+/**
+ * Biff2 - Biff 4 String Record (0x0007 / 0x0207) - read only support for
+ *  formula string results.
+ * <p>
+ * The character bytes are stored exactly as read from the stream and are
+ *  only turned into a {@link String} on demand, because the codepage needed
+ *  to decode them properly is not available to this class yet.
+ */
+public final class OldStringRecord {
+    public final static short biff2_sid = 0x0007;
+    public final static short biff345_sid = 0x0207;
+
+    /** The sid the record was read with; distinguishes the two layouts. */
+    private final short sid;
+    /** Number of string bytes, held as an int so 16-bit lengths stay unsigned. */
+    private final int field_1_string_len;
+    /** Undecoded string bytes, in the file's (currently unknown) codepage. */
+    private final byte[] field_2_bytes;
+    //private XXXXX codepage; // TODO Implement for this and OldLabelRecord
+
+    /**
+     * Reads the length prefix and the raw string bytes of the current record.
+     *
+     * @param in the RecordInputstream to read the record from
+     */
+    public OldStringRecord(RecordInputStream in) {
+        sid = in.getSid();
+
+        if (sid == biff2_sid) {
+            // Biff2 layout: single unsigned byte length
+            field_1_string_len = in.readUByte();
+        } else {
+            // Later layout: 16-bit length. Mask it so that values of 0x8000
+            //  and above are not sign-extended into a negative array size.
+            field_1_string_len = in.readShort() & 0xFFFF;
+        }
+
+        // Can only decode properly later when you know the codepage
+        field_2_bytes = new byte[field_1_string_len];
+        // NOTE(review): the count returned by read() is ignored here -
+        //  confirm how a record shorter than its declared length should be handled
+        in.read(field_2_bytes, 0, field_1_string_len);
+    }
+
+    /**
+     * @return true if this record was read with the Biff2 sid (0x0007)
+     */
+    public boolean isBiff2() {
+        return sid == biff2_sid;
+    }
+
+    /**
+     * @return the sid this record was read with
+     */
+    public short getSid() {
+        return sid;
+    }
+
+    /**
+     * @return The string represented by this record.
+     */
+    public String getString()
+    {
+        // We really need the codepage here to do this right. Until it is
+        //  available the bytes are decoded with the platform default charset,
+        //  so bytes outside plain ASCII may decode differently per machine.
+        return new String(field_2_bytes);
+    }
+
+    /**
+     * @return a multi-line debug dump containing the decoded string
+     */
+    @Override
+    public String toString()
+    {
+        StringBuilder buffer = new StringBuilder();
+
+        buffer.append("[OLD STRING]\n");
+        buffer.append("    .string            = ")
+            .append(getString()).append("\n");
+        buffer.append("[/OLD STRING]\n");
+        return buffer.toString();
+    }
+}
// Check we find a few words we expect in there
assertTrue(text, text.contains("Size"));
assertTrue(text, text.contains("Returns"));
+
+ // Check we find a few numbers we expect in there
+ assertTrue(text, text.contains("11"));
+ assertTrue(text, text.contains("784"));
}
- // TODO Rest of the tests
+ public void testStrings() {
+     final String text = createExtractor("testEXCEL_4.xls").getText();
+
+     // Every entry must appear somewhere in the extracted text; the
+     //  extracted text itself is used as the failure message
+     final String[] expectedStrings = {
+             // Simple strings
+             "Table 10 -- Examination Coverage:",
+             "Recommended and Average Recommended Additional Tax After",
+             "Individual income tax returns, total",
+
+             // More complicated strings
+             "$100,000 or more",
+             "S corporation returns, Form 1120S [10,15]",
+             // TODO Get these quotes working correctly
+             // "individual income tax return “short forms.”",
+     };
+     for (String expected : expectedStrings) {
+         assertTrue(text, text.contains(expected));
+     }
+
+     // Formula based strings
+     // TODO Find some then test
+ }
+
+ public void testFormattedNumbers() {
+     final String text = createExtractor("testEXCEL_4.xls").getText();
+
+     // Each value is only checked for presence as a substring of the
+     //  extracted text, which also serves as the failure message
+     final String[] expectedNumbers = {
+             // Simple numbers
+             "151",
+             "784",
+
+             // Numbers which come from formulas
+             // TODO
+             // "0.40",
+             // "624",
+     };
+     for (String expected : expectedNumbers) {
+         assertTrue(text, text.contains(expected));
+     }
+
+     // Formatted numbers
+     // TODO
+ }
}