From 41ba513d112cde6c6f5a99ad0b1b06766e9c8ebf Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sun, 30 Nov 2014 01:30:43 +0000 Subject: [PATCH] Formula values for Excel 4 extractor, for TIKA-1490 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642497 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hssf/extractor/OldExcelExtractor.java | 32 ++++++++----------- .../apache/poi/hssf/record/FormulaRecord.java | 2 +- .../poi/hssf/record/OldFormulaRecord.java | 28 ++++++++++++++-- 3 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java index af0fee01a7..d78e2268f3 100644 --- a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java @@ -22,7 +22,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import org.apache.poi.hssf.record.FormulaRecord; import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.OldFormulaRecord; import org.apache.poi.hssf.record.OldLabelRecord; @@ -42,7 +41,6 @@ import org.apache.poi.ss.usermodel.Cell; */ public class OldExcelExtractor { private InputStream input; - private boolean _includeSheetNames = true; public OldExcelExtractor(InputStream input) { this.input = input; @@ -61,13 +59,6 @@ public class OldExcelExtractor { System.out.println(extractor.getText()); } - /** - * Should sheet names be included? Default is true - */ - public void setIncludeSheetNames(boolean includeSheetNames) { - _includeSheetNames = includeSheetNames; - } - /** * Retrieves the text contents of the file, as best we can * for these old file formats @@ -95,32 +86,35 @@ public class OldExcelExtractor { text.append(sr.getString()); text.append('\n'); break; - // number - 5.71 - TODO Needs format strings + case NumberRecord.sid: NumberRecord nr = new NumberRecord(ris); - text.append(nr.getValue()); - text.append('\n'); + handleNumericCell(text, nr.getValue()); break; case OldFormulaRecord.biff2_sid: case OldFormulaRecord.biff3_sid: case OldFormulaRecord.biff4_sid: OldFormulaRecord fr = new OldFormulaRecord(ris); -// if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) { - text.append(fr.getValue()); - text.append('\n'); -// } + if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) { + handleNumericCell(text, fr.getValue()); + } break; case RKRecord.sid: RKRecord rr = new RKRecord(ris); - text.append(rr.getRKNumber()); - text.append('\n'); + handleNumericCell(text, rr.getRKNumber()); break; + default: ris.readFully(new byte[ris.remaining()]); - // text.append(" = " + ris.getSid() + " = \n"); } } return text.toString(); } + + protected void handleNumericCell(StringBuffer text, double value) { + // TODO Need to fetch / use format strings + text.append(value); + text.append('\n'); + } } diff --git a/src/java/org/apache/poi/hssf/record/FormulaRecord.java b/src/java/org/apache/poi/hssf/record/FormulaRecord.java index c0a63d3092..f6c2740cca 100644 --- a/src/java/org/apache/poi/hssf/record/FormulaRecord.java +++ b/src/java/org/apache/poi/hssf/record/FormulaRecord.java @@ -47,7 +47,7 @@ public final class FormulaRecord extends CellRecord { * Excel encodes the same 8 bytes that would be field_4_value with various NaN * values that are decoded/encoded by this class. */ - private static final class SpecialCachedValue { + static final class SpecialCachedValue { /** deliberately chosen by Excel in order to encode other values within Double NaNs */ private static final long BIT_MARKER = 0xFFFF000000000000L; private static final int VARIABLE_DATA_LENGTH = 6; diff --git a/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java b/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java index eb5335e936..f3aaf872e0 100644 --- a/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java +++ b/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java @@ -17,6 +17,8 @@ package org.apache.poi.hssf.record; +import org.apache.poi.hssf.record.FormulaRecord.SpecialCachedValue; +import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.ss.formula.Formula; import org.apache.poi.ss.formula.ptg.Ptg; @@ -30,6 +32,7 @@ public final class OldFormulaRecord extends OldCellRecord { public final static short biff4_sid = 0x0406; public final static short biff5_sid = 0x0006; + private SpecialCachedValue specialCachedValue; private double field_4_value; private short field_5_options; private Formula field_6_parsed_expr; @@ -37,8 +40,15 @@ public final class OldFormulaRecord extends OldCellRecord { public OldFormulaRecord(RecordInputStream ris) { super(ris, ris.getSid() == biff2_sid); - // TODO Handle special cached values, for Biff 3+ - field_4_value = ris.readDouble(); + if (isBiff2()) { + field_4_value = ris.readDouble(); + } else { + long valueLongBits = ris.readLong(); + specialCachedValue = SpecialCachedValue.create(valueLongBits); + if (specialCachedValue == null) { + field_4_value = Double.longBitsToDouble(valueLongBits); + } + } if (isBiff2()) { field_5_options = (short)ris.readUByte(); @@ -51,6 +61,20 @@ public final class OldFormulaRecord extends OldCellRecord { field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable); } + public int getCachedResultType() { + if (specialCachedValue == null) { + return HSSFCell.CELL_TYPE_NUMERIC; + } + return specialCachedValue.getValueType(); + } + + public boolean getCachedBooleanValue() { + return specialCachedValue.getBooleanValue(); + } + public int getCachedErrorValue() { + return specialCachedValue.getErrorValue(); + } + /** * get the calculated value of the formula * -- 2.39.5