source.dussan.org Git - poi.git/commitdiff
Further Excel 4 text extractor support, for TIKA-1490
author: Nick Burch <nick@apache.org>
Sun, 30 Nov 2014 01:03:24 +0000 (01:03 +0000)
committer: Nick Burch <nick@apache.org>
Sun, 30 Nov 2014 01:03:24 +0000 (01:03 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642492 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java
src/java/org/apache/poi/hssf/record/FormulaRecord.java
src/java/org/apache/poi/hssf/record/OldFormulaRecord.java [new file with mode: 0644]
src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java

index e3705f159d3b951d6cd38f0de298417a1db7b023..af0fee01a7ed87e65c56f5f6cb06e16f73dbf0e3 100644 (file)
@@ -24,6 +24,7 @@ import java.io.InputStream;
 
 import org.apache.poi.hssf.record.FormulaRecord;
 import org.apache.poi.hssf.record.NumberRecord;
+import org.apache.poi.hssf.record.OldFormulaRecord;
 import org.apache.poi.hssf.record.OldLabelRecord;
 import org.apache.poi.hssf.record.OldStringRecord;
 import org.apache.poi.hssf.record.RKRecord;
@@ -100,15 +101,15 @@ public class OldExcelExtractor {
                     text.append(nr.getValue());
                     text.append('\n');
                     break;
-/*                    
-                case OldFormulaRecord.sid:
-                    FormulaRecord fr = new FormulaRecord(ris);
-System.out.println(fr.getCachedResultType());                    
-                    if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
+                case OldFormulaRecord.biff2_sid:
+                case OldFormulaRecord.biff3_sid:
+                case OldFormulaRecord.biff4_sid:
+                    OldFormulaRecord fr = new OldFormulaRecord(ris);
+//                  if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
                         text.append(fr.getValue());
                         text.append('\n');
-                    }
-*/
+//                  }
+                    break;
                 case RKRecord.sid:
                     RKRecord rr = new RKRecord(ris);
                     text.append(rr.getRKNumber());
index ccd3cb041291b3cffe42cbb76e7ccb2553c1013e..c0a63d3092aebbe4ea0b1b6bd2475b60b3f5fcb5 100644 (file)
@@ -36,7 +36,6 @@ import org.apache.poi.util.LittleEndianOutput;
 public final class FormulaRecord extends CellRecord {
 
        public static final short sid = 0x0006;   // docs say 406...because of a bug Microsoft support site article #Q184647)
-       public static final short olderSid = 0x0406; // older biff versions do manage 406! 
        private static int FIXED_SIZE = 14; // double + short + int
 
        private static final BitField alwaysCalc = BitFieldFactory.getInstance(0x0001);
diff --git a/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java b/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java
new file mode 100644 (file)
index 0000000..3be21f5
--- /dev/null
@@ -0,0 +1,118 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.record;
+
+import org.apache.poi.ss.formula.Formula;
+import org.apache.poi.ss.formula.ptg.Ptg;
+
+/**
+ * Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in
+ *  encoded form, along with the value if a number
+ */
+public final class OldFormulaRecord {
+    public final static short biff2_sid = 0x0006;
+    public final static short biff3_sid = 0x0206;
+    public final static short biff4_sid = 0x0406;
+    public final static short biff5_sid = 0x0006;
+
+    private short   sid;
+    private int     field_1_row;
+    private short   field_2_column;
+    private int     field_3_cell_attrs; // Biff 2
+    private short   field_3_xf_index;   // Biff 3+
+    private double  field_4_value;
+    private short   field_5_options;
+    private Formula field_6_parsed_expr;
+
+    public OldFormulaRecord(RecordInputStream ris) {
+        field_1_row          = ris.readUShort();
+        field_2_column       = ris.readShort();
+
+        if (ris.getSid() == biff2_sid) {
+            field_3_cell_attrs = ris.readUShort() << 8;
+            field_3_cell_attrs += ris.readUByte();
+        } else {
+            field_3_xf_index     = ris.readShort();
+        }
+
+        // TODO Handle special cached values, for Biff 3+
+        field_4_value = ris.readDouble();
+
+        if (ris.getSid() == biff2_sid) {
+            field_5_options = (short)ris.readUByte();
+        } else {
+            field_5_options = ris.readShort();
+        }
+
+        int expression_len = ris.readShort();
+        int nBytesAvailable = ris.available();
+        field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
+    }
+
+    public int getRow()
+    {
+        return field_1_row;
+    }
+
+    public short getColumn()
+    {
+        return field_2_column;
+    }
+
+    public short getXFIndex()
+    {
+        return field_3_xf_index;
+    }
+    public int getCellAttrs()
+    {
+        return field_3_cell_attrs;
+    }
+
+    /**
+     * get the calculated value of the formula
+     *
+     * @return calculated value
+     */
+    public double getValue() {
+        return field_4_value;
+    }
+
+    /**
+     * get the option flags
+     *
+     * @return bitmask
+     */
+    public short getOptions() {
+        return field_5_options;
+    }
+
+    /**
+     * @return the formula tokens. never <code>null</code>
+     */
+    public Ptg[] getParsedExpression() {
+        return field_6_parsed_expr.getTokens();
+    }
+
+    public Formula getFormula() {
+        return field_6_parsed_expr;
+    }
+
+    public short getSid() {
+        return sid;
+    }
+}
index fd057cd63c74da340ef552cdfd1eb674b296d1cf..b83d8942d645020f29a5b9a9490a6f793c8134c5 100644 (file)
@@ -65,7 +65,7 @@ public final class TestOldExcelExtractor extends TestCase {
         assertTrue(text, text.contains("$100,000 or more"));
         assertTrue(text, text.contains("S corporation returns, Form 1120S [10,15]"));
         // TODO Get these quotes working correctly
-//        assertTrue(text, text.contains("individual income tax return “short forms.”"));
+//        assertTrue(text, text.contains("individual income tax return \u201Cshort forms.\u201D"));
         
         // Formula based strings
         // TODO Find some then test