]> source.dussan.org Git - poi.git/commitdiff
Fix for 45060 (and 45041) - Improved token class transformation during formula parsing
authorJosh Micich <josh@apache.org>
Wed, 28 May 2008 06:19:31 +0000 (06:19 +0000)
committerJosh Micich <josh@apache.org>
Wed, 28 May 2008 06:19:31 +0000 (06:19 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@660828 13f79535-47bb-0310-9956-ffa450edef68

13 files changed:
src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/hssf/model/FormulaParser.java
src/java/org/apache/poi/hssf/model/OperandClassTransformer.java [new file with mode: 0644]
src/java/org/apache/poi/hssf/model/ParseNode.java [new file with mode: 0644]
src/java/org/apache/poi/hssf/record/formula/ControlPtg.java
src/java/org/apache/poi/hssf/record/formula/Ptg.java
src/resources/main/org/apache/poi/hssf/record/formula/function/functionMetadata.txt
src/testcases/org/apache/poi/hssf/data/testRVA.xls [new file with mode: 0644]
src/testcases/org/apache/poi/hssf/model/AllModelTests.java
src/testcases/org/apache/poi/hssf/model/TestOperandClassTransformer.java [new file with mode: 0644]
src/testcases/org/apache/poi/hssf/model/TestRVA.java [new file with mode: 0644]
src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java [new file with mode: 0644]

index 688410c8936f5f4557cfda9b385f37ff306bd7ed..831149c3b1ff50c6ea028338734c190c2facf723 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.1-final" date="2008-06-??">
+           <action dev="POI-DEVELOPERS" type="add">45060 - Improved token class transformation during formula parsing</action>
            <action dev="POI-DEVELOPERS" type="add">44840 - Improved handling of HSSFObjectData, especially for entries with data held not in POIFS</action>
            <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
            <action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
index 9b81f2a01e96b757e3eec1f3044738f1ab133068..d9052130c27c645c67959cbceb92f2a64e03a486 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1-final" date="2008-06-??">
+           <action dev="POI-DEVELOPERS" type="add">45060 - Improved token class transformation during formula parsing</action>
            <action dev="POI-DEVELOPERS" type="add">44840 - Improved handling of HSSFObjectData, especially for entries with data held not in POIFS</action>
            <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
            <action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
index 82f54f0acc51240c67c4b5710aca4b33d95a2fc4..508061c033d4697539bb1312266770830ad754e0 100644 (file)
@@ -18,7 +18,6 @@
 package org.apache.poi.hssf.model;
 
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Stack;
 import java.util.regex.Pattern;
@@ -61,17 +60,17 @@ public final class FormulaParser {
         }
     }
 
-    public static int FORMULA_TYPE_CELL = 0;
-    public static int FORMULA_TYPE_SHARED = 1;
-    public static int FORMULA_TYPE_ARRAY =2;
-    public static int FORMULA_TYPE_CONDFOMRAT = 3;
-    public static int FORMULA_TYPE_NAMEDRANGE = 4;
+    public static final int FORMULA_TYPE_CELL = 0;
+    public static final int FORMULA_TYPE_SHARED = 1;
+    public static final int FORMULA_TYPE_ARRAY =2;
+    public static final int FORMULA_TYPE_CONDFOMRAT = 3;
+    public static final int FORMULA_TYPE_NAMEDRANGE = 4;
 
     private final String formulaString;
     private final int formulaLength;
     private int pointer;
 
-    private final List tokens = new Stack();
+    private ParseNode _rootNode;
 
     /**
      * Used for spotting if we have a cell reference,
@@ -221,14 +220,15 @@ public final class FormulaParser {
         return value.length() == 0 ? null : value.toString();
     }
 
-    /** Parse and Translate a String Identifier */
-    private Ptg parseIdent() {
-        String name;
-        name = GetName();
+    private ParseNode parseFunctionOrIdentifier() {
+        String name = GetName();
         if (look == '('){
             //This is a function
             return function(name);
         }
+        return new ParseNode(parseIdentifier(name));
+    }
+    private Ptg parseIdentifier(String name) {
 
         if (look == ':' || look == '.') { // this is a AreaReference
             GetChar();
@@ -287,14 +287,6 @@ public final class FormulaParser {
                     + name + "\", but that named range wasn't defined!");
     }
 
-    /**
-     * Adds a pointer to the last token to the latest function argument list.
-     * @param obj
-     */
-    private void addArgumentPointer(List argumentPointers) {
-        argumentPointers.add(tokens.get(tokens.size()-1));
-    }
-
     /**
      * Note - Excel function names are 'case aware but not case sensitive'.  This method may end
      * up creating a defined name record in the workbook if the specified name is not an internal
@@ -302,58 +294,23 @@ public final class FormulaParser {
      *
      * @param name case preserved function name (as it was entered/appeared in the formula).
      */
-    private Ptg function(String name) {
-        int numArgs =0 ;
+    private ParseNode function(String name) {
+        NamePtg nameToken = null;
         // Note regarding parameter -
         if(!AbstractFunctionPtg.isInternalFunctionName(name)) {
             // external functions get a Name token which points to a defined name record
-            NamePtg nameToken = new NamePtg(name, this.book);
+            nameToken = new NamePtg(name, this.book);
 
             // in the token tree, the name is more or less the first argument
-            numArgs++;
-            tokens.add(nameToken);
         }
-        //average 2 args per function
-        List argumentPointers = new ArrayList(2);
 
         Match('(');
-        numArgs += Arguments(argumentPointers);
+        ParseNode[] args = Arguments();
         Match(')');
 
-        return getFunction(name, numArgs, argumentPointers);
-    }
-
-    /**
-     * Adds the size of all the ptgs after the provided index (inclusive).
-     * <p>
-     * Initially used to count a goto
-     * @param index
-     * @return int
-     */
-    private int getPtgSize(int index) {
-        int count = 0;
-
-        Iterator ptgIterator = tokens.listIterator(index);
-        while (ptgIterator.hasNext()) {
-            Ptg ptg = (Ptg)ptgIterator.next();
-            count+=ptg.getSize();
-        }
-
-        return count;
+        return getFunction(name, nameToken, args);
     }
 
-    private int getPtgSize(int start, int end) {
-        int count = 0;
-        int index = start;
-        Iterator ptgIterator = tokens.listIterator(index);
-        while (ptgIterator.hasNext() && index <= end) {
-            Ptg ptg = (Ptg)ptgIterator.next();
-            count+=ptg.getSize();
-            index++;
-        }
-
-        return count;
-    }
     /**
      * Generates the variable function ptg for the formula.
      * <p>
@@ -362,84 +319,35 @@ public final class FormulaParser {
      * @param numArgs
      * @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function
      */
-    private AbstractFunctionPtg getFunction(String name, int numArgs, List argumentPointers) {
+    private ParseNode getFunction(String name, NamePtg namePtg, ParseNode[] args) {
 
-        boolean isVarArgs;
-        int funcIx;
         FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase());
+        int numArgs = args.length;
         if(fm == null) {
+               if (namePtg == null) {
+                       throw new IllegalStateException("NamePtg must be supplied for external functions");
+               }
             // must be external function
-            isVarArgs = true;
-            funcIx = FunctionMetadataRegistry.FUNCTION_INDEX_EXTERNAL;
-        } else {
-            isVarArgs = !fm.hasFixedArgsLength();
-            funcIx = fm.getIndex();
-            validateNumArgs(numArgs, fm);
+            ParseNode[] allArgs = new ParseNode[numArgs+1];
+            allArgs[0] = new ParseNode(namePtg);
+            System.arraycopy(args, 0, allArgs, 1, numArgs);
+            return new ParseNode(new FuncVarPtg(name, (byte)(numArgs+1)), allArgs);
         }
+
+        if (namePtg != null) {
+               throw new IllegalStateException("NamePtg no applicable to internal functions");
+       }
+        boolean isVarArgs = !fm.hasFixedArgsLength();
+        int funcIx = fm.getIndex();
+        validateNumArgs(args.length, fm);
+
         AbstractFunctionPtg retval;
         if(isVarArgs) {
             retval = new FuncVarPtg(name, (byte)numArgs);
         } else {
             retval = new FuncPtg(funcIx);
         }
-        if (!name.equalsIgnoreCase(AbstractFunctionPtg.FUNCTION_NAME_IF)) {
-            // early return for everything else besides IF()
-            return retval;
-        }
-
-
-        AttrPtg ifPtg = new AttrPtg();
-        ifPtg.setData((short)7); //mirroring excel output
-        ifPtg.setOptimizedIf(true);
-
-        if (argumentPointers.size() != 2  && argumentPointers.size() != 3) {
-            throw new IllegalArgumentException("["+argumentPointers.size()+"] Arguments Found - An IF formula requires 2 or 3 arguments. IF(CONDITION, TRUE_VALUE, FALSE_VALUE [OPTIONAL]");
-        }
-
-        //Biffview of an IF formula record indicates the attr ptg goes after the condition ptgs and are
-        //tracked in the argument pointers
-        //The beginning first argument pointer is the last ptg of the condition
-        int ifIndex = tokens.indexOf(argumentPointers.get(0))+1;
-        tokens.add(ifIndex, ifPtg);
-
-        //we now need a goto ptgAttr to skip to the end of the formula after a true condition
-        //the true condition is should be inserted after the last ptg in the first argument
-
-        int gotoIndex = tokens.indexOf(argumentPointers.get(1))+1;
-
-        AttrPtg goto1Ptg = new AttrPtg();
-        goto1Ptg.setGoto(true);
-
-
-        tokens.add(gotoIndex, goto1Ptg);
-
-
-        if (numArgs > 2) { //only add false jump if there is a false condition
-
-            //second goto to skip past the function ptg
-            AttrPtg goto2Ptg = new AttrPtg();
-            goto2Ptg.setGoto(true);
-            goto2Ptg.setData((short)(retval.getSize()-1));
-            //Page 472 of the Microsoft Excel Developer's kit states that:
-            //The b(or w) field specifies the number byes (or words to skip, minus 1
-
-            tokens.add(goto2Ptg); //this goes after all the arguments are defined
-        }
-
-        //data portion of the if ptg points to the false subexpression (Page 472 of MS Excel Developer's kit)
-        //count the number of bytes after the ifPtg to the False Subexpression
-        //doesn't specify -1 in the documentation
-        ifPtg.setData((short)(getPtgSize(ifIndex+1, gotoIndex)));
-
-        //count all the additional (goto) ptgs but dont count itself
-        int ptgCount = this.getPtgSize(gotoIndex)-goto1Ptg.getSize()+retval.getSize();
-        if (ptgCount > Short.MAX_VALUE) {
-            throw new RuntimeException("Ptg Size exceeds short when being specified for a goto ptg in an if");
-        }
-
-        goto1Ptg.setData((short)(ptgCount-1));
-
-        return retval;
+        return new ParseNode(retval, args);
     }
 
     private void validateNumArgs(int numArgs, FunctionMetadata fm) {
@@ -470,10 +378,12 @@ public final class FormulaParser {
     }
 
     /** get arguments to a function */
-    private int Arguments(List argumentPointers) {
+    private ParseNode[] Arguments() {
+        //average 2 args per function
+        List temp = new ArrayList(2);
         SkipWhite();
         if(look == ')') {
-            return 0;
+            return ParseNode.EMPTY_ARRAY;
         }
 
         boolean missedPrevArg = true;
@@ -482,8 +392,7 @@ public final class FormulaParser {
             SkipWhite();
             if (isArgumentDelimiter(look)) {
                 if (missedPrevArg) {
-                    tokens.add(new MissingArgPtg());
-                    addArgumentPointer(argumentPointers);
+                       temp.add(new ParseNode(new MissingArgPtg()));
                     numArgs++;
                 }
                 if (look == ')') {
@@ -493,8 +402,7 @@ public final class FormulaParser {
                 missedPrevArg = true;
                 continue;
             }
-            comparisonExpression();
-            addArgumentPointer(argumentPointers);
+            temp.add(comparisonExpression());
             numArgs++;
             missedPrevArg = false;
             SkipWhite();
@@ -502,32 +410,34 @@ public final class FormulaParser {
                 throw expected("',' or ')'");
             }
         }
-        return numArgs;
+        ParseNode[] result = new ParseNode[temp.size()];
+        temp.toArray(result);
+        return result;
     }
 
    /** Parse and Translate a Math Factor  */
-    private void powerFactor() {
-        percentFactor();
+    private ParseNode powerFactor() {
+       ParseNode result = percentFactor();
         while(true) {
             SkipWhite();
             if(look != '^') {
-                return;
+                return result;
             }
             Match('^');
-            percentFactor();
-            tokens.add(new PowerPtg());
+            ParseNode other = percentFactor();
+            result = new ParseNode(new PowerPtg(), result, other);
         }
     }
 
-    private void percentFactor() {
-        tokens.add(parseSimpleFactor());
+    private ParseNode percentFactor() {
+       ParseNode result = parseSimpleFactor();
         while(true) {
             SkipWhite();
             if(look != '%') {
-                return;
+                return result;
             }
             Match('%');
-            tokens.add(new PercentPtg());
+            result = new ParseNode(new PercentPtg(), result);
         }
     }
 
@@ -535,32 +445,30 @@ public final class FormulaParser {
     /**
      * factors (without ^ or % )
      */
-    private Ptg parseSimpleFactor() {
+    private ParseNode parseSimpleFactor() {
         SkipWhite();
         switch(look) {
             case '#':
-                return parseErrorLiteral();
+                return new ParseNode(parseErrorLiteral());
             case '-':
                 Match('-');
-                powerFactor();
-                return new UnaryMinusPtg();
+                return new ParseNode(new UnaryMinusPtg(), powerFactor());
             case '+':
                 Match('+');
-                powerFactor();
-                return new UnaryPlusPtg();
+                return new ParseNode(new UnaryPlusPtg(), powerFactor());
             case '(':
                 Match('(');
-                comparisonExpression();
+                ParseNode inside = comparisonExpression();
                 Match(')');
-                return new ParenthesisPtg();
+                return new ParseNode(new ParenthesisPtg(), inside);
             case '"':
-                return parseStringLiteral();
+                return new ParseNode(parseStringLiteral());
         }
         if (IsAlpha(look) || look == '\''){
-            return parseIdent();
+            return parseFunctionOrIdentifier();
         }
         // else - assume number
-        return parseNumber();
+        return new ParseNode(parseNumber());
     }
 
 
@@ -716,28 +624,30 @@ public final class FormulaParser {
     }
 
     /** Parse and Translate a Math Term */
-    private void  Term() {
-        powerFactor();
+    private ParseNode  Term() {
+       ParseNode result = powerFactor();
         while(true) {
             SkipWhite();
+            Ptg operator;
             switch(look) {
                 case '*':
                     Match('*');
-                    powerFactor();
-                    tokens.add(new MultiplyPtg());
-                    continue;
+                    operator = new MultiplyPtg();
+                    break;
                 case '/':
                     Match('/');
-                    powerFactor();
-                    tokens.add(new DividePtg());
-                    continue;
+                    operator = new DividePtg();
+                    break;
+                default:
+                    return result; // finished with Term
             }
-            return; // finished with Term
+            ParseNode other = powerFactor();
+            result = new ParseNode(operator, result, other);
         }
     }
 
-    private void comparisonExpression() {
-        concatExpression();
+    private ParseNode comparisonExpression() {
+       ParseNode result = concatExpression();
         while (true) {
             SkipWhite();
             switch(look) {
@@ -745,11 +655,11 @@ public final class FormulaParser {
                 case '>':
                 case '<':
                     Ptg comparisonToken = getComparisonToken();
-                    concatExpression();
-                    tokens.add(comparisonToken);
+                    ParseNode other = concatExpression();
+                    result = new ParseNode(comparisonToken, result, other);
                     continue;
             }
-            return; // finished with predicate expression
+            return result; // finished with predicate expression
         }
     }
 
@@ -779,38 +689,41 @@ public final class FormulaParser {
     }
 
 
-    private void concatExpression() {
-        additiveExpression();
+    private ParseNode concatExpression() {
+        ParseNode result = additiveExpression();
         while (true) {
             SkipWhite();
             if(look != '&') {
                 break; // finished with concat expression
             }
             Match('&');
-            additiveExpression();
-            tokens.add(new ConcatPtg());
+            ParseNode other = additiveExpression();
+            result = new ParseNode(new ConcatPtg(), result, other);
         }
+        return result;
     }
 
 
     /** Parse and Translate an Expression */
-    private void additiveExpression() {
-        Term();
+    private ParseNode additiveExpression() {
+       ParseNode result = Term();
         while (true) {
             SkipWhite();
+            Ptg operator;
             switch(look) {
                 case '+':
                     Match('+');
-                    Term();
-                    tokens.add(new AddPtg());
-                    continue;
+                    operator = new AddPtg();
+                    break;
                 case '-':
                     Match('-');
-                    Term();
-                    tokens.add(new SubtractPtg());
-                    continue;
+                    operator = new SubtractPtg();
+                    break;
+                default:
+                    return result; // finished with additive expression
             }
-            return; // finished with additive expression
+            ParseNode other = Term();
+            result = new ParseNode(operator, result, other);
         }
     }
 
@@ -835,7 +748,7 @@ end;
     public void parse() {
         pointer=0;
         GetChar();
-        comparisonExpression();
+        _rootNode = comparisonExpression();
 
         if(pointer <= formulaLength) {
             String msg = "Unused input [" + formulaString.substring(pointer-1)
@@ -858,91 +771,12 @@ end;
     }
 
     public Ptg[] getRPNPtg(int formulaType) {
-        Node node = createTree();
+       OperandClassTransformer oct = new OperandClassTransformer(formulaType);
         // RVA is for 'operand class': 'reference', 'value', 'array'
-        setRootLevelRVA(node, formulaType);
-        setParameterRVA(node,formulaType);
-        return (Ptg[]) tokens.toArray(new Ptg[0]);
-    }
-
-    private void setRootLevelRVA(Node n, int formulaType) {
-        //Pg 16, excelfileformat.pdf @ openoffice.org
-        Ptg p = n.getValue();
-            if (formulaType == FormulaParser.FORMULA_TYPE_NAMEDRANGE) {
-                if (p.getDefaultOperandClass() == Ptg.CLASS_REF) {
-                    setClass(n,Ptg.CLASS_REF);
-                } else {
-                    setClass(n,Ptg.CLASS_ARRAY);
-                }
-            } else {
-                setClass(n,Ptg.CLASS_VALUE);
-            }
-
-    }
-
-    private void setParameterRVA(Node n, int formulaType) {
-        Ptg p = n.getValue();
-        int numOperands = n.getNumChildren();
-        if (p instanceof AbstractFunctionPtg) {
-            for (int i =0;i<numOperands;i++) {
-                setParameterRVA(n.getChild(i),((AbstractFunctionPtg)p).getParameterClass(i),formulaType);
-//                if (n.getChild(i).getValue() instanceof AbstractFunctionPtg) {
-//                    setParameterRVA(n.getChild(i),formulaType);
-//                }
-                setParameterRVA(n.getChild(i),formulaType);
-            }
-        } else {
-            for (int i =0;i<numOperands;i++) {
-                setParameterRVA(n.getChild(i),formulaType);
-            }
-        }
-    }
-    private void setParameterRVA(Node n, int expectedClass,int formulaType) {
-        Ptg p = n.getValue();
-        if (expectedClass == Ptg.CLASS_REF) { //pg 15, table 1
-            if (p.getDefaultOperandClass() == Ptg.CLASS_REF ) {
-                setClass(n, Ptg.CLASS_REF);
-            }
-            if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE) {
-                if (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED) {
-                    setClass(n,Ptg.CLASS_VALUE);
-                } else {
-                    setClass(n,Ptg.CLASS_ARRAY);
-                }
-            }
-            if (p.getDefaultOperandClass() == Ptg.CLASS_ARRAY ) {
-                setClass(n, Ptg.CLASS_ARRAY);
-            }
-        } else if (expectedClass == Ptg.CLASS_VALUE) { //pg 15, table 2
-            if (formulaType == FORMULA_TYPE_NAMEDRANGE) {
-                setClass(n,Ptg.CLASS_ARRAY) ;
-            } else {
-                setClass(n,Ptg.CLASS_VALUE);
-            }
-        } else { //Array class, pg 16.
-            if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE &&
-                 (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED)) {
-                 setClass(n,Ptg.CLASS_VALUE);
-            } else {
-                setClass(n,Ptg.CLASS_ARRAY);
-            }
-        }
+       oct.transformFormula(_rootNode);
+        return ParseNode.toTokenArray(_rootNode);
     }
 
-     private void setClass(Node n, byte theClass) {
-        Ptg p = n.getValue();
-        if (p.isBaseToken()) {
-            return;
-        }
-        
-        if (p instanceof AbstractFunctionPtg || !(p instanceof OperationPtg)) {
-            p.setClass(theClass);
-        } else {
-            for (int i =0;i<n.getNumChildren();i++) {
-                setClass(n.getChild(i),theClass);
-            }
-        }
-     }
     /**
      * Convenience method which takes in a list then passes it to the
      *  other toFormulaString signature.
@@ -1067,59 +901,4 @@ end;
     public String toFormulaString(Ptg[] ptgs) {
         return toFormulaString(book, ptgs);
     }
-
-
-    /** Create a tree representation of the RPN token array
-     *used to run the class(RVA) change algo
-     */
-    private Node createTree() {
-        Stack stack = new Stack();
-        int numPtgs = tokens.size();
-        OperationPtg o;
-        int numOperands;
-        Node[] operands;
-        for (int i=0;i<numPtgs;i++) {
-            if (tokens.get(i) instanceof OperationPtg) {
-
-                o = (OperationPtg) tokens.get(i);
-                numOperands = o.getNumberOfOperands();
-                operands = new Node[numOperands];
-                for (int j=0;j<numOperands;j++) {
-                    operands[numOperands-j-1] = (Node) stack.pop();
-                }
-                Node result = new Node(o);
-                result.setChildren(operands);
-                stack.push(result);
-            } else {
-                stack.push(new Node((Ptg)tokens.get(i)));
-            }
-        }
-        return (Node) stack.pop();
-    }
-
-    /** toString on the parser instance returns the RPN ordered list of tokens
-     *   Useful for testing
-     */
-    public String toString() {
-        StringBuffer buf = new StringBuffer();
-           for (int i=0;i<tokens.size();i++) {
-            buf.append( ( (Ptg)tokens.get(i)).toFormulaString(book));
-            buf.append(' ');
-        }
-        return buf.toString();
-    }
-
-    /** Private helper class, used to create a tree representation of the formula*/
-    private static final class Node {
-        private Ptg value=null;
-        private Node[] children=new Node[0];
-        private int numChild=0;
-        public Node(Ptg val) {
-            value = val;
-        }
-        public void setChildren(Node[] child) {children = child;numChild=child.length;}
-        public int getNumChildren() {return numChild;}
-        public Node getChild(int number) {return children[number];}
-        public Ptg getValue() {return value;}
-    }
 }
diff --git a/src/java/org/apache/poi/hssf/model/OperandClassTransformer.java b/src/java/org/apache/poi/hssf/model/OperandClassTransformer.java
new file mode 100644 (file)
index 0000000..5358324
--- /dev/null
@@ -0,0 +1,206 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.model;
+
+import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
+import org.apache.poi.hssf.record.formula.ControlPtg;
+import org.apache.poi.hssf.record.formula.ValueOperatorPtg;
+import org.apache.poi.hssf.record.formula.Ptg;
+
+/**
+ * This class performs 'operand class' transformation. Non-base tokens are classified into three 
+ * operand classes:
+ * <ul>
+ * <li>reference</li> 
+ * <li>value</li> 
+ * <li>array</li> 
+ * </ul>
+ * <p/>
+ * 
+ * The final operand class chosen for each token depends on the formula type and the token's place
+ * in the formula. If POI gets the operand class wrong, Excel <em>may</em> interpret the formula
+ * incorrectly.  This condition is typically manifested as a formula cell that displays as '#VALUE!',
+ * but resolves correctly when the user presses F2, enter.<p/>
+ * 
+ * The logic implemented here was partially inspired by the description in
+ * "OpenOffice.org's Documentation of the Microsoft Excel File Format".  The model presented there
+ * seems to be inconsistent with observed Excel behaviour (These differences have not been fully
+ * investigated). The implementation in this class has been heavily modified in order to satisfy
+ * concrete examples of how Excel performs the same logic (see TestRVA).<p/>
+ * 
+ * Hopefully, as additional important test cases are identified and added to the test suite, 
+ * patterns might become more obvious in this code and allow for simplification.
+ * 
+ * @author Josh Micich
+ */
+final class OperandClassTransformer {
+
+       private final int _formulaType;
+
+       public OperandClassTransformer(int formulaType) {
+               _formulaType = formulaType;
+       }
+
+       /**
+        * Traverses the supplied formula parse tree, calling <tt>Ptg.setClass()</tt> for each non-base
+        * token to set its operand class.
+        */
+       public void transformFormula(ParseNode rootNode) {
+               byte rootNodeOperandClass;
+               switch (_formulaType) {
+                       case FormulaParser.FORMULA_TYPE_CELL:
+                               rootNodeOperandClass = Ptg.CLASS_VALUE;
+                               break;
+                       default:
+                               throw new RuntimeException("Incomplete code - formula type (" 
+                                               + _formulaType + ") not supported yet");
+               
+               }
+               transformNode(rootNode, rootNodeOperandClass, false);
+       }
+
+       private void transformNode(ParseNode node, byte desiredOperandClass,
+                       boolean callerForceArrayFlag) {
+               Ptg token = node.getToken();
+               ParseNode[] children = node.getChildren();
+               if (token instanceof ValueOperatorPtg || token instanceof ControlPtg) {
+                       // Value Operator Ptgs and Control are base tokens, so token will be unchanged
+                       
+                       // but any child nodes are processed according to desiredOperandClass and callerForceArrayFlag
+                       for (int i = 0; i < children.length; i++) {
+                               ParseNode child = children[i];
+                               transformNode(child, desiredOperandClass, callerForceArrayFlag);
+                       }
+                       return;
+               }
+               if (token instanceof AbstractFunctionPtg) {
+                       transformFunctionNode((AbstractFunctionPtg) token, children, desiredOperandClass,
+                                       callerForceArrayFlag);
+                       return;
+               }
+               if (children.length > 0) {
+                       throw new IllegalStateException("Node should not have any children");
+               }
+
+               if (token.isBaseToken()) {
+                       // nothing to do
+                       return;
+               }
+        if (callerForceArrayFlag) {
+               switch (desiredOperandClass) {
+                       case Ptg.CLASS_VALUE:
+                       case Ptg.CLASS_ARRAY:
+                               token.setClass(Ptg.CLASS_ARRAY); 
+                               break;
+                       case Ptg.CLASS_REF:
+                               token.setClass(Ptg.CLASS_REF); 
+                               break;
+                       default:
+                               throw new IllegalStateException("Unexpected operand class ("
+                                               + desiredOperandClass + ")");
+               }
+        } else {
+               token.setClass(desiredOperandClass);
+        }
+       }
+
+       private void transformFunctionNode(AbstractFunctionPtg afp, ParseNode[] children,
+                       byte desiredOperandClass, boolean callerForceArrayFlag) {
+
+               boolean localForceArrayFlag;
+               byte defaultReturnOperandClass = afp.getDefaultOperandClass();
+
+               if (callerForceArrayFlag) {
+                       switch (defaultReturnOperandClass) {
+                               case Ptg.CLASS_REF:
+                                       if (desiredOperandClass == Ptg.CLASS_REF) {
+                                               afp.setClass(Ptg.CLASS_REF);
+                                       } else {
+                                               afp.setClass(Ptg.CLASS_ARRAY);
+                                       }
+                                       localForceArrayFlag = false;
+                                       break;
+                               case Ptg.CLASS_ARRAY:
+                                       afp.setClass(Ptg.CLASS_ARRAY);
+                                       localForceArrayFlag = false;
+                                       break;
+                               case Ptg.CLASS_VALUE:
+                                       afp.setClass(Ptg.CLASS_ARRAY);
+                                       localForceArrayFlag = true;
+                                       break;
+                               default:
+                                       throw new IllegalStateException("Unexpected operand class ("
+                                                       + defaultReturnOperandClass + ")");
+                       }
+               } else {
+                       if (defaultReturnOperandClass == desiredOperandClass) {
+                               localForceArrayFlag = false;
+                               // an alternative would have been to for non-base Ptgs to set their operand class 
+                               // from their default, but this would require the call in many subclasses because
+                               // the default OC is not known until the end of the constructor
+                               afp.setClass(defaultReturnOperandClass); 
+                       } else {
+                               switch (desiredOperandClass) {
+                                       case Ptg.CLASS_VALUE:
+                                               // always OK to set functions to return 'value'
+                                               afp.setClass(Ptg.CLASS_VALUE); 
+                                               localForceArrayFlag = false;
+                                               break;
+                                       case Ptg.CLASS_ARRAY:
+                                               switch (defaultReturnOperandClass) {
+                                                       case Ptg.CLASS_REF:
+                                                               afp.setClass(Ptg.CLASS_REF);
+                                                               break;
+                                                       case Ptg.CLASS_VALUE:
+                                                               afp.setClass(Ptg.CLASS_ARRAY);
+                                                               break;
+                                                       default:
+                                                               throw new IllegalStateException("Unexpected operand class ("
+                                                                               + defaultReturnOperandClass + ")");
+                                               }
+                                               localForceArrayFlag = (defaultReturnOperandClass == Ptg.CLASS_VALUE);
+                                               break;
+                                       case Ptg.CLASS_REF:
+                                               switch (defaultReturnOperandClass) {
+                                                       case Ptg.CLASS_ARRAY:
+                                                               afp.setClass(Ptg.CLASS_ARRAY);
+                                                               break;
+                                                       case Ptg.CLASS_VALUE:
+                                                               afp.setClass(Ptg.CLASS_VALUE);
+                                                               break;
+                                                       default:
+                                                               throw new IllegalStateException("Unexpected operand class ("
+                                                                               + defaultReturnOperandClass + ")");
+                                               }
+                                               localForceArrayFlag = false;
+                                               break;
+                                       default:
+                                               throw new IllegalStateException("Unexpected operand class ("
+                                                               + desiredOperandClass + ")");
+                               }
+
+                       }
+               }
+
+               for (int i = 0; i < children.length; i++) {
+                       ParseNode child = children[i];
+                       byte paramOperandClass = afp.getParameterClass(i);
+                       transformNode(child, paramOperandClass, localForceArrayFlag);
+               }
+       }
+}
diff --git a/src/java/org/apache/poi/hssf/model/ParseNode.java b/src/java/org/apache/poi/hssf/model/ParseNode.java
new file mode 100644 (file)
index 0000000..acd8cb1
--- /dev/null
@@ -0,0 +1,201 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.model;
+
+import org.apache.poi.hssf.record.formula.AttrPtg;
+import org.apache.poi.hssf.record.formula.FuncVarPtg;
+import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry;
+/**
+ * Represents a syntactic element from a formula by encapsulating the corresponding <tt>Ptg</tt>
+ * token.  Each <tt>ParseNode</tt> may have child <tt>ParseNode</tt>s in the case when the wrapped
+ * <tt>Ptg</tt> is non-atomic.
+ * 
+ * @author Josh Micich
+ */
+final class ParseNode {
+
+       public static final ParseNode[] EMPTY_ARRAY = { };
+       private final Ptg _token;
+       private final ParseNode[] _children;
+       private boolean _isIf;
+       private final int _tokenCount;
+
+       public ParseNode(Ptg token, ParseNode[] children) {
+               _token = token;
+               _children = children;
+               _isIf = isIf(token);
+               int tokenCount = 1;
+               for (int i = 0; i < children.length; i++) {
+                       tokenCount += children[i].getTokenCount();
+               }
+               if (_isIf) {
+                       // there will be 2 or 3 extra tAttr tokens according to whether the false param is present
+                       tokenCount += children.length;
+               }
+               _tokenCount = tokenCount;
+       }
+       public ParseNode(Ptg token) {
+               this(token, EMPTY_ARRAY);
+       }
+       public ParseNode(Ptg token, ParseNode child0) {
+               this(token, new ParseNode[] { child0, });
+       }
+       public ParseNode(Ptg token, ParseNode child0, ParseNode child1) {
+               this(token, new ParseNode[] { child0, child1, });
+       }
+       private int getTokenCount() {
+               return _tokenCount;
+       }
+
+       /**
+        * Collects the array of <tt>Ptg</tt> tokens for the specified tree.
+        */
+       public static Ptg[] toTokenArray(ParseNode rootNode) {
+               TokenCollector temp = new TokenCollector(rootNode.getTokenCount());
+               rootNode.collectPtgs(temp);
+               return temp.getResult();
+       }
+       private void collectPtgs(TokenCollector temp) {
+               if (isIf(getToken())) {
+                       collectIfPtgs(temp);
+                       return;
+               }
+               for (int i=0; i< getChildren().length; i++) {
+                       getChildren()[i].collectPtgs(temp);
+               }
+               temp.add(getToken());
+       }
+       /**
+        * The IF() function gets marked up with two or three tAttr tokens.
+        * Similar logic will be required for CHOOSE() when it is supported
+        * 
+        * See excelfileformat.pdf sec 3.10.5 "tAttr (19H)
+        */
+       private void collectIfPtgs(TokenCollector temp) {
+
+               // condition goes first
+               getChildren()[0].collectPtgs(temp);
+               
+               // placeholder for tAttrIf
+               int ifAttrIndex = temp.createPlaceholder();
+               
+               // true parameter
+               getChildren()[1].collectPtgs(temp);
+               
+               // placeholder for first skip attr
+               int skipAfterTrueParamIndex = temp.createPlaceholder();
+               int trueParamSize = temp.sumTokenSizes(ifAttrIndex+1, skipAfterTrueParamIndex);
+
+               AttrPtg attrIf = new AttrPtg();
+               attrIf.setOptimizedIf(true);
+               AttrPtg attrSkipAfterTrue = new AttrPtg();
+               attrSkipAfterTrue.setGoto(true);
+               
+               if (getChildren().length > 2) {
+                       // false param present
+                       
+                       // false parameter
+                       getChildren()[2].collectPtgs(temp);
+                       
+                       int skipAfterFalseParamIndex = temp.createPlaceholder();
+
+                       AttrPtg attrSkipAfterFalse = new AttrPtg();
+                       attrSkipAfterFalse.setGoto(true);
+
+                       int falseParamSize =  temp.sumTokenSizes(skipAfterTrueParamIndex+1, skipAfterFalseParamIndex);
+                       
+                       attrIf.setData((short)(trueParamSize + 4)); // distance to start of false parameter. +4 for skip after true
+                       attrSkipAfterTrue.setData((short)(falseParamSize + 4 + 4 - 1)); // 1 less than distance to end of if FuncVar(size=4). +4 for attr skip before 
+                       attrSkipAfterFalse.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4). 
+
+                       temp.setPlaceholder(ifAttrIndex, attrIf);
+                       temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue);
+                       temp.setPlaceholder(skipAfterFalseParamIndex, attrSkipAfterFalse);
+               } else {
+                       // false parameter not present
+                       attrIf.setData((short)(trueParamSize + 4)); // distance to start of FuncVar. +4 for skip after true
+                       attrSkipAfterTrue.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4). 
+                       
+                       temp.setPlaceholder(ifAttrIndex, attrIf);
+                       temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue);
+               }
+               
+               temp.add(getToken());
+       }
+
+       private static boolean isIf(Ptg token) {
+               if (token instanceof FuncVarPtg) {
+                       FuncVarPtg func = (FuncVarPtg) token;
+                       if (FunctionMetadataRegistry.FUNCTION_NAME_IF.equals(func.getName())) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       public Ptg getToken() {
+               return _token;
+       }
+
+       public ParseNode[] getChildren() {
+               return _children;
+       }
+
+       private static final class TokenCollector {
+
+               private final Ptg[] _ptgs;
+               private int _offset;
+
+               public TokenCollector(int tokenCount) {
+                       _ptgs = new Ptg[tokenCount];
+                       _offset = 0;
+               }
+
+               public int sumTokenSizes(int fromIx, int toIx) {
+                       int result = 0;
+                       for (int i=fromIx; i<toIx; i++) {
+                               result += _ptgs[i].getSize();
+                       }
+                       return result;
+               }
+
+               public int createPlaceholder() {
+                       return _offset++;
+               }
+
+               public void add(Ptg token) {
+                       if (token == null) {
+                               throw new IllegalArgumentException("token must not be null");
+                       }
+                       _ptgs[_offset] = token;
+                       _offset++;
+               }
+
+               public void setPlaceholder(int index, Ptg token) {
+                       if (_ptgs[index] != null) {
+                               throw new IllegalStateException("Invalid placeholder index (" + index + ")");
+                       }
+                       _ptgs[index] = token;
+               }
+
+               public Ptg[] getResult() {
+                       return _ptgs;
+               }
+       }
+}
index 90c1965909066d6c7fcfab29471513f928585a90..6c97bd1a37d6a845baeddc9d812dc1aeb7398efe 100644 (file)
@@ -28,12 +28,11 @@ package org.apache.poi.hssf.record.formula;
  * tEndSheet
  */
 public abstract class ControlPtg extends Ptg {
-       
+
        public boolean isBaseToken() {
                return true;
        }
        public final byte getDefaultOperandClass() {
-// TODO                throw new IllegalStateException("Control tokens are not classified");
-               return Ptg.CLASS_VALUE;
-    }
+               throw new IllegalStateException("Control tokens are not classified");
+       }
 }
index 28d5d13b4e84e77f37f9815339d2ef9ed42a0c45..568f639cd3e48bfe046430f9fcb8785c634b22b6 100644 (file)
@@ -341,7 +341,9 @@ public abstract class Ptg
         ptgClass = thePtgClass;
     }
 
-    /** returns the class (REF/VALUE/ARRAY) for this Ptg */
+    /**
+     *  @return the 'operand class' (REF/VALUE/ARRAY) for this Ptg
+     */
     public byte getPtgClass() {
         return ptgClass;
     }
index 8a85f42841a8405a4e591145d67cc359a6750462..31694d5d76a9b8d482c48b827e1157fc65dfb5ba 100644 (file)
@@ -15,6 +15,7 @@
 \r
 # Created by (org.apache.poi.hssf.record.formula.function.ExcelFileFormatDocFunctionExtractor)\r
 # from source file 'excelfileformat.odt' (size=356107, md5=0x8f789cb6e75594caf068f8e193004ef4)\r
+#  ! + some manual edits !\r
 #\r
 #Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )\r
 \r
@@ -78,8 +79,8 @@
 58     NPER    3       5       V       V V V V V               \r
 59     PMT     3       5       V       V V V V V               \r
 60     RATE    3       6       V       V V V V V V             \r
-61     MIRR    3       3       V       R V V           \r
-62     IRR     1       2       V       R V             \r
+61     MIRR    3       3       V       A V V           \r
+62     IRR     1       2       V       A V             \r
 63     RAND    0       0       V       -       x       \r
 64     MATCH   2       3       V       V R R           \r
 65     DATE    3       3       V       V V V           \r
@@ -93,8 +94,8 @@
 73     SECOND  1       1       V       V               \r
 74     NOW     0       0       V       -       x       \r
 75     AREAS   1       1       V       R               \r
-76     ROWS    1       1       V       R               \r
-77     COLUMNS 1       1       V       R               \r
+76     ROWS    1       1       V       A               \r
+77     COLUMNS 1       1       V       A               \r
 78     OFFSET  3       5       R       R V V V V       x       \r
 82     SEARCH  2       3       V       V V V           \r
 83     TRANSPOSE       1       1       A       A               \r
diff --git a/src/testcases/org/apache/poi/hssf/data/testRVA.xls b/src/testcases/org/apache/poi/hssf/data/testRVA.xls
new file mode 100644 (file)
index 0000000..327edbb
Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/testRVA.xls differ
index 15f9b1b40f890eb27a259fa337cca9fdb2267133..045e371a22afc6d167e37f96edc12a45b3197170 100755 (executable)
@@ -34,6 +34,8 @@ public final class AllModelTests {
                result.addTestSuite(TestFormulaParser.class);
                result.addTestSuite(TestFormulaParserEval.class);
                result.addTestSuite(TestFormulaParserIf.class);
+               result.addTestSuite(TestOperandClassTransformer.class);
+               result.addTestSuite(TestRVA.class);
                result.addTestSuite(TestSheet.class);
                result.addTestSuite(TestSheetAdditional.class);
                return result;
diff --git a/src/testcases/org/apache/poi/hssf/model/TestOperandClassTransformer.java b/src/testcases/org/apache/poi/hssf/model/TestOperandClassTransformer.java
new file mode 100644 (file)
index 0000000..90a5d8b
--- /dev/null
@@ -0,0 +1,110 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.model;
+
+import junit.framework.AssertionFailedError;
+import junit.framework.TestCase;
+
+import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
+import org.apache.poi.hssf.record.formula.FuncVarPtg;
+import org.apache.poi.hssf.record.formula.Ptg;
+
+/**
+ * Tests specific formula examples in <tt>OperandClassTransformer</tt>.
+ * 
+ * @author Josh Micich
+ */
+public final class TestOperandClassTransformer extends TestCase {
+
+       public void testMdeterm() {
+               String formula = "MDETERM(ABS(A1))";
+               Ptg[] ptgs = FormulaParser.parse(formula, null);
+
+               confirmTokenClass(ptgs, 0, Ptg.CLASS_ARRAY);
+               confirmFuncClass(ptgs, 1, "ABS", Ptg.CLASS_ARRAY);
+               confirmFuncClass(ptgs, 2, "MDETERM", Ptg.CLASS_VALUE);
+       }
+
+       /**
+        * In the example: <code>INDEX(PI(),1)</code>, Excel encodes PI() as 'array'.  It is not clear
+        * what rule justifies this. POI currently encodes it as 'value' which Excel(2007) seems to 
+        * tolerate. Changing the metadata for INDEX to have first parameter as 'array' class breaks 
+        * other formulas involving INDEX.  It seems like a special case needs to be made.  Perhaps an 
+        * important observation is that INDEX is one of very few functions that returns 'reference' type.
+        * 
+        * This test has been added but disabled in order to document this issue.
+        */
+       public void DISABLED_testIndexPi1() {
+               String formula = "INDEX(PI(),1)";
+               Ptg[] ptgs = FormulaParser.parse(formula, null);
+
+               confirmFuncClass(ptgs, 1, "PI", Ptg.CLASS_ARRAY); // fails as of POI 3.1
+               confirmFuncClass(ptgs, 2, "INDEX", Ptg.CLASS_VALUE);
+       }
+
+       public void testComplexIRR_bug45041() {
+               String formula = "(1+IRR(SUMIF(A:A,ROW(INDIRECT(MIN(A:A)&\":\"&MAX(A:A))),B:B),0))^365-1";
+               Ptg[] ptgs = FormulaParser.parse(formula, null);
+
+               FuncVarPtg rowFunc = (FuncVarPtg) ptgs[10];
+               FuncVarPtg sumifFunc = (FuncVarPtg) ptgs[12];
+               assertEquals("ROW", rowFunc.getName());
+               assertEquals("SUMIF", sumifFunc.getName());
+
+               if (rowFunc.getPtgClass() == Ptg.CLASS_VALUE || sumifFunc.getPtgClass() == Ptg.CLASS_VALUE) {
+                       throw new AssertionFailedError("Identified bug 45041");
+               }
+               confirmTokenClass(ptgs, 1, Ptg.CLASS_REF);
+               confirmTokenClass(ptgs, 2, Ptg.CLASS_REF);
+               confirmFuncClass(ptgs, 3, "MIN", Ptg.CLASS_VALUE);
+               confirmTokenClass(ptgs, 6, Ptg.CLASS_REF);
+               confirmFuncClass(ptgs, 7, "MAX", Ptg.CLASS_VALUE);
+               confirmFuncClass(ptgs, 9, "INDIRECT", Ptg.CLASS_REF);
+               confirmFuncClass(ptgs, 10, "ROW", Ptg.CLASS_ARRAY);
+               confirmTokenClass(ptgs, 11, Ptg.CLASS_REF);
+               confirmFuncClass(ptgs, 12, "SUMIF", Ptg.CLASS_ARRAY);
+               confirmFuncClass(ptgs, 14, "IRR", Ptg.CLASS_VALUE);
+       }
+
+       private void confirmFuncClass(Ptg[] ptgs, int i, String expectedFunctionName, byte operandClass) {
+               confirmTokenClass(ptgs, i, operandClass);
+               AbstractFunctionPtg afp = (AbstractFunctionPtg) ptgs[i];
+               assertEquals(expectedFunctionName, afp.getName());
+       }
+
+       private void confirmTokenClass(Ptg[] ptgs, int i, byte operandClass) {
+               Ptg ptg = ptgs[i];
+               if (operandClass != ptg.getPtgClass()) {
+                       throw new AssertionFailedError("Wrong operand class for function ptg ("
+                                       + ptg.toString() + "). Expected " + getOperandClassName(operandClass)
+                                       + " but got " + getOperandClassName(ptg.getPtgClass()));
+               }
+       }
+
+       private static String getOperandClassName(byte ptgClass) {
+               switch (ptgClass) {
+                       case Ptg.CLASS_REF:
+                               return "R";
+                       case Ptg.CLASS_VALUE:
+                               return "V";
+                       case Ptg.CLASS_ARRAY:
+                               return "A";
+               }
+               throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
+       }
+}
diff --git a/src/testcases/org/apache/poi/hssf/model/TestRVA.java b/src/testcases/org/apache/poi/hssf/model/TestRVA.java
new file mode 100644 (file)
index 0000000..cb51c17
--- /dev/null
@@ -0,0 +1,156 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.model;
+
+import junit.framework.AssertionFailedError;
+import junit.framework.TestCase;
+
+import org.apache.poi.hssf.HSSFTestDataSamples;
+import org.apache.poi.hssf.record.formula.AttrPtg;
+import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.ReferencePtg;
+import org.apache.poi.hssf.usermodel.FormulaExtractor;
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+
+/**
+ * Tests 'operand class' transformation performed by
+ * <tt>OperandClassTransformer</tt> by comparing its results with those
+ * directly produced by Excel (in a sample spreadsheet).
+ * 
+ * @author Josh Micich
+ */
+public final class TestRVA extends TestCase {
+
+       private static final String NEW_LINE = System.getProperty("line.separator");
+
+       public void testFormulas() {
+               HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("testRVA.xls");
+               HSSFSheet sheet = wb.getSheetAt(0);
+
+               int countFailures = 0;
+               int countErrors = 0;
+
+               int rowIx = 0;
+               while (rowIx < 65535) {
+                       HSSFRow row = sheet.getRow(rowIx);
+                       if (row == null) {
+                               break;
+                       }
+                       HSSFCell cell = row.getCell(0);
+                       if (cell == null || cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) {
+                               break;
+                       }
+                       String formula = cell.getCellFormula();
+                       try {
+                               confirmCell(cell, formula);
+                       } catch (AssertionFailedError e) {
+                               System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'");
+                               System.err.println(e.getMessage());
+                               countFailures++;
+                       } catch (RuntimeException e) {
+                               System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'");
+                               countErrors++;
+                               e.printStackTrace();
+                       }
+                       rowIx++;
+               }
+               if (countErrors + countFailures > 0) {
+                       String msg = "One or more RVA tests failed: countFailures=" + countFailures
+                                       + " countFailures=" + countErrors + ". See stderr for details.";
+                       throw new AssertionFailedError(msg);
+               }
+       }
+
+       private void confirmCell(HSSFCell formulaCell, String formula) {
+               Ptg[] excelPtgs = FormulaExtractor.getPtgs(formulaCell);
+               Ptg[] poiPtgs = FormulaParser.parse(formula, null);
+               int nExcelTokens = excelPtgs.length;
+               int nPoiTokens = poiPtgs.length;
+               if (nExcelTokens != nPoiTokens) {
+                       if (nExcelTokens == nPoiTokens + 1 && excelPtgs[0].getClass() == AttrPtg.class) {
+                               // compensate for missing tAttrVolatile, which belongs in any formula 
+                               // involving OFFSET() et al. POI currently does not insert where required
+                               Ptg[] temp = new Ptg[nExcelTokens];
+                               temp[0] = excelPtgs[0];
+                               System.arraycopy(poiPtgs, 0, temp, 1, nPoiTokens);
+                               poiPtgs = temp;
+                       } else {
+                               throw new RuntimeException("Expected " + nExcelTokens + " tokens but got "
+                                               + nPoiTokens);
+                       }
+               }
+               boolean hasMismatch = false;
+               StringBuffer sb = new StringBuffer();
+               for (int i = 0; i < nExcelTokens; i++) {
+                       Ptg poiPtg = poiPtgs[i];
+                       Ptg excelPtg = excelPtgs[i];
+                       if (!areTokenClassesSame(poiPtg, excelPtg)) {
+                               hasMismatch = true;
+                               sb.append("  mismatch token type[" + i + "] " + getShortClassName(excelPtg) + " "
+                                               + getOperandClassName(excelPtg) + " - " + getShortClassName(poiPtg) + " "
+                                               + getOperandClassName(poiPtg));
+                               sb.append(NEW_LINE);
+                               continue;
+                       }
+                       if (poiPtg.isBaseToken()) {
+                               continue;
+                       }
+                       sb.append("  token[" + i + "] " + excelPtg.toString() + " "
+                                       + getOperandClassName(excelPtg));
+
+                       if (excelPtg.getPtgClass() != poiPtg.getPtgClass()) {
+                               hasMismatch = true;
+                               sb.append(" - was " + getOperandClassName(poiPtg));
+                       }
+                       sb.append(NEW_LINE);
+               }
+               if (hasMismatch) {
+                       throw new AssertionFailedError(sb.toString());
+               }
+       }
+
+       private boolean areTokenClassesSame(Ptg poiPtg, Ptg excelPtg) {
+               if (excelPtg.getClass() == poiPtg.getClass()) {
+                       return true;
+               }
+               if (poiPtg.getClass() == ReferencePtg.class) {
+                       // TODO - remove funny subclasses of ReferencePtg
+                       return excelPtg instanceof ReferencePtg;
+               }
+               return false;
+       }
+
+       private String getShortClassName(Object o) {
+               String cn = o.getClass().getName();
+               int pos = cn.lastIndexOf('.');
+               return cn.substring(pos + 1);
+       }
+
+       private static String getOperandClassName(Ptg ptg) {
+               byte ptgClass = ptg.getPtgClass();
+               switch (ptgClass) {
+                       case Ptg.CLASS_REF:   return "R";
+                       case Ptg.CLASS_VALUE: return "V";
+                       case Ptg.CLASS_ARRAY: return "A";
+               }
+               throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
+       }
+}
diff --git a/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java b/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java
new file mode 100644 (file)
index 0000000..d657647
--- /dev/null
@@ -0,0 +1,49 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.usermodel;
+
+import java.util.List;
+
+import org.apache.poi.hssf.record.CellValueRecordInterface;
+import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate;
+import org.apache.poi.hssf.record.formula.Ptg;
+
+/**
+ * Test utility class to get <tt>Ptg</tt> arrays out of formula cells
+ * 
+ * @author Josh Micich
+ */
+public final class FormulaExtractor {
+
+       private FormulaExtractor() {
+               // no instances of this class
+       }
+       
+       public static Ptg[] getPtgs(HSSFCell cell) {
+               CellValueRecordInterface vr = cell.getCellValueRecord();
+               if (!(vr instanceof FormulaRecordAggregate)) {
+                       throw new IllegalArgumentException("Not a formula cell");
+               }
+               FormulaRecordAggregate fra = (FormulaRecordAggregate) vr;
+               List tokens = fra.getFormulaRecord().getParsedExpression();
+               Ptg[] result = new Ptg[tokens.size()];
+               tokens.toArray(result);
+               return result;
+       }
+
+}