]> source.dussan.org Git - poi.git/commitdiff
Bugzilla 48195 - short-circuit evaluation of IF() and CHOOSE()
authorJosh Micich <josh@apache.org>
Fri, 13 Nov 2009 21:21:23 +0000 (21:21 +0000)
committerJosh Micich <josh@apache.org>
Fri, 13 Nov 2009 21:21:23 +0000 (21:21 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@835994 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/hssf/record/formula/functions/Choose.java
src/java/org/apache/poi/hssf/record/formula/functions/If.java
src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java
src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java
src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java

index c26a9fb685f8759fca91714dc3e36d05ad985482..0ae0c956ba39f818df9749195d9001ad2148bbe8 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.6-beta1" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="add">48195 - short-circuit evaluation of IF() and CHOOSE()</action>
            <action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
            <action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
            <action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>
index e7a827f4bdc89f29778f682a89b5d15f15e8c8e7..cafdd226f0f4fe729a388160de04834237a07a47 100644 (file)
@@ -23,7 +23,6 @@ import org.apache.poi.hssf.record.formula.eval.OperandResolver;
 import org.apache.poi.hssf.record.formula.eval.ValueEval;
 
 /**
- *
  * @author Josh Micich
  */
 public final class Choose implements Function {
@@ -34,8 +33,7 @@ public final class Choose implements Function {
                }
 
                try {
-                       ValueEval ev = OperandResolver.getSingleValue(args[0], srcRowIndex, srcColumnIndex);
-                       int ix = OperandResolver.coerceValueToInt(ev);
+                       int ix = evaluateFirstArg(args[0], srcRowIndex, srcColumnIndex);
                        if (ix < 1 || ix >= args.length) {
                                return ErrorEval.VALUE_INVALID;
                        }
@@ -44,4 +42,10 @@ public final class Choose implements Function {
                        return e.getErrorEval();
                }
        }
+
+       public static int evaluateFirstArg(ValueEval arg0, int srcRowIndex, int srcColumnIndex)
+                       throws EvaluationException {
+               ValueEval ev = OperandResolver.getSingleValue(arg0, srcRowIndex, srcColumnIndex);
+               return OperandResolver.coerceValueToInt(ev);
+       }
 }
index f8d3c03ee3f59071537de1abcf9e3cc1459b7f4b..1723f014bc2e02a6f4b7fcf965df7cfe3186f312 100644 (file)
@@ -53,7 +53,7 @@ public final class If implements Function {
                return falseResult;
        }
 
-       private static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, short srcCellCol)
+       public static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, int srcCellCol)
                        throws EvaluationException {
                ValueEval ve = OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol);
                Boolean b = OperandResolver.coerceValueToBoolean(ve, false);
index 2a272fa3dbecab886ceafe4aef9206a631eecea7..f9b7151dfac207a7b696c81b9292af5ad99b1598 100644 (file)
@@ -52,6 +52,7 @@ import org.apache.poi.hssf.record.formula.eval.AreaEval;
 import org.apache.poi.hssf.record.formula.eval.BlankEval;
 import org.apache.poi.hssf.record.formula.eval.BoolEval;
 import org.apache.poi.hssf.record.formula.eval.ErrorEval;
+import org.apache.poi.hssf.record.formula.eval.EvaluationException;
 import org.apache.poi.hssf.record.formula.eval.MissingArgEval;
 import org.apache.poi.hssf.record.formula.eval.NameEval;
 import org.apache.poi.hssf.record.formula.eval.NameXEval;
@@ -60,7 +61,9 @@ import org.apache.poi.hssf.record.formula.eval.OperationEval;
 import org.apache.poi.hssf.record.formula.eval.RefEval;
 import org.apache.poi.hssf.record.formula.eval.StringEval;
 import org.apache.poi.hssf.record.formula.eval.ValueEval;
+import org.apache.poi.hssf.record.formula.functions.Choose;
 import org.apache.poi.hssf.record.formula.functions.FreeRefFunction;
+import org.apache.poi.hssf.record.formula.functions.If;
 import org.apache.poi.hssf.record.formula.udf.UDFFinder;
 import org.apache.poi.hssf.util.CellReference;
 import org.apache.poi.ss.formula.CollaboratingWorkbooksEnvironment.WorkbookNotFoundException;
@@ -228,7 +231,7 @@ public final class WorkbookEvaluator {
        private ValueEval evaluateAny(EvaluationCell srcCell, int sheetIndex,
                                int rowIndex, int columnIndex, EvaluationTracker tracker) {
 
-               // avoid tracking dependencies for cells that have constant definition
+               // avoid tracking dependencies to cells that have constant definition
                boolean shouldCellDependencyBeRecorded = _stabilityClassifier == null ? true
                                        : !_stabilityClassifier.isCellFinal(sheetIndex, rowIndex, columnIndex);
                if (srcCell == null || srcCell.getCellType() != Cell.CELL_TYPE_FORMULA) {
@@ -344,6 +347,66 @@ public final class WorkbookEvaluator {
                                        byte nArgs = 1;  // tAttrSum always has 1 parameter
                                        ptg = new FuncVarPtg("SUM", nArgs);
                                }
+                               if (attrPtg.isOptimizedChoose()) {
+                                       ValueEval arg0 = stack.pop();
+                                       int[] jumpTable = attrPtg.getJumpTable();
+                                       int dist;
+                                       int nChoices = jumpTable.length;
+                                       try {
+                                               int switchIndex = Choose.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
+                                               if (switchIndex<1 || switchIndex > nChoices) {
+                                                       stack.push(ErrorEval.VALUE_INVALID);
+                                                       dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
+                                               } else {
+                                                       dist = jumpTable[switchIndex-1];
+                                               }
+                                       } catch (EvaluationException e) {
+                                               stack.push(e.getErrorEval());
+                                               dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
+                                       }
+                                       // Encoded dist for tAttrChoose includes size of jump table, but
+                                       // countTokensToBeSkipped() does not (it counts whole tokens).
+                                       dist -= nChoices*2+2; // subtract jump table size
+                                       i+= countTokensToBeSkipped(ptgs, i, dist);
+                                       continue;
+                               }
+                               if (attrPtg.isOptimizedIf()) {
+                                       ValueEval arg0 = stack.pop();
+                                       boolean evaluatedPredicate;
+                                       try {
+                                               evaluatedPredicate = If.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
+                                       } catch (EvaluationException e) {
+                                               stack.push(e.getErrorEval());
+                                               int dist = attrPtg.getData();
+                                               i+= countTokensToBeSkipped(ptgs, i, dist);
+                                               attrPtg = (AttrPtg) ptgs[i];
+                                               dist = attrPtg.getData()+1;
+                                               i+= countTokensToBeSkipped(ptgs, i, dist);
+                                               continue;
+                                       }
+                                       if (evaluatedPredicate) {
+                                               // nothing to skip - true param folows
+                                       } else {
+                                               int dist = attrPtg.getData();
+                                               i+= countTokensToBeSkipped(ptgs, i, dist);
+                                               Ptg nextPtg = ptgs[i+1];
+                                               if (ptgs[i] instanceof AttrPtg && nextPtg instanceof FuncVarPtg) {
+                                                       // this is an if statement without a false param (as opposed to MissingArgPtg as the false param)
+                                                       i++;
+                                                       stack.push(BoolEval.FALSE);
+                                               }
+                                       }
+                                       continue;
+                               }
+                               if (attrPtg.isSkip()) {
+                                       int dist = attrPtg.getData()+1;
+                                       i+= countTokensToBeSkipped(ptgs, i, dist);
+                                       if (stack.peek() == MissingArgEval.instance) {
+                                               stack.pop();
+                                               stack.push(BlankEval.INSTANCE);
+                                       }
+                                       continue;
+                               }
                        }
                        if (ptg instanceof ControlPtg) {
                                // skip Parentheses, Attr, etc
@@ -402,6 +465,27 @@ public final class WorkbookEvaluator {
                return value;
        }
 
+       /**
+        * Calculates the number of tokens that the evaluator should skip upon reaching a tAttrSkip.
+        *
+        * @return the number of tokens (starting from <tt>startIndex+1</tt>) that need to be skipped
+        * to achieve the specified <tt>distInBytes</tt> skip distance.
+        */
+       private static int countTokensToBeSkipped(Ptg[] ptgs, int startIndex, int distInBytes) {
+               int remBytes = distInBytes;
+               int index = startIndex;
+               while (remBytes != 0) {
+                       index++;
+                       remBytes -= ptgs[index].getSize();
+                       if (remBytes < 0) {
+                               throw new RuntimeException("Bad skip distance (wrong token size calculation).");
+                       }
+                       if (index >= ptgs.length) {
+                               throw new RuntimeException("Skip distance too far (ran out of formula tokens).");
+                       }
+               }
+               return index-startIndex;
+       }
        /**
         * Dereferences a single value from any AreaEval or RefEval evaluation result.
         * If the supplied evaluationResult is just a plain value, it is returned as-is.
index d94cfd9a96b25dbbd1e95619fa6907d7a1af6711..939634b6298da1586a927d0e6e64327f86c22430 100644 (file)
@@ -31,6 +31,7 @@ import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate;
 import org.apache.poi.hssf.record.formula.AreaPtg;
 import org.apache.poi.hssf.record.formula.FuncVarPtg;
 import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.eval.ErrorEval;
 import org.apache.poi.hssf.record.formula.eval.ValueEval;
 import org.apache.poi.ss.formula.EvaluationCell;
 import org.apache.poi.ss.formula.EvaluationListener;
@@ -319,6 +320,10 @@ public final class TestFormulaEvaluatorBugs extends TestCase {
         * The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
         */
        public void testSlowEvaluate45376() {
+               /*
+                * Note - to observe behaviour without caching, disable the call to
+                * updateValue() from FormulaCellCacheEntry.updateFormulaResult().
+                */
 
                // Firstly set up a sequence of formula cells where each depends on the  previous multiple
                // times.  Without caching, each subsequent cell take about 4 times longer to evaluate.
@@ -330,30 +335,39 @@ public final class TestFormulaEvaluatorBugs extends TestCase {
                        char prevCol = (char) ('A' + i-1);
                        String prevCell = prevCol + "1";
                        // this formula is inspired by the offending formula of the attachment for bug 45376
+                       // IF(DATE(YEAR(A1),MONTH(A1)+1,1)<=$D$3,DATE(YEAR(A1),MONTH(A1)+1,1),NA()) etc
                        String formula = "IF(DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1)<=$D$3," +
                                        "DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1),NA())";
                        cell.setCellFormula(formula);
-
                }
                Calendar cal = new GregorianCalendar(2000, 0, 1, 0, 0, 0);
                row.createCell(0).setCellValue(cal);
 
-               // Choose cell A9, so that the failing test case doesn't take too long to execute.
+               // Choose cell A9 instead of A10, so that the failing test case doesn't take too long to execute.
                HSSFCell cell = row.getCell(8);
                EvalListener evalListener = new EvalListener();
                WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
-               evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
+               ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
                int evalCount = evalListener.getCountCacheMisses();
                if (evalCount > 10) {
                        // Without caching, evaluating cell 'A9' takes 21845 evaluations which consumes
                        // much time (~3 sec on Core 2 Duo 2.2GHz)
+                       // short-circuit-if optimisation cuts this down to 255 evaluations which is still too high
                        System.err.println("Cell A9 took " + evalCount + " intermediate evaluations");
                        throw new AssertionFailedError("Identifed bug 45376 - Formula evaluator should cache values");
                }
-               // With caching, the evaluationCount is 8 which is a big improvement
-               // Note - these expected values may change if the WorkbookEvaluator is
-               // ever optimised to short circuit 'if' functions.
+               // With caching, the evaluationCount is 8 which is exactly the
+               // number of formula cells that needed to be evaluated.
                assertEquals(8, evalCount);
-               assertEquals(24, evalListener.getCountCacheHits());
+
+               // The cache hits would be 24 if fully evaluating all arguments of the
+               // "IF()" functions (Each of the 8 formulas has 4 refs to formula cells
+               // which result in 1 cache miss and 3 cache hits). However with the
+               // short-circuit-if optimisation, 2 of the cell refs get skipped
+               // reducing this metric 8.
+               assertEquals(8, evalListener.getCountCacheHits());
+
+               // confirm the evaluation result too
+               assertEquals(ErrorEval.NA, ve);
        }
 }
index 082f07a84e2dec87063cbdf5f3b197a2ebb9961b..02e383f33aea4cdcb5e0b2dc6a7ccc20c489702c 100644 (file)
@@ -22,6 +22,13 @@ import junit.framework.TestCase;
 
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.hssf.record.NameRecord;
+import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.eval.NumberEval;
+import org.apache.poi.hssf.record.formula.eval.ValueEval;
+import org.apache.poi.ss.formula.EvaluationCell;
+import org.apache.poi.ss.formula.EvaluationListener;
+import org.apache.poi.ss.formula.WorkbookEvaluator;
+import org.apache.poi.ss.formula.WorkbookEvaluatorTestHelper;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.CellValue;
 /**
@@ -167,4 +174,46 @@ public final class TestHSSFFormulaEvaluator extends TestCase {
                assertEquals(Cell.CELL_TYPE_NUMERIC, value.getCellType());
                assertEquals(5.33, value.getNumberValue(), 0.0);
        }
+       private static final class EvalCountListener extends EvaluationListener {
+               private int _evalCount;
+               public EvalCountListener() {
+                       _evalCount = 0;
+               }
+               public void onStartEvaluate(EvaluationCell cell, ICacheEntry entry, Ptg[] ptgs) {
+                       _evalCount++;
+               }
+               public int getEvalCount() {
+                       return _evalCount;
+               }
+       }
+
+       /**
+        * The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
+        */
+       public void testShortCircuitIfEvaluation() {
+
+               // Set up a simple IF() formula that has measurable evaluation cost for its operands.
+               HSSFWorkbook wb = new HSSFWorkbook();
+               HSSFSheet sheet = wb.createSheet("Sheet1");
+               HSSFRow row = sheet.createRow(0);
+               HSSFCell cellA1 = row.createCell(0);
+               cellA1.setCellFormula("if(B1,C1,D1+E1+F1)");
+               // populate cells B1..F1 with simple formulas instead of plain values so we can use
+               // EvaluationListener to check which parts of the first formula get evaluated
+               for (int i=1; i<6; i++) {
+                       // formulas are just literal constants "1".."5"
+                       row.createCell(i).setCellFormula(String.valueOf(i));
+               }
+
+               EvalCountListener evalListener = new EvalCountListener();
+               WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
+               ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cellA1));
+               int evalCount = evalListener.getEvalCount();
+               if (evalCount == 6) {
+                       // Without short-circuit-if evaluation, evaluating cell 'A1' takes 3 extra evaluations (for D1,E1,F1)
+                       throw new AssertionFailedError("Identifed bug 48195 - Formula evaluator should short-circuit IF() calculations.");
+               }
+               assertEquals(3, evalCount);
+               assertEquals(2.0, ((NumberEval)ve).getNumberValue(), 0D);
+       }
 }