From: Josh Micich Date: Fri, 13 Nov 2009 21:21:23 +0000 (+0000) Subject: Bugzilla 48195 - short-circuit evaluation of IF() and CHOOSE() X-Git-Tag: REL_3_6~58 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=89223f9500dea29c2b6ecad70401c844c11206ca;p=poi.git Bugzilla 48195 - short-circuit evaluation of IF() and CHOOSE() git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@835994 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index c26a9fb685..0ae0c956ba 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 48195 - short-circuit evaluation of IF() and CHOOSE() 48161 - support for text extraction from PPT master slides 47970 - added a method to set arabic mode in HSSFSheet 48134 - release system resources when using Picture.resize() diff --git a/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java b/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java index e7a827f4bd..cafdd226f0 100644 --- a/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java +++ b/src/java/org/apache/poi/hssf/record/formula/functions/Choose.java @@ -23,7 +23,6 @@ import org.apache.poi.hssf.record.formula.eval.OperandResolver; import org.apache.poi.hssf.record.formula.eval.ValueEval; /** - * * @author Josh Micich */ public final class Choose implements Function { @@ -34,8 +33,7 @@ public final class Choose implements Function { } try { - ValueEval ev = OperandResolver.getSingleValue(args[0], srcRowIndex, srcColumnIndex); - int ix = OperandResolver.coerceValueToInt(ev); + int ix = evaluateFirstArg(args[0], srcRowIndex, srcColumnIndex); if (ix < 1 || ix >= args.length) { return ErrorEval.VALUE_INVALID; } @@ -44,4 +42,10 @@ public final class Choose implements Function { return e.getErrorEval(); } } + + public static int evaluateFirstArg(ValueEval arg0, int srcRowIndex, int srcColumnIndex) + throws EvaluationException { + ValueEval ev = OperandResolver.getSingleValue(arg0, srcRowIndex, srcColumnIndex); + return OperandResolver.coerceValueToInt(ev); + } } diff --git a/src/java/org/apache/poi/hssf/record/formula/functions/If.java b/src/java/org/apache/poi/hssf/record/formula/functions/If.java index f8d3c03ee3..1723f014bc 100644 --- a/src/java/org/apache/poi/hssf/record/formula/functions/If.java +++ b/src/java/org/apache/poi/hssf/record/formula/functions/If.java @@ -53,7 +53,7 @@ public final class If implements Function { return falseResult; } - private static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, short srcCellCol) + public static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, int srcCellCol) throws EvaluationException { ValueEval ve = OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol); Boolean b = OperandResolver.coerceValueToBoolean(ve, false); diff --git a/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java b/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java index 2a272fa3db..f9b7151dfa 100644 --- a/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java +++ b/src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java @@ -52,6 +52,7 @@ import org.apache.poi.hssf.record.formula.eval.AreaEval; import org.apache.poi.hssf.record.formula.eval.BlankEval; import org.apache.poi.hssf.record.formula.eval.BoolEval; import org.apache.poi.hssf.record.formula.eval.ErrorEval; +import org.apache.poi.hssf.record.formula.eval.EvaluationException; import org.apache.poi.hssf.record.formula.eval.MissingArgEval; import org.apache.poi.hssf.record.formula.eval.NameEval; import org.apache.poi.hssf.record.formula.eval.NameXEval; @@ -60,7 +61,9 @@ import org.apache.poi.hssf.record.formula.eval.OperationEval; import org.apache.poi.hssf.record.formula.eval.RefEval; import org.apache.poi.hssf.record.formula.eval.StringEval; import org.apache.poi.hssf.record.formula.eval.ValueEval; +import org.apache.poi.hssf.record.formula.functions.Choose; import org.apache.poi.hssf.record.formula.functions.FreeRefFunction; +import org.apache.poi.hssf.record.formula.functions.If; import org.apache.poi.hssf.record.formula.udf.UDFFinder; import org.apache.poi.hssf.util.CellReference; import org.apache.poi.ss.formula.CollaboratingWorkbooksEnvironment.WorkbookNotFoundException; @@ -228,7 +231,7 @@ public final class WorkbookEvaluator { private ValueEval evaluateAny(EvaluationCell srcCell, int sheetIndex, int rowIndex, int columnIndex, EvaluationTracker tracker) { - // avoid tracking dependencies for cells that have constant definition + // avoid tracking dependencies to cells that have constant definition boolean shouldCellDependencyBeRecorded = _stabilityClassifier == null ? true : !_stabilityClassifier.isCellFinal(sheetIndex, rowIndex, columnIndex); if (srcCell == null || srcCell.getCellType() != Cell.CELL_TYPE_FORMULA) { @@ -344,6 +347,66 @@ public final class WorkbookEvaluator { byte nArgs = 1; // tAttrSum always has 1 parameter ptg = new FuncVarPtg("SUM", nArgs); } + if (attrPtg.isOptimizedChoose()) { + ValueEval arg0 = stack.pop(); + int[] jumpTable = attrPtg.getJumpTable(); + int dist; + int nChoices = jumpTable.length; + try { + int switchIndex = Choose.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex()); + if (switchIndex<1 || switchIndex > nChoices) { + stack.push(ErrorEval.VALUE_INVALID); + dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE) + } else { + dist = jumpTable[switchIndex-1]; + } + } catch (EvaluationException e) { + stack.push(e.getErrorEval()); + dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE) + } + // Encoded dist for tAttrChoose includes size of jump table, but + // countTokensToBeSkipped() does not (it counts whole tokens). + dist -= nChoices*2+2; // subtract jump table size + i+= countTokensToBeSkipped(ptgs, i, dist); + continue; + } + if (attrPtg.isOptimizedIf()) { + ValueEval arg0 = stack.pop(); + boolean evaluatedPredicate; + try { + evaluatedPredicate = If.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex()); + } catch (EvaluationException e) { + stack.push(e.getErrorEval()); + int dist = attrPtg.getData(); + i+= countTokensToBeSkipped(ptgs, i, dist); + attrPtg = (AttrPtg) ptgs[i]; + dist = attrPtg.getData()+1; + i+= countTokensToBeSkipped(ptgs, i, dist); + continue; + } + if (evaluatedPredicate) { + // nothing to skip - true param folows + } else { + int dist = attrPtg.getData(); + i+= countTokensToBeSkipped(ptgs, i, dist); + Ptg nextPtg = ptgs[i+1]; + if (ptgs[i] instanceof AttrPtg && nextPtg instanceof FuncVarPtg) { + // this is an if statement without a false param (as opposed to MissingArgPtg as the false param) + i++; + stack.push(BoolEval.FALSE); + } + } + continue; + } + if (attrPtg.isSkip()) { + int dist = attrPtg.getData()+1; + i+= countTokensToBeSkipped(ptgs, i, dist); + if (stack.peek() == MissingArgEval.instance) { + stack.pop(); + stack.push(BlankEval.INSTANCE); + } + continue; + } } if (ptg instanceof ControlPtg) { // skip Parentheses, Attr, etc @@ -402,6 +465,27 @@ public final class WorkbookEvaluator { return value; } + /** + * Calculates the number of tokens that the evaluator should skip upon reaching a tAttrSkip. + * + * @return the number of tokens (starting from startIndex+1) that need to be skipped + * to achieve the specified distInBytes skip distance. + */ + private static int countTokensToBeSkipped(Ptg[] ptgs, int startIndex, int distInBytes) { + int remBytes = distInBytes; + int index = startIndex; + while (remBytes != 0) { + index++; + remBytes -= ptgs[index].getSize(); + if (remBytes < 0) { + throw new RuntimeException("Bad skip distance (wrong token size calculation)."); + } + if (index >= ptgs.length) { + throw new RuntimeException("Skip distance too far (ran out of formula tokens)."); + } + } + return index-startIndex; + } /** * Dereferences a single value from any AreaEval or RefEval evaluation result. * If the supplied evaluationResult is just a plain value, it is returned as-is. diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java b/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java index d94cfd9a96..939634b629 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestFormulaEvaluatorBugs.java @@ -31,6 +31,7 @@ import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate; import org.apache.poi.hssf.record.formula.AreaPtg; import org.apache.poi.hssf.record.formula.FuncVarPtg; import org.apache.poi.hssf.record.formula.Ptg; +import org.apache.poi.hssf.record.formula.eval.ErrorEval; import org.apache.poi.hssf.record.formula.eval.ValueEval; import org.apache.poi.ss.formula.EvaluationCell; import org.apache.poi.ss.formula.EvaluationListener; @@ -319,6 +320,10 @@ public final class TestFormulaEvaluatorBugs extends TestCase { * The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values */ public void testSlowEvaluate45376() { + /* + * Note - to observe behaviour without caching, disable the call to + * updateValue() from FormulaCellCacheEntry.updateFormulaResult(). + */ // Firstly set up a sequence of formula cells where each depends on the previous multiple // times. Without caching, each subsequent cell take about 4 times longer to evaluate. @@ -330,30 +335,39 @@ public final class TestFormulaEvaluatorBugs extends TestCase { char prevCol = (char) ('A' + i-1); String prevCell = prevCol + "1"; // this formula is inspired by the offending formula of the attachment for bug 45376 + // IF(DATE(YEAR(A1),MONTH(A1)+1,1)<=$D$3,DATE(YEAR(A1),MONTH(A1)+1,1),NA()) etc String formula = "IF(DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1)<=$D$3," + "DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1),NA())"; cell.setCellFormula(formula); - } Calendar cal = new GregorianCalendar(2000, 0, 1, 0, 0, 0); row.createCell(0).setCellValue(cal); - // Choose cell A9, so that the failing test case doesn't take too long to execute. + // Choose cell A9 instead of A10, so that the failing test case doesn't take too long to execute. HSSFCell cell = row.getCell(8); EvalListener evalListener = new EvalListener(); WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener); - evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell)); + ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell)); int evalCount = evalListener.getCountCacheMisses(); if (evalCount > 10) { // Without caching, evaluating cell 'A9' takes 21845 evaluations which consumes // much time (~3 sec on Core 2 Duo 2.2GHz) + // short-circuit-if optimisation cuts this down to 255 evaluations which is still too high System.err.println("Cell A9 took " + evalCount + " intermediate evaluations"); throw new AssertionFailedError("Identifed bug 45376 - Formula evaluator should cache values"); } - // With caching, the evaluationCount is 8 which is a big improvement - // Note - these expected values may change if the WorkbookEvaluator is - // ever optimised to short circuit 'if' functions. + // With caching, the evaluationCount is 8 which is exactly the + // number of formula cells that needed to be evaluated. assertEquals(8, evalCount); - assertEquals(24, evalListener.getCountCacheHits()); + + // The cache hits would be 24 if fully evaluating all arguments of the + // "IF()" functions (Each of the 8 formulas has 4 refs to formula cells + // which result in 1 cache miss and 3 cache hits). However with the + // short-circuit-if optimisation, 2 of the cell refs get skipped + // reducing this metric 8. + assertEquals(8, evalListener.getCountCacheHits()); + + // confirm the evaluation result too + assertEquals(ErrorEval.NA, ve); } } diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java index 082f07a84e..02e383f33a 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFFormulaEvaluator.java @@ -22,6 +22,13 @@ import junit.framework.TestCase; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.record.NameRecord; +import org.apache.poi.hssf.record.formula.Ptg; +import org.apache.poi.hssf.record.formula.eval.NumberEval; +import org.apache.poi.hssf.record.formula.eval.ValueEval; +import org.apache.poi.ss.formula.EvaluationCell; +import org.apache.poi.ss.formula.EvaluationListener; +import org.apache.poi.ss.formula.WorkbookEvaluator; +import org.apache.poi.ss.formula.WorkbookEvaluatorTestHelper; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellValue; /** @@ -167,4 +174,46 @@ public final class TestHSSFFormulaEvaluator extends TestCase { assertEquals(Cell.CELL_TYPE_NUMERIC, value.getCellType()); assertEquals(5.33, value.getNumberValue(), 0.0); } + private static final class EvalCountListener extends EvaluationListener { + private int _evalCount; + public EvalCountListener() { + _evalCount = 0; + } + public void onStartEvaluate(EvaluationCell cell, ICacheEntry entry, Ptg[] ptgs) { + _evalCount++; + } + public int getEvalCount() { + return _evalCount; + } + } + + /** + * The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values + */ + public void testShortCircuitIfEvaluation() { + + // Set up a simple IF() formula that has measurable evaluation cost for its operands. + HSSFWorkbook wb = new HSSFWorkbook(); + HSSFSheet sheet = wb.createSheet("Sheet1"); + HSSFRow row = sheet.createRow(0); + HSSFCell cellA1 = row.createCell(0); + cellA1.setCellFormula("if(B1,C1,D1+E1+F1)"); + // populate cells B1..F1 with simple formulas instead of plain values so we can use + // EvaluationListener to check which parts of the first formula get evaluated + for (int i=1; i<6; i++) { + // formulas are just literal constants "1".."5" + row.createCell(i).setCellFormula(String.valueOf(i)); + } + + EvalCountListener evalListener = new EvalCountListener(); + WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener); + ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cellA1)); + int evalCount = evalListener.getEvalCount(); + if (evalCount == 6) { + // Without short-circuit-if evaluation, evaluating cell 'A1' takes 3 extra evaluations (for D1,E1,F1) + throw new AssertionFailedError("Identifed bug 48195 - Formula evaluator should short-circuit IF() calculations."); + } + assertEquals(3, evalCount); + assertEquals(2.0, ((NumberEval)ve).getNumberValue(), 0D); + } }