<changes>
<release version="3.6-beta1" date="2009-??-??">
+ <action dev="POI-DEVELOPERS" type="add">48195 - short-circuit evaluation of IF() and CHOOSE()</action>
<action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
<action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
<action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>
import org.apache.poi.hssf.record.formula.eval.ValueEval;
/**
- *
* @author Josh Micich
*/
public final class Choose implements Function {
}
try {
- ValueEval ev = OperandResolver.getSingleValue(args[0], srcRowIndex, srcColumnIndex);
- int ix = OperandResolver.coerceValueToInt(ev);
+ int ix = evaluateFirstArg(args[0], srcRowIndex, srcColumnIndex);
if (ix < 1 || ix >= args.length) {
return ErrorEval.VALUE_INVALID;
}
return e.getErrorEval();
}
}
+
+ public static int evaluateFirstArg(ValueEval arg0, int srcRowIndex, int srcColumnIndex)
+ throws EvaluationException {
+ ValueEval ev = OperandResolver.getSingleValue(arg0, srcRowIndex, srcColumnIndex);
+ return OperandResolver.coerceValueToInt(ev);
+ }
}
return falseResult;
}
- private static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, short srcCellCol)
+ public static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, int srcCellCol)
throws EvaluationException {
ValueEval ve = OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol);
Boolean b = OperandResolver.coerceValueToBoolean(ve, false);
import org.apache.poi.hssf.record.formula.eval.BlankEval;
import org.apache.poi.hssf.record.formula.eval.BoolEval;
import org.apache.poi.hssf.record.formula.eval.ErrorEval;
+import org.apache.poi.hssf.record.formula.eval.EvaluationException;
import org.apache.poi.hssf.record.formula.eval.MissingArgEval;
import org.apache.poi.hssf.record.formula.eval.NameEval;
import org.apache.poi.hssf.record.formula.eval.NameXEval;
import org.apache.poi.hssf.record.formula.eval.RefEval;
import org.apache.poi.hssf.record.formula.eval.StringEval;
import org.apache.poi.hssf.record.formula.eval.ValueEval;
+import org.apache.poi.hssf.record.formula.functions.Choose;
import org.apache.poi.hssf.record.formula.functions.FreeRefFunction;
+import org.apache.poi.hssf.record.formula.functions.If;
import org.apache.poi.hssf.record.formula.udf.UDFFinder;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.ss.formula.CollaboratingWorkbooksEnvironment.WorkbookNotFoundException;
private ValueEval evaluateAny(EvaluationCell srcCell, int sheetIndex,
int rowIndex, int columnIndex, EvaluationTracker tracker) {
- // avoid tracking dependencies for cells that have constant definition
+ // avoid tracking dependencies to cells that have constant definition
boolean shouldCellDependencyBeRecorded = _stabilityClassifier == null ? true
: !_stabilityClassifier.isCellFinal(sheetIndex, rowIndex, columnIndex);
if (srcCell == null || srcCell.getCellType() != Cell.CELL_TYPE_FORMULA) {
byte nArgs = 1; // tAttrSum always has 1 parameter
ptg = new FuncVarPtg("SUM", nArgs);
}
+ if (attrPtg.isOptimizedChoose()) {
+ ValueEval arg0 = stack.pop();
+ int[] jumpTable = attrPtg.getJumpTable();
+ int dist;
+ int nChoices = jumpTable.length;
+ try {
+ int switchIndex = Choose.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
+ if (switchIndex<1 || switchIndex > nChoices) {
+ stack.push(ErrorEval.VALUE_INVALID);
+ dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
+ } else {
+ dist = jumpTable[switchIndex-1];
+ }
+ } catch (EvaluationException e) {
+ stack.push(e.getErrorEval());
+ dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
+ }
+ // Encoded dist for tAttrChoose includes size of jump table, but
+ // countTokensToBeSkipped() does not (it counts whole tokens).
+ dist -= nChoices*2+2; // subtract jump table size
+ i+= countTokensToBeSkipped(ptgs, i, dist);
+ continue;
+ }
+ if (attrPtg.isOptimizedIf()) {
+ ValueEval arg0 = stack.pop();
+ boolean evaluatedPredicate;
+ try {
+ evaluatedPredicate = If.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
+ } catch (EvaluationException e) {
+ stack.push(e.getErrorEval());
+ int dist = attrPtg.getData();
+ i+= countTokensToBeSkipped(ptgs, i, dist);
+ attrPtg = (AttrPtg) ptgs[i];
+ dist = attrPtg.getData()+1;
+ i+= countTokensToBeSkipped(ptgs, i, dist);
+ continue;
+ }
+ if (evaluatedPredicate) {
+ // nothing to skip - true param folows
+ } else {
+ int dist = attrPtg.getData();
+ i+= countTokensToBeSkipped(ptgs, i, dist);
+ Ptg nextPtg = ptgs[i+1];
+ if (ptgs[i] instanceof AttrPtg && nextPtg instanceof FuncVarPtg) {
+ // this is an if statement without a false param (as opposed to MissingArgPtg as the false param)
+ i++;
+ stack.push(BoolEval.FALSE);
+ }
+ }
+ continue;
+ }
+ if (attrPtg.isSkip()) {
+ int dist = attrPtg.getData()+1;
+ i+= countTokensToBeSkipped(ptgs, i, dist);
+ if (stack.peek() == MissingArgEval.instance) {
+ stack.pop();
+ stack.push(BlankEval.INSTANCE);
+ }
+ continue;
+ }
}
if (ptg instanceof ControlPtg) {
// skip Parentheses, Attr, etc
return value;
}
+ /**
+ * Calculates the number of tokens that the evaluator should skip upon reaching a tAttrSkip.
+ *
+ * @return the number of tokens (starting from <tt>startIndex+1</tt>) that need to be skipped
+ * to achieve the specified <tt>distInBytes</tt> skip distance.
+ */
+ private static int countTokensToBeSkipped(Ptg[] ptgs, int startIndex, int distInBytes) {
+ int remBytes = distInBytes;
+ int index = startIndex;
+ while (remBytes != 0) {
+ index++;
+ remBytes -= ptgs[index].getSize();
+ if (remBytes < 0) {
+ throw new RuntimeException("Bad skip distance (wrong token size calculation).");
+ }
+ if (index >= ptgs.length) {
+ throw new RuntimeException("Skip distance too far (ran out of formula tokens).");
+ }
+ }
+ return index-startIndex;
+ }
/**
* Dereferences a single value from any AreaEval or RefEval evaluation result.
* If the supplied evaluationResult is just a plain value, it is returned as-is.
import org.apache.poi.hssf.record.formula.AreaPtg;
import org.apache.poi.hssf.record.formula.FuncVarPtg;
import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.eval.ErrorEval;
import org.apache.poi.hssf.record.formula.eval.ValueEval;
import org.apache.poi.ss.formula.EvaluationCell;
import org.apache.poi.ss.formula.EvaluationListener;
* The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
*/
public void testSlowEvaluate45376() {
+ /*
+ * Note - to observe behaviour without caching, disable the call to
+ * updateValue() from FormulaCellCacheEntry.updateFormulaResult().
+ */
// Firstly set up a sequence of formula cells where each depends on the previous multiple
// times. Without caching, each subsequent cell take about 4 times longer to evaluate.
char prevCol = (char) ('A' + i-1);
String prevCell = prevCol + "1";
// this formula is inspired by the offending formula of the attachment for bug 45376
+ // IF(DATE(YEAR(A1),MONTH(A1)+1,1)<=$D$3,DATE(YEAR(A1),MONTH(A1)+1,1),NA()) etc
String formula = "IF(DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1)<=$D$3," +
"DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1),NA())";
cell.setCellFormula(formula);
-
}
Calendar cal = new GregorianCalendar(2000, 0, 1, 0, 0, 0);
row.createCell(0).setCellValue(cal);
- // Choose cell A9, so that the failing test case doesn't take too long to execute.
+ // Choose cell A9 instead of A10, so that the failing test case doesn't take too long to execute.
HSSFCell cell = row.getCell(8);
EvalListener evalListener = new EvalListener();
WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
- evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
+ ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
int evalCount = evalListener.getCountCacheMisses();
if (evalCount > 10) {
// Without caching, evaluating cell 'A9' takes 21845 evaluations which consumes
// much time (~3 sec on Core 2 Duo 2.2GHz)
+ // short-circuit-if optimisation cuts this down to 255 evaluations which is still too high
System.err.println("Cell A9 took " + evalCount + " intermediate evaluations");
throw new AssertionFailedError("Identifed bug 45376 - Formula evaluator should cache values");
}
- // With caching, the evaluationCount is 8 which is a big improvement
- // Note - these expected values may change if the WorkbookEvaluator is
- // ever optimised to short circuit 'if' functions.
+ // With caching, the evaluationCount is 8 which is exactly the
+ // number of formula cells that needed to be evaluated.
assertEquals(8, evalCount);
- assertEquals(24, evalListener.getCountCacheHits());
+
+ // The cache hits would be 24 if fully evaluating all arguments of the
+ // "IF()" functions (Each of the 8 formulas has 4 refs to formula cells
+ // which result in 1 cache miss and 3 cache hits). However with the
+ // short-circuit-if optimisation, 2 of the cell refs get skipped
+ // reducing this metric 8.
+ assertEquals(8, evalListener.getCountCacheHits());
+
+ // confirm the evaluation result too
+ assertEquals(ErrorEval.NA, ve);
}
}
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.NameRecord;
+import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.eval.NumberEval;
+import org.apache.poi.hssf.record.formula.eval.ValueEval;
+import org.apache.poi.ss.formula.EvaluationCell;
+import org.apache.poi.ss.formula.EvaluationListener;
+import org.apache.poi.ss.formula.WorkbookEvaluator;
+import org.apache.poi.ss.formula.WorkbookEvaluatorTestHelper;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellValue;
/**
assertEquals(Cell.CELL_TYPE_NUMERIC, value.getCellType());
assertEquals(5.33, value.getNumberValue(), 0.0);
}
+ private static final class EvalCountListener extends EvaluationListener {
+ private int _evalCount;
+ public EvalCountListener() {
+ _evalCount = 0;
+ }
+ public void onStartEvaluate(EvaluationCell cell, ICacheEntry entry, Ptg[] ptgs) {
+ _evalCount++;
+ }
+ public int getEvalCount() {
+ return _evalCount;
+ }
+ }
+
+ /**
+ * The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
+ */
+ public void testShortCircuitIfEvaluation() {
+
+ // Set up a simple IF() formula that has measurable evaluation cost for its operands.
+ HSSFWorkbook wb = new HSSFWorkbook();
+ HSSFSheet sheet = wb.createSheet("Sheet1");
+ HSSFRow row = sheet.createRow(0);
+ HSSFCell cellA1 = row.createCell(0);
+ cellA1.setCellFormula("if(B1,C1,D1+E1+F1)");
+ // populate cells B1..F1 with simple formulas instead of plain values so we can use
+ // EvaluationListener to check which parts of the first formula get evaluated
+ for (int i=1; i<6; i++) {
+ // formulas are just literal constants "1".."5"
+ row.createCell(i).setCellFormula(String.valueOf(i));
+ }
+
+ EvalCountListener evalListener = new EvalCountListener();
+ WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
+ ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cellA1));
+ int evalCount = evalListener.getEvalCount();
+ if (evalCount == 6) {
+ // Without short-circuit-if evaluation, evaluating cell 'A1' takes 3 extra evaluations (for D1,E1,F1)
+ throw new AssertionFailedError("Identifed bug 48195 - Formula evaluator should short-circuit IF() calculations.");
+ }
+ assertEquals(3, evalCount);
+ assertEquals(2.0, ((NumberEval)ve).getNumberValue(), 0D);
+ }
}