From: Dominik Stadler Date: Tue, 2 Nov 2021 13:17:39 +0000 (+0000) Subject: Optimize formula evaluation of row-references X-Git-Tag: REL_5_2_0~290 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=d205318583e5dbae0d9ccd64cb55d6930fe285d8;p=poi.git Optimize formula evaluation of row-references We currently walk 1 million rows for every formula which uses a column-reference like "$A" Execution time of test-case TestVlookup.testFullColumnAreaRef61841 went from more than 16 seconds to around 2 seconds git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1894675 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/poi/src/main/java/org/apache/poi/ss/formula/LazyAreaEval.java b/poi/src/main/java/org/apache/poi/ss/formula/LazyAreaEval.java index bb33f870f8..583ffacccc 100644 --- a/poi/src/main/java/org/apache/poi/ss/formula/LazyAreaEval.java +++ b/poi/src/main/java/org/apache/poi/ss/formula/LazyAreaEval.java @@ -37,7 +37,7 @@ final class LazyAreaEval extends AreaEvalBase { public LazyAreaEval(int firstRowIndex, int firstColumnIndex, int lastRowIndex, int lastColumnIndex, SheetRangeEvaluator evaluator) { - super(evaluator, firstRowIndex, firstColumnIndex, lastRowIndex, lastColumnIndex); + super(evaluator, firstRowIndex, firstColumnIndex, evaluator.adjustRowNumber(lastRowIndex), lastColumnIndex); _evaluator = evaluator; } diff --git a/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java b/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java index 806dc08f35..3afe4a4851 100644 --- a/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java +++ b/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java @@ -25,7 +25,7 @@ import org.apache.poi.ss.formula.eval.ValueEval; final class SheetRangeEvaluator implements SheetRange { private final int _firstSheetIndex; private final int _lastSheetIndex; - private SheetRefEvaluator[] _sheetEvaluators; + private final SheetRefEvaluator[] _sheetEvaluators; public 
SheetRangeEvaluator(int firstSheetIndex, int lastSheetIndex, SheetRefEvaluator[] sheetEvaluators) { if (firstSheetIndex < 0) { @@ -41,7 +41,7 @@ final class SheetRangeEvaluator implements SheetRange { public SheetRangeEvaluator(int onlySheetIndex, SheetRefEvaluator sheetEvaluator) { this(onlySheetIndex, onlySheetIndex, new SheetRefEvaluator[] {sheetEvaluator}); } - + public SheetRefEvaluator getSheetEvaluator(int sheetIndex) { if (sheetIndex < _firstSheetIndex || sheetIndex > _lastSheetIndex) { throw new IllegalArgumentException("Invalid SheetIndex: " + sheetIndex + @@ -49,7 +49,7 @@ final class SheetRangeEvaluator implements SheetRange { } return _sheetEvaluators[sheetIndex-_firstSheetIndex]; } - + public int getFirstSheetIndex() { return _firstSheetIndex; } @@ -73,4 +73,26 @@ final class SheetRangeEvaluator implements SheetRange { public ValueEval getEvalForCell(int sheetIndex, int rowIndex, int columnIndex) { return getSheetEvaluator(sheetIndex).getEvalForCell(rowIndex, columnIndex); } + + /** + * This method returns a lower row-number if it would lie outside the row-boundaries of + * any sheet. + * + * This is used to optimize cases where a very high number of rows would be checked otherwise + * without any benefit as no such row exists anyway. + * + * @param rowIndex The 0-based row-index to check + * @return If the given index lies within the max row number across all sheets, it is returned. + * Otherwise, the highest used row number across all sheets is returned. 
+ */ + public int adjustRowNumber(int rowIndex) { + int maxRowNum = rowIndex; + + for (int i = _firstSheetIndex; i < _lastSheetIndex; i++) { + maxRowNum = Math.max(maxRowNum, _sheetEvaluators[i].getLastRowNum()); + } + + // do not try to evaluate further than there are rows in any sheet + return Math.min(rowIndex, maxRowNum); + } } diff --git a/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java b/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java index eb411bd664..e765af3bf8 100644 --- a/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java +++ b/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java @@ -56,8 +56,8 @@ final class SheetRefEvaluator { } /** - * @param rowIndex - * @param columnIndex + * @param rowIndex The 0-based row-index to check + * @param columnIndex The 0-based column-index to check * @return whether cell at rowIndex and columnIndex is a subtotal * @see org.apache.poi.ss.formula.functions.Subtotal */ @@ -83,10 +83,17 @@ final class SheetRefEvaluator { * Used by functions that calculate differently depending on row visibility, like some * variations of SUBTOTAL() * @see org.apache.poi.ss.formula.functions.Subtotal - * @param rowIndex + * @param rowIndex The 0-based row-index to check * @return true if the row is hidden */ public boolean isRowHidden(int rowIndex) { return getSheet().isRowHidden(rowIndex); } + + /** + * @return The last used row in this sheet + */ + public int getLastRowNum() { + return getSheet().getLastRowNum(); + } }