From d80f48dd82269964449864acbc81be2eac057884 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 3 Apr 2022 13:25:21 +0000 Subject: [PATCH] Speed up execution of formulas with whole-column area reference The previous change via r1894675 did not fully solve this as it only took place when more than one sheet were used. There were still some cases where evaluating formulas needed to iterate across a huge number of non-existant rows, e.g. TestVLookup, which took more than 1 minute locally. However doing the lowering of row-numbers always would trigger test-regressions as some Excel-functionality depends on actions being possible on rows larger than current "last row" in the sheet. Thus changed this to a slightly different approach which only adjusts last-row when it is at or above the workbook-limit of rows Execution of TestVLookup is now at 1-2 seconds! Also added a test-case to verify details of the implementation now. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1899533 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/ss/formula/SheetRangeEvaluator.java | 29 +++- .../poi/ss/formula/SheetRefEvaluator.java | 8 + .../ss/formula/SheetRangeEvaluatorTest.java | 143 ++++++++++++++++++ 3 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 poi/src/test/java/org/apache/poi/ss/formula/SheetRangeEvaluatorTest.java diff --git a/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java b/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java index 6d1a1baaa2..10ac22da06 100644 --- a/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java +++ b/poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java @@ -38,6 +38,7 @@ final class SheetRangeEvaluator implements SheetRange { _lastSheetIndex = lastSheetIndex; _sheetEvaluators = sheetEvaluators.clone(); } + public SheetRangeEvaluator(int onlySheetIndex, SheetRefEvaluator sheetEvaluator) { this(onlySheetIndex, onlySheetIndex, new SheetRefEvaluator[] {sheetEvaluator}); } @@ -53,6 +54,7 @@ final class SheetRangeEvaluator implements SheetRange { public int getFirstSheetIndex() { return _firstSheetIndex; } + public int getLastSheetIndex() { return _lastSheetIndex; } @@ -60,6 +62,7 @@ final class SheetRangeEvaluator implements SheetRange { public String getSheetName(int sheetIndex) { return getSheetEvaluator(sheetIndex).getSheetName(); } + public String getSheetNameRange() { StringBuilder sb = new StringBuilder(); sb.append(getSheetName(_firstSheetIndex)); @@ -86,13 +89,31 @@ final class SheetRangeEvaluator implements SheetRange { * Otherwise, the highest used row number across all sheets is returned. */ public int adjustRowNumber(int rowIndex) { - int maxRowNum = rowIndex; + // some code is depending on the row-numbers higher than the "last row of the sheet" + // so we can only apply this optimisation if the given rowIndex is at or above the maximum + // possible number of row-indices for the current SpreadSheetVersion + boolean found = false; + for (int i = _firstSheetIndex; i <= _lastSheetIndex; i++) { + if (rowIndex >= _sheetEvaluators[i - _firstSheetIndex].getMaxRowNum()) { + found = true; + } + } + + // rowIndex is not large enough, so no change necessary + if (!found) { + return rowIndex; + } + + // reset maxRowNum if it is at the limit, i.e. we did choose "all rows" somewhere + // This happens e.g. for whole-column-references like "A:B" - for (int i = _firstSheetIndex; i < _lastSheetIndex; i++) { - maxRowNum = Math.max(maxRowNum, _sheetEvaluators[i].getLastRowNum()); + // find the highest row-number across all relevant sheets + int maxRowNum = 0; + for (int i = _firstSheetIndex; i <= _lastSheetIndex; i++) { + maxRowNum = Math.max(maxRowNum, _sheetEvaluators[i - _firstSheetIndex].getLastRowNum()); } - // do not try to evaluate further than there are rows in any sheet + // now use the new maxRowNum if it was actually lowered return Math.min(rowIndex, maxRowNum); } } diff --git a/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java b/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java index 8960efaa31..a79917374f 100644 --- a/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java +++ b/poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java @@ -96,4 +96,12 @@ final class SheetRefEvaluator { public int getLastRowNum() { return getSheet().getLastRowNum(); } + + /** + * @return The maximum row number that is possible for the current + * Spreadsheet version, see {@link org.apache.poi.ss.SpreadsheetVersion#getLastRowIndex()} + */ + public int getMaxRowNum() { + return _bookEvaluator.getWorkbook().getSpreadsheetVersion().getLastRowIndex(); + } } diff --git a/poi/src/test/java/org/apache/poi/ss/formula/SheetRangeEvaluatorTest.java b/poi/src/test/java/org/apache/poi/ss/formula/SheetRangeEvaluatorTest.java new file mode 100644 index 0000000000..621eb311e2 --- /dev/null +++ b/poi/src/test/java/org/apache/poi/ss/formula/SheetRangeEvaluatorTest.java @@ -0,0 +1,143 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ss.formula; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.poi.hssf.usermodel.HSSFEvaluationWorkbook; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.SpreadsheetVersion; +import org.apache.poi.ss.formula.eval.BlankEval; +import org.apache.poi.ss.formula.eval.ValueEval; +import org.apache.poi.ss.usermodel.Sheet; +import org.junit.jupiter.api.Test; + +class SheetRangeEvaluatorTest { + @Test + public void testConstruct() { + SheetRangeEvaluator eval = new SheetRangeEvaluator(0, 1, new SheetRefEvaluator[0]); + assertEquals(0, eval.getFirstSheetIndex()); + assertEquals(1, eval.getLastSheetIndex()); + } + + @Test + public void testConstruct2() { + SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(null, null, 0)); + assertEquals(1, eval.getFirstSheetIndex()); + assertEquals(1, eval.getLastSheetIndex()); + assertNotNull(eval.getSheetEvaluator(1)); + } + + @Test + public void testGetSheetName() { + HSSFWorkbook wb = new HSSFWorkbook(); + wb.createSheet("abc"); + wb.createSheet("def"); + WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb), + IStabilityClassifier.TOTALLY_IMMUTABLE, null); + SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(book, null, 1)); + assertEquals("def", eval.getSheetName(1)); + assertEquals("def", eval.getSheetNameRange()); + } + + @Test + public void testGetSheetNameRange() { + SheetRangeEvaluator eval = createWithTwoSheets(); + assertEquals("abc", eval.getSheetName(0)); + assertEquals("def", eval.getSheetName(1)); + + assertEquals("abc:def", eval.getSheetNameRange()); + } + + private SheetRangeEvaluator createWithTwoSheets() { + HSSFWorkbook wb = new HSSFWorkbook(); + wb.createSheet("abc"); + wb.createSheet("def"); + WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb), + IStabilityClassifier.TOTALLY_IMMUTABLE, null); + return new SheetRangeEvaluator(0, 1, + new SheetRefEvaluator[] { + new SheetRefEvaluator(book, null, 0), + new SheetRefEvaluator(book, null, 1) + }); + } + + @SuppressWarnings("ResultOfMethodCallIgnored") + @Test + public void testInvalidInput() { + assertThrows(IllegalArgumentException.class, () -> new SheetRangeEvaluator(-1, 0, null)); + assertThrows(IllegalArgumentException.class, () -> new SheetRangeEvaluator(1, 0, null)); + + SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(null, null, 0)); + assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(-1)); + assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(0)); + assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(2)); + } + + @Test + public void testGetEval() { + SheetRangeEvaluator eval = createWithTwoSheets(); + ValueEval valueEval = eval.getEvalForCell(1, 0, 0); + assertTrue(valueEval instanceof BlankEval, + "Had: " + valueEval); + } + + @Test + public void testAdjustRowNumber() { + HSSFWorkbook wb = new HSSFWorkbook(); + Sheet sheet = wb.createSheet("abc"); + sheet.createRow(0); + sheet.createRow(10); + WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb), + IStabilityClassifier.TOTALLY_IMMUTABLE, null); + SheetRangeEvaluator eval = new SheetRangeEvaluator(0, new SheetRefEvaluator(book, null, 0)); + + assertEquals(0, eval.adjustRowNumber(0), + "Should stay if input is lower than max row"); + assertEquals(1, eval.adjustRowNumber(1), + "Should stay if input is lower than max row"); + assertEquals(8, eval.adjustRowNumber(8), + "Should stay if input is lower than max row"); + assertEquals(9, eval.adjustRowNumber(9), + "Should stay if input is lower than max row"); + assertEquals(10, eval.adjustRowNumber(10), + "Should stay if input is equal than max row"); + assertEquals(11, eval.adjustRowNumber(11), + "Should be adjusted to 9 as this is the max row-index"); + assertEquals(10, eval.adjustRowNumber(Integer.MAX_VALUE), + "Should be adjusted to 9 as this is the max row-index"); + assertEquals(10, eval.adjustRowNumber(SpreadsheetVersion.EXCEL97.getLastRowIndex()), + "Should be adjusted to 9 as this is the max row-index"); + } + + @Test + public void testAdjustRowNumberEmpty() { + SheetRangeEvaluator eval = createWithTwoSheets(); + assertEquals(0, eval.adjustRowNumber(0), + "Should stay at zero as there is no row in workbook"); + assertEquals(1, eval.adjustRowNumber(1), + "Should be adjusted to zero as there is no row in workbook"); + assertEquals(0, eval.adjustRowNumber(SpreadsheetVersion.EXCEL97.getLastRowIndex()), + "Should be adjusted to 9 as this is the max row-index"); + assertEquals(0, eval.adjustRowNumber(Integer.MAX_VALUE), + "Should be adjusted to zero as there is no row in workbook"); + } +} \ No newline at end of file -- 2.39.5