]> source.dussan.org Git - poi.git/commitdiff
Speed up execution of formulas with whole-column area reference
authorDominik Stadler <centic@apache.org>
Sun, 3 Apr 2022 13:25:21 +0000 (13:25 +0000)
committerDominik Stadler <centic@apache.org>
Sun, 3 Apr 2022 13:25:21 +0000 (13:25 +0000)
The previous change via r1894675 did not fully solve this as
it only took place when more than one sheet were used.

There were still some cases where evaluating formulas needed
to iterate across a huge number of non-existant rows, e.g.
TestVLookup, which took more than 1 minute locally.

However doing the lowering of row-numbers always would trigger
test-regressions as some Excel-functionality depends on actions
being possible on rows larger than current "last row" in the sheet.

Thus changed this to a slightly different approach which only adjusts
last-row when it is at or above the workbook-limit of rows

Execution of TestVLookup is now at 1-2 seconds!

Also added a test-case to verify details of the implementation now.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1899533 13f79535-47bb-0310-9956-ffa450edef68

poi/src/main/java/org/apache/poi/ss/formula/SheetRangeEvaluator.java
poi/src/main/java/org/apache/poi/ss/formula/SheetRefEvaluator.java
poi/src/test/java/org/apache/poi/ss/formula/SheetRangeEvaluatorTest.java [new file with mode: 0644]

index 6d1a1baaa243bee55d910fe4d536e038f02c7d03..10ac22da060f366d981be6a64a8a28ff3496caec 100644 (file)
@@ -38,6 +38,7 @@ final class SheetRangeEvaluator implements SheetRange {
         _lastSheetIndex = lastSheetIndex;
         _sheetEvaluators = sheetEvaluators.clone();
     }
+
     public SheetRangeEvaluator(int onlySheetIndex, SheetRefEvaluator sheetEvaluator) {
         this(onlySheetIndex, onlySheetIndex, new SheetRefEvaluator[] {sheetEvaluator});
     }
@@ -53,6 +54,7 @@ final class SheetRangeEvaluator implements SheetRange {
     public int getFirstSheetIndex() {
         return _firstSheetIndex;
     }
+
     public int getLastSheetIndex() {
         return _lastSheetIndex;
     }
@@ -60,6 +62,7 @@ final class SheetRangeEvaluator implements SheetRange {
     public String getSheetName(int sheetIndex) {
         return getSheetEvaluator(sheetIndex).getSheetName();
     }
+
     public String getSheetNameRange() {
         StringBuilder sb = new StringBuilder();
         sb.append(getSheetName(_firstSheetIndex));
@@ -86,13 +89,31 @@ final class SheetRangeEvaluator implements SheetRange {
      *      Otherwise, the highest used row number across all sheets is returned.
      */
     public int adjustRowNumber(int rowIndex) {
-        int maxRowNum = rowIndex;
+        // some code is depending on the row-numbers higher than the "last row of the sheet"
+        // so we can only apply this optimisation if the given rowIndex is at or above the maximum
+        // possible number of row-indices for the current SpreadSheetVersion
+        boolean found = false;
+        for (int i = _firstSheetIndex; i <= _lastSheetIndex; i++) {
+            if (rowIndex >= _sheetEvaluators[i - _firstSheetIndex].getMaxRowNum()) {
+                found = true;
+            }
+        }
+
+        // rowIndex is not large enough, so no change necessary
+        if (!found) {
+            return rowIndex;
+        }
+
+        // reset maxRowNum if it is at the limit, i.e. we did choose "all rows" somewhere
+        // This happens e.g. for whole-column-references like "A:B"
 
-        for (int i = _firstSheetIndex; i < _lastSheetIndex; i++) {
-            maxRowNum = Math.max(maxRowNum, _sheetEvaluators[i].getLastRowNum());
+        // find the highest row-number across all relevant sheets
+        int maxRowNum = 0;
+        for (int i = _firstSheetIndex; i <= _lastSheetIndex; i++) {
+            maxRowNum = Math.max(maxRowNum, _sheetEvaluators[i - _firstSheetIndex].getLastRowNum());
         }
 
-        // do not try to evaluate further than there are rows in any sheet
+        // now use the new maxRowNum if it was actually lowered
         return Math.min(rowIndex, maxRowNum);
     }
 }
index 8960efaa31b7b5a810d05750aa36021a4fbb774f..a79917374f565ac0056f37f270c4cc864c06b4ef 100644 (file)
@@ -96,4 +96,12 @@ final class SheetRefEvaluator {
     public int getLastRowNum() {
         return getSheet().getLastRowNum();
     }
+
+    /**
+     * @return The maximum row number that is possible for the current
+     *         Spreadsheet version, see {@link org.apache.poi.ss.SpreadsheetVersion#getLastRowIndex()}
+     */
+    public int getMaxRowNum() {
+        return _bookEvaluator.getWorkbook().getSpreadsheetVersion().getLastRowIndex();
+    }
 }
diff --git a/poi/src/test/java/org/apache/poi/ss/formula/SheetRangeEvaluatorTest.java b/poi/src/test/java/org/apache/poi/ss/formula/SheetRangeEvaluatorTest.java
new file mode 100644 (file)
index 0000000..621eb31
--- /dev/null
@@ -0,0 +1,143 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.ss.formula;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.apache.poi.hssf.usermodel.HSSFEvaluationWorkbook;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.SpreadsheetVersion;
+import org.apache.poi.ss.formula.eval.BlankEval;
+import org.apache.poi.ss.formula.eval.ValueEval;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.junit.jupiter.api.Test;
+
+class SheetRangeEvaluatorTest {
+    @Test
+    public void testConstruct() {
+        SheetRangeEvaluator eval = new SheetRangeEvaluator(0, 1, new SheetRefEvaluator[0]);
+        assertEquals(0, eval.getFirstSheetIndex());
+        assertEquals(1, eval.getLastSheetIndex());
+    }
+
+    @Test
+    public void testConstruct2() {
+        SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(null, null, 0));
+        assertEquals(1, eval.getFirstSheetIndex());
+        assertEquals(1, eval.getLastSheetIndex());
+        assertNotNull(eval.getSheetEvaluator(1));
+    }
+
+    @Test
+    public void testGetSheetName() {
+        HSSFWorkbook wb = new HSSFWorkbook();
+        wb.createSheet("abc");
+        wb.createSheet("def");
+        WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb),
+                IStabilityClassifier.TOTALLY_IMMUTABLE, null);
+        SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(book, null, 1));
+        assertEquals("def", eval.getSheetName(1));
+        assertEquals("def", eval.getSheetNameRange());
+    }
+
+    @Test
+    public void testGetSheetNameRange() {
+        SheetRangeEvaluator eval = createWithTwoSheets();
+        assertEquals("abc", eval.getSheetName(0));
+        assertEquals("def", eval.getSheetName(1));
+
+        assertEquals("abc:def", eval.getSheetNameRange());
+    }
+
+    private SheetRangeEvaluator createWithTwoSheets() {
+        HSSFWorkbook wb = new HSSFWorkbook();
+        wb.createSheet("abc");
+        wb.createSheet("def");
+        WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb),
+                IStabilityClassifier.TOTALLY_IMMUTABLE, null);
+        return new SheetRangeEvaluator(0, 1,
+                new SheetRefEvaluator[] {
+                        new SheetRefEvaluator(book, null, 0),
+                        new SheetRefEvaluator(book, null, 1)
+                });
+    }
+
+    @SuppressWarnings("ResultOfMethodCallIgnored")
+    @Test
+    public void testInvalidInput() {
+        assertThrows(IllegalArgumentException.class, () -> new SheetRangeEvaluator(-1, 0, null));
+        assertThrows(IllegalArgumentException.class, () -> new SheetRangeEvaluator(1, 0, null));
+
+        SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(null, null, 0));
+        assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(-1));
+        assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(0));
+        assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(2));
+    }
+
+    @Test
+    public void testGetEval() {
+        SheetRangeEvaluator eval = createWithTwoSheets();
+        ValueEval valueEval = eval.getEvalForCell(1, 0, 0);
+        assertTrue(valueEval instanceof BlankEval,
+                "Had: " + valueEval);
+    }
+
+    @Test
+    public void testAdjustRowNumber() {
+        HSSFWorkbook wb = new HSSFWorkbook();
+        Sheet sheet = wb.createSheet("abc");
+        sheet.createRow(0);
+        sheet.createRow(10);
+        WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb),
+                IStabilityClassifier.TOTALLY_IMMUTABLE, null);
+        SheetRangeEvaluator eval = new SheetRangeEvaluator(0, new SheetRefEvaluator(book, null, 0));
+
+        assertEquals(0, eval.adjustRowNumber(0),
+                "Should stay if input is lower than max row");
+        assertEquals(1, eval.adjustRowNumber(1),
+                "Should stay if input is lower than max row");
+        assertEquals(8, eval.adjustRowNumber(8),
+                "Should stay if input is lower than max row");
+        assertEquals(9, eval.adjustRowNumber(9),
+                "Should stay if input is lower than max row");
+        assertEquals(10, eval.adjustRowNumber(10),
+                "Should stay if input is equal than max row");
+        assertEquals(11, eval.adjustRowNumber(11),
+                "Should be adjusted to 9 as this is the max row-index");
+        assertEquals(10, eval.adjustRowNumber(Integer.MAX_VALUE),
+                "Should be adjusted to 9 as this is the max row-index");
+        assertEquals(10, eval.adjustRowNumber(SpreadsheetVersion.EXCEL97.getLastRowIndex()),
+                "Should be adjusted to 9 as this is the max row-index");
+    }
+
+    @Test
+    public void testAdjustRowNumberEmpty() {
+        SheetRangeEvaluator eval = createWithTwoSheets();
+        assertEquals(0, eval.adjustRowNumber(0),
+                "Should stay at zero as there is no row in workbook");
+        assertEquals(1, eval.adjustRowNumber(1),
+                "Should be adjusted to zero as there is no row in workbook");
+        assertEquals(0, eval.adjustRowNumber(SpreadsheetVersion.EXCEL97.getLastRowIndex()),
+                "Should be adjusted to 9 as this is the max row-index");
+        assertEquals(0, eval.adjustRowNumber(Integer.MAX_VALUE),
+                "Should be adjusted to zero as there is no row in workbook");
+    }
+}
\ No newline at end of file