Browse Source

Bug #61841 - Unnecessary long computation when evaluating VLOOKUP on all column reference

Found some optimizations in the general evaluation framework related to blank cells in rows beyond the last defined row of a sheet.

I don't see any issue with passing a bit of context down deeper into this framework, as it's all POI-internal and only had one calling path.

See the above bug for the performance analysis.  Not specifically related to VLOOKUP, but improves that case by more than 2/3 as well.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1817252 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_4_0_0_FINAL
Greg Woolsey 6 years ago
parent
commit
8f26e2721d

+ 12
- 1
src/java/org/apache/poi/hssf/usermodel/HSSFEvaluationSheet.java View File

final class HSSFEvaluationSheet implements EvaluationSheet { final class HSSFEvaluationSheet implements EvaluationSheet {


private final HSSFSheet _hs; private final HSSFSheet _hs;
private int _lastDefinedRow = -1;


public HSSFEvaluationSheet(HSSFSheet hs) { public HSSFEvaluationSheet(HSSFSheet hs) {
_hs = hs; _hs = hs;
_lastDefinedRow = _hs.getLastRowNum();
} }


public HSSFSheet getHSSFSheet() { public HSSFSheet getHSSFSheet() {
return _hs; return _hs;
} }
/* (non-Javadoc)
* @see org.apache.poi.ss.formula.EvaluationSheet#getlastRowNum()
* @since POI 4.0.0
*/
public int getlastRowNum() {
return _lastDefinedRow;
}
@Override @Override
public EvaluationCell getCell(int rowIndex, int columnIndex) { public EvaluationCell getCell(int rowIndex, int columnIndex) {
HSSFRow row = _hs.getRow(rowIndex); HSSFRow row = _hs.getRow(rowIndex);
*/ */
@Override @Override
public void clearAllCachedResultValues() { public void clearAllCachedResultValues() {
// nothing to do
_lastDefinedRow = _hs.getLastRowNum();
} }
} }

+ 2
- 2
src/java/org/apache/poi/ss/formula/CellEvaluationFrame.java View File

_sensitiveInputCells.toArray(result); _sensitiveInputCells.toArray(result);
return result; return result;
} }
public void addUsedBlankCell(int bookIndex, int sheetIndex, int rowIndex, int columnIndex) {
public void addUsedBlankCell(EvaluationWorkbook evalWorkbook, int bookIndex, int sheetIndex, int rowIndex, int columnIndex) {
if (_usedBlankCellGroup == null) { if (_usedBlankCellGroup == null) {
_usedBlankCellGroup = new FormulaUsedBlankCellSet(); _usedBlankCellGroup = new FormulaUsedBlankCellSet();
} }
_usedBlankCellGroup.addCell(bookIndex, sheetIndex, rowIndex, columnIndex);
_usedBlankCellGroup.addCell(evalWorkbook, bookIndex, sheetIndex, rowIndex, columnIndex);
} }


public void updateFormulaResult(ValueEval result) { public void updateFormulaResult(ValueEval result) {

+ 7
- 0
src/java/org/apache/poi/ss/formula/EvaluationSheet.java View File



package org.apache.poi.ss.formula; package org.apache.poi.ss.formula;


import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;


/** /**
* @since POI 3.15 beta 3 * @since POI 3.15 beta 3
*/ */
public void clearAllCachedResultValues(); public void clearAllCachedResultValues();
/**
* @return last row index referenced on this sheet, for evaluation optimization
* @since POI 4.0.0
*/
public int getlastRowNum();
} }

+ 2
- 2
src/java/org/apache/poi/ss/formula/EvaluationTracker.java View File

} }
} }


public void acceptPlainValueDependency(int bookIndex, int sheetIndex,
public void acceptPlainValueDependency(EvaluationWorkbook evalWorkbook, int bookIndex, int sheetIndex,
int rowIndex, int columnIndex, ValueEval value) { int rowIndex, int columnIndex, ValueEval value) {
// Tell the currently evaluating cell frame that it has a dependency on the specified // Tell the currently evaluating cell frame that it has a dependency on the specified
int prevFrameIndex = _evaluationFrames.size() - 1; int prevFrameIndex = _evaluationFrames.size() - 1;
} else { } else {
CellEvaluationFrame consumingFrame = _evaluationFrames.get(prevFrameIndex); CellEvaluationFrame consumingFrame = _evaluationFrames.get(prevFrameIndex);
if (value == BlankEval.instance) { if (value == BlankEval.instance) {
consumingFrame.addUsedBlankCell(bookIndex, sheetIndex, rowIndex, columnIndex);
consumingFrame.addUsedBlankCell(evalWorkbook, bookIndex, sheetIndex, rowIndex, columnIndex);
} else { } else {
PlainValueCellCacheEntry cce = _cache.getPlainValueEntry(bookIndex, sheetIndex, PlainValueCellCacheEntry cce = _cache.getPlainValueEntry(bookIndex, sheetIndex,
rowIndex, columnIndex, value); rowIndex, columnIndex, value);

+ 11
- 5
src/java/org/apache/poi/ss/formula/FormulaUsedBlankCellSet.java View File

private int _firstColumnIndex; private int _firstColumnIndex;
private int _lastColumnIndex; private int _lastColumnIndex;
private BlankCellRectangleGroup _currentRectangleGroup; private BlankCellRectangleGroup _currentRectangleGroup;
private int _lastDefinedRow;


public BlankCellSheetGroup() {
public BlankCellSheetGroup(int lastDefinedRow) {
_rectangleGroups = new ArrayList<>(); _rectangleGroups = new ArrayList<>();
_currentRowIndex = -1; _currentRowIndex = -1;
_lastDefinedRow = lastDefinedRow;
} }


public void addCell(int rowIndex, int columnIndex) { public void addCell(int rowIndex, int columnIndex) {
if (rowIndex > _lastDefinedRow) return;
if (_currentRowIndex == -1) { if (_currentRowIndex == -1) {
_currentRowIndex = rowIndex; _currentRowIndex = rowIndex;
_firstColumnIndex = columnIndex; _firstColumnIndex = columnIndex;
} }


public boolean containsCell(int rowIndex, int columnIndex) { public boolean containsCell(int rowIndex, int columnIndex) {
if (rowIndex > _lastDefinedRow) return true;
for (int i=_rectangleGroups.size()-1; i>=0; i--) { for (int i=_rectangleGroups.size()-1; i>=0; i--) {
BlankCellRectangleGroup bcrg = _rectangleGroups.get(i); BlankCellRectangleGroup bcrg = _rectangleGroups.get(i);
if (bcrg.containsCell(rowIndex, columnIndex)) { if (bcrg.containsCell(rowIndex, columnIndex)) {
_sheetGroupsByBookSheet = new HashMap<>(); _sheetGroupsByBookSheet = new HashMap<>();
} }


public void addCell(int bookIndex, int sheetIndex, int rowIndex, int columnIndex) {
BlankCellSheetGroup sbcg = getSheetGroup(bookIndex, sheetIndex);
public void addCell(EvaluationWorkbook evalWorkbook, int bookIndex, int sheetIndex, int rowIndex, int columnIndex) {
BlankCellSheetGroup sbcg = getSheetGroup(evalWorkbook, bookIndex, sheetIndex);
sbcg.addCell(rowIndex, columnIndex); sbcg.addCell(rowIndex, columnIndex);
} }


private BlankCellSheetGroup getSheetGroup(int bookIndex, int sheetIndex) {
private BlankCellSheetGroup getSheetGroup(EvaluationWorkbook evalWorkbook, int bookIndex, int sheetIndex) {
BookSheetKey key = new BookSheetKey(bookIndex, sheetIndex); BookSheetKey key = new BookSheetKey(bookIndex, sheetIndex);


BlankCellSheetGroup result = _sheetGroupsByBookSheet.get(key); BlankCellSheetGroup result = _sheetGroupsByBookSheet.get(key);
if (result == null) { if (result == null) {
result = new BlankCellSheetGroup();
result = new BlankCellSheetGroup(evalWorkbook.getSheet(sheetIndex).getlastRowNum());
_sheetGroupsByBookSheet.put(key, result); _sheetGroupsByBookSheet.put(key, result);
} }
return result; return result;

+ 1
- 1
src/java/org/apache/poi/ss/formula/WorkbookEvaluator.java View File

if (srcCell == null || srcCell.getCellType() != CellType.FORMULA) { if (srcCell == null || srcCell.getCellType() != CellType.FORMULA) {
ValueEval result = getValueFromNonFormulaCell(srcCell); ValueEval result = getValueFromNonFormulaCell(srcCell);
if (shouldCellDependencyBeRecorded) { if (shouldCellDependencyBeRecorded) {
tracker.acceptPlainValueDependency(_workbookIx, sheetIndex, rowIndex, columnIndex, result);
tracker.acceptPlainValueDependency(_workbook, _workbookIx, sheetIndex, rowIndex, columnIndex, result);
} }
return result; return result;
} }

+ 10
- 1
src/java/org/apache/poi/ss/formula/eval/forked/ForkedEvaluationSheet.java View File

final class ForkedEvaluationSheet implements EvaluationSheet { final class ForkedEvaluationSheet implements EvaluationSheet {


private final EvaluationSheet _masterSheet; private final EvaluationSheet _masterSheet;
/** /**
* Only cells which have been split are put in this map. (This has been done to conserve memory). * Only cells which have been split are put in this map. (This has been done to conserve memory).
*/ */
_masterSheet = masterSheet; _masterSheet = masterSheet;
_sharedCellsByRowCol = new HashMap<>(); _sharedCellsByRowCol = new HashMap<>();
} }

/* (non-Javadoc)
* @see org.apache.poi.ss.formula.EvaluationSheet#getlastRowNum()
* @since POI 4.0.0
*/
public int getlastRowNum() {
return _masterSheet.getlastRowNum();
}
@Override @Override
public EvaluationCell getCell(int rowIndex, int columnIndex) { public EvaluationCell getCell(int rowIndex, int columnIndex) {
RowColKey key = new RowColKey(rowIndex, columnIndex); RowColKey key = new RowColKey(rowIndex, columnIndex);

+ 12
- 1
src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFEvaluationSheet.java View File

@Internal @Internal
final class SXSSFEvaluationSheet implements EvaluationSheet { final class SXSSFEvaluationSheet implements EvaluationSheet {
private final SXSSFSheet _xs; private final SXSSFSheet _xs;
private int _lastDefinedRow = -1;


public SXSSFEvaluationSheet(SXSSFSheet sheet) { public SXSSFEvaluationSheet(SXSSFSheet sheet) {
_xs = sheet; _xs = sheet;
_lastDefinedRow = _xs.getLastRowNum();
} }


public SXSSFSheet getSXSSFSheet() { public SXSSFSheet getSXSSFSheet() {
return _xs; return _xs;
} }

/* (non-Javadoc)
* @see org.apache.poi.ss.formula.EvaluationSheet#getlastRowNum()
* @since POI 4.0.0
*/
public int getlastRowNum() {
return _lastDefinedRow;
}
@Override @Override
public EvaluationCell getCell(int rowIndex, int columnIndex) { public EvaluationCell getCell(int rowIndex, int columnIndex) {
SXSSFRow row = _xs.getRow(rowIndex); SXSSFRow row = _xs.getRow(rowIndex);
*/ */
@Override @Override
public void clearAllCachedResultValues() { public void clearAllCachedResultValues() {
// nothing to do
_lastDefinedRow = _xs.getLastRowNum();
} }
} }

+ 15
- 0
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFEvaluationSheet.java View File



private final XSSFSheet _xs; private final XSSFSheet _xs;
private Map<CellKey, EvaluationCell> _cellCache; private Map<CellKey, EvaluationCell> _cellCache;
private int _lastDefinedRow = -1;


public XSSFEvaluationSheet(XSSFSheet sheet) { public XSSFEvaluationSheet(XSSFSheet sheet) {
_xs = sheet; _xs = sheet;
_lastDefinedRow = _xs.getLastRowNum();
} }


public XSSFSheet getXSSFSheet() { public XSSFSheet getXSSFSheet() {
return _xs; return _xs;
} }


/* (non-Javadoc)
* @see org.apache.poi.ss.formula.EvaluationSheet#getlastRowNum()
* @since POI 4.0.0
*/
public int getlastRowNum() {
return _lastDefinedRow;
}
/* (non-JavaDoc), inherit JavaDoc from EvaluationWorkbook /* (non-JavaDoc), inherit JavaDoc from EvaluationWorkbook
* @since POI 3.15 beta 3 * @since POI 3.15 beta 3
*/ */
@Override @Override
public void clearAllCachedResultValues() { public void clearAllCachedResultValues() {
_cellCache = null; _cellCache = null;
_lastDefinedRow = _xs.getLastRowNum();
} }
@Override @Override
public EvaluationCell getCell(int rowIndex, int columnIndex) { public EvaluationCell getCell(int rowIndex, int columnIndex) {
// shortcut evaluation if reference is outside the bounds of existing data
// see issue #61841 for impact on VLOOKUP in particular
if (rowIndex > _lastDefinedRow) return null;
// cache for performance: ~30% speedup due to caching // cache for performance: ~30% speedup due to caching
if (_cellCache == null) { if (_cellCache == null) {
_cellCache = new HashMap<>(_xs.getLastRowNum() * 3); _cellCache = new HashMap<>(_xs.getLastRowNum() * 3);

+ 25
- 0
src/ooxml/testcases/org/apache/poi/ss/formula/functions/TestVlookup.java View File

package org.apache.poi.ss.formula.functions;

import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.junit.Test;

import junit.framework.TestCase;

/**
* Test the VLOOKUP function
*/
public class TestVlookup extends TestCase {

@Test
public void testFullColumnAreaRef61841() {
final Workbook wb = XSSFTestDataSamples.openSampleWorkbook("VLookupFullColumn.xlsx");
FormulaEvaluator feval = wb.getCreationHelper().createFormulaEvaluator();
feval.evaluateAll();
assertEquals("Wrong lookup value", "Value1", feval.evaluate(wb.getSheetAt(0).getRow(3).getCell(1)).getStringValue());
assertEquals("Lookup should return #N/A", CellType.ERROR, feval.evaluate(wb.getSheetAt(0).getRow(4).getCell(1)).getCellType());
}

}

BIN
test-data/spreadsheet/VLookupFullColumn.xlsx View File


Loading…
Cancel
Save