diff options
author | Nick Burch <nick@apache.org> | 2008-02-22 11:23:50 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2008-02-22 11:23:50 +0000 |
commit | f055e2999a3f07f51e8430027445528544255cef (patch) | |
tree | 200288744ea941c312eed3be4ee08008003cfb00 /src/scratchpad | |
parent | 6ebb5e11ef97a6ac1c6081c41c379924f6f8f944 (diff) | |
download | poi-f055e2999a3f07f51e8430027445528544255cef.tar.gz poi-f055e2999a3f07f51e8430027445528544255cef.zip |
Patch from Josh from bug #44450 - VLookup and HLookup support, and improvements to Lookup and Offset
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@630160 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/scratchpad')
9 files changed, 1394 insertions, 298 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/ErrorEval.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/ErrorEval.java index f57976d3ed..56e4db1b22 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/ErrorEval.java +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/ErrorEval.java @@ -14,112 +14,105 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* - * Created on May 8, 2005 - * - */ + package org.apache.poi.hssf.record.formula.eval; +import org.apache.poi.hssf.usermodel.HSSFErrorConstants; + /** * @author Amol S. Deshmukh < amolweb at ya hoo dot com > - * + * */ public final class ErrorEval implements ValueEval { - /** - * Contains raw Excel error codes (as defined in OOO's excelfileformat.pdf (2.5.6) - */ - private static final class ErrorCode { - /** <b>#NULL!</b> - Intersection of two cell ranges is empty */ - public static final int NULL = 0x00; - /** <b>#DIV/0!</b> - Division by zero */ - public static final int DIV_0 = 0x07; - /** <b>#VALUE!</b> - Wrong type of operand */ - public static final int VALUE = 0x0F; - /** <b>#REF!</b> - Illegal or deleted cell reference */ - public static final int REF = 0x17; - /** <b>#NAME?</b> - Wrong function or range name */ - public static final int NAME = 0x1D; - /** <b>#NUM!</b> - Value range overflow */ - public static final int NUM = 0x24; - /** <b>#N/A</b> - Argument or function not available */ - public static final int N_A = 0x2A; - - public static final String getText(int errorCode) { - switch(errorCode) { - case NULL: return "#NULL!"; - case DIV_0: return "#DIV/0!"; - case VALUE: return "#VALUE!"; - case REF: return "#REF!"; - case NAME: return "#NAME?"; - case NUM: return "#NUM!"; - case N_A: return "#N/A"; - } - return "???"; - } - } + + // convenient access to namespace + private static final HSSFErrorConstants EC = null; /** <b>#NULL!</b> - Intersection of two cell ranges is empty */ - public static final ErrorEval NULL_INTERSECTION = new ErrorEval(ErrorCode.NULL); + public static final ErrorEval NULL_INTERSECTION = new ErrorEval(EC.ERROR_NULL); /** <b>#DIV/0!</b> - Division by zero */ - public static final ErrorEval DIV_ZERO = new ErrorEval(ErrorCode.DIV_0); + public static final ErrorEval DIV_ZERO = new ErrorEval(EC.ERROR_DIV_0); /** <b>#VALUE!</b> - Wrong type of operand */ - public static final ErrorEval VALUE_INVALID = new ErrorEval(ErrorCode.VALUE); + public static final ErrorEval VALUE_INVALID = new ErrorEval(EC.ERROR_VALUE); /** <b>#REF!</b> - Illegal or deleted cell reference */ - public static final ErrorEval REF_INVALID = new ErrorEval(ErrorCode.REF); + public static final ErrorEval REF_INVALID = new ErrorEval(EC.ERROR_REF); /** <b>#NAME?</b> - Wrong function or range name */ - public static final ErrorEval NAME_INVALID = new ErrorEval(ErrorCode.NAME); + public static final ErrorEval NAME_INVALID = new ErrorEval(EC.ERROR_NAME); /** <b>#NUM!</b> - Value range overflow */ - public static final ErrorEval NUM_ERROR = new ErrorEval(ErrorCode.NUM); + public static final ErrorEval NUM_ERROR = new ErrorEval(EC.ERROR_NUM); /** <b>#N/A</b> - Argument or function not available */ - public static final ErrorEval NA = new ErrorEval(ErrorCode.N_A); + public static final ErrorEval NA = new ErrorEval(EC.ERROR_NA); + + + // POI internal error codes + private static final int CIRCULAR_REF_ERROR_CODE = 0xFFFFFFC4; + private static final int FUNCTION_NOT_IMPLEMENTED_CODE = 0xFFFFFFE2; - /** - * Translates an Excel internal error code into the corresponding POI ErrorEval instance + * @deprecated do not use this error code. For conditions that should never occur, throw an + * unchecked exception. For all other situations use the error code that corresponds to the + * error Excel would have raised under the same circumstances. + */ + public static final ErrorEval UNKNOWN_ERROR = new ErrorEval(-20); + public static final ErrorEval FUNCTION_NOT_IMPLEMENTED = new ErrorEval(FUNCTION_NOT_IMPLEMENTED_CODE); + // Note - Excel does not seem to represent this condition with an error code + public static final ErrorEval CIRCULAR_REF_ERROR = new ErrorEval(CIRCULAR_REF_ERROR_CODE); + + + /** + * Translates an Excel internal error code into the corresponding POI ErrorEval instance * @param errorCode */ public static ErrorEval valueOf(int errorCode) { switch(errorCode) { - case ErrorCode.NULL: return NULL_INTERSECTION; - case ErrorCode.DIV_0: return DIV_ZERO; - case ErrorCode.VALUE: return VALUE_INVALID; -// case ErrorCode.REF: return REF_INVALID; - case ErrorCode.REF: return UNKNOWN_ERROR; - case ErrorCode.NAME: return NAME_INVALID; - case ErrorCode.NUM: return NUM_ERROR; - case ErrorCode.N_A: return NA; - - // these cases probably shouldn't be coming through here - // but (as of Jan-2008) a lot of code depends on it. -// case -20: return UNKNOWN_ERROR; -// case -30: return FUNCTION_NOT_IMPLEMENTED; -// case -60: return CIRCULAR_REF_ERROR; + case HSSFErrorConstants.ERROR_NULL: return NULL_INTERSECTION; + case HSSFErrorConstants.ERROR_DIV_0: return DIV_ZERO; + case HSSFErrorConstants.ERROR_VALUE: return VALUE_INVALID; + case HSSFErrorConstants.ERROR_REF: return REF_INVALID; + case HSSFErrorConstants.ERROR_NAME: return NAME_INVALID; + case HSSFErrorConstants.ERROR_NUM: return NUM_ERROR; + case HSSFErrorConstants.ERROR_NA: return NA; + // non-std errors (conditions modeled as errors by POI) + case CIRCULAR_REF_ERROR_CODE: return CIRCULAR_REF_ERROR; + case FUNCTION_NOT_IMPLEMENTED_CODE: return FUNCTION_NOT_IMPLEMENTED; } throw new RuntimeException("Unexpected error code (" + errorCode + ")"); } - - // POI internal error codes - public static final ErrorEval UNKNOWN_ERROR = new ErrorEval(-20); - public static final ErrorEval FUNCTION_NOT_IMPLEMENTED = new ErrorEval(-30); - // Note - Excel does not seem to represent this condition with an error code - public static final ErrorEval CIRCULAR_REF_ERROR = new ErrorEval(-60); + /** + * Converts error codes to text. Handles non-standard error codes OK. + * For debug/test purposes (and for formatting error messages). + * @return the String representation of the specified Excel error code. + */ + public static String getText(int errorCode) { + if(HSSFErrorConstants.isValidCode(errorCode)) { + return HSSFErrorConstants.getText(errorCode); + } + // It is desirable to make these (arbitrary) strings look clearly different from any other + // value expression that might appear in a formula. In addition these error strings should + // look unlike the standard Excel errors. Hence tilde ('~') was used. + switch(errorCode) { + case CIRCULAR_REF_ERROR_CODE: return "~CIRCULAR~REF~"; + case FUNCTION_NOT_IMPLEMENTED_CODE: return "~FUNCTION~NOT~IMPLEMENTED~"; + } + return "~non~std~err(" + errorCode + ")~"; + } - private int errorCode; + private int _errorCode; /** * @param errorCode an 8-bit value */ private ErrorEval(int errorCode) { - this.errorCode = errorCode; + _errorCode = errorCode; } public int getErrorCode() { - return errorCode; + return _errorCode; } public String toString() { StringBuffer sb = new StringBuffer(64); sb.append(getClass().getName()).append(" ["); - sb.append(ErrorCode.getText(errorCode)); + sb.append(getText(_errorCode)); sb.append("]"); return sb.toString(); } diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringEval.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringEval.java index 01af4e8436..27a9c6a627 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringEval.java +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringEval.java @@ -14,10 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* - * Created on May 8, 2005 - * - */ + package org.apache.poi.hssf.record.formula.eval; import org.apache.poi.hssf.record.formula.Ptg; @@ -27,21 +24,31 @@ import org.apache.poi.hssf.record.formula.StringPtg; * @author Amol S. Deshmukh < amolweb at ya hoo dot com > * */ -public class StringEval implements StringValueEval { +public final class StringEval implements StringValueEval { public static final StringEval EMPTY_INSTANCE = new StringEval(""); - private String value; + private final String value; public StringEval(Ptg ptg) { - this.value = ((StringPtg) ptg).getValue(); + this(((StringPtg) ptg).getValue()); } public StringEval(String value) { + if(value == null) { + throw new IllegalArgumentException("value must not be null"); + } this.value = value; } public String getStringValue() { return value; } + public String toString() { + StringBuffer sb = new StringBuffer(64); + sb.append(getClass().getName()).append(" ["); + sb.append(value); + sb.append("]"); + return sb.toString(); + } } diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringValueEval.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringValueEval.java index b692f01ea2..46c12236b9 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringValueEval.java +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/eval/StringValueEval.java @@ -14,10 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* - * Created on May 8, 2005 - * - */ + package org.apache.poi.hssf.record.formula.eval; /** @@ -26,5 +23,8 @@ package org.apache.poi.hssf.record.formula.eval; */ public interface StringValueEval extends ValueEval { - public String getStringValue(); + /** + * @return never <code>null</code>, possibly empty string. + */ + String getStringValue(); } diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Hlookup.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Hlookup.java index 8bac3d0c02..40ed1da490 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Hlookup.java +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Hlookup.java @@ -1,25 +1,123 @@ -/* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -/* - * Created on May 15, 2005 - * - */ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + package org.apache.poi.hssf.record.formula.functions; -public class Hlookup extends NotImplementedFunction { +import org.apache.poi.hssf.record.formula.eval.AreaEval; +import org.apache.poi.hssf.record.formula.eval.ErrorEval; +import org.apache.poi.hssf.record.formula.eval.Eval; +import org.apache.poi.hssf.record.formula.eval.EvaluationException; +import org.apache.poi.hssf.record.formula.eval.OperandResolver; +import org.apache.poi.hssf.record.formula.eval.ValueEval; +import org.apache.poi.hssf.record.formula.functions.LookupUtils.ValueVector; +/** + * Implementation of the VLOOKUP() function.<p/> + * + * HLOOKUP finds a column in a lookup table by the first row value and returns the value from another row. + * + * <b>Syntax</b>:<br/> + * <b>HLOOKUP</b>(<b>lookup_value</b>, <b>table_array</b>, <b>row_index_num</b>, range_lookup)<p/> + * + * <b>lookup_value</b> The value to be found in the first column of the table array.<br/> + * <b>table_array</> An area reference for the lookup data. <br/> + * <b>row_index_num</b> a 1 based index specifying which row value of the lookup data will be returned.<br/> + * <b>range_lookup</b> If TRUE (default), HLOOKUP finds the largest value less than or equal to + * the lookup_value. If FALSE, only exact matches will be considered<br/> + * + * @author Josh Micich + */ +public final class Hlookup implements Function { + + private static final class RowVector implements ValueVector { + + private final AreaEval _tableArray; + private final int _size; + private final int _rowAbsoluteIndex; + private final int _firstColumnAbsoluteIndex; + + public RowVector(AreaEval tableArray, int rowIndex) { + _rowAbsoluteIndex = tableArray.getFirstRow() + rowIndex; + if(!tableArray.containsRow(_rowAbsoluteIndex)) { + int lastRowIx = tableArray.getLastRow() - tableArray.getFirstRow(); + throw new IllegalArgumentException("Specified row index (" + rowIndex + + ") is outside the allowed range (0.." + lastRowIx + ")"); + } + _tableArray = tableArray; + _size = tableArray.getLastColumn() - tableArray.getFirstColumn() + 1; + if(_size < 1) { + throw new RuntimeException("bad table array size zero"); + } + _firstColumnAbsoluteIndex = tableArray.getFirstColumn(); + } + + public ValueEval getItem(int index) { + if(index>_size) { + throw new ArrayIndexOutOfBoundsException("Specified index (" + index + + ") is outside the allowed range (0.." + (_size-1) + ")"); + } + return _tableArray.getValueAt(_rowAbsoluteIndex, (short) (_firstColumnAbsoluteIndex + index)); + } + public int getSize() { + return _size; + } + } + + public Eval evaluate(Eval[] args, int srcCellRow, short srcCellCol) { + Eval arg3 = null; + switch(args.length) { + case 4: + arg3 = args[3]; // important: assumed array element is never null + case 3: + break; + default: + // wrong number of arguments + return ErrorEval.VALUE_INVALID; + } + try { + // Evaluation order: + // arg0 lookup_value, arg1 table_array, arg3 range_lookup, find lookup value, arg2 row_index, fetch result + ValueEval lookupValue = OperandResolver.getSingleValue(args[0], srcCellRow, srcCellCol); + AreaEval tableArray = LookupUtils.resolveTableArrayArg(args[1]); + boolean isRangeLookup = LookupUtils.resolveRangeLookupArg(arg3, srcCellRow, srcCellCol); + int colIndex = LookupUtils.lookupIndexOfValue(lookupValue, new RowVector(tableArray, 0), isRangeLookup); + ValueEval veColIndex = OperandResolver.getSingleValue(args[2], srcCellRow, srcCellCol); + int rowIndex = LookupUtils.resolveRowOrColIndexArg(veColIndex); + ValueVector resultCol = createResultColumnVector(tableArray, rowIndex); + return resultCol.getItem(colIndex); + } catch (EvaluationException e) { + return e.getErrorEval(); + } + } + + /** + * Returns one column from an <tt>AreaEval</tt> + * + * @throws EvaluationException (#VALUE!) if colIndex is negative, (#REF!) if colIndex is too high + */ + private ValueVector createResultColumnVector(AreaEval tableArray, int colIndex) throws EvaluationException { + if(colIndex < 0) { + throw EvaluationException.invalidValue(); + } + int nCols = tableArray.getLastColumn() - tableArray.getFirstRow() + 1; + + if(colIndex >= nCols) { + throw EvaluationException.invalidRef(); + } + return new RowVector(tableArray, colIndex); + } } diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Lookup.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Lookup.java index f98ccca7e3..be1d0d0f94 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Lookup.java +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Lookup.java @@ -1,25 +1,96 @@ -/* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -/* - * Created on May 15, 2005 - * - */ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + package org.apache.poi.hssf.record.formula.functions; -public class Lookup extends NotImplementedFunction { +import org.apache.poi.hssf.record.formula.eval.AreaEval; +import org.apache.poi.hssf.record.formula.eval.ErrorEval; +import org.apache.poi.hssf.record.formula.eval.Eval; +import org.apache.poi.hssf.record.formula.eval.EvaluationException; +import org.apache.poi.hssf.record.formula.eval.OperandResolver; +import org.apache.poi.hssf.record.formula.eval.ValueEval; +import org.apache.poi.hssf.record.formula.functions.LookupUtils.ValueVector; + +/** + * Implementation of Excel function LOOKUP.<p/> + * + * LOOKUP finds an index row in a lookup table by the first column value and returns the value from another column. + * + * <b>Syntax</b>:<br/> + * <b>VLOOKUP</b>(<b>lookup_value</b>, <b>lookup_vector</b>, result_vector)<p/> + * + * <b>lookup_value</b> The value to be found in the lookup vector.<br/> + * <b>lookup_vector</> An area reference for the lookup data. <br/> + * <b>result_vector</b> Single row or single column area reference from which the result value is chosen.<br/> + * + * @author Josh Micich + */ +public final class Lookup implements Function { + private static final class SimpleValueVector implements ValueVector { + private final ValueEval[] _values; + + public SimpleValueVector(ValueEval[] values) { + _values = values; + } + public ValueEval getItem(int index) { + return _values[index]; + } + public int getSize() { + return _values.length; + } + } + + public Eval evaluate(Eval[] args, int srcCellRow, short srcCellCol) { + switch(args.length) { + case 3: + break; + case 2: + // complex rules to choose lookupVector and resultVector from the single area ref + throw new RuntimeException("Two arg version of LOOKUP not supported yet"); + default: + return ErrorEval.VALUE_INVALID; + } + + + try { + ValueEval lookupValue = OperandResolver.getSingleValue(args[0], srcCellRow, srcCellCol); + AreaEval aeLookupVector = LookupUtils.resolveTableArrayArg(args[1]); + AreaEval aeResultVector = LookupUtils.resolveTableArrayArg(args[2]); + + ValueVector lookupVector = createVector(aeLookupVector); + ValueVector resultVector = createVector(aeResultVector); + if(lookupVector.getSize() > resultVector.getSize()) { + // Excel seems to handle this by accessing past the end of the result vector. + throw new RuntimeException("Lookup vector and result vector of differing sizes not supported yet"); + } + int index = LookupUtils.lookupIndexOfValue(lookupValue, lookupVector, true); + + return resultVector.getItem(index); + } catch (EvaluationException e) { + return e.getErrorEval(); + } + } + private static ValueVector createVector(AreaEval ae) { + + if(!ae.isRow() && !ae.isColumn()) { + // extra complexity required to emulate the way LOOKUP can handles these abnormal cases. + throw new RuntimeException("non-vector lookup or result areas not supported yet"); + } + return new SimpleValueVector(ae.getValues()); + } } diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/LookupUtils.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/LookupUtils.java new file mode 100644 index 0000000000..66123b2985 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/LookupUtils.java @@ -0,0 +1,530 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.record.formula.functions; + +import org.apache.poi.hssf.record.formula.AreaPtg; +import org.apache.poi.hssf.record.formula.eval.Area2DEval; +import org.apache.poi.hssf.record.formula.eval.AreaEval; +import org.apache.poi.hssf.record.formula.eval.BlankEval; +import org.apache.poi.hssf.record.formula.eval.BoolEval; +import org.apache.poi.hssf.record.formula.eval.ErrorEval; +import org.apache.poi.hssf.record.formula.eval.Eval; +import org.apache.poi.hssf.record.formula.eval.EvaluationException; +import org.apache.poi.hssf.record.formula.eval.NumberEval; +import org.apache.poi.hssf.record.formula.eval.NumericValueEval; +import org.apache.poi.hssf.record.formula.eval.OperandResolver; +import org.apache.poi.hssf.record.formula.eval.RefEval; +import org.apache.poi.hssf.record.formula.eval.StringEval; +import org.apache.poi.hssf.record.formula.eval.ValueEval; + +/** + * Common functionality used by VLOOKUP, HLOOKUP, LOOKUP and MATCH + * + * @author Josh Micich + */ +final class LookupUtils { + + /** + * Represents a single row or column within an <tt>AreaEval</tt>. + */ + public interface ValueVector { + ValueEval getItem(int index); + int getSize(); + } + /** + * Enumeration to support <b>4</b> valued comparison results.<p/> + * Excel lookup functions have complex behaviour in the case where the lookup array has mixed + * types, and/or is unordered. Contrary to suggestions in some Excel documentation, there + * does not appear to be a universal ordering across types. The binary search algorithm used + * changes behaviour when the evaluated 'mid' value has a different type to the lookup value.<p/> + * + * A simple int might have done the same job, but there is risk in confusion with the well + * known <tt>Comparable.compareTo()</tt> and <tt>Comparator.compare()</tt> which both use + * a ubiquitous 3 value result encoding. + */ + public static final class CompareResult { + private final boolean _isTypeMismatch; + private final boolean _isLessThan; + private final boolean _isEqual; + private final boolean _isGreaterThan; + + private CompareResult(boolean isTypeMismatch, int simpleCompareResult) { + if(isTypeMismatch) { + _isTypeMismatch = true; + _isLessThan = false; + _isEqual = false; + _isGreaterThan = false; + } else { + _isTypeMismatch = false; + _isLessThan = simpleCompareResult < 0; + _isEqual = simpleCompareResult == 0; + _isGreaterThan = simpleCompareResult > 0; + } + } + public static final CompareResult TYPE_MISMATCH = new CompareResult(true, 0); + public static final CompareResult LESS_THAN = new CompareResult(false, -1); + public static final CompareResult EQUAL = new CompareResult(false, 0); + public static final CompareResult GREATER_THAN = new CompareResult(false, +1); + + public static final CompareResult valueOf(int simpleCompareResult) { + if(simpleCompareResult < 0) { + return LESS_THAN; + } + if(simpleCompareResult > 0) { + return GREATER_THAN; + } + return EQUAL; + } + + public boolean isTypeMismatch() { + return _isTypeMismatch; + } + public boolean isLessThan() { + return _isLessThan; + } + public boolean isEqual() { + return _isEqual; + } + public boolean isGreaterThan() { + return _isGreaterThan; + } + public String toString() { + StringBuffer sb = new StringBuffer(64); + sb.append(getClass().getName()).append(" ["); + sb.append(formatAsString()); + sb.append("]"); + return sb.toString(); + } + + private String formatAsString() { + if(_isTypeMismatch) { + return "TYPE_MISMATCH"; + } + if(_isLessThan) { + return "LESS_THAN"; + } + if(_isEqual) { + return "EQUAL"; + } + if(_isGreaterThan) { + return "GREATER_THAN"; + } + // toString must be reliable + return "??error??"; + } + } + + public interface LookupValueComparer { + /** + * @return one of 4 instances or <tt>CompareResult</tt>: <tt>LESS_THAN</tt>, <tt>EQUAL</tt>, + * <tt>GREATER_THAN</tt> or <tt>TYPE_MISMATCH</tt> + */ + CompareResult compareTo(ValueEval other); + } + + private static abstract class LookupValueComparerBase implements LookupValueComparer { + + private final Class _targetClass; + protected LookupValueComparerBase(ValueEval targetValue) { + if(targetValue == null) { + throw new RuntimeException("targetValue cannot be null"); + } + _targetClass = targetValue.getClass(); + } + public final CompareResult compareTo(ValueEval other) { + if (other == null) { + throw new RuntimeException("compare to value cannot be null"); + } + if (_targetClass != other.getClass()) { + return CompareResult.TYPE_MISMATCH; + } + if (_targetClass == StringEval.class) { + + } + return compareSameType(other); + } + public String toString() { + StringBuffer sb = new StringBuffer(64); + sb.append(getClass().getName()).append(" ["); + sb.append(getValueAsString()); + sb.append("]"); + return sb.toString(); + } + protected abstract CompareResult compareSameType(ValueEval other); + /** used only for debug purposes */ + protected abstract String getValueAsString(); + } + + private static final class StringLookupComparer extends LookupValueComparerBase { + private String _value; + + protected StringLookupComparer(StringEval se) { + super(se); + _value = se.getStringValue(); + } + protected CompareResult compareSameType(ValueEval other) { + StringEval se = (StringEval) other; + return CompareResult.valueOf(_value.compareToIgnoreCase(se.getStringValue())); + } + protected String getValueAsString() { + return _value; + } + } + private static final class NumberLookupComparer extends LookupValueComparerBase { + private double _value; + + protected NumberLookupComparer(NumberEval ne) { + super(ne); + _value = ne.getNumberValue(); + } + protected CompareResult compareSameType(ValueEval other) { + NumberEval ne = (NumberEval) other; + return CompareResult.valueOf(Double.compare(_value, ne.getNumberValue())); + } + protected String getValueAsString() { + return String.valueOf(_value); + } + } + private static final class BooleanLookupComparer extends LookupValueComparerBase { + private boolean _value; + + protected BooleanLookupComparer(BoolEval be) { + super(be); + _value = be.getBooleanValue(); + } + protected CompareResult compareSameType(ValueEval other) { + BoolEval be = (BoolEval) other; + boolean otherVal = be.getBooleanValue(); + if(_value == otherVal) { + return CompareResult.EQUAL; + } + // TRUE > FALSE + if(_value) { + return CompareResult.GREATER_THAN; + } + return CompareResult.LESS_THAN; + } + protected String getValueAsString() { + return String.valueOf(_value); + } + } + + /** + * Processes the third argument to VLOOKUP, or HLOOKUP (<b>col_index_num</b> + * or <b>row_index_num</b> respectively).<br> + * Sample behaviour: + * <table border="0" cellpadding="1" cellspacing="2" summary="Sample behaviour"> + * <tr><th>Input Return</th><th>Value </th><th>Thrown Error</th></tr> + * <tr><td>5</td><td>4</td><td> </td></tr> + * <tr><td>2.9</td><td>2</td><td> </td></tr> + * <tr><td>"5"</td><td>4</td><td> </td></tr> + * <tr><td>"2.18e1"</td><td>21</td><td> </td></tr> + * <tr><td>"-$2"</td><td>-3</td><td>*</td></tr> + * <tr><td>FALSE</td><td>-1</td><td>*</td></tr> + * <tr><td>TRUE</td><td>0</td><td> </td></tr> + * <tr><td>"TRUE"</td><td> </td><td>#REF!</td></tr> + * <tr><td>"abc"</td><td> </td><td>#REF!</td></tr> + * <tr><td>""</td><td> </td><td>#REF!</td></tr> + * <tr><td><blank></td><td> </td><td>#VALUE!</td></tr> + * </table><br/> + * + * * Note - out of range errors (both too high and too low) are handled by the caller. + * @return column or row index as a zero-based value + * + */ + public static int resolveRowOrColIndexArg(ValueEval veRowColIndexArg) throws EvaluationException { + if(veRowColIndexArg == null) { + throw new IllegalArgumentException("argument must not be null"); + } + if(veRowColIndexArg instanceof BlankEval) { + throw EvaluationException.invalidValue(); + } + if(veRowColIndexArg instanceof StringEval) { + StringEval se = (StringEval) veRowColIndexArg; + String strVal = se.getStringValue(); + Double dVal = OperandResolver.parseDouble(strVal); + if(dVal == null) { + // String does not resolve to a number. Raise #VALUE! error. + throw EvaluationException.invalidRef(); + // This includes text booleans "TRUE" and "FALSE". They are not valid. + } + // else - numeric value parses OK + } + // actual BoolEval values get interpreted as FALSE->0 and TRUE->1 + return OperandResolver.coerceValueToInt(veRowColIndexArg) - 1; + } + + + + /** + * The second argument (table_array) should be an area ref, but can actually be a cell ref, in + * which case it is interpreted as a 1x1 area ref. Other scalar values cause #VALUE! error. + */ + public static AreaEval resolveTableArrayArg(Eval eval) throws EvaluationException { + if (eval instanceof AreaEval) { + return (AreaEval) eval; + } + + if(eval instanceof RefEval) { + RefEval refEval = (RefEval) eval; + // Make this cell ref look like a 1x1 area ref. + + // It doesn't matter if eval is a 2D or 3D ref, because that detail is never asked of AreaEval. + // This code only requires the value array item. + // anything would be ok for rowIx and colIx, but may as well get it right. + short rowIx = refEval.getRow(); + short colIx = refEval.getColumn(); + AreaPtg ap = new AreaPtg(rowIx, rowIx, colIx, colIx, false, false, false, false); + ValueEval value = refEval.getInnerValueEval(); + return new Area2DEval(ap, new ValueEval[] { value, }); + } + throw EvaluationException.invalidValue(); + } + + + /** + * Resolves the last (optional) parameter (<b>range_lookup</b>) to the VLOOKUP and HLOOKUP functions. + * @param rangeLookupArg + * @param srcCellRow + * @param srcCellCol + * @return + * @throws EvaluationException + */ + public static boolean resolveRangeLookupArg(Eval rangeLookupArg, int srcCellRow, short srcCellCol) throws EvaluationException { + if(rangeLookupArg == null) { + // range_lookup arg not provided + return true; // default is TRUE + } + ValueEval valEval = OperandResolver.getSingleValue(rangeLookupArg, srcCellRow, srcCellCol); + if(valEval instanceof BlankEval) { + // Tricky: + // fourth arg supplied but evaluates to blank + // this does not get the default value + return false; + } + if(valEval instanceof BoolEval) { + // Happy day flow + BoolEval boolEval = (BoolEval) valEval; + return boolEval.getBooleanValue(); + } + + if (valEval instanceof StringEval) { + String stringValue = ((StringEval) valEval).getStringValue(); + if(stringValue.length() < 1) { + // More trickiness: + // Empty string is not the same as BlankEval. It causes #VALUE! error + throw EvaluationException.invalidValue(); + } + // TODO move parseBoolean to OperandResolver + Boolean b = Countif.parseBoolean(stringValue); + if(b != null) { + // string converted to boolean OK + return b.booleanValue(); + } + // Even more trickiness: + // Note - even if the StringEval represents a number value (for example "1"), + // Excel does not resolve it to a boolean. + throw EvaluationException.invalidValue(); + // This is in contrast to the code below,, where NumberEvals values (for + // example 0.01) *do* resolve to equivalent boolean values. + } + if (valEval instanceof NumericValueEval) { + NumericValueEval nve = (NumericValueEval) valEval; + // zero is FALSE, everything else is TRUE + return 0.0 != nve.getNumberValue(); + } + throw new RuntimeException("Unexpected eval type (" + valEval.getClass().getName() + ")"); + } + + public static int lookupIndexOfValue(ValueEval lookupValue, ValueVector vector, boolean isRangeLookup) throws EvaluationException { + LookupValueComparer lookupComparer = createLookupComparer(lookupValue); + int result; + if(isRangeLookup) { + result = performBinarySearch(vector, lookupComparer); + } else { + result = lookupIndexOfExactValue(lookupComparer, vector); + } + if(result < 0) { + throw new EvaluationException(ErrorEval.NA); + } + return result; + } + + + /** + * Finds first (lowest index) exact occurrence of specified value. + * @param lookupValue the value to be found in column or row vector + * @param vector the values to be searched. For VLOOKUP this is the first column of the + * tableArray. For HLOOKUP this is the first row of the tableArray. + * @return zero based index into the vector, -1 if value cannot be found + */ + private static int lookupIndexOfExactValue(LookupValueComparer lookupComparer, ValueVector vector) { + + // find first occurrence of lookup value + int size = vector.getSize(); + for (int i = 0; i < size; i++) { + if(lookupComparer.compareTo(vector.getItem(i)).isEqual()) { + return i; + } + } + return -1; + } + + + /** + * Encapsulates some standard binary search functionality so the unusual Excel behaviour can + * be clearly distinguished. + */ + private static final class BinarySearchIndexes { + + private int _lowIx; + private int _highIx; + + public BinarySearchIndexes(int highIx) { + _lowIx = -1; + _highIx = highIx; + } + + /** + * @return -1 if the search range is empty + */ + public int getMidIx() { + int ixDiff = _highIx - _lowIx; + if(ixDiff < 2) { + return -1; + } + return _lowIx + (ixDiff / 2); + } + + public int getLowIx() { + return _lowIx; + } + public int getHighIx() { + return _highIx; + } + public void narrowSearch(int midIx, boolean isLessThan) { + if(isLessThan) { + _highIx = midIx; + } else { + _lowIx = midIx; + } + } + } + /** + * Excel has funny behaviour when the some elements in the search vector are the wrong type. + * + */ + private static int performBinarySearch(ValueVector vector, LookupValueComparer lookupComparer) { + // both low and high indexes point to values assumed too low and too high. + BinarySearchIndexes bsi = new BinarySearchIndexes(vector.getSize()); + + while(true) { + int midIx = bsi.getMidIx(); + + if(midIx < 0) { + return bsi.getLowIx(); + } + CompareResult cr = lookupComparer.compareTo(vector.getItem(midIx)); + if(cr.isTypeMismatch()) { + int newMidIx = handleMidValueTypeMismatch(lookupComparer, vector, bsi, midIx); + if(newMidIx < 0) { + continue; + } + midIx = newMidIx; + cr = lookupComparer.compareTo(vector.getItem(midIx)); + } + if(cr.isEqual()) { + return findLastIndexInRunOfEqualValues(lookupComparer, vector, midIx, bsi.getHighIx()); + } + bsi.narrowSearch(midIx, cr.isLessThan()); + } + } + /** + * Excel seems to handle mismatched types initially by just stepping 'mid' ix forward to the + * first compatible value. + * @param midIx 'mid' index (value which has the wrong type) + * @return usually -1, signifying that the BinarySearchIndex has been narrowed to the new mid + * index. Zero or greater signifies that an exact match for the lookup value was found + */ + private static int handleMidValueTypeMismatch(LookupValueComparer lookupComparer, ValueVector vector, + BinarySearchIndexes bsi, int midIx) { + int newMid = midIx; + int highIx = bsi.getHighIx(); + + while(true) { + newMid++; + if(newMid == highIx) { + // every element from midIx to highIx was the wrong type + // move highIx down to the low end of the mid values + bsi.narrowSearch(midIx, true); + return -1; + } + CompareResult cr = lookupComparer.compareTo(vector.getItem(newMid)); + if(cr.isLessThan() && newMid == highIx-1) { + // move highIx down to the low end of the mid values + bsi.narrowSearch(midIx, true); + return -1; + // but only when "newMid == highIx-1"? slightly weird. + // It would seem more efficient to always do this. + } + if(cr.isTypeMismatch()) { + // keep stepping over values until the right type is found + continue; + } + if(cr.isEqual()) { + return newMid; + } + // Note - if moving highIx down (due to lookup<vector[newMid]), + // this execution path only moves highIx it down as far as newMid, not midIx, + // which would be more efficient. + bsi.narrowSearch(newMid, cr.isLessThan()); + return -1; + } + } + /** + * Once the binary search has found a single match, (V/H)LOOKUP steps one by one over subsequent + * values to choose the last matching item. + */ + private static int findLastIndexInRunOfEqualValues(LookupValueComparer lookupComparer, ValueVector vector, + int firstFoundIndex, int maxIx) { + for(int i=firstFoundIndex+1; i<maxIx; i++) { + if(!lookupComparer.compareTo(vector.getItem(i)).isEqual()) { + return i-1; + } + } + return maxIx - 1; + } + + public static LookupValueComparer createLookupComparer(ValueEval lookupValue) throws EvaluationException { + + if (lookupValue instanceof BlankEval) { + // blank eval can never be found in a lookup array + throw new EvaluationException(ErrorEval.NA); + } + if (lookupValue instanceof StringEval) { + return new StringLookupComparer((StringEval) lookupValue); + } + if (lookupValue instanceof NumberEval) { + return new NumberLookupComparer((NumberEval) lookupValue); + } + if (lookupValue instanceof BoolEval) { + return new BooleanLookupComparer((BoolEval) lookupValue); + } + throw new IllegalArgumentException("Bad lookup value type (" + lookupValue.getClass().getName() + ")"); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Match.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Match.java index a3ad49b194..a2a12cdba8 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Match.java +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Match.java @@ -18,14 +18,17 @@ package org.apache.poi.hssf.record.formula.functions; import org.apache.poi.hssf.record.formula.eval.AreaEval; -import org.apache.poi.hssf.record.formula.eval.BoolEval; import org.apache.poi.hssf.record.formula.eval.ErrorEval; import org.apache.poi.hssf.record.formula.eval.Eval; +import org.apache.poi.hssf.record.formula.eval.EvaluationException; import org.apache.poi.hssf.record.formula.eval.NumberEval; import org.apache.poi.hssf.record.formula.eval.NumericValueEval; +import org.apache.poi.hssf.record.formula.eval.OperandResolver; import org.apache.poi.hssf.record.formula.eval.RefEval; import org.apache.poi.hssf.record.formula.eval.StringEval; import org.apache.poi.hssf.record.formula.eval.ValueEval; +import org.apache.poi.hssf.record.formula.functions.LookupUtils.CompareResult; +import org.apache.poi.hssf.record.formula.functions.LookupUtils.LookupValueComparer; /** * Implementation for the MATCH() Excel function.<p/> @@ -62,17 +65,6 @@ import org.apache.poi.hssf.record.formula.eval.ValueEval; */ public final class Match implements Function { - private static final class EvalEx extends Exception { - private final ErrorEval _error; - - public EvalEx(ErrorEval error) { - _error = error; - } - public ErrorEval getError() { - return _error; - } - } - public Eval evaluate(Eval[] args, int srcCellRow, short srcCellCol) { @@ -82,7 +74,7 @@ public final class Match implements Function { case 3: try { match_type = evaluateMatchTypeArg(args[2], srcCellRow, srcCellCol); - } catch (EvalEx e) { + } catch (EvaluationException e) { // Excel/MATCH() seems to have slightly abnormal handling of errors with // the last parameter. Errors do not propagate up. Every error gets // translated into #REF! @@ -100,53 +92,16 @@ public final class Match implements Function { try { - ValueEval lookupValue = evaluateLookupValue(args[0], srcCellRow, srcCellCol); + ValueEval lookupValue = OperandResolver.getSingleValue(args[0], srcCellRow, srcCellCol); ValueEval[] lookupRange = evaluateLookupRange(args[1]); int index = findIndexOfValue(lookupValue, lookupRange, matchExact, findLargestLessThanOrEqual); return new NumberEval(index + 1); // +1 to convert to 1-based - } catch (EvalEx e) { - return e.getError(); - } - } - - private static ValueEval chooseSingleElementFromArea(AreaEval ae, - int srcCellRow, short srcCellCol) throws EvalEx { - if (ae.isColumn()) { - if(ae.isRow()) { - return ae.getValues()[0]; - } - if(!ae.containsRow(srcCellRow)) { - throw new EvalEx(ErrorEval.VALUE_INVALID); - } - return ae.getValueAt(srcCellRow, ae.getFirstColumn()); - } - if(!ae.isRow()) { - throw new EvalEx(ErrorEval.VALUE_INVALID); + } catch (EvaluationException e) { + return e.getErrorEval(); } - if(!ae.containsColumn(srcCellCol)) { - throw new EvalEx(ErrorEval.VALUE_INVALID); - } - return ae.getValueAt(ae.getFirstRow(), srcCellCol); - } - private static ValueEval evaluateLookupValue(Eval eval, int srcCellRow, short srcCellCol) - throws EvalEx { - if (eval instanceof RefEval) { - RefEval re = (RefEval) eval; - return re.getInnerValueEval(); - } - if (eval instanceof AreaEval) { - return chooseSingleElementFromArea((AreaEval) eval, srcCellRow, srcCellCol); - } - if (eval instanceof ValueEval) { - return (ValueEval) eval; - } - throw new RuntimeException("Unexpected eval type (" + eval.getClass().getName() + ")"); - } - - - private static ValueEval[] evaluateLookupRange(Eval eval) throws EvalEx { + private static ValueEval[] evaluateLookupRange(Eval eval) throws EvaluationException { if (eval instanceof RefEval) { RefEval re = (RefEval) eval; return new ValueEval[] { re.getInnerValueEval(), }; @@ -154,55 +109,36 @@ public final class Match implements Function { if (eval instanceof AreaEval) { AreaEval ae = (AreaEval) eval; if(!ae.isColumn() && !ae.isRow()) { - throw new EvalEx(ErrorEval.NA); + throw new EvaluationException(ErrorEval.NA); } return ae.getValues(); } // Error handling for lookup_range arg is also unusual if(eval instanceof NumericValueEval) { - throw new EvalEx(ErrorEval.NA); + throw new EvaluationException(ErrorEval.NA); } if (eval instanceof StringEval) { StringEval se = (StringEval) eval; - Double d = parseDouble(se.getStringValue()); + Double d = OperandResolver.parseDouble(se.getStringValue()); if(d == null) { // plain string - throw new EvalEx(ErrorEval.VALUE_INVALID); + throw new EvaluationException(ErrorEval.VALUE_INVALID); } // else looks like a number - throw new EvalEx(ErrorEval.NA); + throw new EvaluationException(ErrorEval.NA); } throw new RuntimeException("Unexpected eval type (" + eval.getClass().getName() + ")"); } - private static Double parseDouble(String stringValue) { - // TODO find better home for parseDouble - return Countif.parseDouble(stringValue); - } - - private static double evaluateMatchTypeArg(Eval arg, int srcCellRow, short srcCellCol) - throws EvalEx { - Eval match_type = arg; - if(arg instanceof AreaEval) { - AreaEval ae = (AreaEval) arg; - // an area ref can work as a scalar value if it is 1x1 - if(ae.isColumn() && ae.isRow()) { - match_type = ae.getValues()[0]; - } else { - match_type = chooseSingleElementFromArea(ae, srcCellRow, srcCellCol); - } - } - - if(match_type instanceof RefEval) { - RefEval re = (RefEval) match_type; - match_type = re.getInnerValueEval(); - } + throws EvaluationException { + Eval match_type = OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol); + if(match_type instanceof ErrorEval) { - throw new EvalEx((ErrorEval)match_type); + throw new EvaluationException((ErrorEval)match_type); } if(match_type instanceof NumericValueEval) { NumericValueEval ne = (NumericValueEval) match_type; @@ -210,12 +146,12 @@ public final class Match implements Function { } if (match_type instanceof StringEval) { StringEval se = (StringEval) match_type; - Double d = parseDouble(se.getStringValue()); + Double d = OperandResolver.parseDouble(se.getStringValue()); if(d == null) { // plain string - throw new EvalEx(ErrorEval.VALUE_INVALID); + throw new EvaluationException(ErrorEval.VALUE_INVALID); } - // if the string parses as a number, it is ok + // if the string parses as a number, it is OK return d.doubleValue(); } throw new RuntimeException("Unexpected match_type type (" + match_type.getClass().getName() + ")"); @@ -225,88 +161,66 @@ public final class Match implements Function { * @return zero based index */ private static int findIndexOfValue(ValueEval lookupValue, ValueEval[] lookupRange, - boolean matchExact, boolean findLargestLessThanOrEqual) throws EvalEx { - // TODO - wildcard matching when matchExact and lookupValue is text containing * or ? + boolean matchExact, boolean findLargestLessThanOrEqual) throws EvaluationException { + + LookupValueComparer lookupComparer = createLookupComparer(lookupValue, matchExact); + if(matchExact) { for (int i = 0; i < lookupRange.length; i++) { - ValueEval lri = lookupRange[i]; - if(lri.getClass() != lookupValue.getClass()) { - continue; - } - if(compareValues(lookupValue, lri) == 0) { + if(lookupComparer.compareTo(lookupRange[i]).isEqual()) { return i; } } - } else { + throw new EvaluationException(ErrorEval.NA); + } + + if(findLargestLessThanOrEqual) { // Note - backward iteration - if(findLargestLessThanOrEqual) { - for (int i = lookupRange.length - 1; i>=0; i--) { - ValueEval lri = lookupRange[i]; - if(lri.getClass() != lookupValue.getClass()) { - continue; - } - int cmp = compareValues(lookupValue, lri); - if(cmp == 0) { - return i; - } - if(cmp > 0) { - return i; - } + for (int i = lookupRange.length - 1; i>=0; i--) { + CompareResult cmp = lookupComparer.compareTo(lookupRange[i]); + if(cmp.isTypeMismatch()) { + continue; + } + if(!cmp.isLessThan()) { + return i; } - } else { - // find smallest greater than or equal to - for (int i = 0; i<lookupRange.length; i++) { - ValueEval lri = lookupRange[i]; - if(lri.getClass() != lookupValue.getClass()) { - continue; - } - int cmp = compareValues(lookupValue, lri); - if(cmp == 0) { - return i; - } - if(cmp > 0) { - if(i<1) { - throw new EvalEx(ErrorEval.NA); - } - return i-1; - } + } + throw new EvaluationException(ErrorEval.NA); + } + + // else - find smallest greater than or equal to + // TODO - is binary search used for (match_type==+1) ? + for (int i = 0; i<lookupRange.length; i++) { + CompareResult cmp = lookupComparer.compareTo(lookupRange[i]); + if(cmp.isEqual()) { + return i; + } + if(cmp.isGreaterThan()) { + if(i<1) { + throw new EvaluationException(ErrorEval.NA); } - + return i-1; } } - throw new EvalEx(ErrorEval.NA); + throw new EvaluationException(ErrorEval.NA); } - - /** - * This method can only compare a pair of <tt>NumericValueEval</tt>s, <tt>StringEval</tt>s - * or <tt>BoolEval</tt>s - * @return negative for a<b, positive for a>b and 0 for a = b - */ - private static int compareValues(ValueEval a, ValueEval b) { - if (a instanceof StringEval) { - StringEval sa = (StringEval) a; - StringEval sb = (StringEval) b; - return sa.getStringValue().compareToIgnoreCase(sb.getStringValue()); - } - if (a instanceof NumericValueEval) { - NumericValueEval na = (NumericValueEval) a; - NumericValueEval nb = (NumericValueEval) b; - return Double.compare(na.getNumberValue(), nb.getNumberValue()); - } - if (a instanceof BoolEval) { - boolean ba = ((BoolEval) a).getBooleanValue(); - boolean bb = ((BoolEval) b).getBooleanValue(); - if(ba == bb) { - return 0; - } - // TRUE > FALSE - if(ba) { - return +1; + private static LookupValueComparer createLookupComparer(ValueEval lookupValue, boolean matchExact) throws EvaluationException { + if (matchExact && lookupValue instanceof StringEval) { + String stringValue = ((StringEval) lookupValue).getStringValue(); + if(isLookupValueWild(stringValue)) { + throw new RuntimeException("Wildcard lookup values '" + stringValue + "' not supported yet"); } - return -1; + + } + return LookupUtils.createLookupComparer(lookupValue); + } + + private static boolean isLookupValueWild(String stringValue) { + if(stringValue.indexOf('?') >=0 || stringValue.indexOf('*') >=0) { + return true; } - throw new RuntimeException("bad eval type (" + a.getClass().getName() + ")"); + return false; } } diff --git a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Vlookup.java b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Vlookup.java index ad8b88daf9..7d27491df1 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Vlookup.java +++ b/src/scratchpad/src/org/apache/poi/hssf/record/formula/functions/Vlookup.java @@ -1,25 +1,123 @@ -/* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -/* - * Created on May 15, 2005 - * - */ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + package org.apache.poi.hssf.record.formula.functions; -public class Vlookup extends NotImplementedFunction { +import org.apache.poi.hssf.record.formula.eval.AreaEval; +import org.apache.poi.hssf.record.formula.eval.ErrorEval; +import org.apache.poi.hssf.record.formula.eval.Eval; +import org.apache.poi.hssf.record.formula.eval.EvaluationException; +import org.apache.poi.hssf.record.formula.eval.OperandResolver; +import org.apache.poi.hssf.record.formula.eval.ValueEval; +import org.apache.poi.hssf.record.formula.functions.LookupUtils.ValueVector; +/** + * Implementation of the VLOOKUP() function.<p/> + * + * VLOOKUP finds a row in a lookup table by the first column value and returns the value from another column. + * + * <b>Syntax</b>:<br/> + * <b>VLOOKUP</b>(<b>lookup_value</b>, <b>table_array</b>, <b>col_index_num</b>, range_lookup)<p/> + * + * <b>lookup_value</b> The value to be found in the first column of the table array.<br/> + * <b>table_array</> An area reference for the lookup data. <br/> + * <b>col_index_num</b> a 1 based index specifying which column value of the lookup data will be returned.<br/> + * <b>range_lookup</b> If TRUE (default), VLOOKUP finds the largest value less than or equal to + * the lookup_value. If FALSE, only exact matches will be considered<br/> + * + * @author Josh Micich + */ +public final class Vlookup implements Function { + + private static final class ColumnVector implements ValueVector { + + private final AreaEval _tableArray; + private final int _size; + private final int _columnAbsoluteIndex; + private final int _firstRowAbsoluteIndex; + + public ColumnVector(AreaEval tableArray, int columnIndex) { + _columnAbsoluteIndex = tableArray.getFirstColumn() + columnIndex; + if(!tableArray.containsColumn((short)_columnAbsoluteIndex)) { + int lastColIx = tableArray.getLastColumn() - tableArray.getFirstColumn(); + throw new IllegalArgumentException("Specified column index (" + columnIndex + + ") is outside the allowed range (0.." + lastColIx + ")"); + } + _tableArray = tableArray; + _size = tableArray.getLastRow() - tableArray.getFirstRow() + 1; + if(_size < 1) { + throw new RuntimeException("bad table array size zero"); + } + _firstRowAbsoluteIndex = tableArray.getFirstRow(); + } + + public ValueEval getItem(int index) { + if(index>_size) { + throw new ArrayIndexOutOfBoundsException("Specified index (" + index + + ") is outside the allowed range (0.." + (_size-1) + ")"); + } + return _tableArray.getValueAt(_firstRowAbsoluteIndex + index, (short)_columnAbsoluteIndex); + } + public int getSize() { + return _size; + } + } + + public Eval evaluate(Eval[] args, int srcCellRow, short srcCellCol) { + Eval arg3 = null; + switch(args.length) { + case 4: + arg3 = args[3]; // important: assumed array element is never null + case 3: + break; + default: + // wrong number of arguments + return ErrorEval.VALUE_INVALID; + } + try { + // Evaluation order: + // arg0 lookup_value, arg1 table_array, arg3 range_lookup, find lookup value, arg2 col_index, fetch result + ValueEval lookupValue = OperandResolver.getSingleValue(args[0], srcCellRow, srcCellCol); + AreaEval tableArray = LookupUtils.resolveTableArrayArg(args[1]); + boolean isRangeLookup = LookupUtils.resolveRangeLookupArg(arg3, srcCellRow, srcCellCol); + int rowIndex = LookupUtils.lookupIndexOfValue(lookupValue, new ColumnVector(tableArray, 0), isRangeLookup); + ValueEval veColIndex = OperandResolver.getSingleValue(args[2], srcCellRow, srcCellCol); + int colIndex = LookupUtils.resolveRowOrColIndexArg(veColIndex); + ValueVector resultCol = createResultColumnVector(tableArray, colIndex); + return resultCol.getItem(rowIndex); + } catch (EvaluationException e) { + return e.getErrorEval(); + } + } + + /** + * Returns one column from an <tt>AreaEval</tt> + * + * @throws EvaluationException (#VALUE!) if colIndex is negative, (#REF!) if colIndex is too high + */ + private ValueVector createResultColumnVector(AreaEval tableArray, int colIndex) throws EvaluationException { + if(colIndex < 0) { + throw EvaluationException.invalidValue(); + } + int nCols = tableArray.getLastColumn() - tableArray.getFirstColumn() + 1; + + if(colIndex >= nCols) { + throw EvaluationException.invalidRef(); + } + return new ColumnVector(tableArray, colIndex); + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hssf/record/formula/functions/TestLookupFunctionsFromSpreadsheet.java b/src/scratchpad/testcases/org/apache/poi/hssf/record/formula/functions/TestLookupFunctionsFromSpreadsheet.java new file mode 100644 index 0000000000..071ca0f7d8 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hssf/record/formula/functions/TestLookupFunctionsFromSpreadsheet.java @@ -0,0 +1,385 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + + +package org.apache.poi.hssf.record.formula.functions; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.PrintStream; + +import junit.framework.Assert; +import junit.framework.AssertionFailedError; +import junit.framework.TestCase; + +import org.apache.poi.hssf.record.formula.eval.ErrorEval; +import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator; +import org.apache.poi.hssf.usermodel.HSSFRow; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator.CellValue; +import org.apache.poi.hssf.util.CellReference; + +/** + * Tests lookup functions (VLOOKUP, HLOOKUP, LOOKUP, MATCH) as loaded from a test data spreadsheet.<p/> + * These tests have been separated from the common function and operator tests because the lookup + * functions have more complex test cases and test data setup. + * + * Tests for bug fixes and specific/tricky behaviour can be found in the corresponding test class + * (<tt>TestXxxx</tt>) of the target (<tt>Xxxx</tt>) implementor, where execution can be observed + * more easily. + * + * @author Josh Micich + */ +public final class TestLookupFunctionsFromSpreadsheet extends TestCase { + + private static final class Result { + public static final int SOME_EVALUATIONS_FAILED = -1; + public static final int ALL_EVALUATIONS_SUCCEEDED = +1; + public static final int NO_EVALUATIONS_FOUND = 0; + } + + /** + * This class defines constants for navigating around the test data spreadsheet used for these tests. + */ + private static final class SS { + + /** Name of the test spreadsheet (found in the standard test data folder) */ + public final static String FILENAME = "LookupFunctionsTestCaseData.xls"; + + /** Name of the first sheet in the spreadsheet (contains comments) */ + public final static String README_SHEET_NAME = "Read Me"; + + + /** Row (zero-based) in each sheet where the evaluation cases start. */ + public static final int START_TEST_CASES_ROW_INDEX = 4; // Row '5' + /** Index of the column that contains the function names */ + public static final short COLUMN_INDEX_MARKER = 0; // Column 'A' + public static final short COLUMN_INDEX_EVALUATION = 1; // Column 'B' + public static final short COLUMN_INDEX_EXPECTED_RESULT = 2; // Column 'C' + public static final short COLUMN_ROW_COMMENT = 3; // Column 'D' + + /** Used to indicate when there are no more test cases on the current sheet */ + public static final String TEST_CASES_END_MARKER = "<end>"; + /** Used to indicate that the test on the current row should be ignored */ + public static final String SKIP_CURRENT_TEST_CASE_MARKER = "<skip>"; + + } + + // Note - multiple failures are aggregated before ending. + // If one or more functions fail, a single AssertionFailedError is thrown at the end + private int _sheetFailureCount; + private int _sheetSuccessCount; + private int _evaluationFailureCount; + private int _evaluationSuccessCount; + + + + private static void confirmExpectedResult(String msg, HSSFCell expected, HSSFFormulaEvaluator.CellValue actual) { + if (expected == null) { + throw new AssertionFailedError(msg + " - Bad setup data expected value is null"); + } + if(actual == null) { + throw new AssertionFailedError(msg + " - actual value was null"); + } + if(expected.getCellType() == HSSFCell.CELL_TYPE_ERROR) { + confirmErrorResult(msg, expected.getErrorCellValue(), actual); + return; + } + if(actual.getCellType() == HSSFCell.CELL_TYPE_ERROR) { + throw unexpectedError(msg, expected, actual.getErrorValue()); + } + if(actual.getCellType() != expected.getCellType()) { + throw wrongTypeError(msg, expected, actual); + } + + + switch (expected.getCellType()) { + case HSSFCell.CELL_TYPE_BOOLEAN: + assertEquals(msg, expected.getBooleanCellValue(), actual.getBooleanValue()); + break; + case HSSFCell.CELL_TYPE_FORMULA: // will never be used, since we will call method after formula evaluation + throw new AssertionFailedError("Cannot expect formula as result of formula evaluation: " + msg); + case HSSFCell.CELL_TYPE_NUMERIC: + assertEquals(expected.getNumericCellValue(), actual.getNumberValue(), 0.0); + break; + case HSSFCell.CELL_TYPE_STRING: + assertEquals(msg, expected.getRichStringCellValue().getString(), actual.getRichTextStringValue().getString()); + break; + } + } + + + private static AssertionFailedError wrongTypeError(String msgPrefix, HSSFCell expectedCell, CellValue actualValue) { + return new AssertionFailedError(msgPrefix + " Result type mismatch. Evaluated result was " + + formatValue(actualValue) + + " but the expected result was " + + formatValue(expectedCell) + ); + } + private static AssertionFailedError unexpectedError(String msgPrefix, HSSFCell expected, int actualErrorCode) { + return new AssertionFailedError(msgPrefix + " Error code (" + + ErrorEval.getText(actualErrorCode) + + ") was evaluated, but the expected result was " + + formatValue(expected) + ); + } + + + private static void confirmErrorResult(String msgPrefix, int expectedErrorCode, CellValue actual) { + if(actual.getCellType() != HSSFCell.CELL_TYPE_ERROR) { + throw new AssertionFailedError(msgPrefix + " Expected cell error (" + + ErrorEval.getText(expectedErrorCode) + ") but actual value was " + + formatValue(actual)); + } + if(expectedErrorCode != actual.getErrorValue()) { + throw new AssertionFailedError(msgPrefix + " Expected cell error code (" + + ErrorEval.getText(expectedErrorCode) + + ") but actual error code was (" + + ErrorEval.getText(actual.getErrorValue()) + + ")"); + } + } + + + private static String formatValue(HSSFCell expecedCell) { + switch (expecedCell.getCellType()) { + case HSSFCell.CELL_TYPE_BLANK: return "<blank>"; + case HSSFCell.CELL_TYPE_BOOLEAN: return String.valueOf(expecedCell.getBooleanCellValue()); + case HSSFCell.CELL_TYPE_NUMERIC: return String.valueOf(expecedCell.getNumericCellValue()); + case HSSFCell.CELL_TYPE_STRING: return expecedCell.getRichStringCellValue().getString(); + } + throw new RuntimeException("Unexpected cell type of expected value (" + expecedCell.getCellType() + ")"); + } + private static String formatValue(CellValue actual) { + switch (actual.getCellType()) { + case HSSFCell.CELL_TYPE_BLANK: return "<blank>"; + case HSSFCell.CELL_TYPE_BOOLEAN: return String.valueOf(actual.getBooleanValue()); + case HSSFCell.CELL_TYPE_NUMERIC: return String.valueOf(actual.getNumberValue()); + case HSSFCell.CELL_TYPE_STRING: return actual.getRichTextStringValue().getString(); + } + throw new RuntimeException("Unexpected cell type of evaluated value (" + actual.getCellType() + ")"); + } + + + protected void setUp() throws Exception { + _sheetFailureCount = 0; + _sheetSuccessCount = 0; + _evaluationFailureCount = 0; + _evaluationSuccessCount = 0; + } + + public void testFunctionsFromTestSpreadsheet() { + String filePath = System.getProperty("HSSF.testdata.path")+ "/" + SS.FILENAME; + HSSFWorkbook workbook; + try { + FileInputStream fin = new FileInputStream( filePath ); + workbook = new HSSFWorkbook( fin ); + } catch (IOException e) { + throw new RuntimeException(e); + } + + confirmReadMeSheet(workbook); + int nSheets = workbook.getNumberOfSheets(); + for(int i=1; i< nSheets; i++) { + int sheetResult = processTestSheet(workbook, i, workbook.getSheetName(i)); + switch(sheetResult) { + case Result.ALL_EVALUATIONS_SUCCEEDED: _sheetSuccessCount ++; break; + case Result.SOME_EVALUATIONS_FAILED: _sheetFailureCount ++; break; + } + } + + // confirm results + String successMsg = "There were " + + _sheetSuccessCount + " successful sheets(s) and " + + _evaluationSuccessCount + " function(s) without error"; + if(_sheetFailureCount > 0) { + String msg = _sheetFailureCount + " sheets(s) failed with " + + _evaluationFailureCount + " evaluation(s). " + successMsg; + throw new AssertionFailedError(msg); + } + if(false) { // normally no output for successful tests + System.out.println(getClass().getName() + ": " + successMsg); + } + } + + private int processTestSheet(HSSFWorkbook workbook, int sheetIndex, String sheetName) { + HSSFSheet sheet = workbook.getSheetAt(sheetIndex); + HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, workbook); + int maxRows = sheet.getLastRowNum()+1; + int result = Result.NO_EVALUATIONS_FOUND; // so far + + String currentGroupComment = null; + for(int rowIndex=SS.START_TEST_CASES_ROW_INDEX; rowIndex<maxRows; rowIndex++) { + HSSFRow r = sheet.getRow(rowIndex); + String newMarkerValue = getMarkerColumnValue(r); + if(r == null) { + continue; + } + if(SS.TEST_CASES_END_MARKER.equalsIgnoreCase(newMarkerValue)) { + // normal exit point + return result; + } + if(SS.SKIP_CURRENT_TEST_CASE_MARKER.equalsIgnoreCase(newMarkerValue)) { + // currently disabled test case row + continue; + } + if(newMarkerValue != null) { + currentGroupComment = newMarkerValue; + } + HSSFCell c = r.getCell(SS.COLUMN_INDEX_EVALUATION); + if (c == null || c.getCellType() != HSSFCell.CELL_TYPE_FORMULA) { + continue; + } + evaluator.setCurrentRow(r); + CellValue actualValue = evaluator.evaluate(c); + HSSFCell expectedValueCell = r.getCell(SS.COLUMN_INDEX_EXPECTED_RESULT); + String rowComment = getRowCommentColumnValue(r); + + String msgPrefix = formatTestCaseDetails(sheetName, r.getRowNum(), c, currentGroupComment, rowComment); + try { + confirmExpectedResult(msgPrefix, expectedValueCell, actualValue); + _evaluationSuccessCount ++; + if(result != Result.SOME_EVALUATIONS_FAILED) { + result = Result.ALL_EVALUATIONS_SUCCEEDED; + } + } catch (RuntimeException e) { + _evaluationFailureCount ++; + printShortStackTrace(System.err, e); + result = Result.SOME_EVALUATIONS_FAILED; + } catch (AssertionFailedError e) { + _evaluationFailureCount ++; + printShortStackTrace(System.err, e); + result = Result.SOME_EVALUATIONS_FAILED; + } + + } + throw new RuntimeException("Missing end marker '" + SS.TEST_CASES_END_MARKER + + "' on sheet '" + sheetName + "'"); + + } + + + private static String formatTestCaseDetails(String sheetName, int rowNum, HSSFCell c, String currentGroupComment, + String rowComment) { + + StringBuffer sb = new StringBuffer(); + CellReference cr = new CellReference(sheetName, rowNum, c.getCellNum(), false, false); + sb.append(cr.formatAsString()); + sb.append(" {=").append(c.getCellFormula()).append("}"); + + if(currentGroupComment != null) { + sb.append(" '"); + sb.append(currentGroupComment); + if(rowComment != null) { + sb.append(" - "); + sb.append(rowComment); + } + sb.append("' "); + } else { + if(rowComment != null) { + sb.append(" '"); + sb.append(rowComment); + sb.append("' "); + } + } + + return sb.toString(); + } + + /** + * Asserts that the 'read me' comment page exists, and has this class' name in one of the + * cells. This back-link is to make it easy to find this class if a reader encounters the + * spreadsheet first. + */ + private void confirmReadMeSheet(HSSFWorkbook workbook) { + String firstSheetName = workbook.getSheetName(0); + if(!firstSheetName.equalsIgnoreCase(SS.README_SHEET_NAME)) { + throw new RuntimeException("First sheet's name was '" + firstSheetName + "' but expected '" + SS.README_SHEET_NAME + "'"); + } + HSSFSheet sheet = workbook.getSheetAt(0); + String specifiedClassName = sheet.getRow(2).getCell((short)0).getRichStringCellValue().getString(); + assertEquals("Test class name in spreadsheet comment", getClass().getName(), specifiedClassName); + + } + + + /** + * Useful to keep output concise when expecting many failures to be reported by this test case + */ + private static void printShortStackTrace(PrintStream ps, Throwable e) { + StackTraceElement[] stes = e.getStackTrace(); + + int startIx = 0; + // skip any top frames inside junit.framework.Assert + while(startIx<stes.length) { + if(!stes[startIx].getClassName().equals(Assert.class.getName())) { + break; + } + startIx++; + } + // skip bottom frames (part of junit framework) + int endIx = startIx+1; + while(endIx < stes.length) { + if(stes[endIx].getClassName().equals(TestCase.class.getName())) { + break; + } + endIx++; + } + if(startIx >= endIx) { + // something went wrong. just print the whole stack trace + e.printStackTrace(ps); + } + endIx -= 4; // skip 4 frames of reflection invocation + ps.println(e.toString()); + for(int i=startIx; i<endIx; i++) { + ps.println("\tat " + stes[i].toString()); + } + + } + + private static String getRowCommentColumnValue(HSSFRow r) { + return getCellTextValue(r, SS.COLUMN_ROW_COMMENT, "row comment"); + } + + private static String getMarkerColumnValue(HSSFRow r) { + return getCellTextValue(r, SS.COLUMN_INDEX_MARKER, "marker"); + } + + /** + * @return <code>null</code> if cell is missing, empty or blank + */ + private static String getCellTextValue(HSSFRow r, int colIndex, String columnName) { + if(r == null) { + return null; + } + HSSFCell cell = r.getCell((short) colIndex); + if(cell == null) { + return null; + } + if(cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) { + return null; + } + if(cell.getCellType() == HSSFCell.CELL_TYPE_STRING) { + return cell.getRichStringCellValue().getString(); + } + + throw new RuntimeException("Bad cell type for '" + columnName + "' column: (" + + cell.getCellType() + ") row (" + (r.getRowNum() +1) + ")"); + } +} |