From 5e0d5e9510084a7a73ffe73010e6b524d0dbb02e Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Mon, 6 Apr 2009 08:22:25 +0000 Subject: [PATCH] Bug 46951 - fixed formula parser to better handle range operators and whole row/column refs. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@762250 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../poi/hssf/record/formula/Area3DPtg.java | 2 +- .../poi/hssf/record/formula/MemAreaPtg.java | 4 + .../apache/poi/ss/formula/FormulaParser.java | 2547 ++++++++++------- .../ss/formula/OperandClassTransformer.java | 2 + .../org/apache/poi/ss/formula/ParseNode.java | 6 +- .../org/apache/poi/ss/util/AreaReference.java | 10 +- .../org/apache/poi/ss/util/CellReference.java | 2 +- .../poi/hssf/model/TestFormulaParser.java | 260 +- 10 files changed, 1692 insertions(+), 1143 deletions(-) diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index c9f1eab99c..58f29dd78c 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 46951 - Fixed formula parser to better handle range operators and whole row/column refs. 46948 - Fixed evaluation of range operator to allow for area-ref operands 46918 - Fixed ExtendedPivotTableViewFieldsRecord(SXVDEX) to allow shorter format 46898 - Fixed formula evaluator to not cache intermediate circular-reference error results diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 65923e12c2..45e4518452 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 46951 - Fixed formula parser to better handle range operators and whole row/column refs. 46948 - Fixed evaluation of range operator to allow for area-ref operands 46918 - Fixed ExtendedPivotTableViewFieldsRecord(SXVDEX) to allow shorter format 46898 - Fixed formula evaluator to not cache intermediate circular-reference error results diff --git a/src/java/org/apache/poi/hssf/record/formula/Area3DPtg.java b/src/java/org/apache/poi/hssf/record/formula/Area3DPtg.java index 26aa154e75..aec3e03503 100644 --- a/src/java/org/apache/poi/hssf/record/formula/Area3DPtg.java +++ b/src/java/org/apache/poi/hssf/record/formula/Area3DPtg.java @@ -17,7 +17,7 @@ package org.apache.poi.hssf.record.formula; -import org.apache.poi.hssf.util.AreaReference; +import org.apache.poi.ss.util.AreaReference; import org.apache.poi.ss.formula.ExternSheetReferenceToken; import org.apache.poi.ss.formula.FormulaRenderingWorkbook; import org.apache.poi.ss.formula.WorkbookDependentFormula; diff --git a/src/java/org/apache/poi/hssf/record/formula/MemAreaPtg.java b/src/java/org/apache/poi/hssf/record/formula/MemAreaPtg.java index d0b20e2c12..ee771207c7 100644 --- a/src/java/org/apache/poi/hssf/record/formula/MemAreaPtg.java +++ b/src/java/org/apache/poi/hssf/record/formula/MemAreaPtg.java @@ -41,6 +41,10 @@ public final class MemAreaPtg extends OperandPtg { field_2_subex_len = in.readShort(); } + public int getLenRefSubexpression() { + return field_2_subex_len; + } + public void write(LittleEndianOutput out) { out.writeByte(sid + getPtgClass()); out.writeInt(field_1_reserved); diff --git a/src/java/org/apache/poi/ss/formula/FormulaParser.java b/src/java/org/apache/poi/ss/formula/FormulaParser.java index 98c93e0e5c..8311a17c44 100644 --- a/src/java/org/apache/poi/ss/formula/FormulaParser.java +++ b/src/java/org/apache/poi/ss/formula/FormulaParser.java @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -19,57 +19,23 @@ package org.apache.poi.ss.formula; import java.util.ArrayList; import java.util.List; +import java.util.regex.Pattern; import org.apache.poi.hssf.record.UnicodeString; import org.apache.poi.hssf.record.constant.ErrorConstant; -import org.apache.poi.hssf.record.formula.AbstractFunctionPtg; -import org.apache.poi.hssf.record.formula.AddPtg; -import org.apache.poi.hssf.record.formula.Area3DPtg; -import org.apache.poi.hssf.record.formula.AreaPtg; -import org.apache.poi.hssf.record.formula.ArrayPtg; -import org.apache.poi.hssf.record.formula.AttrPtg; -import org.apache.poi.hssf.record.formula.BoolPtg; -import org.apache.poi.hssf.record.formula.ConcatPtg; -import org.apache.poi.hssf.record.formula.DividePtg; -import org.apache.poi.hssf.record.formula.EqualPtg; -import org.apache.poi.hssf.record.formula.ErrPtg; -import org.apache.poi.hssf.record.formula.FuncPtg; -import org.apache.poi.hssf.record.formula.FuncVarPtg; -import org.apache.poi.hssf.record.formula.GreaterEqualPtg; -import org.apache.poi.hssf.record.formula.GreaterThanPtg; -import org.apache.poi.hssf.record.formula.IntPtg; -import org.apache.poi.hssf.record.formula.LessEqualPtg; -import org.apache.poi.hssf.record.formula.LessThanPtg; -import org.apache.poi.hssf.record.formula.MemFuncPtg; -import org.apache.poi.hssf.record.formula.MissingArgPtg; -import org.apache.poi.hssf.record.formula.MultiplyPtg; -import org.apache.poi.hssf.record.formula.NamePtg; -import org.apache.poi.hssf.record.formula.NameXPtg; -import org.apache.poi.hssf.record.formula.NotEqualPtg; -import org.apache.poi.hssf.record.formula.NumberPtg; -import org.apache.poi.hssf.record.formula.ParenthesisPtg; -import org.apache.poi.hssf.record.formula.PercentPtg; -import org.apache.poi.hssf.record.formula.PowerPtg; -import org.apache.poi.hssf.record.formula.Ptg; -import org.apache.poi.hssf.record.formula.RangePtg; -import org.apache.poi.hssf.record.formula.Ref3DPtg; -import org.apache.poi.hssf.record.formula.RefPtg; -import org.apache.poi.hssf.record.formula.StringPtg; -import org.apache.poi.hssf.record.formula.SubtractPtg; -import org.apache.poi.hssf.record.formula.UnaryMinusPtg; -import org.apache.poi.hssf.record.formula.UnaryPlusPtg; -import org.apache.poi.hssf.record.formula.UnionPtg; +import org.apache.poi.hssf.record.formula.*; +import org.apache.poi.hssf.record.formula.ValueOperatorPtg; import org.apache.poi.hssf.record.formula.function.FunctionMetadata; import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry; import org.apache.poi.hssf.usermodel.HSSFErrorConstants; -import org.apache.poi.hssf.util.AreaReference; -import org.apache.poi.hssf.util.CellReference; +import org.apache.poi.ss.util.AreaReference; +import org.apache.poi.ss.util.CellReference; import org.apache.poi.ss.util.CellReference.NameType; /** * This class parses a formula string into a List of tokens in RPN order. * Inspired by - * Lets Build a Compiler, by Jack Crenshaw + * Lets Build a Compiler, by Jack Crenshaw * BNF for the formula expression is : * ::= [ ]* * ::= [ ]* @@ -85,1025 +51,1440 @@ import org.apache.poi.ss.util.CellReference.NameType; * @author Josh Micich */ public final class FormulaParser { - private static final class Identifier { - private final String _name; - private final boolean _isQuoted; - - public Identifier(String name, boolean isQuoted) { - _name = name; - _isQuoted = isQuoted; - } - public String getName() { - return _name; - } - public boolean isQuoted() { - return _isQuoted; - } - public String toString() { - StringBuffer sb = new StringBuffer(64); - sb.append(getClass().getName()); - sb.append(" ["); - if (_isQuoted) { - sb.append("'").append(_name).append("'"); - } else { - sb.append(_name); - } - sb.append("]"); - return sb.toString(); - } - } - - /** - * Specific exception thrown when a supplied formula does not parse properly.
- * Primarily used by test cases when testing for specific parsing exceptions.

- * - */ - static final class FormulaParseException extends RuntimeException { - // This class was given package scope until it would become clear that it is useful to - // general client code. - public FormulaParseException(String msg) { - super(msg); - } - } - - - private final String _formulaString; - private final int _formulaLength; - private int _pointer; - - private ParseNode _rootNode; - - private static char TAB = '\t'; - - /** - * Lookahead Character. - * gets value '\0' when the input string is exhausted - */ - private char look; - - private FormulaParsingWorkbook _book; - - private int _sheetIndex; - - - /** - * Create the formula parser, with the string that is to be - * parsed against the supplied workbook. - * A later call the parse() method to return ptg list in - * rpn order, then call the getRPNPtg() to retrieve the - * parse results. - * This class is recommended only for single threaded use. - * - * If you only have a usermodel.HSSFWorkbook, and not a - * model.Workbook, then use the convenience method on - * usermodel.HSSFFormulaEvaluator - */ - private FormulaParser(String formula, FormulaParsingWorkbook book, int sheetIndex){ - _formulaString = formula; - _pointer=0; - _book = book; - _formulaLength = _formulaString.length(); - _sheetIndex = sheetIndex; - } - - public static Ptg[] parse(String formula, FormulaParsingWorkbook book) { - return parse(formula, book, FormulaType.CELL); - } - - public static Ptg[] parse(String formula, FormulaParsingWorkbook workbook, int formulaType) { - return parse(formula, workbook, formulaType, -1); - } - - /** - * Parse a formula into a array of tokens - * - * @param formula the formula to parse - * @param workbook the parent workbook - * @param formulaType the type of the formula, see {@link FormulaType} - * @param sheetIndex the 0-based index of the sheet this formula belongs to. - * The sheet index is required to resolve sheet-level names. -1 means that - * the scope of the name will be ignored and the parser will match names only by name - * - * @return array of parsed tokens - * @throws FormulaParseException if the formula is unparsable - */ - public static Ptg[] parse(String formula, FormulaParsingWorkbook workbook, int formulaType, int sheetIndex) { - FormulaParser fp = new FormulaParser(formula, workbook, sheetIndex); - fp.parse(); - return fp.getRPNPtg(formulaType); - } - - /** Read New Character From Input Stream */ - private void GetChar() { - // Check to see if we've walked off the end of the string. - if (_pointer > _formulaLength) { - throw new RuntimeException("too far"); - } - if (_pointer < _formulaLength) { - look=_formulaString.charAt(_pointer); - } else { - // Just return if so and reset 'look' to something to keep - // SkipWhitespace from spinning - look = (char)0; - } - _pointer++; - //System.out.println("Got char: "+ look); - } - - /** Report What Was Expected */ - private RuntimeException expected(String s) { - String msg; - - if (look == '=' && _formulaString.substring(0, _pointer-1).trim().length() < 1) { - msg = "The specified formula '" + _formulaString - + "' starts with an equals sign which is not allowed."; - } else { - msg = "Parse error near char " + (_pointer-1) + " '" + look + "'" - + " in specified formula '" + _formulaString + "'. Expected " - + s; - } - return new FormulaParseException(msg); - } - - /** Recognize an Alpha Character */ - private static boolean IsAlpha(char c) { - return Character.isLetter(c) || c == '$' || c=='_'; - } - - /** Recognize a Decimal Digit */ - private static boolean IsDigit(char c) { - return Character.isDigit(c); - } - - /** Recognize an Alphanumeric */ - private static boolean IsAlNum(char c) { - return IsAlpha(c) || IsDigit(c); - } - - /** Recognize White Space */ - private static boolean IsWhite( char c) { - return c ==' ' || c== TAB; - } - - /** Skip Over Leading White Space */ - private void SkipWhite() { - while (IsWhite(look)) { - GetChar(); - } - } - - /** - * Consumes the next input character if it is equal to the one specified otherwise throws an - * unchecked exception. This method does not consume whitespace (before or after the - * matched character). - */ - private void Match(char x) { - if (look != x) { - throw expected("'" + x + "'"); - } - GetChar(); - } - private String parseUnquotedIdentifier() { - Identifier iden = parseIdentifier(); - if (iden.isQuoted()) { - throw expected("unquoted identifier"); - } - return iden.getName(); - } - /** - * Parses a sheet name, named range name, or simple cell reference.
- * Note - identifiers in Excel can contain dots, so this method may return a String - * which may need to be converted to an area reference. For example, this method - * may return a value like "A1..B2", in which case the caller must convert it to - * an area reference like "A1:B2" - */ - private Identifier parseIdentifier() { - StringBuffer sb = new StringBuffer(); - if (!IsAlpha(look) && look != '\'' && look != '[') { - throw expected("Name"); - } - boolean isQuoted = look == '\''; - if(isQuoted) { - Match('\''); - boolean done = look == '\''; - while(!done) { - sb.append(look); - GetChar(); - if(look == '\'') - { - Match('\''); - done = look != '\''; - } - } - } else { - // allow for any sequence of dots and identifier chars - // special case of two consecutive dots is best treated in the calling code - while (IsAlNum(look) || look == '.' || look == '[' || look == ']' || look == '\\') { - sb.append(look); - GetChar(); - } - } - return new Identifier(sb.toString(), isQuoted); - } - - /** Get a Number */ - private String GetNum() { - StringBuffer value = new StringBuffer(); - - while (IsDigit(this.look)){ - value.append(this.look); - GetChar(); - } - return value.length() == 0 ? null : value.toString(); - } - - private ParseNode parseFunctionReferenceOrName() { - Identifier iden = parseIdentifier(); - if (look == '('){ - //This is a function - return function(iden.getName()); - } - if (!iden.isQuoted()) { - String name = iden.getName(); - if (name.equalsIgnoreCase("TRUE") || name.equalsIgnoreCase("FALSE")) { - return new ParseNode(new BoolPtg(name.toUpperCase())); - } - } - return parseRangeExpression(iden); - } - - private ParseNode parseRangeExpression(Identifier iden) { - Ptg ptgA = parseNameOrCellRef(iden); - if (look == ':') { - GetChar(); - Identifier iden2 = parseIdentifier(); - Ptg ptgB = parseNameOrCellRef(iden2); - Ptg simplified = reduceRangeExpression(ptgA, ptgB); - - if (simplified == null) { - ParseNode[] children = { - new ParseNode(ptgA), - new ParseNode(ptgB), - }; - ParseNode result = new ParseNode(RangePtg.instance, children); - MemFuncPtg memFuncPtg = new MemFuncPtg(result.getEncodedSize()); - return new ParseNode(memFuncPtg, result); - } - return new ParseNode(simplified); - } - return new ParseNode(ptgA); - } - - /** - * - * "A1", "B3" -> "A1:B3" - * "sheet1!A1", "B3" -> "sheet1!A1:B3" - * - * @return null if the range expression cannot / shouldn't be reduced. - */ - private static Ptg reduceRangeExpression(Ptg ptgA, Ptg ptgB) { - if (!(ptgB instanceof RefPtg)) { - // only when second ref is simple 2-D ref can the range - // expression be converted to an area ref - return null; - } - RefPtg refB = (RefPtg) ptgB; - - if (ptgA instanceof RefPtg) { - RefPtg refA = (RefPtg) ptgA; - return new AreaPtg(refA.getRow(), refB.getRow(), refA.getColumn(), refB.getColumn(), - refA.isRowRelative(), refB.isRowRelative(), refA.isColRelative(), refB.isColRelative()); - } - if (ptgA instanceof Ref3DPtg) { - Ref3DPtg refA = (Ref3DPtg) ptgA; - return new Area3DPtg(refA.getRow(), refB.getRow(), refA.getColumn(), refB.getColumn(), - refA.isRowRelative(), refB.isRowRelative(), refA.isColRelative(), refB.isColRelative(), - refA.getExternSheetIndex()); - } - // Note - other operand types (like AreaPtg) which probably can't evaluate - // do not cause validation errors at parse time - return null; - } - - private Ptg parseNameOrCellRef(Identifier iden) { - - if (look == '!') { - GetChar(); - // 3-D ref - // this code assumes iden is a sheetName - // TODO - handle ! - int externIdx = getExternalSheetIndex(iden.getName()); - String secondIden = parseUnquotedIdentifier(); - if (isRowOrCol(secondIden) && look == ':') { - GetChar(); - String thirdIden = parseUnquotedIdentifier(); - return new Area3DPtg(secondIden + ":" + thirdIden, externIdx); - } - AreaReference areaRef = parseArea(secondIden); - if (areaRef == null) { - return new Ref3DPtg(secondIden, externIdx); - } - // will happen if dots are used instead of colon - return new Area3DPtg(areaRef.formatAsString(), externIdx); - } - - String name = iden.getName(); - AreaReference areaRef = parseArea(name); - if (areaRef != null) { - // will happen if dots are used instead of colon - return new AreaPtg(areaRef.formatAsString()); - } - // This can be either a cell ref or a named range - - - int nameType = CellReference.classifyCellReference(name); - if (nameType == NameType.CELL) { - return new RefPtg(name); - } - if (look == ':') { - if (nameType == NameType.COLUMN) { - GetChar(); - String secondIden = parseUnquotedIdentifier(); - if (CellReference.classifyCellReference(secondIden) != NameType.COLUMN) { - throw new FormulaParseException("Expected full column after '" + name - + ":' but got '" + secondIden + "'"); - } - return new AreaPtg(name + ":" + secondIden); - } - } - if (nameType != NameType.NAMED_RANGE) { - new FormulaParseException("Name '" + name - + "' does not look like a cell reference or named range"); - } - EvaluationName evalName = _book.getName(name, _sheetIndex); - if (evalName == null) { - throw new FormulaParseException("Specified named range '" - + name + "' does not exist in the current workbook."); - } - if (evalName.isRange()) { - return evalName.createPtg(); - } - throw new FormulaParseException("Specified name '" - + name + "' is not a range as expected"); - } - - private static boolean isRowOrCol(String str) { - int i=0; - if (str.charAt(i) == '$') { - i++; - } - if (IsDigit(str.charAt(i))) { - while (inull if name cannot be split at a dot - */ - private AreaReference parseArea(String name) { - int dotPos = name.indexOf('.'); - if (dotPos < 0) { - return null; - } - int dotCount = 1; - while (dotCount3) { - // four or more consecutive dots does not convert to ':' - return null; - } - } - // This expression is only valid as an area ref, if the LHS and RHS of the dot(s) are both - // cell refs. Otherwise, this expression must be a named range name - String partA = name.substring(0, dotPos); - if (!isValidCellReference(partA)) { - return null; - } - String partB = name.substring(dotPos+dotCount); - if (!isValidCellReference(partB)) { - return null; - } - CellReference topLeft = new CellReference(partA); - CellReference bottomRight = new CellReference(partB); - return new AreaReference(topLeft, bottomRight); - } - - /** - * @return true if the specified name is a valid cell reference - */ - private static boolean isValidCellReference(String str) { - return CellReference.classifyCellReference(str) == NameType.CELL; - } - - - /** - * Note - Excel function names are 'case aware but not case sensitive'. This method may end - * up creating a defined name record in the workbook if the specified name is not an internal - * Excel function, and has not been encountered before. - * - * @param name case preserved function name (as it was entered/appeared in the formula). - */ - private ParseNode function(String name) { - Ptg nameToken = null; - if(!AbstractFunctionPtg.isBuiltInFunctionName(name)) { - // user defined function - // in the token tree, the name is more or less the first argument - - EvaluationName hName = _book.getName(name, _sheetIndex); - if (hName == null) { - - nameToken = _book.getNameXPtg(name); - if (nameToken == null) { - throw new FormulaParseException("Name '" + name - + "' is completely unknown in the current workbook"); - } - } else { - if (!hName.isFunctionName()) { - throw new FormulaParseException("Attempt to use name '" + name - + "' as a function, but defined name in workbook does not refer to a function"); - } - - // calls to user-defined functions within the workbook - // get a Name token which points to a defined name record - nameToken = hName.createPtg(); - } - } - - Match('('); - ParseNode[] args = Arguments(); - Match(')'); - - return getFunction(name, nameToken, args); - } - - /** - * Generates the variable function ptg for the formula. - *

- * For IF Formulas, additional PTGs are added to the tokens - * @param name a {@link NamePtg} or {@link NameXPtg} or null - * @param numArgs - * @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function - */ - private ParseNode getFunction(String name, Ptg namePtg, ParseNode[] args) { - - FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase()); - int numArgs = args.length; - if(fm == null) { - if (namePtg == null) { - throw new IllegalStateException("NamePtg must be supplied for external functions"); - } - // must be external function - ParseNode[] allArgs = new ParseNode[numArgs+1]; - allArgs[0] = new ParseNode(namePtg); - System.arraycopy(args, 0, allArgs, 1, numArgs); - return new ParseNode(new FuncVarPtg(name, (byte)(numArgs+1)), allArgs); - } - - if (namePtg != null) { - throw new IllegalStateException("NamePtg no applicable to internal functions"); - } - boolean isVarArgs = !fm.hasFixedArgsLength(); - int funcIx = fm.getIndex(); - if (funcIx == FunctionMetadataRegistry.FUNCTION_INDEX_SUM && args.length == 1) { - // Excel encodes the sum of a single argument as tAttrSum - // POI does the same for consistency, but this is not critical - return new ParseNode(AttrPtg.getSumSingle(), args); - // The code below would encode tFuncVar(SUM) which seems to do no harm - } - validateNumArgs(args.length, fm); - - AbstractFunctionPtg retval; - if(isVarArgs) { - retval = new FuncVarPtg(name, (byte)numArgs); - } else { - retval = new FuncPtg(funcIx); - } - return new ParseNode(retval, args); - } - - private void validateNumArgs(int numArgs, FunctionMetadata fm) { - if(numArgs < fm.getMinParams()) { - String msg = "Too few arguments to function '" + fm.getName() + "'. "; - if(fm.hasFixedArgsLength()) { - msg += "Expected " + fm.getMinParams(); - } else { - msg += "At least " + fm.getMinParams() + " were expected"; - } - msg += " but got " + numArgs + "."; - throw new FormulaParseException(msg); - } - if(numArgs > fm.getMaxParams()) { - String msg = "Too many arguments to function '" + fm.getName() + "'. "; - if(fm.hasFixedArgsLength()) { - msg += "Expected " + fm.getMaxParams(); - } else { - msg += "At most " + fm.getMaxParams() + " were expected"; - } - msg += " but got " + numArgs + "."; - throw new FormulaParseException(msg); - } - } - - private static boolean isArgumentDelimiter(char ch) { - return ch == ',' || ch == ')'; - } - - /** get arguments to a function */ - private ParseNode[] Arguments() { - //average 2 args per function - List temp = new ArrayList(2); - SkipWhite(); - if(look == ')') { - return ParseNode.EMPTY_ARRAY; - } - - boolean missedPrevArg = true; - int numArgs = 0; - while (true) { - SkipWhite(); - if (isArgumentDelimiter(look)) { - if (missedPrevArg) { - temp.add(new ParseNode(MissingArgPtg.instance)); - numArgs++; - } - if (look == ')') { - break; - } - Match(','); - missedPrevArg = true; - continue; - } - temp.add(comparisonExpression()); - numArgs++; - missedPrevArg = false; - SkipWhite(); - if (!isArgumentDelimiter(look)) { - throw expected("',' or ')'"); - } - } - ParseNode[] result = new ParseNode[temp.size()]; - temp.toArray(result); - return result; - } + private static final class Identifier { + private final String _name; + private final boolean _isQuoted; + + public Identifier(String name, boolean isQuoted) { + _name = name; + _isQuoted = isQuoted; + } + public String getName() { + return _name; + } + public boolean isQuoted() { + return _isQuoted; + } + public String toString() { + StringBuffer sb = new StringBuffer(64); + sb.append(getClass().getName()); + sb.append(" ["); + if (_isQuoted) { + sb.append("'").append(_name).append("'"); + } else { + sb.append(_name); + } + sb.append("]"); + return sb.toString(); + } + } + private static final class SheetIdentifier { + + + private final String _bookName; + private final Identifier _sheetIdentifier; + public SheetIdentifier(String bookName, Identifier sheetIdentifier) { + _bookName = bookName; + _sheetIdentifier = sheetIdentifier; + } + public String getBookName() { + return _bookName; + } + public Identifier getSheetIdentifier() { + return _sheetIdentifier; + } + public String toString() { + StringBuffer sb = new StringBuffer(64); + sb.append(getClass().getName()); + sb.append(" ["); + if (_bookName != null) { + sb.append(" [").append(_sheetIdentifier.getName()).append("]"); + } + if (_sheetIdentifier.isQuoted()) { + sb.append("'").append(_sheetIdentifier.getName()).append("'"); + } else { + sb.append(_sheetIdentifier.getName()); + } + sb.append("]"); + return sb.toString(); + } + } + + /** + * Specific exception thrown when a supplied formula does not parse properly.
+ * Primarily used by test cases when testing for specific parsing exceptions.

+ * + */ + static final class FormulaParseException extends RuntimeException { + // This class was given package scope until it would become clear that it is useful to + // general client code. + public FormulaParseException(String msg) { + super(msg); + } + } + + + private final String _formulaString; + private final int _formulaLength; + /** points at the next character to be read (after the {@link #look} char) */ + private int _pointer; + + private ParseNode _rootNode; + + private static char TAB = '\t'; + + /** + * Lookahead Character. + * gets value '\0' when the input string is exhausted + */ + private char look; + + private FormulaParsingWorkbook _book; + + private int _sheetIndex; + + + /** + * Create the formula parser, with the string that is to be + * parsed against the supplied workbook. + * A later call the parse() method to return ptg list in + * rpn order, then call the getRPNPtg() to retrieve the + * parse results. + * This class is recommended only for single threaded use. + * + * If you only have a usermodel.HSSFWorkbook, and not a + * model.Workbook, then use the convenience method on + * usermodel.HSSFFormulaEvaluator + */ + private FormulaParser(String formula, FormulaParsingWorkbook book, int sheetIndex){ + _formulaString = formula; + _pointer=0; + _book = book; + _formulaLength = _formulaString.length(); + _sheetIndex = sheetIndex; + } + + public static Ptg[] parse(String formula, FormulaParsingWorkbook book) { + return parse(formula, book, FormulaType.CELL); + } + + public static Ptg[] parse(String formula, FormulaParsingWorkbook workbook, int formulaType) { + return parse(formula, workbook, formulaType, -1); + } + + /** + * Parse a formula into a array of tokens + * + * @param formula the formula to parse + * @param workbook the parent workbook + * @param formulaType the type of the formula, see {@link FormulaType} + * @param sheetIndex the 0-based index of the sheet this formula belongs to. + * The sheet index is required to resolve sheet-level names. -1 means that + * the scope of the name will be ignored and the parser will match names only by name + * + * @return array of parsed tokens + * @throws FormulaParseException if the formula is unparsable + */ + public static Ptg[] parse(String formula, FormulaParsingWorkbook workbook, int formulaType, int sheetIndex) { + FormulaParser fp = new FormulaParser(formula, workbook, sheetIndex); + fp.parse(); + return fp.getRPNPtg(formulaType); + } + + /** Read New Character From Input Stream */ + private void GetChar() { + // Check to see if we've walked off the end of the string. + if (_pointer > _formulaLength) { + throw new RuntimeException("too far"); + } + if (_pointer < _formulaLength) { + look=_formulaString.charAt(_pointer); + } else { + // Just return if so and reset 'look' to something to keep + // SkipWhitespace from spinning + look = (char)0; + } + _pointer++; + //System.out.println("Got char: "+ look); + } + private void resetPointer(int ptr) { + _pointer = ptr; + if (_pointer <= _formulaLength) { + look=_formulaString.charAt(_pointer-1); + } else { + // Just return if so and reset 'look' to something to keep + // SkipWhitespace from spinning + look = (char)0; + } + } + + /** Report What Was Expected */ + private RuntimeException expected(String s) { + String msg; + + if (look == '=' && _formulaString.substring(0, _pointer-1).trim().length() < 1) { + msg = "The specified formula '" + _formulaString + + "' starts with an equals sign which is not allowed."; + } else { + msg = "Parse error near char " + (_pointer-1) + " '" + look + "'" + + " in specified formula '" + _formulaString + "'. Expected " + + s; + } + return new FormulaParseException(msg); + } + + /** Recognize an Alpha Character */ + private static boolean IsAlpha(char c) { + return Character.isLetter(c) || c == '$' || c=='_'; + } + + /** Recognize a Decimal Digit */ + private static boolean IsDigit(char c) { + return Character.isDigit(c); + } + + /** Recognize White Space */ + private static boolean IsWhite( char c) { + return c ==' ' || c== TAB; + } + + /** Skip Over Leading White Space */ + private void SkipWhite() { + while (IsWhite(look)) { + GetChar(); + } + } + + /** + * Consumes the next input character if it is equal to the one specified otherwise throws an + * unchecked exception. This method does not consume whitespace (before or after the + * matched character). + */ + private void Match(char x) { + if (look != x) { + throw expected("'" + x + "'"); + } + GetChar(); + } + + /** Get a Number */ + private String GetNum() { + StringBuffer value = new StringBuffer(); + + while (IsDigit(this.look)){ + value.append(this.look); + GetChar(); + } + return value.length() == 0 ? null : value.toString(); + } + + private ParseNode parseRangeExpression() { + ParseNode result = parseRangeable(); + boolean hasRange = false; + while (look == ':') { + int pos = _pointer; + GetChar(); + ParseNode nextPart = parseRangeable(); + // Note - no range simplification here. An expr like "A1:B2:C3:D4:E5" should be + // grouped into area ref pairs like: "(A1:B2):(C3:D4):E5" + // Furthermore, Excel doesn't seem to simplify + // expressions like "Sheet1!A1:Sheet1:B2" into "Sheet1!A1:B2" + + checkValidRangeOperand("LHS", pos, result); + checkValidRangeOperand("RHS", pos, nextPart); + + ParseNode[] children = { result, nextPart, }; + result = new ParseNode(RangePtg.instance, children); + hasRange = true; + } + if (hasRange) { + return augmentWithMemPtg(result); + } + return result; + } + + private static ParseNode augmentWithMemPtg(ParseNode root) { + Ptg memPtg; + if (needsMemFunc(root)) { + memPtg = new MemFuncPtg(root.getEncodedSize()); + } else { + memPtg = new MemAreaPtg(root.getEncodedSize()); + } + return new ParseNode(memPtg, root); + } + /** + * From OOO doc: "Whenever one operand of the reference subexpression is a function, + * a defined name, a 3D reference, or an external reference (and no error occurs), + * a tMemFunc token is used" + * + */ + private static boolean needsMemFunc(ParseNode root) { + Ptg token = root.getToken(); + if (token instanceof AbstractFunctionPtg) { + return true; + } + if (token instanceof ExternSheetReferenceToken) { // 3D refs + return true; + } + if (token instanceof NamePtg || token instanceof NameXPtg) { // 3D refs + return true; + } + + if (token instanceof OperationPtg || token instanceof ParenthesisPtg) { + // expect RangePtg, but perhaps also UnionPtg, IntersectionPtg etc + for(ParseNode child : root.getChildren()) { + if (needsMemFunc(child)) { + return true; + } + } + return false; + } + if (token instanceof OperandPtg) { + return false; + } + if (token instanceof OperationPtg) { + return true; + } + + return false; + } + + /** + * @param currentParsePosition used to format a potential error message + */ + private static void checkValidRangeOperand(String sideName, int currentParsePosition, ParseNode pn) { + if (!isValidRangeOperand(pn)) { + throw new FormulaParseException("The " + sideName + + " of the range operator ':' at position " + + currentParsePosition + " is not a proper reference."); + } + } + + /** + * @return false if sub-expression represented the specified ParseNode definitely + * cannot appear on either side of the range (':') operator + */ + private static boolean isValidRangeOperand(ParseNode a) { + Ptg tkn = a.getToken(); + // Note - order is important for these instance-of checks + if (tkn instanceof OperandPtg) { + // notably cell refs and area refs + return true; + } + + // next 2 are special cases of OperationPtg + if (tkn instanceof AbstractFunctionPtg) { + AbstractFunctionPtg afp = (AbstractFunctionPtg) tkn; + byte returnClass = afp.getDefaultOperandClass(); + return Ptg.CLASS_REF == returnClass; + } + if (tkn instanceof ValueOperatorPtg) { + return false; + } + if (tkn instanceof OperationPtg) { + return true; + } + + // one special case of ControlPtg + if (tkn instanceof ParenthesisPtg) { + // parenthesis Ptg should have only one child + return isValidRangeOperand(a.getChildren()[0]); + } + + // one special case of ScalarConstantPtg + if (tkn == ErrPtg.REF_INVALID) { + return true; + } + + // All other ControlPtgs and ScalarConstantPtgs cannot be used with ':' + return false; + } + + /** + * Parses area refs (things which could be the operand of ':') and simple factors + * Examples + *
+	 *   A$1
+	 *   $A$1 :  $B1
+	 *   A1 .......	C2
+	 *   Sheet1 !$A1
+	 *   a..b!A1
+	 *   'my sheet'!A1
+	 *   .my.sheet!A1
+	 *   my.named..range.
+	 *   foo.bar(123.456, "abc")
+	 *   123.456
+	 *   "abc"
+	 *   true
+	 * 
+ * + */ + private ParseNode parseRangeable() { + SkipWhite(); + int savePointer = _pointer; + SheetIdentifier sheetIden = parseSheetName(); + if (sheetIden == null) { + resetPointer(savePointer); + } else { + SkipWhite(); + savePointer = _pointer; + } + + SimpleRangePart part1 = parseSimpleRangePart(); + if (part1 == null) { + if (sheetIden != null) { + throw new FormulaParseException("Cell reference expected after sheet name at index " + + _pointer + "."); + } + return parseNonRange(savePointer); + } + boolean whiteAfterPart1 = IsWhite(look); + if (whiteAfterPart1) { + SkipWhite(); + } + + if (look == ':') { + int colonPos = _pointer; + GetChar(); + SkipWhite(); + SimpleRangePart part2 = parseSimpleRangePart(); + if (part2 != null && !part1.isCompatibleForArea(part2)) { + // second part is not compatible with an area ref e.g. S!A1:S!B2 + // where S might be a sheet name (that looks like a column name) + + part2 = null; + } + if (part2 == null) { + // second part is not compatible with an area ref e.g. A1:OFFSET(B2, 1, 2) + // reset and let caller use explicit range operator + resetPointer(colonPos); + if (!part1.isCell()) { + String prefix; + if (sheetIden == null) { + prefix = ""; + } else { + prefix = "'" + sheetIden.getSheetIdentifier().getName() + '!'; + } + throw new FormulaParseException(prefix + part1.getRep() + "' is not a proper reference."); + } + return createAreaRefParseNode(sheetIden, part1, part2); + } + return createAreaRefParseNode(sheetIden, part1, part2); + } + + if (look == '.') { + GetChar(); + int dotCount = 1; + while (look =='.') { + dotCount ++; + GetChar(); + } + boolean whiteBeforePart2 = IsWhite(look); + + SkipWhite(); + SimpleRangePart part2 = parseSimpleRangePart(); + String part1And2 = _formulaString.substring(savePointer-1, _pointer-1); + if (part2 == null) { + if (sheetIden != null) { + throw new FormulaParseException("Complete area reference expected after sheet name at index " + + _pointer + "."); + } + return parseNonRange(savePointer); + } + + + if (whiteAfterPart1 || whiteBeforePart2) { + if (part1.isRowOrColumn() || part2.isRowOrColumn()) { + // "A .. B" not valid syntax for "A:B" + // and there's no other valid expression that fits this grammar + throw new FormulaParseException("Dotted range (full row or column) expression '" + + part1And2 + "' must not contain whitespace."); + } + return createAreaRefParseNode(sheetIden, part1, part2); + } + + if (dotCount == 1 && part1.isRow() && part2.isRow()) { + // actually, this is looking more like a number + return parseNonRange(savePointer); + } + + if (part1.isRowOrColumn() || part2.isRowOrColumn()) { + if (dotCount != 2) { + throw new FormulaParseException("Dotted range (full row or column) expression '" + part1And2 + + "' must have exactly 2 dots."); + } + } + return createAreaRefParseNode(sheetIden, part1, part2); + } + if (part1.isCell() && isValidCellReference(part1.getRep())) { + return createAreaRefParseNode(sheetIden, part1, null); + } + if (sheetIden != null) { + throw new FormulaParseException("Second part of cell reference expected after sheet name at index " + + _pointer + "."); + } + + return parseNonRange(savePointer); + } + + + + /** + * Parses simple factors that are not primitive ranges or range components + * i.e. '!', ':'(and equiv '...') do not appear + * Examples + *
+	 *   my.named...range.
+	 *   foo.bar(123.456, "abc")
+	 *   123.456
+	 *   "abc"
+	 *   true
+	 * 
+ */ + private ParseNode parseNonRange(int savePointer) { + resetPointer(savePointer); + + if (Character.isDigit(look)) { + return new ParseNode(parseNumber()); + } + if (look == '"') { + return new ParseNode(new StringPtg(parseStringLiteral())); + } + // from now on we can only be dealing with non-quoted identifiers + // which will either be named ranges or functions + StringBuilder sb = new StringBuilder(); + + if (!Character.isLetter(look)) { + throw expected("number, string, or defined name"); + } + while (isValidDefinedNameChar(look)) { + sb.append(look); + GetChar(); + } + SkipWhite(); + String name = sb.toString(); + if (look == '(') { + return function(name); + } + if (name.equalsIgnoreCase("TRUE") || name.equalsIgnoreCase("FALSE")) { + return new ParseNode(new BoolPtg(name.toUpperCase())); + } + if (_book == null) { + // Only test cases omit the book (expecting it not to be needed) + throw new IllegalStateException("Need book to evaluate name '" + name + "'"); + } + EvaluationName evalName = _book.getName(name, _sheetIndex); + if (evalName == null) { + throw new FormulaParseException("Specified named range '" + + name + "' does not exist in the current workbook."); + } + if (evalName.isRange()) { + return new ParseNode(evalName.createPtg()); + } + // TODO - what about NameX ? + throw new FormulaParseException("Specified name '" + + name + "' is not a range as expected."); + } + + /** + * + * @return true if the specified character may be used in a defined name + */ + private static boolean isValidDefinedNameChar(char ch) { + if (Character.isLetterOrDigit(ch)) { + return true; + } + switch (ch) { + case '.': + case '_': + case '?': + case '\\': // of all things + return true; + } + return false; + } + + /** + * + * @param sheetIden may be null + * @param part1 + * @param part2 may be null + */ + private ParseNode createAreaRefParseNode(SheetIdentifier sheetIden, SimpleRangePart part1, + SimpleRangePart part2) throws FormulaParseException { + + int extIx; + if (sheetIden == null) { + extIx = Integer.MIN_VALUE; + } else { + String sName = sheetIden.getSheetIdentifier().getName(); + if (sheetIden.getBookName() == null) { + extIx = _book.getExternalSheetIndex(sName); + } else { + extIx = _book.getExternalSheetIndex(sheetIden.getBookName(), sName); + } + } + Ptg ptg; + if (part2 == null) { + CellReference cr = part1.getCellReference(); + if (sheetIden == null) { + ptg = new RefPtg(cr); + } else { + ptg = new Ref3DPtg(cr, extIx); + } + } else { + AreaReference areaRef = createAreaRef(part1, part2); + + if (sheetIden == null) { + ptg = new AreaPtg(areaRef); + } else { + ptg = new Area3DPtg(areaRef, extIx); + } + } + return new ParseNode(ptg); + } + + private static AreaReference createAreaRef(SimpleRangePart part1, SimpleRangePart part2) { + if (!part1.isCompatibleForArea(part2)) { + throw new FormulaParseException("has incompatible parts: '" + + part1.getRep() + "' and '" + part2.getRep() + "'."); + } + if (part1.isRow()) { + return AreaReference.getWholeRow(part1.getRep(), part2.getRep()); + } + if (part1.isColumn()) { + return AreaReference.getWholeColumn(part1.getRep(), part2.getRep()); + } + return new AreaReference(part1.getCellReference(), part2.getCellReference()); + } + + /** + * Matches a zero or one letter-runs followed by zero or one digit-runs. + * Either or both runs man optionally be prefixed with a single '$'. + * (copied+modified from {@link org.apache.poi.ss.util.CellReference#CELL_REF_PATTERN}) + */ + private static final Pattern CELL_REF_PATTERN = Pattern.compile("(\\$?[A-Za-z]+)?(\\$?[0-9]+)?"); + + /** + * Parses out a potential LHS or RHS of a ':' intended to produce a plain AreaRef. Normally these are + * proper cell references but they could also be row or column refs like "$AC" or "10" + * @return null (and leaves {@link #_pointer} unchanged if a proper range part does not parse out + */ + private SimpleRangePart parseSimpleRangePart() { + int ptr = _pointer-1; // TODO avoid StringIndexOutOfBounds + boolean hasDigits = false; + boolean hasLetters = false; + while (ptr < _formulaLength) { + char ch = _formulaString.charAt(ptr); + if (Character.isDigit(ch)) { + hasDigits = true; + } else if (Character.isLetter(ch)) { + hasLetters = true; + } else if (ch =='$') { + // + } else { + break; + } + ptr++; + } + if (ptr <= _pointer-1) { + return null; + } + String rep = _formulaString.substring(_pointer-1, ptr); + if (!CELL_REF_PATTERN.matcher(rep).matches()) { + return null; + } + // Check range bounds against grid max + if (hasLetters && hasDigits) { + if (!isValidCellReference(rep)) { + return null; + } + } else if (hasLetters) { + if (!CellReference.isColumnWithnRange(rep.replace("$", ""))) { + return null; + } + } else if (hasDigits) { + int i; + try { + i = Integer.parseInt(rep.replace("$", "")); + } catch (NumberFormatException e) { + return null; + } + if (i<1 || i>65536) { + return null; + } + } else { + // just dollars ? can this happen? + return null; + } + + + resetPointer(ptr+1); // stepping forward + return new SimpleRangePart(rep, hasLetters, hasDigits); + } + + + /** + * A1, $A1, A$1, $A$1, A, 1 + */ + private static final class SimpleRangePart { + private enum Type { + CELL, ROW, COLUMN; + + public static Type get(boolean hasLetters, boolean hasDigits) { + if (hasLetters) { + return hasDigits ? CELL : COLUMN; + } + if (!hasDigits) { + throw new IllegalArgumentException("must have either letters or numbers"); + } + return ROW; + } + } + + private final Type _type; + private final String _rep; + + public SimpleRangePart(String rep, boolean hasLetters, boolean hasNumbers) { + _rep = rep; + _type = Type.get(hasLetters, hasNumbers); + } + + public boolean isCell() { + return _type == Type.CELL; + } + + public boolean isRowOrColumn() { + return _type != Type.CELL; + } + + public CellReference getCellReference() { + if (_type != Type.CELL) { + throw new IllegalStateException("Not applicable to this type"); + } + return new CellReference(_rep); + } + + public boolean isColumn() { + return _type == Type.COLUMN; + } + + public boolean isRow() { + return _type == Type.ROW; + } + + public String getRep() { + return _rep; + } + + /** + * @return true if the two range parts can be combined in an + * {@link AreaPtg} ( Note - the explicit range operator (:) may still be valid + * when this method returns false ) + */ + public boolean isCompatibleForArea(SimpleRangePart part2) { + return _type == part2._type; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(64); + sb.append(getClass().getName()).append(" ["); + sb.append(_rep); + sb.append("]"); + return sb.toString(); + } + } + + /** + * Note - caller should reset {@link #_pointer} upon null result + * @param iden identifier prefix (if unquoted, it is terminated at first dot) + * @return The sheet name as an identifier null if '!' is not found in the right place + */ + private SheetIdentifier parseSheetName() { + + String bookName; + if (look == '[') { + StringBuilder sb = new StringBuilder(); + GetChar(); + while (look != ']') { + sb.append(look); + GetChar(); + } + GetChar(); + bookName = sb.toString(); + } else { + bookName = null; + } + + if (look == '\'') { + StringBuffer sb = new StringBuffer(); + + Match('\''); + boolean done = look == '\''; + while(!done) { + sb.append(look); + GetChar(); + if(look == '\'') + { + Match('\''); + done = look != '\''; + } + } + + Identifier iden = new Identifier(sb.toString(), true); + // quoted identifier - can't concatenate anything more + SkipWhite(); + if (look == '!') { + GetChar(); + return new SheetIdentifier(bookName, iden); + } + return null; + } + + // unquoted sheet names must start with underscore or a letter + if (look =='_' || Character.isLetter(look)) { + StringBuilder sb = new StringBuilder(); + // can concatenate idens with dots + while (isUnquotedSheetNameChar(look)) { + sb.append(look); + GetChar(); + } + SkipWhite(); + if (look == '!') { + GetChar(); + return new SheetIdentifier(bookName, new Identifier(sb.toString(), false)); + } + return null; + } + return null; + } + + /** + * very similar to {@link SheetNameFormatter#isSpecialChar(char)} + */ + private static boolean isUnquotedSheetNameChar(char ch) { + if(Character.isLetterOrDigit(ch)) { + return true; + } + switch(ch) { + case '.': // dot is OK + case '_': // underscore is OK + return true; + } + return false; + } + + /** + * @return true if the specified name is a valid cell reference + */ + private static boolean isValidCellReference(String str) { + return CellReference.classifyCellReference(str) == NameType.CELL; + } + + + /** + * Note - Excel function names are 'case aware but not case sensitive'. This method may end + * up creating a defined name record in the workbook if the specified name is not an internal + * Excel function, and has not been encountered before. + * + * @param name case preserved function name (as it was entered/appeared in the formula). + */ + private ParseNode function(String name) { + Ptg nameToken = null; + if(!AbstractFunctionPtg.isBuiltInFunctionName(name)) { + // user defined function + // in the token tree, the name is more or less the first argument + + if (_book == null) { + // Only test cases omit the book (expecting it not to be needed) + throw new IllegalStateException("Need book to evaluate name '" + name + "'"); + } + EvaluationName hName = _book.getName(name, _sheetIndex); + if (hName == null) { + + nameToken = _book.getNameXPtg(name); + if (nameToken == null) { + throw new FormulaParseException("Name '" + name + + "' is completely unknown in the current workbook"); + } + } else { + if (!hName.isFunctionName()) { + throw new FormulaParseException("Attempt to use name '" + name + + "' as a function, but defined name in workbook does not refer to a function"); + } + + // calls to user-defined functions within the workbook + // get a Name token which points to a defined name record + nameToken = hName.createPtg(); + } + } + + Match('('); + ParseNode[] args = Arguments(); + Match(')'); + + return getFunction(name, nameToken, args); + } + + /** + * Generates the variable function ptg for the formula. + *

+ * For IF Formulas, additional PTGs are added to the tokens + * @param name a {@link NamePtg} or {@link NameXPtg} or null + * @param numArgs + * @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function + */ + private ParseNode getFunction(String name, Ptg namePtg, ParseNode[] args) { + + FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase()); + int numArgs = args.length; + if(fm == null) { + if (namePtg == null) { + throw new IllegalStateException("NamePtg must be supplied for external functions"); + } + // must be external function + ParseNode[] allArgs = new ParseNode[numArgs+1]; + allArgs[0] = new ParseNode(namePtg); + System.arraycopy(args, 0, allArgs, 1, numArgs); + return new ParseNode(new FuncVarPtg(name, (byte)(numArgs+1)), allArgs); + } + + if (namePtg != null) { + throw new IllegalStateException("NamePtg no applicable to internal functions"); + } + boolean isVarArgs = !fm.hasFixedArgsLength(); + int funcIx = fm.getIndex(); + if (funcIx == FunctionMetadataRegistry.FUNCTION_INDEX_SUM && args.length == 1) { + // Excel encodes the sum of a single argument as tAttrSum + // POI does the same for consistency, but this is not critical + return new ParseNode(AttrPtg.getSumSingle(), args); + // The code below would encode tFuncVar(SUM) which seems to do no harm + } + validateNumArgs(args.length, fm); + + AbstractFunctionPtg retval; + if(isVarArgs) { + retval = new FuncVarPtg(name, (byte)numArgs); + } else { + retval = new FuncPtg(funcIx); + } + return new ParseNode(retval, args); + } + + private void validateNumArgs(int numArgs, FunctionMetadata fm) { + if(numArgs < fm.getMinParams()) { + String msg = "Too few arguments to function '" + fm.getName() + "'. "; + if(fm.hasFixedArgsLength()) { + msg += "Expected " + fm.getMinParams(); + } else { + msg += "At least " + fm.getMinParams() + " were expected"; + } + msg += " but got " + numArgs + "."; + throw new FormulaParseException(msg); + } + if(numArgs > fm.getMaxParams()) { + String msg = "Too many arguments to function '" + fm.getName() + "'. "; + if(fm.hasFixedArgsLength()) { + msg += "Expected " + fm.getMaxParams(); + } else { + msg += "At most " + fm.getMaxParams() + " were expected"; + } + msg += " but got " + numArgs + "."; + throw new FormulaParseException(msg); + } + } + + private static boolean isArgumentDelimiter(char ch) { + return ch == ',' || ch == ')'; + } + + /** get arguments to a function */ + private ParseNode[] Arguments() { + //average 2 args per function + List temp = new ArrayList(2); + SkipWhite(); + if(look == ')') { + return ParseNode.EMPTY_ARRAY; + } + + boolean missedPrevArg = true; + int numArgs = 0; + while (true) { + SkipWhite(); + if (isArgumentDelimiter(look)) { + if (missedPrevArg) { + temp.add(new ParseNode(MissingArgPtg.instance)); + numArgs++; + } + if (look == ')') { + break; + } + Match(','); + missedPrevArg = true; + continue; + } + temp.add(comparisonExpression()); + numArgs++; + missedPrevArg = false; + SkipWhite(); + if (!isArgumentDelimiter(look)) { + throw expected("',' or ')'"); + } + } + ParseNode[] result = new ParseNode[temp.size()]; + temp.toArray(result); + return result; + } /** Parse and Translate a Math Factor */ - private ParseNode powerFactor() { - ParseNode result = percentFactor(); - while(true) { - SkipWhite(); - if(look != '^') { - return result; - } - Match('^'); - ParseNode other = percentFactor(); - result = new ParseNode(PowerPtg.instance, result, other); - } - } - - private ParseNode percentFactor() { - ParseNode result = parseSimpleFactor(); - while(true) { - SkipWhite(); - if(look != '%') { - return result; - } - Match('%'); - result = new ParseNode(PercentPtg.instance, result); - } - } - - - /** - * factors (without ^ or % ) - */ - private ParseNode parseSimpleFactor() { - SkipWhite(); - switch(look) { - case '#': - return new ParseNode(ErrPtg.valueOf(parseErrorLiteral())); - case '-': - Match('-'); - return new ParseNode(UnaryMinusPtg.instance, powerFactor()); - case '+': - Match('+'); - return new ParseNode(UnaryPlusPtg.instance, powerFactor()); - case '(': - Match('('); - ParseNode inside = comparisonExpression(); - Match(')'); - return new ParseNode(ParenthesisPtg.instance, inside); - case '"': - return new ParseNode(new StringPtg(parseStringLiteral())); - case '{': - Match('{'); - ParseNode arrayNode = parseArray(); - Match('}'); - return arrayNode; - } - if (IsAlpha(look) || look == '\'' || look == '['){ - return parseFunctionReferenceOrName(); - } - // else - assume number - return new ParseNode(parseNumber()); - } - - - private ParseNode parseArray() { - List rowsData = new ArrayList(); - while(true) { - Object[] singleRowData = parseArrayRow(); - rowsData.add(singleRowData); - if (look == '}') { - break; - } - if (look != ';') { - throw expected("'}' or ';'"); - } - Match(';'); - } - int nRows = rowsData.size(); - Object[][] values2d = new Object[nRows][]; - rowsData.toArray(values2d); - int nColumns = values2d[0].length; - checkRowLengths(values2d, nColumns); - - return new ParseNode(new ArrayPtg(values2d)); - } - private void checkRowLengths(Object[][] values2d, int nColumns) { - for (int i = 0; i < values2d.length; i++) { - int rowLen = values2d[i].length; - if (rowLen != nColumns) { - throw new FormulaParseException("Array row " + i + " has length " + rowLen - + " but row 0 has length " + nColumns); - } - } - } - - private Object[] parseArrayRow() { - List temp = new ArrayList(); - while (true) { - temp.add(parseArrayItem()); - SkipWhite(); - switch(look) { - case '}': - case ';': - break; - case ',': - Match(','); - continue; - default: - throw expected("'}' or ','"); - - } - break; - } - - Object[] result = new Object[temp.size()]; - temp.toArray(result); - return result; - } - - private Object parseArrayItem() { - SkipWhite(); - switch(look) { - case '"': return new UnicodeString(parseStringLiteral()); - case '#': return ErrorConstant.valueOf(parseErrorLiteral()); - case 'F': case 'f': - case 'T': case 't': - return parseBooleanLiteral(); - } - // else assume number - return convertArrayNumber(parseNumber()); - } - - private Boolean parseBooleanLiteral() { - String iden = parseUnquotedIdentifier(); - if ("TRUE".equalsIgnoreCase(iden)) { - return Boolean.TRUE; - } - if ("FALSE".equalsIgnoreCase(iden)) { - return Boolean.FALSE; - } - throw expected("'TRUE' or 'FALSE'"); - } - - private static Double convertArrayNumber(Ptg ptg) { - if (ptg instanceof IntPtg) { - return new Double(((IntPtg)ptg).getValue()); - } - if (ptg instanceof NumberPtg) { - return new Double(((NumberPtg)ptg).getValue()); - } - throw new RuntimeException("Unexpected ptg (" + ptg.getClass().getName() + ")"); - } - - private Ptg parseNumber() { - String number2 = null; - String exponent = null; - String number1 = GetNum(); - - if (look == '.') { - GetChar(); - number2 = GetNum(); - } - - if (look == 'E') { - GetChar(); - - String sign = ""; - if (look == '+') { - GetChar(); - } else if (look == '-') { - GetChar(); - sign = "-"; - } - - String number = GetNum(); - if (number == null) { - throw expected("Integer"); - } - exponent = sign + number; - } - - if (number1 == null && number2 == null) { - throw expected("Integer"); - } - - return getNumberPtgFromString(number1, number2, exponent); - } - - - private int parseErrorLiteral() { - Match('#'); - String part1 = parseUnquotedIdentifier().toUpperCase(); - - switch(part1.charAt(0)) { - case 'V': - if(part1.equals("VALUE")) { - Match('!'); - return HSSFErrorConstants.ERROR_VALUE; - } - throw expected("#VALUE!"); - case 'R': - if(part1.equals("REF")) { - Match('!'); - return HSSFErrorConstants.ERROR_REF; - } - throw expected("#REF!"); - case 'D': - if(part1.equals("DIV")) { - Match('/'); - Match('0'); - Match('!'); - return HSSFErrorConstants.ERROR_DIV_0; - } - throw expected("#DIV/0!"); - case 'N': - if(part1.equals("NAME")) { - Match('?'); // only one that ends in '?' - return HSSFErrorConstants.ERROR_NAME; - } - if(part1.equals("NUM")) { - Match('!'); - return HSSFErrorConstants.ERROR_NUM; - } - if(part1.equals("NULL")) { - Match('!'); - return HSSFErrorConstants.ERROR_NULL; - } - if(part1.equals("N")) { - Match('/'); - if(look != 'A' && look != 'a') { - throw expected("#N/A"); - } - Match(look); - // Note - no '!' or '?' suffix - return HSSFErrorConstants.ERROR_NA; - } - throw expected("#NAME?, #NUM!, #NULL! or #N/A"); - - } - throw expected("#VALUE!, #REF!, #DIV/0!, #NAME?, #NUM!, #NULL! or #N/A"); - } - - - /** - * Get a PTG for an integer from its string representation. - * return Int or Number Ptg based on size of input - */ - private static Ptg getNumberPtgFromString(String number1, String number2, String exponent) { - StringBuffer number = new StringBuffer(); - - if (number2 == null) { - number.append(number1); - - if (exponent != null) { - number.append('E'); - number.append(exponent); - } - - String numberStr = number.toString(); - int intVal; - try { - intVal = Integer.parseInt(numberStr); - } catch (NumberFormatException e) { - return new NumberPtg(numberStr); - } - if (IntPtg.isInRange(intVal)) { - return new IntPtg(intVal); - } - return new NumberPtg(numberStr); - } - - if (number1 != null) { - number.append(number1); - } - - number.append('.'); - number.append(number2); - - if (exponent != null) { - number.append('E'); - number.append(exponent); - } - - return new NumberPtg(number.toString()); - } - - - private String parseStringLiteral() { - Match('"'); - - StringBuffer token = new StringBuffer(); - while (true) { - if (look == '"') { - GetChar(); - if (look != '"') { - break; - } - } - token.append(look); - GetChar(); - } - return token.toString(); - } - - /** Parse and Translate a Math Term */ - private ParseNode Term() { - ParseNode result = powerFactor(); - while(true) { - SkipWhite(); - Ptg operator; - switch(look) { - case '*': - Match('*'); - operator = MultiplyPtg.instance; - break; - case '/': - Match('/'); - operator = DividePtg.instance; - break; - default: - return result; // finished with Term - } - ParseNode other = powerFactor(); - result = new ParseNode(operator, result, other); - } - } - private ParseNode unionExpression() { - ParseNode result = comparisonExpression(); - boolean hasUnions = false; - while (true) { - SkipWhite(); - switch(look) { - case ',': - GetChar(); - hasUnions = true; - ParseNode other = comparisonExpression(); - result = new ParseNode(UnionPtg.instance, result, other); - continue; - } - if (hasUnions) { - MemFuncPtg memFuncPtg = new MemFuncPtg(result.getEncodedSize()); - result = new ParseNode(memFuncPtg, result); - } - return result; - } - } - - private ParseNode comparisonExpression() { - ParseNode result = concatExpression(); - while (true) { - SkipWhite(); - switch(look) { - case '=': - case '>': - case '<': - Ptg comparisonToken = getComparisonToken(); - ParseNode other = concatExpression(); - result = new ParseNode(comparisonToken, result, other); - continue; - } - return result; // finished with predicate expression - } - } - - private Ptg getComparisonToken() { - if(look == '=') { - Match(look); - return EqualPtg.instance; - } - boolean isGreater = look == '>'; - Match(look); - if(isGreater) { - if(look == '=') { - Match('='); - return GreaterEqualPtg.instance; - } - return GreaterThanPtg.instance; - } - switch(look) { - case '=': - Match('='); - return LessEqualPtg.instance; - case '>': - Match('>'); - return NotEqualPtg.instance; - } - return LessThanPtg.instance; - } - - - private ParseNode concatExpression() { - ParseNode result = additiveExpression(); - while (true) { - SkipWhite(); - if(look != '&') { - break; // finished with concat expression - } - Match('&'); - ParseNode other = additiveExpression(); - result = new ParseNode(ConcatPtg.instance, result, other); - } - return result; - } - - - /** Parse and Translate an Expression */ - private ParseNode additiveExpression() { - ParseNode result = Term(); - while (true) { - SkipWhite(); - Ptg operator; - switch(look) { - case '+': - Match('+'); - operator = AddPtg.instance; - break; - case '-': - Match('-'); - operator = SubtractPtg.instance; - break; - default: - return result; // finished with additive expression - } - ParseNode other = Term(); - result = new ParseNode(operator, result, other); - } - } - - //{--------------------------------------------------------------} - //{ Parse and Translate an Assignment Statement } - /** + private ParseNode powerFactor() { + ParseNode result = percentFactor(); + while(true) { + SkipWhite(); + if(look != '^') { + return result; + } + Match('^'); + ParseNode other = percentFactor(); + result = new ParseNode(PowerPtg.instance, result, other); + } + } + + private ParseNode percentFactor() { + ParseNode result = parseSimpleFactor(); + while(true) { + SkipWhite(); + if(look != '%') { + return result; + } + Match('%'); + result = new ParseNode(PercentPtg.instance, result); + } + } + + + /** + * factors (without ^ or % ) + */ + private ParseNode parseSimpleFactor() { + SkipWhite(); + switch(look) { + case '#': + return new ParseNode(ErrPtg.valueOf(parseErrorLiteral())); + case '-': + Match('-'); + return new ParseNode(UnaryMinusPtg.instance, powerFactor()); + case '+': + Match('+'); + return new ParseNode(UnaryPlusPtg.instance, powerFactor()); + case '(': + Match('('); + ParseNode inside = comparisonExpression(); + Match(')'); + return new ParseNode(ParenthesisPtg.instance, inside); + case '"': + return new ParseNode(new StringPtg(parseStringLiteral())); + case '{': + Match('{'); + ParseNode arrayNode = parseArray(); + Match('}'); + return arrayNode; + } + if (IsAlpha(look) || Character.isDigit(look) || look == '\'' || look == '['){ + return parseRangeExpression(); + } + if (look == '.') { + return new ParseNode(parseNumber()); + } + throw expected("cell ref or constant literal"); + } + + + private ParseNode parseArray() { + List rowsData = new ArrayList(); + while(true) { + Object[] singleRowData = parseArrayRow(); + rowsData.add(singleRowData); + if (look == '}') { + break; + } + if (look != ';') { + throw expected("'}' or ';'"); + } + Match(';'); + } + int nRows = rowsData.size(); + Object[][] values2d = new Object[nRows][]; + rowsData.toArray(values2d); + int nColumns = values2d[0].length; + checkRowLengths(values2d, nColumns); + + return new ParseNode(new ArrayPtg(values2d)); + } + private void checkRowLengths(Object[][] values2d, int nColumns) { + for (int i = 0; i < values2d.length; i++) { + int rowLen = values2d[i].length; + if (rowLen != nColumns) { + throw new FormulaParseException("Array row " + i + " has length " + rowLen + + " but row 0 has length " + nColumns); + } + } + } + + private Object[] parseArrayRow() { + List temp = new ArrayList(); + while (true) { + temp.add(parseArrayItem()); + SkipWhite(); + switch(look) { + case '}': + case ';': + break; + case ',': + Match(','); + continue; + default: + throw expected("'}' or ','"); + + } + break; + } + + Object[] result = new Object[temp.size()]; + temp.toArray(result); + return result; + } + + private Object parseArrayItem() { + SkipWhite(); + switch(look) { + case '"': return new UnicodeString(parseStringLiteral()); + case '#': return ErrorConstant.valueOf(parseErrorLiteral()); + case 'F': case 'f': + case 'T': case 't': + return parseBooleanLiteral(); + } + // else assume number + return convertArrayNumber(parseNumber()); + } + + private Boolean parseBooleanLiteral() { + String iden = parseUnquotedIdentifier(); + if ("TRUE".equalsIgnoreCase(iden)) { + return Boolean.TRUE; + } + if ("FALSE".equalsIgnoreCase(iden)) { + return Boolean.FALSE; + } + throw expected("'TRUE' or 'FALSE'"); + } + + private static Double convertArrayNumber(Ptg ptg) { + if (ptg instanceof IntPtg) { + return new Double(((IntPtg)ptg).getValue()); + } + if (ptg instanceof NumberPtg) { + return new Double(((NumberPtg)ptg).getValue()); + } + throw new RuntimeException("Unexpected ptg (" + ptg.getClass().getName() + ")"); + } + + private Ptg parseNumber() { + String number2 = null; + String exponent = null; + String number1 = GetNum(); + + if (look == '.') { + GetChar(); + number2 = GetNum(); + } + + if (look == 'E') { + GetChar(); + + String sign = ""; + if (look == '+') { + GetChar(); + } else if (look == '-') { + GetChar(); + sign = "-"; + } + + String number = GetNum(); + if (number == null) { + throw expected("Integer"); + } + exponent = sign + number; + } + + if (number1 == null && number2 == null) { + throw expected("Integer"); + } + + return getNumberPtgFromString(number1, number2, exponent); + } + + + private int parseErrorLiteral() { + Match('#'); + String part1 = parseUnquotedIdentifier().toUpperCase(); + if (part1 == null) { + throw expected("remainder of error constant literal"); + } + + switch(part1.charAt(0)) { + case 'V': + if(part1.equals("VALUE")) { + Match('!'); + return HSSFErrorConstants.ERROR_VALUE; + } + throw expected("#VALUE!"); + case 'R': + if(part1.equals("REF")) { + Match('!'); + return HSSFErrorConstants.ERROR_REF; + } + throw expected("#REF!"); + case 'D': + if(part1.equals("DIV")) { + Match('/'); + Match('0'); + Match('!'); + return HSSFErrorConstants.ERROR_DIV_0; + } + throw expected("#DIV/0!"); + case 'N': + if(part1.equals("NAME")) { + Match('?'); // only one that ends in '?' + return HSSFErrorConstants.ERROR_NAME; + } + if(part1.equals("NUM")) { + Match('!'); + return HSSFErrorConstants.ERROR_NUM; + } + if(part1.equals("NULL")) { + Match('!'); + return HSSFErrorConstants.ERROR_NULL; + } + if(part1.equals("N")) { + Match('/'); + if(look != 'A' && look != 'a') { + throw expected("#N/A"); + } + Match(look); + // Note - no '!' or '?' suffix + return HSSFErrorConstants.ERROR_NA; + } + throw expected("#NAME?, #NUM!, #NULL! or #N/A"); + + } + throw expected("#VALUE!, #REF!, #DIV/0!, #NAME?, #NUM!, #NULL! or #N/A"); + } + + private String parseUnquotedIdentifier() { + if (look == '\'') { + throw expected("unquoted identifier"); + } + StringBuilder sb = new StringBuilder(); + while (Character.isLetterOrDigit(look) || look == '.') { + sb.append(look); + GetChar(); + } + if (sb.length() < 1) { + return null; + } + + return sb.toString(); + } + + /** + * Get a PTG for an integer from its string representation. + * return Int or Number Ptg based on size of input + */ + private static Ptg getNumberPtgFromString(String number1, String number2, String exponent) { + StringBuffer number = new StringBuffer(); + + if (number2 == null) { + number.append(number1); + + if (exponent != null) { + number.append('E'); + number.append(exponent); + } + + String numberStr = number.toString(); + int intVal; + try { + intVal = Integer.parseInt(numberStr); + } catch (NumberFormatException e) { + return new NumberPtg(numberStr); + } + if (IntPtg.isInRange(intVal)) { + return new IntPtg(intVal); + } + return new NumberPtg(numberStr); + } + + if (number1 != null) { + number.append(number1); + } + + number.append('.'); + number.append(number2); + + if (exponent != null) { + number.append('E'); + number.append(exponent); + } + + return new NumberPtg(number.toString()); + } + + + private String parseStringLiteral() { + Match('"'); + + StringBuffer token = new StringBuffer(); + while (true) { + if (look == '"') { + GetChar(); + if (look != '"') { + break; + } + } + token.append(look); + GetChar(); + } + return token.toString(); + } + + /** Parse and Translate a Math Term */ + private ParseNode Term() { + ParseNode result = powerFactor(); + while(true) { + SkipWhite(); + Ptg operator; + switch(look) { + case '*': + Match('*'); + operator = MultiplyPtg.instance; + break; + case '/': + Match('/'); + operator = DividePtg.instance; + break; + default: + return result; // finished with Term + } + ParseNode other = powerFactor(); + result = new ParseNode(operator, result, other); + } + } + private ParseNode unionExpression() { + ParseNode result = comparisonExpression(); + boolean hasUnions = false; + while (true) { + SkipWhite(); + switch(look) { + case ',': + GetChar(); + hasUnions = true; + ParseNode other = comparisonExpression(); + result = new ParseNode(UnionPtg.instance, result, other); + continue; + } + if (hasUnions) { + return augmentWithMemPtg(result); + } + return result; + } + } + + private ParseNode comparisonExpression() { + ParseNode result = concatExpression(); + while (true) { + SkipWhite(); + switch(look) { + case '=': + case '>': + case '<': + Ptg comparisonToken = getComparisonToken(); + ParseNode other = concatExpression(); + result = new ParseNode(comparisonToken, result, other); + continue; + } + return result; // finished with predicate expression + } + } + + private Ptg getComparisonToken() { + if(look == '=') { + Match(look); + return EqualPtg.instance; + } + boolean isGreater = look == '>'; + Match(look); + if(isGreater) { + if(look == '=') { + Match('='); + return GreaterEqualPtg.instance; + } + return GreaterThanPtg.instance; + } + switch(look) { + case '=': + Match('='); + return LessEqualPtg.instance; + case '>': + Match('>'); + return NotEqualPtg.instance; + } + return LessThanPtg.instance; + } + + + private ParseNode concatExpression() { + ParseNode result = additiveExpression(); + while (true) { + SkipWhite(); + if(look != '&') { + break; // finished with concat expression + } + Match('&'); + ParseNode other = additiveExpression(); + result = new ParseNode(ConcatPtg.instance, result, other); + } + return result; + } + + + /** Parse and Translate an Expression */ + private ParseNode additiveExpression() { + ParseNode result = Term(); + while (true) { + SkipWhite(); + Ptg operator; + switch(look) { + case '+': + Match('+'); + operator = AddPtg.instance; + break; + case '-': + Match('-'); + operator = SubtractPtg.instance; + break; + default: + return result; // finished with additive expression + } + ParseNode other = Term(); + result = new ParseNode(operator, result, other); + } + } + + //{--------------------------------------------------------------} + //{ Parse and Translate an Assignment Statement } + /** procedure Assignment; var Name: string[8]; begin @@ -1112,29 +1493,29 @@ begin Expression; end; - **/ - - - /** - * API call to execute the parsing of the formula - * - */ - private void parse() { - _pointer=0; - GetChar(); - _rootNode = unionExpression(); - - if(_pointer <= _formulaLength) { - String msg = "Unused input [" + _formulaString.substring(_pointer-1) - + "] after attempting to parse the formula [" + _formulaString + "]"; - throw new FormulaParseException(msg); - } - } - - private Ptg[] getRPNPtg(int formulaType) { - OperandClassTransformer oct = new OperandClassTransformer(formulaType); - // RVA is for 'operand class': 'reference', 'value', 'array' - oct.transformFormula(_rootNode); - return ParseNode.toTokenArray(_rootNode); - } + **/ + + + /** + * API call to execute the parsing of the formula + * + */ + private void parse() { + _pointer=0; + GetChar(); + _rootNode = unionExpression(); + + if(_pointer <= _formulaLength) { + String msg = "Unused input [" + _formulaString.substring(_pointer-1) + + "] after attempting to parse the formula [" + _formulaString + "]"; + throw new FormulaParseException(msg); + } + } + + private Ptg[] getRPNPtg(int formulaType) { + OperandClassTransformer oct = new OperandClassTransformer(formulaType); + // RVA is for 'operand class': 'reference', 'value', 'array' + oct.transformFormula(_rootNode); + return ParseNode.toTokenArray(_rootNode); + } } diff --git a/src/java/org/apache/poi/ss/formula/OperandClassTransformer.java b/src/java/org/apache/poi/ss/formula/OperandClassTransformer.java index 6a6e676a1f..620370aa01 100644 --- a/src/java/org/apache/poi/ss/formula/OperandClassTransformer.java +++ b/src/java/org/apache/poi/ss/formula/OperandClassTransformer.java @@ -21,6 +21,7 @@ import org.apache.poi.hssf.record.formula.AbstractFunctionPtg; import org.apache.poi.hssf.record.formula.AttrPtg; import org.apache.poi.hssf.record.formula.ControlPtg; import org.apache.poi.hssf.record.formula.FuncVarPtg; +import org.apache.poi.hssf.record.formula.MemAreaPtg; import org.apache.poi.hssf.record.formula.MemFuncPtg; import org.apache.poi.hssf.record.formula.Ptg; import org.apache.poi.hssf.record.formula.RangePtg; @@ -112,6 +113,7 @@ final class OperandClassTransformer { } if (token instanceof ValueOperatorPtg || token instanceof ControlPtg || token instanceof MemFuncPtg + || token instanceof MemAreaPtg || token instanceof UnionPtg) { // Value Operator Ptgs and Control are base tokens, so token will be unchanged // but any child nodes are processed according to desiredOperandClass and callerForceArrayFlag diff --git a/src/java/org/apache/poi/ss/formula/ParseNode.java b/src/java/org/apache/poi/ss/formula/ParseNode.java index 83e2b08175..40d99444e3 100644 --- a/src/java/org/apache/poi/ss/formula/ParseNode.java +++ b/src/java/org/apache/poi/ss/formula/ParseNode.java @@ -20,6 +20,7 @@ package org.apache.poi.ss.formula; import org.apache.poi.hssf.record.formula.ArrayPtg; import org.apache.poi.hssf.record.formula.AttrPtg; import org.apache.poi.hssf.record.formula.FuncVarPtg; +import org.apache.poi.hssf.record.formula.MemAreaPtg; import org.apache.poi.hssf.record.formula.MemFuncPtg; import org.apache.poi.hssf.record.formula.Ptg; import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry; @@ -39,6 +40,9 @@ final class ParseNode { private final int _tokenCount; public ParseNode(Ptg token, ParseNode[] children) { + if (token == null) { + throw new IllegalArgumentException("token must not be null"); + } _token = token; _children = children; _isIf = isIf(token); @@ -85,7 +89,7 @@ final class ParseNode { collectIfPtgs(temp); return; } - boolean isPreFixOperator = _token instanceof MemFuncPtg; + boolean isPreFixOperator = _token instanceof MemFuncPtg || _token instanceof MemAreaPtg; if (isPreFixOperator) { temp.add(_token); } diff --git a/src/java/org/apache/poi/ss/util/AreaReference.java b/src/java/org/apache/poi/ss/util/AreaReference.java index 30d92ebcf4..c3b383edf6 100644 --- a/src/java/org/apache/poi/ss/util/AreaReference.java +++ b/src/java/org/apache/poi/ss/util/AreaReference.java @@ -158,7 +158,15 @@ public class AreaReference { } return false; } - + + public static AreaReference getWholeRow(String start, String end) { + return new AreaReference("$A" + start + ":$IV" + end); + } + + public static AreaReference getWholeColumn(String start, String end) { + return new AreaReference(start + "$1:" + end + "$65536"); + } + /** * Is the reference for a whole-column reference, * such as C:C or D:G ? diff --git a/src/java/org/apache/poi/ss/util/CellReference.java b/src/java/org/apache/poi/ss/util/CellReference.java index e56bbe86f9..63d80f1c58 100644 --- a/src/java/org/apache/poi/ss/util/CellReference.java +++ b/src/java/org/apache/poi/ss/util/CellReference.java @@ -291,7 +291,7 @@ public class CellReference { return true; } - private static boolean isColumnWithnRange(String colStr) { + public static boolean isColumnWithnRange(String colStr) { int numberOfLetters = colStr.length(); if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) { // "Sheet1" case etc diff --git a/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java b/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java index 21edfa37e2..a68bb042ff 100644 --- a/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java +++ b/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java @@ -23,36 +23,7 @@ import junit.framework.TestCase; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.record.constant.ErrorConstant; import org.apache.poi.hssf.record.formula.AbstractFunctionPtg; -import org.apache.poi.hssf.record.formula.AddPtg; -import org.apache.poi.hssf.record.formula.Area3DPtg; -import org.apache.poi.hssf.record.formula.AreaI; -import org.apache.poi.hssf.record.formula.AreaPtg; -import org.apache.poi.hssf.record.formula.ArrayPtg; -import org.apache.poi.hssf.record.formula.AttrPtg; -import org.apache.poi.hssf.record.formula.BoolPtg; -import org.apache.poi.hssf.record.formula.ConcatPtg; -import org.apache.poi.hssf.record.formula.DividePtg; -import org.apache.poi.hssf.record.formula.EqualPtg; -import org.apache.poi.hssf.record.formula.ErrPtg; -import org.apache.poi.hssf.record.formula.FuncPtg; -import org.apache.poi.hssf.record.formula.FuncVarPtg; -import org.apache.poi.hssf.record.formula.IntPtg; -import org.apache.poi.hssf.record.formula.MemFuncPtg; -import org.apache.poi.hssf.record.formula.MissingArgPtg; -import org.apache.poi.hssf.record.formula.MultiplyPtg; -import org.apache.poi.hssf.record.formula.NamePtg; -import org.apache.poi.hssf.record.formula.NumberPtg; -import org.apache.poi.hssf.record.formula.PercentPtg; -import org.apache.poi.hssf.record.formula.PowerPtg; -import org.apache.poi.hssf.record.formula.Ptg; -import org.apache.poi.hssf.record.formula.RangePtg; -import org.apache.poi.hssf.record.formula.Ref3DPtg; -import org.apache.poi.hssf.record.formula.RefPtg; -import org.apache.poi.hssf.record.formula.StringPtg; -import org.apache.poi.hssf.record.formula.SubtractPtg; -import org.apache.poi.hssf.record.formula.UnaryMinusPtg; -import org.apache.poi.hssf.record.formula.UnaryPlusPtg; -import org.apache.poi.hssf.record.formula.UnionPtg; +import org.apache.poi.hssf.record.formula.*; import org.apache.poi.hssf.usermodel.FormulaExtractor; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFErrorConstants; @@ -63,6 +34,7 @@ import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.formula.FormulaParser; import org.apache.poi.ss.formula.FormulaParserTestHelper; +import org.apache.poi.ss.usermodel.BaseTestBugzillaIssues; /** * Test the low level formula parser functionality. High level tests are to @@ -316,31 +288,31 @@ public final class TestFormulaParser extends TestCase { cell.setCellFormula("13E-15/3"); formula = cell.getCellFormula(); - assertEquals("Exponential formula string", "0.000000000000013/3", formula); + assertEquals("Exponential formula string", "0.000000000000013/3", formula); cell.setCellFormula("-13E-15/3"); formula = cell.getCellFormula(); - assertEquals("Exponential formula string", "-0.000000000000013/3", formula); + assertEquals("Exponential formula string", "-0.000000000000013/3", formula); cell.setCellFormula("1.3E3/3"); formula = cell.getCellFormula(); - assertEquals("Exponential formula string", "1300/3", formula); + assertEquals("Exponential formula string", "1300/3", formula); cell.setCellFormula("-1.3E3/3"); formula = cell.getCellFormula(); - assertEquals("Exponential formula string", "-1300/3", formula); + assertEquals("Exponential formula string", "-1300/3", formula); cell.setCellFormula("1300000000000000/3"); formula = cell.getCellFormula(); - assertEquals("Exponential formula string", "1300000000000000/3", formula); + assertEquals("Exponential formula string", "1300000000000000/3", formula); cell.setCellFormula("-1300000000000000/3"); formula = cell.getCellFormula(); - assertEquals("Exponential formula string", "-1300000000000000/3", formula); + assertEquals("Exponential formula string", "-1300000000000000/3", formula); cell.setCellFormula("-10E-1/3.1E2*4E3/3E4"); formula = cell.getCellFormula(); - assertEquals("Exponential formula string", "-1/310*4000/30000", formula); + assertEquals("Exponential formula string", "-1/310*4000/30000", formula); } public void testNumbers() { @@ -371,15 +343,15 @@ public final class TestFormulaParser extends TestCase { cell.setCellFormula("10E1"); formula = cell.getCellFormula(); - assertEquals("100", formula); + assertEquals("100", formula); cell.setCellFormula("10E+1"); formula = cell.getCellFormula(); - assertEquals("100", formula); + assertEquals("100", formula); cell.setCellFormula("10E-1"); formula = cell.getCellFormula(); - assertEquals("1", formula); + assertEquals("1", formula); } public void testRanges() { @@ -985,7 +957,7 @@ public final class TestFormulaParser extends TestCase { confirmTokenClasses(ptgs, new Class[] { Ref3DPtg.class, Ref3DPtg.class, RangePtg.class,}); throw new AssertionFailedError("Identified bug 46643"); } - + Class [] expectedClasses = { MemFuncPtg.class, Ref3DPtg.class, @@ -997,24 +969,200 @@ public final class TestFormulaParser extends TestCase { assertEquals(15, mf.getLenRefSubexpression()); } - /** Named ranges with backslashes, e.g. 'POI\\2009' */ - public void testBackSlashInNames() { - HSSFWorkbook wb = new HSSFWorkbook(); + /** Named ranges with backslashes, e.g. 'POI\\2009' */ + public void testBackSlashInNames() { + HSSFWorkbook wb = new HSSFWorkbook(); + + HSSFName name = wb.createName(); + name.setNameName("POI\\2009"); + name.setRefersToFormula("Sheet1!$A$1"); + + HSSFSheet sheet = wb.createSheet(); + HSSFRow row = sheet.createRow(0); + + HSSFCell cell_C1 = row.createCell(2); + cell_C1.setCellFormula("POI\\2009"); + assertEquals("POI\\2009", cell_C1.getCellFormula()); + + HSSFCell cell_D1 = row.createCell(2); + cell_D1.setCellFormula("NOT(POI\\2009=\"3.5-final\")"); + assertEquals("NOT(POI\\2009=\"3.5-final\")", cell_D1.getCellFormula()); + } + + /** + * TODO - delete equiv test: + * {@link BaseTestBugzillaIssues#test42448()} + */ + public void testParseAbnormalSheetNamesAndRanges_bug42448() { + HSSFWorkbook wb = new HSSFWorkbook(); + wb.createSheet("A"); + try { + HSSFFormulaParser.parse("SUM(A!C7:A!C67)", wb); + } catch (StringIndexOutOfBoundsException e) { + throw new AssertionFailedError("Identified bug 42448"); + } + // the exact example from the bugzilla description: + HSSFFormulaParser.parse("SUMPRODUCT(A!C7:A!C67, B8:B68) / B69", wb); + } + + public void testRangeFuncOperand_bug46951() { + HSSFWorkbook wb = new HSSFWorkbook(); + Ptg[] ptgs; + try { + ptgs = HSSFFormulaParser.parse("SUM(C1:OFFSET(C1,0,B1))", wb); + } catch (RuntimeException e) { + if (e.getMessage().equals("Specified named range 'OFFSET' does not exist in the current workbook.")) { + throw new AssertionFailedError("Identified bug 46951"); + } + throw e; + } + confirmTokenClasses(ptgs, new Class[] { + MemFuncPtg.class, // [len=23] + RefPtg.class, // [C1] + RefPtg.class, // [C1] + IntPtg.class, // [0] + RefPtg.class, // [B1] + FuncVarPtg.class, // [OFFSET nArgs=3] + RangePtg.class, // + AttrPtg.class, // [sum ] + }); + + } + + public void testUnionOfFullCollFullRowRef() { + Ptg[] ptgs; + ptgs = parseFormula("3:4"); + ptgs = parseFormula("$Z:$AC"); + confirmTokenClasses(ptgs, new Class[] { + AreaPtg.class, + }); + ptgs = parseFormula("B:B"); + + ptgs = parseFormula("$11:$13"); + confirmTokenClasses(ptgs, new Class[] { + AreaPtg.class, + }); + + ptgs = parseFormula("$A:$A,$1:$4"); + confirmTokenClasses(ptgs, new Class[] { + MemAreaPtg.class, + AreaPtg.class, + AreaPtg.class, + UnionPtg.class, + }); + + HSSFWorkbook wb = new HSSFWorkbook(); + wb.createSheet("Sheet1"); + ptgs = HSSFFormulaParser.parse("Sheet1!$A:$A,Sheet1!$1:$4", wb); + confirmTokenClasses(ptgs, new Class[] { + MemFuncPtg.class, + Area3DPtg.class, + Area3DPtg.class, + UnionPtg.class, + }); + + ptgs = HSSFFormulaParser.parse("'Sheet1'!$A:$A,'Sheet1'!$1:$4", wb); + confirmTokenClasses(ptgs, new Class[] { + MemFuncPtg.class, + Area3DPtg.class, + Area3DPtg.class, + UnionPtg.class, + }); + } + + + public void testExplicitRangeWithTwoSheetNames() { + HSSFWorkbook wb = new HSSFWorkbook(); + wb.createSheet("Sheet1"); + Ptg[] ptgs = HSSFFormulaParser.parse("Sheet1!F1:Sheet1!G2", wb); + confirmTokenClasses(ptgs, new Class[] { + MemFuncPtg.class, + Ref3DPtg.class, + Ref3DPtg.class, + RangePtg.class, + }); + MemFuncPtg mf; + mf = (MemFuncPtg)ptgs[0]; + assertEquals(15, mf.getLenRefSubexpression()); + + } + + /** + * Checks that the area-ref and explicit range operators get the right associativity + * and that the {@link MemFuncPtg} / {@link MemAreaPtg} is added correctly + */ + public void testComplexExplicitRangeEncodings() { - HSSFName name = wb.createName(); - name.setNameName("POI\\2009"); - name.setRefersToFormula("Sheet1!$A$1"); + Ptg[] ptgs; + ptgs = parseFormula("SUM(OFFSET(A1,0,0):B2:C3:D4:E5:OFFSET(F6,1,1):G7)"); + confirmTokenClasses(ptgs, new Class[] { + // AttrPtg.class, // [volatile ] // POI doesn't do this yet (Apr 2009) + MemFuncPtg.class, // len 57 + RefPtg.class, // [A1] + IntPtg.class, // [0] + IntPtg.class, // [0] + FuncVarPtg.class, // [OFFSET nArgs=3] + AreaPtg.class, // [B2:C3] + RangePtg.class, + AreaPtg.class, // [D4:E5] + RangePtg.class, + RefPtg.class, // [F6] + IntPtg.class, // [1] + IntPtg.class, // [1] + FuncVarPtg.class, // [OFFSET nArgs=3] + RangePtg.class, + RefPtg.class, // [G7] + RangePtg.class, + AttrPtg.class, // [sum ] + }); - HSSFSheet sheet = wb.createSheet(); - HSSFRow row = sheet.createRow(0); + MemFuncPtg mf = (MemFuncPtg)ptgs[0]; + assertEquals(57, mf.getLenRefSubexpression()); + assertEquals("D4:E5", ((AreaPtgBase)ptgs[7]).toFormulaString()); + assertTrue(((AttrPtg)ptgs[16]).isSum()); + + ptgs = parseFormula("SUM(A1:B2:C3:D4)"); + confirmTokenClasses(ptgs, new Class[] { + // AttrPtg.class, // [volatile ] // POI doesn't do this yet (Apr 2009) + MemAreaPtg.class, // len 19 + AreaPtg.class, // [A1:B2] + AreaPtg.class, // [C3:D4] + RangePtg.class, + AttrPtg.class, // [sum ] + }); + MemAreaPtg ma = (MemAreaPtg)ptgs[0]; + assertEquals(19, ma.getLenRefSubexpression()); + } + + + /** + * Mostly confirming that erroneous conditions are detected. Actual error message wording is not critical. + * + */ + public void testEdgeCaseParserErrors() { + HSSFWorkbook wb = new HSSFWorkbook(); + wb.createSheet("Sheet1"); - HSSFCell cell_C1 = row.createCell(2); - cell_C1.setCellFormula("POI\\2009"); - assertEquals("POI\\2009", cell_C1.getCellFormula()); + confirmParseError(wb, "A1:ROUND(B1,1)", "The RHS of the range operator ':' at position 3 is not a proper reference."); - HSSFCell cell_D1 = row.createCell(2); - cell_D1.setCellFormula("NOT(POI\\2009=\"3.5-final\")"); - assertEquals("NOT(POI\\2009=\"3.5-final\")", cell_D1.getCellFormula()); - } + confirmParseError(wb, "Sheet1!Sheet1", "Cell reference expected after sheet name at index 8."); + confirmParseError(wb, "Sheet1!F:Sheet1!G", "'Sheet1!F' is not a proper reference."); + confirmParseError(wb, "Sheet1!F..foobar", "Complete area reference expected after sheet name at index 11."); + confirmParseError(wb, "Sheet1!A .. B", "Dotted range (full row or column) expression 'A .. B' must not contain whitespace."); + confirmParseError(wb, "Sheet1!A...B", "Dotted range (full row or column) expression 'A...B' must have exactly 2 dots."); + confirmParseError(wb, "Sheet1!A foobar", "Second part of cell reference expected after sheet name at index 10."); + confirmParseError(wb, "foobar", "Specified named range 'foobar' does not exist in the current workbook."); + confirmParseError(wb, "A1:1", "The RHS of the range operator ':' at position 3 is not a proper reference."); + } + + private static void confirmParseError(HSSFWorkbook wb, String formula, String expectedMessage) { + + try { + HSSFFormulaParser.parse(formula, wb); + throw new AssertionFailedError("Expected formula parse execption"); + } catch (RuntimeException e) { + FormulaParserTestHelper.confirmParseException(e, expectedMessage); + } + } } -- 2.39.5