12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.ss.formula;
-
- import java.util.ArrayList;
- import java.util.List;
- import java.util.Locale;
- import java.util.regex.Pattern;
-
- import org.apache.poi.ss.SpreadsheetVersion;
- import org.apache.poi.ss.formula.constant.ErrorConstant;
- import org.apache.poi.ss.formula.function.FunctionMetadata;
- import org.apache.poi.ss.formula.function.FunctionMetadataRegistry;
- import org.apache.poi.ss.formula.ptg.AbstractFunctionPtg;
- import org.apache.poi.ss.formula.ptg.AddPtg;
- import org.apache.poi.ss.formula.ptg.Area3DPxg;
- import org.apache.poi.ss.formula.ptg.AreaPtg;
- import org.apache.poi.ss.formula.ptg.ArrayPtg;
- import org.apache.poi.ss.formula.ptg.AttrPtg;
- import org.apache.poi.ss.formula.ptg.BoolPtg;
- import org.apache.poi.ss.formula.ptg.ConcatPtg;
- import org.apache.poi.ss.formula.ptg.DividePtg;
- import org.apache.poi.ss.formula.ptg.EqualPtg;
- import org.apache.poi.ss.formula.ptg.ErrPtg;
- import org.apache.poi.ss.formula.ptg.FuncPtg;
- import org.apache.poi.ss.formula.ptg.FuncVarPtg;
- import org.apache.poi.ss.formula.ptg.GreaterEqualPtg;
- import org.apache.poi.ss.formula.ptg.GreaterThanPtg;
- import org.apache.poi.ss.formula.ptg.IntPtg;
- import org.apache.poi.ss.formula.ptg.IntersectionPtg;
- import org.apache.poi.ss.formula.ptg.LessEqualPtg;
- import org.apache.poi.ss.formula.ptg.LessThanPtg;
- import org.apache.poi.ss.formula.ptg.MemAreaPtg;
- import org.apache.poi.ss.formula.ptg.MemFuncPtg;
- import org.apache.poi.ss.formula.ptg.MissingArgPtg;
- import org.apache.poi.ss.formula.ptg.MultiplyPtg;
- import org.apache.poi.ss.formula.ptg.NamePtg;
- import org.apache.poi.ss.formula.ptg.NameXPtg;
- import org.apache.poi.ss.formula.ptg.NameXPxg;
- import org.apache.poi.ss.formula.ptg.NotEqualPtg;
- import org.apache.poi.ss.formula.ptg.NumberPtg;
- import org.apache.poi.ss.formula.ptg.OperandPtg;
- import org.apache.poi.ss.formula.ptg.OperationPtg;
- import org.apache.poi.ss.formula.ptg.ParenthesisPtg;
- import org.apache.poi.ss.formula.ptg.PercentPtg;
- import org.apache.poi.ss.formula.ptg.PowerPtg;
- import org.apache.poi.ss.formula.ptg.Ptg;
- import org.apache.poi.ss.formula.ptg.RangePtg;
- import org.apache.poi.ss.formula.ptg.RefPtg;
- import org.apache.poi.ss.formula.ptg.StringPtg;
- import org.apache.poi.ss.formula.ptg.SubtractPtg;
- import org.apache.poi.ss.formula.ptg.UnaryMinusPtg;
- import org.apache.poi.ss.formula.ptg.UnaryPlusPtg;
- import org.apache.poi.ss.formula.ptg.UnionPtg;
- import org.apache.poi.ss.formula.ptg.ValueOperatorPtg;
- import org.apache.poi.ss.usermodel.FormulaError;
- import org.apache.poi.ss.usermodel.Name;
- import org.apache.poi.ss.usermodel.Table;
- import org.apache.poi.ss.util.AreaReference;
- import org.apache.poi.ss.util.CellReference;
- import org.apache.poi.ss.util.CellReference.NameType;
- import org.apache.poi.util.Internal;
- import org.apache.poi.util.POILogFactory;
- import org.apache.poi.util.POILogger;
-
- /**
- * This class parses a formula string into a List of tokens in RPN order.
- * Inspired by
- * Lets Build a Compiler, by Jack Crenshaw
- * BNF for the formula expression is :
- * <expression> ::= <term> [<addop> <term>]*
- * <term> ::= <factor> [ <mulop> <factor> ]*
- * <factor> ::= <number> | (<expression>) | <cellRef> | <function>
- * <function> ::= <functionName> ([expression [, expression]*])
- * <p/>
- * For POI internal use only
- * <p/>
- */
- @Internal
- public final class FormulaParser {
- private final static POILogger log = POILogFactory.getLogger(FormulaParser.class);
- private final String _formulaString;
- private final int _formulaLength;
- /** points at the next character to be read (after the {@link #look} codepoint) */
- private int _pointer;
-
- private ParseNode _rootNode;
-
- private final static char TAB = '\t'; // HSSF + XSSF
- private final static char CR = '\r'; // Normally just XSSF
- private final static char LF = '\n'; // Normally just XSSF
-
- /**
- * Lookahead unicode codepoint
- * gets value '\0' when the input string is exhausted
- */
- private int look;
-
- /**
- * Tracks whether the run of whitespace preceding "look" could be an
- * intersection operator. See GetChar.
- */
- private boolean _inIntersection;
-
- private final FormulaParsingWorkbook _book;
- private final SpreadsheetVersion _ssVersion;
-
- private final int _sheetIndex;
- private final int _rowIndex; // 0-based
-
-
- /**
- * Create the formula parser, with the string that is to be
- * parsed against the supplied workbook.
- * A later call the parse() method to return ptg list in
- * rpn order, then call the getRPNPtg() to retrieve the
- * parse results.
- * This class is recommended only for single threaded use.
- *
- * If you have a {@link org.apache.poi.hssf.usermodel.HSSFWorkbook}, and not a
- * {@link org.apache.poi.ss.usermodel.Workbook}, then use the convenience method on
- * {@link org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator}
- */
- private FormulaParser(String formula, FormulaParsingWorkbook book, int sheetIndex, int rowIndex) {
- _formulaString = formula;
- _pointer=0;
- _book = book;
- _ssVersion = book == null ? SpreadsheetVersion.EXCEL97 : book.getSpreadsheetVersion();
- _formulaLength = _formulaString.length();
- _sheetIndex = sheetIndex;
- _rowIndex = rowIndex;
- }
-
- /**
- * Parse a formula into an array of tokens
- * Side effect: creates name ({@link org.apache.poi.ss.usermodel.Workbook#createName})
- * if formula contains unrecognized names (names are likely UDFs)
- *
- * @param formula the formula to parse
- * @param workbook the parent workbook
- * @param formulaType the type of the formula
- * @param sheetIndex the 0-based index of the sheet this formula belongs to.
- * The sheet index is required to resolve sheet-level names. <code>-1</code> means that
- * the scope of the name will be ignored and the parser will match names only by name
- * @param rowIndex - the related cell's row index in 0-based form (-1 if the formula is not cell related)
- * used to handle structured references that have the "#This Row" quantifier.
- * Use rowIndex=-1 or {@link #parseStructuredReference(String, FormulaParsingWorkbook, int)} if formula
- * does not contain structured references.
- *
- * @return array of parsed tokens
- * @throws FormulaParseException if the formula has incorrect syntax or is otherwise invalid
- */
- public static Ptg[] parse(String formula, FormulaParsingWorkbook workbook, FormulaType formulaType, int sheetIndex, int rowIndex) {
- FormulaParser fp = new FormulaParser(formula, workbook, sheetIndex, rowIndex);
- fp.parse();
- return fp.getRPNPtg(formulaType);
- }
-
- /**
- * Parse a formula into an array of tokens
- * Side effect: creates name ({@link org.apache.poi.ss.usermodel.Workbook#createName})
- * if formula contains unrecognized names (names are likely UDFs)
- *
- * @param formula the formula to parse
- * @param workbook the parent workbook
- * @param formulaType the type of the formula
- * @param sheetIndex the 0-based index of the sheet this formula belongs to.
- * The sheet index is required to resolve sheet-level names. <code>-1</code> means that
- * the scope of the name will be ignored and the parser will match names only by name
- *
- * @return array of parsed tokens
- * @throws FormulaParseException if the formula has incorrect syntax or is otherwise invalid
- */
- public static Ptg[] parse(String formula, FormulaParsingWorkbook workbook, FormulaType formulaType, int sheetIndex) {
- return parse(formula, workbook, formulaType, sheetIndex, -1);
- }
-
- /**
- * Parse a structured reference. Converts the structured
- * reference to the area that represent it.
- *
- * @param tableText - The structured reference text
- * @param workbook - the parent workbook
- * @param rowIndex - the 0-based cell's row index ( used to handle "#This Row" quantifiers )
- * @return the area that being represented by the structured reference.
- */
- public static Area3DPxg parseStructuredReference(String tableText, FormulaParsingWorkbook workbook, int rowIndex) {
- final int sheetIndex = -1; //don't care?
- Ptg[] arr = FormulaParser.parse(tableText, workbook, FormulaType.CELL, sheetIndex, rowIndex);
- if (arr.length != 1 || !(arr[0] instanceof Area3DPxg) ) {
- throw new IllegalStateException("Illegal structured reference");
- }
- return (Area3DPxg) arr[0];
- }
-
- /** Read New Character From Input Stream */
- private void GetChar() {
- // The intersection operator is a space. We track whether the run of
- // whitespace preceeding "look" counts as an intersection operator.
- if (IsWhite(look)) {
- if (look == ' ') {
- _inIntersection = true;
- }
- }
- else {
- _inIntersection = false;
- }
-
- // Check to see if we've walked off the end of the string.
- if (_pointer > _formulaLength) {
- throw new RuntimeException("too far");
- }
- if (_pointer < _formulaLength) {
- look=_formulaString.codePointAt(_pointer);
- } else {
- // Just return if so and reset 'look' to something to keep
- // SkipWhitespace from spinning
- look = (char)0;
- _inIntersection = false;
- }
- _pointer += Character.charCount(look);
- //System.out.println(new StringBuilder("Got char: ").appendCodePoint(look)).toString();
- }
- private void resetPointer(int ptr) {
- _pointer = ptr;
- if (_pointer <= _formulaLength) {
- look=_formulaString.codePointAt(_pointer - Character.charCount(look));
- } else {
- // Just return if so and reset 'look' to something to keep
- // SkipWhitespace from spinning
- look = (char)0;
- }
- }
-
- /** Report What Was Expected */
- private RuntimeException expected(String s) {
- String msg;
-
- if (look == '=' && _formulaString.substring(0, _pointer-1).trim().length() < 1) {
- msg = "The specified formula '" + _formulaString
- + "' starts with an equals sign which is not allowed.";
- } else {
- msg = new StringBuilder("Parse error near char ")
- .append(_pointer-1) //this is the codepoint index, not char index, which may be larger if there are multi-byte chars
- .append(" '")
- .appendCodePoint(look)
- .append("'")
- .append(" in specified formula '")
- .append(_formulaString)
- .append("'. Expected ")
- .append(s)
- .toString();
- }
- return new FormulaParseException(msg);
- }
-
- /** Recognize an Alpha Character */
- private static boolean IsAlpha(int c) {
- return Character.isLetter(c) || c == '$' || c=='_';
- }
-
- /** Recognize a Decimal Digit */
- private static boolean IsDigit(int c) {
- return Character.isDigit(c);
- }
-
- /** Recognize White Space */
- private static boolean IsWhite(int c) {
- return c ==' ' || c== TAB || c == CR || c == LF;
- }
-
- /** Skip Over Leading White Space */
- private void SkipWhite() {
- while (IsWhite(look)) {
- GetChar();
- }
- }
-
- /**
- * Consumes the next input character if it is equal to the one specified otherwise throws an
- * unchecked exception. This method does <b>not</b> consume whitespace (before or after the
- * matched character).
- */
- private void Match(int x) {
- if (look != x) {
- throw expected(new StringBuilder()
- .append("'")
- .appendCodePoint(x)
- .append("'")
- .toString());
- }
- GetChar();
- }
-
- /** Get a Number */
- private String GetNum() {
- StringBuilder value = new StringBuilder();
-
- while (IsDigit(this.look)){
- value.appendCodePoint(this.look);
- GetChar();
- }
- return value.length() == 0 ? null : value.toString();
- }
-
- private ParseNode parseRangeExpression() {
- ParseNode result = parseRangeable();
- boolean hasRange = false;
- while (look == ':') {
- int pos = _pointer;
- GetChar();
- ParseNode nextPart = parseRangeable();
- // Note - no range simplification here. An expr like "A1:B2:C3:D4:E5" should be
- // grouped into area ref pairs like: "(A1:B2):(C3:D4):E5"
- // Furthermore, Excel doesn't seem to simplify
- // expressions like "Sheet1!A1:Sheet1:B2" into "Sheet1!A1:B2"
-
- checkValidRangeOperand("LHS", pos, result);
- checkValidRangeOperand("RHS", pos, nextPart);
-
- ParseNode[] children = { result, nextPart, };
- result = new ParseNode(RangePtg.instance, children);
- hasRange = true;
- }
- if (hasRange) {
- return augmentWithMemPtg(result);
- }
- return result;
- }
-
- private static ParseNode augmentWithMemPtg(ParseNode root) {
- Ptg memPtg;
- if (needsMemFunc(root)) {
- memPtg = new MemFuncPtg(root.getEncodedSize());
- } else {
- memPtg = new MemAreaPtg(root.getEncodedSize());
- }
- return new ParseNode(memPtg, root);
- }
- /**
- * From OOO doc: "Whenever one operand of the reference subexpression is a function,
- * a defined name, a 3D reference, or an external reference (and no error occurs),
- * a tMemFunc token is used"
- *
- */
- private static boolean needsMemFunc(ParseNode root) {
- Ptg token = root.getToken();
- if (token instanceof AbstractFunctionPtg) {
- return true;
- }
- if (token instanceof ExternSheetReferenceToken) { // 3D refs
- return true;
- }
- if (token instanceof NamePtg || token instanceof NameXPtg) { // 3D refs
- return true;
- }
-
- if (token instanceof OperationPtg || token instanceof ParenthesisPtg) {
- // expect RangePtg, but perhaps also UnionPtg, IntersectionPtg etc
- for(ParseNode child : root.getChildren()) {
- if (needsMemFunc(child)) {
- return true;
- }
- }
- return false;
- }
- if (token instanceof OperandPtg) {
- return false;
- }
- if (token instanceof OperationPtg) {
- return true;
- }
-
- return false;
- }
-
- /**
- * @param currentParsePosition used to format a potential error message
- */
- private static void checkValidRangeOperand(String sideName, int currentParsePosition, ParseNode pn) {
- if (!isValidRangeOperand(pn)) {
- throw new FormulaParseException("The " + sideName
- + " of the range operator ':' at position "
- + currentParsePosition + " is not a proper reference.");
- }
- }
-
- /**
- * @return <code>false</code> if sub-expression represented the specified ParseNode definitely
- * cannot appear on either side of the range (':') operator
- */
- private static boolean isValidRangeOperand(ParseNode a) {
- Ptg tkn = a.getToken();
- // Note - order is important for these instance-of checks
- if (tkn instanceof OperandPtg) {
- // notably cell refs and area refs
- return true;
- }
-
- // next 2 are special cases of OperationPtg
- if (tkn instanceof AbstractFunctionPtg) {
- AbstractFunctionPtg afp = (AbstractFunctionPtg) tkn;
- byte returnClass = afp.getDefaultOperandClass();
- return Ptg.CLASS_REF == returnClass;
- }
- if (tkn instanceof ValueOperatorPtg) {
- return false;
- }
- if (tkn instanceof OperationPtg) {
- return true;
- }
-
- // one special case of ControlPtg
- if (tkn instanceof ParenthesisPtg) {
- // parenthesis Ptg should have only one child
- return isValidRangeOperand(a.getChildren()[0]);
- }
-
- // one special case of ScalarConstantPtg
- if (tkn == ErrPtg.REF_INVALID) {
- return true;
- }
-
- // All other ControlPtgs and ScalarConstantPtgs cannot be used with ':'
- return false;
- }
-
- /**
- * Parses area refs (things which could be the operand of ':') and simple factors
- * Examples
- * <pre>
- * A$1
- * $A$1 : $B1
- * A1 ....... C2
- * Sheet1 !$A1
- * a..b!A1
- * 'my sheet'!A1
- * .my.sheet!A1
- * 'my sheet':'my alt sheet'!A1
- * .my.sheet1:.my.sheet2!$B$2
- * my.named..range.
- * 'my sheet'!my.named.range
- * .my.sheet!my.named.range
- * foo.bar(123.456, "abc")
- * 123.456
- * "abc"
- * true
- * [Foo.xls]!$A$1
- * [Foo.xls]'my sheet'!$A$1
- * [Foo.xls]!my.named.range
- * </pre>
- *
- */
- private ParseNode parseRangeable() {
- SkipWhite();
- int savePointer = _pointer;
- SheetIdentifier sheetIden = parseSheetName();
-
- if (sheetIden == null) {
- resetPointer(savePointer);
- } else {
- SkipWhite();
- savePointer = _pointer;
- }
-
- SimpleRangePart part1 = parseSimpleRangePart();
- if (part1 == null) {
- if (sheetIden != null) {
- if(look == '#'){ // error ref like MySheet!#REF!
- return new ParseNode(ErrPtg.valueOf(parseErrorLiteral()));
- } else {
- // Is it a named range?
- String name = parseAsName();
- if (name.length() == 0) {
- throw new FormulaParseException("Cell reference or Named Range "
- + "expected after sheet name at index " + _pointer + ".");
- }
- Ptg nameXPtg = _book.getNameXPtg(name, sheetIden);
- if (nameXPtg == null) {
- throw new FormulaParseException("Specified name '" + name +
- "' for sheet " + sheetIden.asFormulaString() + " not found");
- }
- return new ParseNode(nameXPtg);
- }
- }
- return parseNonRange(savePointer);
- }
- boolean whiteAfterPart1 = IsWhite(look);
- if (whiteAfterPart1) {
- SkipWhite();
- }
-
- if (look == ':') {
- int colonPos = _pointer;
- GetChar();
- SkipWhite();
- SimpleRangePart part2 = parseSimpleRangePart();
- if (part2 != null && !part1.isCompatibleForArea(part2)) {
- // second part is not compatible with an area ref e.g. S!A1:S!B2
- // where S might be a sheet name (that looks like a column name)
-
- part2 = null;
- }
- if (part2 == null) {
- // second part is not compatible with an area ref e.g. A1:OFFSET(B2, 1, 2)
- // reset and let caller use explicit range operator
- resetPointer(colonPos);
- if (!part1.isCell()) {
- String prefix = "";
- if (sheetIden != null) {
- prefix = "'" + sheetIden.getSheetIdentifier().getName() + '!';
- }
- throw new FormulaParseException(prefix + part1.getRep() + "' is not a proper reference.");
- }
- }
- return createAreaRefParseNode(sheetIden, part1, part2);
- }
-
- if (look == '.') {
- GetChar();
- int dotCount = 1;
- while (look =='.') {
- dotCount ++;
- GetChar();
- }
- boolean whiteBeforePart2 = IsWhite(look);
-
- SkipWhite();
- SimpleRangePart part2 = parseSimpleRangePart();
- String part1And2 = _formulaString.substring(savePointer-1, _pointer-1);
- if (part2 == null) {
- if (sheetIden != null) {
- throw new FormulaParseException("Complete area reference expected after sheet name at index "
- + _pointer + ".");
- }
- return parseNonRange(savePointer);
- }
-
-
- if (whiteAfterPart1 || whiteBeforePart2) {
- if (part1.isRowOrColumn() || part2.isRowOrColumn()) {
- // "A .. B" not valid syntax for "A:B"
- // and there's no other valid expression that fits this grammar
- throw new FormulaParseException("Dotted range (full row or column) expression '"
- + part1And2 + "' must not contain whitespace.");
- }
- return createAreaRefParseNode(sheetIden, part1, part2);
- }
-
- if (dotCount == 1 && part1.isRow() && part2.isRow()) {
- // actually, this is looking more like a number
- return parseNonRange(savePointer);
- }
-
- if (part1.isRowOrColumn() || part2.isRowOrColumn()) {
- if (dotCount != 2) {
- throw new FormulaParseException("Dotted range (full row or column) expression '" + part1And2
- + "' must have exactly 2 dots.");
- }
- }
- return createAreaRefParseNode(sheetIden, part1, part2);
- }
- if (part1.isCell() && isValidCellReference(part1.getRep())) {
- return createAreaRefParseNode(sheetIden, part1, null);
- }
- if (sheetIden != null) {
- throw new FormulaParseException("Second part of cell reference expected after sheet name at index "
- + _pointer + ".");
- }
-
- return parseNonRange(savePointer);
- }
-
-
-
- private final static String specHeaders = "Headers";
- private final static String specAll = "All";
- private final static String specData = "Data";
- private final static String specTotals = "Totals";
- private final static String specThisRow = "This Row";
-
- /**
- * Parses a structured reference, returns it as area reference.
- * Examples:
- * <pre>
- * Table1[col]
- * Table1[[#Totals],[col]]
- * Table1[#Totals]
- * Table1[#All]
- * Table1[#Data]
- * Table1[#Headers]
- * Table1[#Totals]
- * Table1[#This Row]
- * Table1[[#All],[col]]
- * Table1[[#Headers],[col]]
- * Table1[[#Totals],[col]]
- * Table1[[#All],[col1]:[col2]]
- * Table1[[#Data],[col1]:[col2]]
- * Table1[[#Headers],[col1]:[col2]]
- * Table1[[#Totals],[col1]:[col2]]
- * Table1[[#Headers],[#Data],[col2]]
- * Table1[[#This Row], [col1]]
- * Table1[ [col1]:[col2] ]
- * </pre>
- * @param tableName
- * @return Area Reference for the given table
- */
- private ParseNode parseStructuredReference(String tableName) {
-
- if ( ! (_ssVersion.equals(SpreadsheetVersion.EXCEL2007)) ) {
- throw new FormulaParseException("Structured references work only on XSSF (Excel 2007+)!");
- }
- Table tbl = _book.getTable(tableName);
- if (tbl == null) {
- throw new FormulaParseException("Illegal table name: '" + tableName + "'");
- }
- String sheetName = tbl.getSheetName();
-
- int startCol = tbl.getStartColIndex();
- int endCol = tbl.getEndColIndex();
- int startRow = tbl.getStartRowIndex();
- int endRow = tbl.getEndRowIndex();
-
- // Do NOT return before done reading all the structured reference tokens from the input stream.
- // Throwing exceptions is okay.
- int savePtr0 = _pointer;
- GetChar();
-
- boolean isTotalsSpec = false;
- boolean isThisRowSpec = false;
- boolean isDataSpec = false;
- boolean isHeadersSpec = false;
- boolean isAllSpec = false;
- int nSpecQuantifiers = 0; // The number of special quantifiers
- while (true) {
- int savePtr1 = _pointer;
- String specName = parseAsSpecialQuantifier();
- if (specName == null) {
- resetPointer(savePtr1);
- break;
- }
- if (specName.equals(specAll)) {
- isAllSpec = true;
- } else if (specName.equals(specData)) {
- isDataSpec = true;
- } else if (specName.equals(specHeaders)) {
- isHeadersSpec = true;
- } else if (specName.equals(specThisRow)) {
- isThisRowSpec = true;
- } else if (specName.equals(specTotals)) {
- isTotalsSpec = true;
- } else {
- throw new FormulaParseException("Unknown special quantifier "+ specName);
- }
- nSpecQuantifiers++;
- if (look == ','){
- GetChar();
- } else {
- break;
- }
- }
- boolean isThisRow = false;
- SkipWhite();
- if (look == '@') {
- isThisRow = true;
- GetChar();
- }
- // parse column quantifier
- String startColumnName;
- String endColumnName = null;
- int nColQuantifiers = 0;
- int savePtr1 = _pointer;
- startColumnName = parseAsColumnQuantifier();
- if (startColumnName == null) {
- resetPointer(savePtr1);
- } else {
- nColQuantifiers++;
- if (look == ','){
- throw new FormulaParseException("The formula "+ _formulaString + "is illegal: you should not use ',' with column quantifiers");
- } else if (look == ':') {
- GetChar();
- endColumnName = parseAsColumnQuantifier();
- nColQuantifiers++;
- if (endColumnName == null) {
- throw new FormulaParseException("The formula "+ _formulaString + "is illegal: the string after ':' must be column quantifier");
- }
- }
- }
-
- if(nColQuantifiers == 0 && nSpecQuantifiers == 0){
- resetPointer(savePtr0);
- savePtr0 = _pointer;
- startColumnName = parseAsColumnQuantifier();
- if (startColumnName != null) {
- nColQuantifiers++;
- } else {
- resetPointer(savePtr0);
- String name = parseAsSpecialQuantifier();
- if (name!=null) {
- if (name.equals(specAll)) {
- isAllSpec = true;
- } else if (name.equals(specData)) {
- isDataSpec = true;
- } else if (name.equals(specHeaders)) {
- isHeadersSpec = true;
- } else if (name.equals(specThisRow)) {
- isThisRowSpec = true;
- } else if (name.equals(specTotals)) {
- isTotalsSpec = true;
- } else {
- throw new FormulaParseException("Unknown special quantifier "+ name);
- }
- nSpecQuantifiers++;
- } else {
- throw new FormulaParseException("The formula "+ _formulaString + " is illegal");
- }
- }
- } else {
- Match(']');
- }
- // Done reading from input stream
- // Ok to return now
-
- if (isTotalsSpec && !tbl.isHasTotalsRow()) {
- return new ParseNode(ErrPtg.REF_INVALID);
- }
- if ((isThisRow || isThisRowSpec) && (_rowIndex < startRow || endRow < _rowIndex)) {
- // structured reference is trying to reference a row above or below the table with [#This Row] or [@]
- if (_rowIndex >= 0) {
- return new ParseNode(ErrPtg.VALUE_INVALID);
- } else {
- throw new FormulaParseException(
- "Formula contained [#This Row] or [@] structured reference but this row < 0. " +
- "Row index must be specified for row-referencing structured references.");
- }
- }
-
- int actualStartRow = startRow;
- int actualEndRow = endRow;
- int actualStartCol = startCol;
- int actualEndCol = endCol;
- if (nSpecQuantifiers > 0) {
- //Selecting rows
- if (nSpecQuantifiers == 1 && isAllSpec) {
- //do nothing
- } else if (isDataSpec && isHeadersSpec) {
- if (tbl.isHasTotalsRow()) {
- actualEndRow = endRow - 1;
- }
- } else if (isDataSpec && isTotalsSpec) {
- actualStartRow = startRow + 1;
- } else if (nSpecQuantifiers == 1 && isDataSpec) {
- actualStartRow = startRow + 1;
- if (tbl.isHasTotalsRow()) {
- actualEndRow = endRow - 1;
- }
- } else if (nSpecQuantifiers == 1 && isHeadersSpec) {
- actualEndRow = actualStartRow;
- } else if (nSpecQuantifiers == 1 && isTotalsSpec) {
- actualStartRow = actualEndRow;
- } else if ((nSpecQuantifiers == 1 && isThisRowSpec) || isThisRow) {
- actualStartRow = _rowIndex; //The rowNum is 0 based
- actualEndRow = _rowIndex;
- } else {
- throw new FormulaParseException("The formula "+ _formulaString + " is illegal");
- }
- } else {
- if (isThisRow) { // there is a @
- actualStartRow = _rowIndex; //The rowNum is 0 based
- actualEndRow = _rowIndex;
- } else { // Really no special quantifiers
- actualStartRow++;
- }
- }
-
- //Selecting cols
-
- if (nColQuantifiers == 2) {
- if (startColumnName == null || endColumnName == null) {
- throw new IllegalStateException("Fatal error");
- }
- int startIdx = tbl.findColumnIndex(startColumnName);
- int endIdx = tbl.findColumnIndex(endColumnName);
- if (startIdx == -1 || endIdx == -1) {
- throw new FormulaParseException("One of the columns "+ startColumnName +", "+ endColumnName +" doesn't exist in table "+ tbl.getName());
- }
- actualStartCol = startCol+ startIdx;
- actualEndCol = startCol + endIdx;
-
- } else if (nColQuantifiers == 1 && !isThisRow) {
- if (startColumnName == null) {
- throw new IllegalStateException("Fatal error");
- }
- int idx = tbl.findColumnIndex(startColumnName);
- if (idx == -1) {
- throw new FormulaParseException("The column "+ startColumnName + " doesn't exist in table "+ tbl.getName());
- }
- actualStartCol = startCol + idx;
- actualEndCol = actualStartCol;
- }
- CellReference topLeft = new CellReference(actualStartRow, actualStartCol);
- CellReference bottomRight = new CellReference(actualEndRow, actualEndCol);
- SheetIdentifier sheetIden = new SheetIdentifier( null, new NameIdentifier(sheetName, true));
- Ptg ptg = _book.get3DReferencePtg(new AreaReference(topLeft, bottomRight), sheetIden);
- return new ParseNode(ptg);
- }
-
- /**
- * Tries to parse the next as column - can contain whitespace
- * Caller should save pointer.
- */
- private String parseAsColumnQuantifier() {
- if ( look != '[') {
- return null;
- }
- GetChar();
- if (look == '#') {
- return null;
- }
- if (look == '@') {
- GetChar();
- }
- StringBuilder name = new StringBuilder();
- while (look!=']') {
- name.appendCodePoint(look);
- GetChar();
- }
- Match(']');
- return name.toString();
- }
- /**
- * Tries to parse the next as special quantifier
- * Caller should save pointer.
- */
- private String parseAsSpecialQuantifier(){
- if ( look != '[') {
- return null;
- }
- GetChar();
- if( look != '#') {
- return null;
- }
- GetChar();
- String name = parseAsName();
- if ( name.equals("This")) {
- name = name + ' ' + parseAsName();
- }
- Match(']');
- return name;
- }
-
-
- /**
- * Parses simple factors that are not primitive ranges or range components
- * i.e. '!', ':'(and equiv '...') do not appear
- * Examples
- * <pre>
- * my.named...range.
- * foo.bar(123.456, "abc")
- * 123.456
- * "abc"
- * true
- * </pre>
- */
- private ParseNode parseNonRange(int savePointer) {
- resetPointer(savePointer);
-
- if (Character.isDigit(look)) {
- return new ParseNode(parseNumber());
- }
- if (look == '"') {
- return new ParseNode(new StringPtg(parseStringLiteral()));
- }
-
- // from now on we can only be dealing with non-quoted identifiers
- // which will either be named ranges or functions
- String name = parseAsName();
-
- if (look == '(') {
- return function(name);
- }
- if(look == '['){
- return parseStructuredReference(name);
- }
- if (name.equalsIgnoreCase("TRUE") || name.equalsIgnoreCase("FALSE")) {
- return new ParseNode(BoolPtg.valueOf(name.equalsIgnoreCase("TRUE")));
- }
- if (_book == null) {
- // Only test cases omit the book (expecting it not to be needed)
- throw new IllegalStateException("Need book to evaluate name '" + name + "'");
- }
- EvaluationName evalName = _book.getName(name, _sheetIndex);
- if (evalName == null) {
- throw new FormulaParseException("Specified named range '"
- + name + "' does not exist in the current workbook.");
- }
- if (evalName.isRange()) {
- return new ParseNode(evalName.createPtg());
- }
- // TODO - what about NameX ?
- throw new FormulaParseException("Specified name '"
- + name + "' is not a range as expected.");
- }
-
- private String parseAsName() {
- StringBuilder sb = new StringBuilder();
-
- // defined names may begin with a letter or underscore or backslash
- if (!Character.isLetter(look) && look != '_' && look != '\\') {
- throw expected("number, string, defined name, or data table");
- }
- while (isValidDefinedNameChar(look)) {
- sb.appendCodePoint(look);
- GetChar();
- }
- SkipWhite();
-
- return sb.toString();
- }
-
- /**
- * @param ch unicode codepoint
- * @return <code>true</code> if the specified character may be used in a defined name
- */
- private static boolean isValidDefinedNameChar(int ch) {
- if (Character.isLetterOrDigit(ch)) {
- return true;
- }
- // the sheet naming rules are vague on whether unicode characters are allowed
- // assume they're allowed.
- if (ch > 128) {
- return true;
- }
- switch (ch) {
- case '.':
- case '_':
- case '?':
- case '\\': // of all things
- return true;
- }
- // includes special non-name control characters like ! $ : , ( ) [ ] and space
- return false;
- }
-
- /**
- *
- * @param sheetIden may be <code>null</code>
- * @param part1
- * @param part2 may be <code>null</code>
- */
- private ParseNode createAreaRefParseNode(SheetIdentifier sheetIden, SimpleRangePart part1,
- SimpleRangePart part2) throws FormulaParseException {
- Ptg ptg;
- if (part2 == null) {
- CellReference cr = part1.getCellReference();
- if (sheetIden == null) {
- ptg = new RefPtg(cr);
- } else {
- ptg = _book.get3DReferencePtg(cr, sheetIden);
- }
- } else {
- AreaReference areaRef = createAreaRef(part1, part2);
-
- if (sheetIden == null) {
- ptg = new AreaPtg(areaRef);
- } else {
- ptg = _book.get3DReferencePtg(areaRef, sheetIden);
- }
- }
- return new ParseNode(ptg);
- }
-
- private AreaReference createAreaRef(SimpleRangePart part1, SimpleRangePart part2) {
- if (!part1.isCompatibleForArea(part2)) {
- throw new FormulaParseException("has incompatible parts: '"
- + part1.getRep() + "' and '" + part2.getRep() + "'.");
- }
- if (part1.isRow()) {
- return AreaReference.getWholeRow(_ssVersion, part1.getRep(), part2.getRep());
- }
- if (part1.isColumn()) {
- return AreaReference.getWholeColumn(_ssVersion, part1.getRep(), part2.getRep());
- }
- return new AreaReference(part1.getCellReference(), part2.getCellReference());
- }
-
- /**
- * Matches a zero or one letter-runs followed by zero or one digit-runs.
- * Either or both runs man optionally be prefixed with a single '$'.
- * (copied+modified from {@link org.apache.poi.ss.util.CellReference#CELL_REF_PATTERN})
- */
- private static final Pattern CELL_REF_PATTERN = Pattern.compile("(\\$?[A-Za-z]+)?(\\$?[0-9]+)?");
-
- /**
- * Parses out a potential LHS or RHS of a ':' intended to produce a plain AreaRef. Normally these are
- * proper cell references but they could also be row or column refs like "$AC" or "10"
- * @return <code>null</code> (and leaves {@link #_pointer} unchanged if a proper range part does not parse out
- */
- private SimpleRangePart parseSimpleRangePart() {
- int ptr = _pointer-1; // TODO avoid StringIndexOutOfBounds
- boolean hasDigits = false;
- boolean hasLetters = false;
- while (ptr < _formulaLength) {
- char ch = _formulaString.charAt(ptr);
- if (Character.isDigit(ch)) {
- hasDigits = true;
- } else if (Character.isLetter(ch)) {
- hasLetters = true;
- } else if (ch =='$' || ch =='_') {
- //
- } else {
- break;
- }
- ptr++;
- }
- if (ptr <= _pointer-1) {
- return null;
- }
- String rep = _formulaString.substring(_pointer-1, ptr);
- if (!CELL_REF_PATTERN.matcher(rep).matches()) {
- return null;
- }
- // Check range bounds against grid max
- if (hasLetters && hasDigits) {
- if (!isValidCellReference(rep)) {
- return null;
- }
- } else if (hasLetters) {
- if (!CellReference.isColumnWithinRange(rep.replace("$", ""), _ssVersion)) {
- return null;
- }
- } else if (hasDigits) {
- int i;
- try {
- i = Integer.parseInt(rep.replace("$", ""));
- } catch (NumberFormatException e) {
- return null;
- }
- if (i<1 || i>_ssVersion.getMaxRows()) {
- return null;
- }
- } else {
- // just dollars ? can this happen?
- return null;
- }
-
-
- resetPointer(ptr+1); // stepping forward
- return new SimpleRangePart(rep, hasLetters, hasDigits);
- }
-
-
- /**
- * A1, $A1, A$1, $A$1, A, 1
- */
- private static final class SimpleRangePart {
- private enum Type {
- CELL, ROW, COLUMN;
-
- public static Type get(boolean hasLetters, boolean hasDigits) {
- if (hasLetters) {
- return hasDigits ? CELL : COLUMN;
- }
- if (!hasDigits) {
- throw new IllegalArgumentException("must have either letters or numbers");
- }
- return ROW;
- }
- }
-
- private final Type _type;
- private final String _rep;
-
- public SimpleRangePart(String rep, boolean hasLetters, boolean hasNumbers) {
- _rep = rep;
- _type = Type.get(hasLetters, hasNumbers);
- }
-
- public boolean isCell() {
- return _type == Type.CELL;
- }
-
- public boolean isRowOrColumn() {
- return _type != Type.CELL;
- }
-
- public CellReference getCellReference() {
- if (_type != Type.CELL) {
- throw new IllegalStateException("Not applicable to this type");
- }
- return new CellReference(_rep);
- }
-
- public boolean isColumn() {
- return _type == Type.COLUMN;
- }
-
- public boolean isRow() {
- return _type == Type.ROW;
- }
-
- public String getRep() {
- return _rep;
- }
-
- /**
- * @return <code>true</code> if the two range parts can be combined in an
- * {@link AreaPtg} ( Note - the explicit range operator (:) may still be valid
- * when this method returns <code>false</code> )
- */
- public boolean isCompatibleForArea(SimpleRangePart part2) {
- return _type == part2._type;
- }
-
- @Override
- public String toString() {
- return getClass().getName() + " [" + _rep + "]";
- }
- }
-
- private String getBookName() {
- StringBuilder sb = new StringBuilder();
- GetChar();
- while (look != ']') {
- sb.appendCodePoint(look);
- GetChar();
- }
- GetChar();
- return sb.toString();
- }
-
- /**
- * Note - caller should reset {@link #_pointer} upon <code>null</code> result
- * @return The sheet name as an identifier <code>null</code> if '!' is not found in the right place
- */
- private SheetIdentifier parseSheetName() {
- String bookName;
- if (look == '[') {
- bookName = getBookName();
- } else {
- bookName = null;
- }
-
- if (look == '\'') {
- Match('\'');
-
- if (look == '[')
- bookName = getBookName();
-
- StringBuilder sb = new StringBuilder();
- boolean done = look == '\'';
- while(!done) {
- sb.appendCodePoint(look);
- GetChar();
- if(look == '\'')
- {
- Match('\'');
- done = look != '\'';
- }
- }
-
- NameIdentifier iden = new NameIdentifier(sb.toString(), true);
- // quoted identifier - can't concatenate anything more
- SkipWhite();
- if (look == '!') {
- GetChar();
- return new SheetIdentifier(bookName, iden);
- }
- // See if it's a multi-sheet range, eg Sheet1:Sheet3!A1
- if (look == ':') {
- return parseSheetRange(bookName, iden);
- }
- return null;
- }
-
- // unquoted sheet names must start with underscore or a letter
- if (look =='_' || Character.isLetter(look)) {
- StringBuilder sb = new StringBuilder();
- // can concatenate idens with dots
- while (isUnquotedSheetNameChar(look)) {
- sb.appendCodePoint(look);
- GetChar();
- }
- NameIdentifier iden = new NameIdentifier(sb.toString(), false);
- SkipWhite();
- if (look == '!') {
- GetChar();
- return new SheetIdentifier(bookName, iden);
- }
- // See if it's a multi-sheet range, eg Sheet1:Sheet3!A1
- if (look == ':') {
- return parseSheetRange(bookName, iden);
- }
- return null;
- }
- if (look == '!' && bookName != null) {
- // Raw book reference, without a sheet
- GetChar();
- return new SheetIdentifier(bookName, null);
- }
- return null;
- }
-
- /**
- * If we have something that looks like [book]Sheet1: or
- * Sheet1, see if it's actually a range eg Sheet1:Sheet2!
- */
- private SheetIdentifier parseSheetRange(String bookname, NameIdentifier sheet1Name) {
- GetChar();
- SheetIdentifier sheet2 = parseSheetName();
- if (sheet2 != null) {
- return new SheetRangeIdentifier(bookname, sheet1Name, sheet2.getSheetIdentifier());
- }
- return null;
- }
-
- /**
- * very similar to {@link SheetNameFormatter#isSpecialChar(char)}
- * @param ch unicode codepoint
- */
- private static boolean isUnquotedSheetNameChar(int ch) {
- if(Character.isLetterOrDigit(ch)) {
- return true;
- }
- // the sheet naming rules are vague on whether unicode characters are allowed
- // assume they're allowed.
- if (ch > 128) {
- return true;
- }
- switch(ch) {
- case '.': // dot is OK
- case '_': // underscore is OK
- return true;
- }
- return false;
- }
-
- /**
- * @return <code>true</code> if the specified name is a valid cell reference
- */
- private boolean isValidCellReference(String str) {
- //check range bounds against grid max
- boolean result = CellReference.classifyCellReference(str, _ssVersion) == NameType.CELL;
-
- if(result){
- /*
- * Check if the argument is a function. Certain names can be either a cell reference or a function name
- * depending on the contenxt. Compare the following examples in Excel 2007:
- * (a) LOG10(100) + 1
- * (b) LOG10 + 1
- * In (a) LOG10 is a name of a built-in function. In (b) LOG10 is a cell reference
- */
- boolean isFunc = FunctionMetadataRegistry.getFunctionByName(str.toUpperCase(Locale.ROOT)) != null;
- if(isFunc){
- int savePointer = _pointer;
- resetPointer(_pointer + str.length());
- SkipWhite();
- // open bracket indicates that the argument is a function,
- // the returning value should be false, i.e. "not a valid cell reference"
- result = look != '(';
- resetPointer(savePointer);
- }
- }
- return result;
- }
-
-
- /**
- * Note - Excel function names are 'case aware but not case sensitive'. This method may end
- * up creating a defined name record in the workbook if the specified name is not an internal
- * Excel function, and has not been encountered before.
- *
- * Side effect: creates workbook name if name is not recognized (name is probably a UDF)
- *
- * @param name case preserved function name (as it was entered/appeared in the formula).
- */
- private ParseNode function(String name) {
- Ptg nameToken = null;
- if(!AbstractFunctionPtg.isBuiltInFunctionName(name)) {
- // user defined function
- // in the token tree, the name is more or less the first argument
-
- if (_book == null) {
- // Only test cases omit the book (expecting it not to be needed)
- throw new IllegalStateException("Need book to evaluate name '" + name + "'");
- }
- // Check to see if name is a named range in the workbook
- EvaluationName hName = _book.getName(name, _sheetIndex);
- if (hName != null) {
- if (!hName.isFunctionName()) {
- throw new FormulaParseException("Attempt to use name '" + name
- + "' as a function, but defined name in workbook does not refer to a function");
- }
-
- // calls to user-defined functions within the workbook
- // get a Name token which points to a defined name record
- nameToken = hName.createPtg();
- } else {
- // Check if name is an external names table
- nameToken = _book.getNameXPtg(name, null);
- if (nameToken == null) {
- // name is not an internal or external name
- if (log.check(POILogger.WARN)) {
- log.log(POILogger.WARN,
- "FormulaParser.function: Name '" + name + "' is completely unknown in the current workbook.");
- }
- // name is probably the name of an unregistered User-Defined Function
- switch (_book.getSpreadsheetVersion()) {
- case EXCEL97:
- // HSSFWorkbooks require a name to be added to Workbook defined names table
- addName(name);
- hName = _book.getName(name, _sheetIndex);
- nameToken = hName.createPtg();
- break;
- case EXCEL2007:
- // XSSFWorkbooks store formula names as strings.
- nameToken = new NameXPxg(name);
- break;
- default:
- throw new IllegalStateException("Unexpected spreadsheet version: " + _book.getSpreadsheetVersion().name());
- }
- }
- }
- }
-
- Match('(');
- ParseNode[] args = Arguments();
- Match(')');
-
- return getFunction(name, nameToken, args);
- }
-
- /**
- * Adds a name (named range or user defined function) to underlying workbook's names table
- * @param functionName
- */
- private void addName(String functionName) {
- final Name name = _book.createName();
- name.setFunction(true);
- name.setNameName(functionName);
- name.setSheetIndex(_sheetIndex);
- }
-
- /**
- * Generates the variable function ptg for the formula.
- * <p>
- * For IF Formulas, additional PTGs are added to the tokens
- * @param name a {@link NamePtg} or {@link NameXPtg} or <code>null</code>
- * @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function
- */
- private ParseNode getFunction(String name, Ptg namePtg, ParseNode[] args) {
-
- FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase(Locale.ROOT));
- int numArgs = args.length;
- if(fm == null) {
- if (namePtg == null) {
- throw new IllegalStateException("NamePtg must be supplied for external functions");
- }
- // must be external function
- ParseNode[] allArgs = new ParseNode[numArgs+1];
- allArgs[0] = new ParseNode(namePtg);
- System.arraycopy(args, 0, allArgs, 1, numArgs);
- return new ParseNode(FuncVarPtg.create(name, numArgs+1), allArgs);
- }
-
- if (namePtg != null) {
- throw new IllegalStateException("NamePtg no applicable to internal functions");
- }
- boolean isVarArgs = !fm.hasFixedArgsLength();
- int funcIx = fm.getIndex();
- if (funcIx == FunctionMetadataRegistry.FUNCTION_INDEX_SUM && args.length == 1) {
- // Excel encodes the sum of a single argument as tAttrSum
- // POI does the same for consistency, but this is not critical
- return new ParseNode(AttrPtg.getSumSingle(), args);
- // The code below would encode tFuncVar(SUM) which seems to do no harm
- }
- validateNumArgs(args.length, fm);
-
- AbstractFunctionPtg retval;
- if(isVarArgs) {
- retval = FuncVarPtg.create(name, numArgs);
- } else {
- retval = FuncPtg.create(funcIx);
- }
- return new ParseNode(retval, args);
- }
-
- private void validateNumArgs(int numArgs, FunctionMetadata fm) {
- if(numArgs < fm.getMinParams()) {
- String msg = "Too few arguments to function '" + fm.getName() + "'. ";
- if(fm.hasFixedArgsLength()) {
- msg += "Expected " + fm.getMinParams();
- } else {
- msg += "At least " + fm.getMinParams() + " were expected";
- }
- msg += " but got " + numArgs + ".";
- throw new FormulaParseException(msg);
- }
- //the maximum number of arguments depends on the Excel version
- int maxArgs;
- if (fm.hasUnlimitedVarags()) {
- if(_book != null) {
- maxArgs = _book.getSpreadsheetVersion().getMaxFunctionArgs();
- } else {
- //_book can be omitted by test cases
- maxArgs = fm.getMaxParams(); // just use BIFF8
- }
- } else {
- maxArgs = fm.getMaxParams();
- }
-
- if(numArgs > maxArgs) {
- String msg = "Too many arguments to function '" + fm.getName() + "'. ";
- if(fm.hasFixedArgsLength()) {
- msg += "Expected " + maxArgs;
- } else {
- msg += "At most " + maxArgs + " were expected";
- }
- msg += " but got " + numArgs + ".";
- throw new FormulaParseException(msg);
- }
- }
-
- /**
- * @param ch unicode codepoint
- *
- */
- private static boolean isArgumentDelimiter(int ch) {
- return ch == ',' || ch == ')';
- }
-
- /** get arguments to a function */
- private ParseNode[] Arguments() {
- //average 2 args per function
- List<ParseNode> temp = new ArrayList<ParseNode>(2);
- SkipWhite();
- if(look == ')') {
- return ParseNode.EMPTY_ARRAY;
- }
-
- boolean missedPrevArg = true;
- while (true) {
- SkipWhite();
- if (isArgumentDelimiter(look)) {
- if (missedPrevArg) {
- temp.add(new ParseNode(MissingArgPtg.instance));
- }
- if (look == ')') {
- break;
- }
- Match(',');
- missedPrevArg = true;
- continue;
- }
- temp.add(comparisonExpression());
- missedPrevArg = false;
- SkipWhite();
- if (!isArgumentDelimiter(look)) {
- throw expected("',' or ')'");
- }
- }
- ParseNode[] result = new ParseNode[temp.size()];
- temp.toArray(result);
- return result;
- }
-
- /** Parse and Translate a Math Factor */
- private ParseNode powerFactor() {
- ParseNode result = percentFactor();
- while(true) {
- SkipWhite();
- if(look != '^') {
- return result;
- }
- Match('^');
- ParseNode other = percentFactor();
- result = new ParseNode(PowerPtg.instance, result, other);
- }
- }
-
- private ParseNode percentFactor() {
- ParseNode result = parseSimpleFactor();
- while(true) {
- SkipWhite();
- if(look != '%') {
- return result;
- }
- Match('%');
- result = new ParseNode(PercentPtg.instance, result);
- }
- }
-
-
- /**
- * factors (without ^ or % )
- */
- private ParseNode parseSimpleFactor() {
- SkipWhite();
- switch(look) {
- case '#':
- return new ParseNode(ErrPtg.valueOf(parseErrorLiteral()));
- case '-':
- Match('-');
- return parseUnary(false);
- case '+':
- Match('+');
- return parseUnary(true);
- case '(':
- Match('(');
- ParseNode inside = unionExpression();
- Match(')');
- return new ParseNode(ParenthesisPtg.instance, inside);
- case '"':
- return new ParseNode(new StringPtg(parseStringLiteral()));
- case '{':
- Match('{');
- ParseNode arrayNode = parseArray();
- Match('}');
- return arrayNode;
- }
- // named ranges and tables can start with underscore or backslash
- // see https://support.office.com/en-us/article/Define-and-use-names-in-formulas-4d0f13ac-53b7-422e-afd2-abd7ff379c64?ui=en-US&rs=en-US&ad=US#bmsyntax_rules_for_names
- if (IsAlpha(look) || Character.isDigit(look) || look == '\'' || look == '[' || look == '_' || look == '\\' ) {
- return parseRangeExpression();
- }
- if (look == '.') {
- return new ParseNode(parseNumber());
- }
- throw expected("cell ref or constant literal");
- }
-
-
- private ParseNode parseUnary(boolean isPlus) {
-
- boolean numberFollows = IsDigit(look) || look=='.';
- ParseNode factor = powerFactor();
-
- if (numberFollows) {
- // + or - directly next to a number is parsed with the number
-
- Ptg token = factor.getToken();
- if (token instanceof NumberPtg) {
- if (isPlus) {
- return factor;
- }
- token = new NumberPtg(-((NumberPtg)token).getValue());
- return new ParseNode(token);
- }
- if (token instanceof IntPtg) {
- if (isPlus) {
- return factor;
- }
- int intVal = ((IntPtg)token).getValue();
- // note - cannot use IntPtg for negatives
- token = new NumberPtg(-intVal);
- return new ParseNode(token);
- }
- }
- return new ParseNode(isPlus ? UnaryPlusPtg.instance : UnaryMinusPtg.instance, factor);
- }
-
- private ParseNode parseArray() {
- List<Object[]> rowsData = new ArrayList<Object[]>();
- while(true) {
- Object[] singleRowData = parseArrayRow();
- rowsData.add(singleRowData);
- if (look == '}') {
- break;
- }
- if (look != ';') {
- throw expected("'}' or ';'");
- }
- Match(';');
- }
- int nRows = rowsData.size();
- Object[][] values2d = new Object[nRows][];
- rowsData.toArray(values2d);
- int nColumns = values2d[0].length;
- checkRowLengths(values2d, nColumns);
-
- return new ParseNode(new ArrayPtg(values2d));
- }
- private void checkRowLengths(Object[][] values2d, int nColumns) {
- for (int i = 0; i < values2d.length; i++) {
- int rowLen = values2d[i].length;
- if (rowLen != nColumns) {
- throw new FormulaParseException("Array row " + i + " has length " + rowLen
- + " but row 0 has length " + nColumns);
- }
- }
- }
-
- private Object[] parseArrayRow() {
- List<Object> temp = new ArrayList<Object>();
- while (true) {
- temp.add(parseArrayItem());
- SkipWhite();
- switch(look) {
- case '}':
- case ';':
- break;
- case ',':
- Match(',');
- continue;
- default:
- throw expected("'}' or ','");
-
- }
- break;
- }
-
- Object[] result = new Object[temp.size()];
- temp.toArray(result);
- return result;
- }
-
- private Object parseArrayItem() {
- SkipWhite();
- switch(look) {
- case '"': return parseStringLiteral();
- case '#': return ErrorConstant.valueOf(parseErrorLiteral());
- case 'F': case 'f':
- case 'T': case 't':
- return parseBooleanLiteral();
- case '-':
- Match('-');
- SkipWhite();
- return convertArrayNumber(parseNumber(), false);
- }
- // else assume number
- return convertArrayNumber(parseNumber(), true);
- }
-
- private Boolean parseBooleanLiteral() {
- String iden = parseUnquotedIdentifier();
- if ("TRUE".equalsIgnoreCase(iden)) {
- return Boolean.TRUE;
- }
- if ("FALSE".equalsIgnoreCase(iden)) {
- return Boolean.FALSE;
- }
- throw expected("'TRUE' or 'FALSE'");
- }
-
- private static Double convertArrayNumber(Ptg ptg, boolean isPositive) {
- double value;
- if (ptg instanceof IntPtg) {
- value = ((IntPtg)ptg).getValue();
- } else if (ptg instanceof NumberPtg) {
- value = ((NumberPtg)ptg).getValue();
- } else {
- throw new RuntimeException("Unexpected ptg (" + ptg.getClass().getName() + ")");
- }
- if (!isPositive) {
- value = -value;
- }
- return new Double(value);
- }
-
- private Ptg parseNumber() {
- String number2 = null;
- String exponent = null;
- String number1 = GetNum();
-
- if (look == '.') {
- GetChar();
- number2 = GetNum();
- }
-
- if (look == 'E') {
- GetChar();
-
- String sign = "";
- if (look == '+') {
- GetChar();
- } else if (look == '-') {
- GetChar();
- sign = "-";
- }
-
- String number = GetNum();
- if (number == null) {
- throw expected("Integer");
- }
- exponent = sign + number;
- }
-
- if (number1 == null && number2 == null) {
- throw expected("Integer");
- }
-
- return getNumberPtgFromString(number1, number2, exponent);
- }
-
-
- private int parseErrorLiteral() {
- Match('#');
- String part1 = parseUnquotedIdentifier().toUpperCase(Locale.ROOT);
- if (part1 == null) {
- throw expected("remainder of error constant literal");
- }
-
- switch(part1.charAt(0)) {
- case 'V': {
- FormulaError fe = FormulaError.VALUE;
- if(part1.equals(fe.name())) {
- Match('!');
- return fe.getCode();
- }
- throw expected(fe.getString());
- }
- case 'R': {
- FormulaError fe = FormulaError.REF;
- if(part1.equals(fe.name())) {
- Match('!');
- return fe.getCode();
- }
- throw expected(fe.getString());
- }
- case 'D': {
- FormulaError fe = FormulaError.DIV0;
- if(part1.equals("DIV")) {
- Match('/');
- Match('0');
- Match('!');
- return fe.getCode();
- }
- throw expected(fe.getString());
- }
- case 'N': {
- FormulaError fe = FormulaError.NAME;
- if(part1.equals(fe.name())) {
- // only one that ends in '?'
- Match('?');
- return fe.getCode();
- }
- fe = FormulaError.NUM;
- if(part1.equals(fe.name())) {
- Match('!');
- return fe.getCode();
- }
- fe = FormulaError.NULL;
- if(part1.equals(fe.name())) {
- Match('!');
- return fe.getCode();
- }
- fe = FormulaError.NA;
- if(part1.equals("N")) {
- Match('/');
- if(look != 'A' && look != 'a') {
- throw expected(fe.getString());
- }
- Match(look);
- // Note - no '!' or '?' suffix
- return fe.getCode();
- }
- throw expected("#NAME?, #NUM!, #NULL! or #N/A");
- }
- }
- throw expected("#VALUE!, #REF!, #DIV/0!, #NAME?, #NUM!, #NULL! or #N/A");
- }
-
- private String parseUnquotedIdentifier() {
- if (look == '\'') {
- throw expected("unquoted identifier");
- }
- StringBuilder sb = new StringBuilder();
- while (Character.isLetterOrDigit(look) || look == '.') {
- sb.appendCodePoint(look);
- GetChar();
- }
- if (sb.length() < 1) {
- return null;
- }
-
- return sb.toString();
- }
-
- /**
- * Get a PTG for an integer from its string representation.
- * return Int or Number Ptg based on size of input
- */
- private static Ptg getNumberPtgFromString(String number1, String number2, String exponent) {
- StringBuilder number = new StringBuilder();
-
- if (number2 == null) {
- number.append(number1);
-
- if (exponent != null) {
- number.append('E');
- number.append(exponent);
- }
-
- String numberStr = number.toString();
- int intVal;
- try {
- intVal = Integer.parseInt(numberStr);
- } catch (NumberFormatException e) {
- return new NumberPtg(numberStr);
- }
- if (IntPtg.isInRange(intVal)) {
- return new IntPtg(intVal);
- }
- return new NumberPtg(numberStr);
- }
-
- if (number1 != null) {
- number.append(number1);
- }
-
- number.append('.');
- number.append(number2);
-
- if (exponent != null) {
- number.append('E');
- number.append(exponent);
- }
-
- return new NumberPtg(number.toString());
- }
-
-
- private String parseStringLiteral() {
- Match('"');
-
- StringBuilder token = new StringBuilder();
- while (true) {
- if (look == '"') {
- GetChar();
- if (look != '"') {
- break;
- }
- }
- token.appendCodePoint(look);
- GetChar();
- }
- return token.toString();
- }
-
- /** Parse and Translate a Math Term */
- private ParseNode Term() {
- ParseNode result = powerFactor();
- while(true) {
- SkipWhite();
- Ptg operator;
- switch(look) {
- case '*':
- Match('*');
- operator = MultiplyPtg.instance;
- break;
- case '/':
- Match('/');
- operator = DividePtg.instance;
- break;
- default:
- return result; // finished with Term
- }
- ParseNode other = powerFactor();
- result = new ParseNode(operator, result, other);
- }
- }
-
- private ParseNode unionExpression() {
- ParseNode result = intersectionExpression();
- boolean hasUnions = false;
- while (true) {
- SkipWhite();
- switch(look) {
- case ',':
- GetChar();
- hasUnions = true;
- ParseNode other = intersectionExpression();
- result = new ParseNode(UnionPtg.instance, result, other);
- continue;
- }
- if (hasUnions) {
- return augmentWithMemPtg(result);
- }
- return result;
- }
- }
-
- private ParseNode intersectionExpression() {
- ParseNode result = comparisonExpression();
- boolean hasIntersections = false;
- while (true) {
- SkipWhite();
- if (_inIntersection) {
- int savePointer = _pointer;
-
- // Don't getChar() as the space has already been eaten and recorded by SkipWhite().
- try {
- ParseNode other = comparisonExpression();
- result = new ParseNode(IntersectionPtg.instance, result, other);
- hasIntersections = true;
- continue;
- } catch (FormulaParseException e) {
- // if parsing for intersection fails we assume that we actually had an arbitrary
- // whitespace and thus should simply skip this whitespace
- resetPointer(savePointer);
- }
- }
- if (hasIntersections) {
- return augmentWithMemPtg(result);
- }
- return result;
- }
- }
-
- private ParseNode comparisonExpression() {
- ParseNode result = concatExpression();
- while (true) {
- SkipWhite();
- switch(look) {
- case '=':
- case '>':
- case '<':
- Ptg comparisonToken = getComparisonToken();
- ParseNode other = concatExpression();
- result = new ParseNode(comparisonToken, result, other);
- continue;
- }
- return result; // finished with predicate expression
- }
- }
-
- private Ptg getComparisonToken() {
- if(look == '=') {
- Match(look);
- return EqualPtg.instance;
- }
- boolean isGreater = look == '>';
- Match(look);
- if(isGreater) {
- if(look == '=') {
- Match('=');
- return GreaterEqualPtg.instance;
- }
- return GreaterThanPtg.instance;
- }
- switch(look) {
- case '=':
- Match('=');
- return LessEqualPtg.instance;
- case '>':
- Match('>');
- return NotEqualPtg.instance;
- }
- return LessThanPtg.instance;
- }
-
-
- private ParseNode concatExpression() {
- ParseNode result = additiveExpression();
- while (true) {
- SkipWhite();
- if(look != '&') {
- break; // finished with concat expression
- }
- Match('&');
- ParseNode other = additiveExpression();
- result = new ParseNode(ConcatPtg.instance, result, other);
- }
- return result;
- }
-
-
- /** Parse and Translate an Expression */
- private ParseNode additiveExpression() {
- ParseNode result = Term();
- while (true) {
- SkipWhite();
- Ptg operator;
- switch(look) {
- case '+':
- Match('+');
- operator = AddPtg.instance;
- break;
- case '-':
- Match('-');
- operator = SubtractPtg.instance;
- break;
- default:
- return result; // finished with additive expression
- }
- ParseNode other = Term();
- result = new ParseNode(operator, result, other);
- }
- }
-
- //{--------------------------------------------------------------}
- //{ Parse and Translate an Assignment Statement }
- /*
- procedure Assignment;
- var Name: string[8];
- begin
- Name := GetName;
- Match('=');
- Expression;
-
- end;
- **/
-
-
- /**
- * API call to execute the parsing of the formula
- *
- */
- private void parse() {
- _pointer=0;
- GetChar();
- _rootNode = unionExpression();
-
- if(_pointer <= _formulaLength) {
- String msg = "Unused input [" + _formulaString.substring(_pointer-1)
- + "] after attempting to parse the formula [" + _formulaString + "]";
- throw new FormulaParseException(msg);
- }
- }
-
- private Ptg[] getRPNPtg(FormulaType formulaType) {
- OperandClassTransformer oct = new OperandClassTransformer(formulaType);
- // RVA is for 'operand class': 'reference', 'value', 'array'
- oct.transformFormula(_rootNode);
- return ParseNode.toTokenArray(_rootNode);
- }
- }
|