From 23687e2d77ba3d4e4a5d5a6efba6968e7c478276 Mon Sep 17 00:00:00 2001 From: James Ahlborn Date: Sat, 17 Sep 2016 03:48:30 +0000 Subject: [PATCH] parse some operators git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/exprs@1039 f203690c-595d-4dc9-a70b-905162fa7fd2 --- .../jackcess/util/ExpressionTokenizer.java | 79 ++++-- .../jackcess/util/Expressionator.java | 239 ++++++++++++++++-- 2 files changed, 273 insertions(+), 45 deletions(-) diff --git a/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java b/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java index b55f674..ab558ca 100644 --- a/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java +++ b/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java @@ -19,11 +19,12 @@ package com.healthmarketscience.jackcess.util; import java.math.BigDecimal; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.Arrays; -import java.util.Date; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import com.healthmarketscience.jackcess.DatabaseBuilder; @@ -96,16 +97,19 @@ class ExpressionTokenizer // what could it be? switch(charFlag) { case IS_OP_FLAG: + // special case '-' for negative number - Object numLit = maybeParseNumberLiteral(c, buf); + Map.Entry numLit = maybeParseNumberLiteral(c, buf); if(numLit != null) { - tokens.add(new Token(TokenType.LITERAL, numLit)); + tokens.add(new Token(TokenType.LITERAL, numLit.getKey(), + numLit.getValue())); continue; } // all simple operator chars are single character operators tokens.add(new Token(TokenType.OP, String.valueOf(c))); break; + case IS_COMP_FLAG: switch(exprType) { @@ -120,8 +124,11 @@ class ExpressionTokenizer } // def values can't have cond at top level throw new IllegalArgumentException( - exprType + " cannot have top-level conditional"); + exprType + " cannot have top-level conditional " + buf); + case FIELD_VALIDATOR: + case RECORD_VALIDATOR: + tokens.add(new Token(TokenType.OP, parseCompOp(c, buf))); break; } @@ -148,15 +155,16 @@ class ExpressionTokenizer tokens.add(new Token(TokenType.LITERAL, parseQuotedString(buf))); break; case DATE_LIT_DELIM_CHAR: - tokens.add(new Token(TokenType.LITERAL, - parseDateLiteralString(buf, db))); + Map.Entry dateLit = parseDateLiteralString(buf, db); + tokens.add(new Token(TokenType.LITERAL, dateLit.getKey(), + dateLit.getValue())); break; case OBJ_NAME_START_CHAR: tokens.add(new Token(TokenType.OBJ_NAME, parseObjNameString(buf))); break; default: throw new IllegalArgumentException( - "Invalid leading quote character " + c); + "Invalid leading quote character " + c + " " + buf); } break; @@ -168,9 +176,10 @@ class ExpressionTokenizer } else { if(isDigit(c)) { - Object numLit = maybeParseNumberLiteral(c, buf); + Map.Entry numLit = maybeParseNumberLiteral(c, buf); if(numLit != null) { - tokens.add(new Token(TokenType.LITERAL, numLit)); + tokens.add(new Token(TokenType.LITERAL, numLit.getKey(), + numLit.getValue())); continue; } } @@ -262,17 +271,19 @@ class ExpressionTokenizer if(!complete) { throw new IllegalArgumentException("Missing closing '" + QUOTED_STR_CHAR + - "' for quoted string"); + "' for quoted string " + buf); } return sb.toString(); } private static String parseObjNameString(ExprBuf buf) { - return parseStringUntil(buf, OBJ_NAME_END_CHAR); + return parseStringUntil(buf, OBJ_NAME_END_CHAR, OBJ_NAME_START_CHAR); } - private static String parseStringUntil(ExprBuf buf, char endChar) { + private static String parseStringUntil(ExprBuf buf, char endChar, + Character startChar) + { StringBuilder sb = buf.getScratchBuffer(); boolean complete = false; @@ -281,6 +292,10 @@ class ExpressionTokenizer if(c == endChar) { complete = true; break; + } else if((startChar != null) && + (startChar == c)) { + throw new IllegalArgumentException("Missing closing '" + endChar + + "' for quoted string " + buf); } sb.append(c); @@ -288,14 +303,16 @@ class ExpressionTokenizer if(!complete) { throw new IllegalArgumentException("Missing closing '" + endChar + - "' for quoted string"); + "' for quoted string " + buf); } return sb.toString(); } - private static Date parseDateLiteralString(ExprBuf buf, DatabaseImpl db) { - String dateStr = parseStringUntil(buf, DATE_LIT_DELIM_CHAR); + private static Map.Entry parseDateLiteralString( + ExprBuf buf, DatabaseImpl db) + { + String dateStr = parseStringUntil(buf, DATE_LIT_DELIM_CHAR, null); boolean hasDate = (dateStr.indexOf('/') >= 0); boolean hasTime = (dateStr.indexOf(':') >= 0); @@ -308,19 +325,20 @@ class ExpressionTokenizer } else if(hasTime) { sdf = buf.getTimeFormat(db); } else { - throw new IllegalArgumentException("Invalid date time literal " + dateStr); + throw new IllegalArgumentException("Invalid date time literal " + dateStr + + " " + buf); } // FIXME, do we need to know which "type" it was? try { - return sdf.parse(dateStr); + return newEntry(sdf.parse(dateStr), dateStr); } catch(ParseException pe) { throw new IllegalArgumentException( - "Invalid date time literal " + dateStr, pe); + "Invalid date time literal " + dateStr + " " + buf, pe); } } - private static Object maybeParseNumberLiteral(char firstChar, ExprBuf buf) { + private static Map.Entry maybeParseNumberLiteral(char firstChar, ExprBuf buf) { StringBuilder sb = buf.getScratchBuffer().append(firstChar); boolean hasDigit = isDigit(firstChar); @@ -356,10 +374,10 @@ class ExpressionTokenizer // what number type to use here? BigDecimal num = new BigDecimal(numStr); foundNum = true; - return num; + return newEntry(num, numStr); } catch(NumberFormatException ne) { throw new IllegalArgumentException( - "Invalid number literal " + numStr, ne); + "Invalid number literal " + numStr + " " + buf, ne); } } finally { @@ -383,6 +401,10 @@ class ExpressionTokenizer return ((c >= '0') && (c <= '9')); } + static Map.Entry newEntry(K a, V b) { + return new AbstractMap.SimpleImmutableEntry(a, b); + } + private static final class ExprBuf { private final String _str; @@ -461,6 +483,11 @@ class ExpressionTokenizer return ((db != null) ? db.createDateFormat(str) : DatabaseBuilder.createDateFormat(str)); } + + @Override + public String toString() { + return "[char " + _pos + "] '" + _str + "'"; + } } @@ -468,10 +495,16 @@ class ExpressionTokenizer { private final TokenType _type; private final Object _val; + private final String _valStr; + + private Token(TokenType type, String val) { + this(type, val, val); + } - private Token(TokenType type, Object val) { + private Token(TokenType type, Object val, String valStr) { _type = type; _val = val; + _valStr = valStr; } public TokenType getType() { @@ -483,7 +516,7 @@ class ExpressionTokenizer } public String getValueStr() { - return (String)_val; + return _valStr; } @Override diff --git a/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java b/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java index 5621cbc..49a98d1 100644 --- a/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java +++ b/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java @@ -22,10 +22,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Date; +import java.util.Deque; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; +import java.util.ListIterator; import java.util.Map; import java.util.Set; @@ -41,8 +44,13 @@ import static com.healthmarketscience.jackcess.util.ExpressionTokenizer.TokenTyp public class Expressionator { + // Useful links: + // - syntax: https://support.office.com/en-us/article/Guide-to-expression-syntax-ebc770bc-8486-4adc-a9ec-7427cce39a90 + // - examples: https://support.office.com/en-us/article/Examples-of-expressions-d3901e11-c04e-4649-b40b-8b6ec5aed41f + // - validation rule usage: https://support.office.com/en-us/article/Restrict-data-input-by-using-a-validation-rule-6c0b2ce1-76fa-4be0-8ae9-038b52652320 + public enum Type { - DEFAULT_VALUE, FIELD_VALIDATOR; + DEFAULT_VALUE, FIELD_VALIDATOR, RECORD_VALIDATOR; } private enum WordType { @@ -104,6 +112,12 @@ public class Expressionator public static Expr parse(Type exprType, String exprStr, Database db) { + // FIXME,restrictions: + // - default value only accepts simple exprs, otherwise becomes literal text + // - def val cannot refer to any columns + // - field validation cannot refer to other columns + // - record validation cannot refer to outside columns + List tokens = trimSpaces( ExpressionTokenizer.tokenize(exprType, exprStr, (DatabaseImpl)db)); @@ -145,6 +159,7 @@ public class Expressionator private static Expr parseExpression(Type exprType, TokBuf buf, boolean isSimpleExpr) { + // FIXME pass exprType and isSimple expr in TokBuf? // FIXME, how do we handle order of ops when no parens? @@ -170,6 +185,45 @@ public class Expressionator throw new RuntimeException("Invalid operator " + t); } + // this can old be an OP or a COMP (those are the only words that the + // tokenizer would define as TokenType.OP) + switch(wordType) { + case OP: + + // most ops are two argument except that '-' could be negation + if(buf.hasPendingExpr()) { + buf.setPendingExpr(parseBinaryOperator(t, buf, exprType, + isSimpleExpr)); + } else if(isOp(t, "-")) { + buf.setPendingExpr(parseUnaryOperator(t, buf, exprType, + isSimpleExpr)); + } else { + throw new IllegalArgumentException( + "Missing left expression for binary operator " + t.getValue() + + " " + buf); + } + break; + + case COMP: + + if(!buf.hasPendingExpr() && (exprType == Type.FIELD_VALIDATOR)) { + // comparison operators for field validators can implicitly use + // the current field value for the left value + buf.setPendingExpr(THIS_COL_VALUE); + } + if(buf.hasPendingExpr()) { + buf.setPendingExpr(parseCompOperator(t, buf, exprType, + isSimpleExpr)); + } else { + throw new IllegalArgumentException( + "Missing left expression for comparison operator " + + t.getValue() + " " + buf); + } + break; + + default: + throw new RuntimeException("Unexpected OP word type " + wordType); + } break; @@ -178,13 +232,21 @@ public class Expressionator // see if it's a special word? wordType = getWordType(t); if(wordType == null) { - // literal string? or possibly function? + + // is it a function call? Expr funcExpr = maybeParseFuncCall(t, buf, exprType, isSimpleExpr); if(funcExpr != null) { buf.setPendingExpr(funcExpr); continue; } + // is it an object name? + Token next = buf.peekNext(); + if((next != null) && isObjNameSep(next)) { + buf.setPendingExpr(parseObjectReference(t, buf)); + continue; + } + // FIXME maybe obj name, maybe string? } else { @@ -214,15 +276,19 @@ public class Expressionator private static Expr parseObjectReference(Token firstTok, TokBuf buf) { - // object references may be joined by '.' or '!'; - List objNames = new ArrayList(); + // object references may be joined by '.' or '!'. access syntac docs claim + // object identifiers can be formatted like: + // "[Collection name]![Object name].[Property name]" + // However, in practice, they only ever seem to be (at most) two levels + // and only use '.'. + Deque objNames = new LinkedList(); objNames.add(firstTok.getValueStr()); Token t = null; boolean atSep = false; while((t = buf.peekNext()) != null) { if(!atSep) { - if(isOp(t, ".") || isOp(t, "!")) { + if(isObjNameSep(t)) { buf.next(); atSep = true; continue; @@ -231,7 +297,8 @@ public class Expressionator if((t.getType() == TokenType.OBJ_NAME) || (t.getType() == TokenType.STRING)) { buf.next(); - objNames.add(t.getValueStr()); + // always insert at beginning of list so names are in reverse order + objNames.addFirst(t.getValueStr()); atSep = false; continue; } @@ -239,11 +306,16 @@ public class Expressionator break; } - if(atSep) { + if(atSep || (objNames.size() > 3)) { throw new IllegalArgumentException("Invalid object reference " + buf); } - - return new EObjValue(objNames); + + // names are in reverse order + String fieldName = objNames.poll(); + String objName = objNames.poll(); + String collectionName = objNames.poll(); + + return new EObjValue(collectionName, objName, fieldName); } private static Expr maybeParseFuncCall(Token firstTok, TokBuf buf, @@ -333,6 +405,32 @@ public class Expressionator "' for function call " + buf); } + private static Expr parseBinaryOperator(Token firstTok, TokBuf buf, + Type exprType, boolean isSimpleExpr) { + String op = firstTok.getValueStr(); + Expr leftExpr = buf.takePendingExpr(); + Expr rightExpr = parseExpression(exprType, buf, isSimpleExpr); + + return new EBinaryOp(op, leftExpr, rightExpr); + } + + private static Expr parseUnaryOperator(Token firstTok, TokBuf buf, + Type exprType, boolean isSimpleExpr) { + String op = firstTok.getValueStr(); + Expr val = parseExpression(exprType, buf, isSimpleExpr); + + return new EUnaryOp(op, val); + } + + private static Expr parseCompOperator(Token firstTok, TokBuf buf, + Type exprType, boolean isSimpleExpr) { + String op = firstTok.getValueStr(); + Expr leftExpr = buf.takePendingExpr(); + Expr rightExpr = parseExpression(exprType, buf, isSimpleExpr); + + return new ECompOp(op, leftExpr, rightExpr); + } + private static boolean isSimpleExpression(TokBuf buf, Type exprType) { if(exprType != Type.DEFAULT_VALUE) { return false; @@ -349,8 +447,13 @@ public class Expressionator return true; } + private static boolean isObjNameSep(Token t) { + return (isOp(t, ".") || isOp(t, "!")); + } + private static boolean isOp(Token t, String opStr) { - return ((t.getType() == TokenType.OP) && opStr.equalsIgnoreCase(t.getValueStr())); + return ((t.getType() == TokenType.OP) && + opStr.equalsIgnoreCase(t.getValueStr())); } private static WordType getWordType(Token t) { @@ -367,16 +470,18 @@ public class Expressionator { private final List _tokens; private final TokBuf _parent; + private final int _parentOff; private int _pos; private Expr _pendingExpr; private TokBuf(List tokens) { - this(tokens, null); + this(tokens, null, 0); } - private TokBuf(List tokens, TokBuf parent) { + private TokBuf(List tokens, TokBuf parent, int parentOff) { _tokens = tokens; _parent = parent; + _parentOff = parentOff; } public boolean isTopLevel() { @@ -411,7 +516,7 @@ public class Expressionator } public TokBuf subBuf(int start, int end) { - return new TokBuf(_tokens.subList(start, end), this); + return new TokBuf(_tokens.subList(start, end), this, start); } public void setPendingExpr(Expr expr) { @@ -432,10 +537,38 @@ public class Expressionator return (_pendingExpr != null); } + private Map.Entry> getTopPos() { + int pos = _pos; + List toks = _tokens; + TokBuf cur = this; + while(cur._parent != null) { + pos += cur._parentOff; + cur = cur._parent; + toks = cur._tokens; + } + return ExpressionTokenizer.newEntry(pos, toks); + } + @Override public String toString() { - // FIXME show current pos - return null; + + Map.Entry> e = getTopPos(); + + // TODO actually format expression? + StringBuilder sb = new StringBuilder() + .append("[token ").append(e.getKey()).append("] ("); + + for(Iterator iter = e.getValue().iterator(); iter.hasNext(); ) { + Token t = iter.next(); + sb.append("'").append(t.getValueStr()).append("'"); + if(iter.hasNext()) { + sb.append(","); + } + } + + sb.append(")"); + + return sb.toString(); } } @@ -464,7 +597,8 @@ public class Expressionator { public Object getThisColumnValue(); - public Object getRowValue(String colName); + public Object getRowValue(String collectionName, String objName, + String colName); } private static final class ELiteralValue extends Expr @@ -483,17 +617,20 @@ public class Expressionator private static final class EObjValue extends Expr { - private final List _objNames; + private final String _collectionName; + private final String _objName; + private final String _fieldName; + - private EObjValue(List objNames) { - _objNames = objNames; + private EObjValue(String collectionName, String objName, String fieldName) { + _collectionName = collectionName; + _objName = objName; + _fieldName = fieldName; } @Override public Object eval(RowContext ctx) { - // FIXME - return null; - // return ctx.getRowValue(_colName); + return ctx.getRowValue(_collectionName, _objName, _fieldName); } } @@ -539,4 +676,62 @@ public class Expressionator } } + private static class EBinaryOp extends Expr + { + private final String _op; + private final Expr _left; + private final Expr _right; + + private EBinaryOp(String op, Expr left, Expr right) { + _op = op; + _left = left; + _right = right; + } + + @Override + protected Object eval(RowContext ctx) { + // FIXME + + return null; + } + } + + private static class EUnaryOp extends Expr + { + private final String _op; + private final Expr _val; + + private EUnaryOp(String op, Expr val) { + _op = op; + _val = val; + } + + @Override + protected Object eval(RowContext ctx) { + // FIXME + + return null; + } + } + + private static class ECompOp extends Expr + { + private final String _op; + private final Expr _left; + private final Expr _right; + + private ECompOp(String op, Expr left, Expr right) { + _op = op; + _left = left; + _right = right; + } + + @Override + protected Object eval(RowContext ctx) { + // FIXME + + return null; + } + } + } -- 2.39.5