aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Ahlborn <jtahlborn@yahoo.com>2016-09-29 03:31:04 +0000
committerJames Ahlborn <jtahlborn@yahoo.com>2016-09-29 03:31:04 +0000
commitc49b4c927c32f8ce5e44f60b8bfff77afa043fa9 (patch)
tree237c64804af916307b1c7826ad4f2c4090eb90ed
parent23687e2d77ba3d4e4a5d5a6efba6968e7c478276 (diff)
downloadjackcess-c49b4c927c32f8ce5e44f60b8bfff77afa043fa9.tar.gz
jackcess-c49b4c927c32f8ce5e44f60b8bfff77afa043fa9.zip
make delims separate token type; implement parsing of more expr types
git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/exprs@1040 f203690c-595d-4dc9-a70b-905162fa7fd2
-rw-r--r--src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java12
-rw-r--r--src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java255
2 files changed, 199 insertions, 68 deletions
diff --git a/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java b/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java
index ab558ca..b9a3cd4 100644
--- a/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java
+++ b/src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java
@@ -41,7 +41,7 @@ class ExpressionTokenizer
private static final char QUOTED_STR_CHAR = '"';
private static final char OBJ_NAME_START_CHAR = '[';
private static final char OBJ_NAME_END_CHAR = ']';
- private static final char DATE_LIT_DELIM_CHAR = '#';
+ private static final char DATE_LIT_QUOTE_CHAR = '#';
private static final char EQUALS_CHAR = '=';
private static final String DATE_FORMAT = "M/d/yyyy";
@@ -55,7 +55,7 @@ class ExpressionTokenizer
private static final byte IS_QUOTE_FLAG = 0x10;
enum TokenType {
- OBJ_NAME, LITERAL, OP, STRING, SPACE;
+ OBJ_NAME, LITERAL, OP, DELIM, STRING, SPACE;
}
private static final byte[] CHAR_FLAGS = new byte[128];
@@ -137,8 +137,8 @@ class ExpressionTokenizer
case IS_DELIM_FLAG:
- // all delimiter chars are single character operators
- tokens.add(new Token(TokenType.OP, String.valueOf(c)));
+ // all delimiter chars are single character symbols
+ tokens.add(new Token(TokenType.DELIM, String.valueOf(c)));
break;
case IS_SPACE_FLAG:
@@ -154,7 +154,7 @@ class ExpressionTokenizer
case QUOTED_STR_CHAR:
tokens.add(new Token(TokenType.LITERAL, parseQuotedString(buf)));
break;
- case DATE_LIT_DELIM_CHAR:
+ case DATE_LIT_QUOTE_CHAR:
Map.Entry<?,String> dateLit = parseDateLiteralString(buf, db);
tokens.add(new Token(TokenType.LITERAL, dateLit.getKey(),
dateLit.getValue()));
@@ -312,7 +312,7 @@ class ExpressionTokenizer
private static Map.Entry<?,String> parseDateLiteralString(
ExprBuf buf, DatabaseImpl db)
{
- String dateStr = parseStringUntil(buf, DATE_LIT_DELIM_CHAR, null);
+ String dateStr = parseStringUntil(buf, DATE_LIT_QUOTE_CHAR, null);
boolean hasDate = (dateStr.indexOf('/') >= 0);
boolean hasTime = (dateStr.indexOf(':') >= 0);
diff --git a/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java b/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java
index 49a98d1..32f9874 100644
--- a/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java
+++ b/src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java
@@ -49,16 +49,36 @@ public class Expressionator
// - examples: https://support.office.com/en-us/article/Examples-of-expressions-d3901e11-c04e-4649-b40b-8b6ec5aed41f
// - validation rule usage: https://support.office.com/en-us/article/Restrict-data-input-by-using-a-validation-rule-6c0b2ce1-76fa-4be0-8ae9-038b52652320
+ // FIXME
+ // - need to short-circuit AND/OR
+ // - need to handle order of operations
+ // - ^
+ // - - (negate)
+ // - * /
+ // - \
+ // - Mod
+ // - + -
+ // - &
+ // - < > <> <= >= = Like Is
+ // - Not
+ // - And
+ // - Or
+ // - Xor
+ // - Eqv
+ // - In, Between ????
+
public enum Type {
DEFAULT_VALUE, FIELD_VALIDATOR, RECORD_VALIDATOR;
}
private enum WordType {
- OP, COMP, LOG_OP, CONST, SPEC_OP_PREFIX;
+ OP, COMP, LOG_OP, CONST, SPEC_OP_PREFIX, DELIM;
}
private static final String FUNC_START_DELIM = "(";
private static final String FUNC_END_DELIM = ")";
+ private static final String OPEN_PAREN = "(";
+ private static final String CLOSE_PAREN = ")";
private static final String FUNC_PARAM_SEP = ",";
private static final Map<String,WordType> WORD_TYPES = new HashMap<String,WordType>();
@@ -66,11 +86,31 @@ public class Expressionator
static {
setWordType(WordType.OP, "+", "-", "*", "/", "\\", "^", "&", "mod");
setWordType(WordType.COMP, "<", "<=", ">", ">=", "=", "<>");
- setWordType(WordType.LOG_OP, "and", "or", "eqv", "not", "xor");
+ setWordType(WordType.LOG_OP, "and", "or", "eqv", "xor");
setWordType(WordType.CONST, "true", "false", "null");
- setWordType(WordType.SPEC_OP_PREFIX, "is", "like", "between", "in");
+ setWordType(WordType.SPEC_OP_PREFIX, "is", "like", "between", "in", "not");
+ // "X is null", "X is not null", "X like P", "X between A and B",
+ // "X not between A and B", "X in (A, B, C...)", "X not in (A, B, C...)",
+ // "not X"
+ setWordType(WordType.DELIM, ".", "!", ",", "(", ")");
}
+ private static final Map<String, Integer> PRECENDENCE = // NOTE(review): identifier is spelled "PRECENDENCE" (sic); maps operator text -> level
+ buildPrecedenceMap( // each String[] below is one group; a group's position in this list is its level, starting at 0
+ new String[]{"^"},
+ new String[]{"-"}, // FIXME (negate)? -- note "-" is listed again further down, and that later entry replaces this level in the map
+ new String[]{"*", "/"},
+ new String[]{"\\"},
+ new String[]{"mod"},
+ new String[]{"+", "-"}, // this is the level that "-" actually ends up with
+ new String[]{"&"},
+ new String[]{"<", ">", "<>", "<=", ">=", "=", "like", "is"},
+ new String[]{"not"},
+ new String[]{"and"},
+ new String[]{"or"},
+ new String[]{"xor"},
+ new String[]{"eqv"},
+ new String[]{"in", "between"});
private static final Expr THIS_COL_VALUE = new Expr() {
@Override protected Object eval(RowContext ctx) {
@@ -145,7 +185,7 @@ public class Expressionator
Token t = tokens.get(i);
if(t.getType() == TokenType.SPACE) {
if((tokens.get(i - 1).getType() == TokenType.STRING) &&
- isOp(tokens.get(i + 1), FUNC_START_DELIM)) {
+ isDelim(tokens.get(i + 1), FUNC_START_DELIM)) {
// we want to keep this space
} else {
tokens.remove(i);
@@ -185,23 +225,11 @@ public class Expressionator
throw new RuntimeException("Invalid operator " + t);
}
- // this can old be an OP or a COMP (those are the only words that the
+ // this can only be an OP or a COMP (those are the only words that the
// tokenizer would define as TokenType.OP)
switch(wordType) {
case OP:
-
- // most ops are two argument except that '-' could be negation
- if(buf.hasPendingExpr()) {
- buf.setPendingExpr(parseBinaryOperator(t, buf, exprType,
- isSimpleExpr));
- } else if(isOp(t, "-")) {
- buf.setPendingExpr(parseUnaryOperator(t, buf, exprType,
- isSimpleExpr));
- } else {
- throw new IllegalArgumentException(
- "Missing left expression for binary operator " + t.getValue() +
- " " + buf);
- }
+ parseOperatorExpression(t, buf, exprType, isSimpleExpr);
break;
case COMP:
@@ -226,6 +254,19 @@ public class Expressionator
}
break;
+
+ case DELIM:
+
+ // the only "top-level" delim we expect to find is open paren, and
+ // there shouldn't be any pending expression
+ if(!isDelim(t, OPEN_PAREN) || buf.hasPendingExpr()) {
+ throw new IllegalArgumentException("Unexpected delimiter " +
+ t.getValue() + " " + buf);
+ }
+
+ Expr subExpr = findParenExprs(buf, exprType, isSimpleExpr, false).get(0);
+ buf.setPendingExpr(new EParen(subExpr));
+ break;
case STRING:
@@ -250,7 +291,49 @@ public class Expressionator
// FIXME maybe obj name, maybe string?
} else {
-
+
+ // this could be anything but COMP or DELIM (all COMPs would be
+ // returned as TokenType.OP and all DELIMs would be TokenType.DELIM)
+ switch(wordType) {
+ case OP:
+
+ parseOperatorExpression(t, buf, exprType, isSimpleExpr);
+ break;
+
+ case LOG_OP:
+
+ if(buf.hasPendingExpr()) {
+ buf.setPendingExpr(parseLogicalOperator(t, buf, exprType,
+ isSimpleExpr));
+ } else {
+ throw new IllegalArgumentException(
+ "Missing left expression for logical operator " +
+ t.getValue() + " " + buf);
+ }
+ break;
+
+ case CONST:
+
+ // the CONST words registered in WORD_TYPES are "true", "false" and
+ // "null"; each one maps to its own constant expression
+ if("true".equalsIgnoreCase(t.getValueStr())) {
+ buf.setPendingExpr(TRUE_VALUE);
+ } else if("false".equalsIgnoreCase(t.getValueStr())) {
+ buf.setPendingExpr(FALSE_VALUE);
+ } else if("null".equalsIgnoreCase(t.getValueStr())) {
+ // NOTE(review): assumes a NULL_VALUE constant is declared alongside
+ // TRUE_VALUE/FALSE_VALUE -- confirm against the full file
+ buf.setPendingExpr(NULL_VALUE);
+ } else {
+ // only reachable if a new CONST word is registered without a branch here
+ throw new RuntimeException("Unexpected CONST word "
+ + t.getValue());
+ }
+ break;
+
+ case SPEC_OP_PREFIX:
+ // FIXME
+ break;
+
+ default:
+ throw new RuntimeException("Unexpected STRING word type "
+ + wordType);
+ }
// FIXME
}
@@ -326,22 +409,13 @@ public class Expressionator
try {
Token t = buf.peekNext();
- if((t == null) || !isOp(t, FUNC_START_DELIM)) {
+ if((t == null) || !isDelim(t, FUNC_START_DELIM)) {
// not a function call
return null;
}
buf.next();
- List<TokBuf> paramBufs = findFuncCallParams(buf);
-
- List<Expr> params = Collections.emptyList();
- if(!paramBufs.isEmpty()) {
- params = new ArrayList<Expr>(paramBufs.size());
- for(TokBuf paramBuf : paramBufs) {
- params.add(parseExpression(exprType, paramBuf, isSimpleExpr));
- }
- }
-
+ List<Expr> params = findParenExprs(buf, exprType, isSimpleExpr, true);
return new EFunc(firstTok.getValueStr(), params);
} finally {
@@ -351,58 +425,67 @@ public class Expressionator
}
}
- private static List<TokBuf> findFuncCallParams(TokBuf buf) {
+ private static List<Expr> findParenExprs(
+ TokBuf buf, Type exprType, boolean isSimpleExpr, boolean isFunc) {
- // simple case, no params
- Token t = buf.peekNext();
- if((t != null) && isOp(t, FUNC_END_DELIM)) {
- buf.next();
- return Collections.emptyList();
+ if(isFunc) {
+ // simple case, no nested expr
+ Token t = buf.peekNext();
+ if((t != null) && isDelim(t, CLOSE_PAREN)) {
+ buf.next();
+ return Collections.emptyList();
+ }
}
// find closing ")", handle nested parens
- List<TokBuf> params = new ArrayList<TokBuf>(3);
+ List<Expr> exprs = new ArrayList<Expr>(3);
int level = 1;
int startPos = buf.curPos();
while(buf.hasNext()) {
- t = buf.next();
+ Token t = buf.next();
- if(isOp(t, FUNC_START_DELIM)) {
+ if(isDelim(t, OPEN_PAREN)) {
++level;
- } else if(isOp(t, FUNC_END_DELIM)) {
+ } else if(isDelim(t, CLOSE_PAREN)) {
--level;
if(level == 0) {
- params.add(buf.subBuf(startPos, buf.prevPos()));
-
- if(params.size() > 1) {
- // if there is more than one param and one of them is empty, then
- // something is messed up (note, it should not be possible to have
- // an empty param if there is only one since we trim superfluous
- // spaces)
- for(TokBuf paramBuf : params) {
- if(!paramBuf.hasNext()) {
- throw new IllegalArgumentException(
- "Invalid empty parameter for function " + paramBuf);
- }
- }
- }
-
- return params;
+ TokBuf subBuf = buf.subBuf(startPos, buf.prevPos());
+ exprs.add(parseExpression(exprType, subBuf, isSimpleExpr));
+ return exprs;
}
- } else if((level == 1) && isOp(t, FUNC_PARAM_SEP)) {
+ } else if(isFunc && (level == 1) && isDelim(t, FUNC_PARAM_SEP)) {
- params.add(buf.subBuf(startPos, buf.prevPos()));
+ TokBuf subBuf = buf.subBuf(startPos, buf.prevPos());
+ exprs.add(parseExpression(exprType, subBuf, isSimpleExpr));
startPos = buf.curPos();
}
}
- throw new IllegalArgumentException("Missing closing '" + FUNC_END_DELIM +
- "' for function call " + buf);
+ String exprName = (isFunc ? "function call" : "parenthesized expression");
+ throw new IllegalArgumentException("Missing closing '" + CLOSE_PAREN +
+ "' for " + exprName + " " + buf);
+ }
+
+ private static void parseOperatorExpression( // turns operator token t into an expression and stores it as buf's pending expr
+ Token t, TokBuf buf, Type exprType, boolean isSimpleExpr) {
+
+ // most ops are two argument except that '-' could be negation
+ if(buf.hasPendingExpr()) { // something was already parsed; presumably parseBinaryOperator uses it as the left operand
+ buf.setPendingExpr(parseBinaryOperator(t, buf, exprType,
+ isSimpleExpr));
+ } else if(isOp(t, "-")) { // nothing pending: the only operator accepted here is "-", handled as a unary operator
+ buf.setPendingExpr(parseUnaryOperator(t, buf, exprType,
+ isSimpleExpr));
+ } else {
+ throw new IllegalArgumentException( // any other operator with nothing pending is rejected
+ "Missing left expression for binary operator " + t.getValue() +
+ " " + buf);
+ }
+ }
private static Expr parseBinaryOperator(Token firstTok, TokBuf buf,
@@ -431,6 +514,15 @@ public class Expressionator
return new ECompOp(op, leftExpr, rightExpr);
}
+ private static Expr parseLogicalOperator(Token firstTok, TokBuf buf, // builds an ELogicalOp for the logical-operator token firstTok
+ Type exprType, boolean isSimpleExpr) {
+ String op = firstTok.getValueStr(); // operator word exactly as it appears in the token
+ Expr leftExpr = buf.takePendingExpr(); // left operand is the expr pending in buf (presumably also cleared by this call -- confirm)
+ Expr rightExpr = parseExpression(exprType, buf, isSimpleExpr); // right operand comes from continuing to parse the same buf
+
+ return new ELogicalOp(op, leftExpr, rightExpr);
+ }
+
private static boolean isSimpleExpression(TokBuf buf, Type exprType) {
if(exprType != Type.DEFAULT_VALUE) {
return false;
@@ -448,7 +540,7 @@ public class Expressionator
}
private static boolean isObjNameSep(Token t) {
- return (isOp(t, ".") || isOp(t, "!"));
+ return (isDelim(t, ".") || isDelim(t, "!"));
}
private static boolean isOp(Token t, String opStr) {
@@ -456,6 +548,11 @@ public class Expressionator
opStr.equalsIgnoreCase(t.getValueStr()));
}
+ private static boolean isDelim(Token t, String opStr) { // true when t is a DELIM token whose text equals opStr, ignoring case
+ return ((t.getType() == TokenType.DELIM) &&
+ opStr.equalsIgnoreCase(t.getValueStr()));
+ }
+
private static WordType getWordType(Token t) {
return WORD_TYPES.get(t.getValueStr().toLowerCase());
}
@@ -572,6 +669,20 @@ public class Expressionator
}
}
+ private static final Map<String, Integer> buildPrecedenceMap(String[]... opArrs) { // maps each operator string to the index of the group that contains it
+ Map<String, Integer> prec = new HashMap<String, Integer>();
+
+ int level = 0; // the first group passed in gets level 0
+ for(String[] ops : opArrs) {
+ for(String op : ops) {
+ prec.put(op, level); // an operator that appears in several groups keeps the level of the last one
+ }
+ ++level; // each following group is one level higher
+ }
+
+ return prec;
+ }
+
public static abstract class Expr
{
public Object evalDefault() {
@@ -670,7 +781,7 @@ public class Expressionator
@Override
protected Object eval(RowContext ctx) {
- // FIXME how do func results act for conditional values?
+ // FIXME how do func results act for conditional values? (literals become = tests)
return false;
}
@@ -734,4 +845,24 @@ public class Expressionator
}
}
+
+ private static class ELogicalOp extends Expr // expression node holding a logical operator word and its left/right operand expressions
+ {
+ private final String _op; // operator text as passed to the constructor
+ private final Expr _left;
+ private final Expr _right;
+
+ private ELogicalOp(String op, Expr left, Expr right) {
+ _op = op;
+ _left = left;
+ _right = right;
+ }
+
+ @Override
+ protected Object eval(RowContext ctx) {
+ // FIXME evaluation is not implemented yet: neither operand is evaluated and null is returned for every row context
+
+ return null;
+ }
+ }
}