Przeglądaj źródła

impl expr tokenization

git-svn-id: https://svn.code.sf.net/p/jackcess/code/jackcess/branches/exprs@1037 f203690c-595d-4dc9-a70b-905162fa7fd2
tags/jackcess-2.2.0
James Ahlborn 7 lat temu
rodzic
commit
7ae65eeb68

+ 14
- 0
src/main/java/com/healthmarketscience/jackcess/impl/DatabaseImpl.java Wyświetl plik

@@ -28,6 +28,7 @@ import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
@@ -74,6 +75,7 @@ import com.healthmarketscience.jackcess.util.TableIterableBuilder;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.poifs.filesystem.Entry;


/**
@@ -719,6 +721,18 @@ public class DatabaseImpl implements Database
return _calendar;
}

/**
 * Creates a SimpleDateFormat for the given pattern, bound to the
 * {@link TimeZone} of this database.  The format itself comes from
 * {@link DatabaseBuilder#createDateFormat}, so it is configured with a
 * compatible Calendar instance (see
 * {@link DatabaseBuilder#toCompatibleCalendar}).
 *
 * @param formatStr the date/time pattern to use
 * @return a new format instance using this database's time zone
 */
public SimpleDateFormat createDateFormat(String formatStr) {
  final SimpleDateFormat format = DatabaseBuilder.createDateFormat(formatStr);
  format.setTimeZone(getTimeZone());
  return format;
}

/**
* @return the current handler for reading/writing properties, creating if
* necessary

+ 498
- 0
src/main/java/com/healthmarketscience/jackcess/util/ExpressionTokenizer.java Wyświetl plik

@@ -0,0 +1,498 @@
/*
Copyright (c) 2016 James Ahlborn

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.healthmarketscience.jackcess.util;

import java.math.BigDecimal;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.healthmarketscience.jackcess.DatabaseBuilder;
import com.healthmarketscience.jackcess.impl.DatabaseImpl;
import static com.healthmarketscience.jackcess.util.Expressionator.*;

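/**
* Splits an expression string into a flat list of tokens (object names,
* literals, operators, bare strings and whitespace) for use by
* {@link Expressionator}.
*
* @author James Ahlborn
*/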
class ExpressionTokenizer
{
// sentinel returned by ExprBuf.peekNext() when no characters remain
private static final int EOF = -1;
// delimiter which opens and closes a quoted string literal
private static final char QUOTED_STR_CHAR = '"';
// delimiters which open and close a bracketed object name
private static final char OBJ_NAME_START_CHAR = '[';
private static final char OBJ_NAME_END_CHAR = ']';
// delimiter which opens and closes a date/time literal
private static final char DATE_LIT_DELIM_CHAR = '#';
// comparison char which gets special handling at the start of a default value
private static final char EQUALS_CHAR = '=';

// SimpleDateFormat patterns used to parse date, time and date+time literals
private static final String DATE_FORMAT = "M/d/yyyy";
private static final String TIME_FORMAT = "HH:mm:ss";
private static final String DATE_TIME_FORMAT = DATE_FORMAT + " " + TIME_FORMAT;

// bit flags stored per character in CHAR_FLAGS, classifying "special" chars
private static final byte IS_OP_FLAG = 0x01;
private static final byte IS_COMP_FLAG = 0x02;
private static final byte IS_DELIM_FLAG = 0x04;
private static final byte IS_SPACE_FLAG = 0x08;
private static final byte IS_QUOTE_FLAG = 0x10;

/**
 * The kinds of token produced by the tokenizer.
 */
enum TokenType {
  /** a bracketed object name */
  OBJ_NAME,
  /** a quoted string, date/time or number literal */
  LITERAL,
  /** an operator, comparison or delimiter */
  OP,
  /** a bare (unquoted) word */
  STRING,
  /** a run of whitespace, normalized to a single space */
  SPACE
}

// per-character flag table, indexed by char value; only chars below 128 have
// an entry (see getCharFlag), everything else is treated as having no flags
private static final byte[] CHAR_FLAGS = new byte[128];
// the comparison operators which are made up of two comparison characters
private static final Set<String> TWO_CHAR_COMP_OPS = new HashSet<String>(
Arrays.asList("<=", ">=", "<>"));

// populate the flag table with every character the tokenizer treats specially
static {
setCharFlag(IS_OP_FLAG, '+', '-', '*', '/', '\\', '^', '&');
setCharFlag(IS_COMP_FLAG, '<', '>', '=');
setCharFlag(IS_DELIM_FLAG, '.', '!', ',', '(', ')');
setCharFlag(IS_SPACE_FLAG, ' ', '\n', '\r', '\t');
setCharFlag(IS_QUOTE_FLAG, '"', '#', '[', ']');
}

/**
 * Tokenizes an expression string of the given type and (optionally) in the
 * context of the relevant database.
 *
 * @param exprType the kind of expression being tokenized; controls how
 *                 comparison characters are handled
 * @param exprStr the expression text, may be {@code null}
 * @param db the database used to create date formats for date/time
 *           literals, may be {@code null}
 * @return the tokens in source order, or {@code null} if the expression is
 *         {@code null} or blank
 * @throws IllegalArgumentException if the expression contains an invalid
 *         literal, an unterminated quoted section, or a comparison which is
 *         not allowed for the expression type
 */
static List<Token> tokenize(Type exprType, String exprStr, DatabaseImpl db) {

  if(exprStr != null) {
    exprStr = exprStr.trim();
  }

  if((exprStr == null) || (exprStr.length() == 0)) {
    return null;
  }

  List<Token> tokens = new ArrayList<Token>();

  ExprBuf buf = new ExprBuf(exprStr);

  while(buf.hasNext()) {
    char c = buf.next();
    byte charFlag = getCharFlag(c);
    if(charFlag != 0) {

      // what could it be?
      switch(charFlag) {
      case IS_OP_FLAG:

        // special case '-' for negative number.  no other operator char may
        // begin a number (e.g. "*2" is not a number), and '-' is only a
        // sign when there is no operand in front of it (otherwise "1-2"
        // would silently lose its operator)
        if((c == '-') && canStartSignedNumber(tokens)) {
          Object numLit = maybeParseNumberLiteral(c, buf);
          if(numLit != null) {
            tokens.add(new Token(TokenType.LITERAL, numLit));
            continue;
          }
        }

        // all simple operator chars are single character operators
        tokens.add(new Token(TokenType.OP, String.valueOf(c)));
        break;

      case IS_COMP_FLAG:

        switch(exprType) {
        case DEFAULT_VALUE:

          // special case
          if((c == EQUALS_CHAR) && (buf.prevPos() == 0)) {
            // a leading equals sign indicates how a default value should be
            // evaluated
            tokens.add(new Token(TokenType.OP, String.valueOf(c)));
            continue;
          }
          // def values can't have cond at top level
          throw new IllegalArgumentException(
              exprType + " cannot have top-level conditional");
        case FIELD_VALIDATOR:
          tokens.add(new Token(TokenType.OP, parseCompOp(c, buf)));
          break;
        }

        break;

      case IS_DELIM_FLAG:

        // all delimiter chars are single character operators
        tokens.add(new Token(TokenType.OP, String.valueOf(c)));
        break;

      case IS_SPACE_FLAG:

        // normalize whitespace into single space
        consumeWhitespace(buf);
        tokens.add(new Token(TokenType.SPACE, " "));
        break;

      case IS_QUOTE_FLAG:

        switch(c) {
        case QUOTED_STR_CHAR:
          tokens.add(new Token(TokenType.LITERAL, parseQuotedString(buf)));
          break;
        case DATE_LIT_DELIM_CHAR:
          tokens.add(new Token(TokenType.LITERAL,
                               parseDateLiteralString(buf, db)));
          break;
        case OBJ_NAME_START_CHAR:
          tokens.add(new Token(TokenType.OBJ_NAME, parseObjNameString(buf)));
          break;
        default:
          throw new IllegalArgumentException(
              "Invalid leading quote character " + c);
        }

        break;

      default:
        throw new RuntimeException("unknown char flag " + charFlag);
      }

    } else {

      if(isDigit(c)) {
        Object numLit = maybeParseNumberLiteral(c, buf);
        if(numLit != null) {
          tokens.add(new Token(TokenType.LITERAL, numLit));
          continue;
        }
      }

      // standalone word of some sort
      String str = parseBareString(c, buf, exprType);
      tokens.add(new Token(TokenType.STRING, str));
    }

  }

  return tokens;
}

/**
 * Decides whether a '-' at the current position may be the sign of a number
 * literal: true at the start of the expression or directly after an
 * operator/delimiter token other than a closing parenthesis (ignoring
 * whitespace), false when any other token precedes it.
 */
private static boolean canStartSignedNumber(List<Token> tokens) {
  for(int i = tokens.size() - 1; i >= 0; --i) {
    Token prev = tokens.get(i);
    if(prev.getType() == TokenType.SPACE) {
      // whitespace does not change what came before
      continue;
    }
    return ((prev.getType() == TokenType.OP) &&
            !")".equals(prev.getValue()));
  }
  // nothing precedes the '-'
  return true;
}

/**
 * Looks up the classification flags for a character.
 *
 * @return the flags from the lookup table for chars below 128, otherwise 0
 */
private static byte getCharFlag(char c) {
  if(c < 128) {
    return CHAR_FLAGS[c];
  }
  return 0;
}

/**
 * @return {@code true} if the character has any classification flag set
 */
private static boolean isSpecialChar(char c) {
  final byte flags = getCharFlag(c);
  return flags != 0;
}

/**
 * Reads a comparison operator which starts with the given (already
 * consumed) character.  The following character is consumed as well only
 * when the pair forms one of the known two character operators.
 *
 * @return the one or two character operator string
 */
private static String parseCompOp(char firstChar, ExprBuf buf) {
  final String singleOp = String.valueOf(firstChar);

  final int next = buf.peekNext();
  if((next == EOF) || !hasFlag(getCharFlag((char)next), IS_COMP_FLAG)) {
    return singleOp;
  }

  // is the combo a valid comparison operator?
  final String doubleOp = singleOp + (char)next;
  if(!TWO_CHAR_COMP_OPS.contains(doubleOp)) {
    return singleOp;
  }

  buf.next();
  return doubleOp;
}

/**
 * Advances the buffer past any run of whitespace characters, stopping at
 * the first non-whitespace character or the end of the input.
 */
private static void consumeWhitespace(ExprBuf buf) {
  for(int next = buf.peekNext();
      (next != EOF) && hasFlag(getCharFlag((char)next), IS_SPACE_FLAG);
      next = buf.peekNext()) {
    buf.next();
  }
}
/**
 * Reads an unquoted word starting with the given (already consumed)
 * character.  The word ends, without consuming the terminator, at the first
 * operator, delimiter or whitespace character; for field validators a
 * comparison character ends the word as well.
 *
 * @return the word which was read
 */
private static String parseBareString(char firstChar, ExprBuf buf,
                                      Type exprType) {
  final byte stopFlags = (byte)((exprType == Type.FIELD_VALIDATOR) ?
      (IS_OP_FLAG | IS_DELIM_FLAG | IS_SPACE_FLAG | IS_COMP_FLAG) :
      (IS_OP_FLAG | IS_DELIM_FLAG | IS_SPACE_FLAG));

  final StringBuilder word = buf.getScratchBuffer().append(firstChar);

  int next;
  while(((next = buf.peekNext()) != EOF) &&
        !hasFlag(getCharFlag((char)next), stopFlags)) {
    word.append((char)next);
    buf.next();
  }

  return word.toString();
}

/**
 * Reads the remainder of a quoted string whose opening quote has already
 * been consumed.  A doubled quote character inside the string is an escaped
 * quote and contributes a single quote character to the result.
 *
 * @return the unescaped string contents, without the surrounding quotes
 * @throws IllegalArgumentException if the closing quote is missing
 */
private static String parseQuotedString(ExprBuf buf) {
  StringBuilder sb = buf.getScratchBuffer();

  boolean complete = false;
  while(buf.hasNext()) {
    char c = buf.next();
    if(c == QUOTED_STR_CHAR) {
      if(buf.peekNext() != QUOTED_STR_CHAR) {
        // lone quote, end of string
        complete = true;
        break;
      }
      // escaped quote: skip the second quote char and let the append below
      // add exactly one quote to the result
      buf.next();
    }

    sb.append(c);
  }

  if(!complete) {
    throw new IllegalArgumentException("Missing closing '" + QUOTED_STR_CHAR +
                                       "' for quoted string");
  }

  return sb.toString();
}

/**
 * Reads an object name whose opening bracket has already been consumed.
 *
 * @return the text up to (not including) the closing bracket
 */
private static String parseObjNameString(ExprBuf buf) {
  final String objName = parseStringUntil(buf, OBJ_NAME_END_CHAR);
  return objName;
}

/**
 * Reads characters up to the given end character, which is consumed but
 * not included in the result.
 *
 * @return the text read before the end character
 * @throws IllegalArgumentException if the input ends before the end
 *         character is found
 */
private static String parseStringUntil(ExprBuf buf, char endChar) {
  final StringBuilder text = buf.getScratchBuffer();

  while(buf.hasNext()) {
    final char next = buf.next();
    if(next == endChar) {
      return text.toString();
    }
    text.append(next);
  }

  throw new IllegalArgumentException("Missing closing '" + endChar +
                                     "' for quoted string");
}

/**
 * Reads a date/time literal whose opening delimiter has already been
 * consumed.  The format is chosen from the literal text: a '/' marks a date
 * part and a ':' marks a time part.
 *
 * @return the parsed date value
 * @throws IllegalArgumentException if the literal has neither a date nor a
 *         time part, is unterminated, or cannot be parsed
 */
private static Date parseDateLiteralString(ExprBuf buf, DatabaseImpl db) {
  final String dateStr = parseStringUntil(buf, DATE_LIT_DELIM_CHAR);
  final boolean hasDate = dateStr.contains("/");
  final boolean hasTime = dateStr.contains(":");

  if(!hasDate && !hasTime) {
    throw new IllegalArgumentException("Invalid date time literal " + dateStr);
  }

  final SimpleDateFormat format;
  if(!hasDate) {
    format = buf.getTimeFormat(db);
  } else if(hasTime) {
    format = buf.getDateTimeFormat(db);
  } else {
    format = buf.getDateFormat(db);
  }

  // FIXME, do we need to know which "type" it was?
  try {
    return format.parse(dateStr);
  } catch(ParseException pe) {
    throw new IllegalArgumentException(
        "Invalid date time literal " + dateStr, pe);
  }
}

/**
 * Attempts to read a number literal which starts with the given (already
 * consumed) character.  Digits and '.' characters are accumulated until a
 * special character or the end of input.  Whenever no number is produced,
 * the buffer is rewound to where it was when this method was called.
 *
 * @return the number as a BigDecimal, or {@code null} if the text is not a
 *         number (a non-special, non-numeric character follows, or there
 *         are no digits at all)
 * @throws IllegalArgumentException if the accumulated text contains digits
 *         but is not a valid number
 */
private static Object maybeParseNumberLiteral(char firstChar, ExprBuf buf) {
  final StringBuilder digits = buf.getScratchBuffer().append(firstChar);
  boolean sawDigit = isDigit(firstChar);

  final int resumePos = buf.curPos();

  for(int next = buf.peekNext(); next != EOF; next = buf.peekNext()) {
    if(isDigit(next)) {
      sawDigit = true;
    } else if(next != '.') {
      if(isSpecialChar((char)next)) {
        // end of the candidate number
        break;
      }
      // found a non-number, non-special string
      buf.reset(resumePos);
      return null;
    }
    digits.append((char)next);
    buf.next();
  }

  if(!sawDigit) {
    // no digits, no number
    buf.reset(resumePos);
    return null;
  }

  final String numStr = digits.toString();
  try {
    // what number type to use here?
    return new BigDecimal(numStr);
  } catch(NumberFormatException ne) {
    buf.reset(resumePos);
    throw new IllegalArgumentException(
        "Invalid number literal " + numStr, ne);
  }
}

/**
 * @return {@code true} if the two flag values share at least one set bit
 */
private static boolean hasFlag(byte charFlag, byte flag) {
  final int common = charFlag & flag;
  return common != 0;
}

/**
 * Adds the given flag to the lookup table entry of each given character.
 */
private static void setCharFlag(byte flag, char... chars) {
  for(int i = 0; i < chars.length; ++i) {
    final char c = chars[i];
    CHAR_FLAGS[c] = (byte)(CHAR_FLAGS[c] | flag);
  }
}

/**
 * @return {@code true} if the value is one of the chars '0' through '9'
 */
private static boolean isDigit(int c) {
  return !((c < '0') || (c > '9'));
}

/**
 * Cursor over the characters of an expression string, which also holds the
 * per-expression scratch buffer and lazily created date/time formats.
 */
private static final class ExprBuf
{
  private final String _str;
  private int _pos;
  private SimpleDateFormat _dateFmt;
  private SimpleDateFormat _timeFmt;
  private SimpleDateFormat _dateTimeFmt;
  private final StringBuilder _scratch = new StringBuilder();

  private ExprBuf(String str) {
    _str = str;
  }

  /** @return the total number of characters in the expression */
  private int len() {
    return _str.length();
  }

  /** @return the index of the next character to be read */
  public int curPos() {
    return _pos;
  }

  /** @return the index just before the current position */
  public int prevPos() {
    return curPos() - 1;
  }

  /** @return {@code true} if there are unread characters */
  public boolean hasNext() {
    return len() > _pos;
  }

  /** Reads the next character and advances the position. */
  public char next() {
    final int idx = _pos++;
    return _str.charAt(idx);
  }

  /** Moves the position back by one character. */
  public void popPrev() {
    _pos -= 1;
  }

  /** @return the next character without advancing, or EOF if none remain */
  public int peekNext() {
    if(hasNext()) {
      return (int)_str.charAt(_pos);
    }
    return EOF;
  }

  /** Moves the position to the given index. */
  public void reset(int pos) {
    _pos = pos;
  }

  /** @return the shared scratch buffer, emptied for reuse */
  public StringBuilder getScratchBuffer() {
    _scratch.setLength(0);
    return _scratch;
  }

  /** @return the date-only format, created on first use */
  public SimpleDateFormat getDateFormat(DatabaseImpl db) {
    SimpleDateFormat fmt = _dateFmt;
    if(fmt == null) {
      fmt = newFormat(DATE_FORMAT, db);
      _dateFmt = fmt;
    }
    return fmt;
  }

  /** @return the time-only format, created on first use */
  public SimpleDateFormat getTimeFormat(DatabaseImpl db) {
    SimpleDateFormat fmt = _timeFmt;
    if(fmt == null) {
      fmt = newFormat(TIME_FORMAT, db);
      _timeFmt = fmt;
    }
    return fmt;
  }

  /** @return the combined date and time format, created on first use */
  public SimpleDateFormat getDateTimeFormat(DatabaseImpl db) {
    SimpleDateFormat fmt = _dateTimeFmt;
    if(fmt == null) {
      fmt = newFormat(DATE_TIME_FORMAT, db);
      _dateTimeFmt = fmt;
    }
    return fmt;
  }

  /**
   * Creates a format for the given pattern, using the database when one is
   * available and the DatabaseBuilder factory otherwise.
   */
  private static SimpleDateFormat newFormat(String str, DatabaseImpl db) {
    if(db != null) {
      return db.createDateFormat(str);
    }
    return DatabaseBuilder.createDateFormat(str);
  }
}


/**
 * An immutable token: a type plus the value which was read for it.
 */
static final class Token
{
  private final TokenType _type;
  private final Object _val;

  private Token(TokenType type, Object val) {
    _type = type;
    _val = val;
  }

  /** @return the kind of token */
  public TokenType getType() {
    return _type;
  }

  /** @return the value of the token */
  public Object getValue() {
    return _val;
  }

  /**
   * Renders the token for debugging: type and quoted value, plus the value
   * class for literals.  Whitespace tokens are shown as a quoted space.
   */
  @Override
  public String toString() {
    if(_type == TokenType.SPACE) {
      return "' '";
    }
    StringBuilder sb = new StringBuilder();
    sb.append("[").append(_type).append("] '").append(_val).append("'");
    if(_type == TokenType.LITERAL) {
      sb.append(" (").append(_val.getClass()).append(")");
    }
    return sb.toString();
  }
}

}

+ 474
- 0
src/main/java/com/healthmarketscience/jackcess/util/Expressionator.java Wyświetl plik

@@ -0,0 +1,474 @@
/*
Copyright (c) 2016 James Ahlborn

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.healthmarketscience.jackcess.util;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.healthmarketscience.jackcess.Database;
import com.healthmarketscience.jackcess.impl.DatabaseImpl;
import static com.healthmarketscience.jackcess.util.ExpressionTokenizer.Token;
import static com.healthmarketscience.jackcess.util.ExpressionTokenizer.TokenType;

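/**
* Entry point for tokenizing and parsing column expressions (default values
* and field validators) into {@link Expr} instances.  Parsing is still a
* work in progress (see the FIXME notes in the code).
*
* @author James Ahlborn
*/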
public class Expressionator
{

/**
 * The kinds of expression which can be tokenized/parsed.
 */
public enum Type {
  /** expression which provides the default value of a column */
  DEFAULT_VALUE,
  /** expression which validates the value of a field */
  FIELD_VALIDATOR
}

/**
 * Categories of the reserved words and operators known to the parser.
 */
private enum WordType {
  /** arithmetic/concatenation operators, including "mod" */
  OP,
  /** comparison operators */
  COMP,
  /** logical operators such as "and", "or", "not" */
  LOG_OP,
  /** the constants "true", "false" and "null" */
  CONST,
  /** words which begin a special operator ("is", "like", "between", "in") */
  SPEC_OP_PREFIX
}

// operator token values which delimit a function call and separate its
// parameters
private static final String FUNC_START_DELIM = "(";
private static final String FUNC_END_DELIM = ")";
private static final String FUNC_PARAM_SEP = ",";

// maps each known word/operator (registered in lowercase) to its category
private static final Map<String,WordType> WORD_TYPES = new HashMap<String,WordType>();

// register all known words/operators
static {
setWordType(WordType.OP, "+", "-", "*", "/", "\\", "^", "&", "mod");
setWordType(WordType.COMP, "<", "<=", ">", ">=", "=", "<>");
setWordType(WordType.LOG_OP, "and", "or", "eqv", "not", "xor");
setWordType(WordType.CONST, "true", "false", "null");
setWordType(WordType.SPEC_OP_PREFIX, "is", "like", "between", "in");
}


// expression which evaluates to the value of the current column, as
// supplied by the row context
private static final Expr THIS_COL_VALUE = new Expr() {
@Override protected Object eval(RowContext ctx) {
return ctx.getThisColumnValue();
}
};
// expression which always evaluates to null
private static final Expr NULL_VALUE = new Expr() {
@Override protected Object eval(RowContext ctx) {
return null;
}
};
// expression which always evaluates to Boolean.TRUE
private static final Expr TRUE_VALUE = new Expr() {
@Override protected Object eval(RowContext ctx) {
return Boolean.TRUE;
}
};
// expression which always evaluates to Boolean.FALSE
private static final Expr FALSE_VALUE = new Expr() {
@Override protected Object eval(RowContext ctx) {
return Boolean.FALSE;
}
};

// private constructor: instances cannot be created from outside this class
private Expressionator()
{
}

/**
 * Tokenizes the given expression, drops the superfluous whitespace tokens
 * and renders the resulting token list as a string.
 *
 * @return the string form of the token list, or {@code null} if the
 *         tokenizer produced no token list
 */
public static String testTokenize(Type exprType, String exprStr, Database db) {
  final DatabaseImpl dbImpl = (DatabaseImpl)db;
  final List<Token> tokens = trimSpaces(
      ExpressionTokenizer.tokenize(exprType, exprStr, dbImpl));

  // FIXME, NULL_EXPR?
  return ((tokens != null) ? tokens.toString() : null);
}

/**
 * Tokenizes and parses the given expression.  This is unfinished: the
 * parsed expression is currently discarded and {@code null} is always
 * returned.
 *
 * @return currently always {@code null}
 */
public static Expr parse(Type exprType, String exprStr, Database db) {

  final List<Token> tokens = trimSpaces(
      ExpressionTokenizer.tokenize(exprType, exprStr, (DatabaseImpl)db));

  if(tokens != null) {
    final TokBuf buf = new TokBuf(tokens);
    final boolean isSimpleExpr = isSimpleExpression(buf, exprType);
    parseExpression(exprType, buf, isSimpleExpr);
  }

  // FIXME, NULL_EXPR when there are no tokens?  FIXME, return the parsed
  // expression otherwise
  return null;
}

/**
 * Removes, in place, the whitespace tokens between the first and last
 * token, except a whitespace token which sits between a bare string and an
 * opening parenthesis.
 *
 * @return the same list instance, or {@code null} if given {@code null}
 */
private static List<Token> trimSpaces(List<Token> tokens) {
  if(tokens == null) {
    return null;
  }

  // for the most part, spaces are superfluous except for one situation(?).
  // when they appear between a string literal and '(' they help distinguish
  // a function call from another expression form
  int idx = 1;
  while(idx < (tokens.size() - 1)) {
    boolean isSpace = (tokens.get(idx).getType() == TokenType.SPACE);
    boolean keep = !isSpace ||
      ((tokens.get(idx - 1).getType() == TokenType.STRING) &&
       isOp(tokens.get(idx + 1), FUNC_START_DELIM));
    if(keep) {
      ++idx;
    } else {
      // the following tokens shift down, so the index stays put
      tokens.remove(idx);
    }
  }
  return tokens;
}

/**
 * Walks the tokens of the buffer and builds an expression.  This is
 * unfinished: only bare strings which turn out to be function calls produce
 * an expression, all other tokens are currently skipped.
 *
 * @return the expression left pending in the buffer
 * @throws IllegalArgumentException if no expression was produced
 */
private static Expr parseExpression(Type exprType, TokBuf buf,
                                    boolean isSimpleExpr)
{

  // FIXME, how do we handle order of ops when no parens?
  while(buf.hasNext()) {
    final Token tok = buf.next();

    switch(tok.getType()) {
    case STRING:
      if(getWordType(tok) == null) {
        // literal string? or possibly function?
        final Expr funcExpr = maybeParseFuncCall(
            tok, buf, exprType, isSimpleExpr);
        if(funcExpr != null) {
          buf.setPendingExpr(funcExpr);
        }
        // FIXME
      }
      break;
    case OBJ_NAME:
    case LITERAL:
    case OP:
    case SPACE:
      // not handled yet (top-level space is irrelevant)
      break;
    default:
      throw new RuntimeException("unknown token type " + tok.getType());
    }
  }

  final Expr result = buf.takePendingExpr();
  if(result != null) {
    return result;
  }

  throw new IllegalArgumentException("No expression found?");
}

/**
 * Attempts to parse a function call whose name is the given (already
 * consumed) token.  On success the buffer is left positioned after the
 * closing parenthesis of the call.  If the next token is not an opening
 * parenthesis, or parsing fails with an exception, the buffer is rewound to
 * where it was when this method was called.
 *
 * @param firstTok the bare string token which may be a function name
 * @param buf the token buffer, positioned just after firstTok
 * @param exprType the type of the enclosing expression
 * @param isSimpleExpr passed through when parsing the parameters
 * @return the function call expression, or {@code null} if the tokens do
 *         not form a function call
 * @throws IllegalArgumentException if the call is malformed (e.g. missing
 *         closing parenthesis or empty parameter)
 */
private static Expr maybeParseFuncCall(Token firstTok, TokBuf buf,
                                       Type exprType, boolean isSimpleExpr) {

  int startPos = buf.curPos();
  boolean foundFunc = false;

  try {
    Token t = buf.peekNext();
    if((t == null) || !isOp(t, FUNC_START_DELIM)) {
      // not a function call
      return null;
    }
    buf.next();
    List<TokBuf> paramBufs = findFuncCallParams(buf);

    List<Expr> params = Collections.emptyList();
    if(!paramBufs.isEmpty()) {
      params = new ArrayList<Expr>(paramBufs.size());
      for(TokBuf paramBuf : paramBufs) {
        params.add(parseExpression(exprType, paramBuf, isSimpleExpr));
      }
    }

    Expr funcExpr = new EFunc((String)firstTok.getValue(), params);
    // mark success so that the consumed tokens are not rewound below
    foundFunc = true;
    return funcExpr;

  } finally {
    if(!foundFunc) {
      buf.reset(startPos);
    }
  }
}

/**
 * Splits the tokens of a function call's parameter list into one sub-buffer
 * per parameter.  The buffer must be positioned just after the opening
 * parenthesis and is left positioned just after the matching closing
 * parenthesis.  Nested parentheses are kept within their parameter.
 *
 * @return the parameter buffers, empty if the call has no parameters
 * @throws IllegalArgumentException if the closing parenthesis is missing,
 *         or if one of several parameters is empty
 */
private static List<TokBuf> findFuncCallParams(TokBuf buf) {

  // simple case, no params
  Token tok = buf.peekNext();
  if((tok != null) && isOp(tok, FUNC_END_DELIM)) {
    buf.next();
    return Collections.emptyList();
  }

  // find closing ")", handle nested parens
  final List<TokBuf> paramBufs = new ArrayList<TokBuf>(3);
  int depth = 1;
  int paramStart = buf.curPos();
  while(buf.hasNext()) {

    tok = buf.next();

    if(isOp(tok, FUNC_START_DELIM)) {
      ++depth;
      continue;
    }

    if(isOp(tok, FUNC_END_DELIM)) {
      if(--depth > 0) {
        // closed a nested paren, still inside the call
        continue;
      }

      paramBufs.add(buf.subBuf(paramStart, buf.prevPos()));

      // if there is more than one param and one of them is empty, then
      // something is messed up (note, it should not be possible to have
      // an empty param if there is only one since we trim superfluous
      // spaces)
      if(paramBufs.size() > 1) {
        for(TokBuf paramBuf : paramBufs) {
          if(!paramBuf.hasNext()) {
            throw new IllegalArgumentException(
                "Invalid empty parameter for function");
          }
        }
      }

      return paramBufs;
    }

    if((depth == 1) && isOp(tok, FUNC_PARAM_SEP)) {
      // top-level separator ends the current param
      paramBufs.add(buf.subBuf(paramStart, buf.prevPos()));
      paramStart = buf.curPos();
    }
  }

  throw new IllegalArgumentException("Missing closing '" + FUNC_END_DELIM +
                                     "' for function call");
}

/**
 * Determines whether the expression is a "simple" default value, i.e. a
 * DEFAULT_VALUE expression without a leading "=".  A leading "=" token is
 * consumed from the buffer.
 *
 * @return {@code true} only for a DEFAULT_VALUE with no leading "="
 */
private static boolean isSimpleExpression(TokBuf buf, Type exprType) {
  if(exprType == Type.DEFAULT_VALUE) {
    final Token first = buf.peekNext();
    if((first == null) || !isOp(first, "=")) {
      // this is a "simple" DEFAULT_VALUE
      return true;
    }
    // a leading "=" indicates "full" expression handling for a
    // DEFAULT_VALUE
    buf.next();
  }
  return false;
}

/**
 * @return {@code true} if the token is an operator token whose value
 *         matches the given operator string, ignoring case
 */
private static boolean isOp(Token t, String opStr) {
  if(t.getType() != TokenType.OP) {
    return false;
  }
  final String tokStr = (String)t.getValue();
  return opStr.equalsIgnoreCase(tokStr);
}

/**
 * Looks up the category of the word held by the given token, ignoring case.
 * The word is lowercased with a fixed locale so that the lookup does not
 * depend on the default locale of the JVM (e.g. under a Turkish locale,
 * "IS" would otherwise not lowercase to "is").
 *
 * @param t a token whose value is a String
 * @return the category of the word, or {@code null} if it is not a known
 *         word
 */
private static WordType getWordType(Token t) {
  return WORD_TYPES.get(
      ((String)t.getValue()).toLowerCase(java.util.Locale.ENGLISH));
}

/**
 * Registers each of the given words under the given category.
 */
private static void setWordType(WordType type, String... words) {
  for(int i = 0; i < words.length; ++i) {
    WORD_TYPES.put(words[i], type);
  }
}

/**
 * Cursor over a list of tokens, which also holds the expression parsed so
 * far but not yet combined with an operator (the "pending" expression).
 */
private static final class TokBuf
{
  private final List<Token> _tokens;
  private final boolean _topLevel;
  private int _pos;
  private Expr _pendingExpr;

  private TokBuf(List<Token> tokens) {
    this(tokens, true);
  }

  private TokBuf(List<Token> tokens, boolean topLevel) {
    _tokens = tokens;
    _topLevel = topLevel;
  }

  /** @return {@code false} for buffers created via {@link #subBuf} */
  public boolean isTopLevel() {
    return _topLevel;
  }

  /** @return the index of the next token to be read */
  public int curPos() {
    return _pos;
  }

  /** @return the index just before the current position */
  public int prevPos() {
    return _pos - 1;
  }

  /** @return {@code true} if there are unread tokens */
  public boolean hasNext() {
    return (_pos < _tokens.size());
  }

  /** @return the next token without advancing, or {@code null} if none */
  public Token peekNext() {
    if(!hasNext()) {
      return null;
    }
    return _tokens.get(_pos);
  }

  /** Reads the next token and advances the position. */
  public Token next() {
    return _tokens.get(_pos++);
  }

  /** Moves the position to the given index. */
  public void reset(int pos) {
    _pos = pos;
  }

  /**
   * @return a non-top-level buffer over the tokens from start (inclusive)
   *         to end (exclusive), with indexes relative to this buffer's
   *         tokens
   */
  public TokBuf subBuf(int start, int end) {
    return new TokBuf(_tokens.subList(start, end), false);
  }

  /**
   * Stores the given expression as the pending expression.
   *
   * @throws IllegalArgumentException if there already is a pending
   *         expression, i.e. two expressions were found with no operator
   *         between them
   */
  public void setPendingExpr(Expr expr) {
    // only an existing pending expression is a conflict
    if(_pendingExpr != null) {
      throw new IllegalArgumentException("Found multiple expressions with no operator");
    }
    _pendingExpr = expr;
  }

  /** @return the pending expression (may be {@code null}), clearing it */
  public Expr takePendingExpr() {
    Expr expr = _pendingExpr;
    _pendingExpr = null;
    return expr;
  }

  /** @return {@code true} if there is a pending expression */
  public boolean hasPendingExpr() {
    return (_pendingExpr != null);
  }
}

/**
 * Base class of all parsed expressions.
 */
public static abstract class Expr
{
  /**
   * Evaluates this expression without any row context.
   *
   * @return the result of the evaluation
   */
  public Object evalDefault() {
    return eval(null);
  }

  /**
   * Evaluates this expression as a condition.  A Boolean result is used
   * directly; any other result is compared for equality with the value of
   * the current column.
   *
   * @param ctx the row context to evaluate against
   * @return the outcome of the condition
   */
  public boolean evalCondition(RowContext ctx) {
    Object val = eval(ctx);

    if(val instanceof Boolean) {
      return (Boolean)val;
    }

    // a single value as a conditional expression seems to act like an
    // implicit "="
    Object colVal = ctx.getThisColumnValue();
    if(val == null) {
      // null-safe comparison, an expression may evaluate to null.
      // NOTE(review): treats null as equal to null -- confirm this is the
      // desired semantics for null comparisons
      return (colVal == null);
    }
    return val.equals(colVal);
  }

  /**
   * Evaluates this expression.
   *
   * @param ctx the row context, {@code null} when called via
   *            {@link #evalDefault}
   * @return the result of the evaluation
   */
  protected abstract Object eval(RowContext ctx);
}

/**
 * Provides the row values against which an expression is evaluated.
 */
public interface RowContext
{
  /** @return the value of the column which the expression belongs to */
  Object getThisColumnValue();

  /** @return the value of the column with the given name */
  Object getRowValue(String colName);
}

/**
 * Expression which evaluates to a fixed value.
 */
private static final class ELiteralValue extends Expr
{
  private final Object _value;

  private ELiteralValue(Object literal) {
    this._value = literal;
  }

  /** @return the fixed value, regardless of the row context */
  @Override
  public Object eval(RowContext ctx) {
    return this._value;
  }
}

/**
 * Expression which evaluates to the value of a named column.
 */
private static final class EColumnValue extends Expr
{
  private final String _colName;

  private EColumnValue(String name) {
    this._colName = name;
  }

  /** @return the row context's value for the named column */
  @Override
  public Object eval(RowContext ctx) {
    final Object rowVal = ctx.getRowValue(this._colName);
    return rowVal;
  }
}

// placeholder with no members yet -- presumably the future base of operator
// expressions (TODO confirm)
private static abstract class EOp
{
}

// placeholder with no members yet -- presumably the future base of
// conditional expressions (TODO confirm)
private static abstract class ECond
{
}

/**
 * Expression which wraps another expression and evaluates to its result.
 */
private static class EParen extends Expr
{
  private final Expr _expr;

  private EParen(Expr inner) {
    this._expr = inner;
  }

  /** @return the result of evaluating the wrapped expression */
  @Override
  protected Object eval(RowContext ctx) {
    final Object innerVal = this._expr.eval(ctx);
    return innerVal;
  }
}

/**
 * Expression for a function call: a function name plus its parameter
 * expressions.  Evaluation is not implemented yet.
 */
private static class EFunc extends Expr
{
  private final String _name;
  private final List<Expr> _params;

  private EFunc(String funcName, List<Expr> funcParams) {
    this._name = funcName;
    this._params = funcParams;
  }

  /** @return currently always {@code Boolean.FALSE} */
  @Override
  protected Object eval(RowContext ctx) {
    // FIXME how do func results act for conditional values?

    return Boolean.FALSE;
  }
}

}

Ładowanie…
Anuluj
Zapisz