123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686 |
- /*
- Copyright (c) 2016 James Ahlborn
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
-
- package com.healthmarketscience.jackcess.impl.expr;
-
- import java.math.BigDecimal;
- import java.text.DateFormat;
- import java.text.FieldPosition;
- import java.text.ParsePosition;
- import java.util.AbstractMap;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Calendar;
- import java.util.Date;
- import java.util.EnumMap;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
- import java.util.TimeZone;
-
- import static com.healthmarketscience.jackcess.impl.expr.Expressionator.*;
- import com.healthmarketscience.jackcess.expr.LocaleContext;
- import com.healthmarketscience.jackcess.expr.ParseException;
- import com.healthmarketscience.jackcess.expr.TemporalConfig;
- import com.healthmarketscience.jackcess.expr.Value;
-
-
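/**
 * Utility which splits an expression string into a flat list of tokens
 * (object names, literals, operators, delimiters, bare strings and
 * whitespace).
 *
 * @author James Ahlborn
 */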
- class ExpressionTokenizer
- {
- private static final int EOF = -1;
- static final char QUOTED_STR_CHAR = '"';
- private static final char SINGLE_QUOTED_STR_CHAR = '\'';
- private static final char OBJ_NAME_START_CHAR = '[';
- private static final char OBJ_NAME_END_CHAR = ']';
- private static final char DATE_LIT_QUOTE_CHAR = '#';
- private static final char EQUALS_CHAR = '=';
-
- private static final int AMPM_SUFFIX_LEN = 3;
- private static final String AM_SUFFIX = " am";
- private static final String PM_SUFFIX = " pm";
- // access times are based on this date (not the UTC base)
- private static final String BASE_DATE = "12/30/1899 ";
- private static final String BASE_DATE_FMT = "M/d/yyyy";
-
- private static final byte IS_OP_FLAG = 0x01;
- private static final byte IS_COMP_FLAG = 0x02;
- private static final byte IS_DELIM_FLAG = 0x04;
- private static final byte IS_SPACE_FLAG = 0x08;
- private static final byte IS_QUOTE_FLAG = 0x10;
-
/**
 * The kinds of tokens produced by the tokenizer.
 */
enum TokenType {
  /** a bracketed object name, e.g. {@code [Name]} */
  OBJ_NAME,
  /** a quoted string, date/time or number literal */
  LITERAL,
  /** an operator (simple or comparison) */
  OP,
  /** a single delimiter character */
  DELIM,
  /** a bare, unquoted word */
  STRING,
  /** a run of whitespace, normalized to a single space */
  SPACE
}
-
// the comparison operators which are made up of two comparison characters
private static final Set<String> TWO_CHAR_COMP_OPS = new HashSet<String>(
    Arrays.asList("<=", ">=", "<>"));

// classification flags for each of the 128 ascii characters (0 means the
// character has no special meaning to the tokenizer)
private static final byte[] CHAR_FLAGS = new byte[128];

static {
  setCharFlag(IS_QUOTE_FLAG, '"', '\'', '#', '[', ']');
  setCharFlag(IS_SPACE_FLAG, ' ', '\t', '\r', '\n');
  setCharFlag(IS_DELIM_FLAG, '(', ')', ',', '.', '!');
  setCharFlag(IS_COMP_FLAG, '=', '<', '>');
  setCharFlag(IS_OP_FLAG, '+', '-', '*', '/', '\\', '^', '&');
}

private ExpressionTokenizer() {
  // static utility class, never instantiated
}
-
- /**
- * Tokenizes an expression string of the given type and (optionally) in the
- * context of the relevant database.
- */
- static List<Token> tokenize(Type exprType, String exprStr,
- ParseContext context) {
-
- if(exprStr != null) {
- exprStr = exprStr.trim();
- }
-
- if((exprStr == null) || (exprStr.length() == 0)) {
- return null;
- }
-
- List<Token> tokens = new ArrayList<Token>();
-
- ExprBuf buf = new ExprBuf(exprStr, context);
-
- while(buf.hasNext()) {
- char c = buf.next();
-
- byte charFlag = getCharFlag(c);
- if(charFlag != 0) {
-
- // what could it be?
- switch(charFlag) {
- case IS_OP_FLAG:
-
- // all simple operator chars are single character operators
- tokens.add(new Token(TokenType.OP, String.valueOf(c)));
- break;
-
- case IS_COMP_FLAG:
-
- // special case for default values
- if((exprType == Type.DEFAULT_VALUE) && (c == EQUALS_CHAR) &&
- (buf.prevPos() == 0)) {
- // a leading equals sign indicates how a default value should be
- // evaluated
- tokens.add(new Token(TokenType.OP, String.valueOf(c)));
- continue;
- }
-
- tokens.add(new Token(TokenType.OP, parseCompOp(c, buf)));
- break;
-
- case IS_DELIM_FLAG:
-
- // all delimiter chars are single character symbols
- tokens.add(new Token(TokenType.DELIM, String.valueOf(c)));
- break;
-
- case IS_SPACE_FLAG:
-
- // normalize whitespace into single space
- consumeWhitespace(buf);
- tokens.add(new Token(TokenType.SPACE, " "));
- break;
-
- case IS_QUOTE_FLAG:
-
- switch(c) {
- case QUOTED_STR_CHAR:
- case SINGLE_QUOTED_STR_CHAR:
- tokens.add(new Token(TokenType.LITERAL, null,
- parseQuotedString(buf, c), Value.Type.STRING));
- break;
- case DATE_LIT_QUOTE_CHAR:
- tokens.add(parseDateLiteral(buf));
- break;
- case OBJ_NAME_START_CHAR:
- tokens.add(new Token(TokenType.OBJ_NAME, parseObjNameString(buf)));
- break;
- default:
- throw new ParseException(
- "Invalid leading quote character " + c + " " + buf);
- }
-
- break;
-
- default:
- throw new RuntimeException("unknown char flag " + charFlag);
- }
-
- } else {
-
- if(isDigit(c)) {
- Token numLit = maybeParseNumberLiteral(c, buf);
- if(numLit != null) {
- tokens.add(numLit);
- continue;
- }
- }
-
- // standalone word of some sort
- String str = parseBareString(c, buf, exprType);
- tokens.add(new Token(TokenType.STRING, str));
- }
-
- }
-
- return tokens;
- }
-
- private static byte getCharFlag(char c) {
- return ((c < 128) ? CHAR_FLAGS[c] : 0);
- }
-
- private static boolean isSpecialChar(char c) {
- return (getCharFlag(c) != 0);
- }
-
- private static String parseCompOp(char firstChar, ExprBuf buf) {
- String opStr = String.valueOf(firstChar);
-
- int c = buf.peekNext();
- if((c != EOF) && hasFlag(getCharFlag((char)c), IS_COMP_FLAG)) {
-
- // is the combo a valid comparison operator?
- String tmpStr = opStr + (char)c;
- if(TWO_CHAR_COMP_OPS.contains(tmpStr)) {
- opStr = tmpStr;
- buf.next();
- }
- }
-
- return opStr;
- }
-
- private static void consumeWhitespace(ExprBuf buf) {
- int c = EOF;
- while(((c = buf.peekNext()) != EOF) &&
- hasFlag(getCharFlag((char)c), IS_SPACE_FLAG)) {
- buf.next();
- }
- }
-
- private static String parseBareString(char firstChar, ExprBuf buf,
- Type exprType) {
- StringBuilder sb = buf.getScratchBuffer().append(firstChar);
-
- byte stopFlags = (IS_OP_FLAG | IS_DELIM_FLAG | IS_SPACE_FLAG);
- if(exprType == Type.FIELD_VALIDATOR) {
- stopFlags |= IS_COMP_FLAG;
- }
-
- while(buf.hasNext()) {
- char c = buf.next();
- byte charFlag = getCharFlag(c);
- if(hasFlag(charFlag, stopFlags)) {
- buf.popPrev();
- break;
- }
- sb.append(c);
- }
-
- return sb.toString();
- }
-
- private static String parseQuotedString(ExprBuf buf, char quoteChar) {
- return parseStringUntil(buf, quoteChar, null, true);
- }
-
- private static String parseObjNameString(ExprBuf buf) {
- return parseStringUntil(buf, OBJ_NAME_END_CHAR, OBJ_NAME_START_CHAR, false);
- }
-
- private static String parseDateLiteralString(ExprBuf buf) {
- return parseStringUntil(buf, DATE_LIT_QUOTE_CHAR, null, false);
- }
-
- private static String parseStringUntil(ExprBuf buf, char endChar,
- Character startChar,
- boolean allowDoubledEscape)
- {
- StringBuilder sb = buf.getScratchBuffer();
-
- boolean complete = false;
- while(buf.hasNext()) {
- char c = buf.next();
- if(c == endChar) {
- if(allowDoubledEscape && (buf.peekNext() == endChar)) {
- sb.append(endChar);
- buf.next();
- } else {
- complete = true;
- break;
- }
- } else if((startChar != null) &&
- (startChar == c)) {
- throw new ParseException("Missing closing '" + endChar +
- "' for quoted string " + buf);
- }
-
- sb.append(c);
- }
-
- if(!complete) {
- throw new ParseException("Missing closing '" + endChar +
- "' for quoted string " + buf);
- }
-
- return sb.toString();
- }
-
- private static Token parseDateLiteral(ExprBuf buf)
- {
- String dateStr = parseDateLiteralString(buf);
-
- TemporalConfig.Type type = determineDateType(
- dateStr, buf.getContext());
- if(type == null) {
- throw new ParseException("Invalid date/time literal " + dateStr +
- " " + buf);
- }
-
- // note that although we may parse in the time "24" format, we will
- // display as the default time format
- DateFormat parseDf = buf.getDateTimeFormat(type);
- DateFormat df = buf.getDateTimeFormat(type.getDefaultType());
-
- try {
- return new Token(TokenType.LITERAL, parseComplete(parseDf, dateStr),
- dateStr, type.getValueType(), df);
- } catch(java.text.ParseException pe) {
- throw new ParseException(
- "Invalid date/time literal " + dateStr + " " + buf, pe);
- }
- }
-
- static TemporalConfig.Type determineDateType(
- String dateStr, LocaleContext ctx)
- {
- TemporalConfig cfg = ctx.getTemporalConfig();
- boolean hasDate = (dateStr.indexOf(cfg.getDateSeparator()) >= 0);
- boolean hasTime = (dateStr.indexOf(cfg.getTimeSeparator()) >= 0);
- boolean hasAmPm = false;
-
- if(hasTime) {
- int strLen = dateStr.length();
- hasAmPm = ((strLen >= AMPM_SUFFIX_LEN) &&
- (dateStr.regionMatches(true, strLen - AMPM_SUFFIX_LEN,
- AM_SUFFIX, 0, AMPM_SUFFIX_LEN) ||
- dateStr.regionMatches(true, strLen - AMPM_SUFFIX_LEN,
- PM_SUFFIX, 0, AMPM_SUFFIX_LEN)));
- }
-
- if(hasDate) {
- if(hasTime) {
- return (hasAmPm ? TemporalConfig.Type.DATE_TIME_12 :
- TemporalConfig.Type.DATE_TIME_24);
- }
- return TemporalConfig.Type.DATE;
- } else if(hasTime) {
- return (hasAmPm ? TemporalConfig.Type.TIME_12 :
- TemporalConfig.Type.TIME_24);
- }
- return null;
- }
-
- static DateFormat createParseDateFormat(TemporalConfig.Type type,
- LocaleContext ctx)
- {
- TemporalConfig cfg = ctx.getTemporalConfig();
- DateFormat df = ctx.createDateFormat(cfg.getDateTimeFormat(type));
-
- TemporalConfig.Type parseType = null;
- switch(type) {
- case TIME:
- parseType = TemporalConfig.Type.DATE_TIME;
- break;
- case TIME_12:
- parseType = TemporalConfig.Type.DATE_TIME_12;
- break;
- case TIME_24:
- parseType = TemporalConfig.Type.DATE_TIME_24;
- break;
- default:
- }
-
- if(parseType != null) {
- // we need to use a special DateFormat impl which handles parsing
- // separately from formatting
- String baseDate = getBaseDatePrefix(ctx);
- DateFormat parseDf = ctx.createDateFormat(
- cfg.getDateTimeFormat(parseType));
- df = new TimeFormat(parseDf, df, baseDate);
- }
-
- return df;
- }
-
- private static String getBaseDatePrefix(LocaleContext ctx) {
- String dateFmt = ctx.getTemporalConfig().getDateFormat();
- String baseDate = BASE_DATE;
- if(!BASE_DATE_FMT.equals(dateFmt)) {
- try {
- // need to reformat the base date to the relevant date format
- DateFormat parseDf = ctx.createDateFormat(BASE_DATE_FMT);
- DateFormat df = ctx.createDateFormat(dateFmt);
- baseDate = df.format(parseComplete(parseDf, baseDate));
- } catch(Exception e) {
- throw new ParseException("Could not parse base date", e);
- }
- }
- return baseDate + " ";
- }
-
- private static Token maybeParseNumberLiteral(char firstChar, ExprBuf buf) {
- StringBuilder sb = buf.getScratchBuffer().append(firstChar);
- boolean hasDigit = isDigit(firstChar);
-
- int startPos = buf.curPos();
- boolean foundNum = false;
- boolean isFp = false;
- int expPos = -1;
-
- try {
-
- int c = EOF;
- while((c = buf.peekNext()) != EOF) {
- if(isDigit(c)) {
- hasDigit = true;
- sb.append((char)c);
- buf.next();
- } else if(c == '.') {
- isFp = true;
- sb.append((char)c);
- buf.next();
- } else if(hasDigit && (expPos < 0) && ((c == 'e') || (c == 'E'))) {
- isFp = true;
- sb.append((char)c);
- expPos = sb.length();
- buf.next();
- } else if((expPos == sb.length()) && ((c == '-') || (c == '+'))) {
- sb.append((char)c);
- buf.next();
- } else if(isSpecialChar((char)c)) {
- break;
- } else {
- // found a non-number, non-special string
- return null;
- }
- }
-
- if(!hasDigit) {
- // no digits, no number
- return null;
- }
-
- String numStr = sb.toString();
- try {
- Number num = null;
- Value.Type numType = null;
-
- if(!isFp) {
- try {
- // try to parse as int. if that fails, fall back to BigDecimal
- // (this will handle the case of int overflow)
- num = Integer.valueOf(numStr);
- numType = Value.Type.LONG;
- } catch(NumberFormatException ne) {
- // fallback to decimal
- }
- }
-
- if(num == null) {
- num = new BigDecimal(numStr);
- numType = Value.Type.BIG_DEC;
- }
-
- foundNum = true;
- return new Token(TokenType.LITERAL, num, numStr, numType);
- } catch(NumberFormatException ne) {
- throw new ParseException(
- "Invalid number literal " + numStr + " " + buf, ne);
- }
-
- } finally {
- if(!foundNum) {
- buf.reset(startPos);
- }
- }
- }
-
- private static boolean hasFlag(byte charFlag, byte flag) {
- return ((charFlag & flag) != 0);
- }
-
- private static void setCharFlag(byte flag, char... chars) {
- for(char c : chars) {
- CHAR_FLAGS[c] |= flag;
- }
- }
-
- private static boolean isDigit(int c) {
- return ((c >= '0') && (c <= '9'));
- }
-
- static <K,V> Map.Entry<K,V> newEntry(K a, V b) {
- return new AbstractMap.SimpleImmutableEntry<K,V>(a, b);
- }
-
- static Date parseComplete(DateFormat df, String str)
- throws java.text.ParseException
- {
- // the java parsers will parse "successfully" even if there is leftover
- // information. we only want to consider a parse operation successful if
- // it parses the entire string (ignoring surrounding whitespace)
- str = str.trim();
- ParsePosition pp = new ParsePosition(0);
- Object d = df.parse(str, pp);
- if(pp.getIndex() < str.length()) {
- throw new java.text.ParseException("Failed parsing '" + str + "'",
- pp.getIndex());
- }
- return (Date)d;
- }
-
- private static final class ExprBuf
- {
- private final String _str;
- private final ParseContext _ctx;
- private int _pos;
- private final Map<TemporalConfig.Type,DateFormat> _dateTimeFmts =
- new EnumMap<TemporalConfig.Type,DateFormat>(TemporalConfig.Type.class);
- private final StringBuilder _scratch = new StringBuilder();
-
- private ExprBuf(String str, ParseContext ctx) {
- _str = str;
- _ctx = ctx;
- }
-
- private int len() {
- return _str.length();
- }
-
- public int curPos() {
- return _pos;
- }
-
- public int prevPos() {
- return _pos - 1;
- }
-
- public boolean hasNext() {
- return _pos < len();
- }
-
- public char next() {
- return _str.charAt(_pos++);
- }
-
- public void popPrev() {
- --_pos;
- }
-
- public int peekNext() {
- if(!hasNext()) {
- return EOF;
- }
- return _str.charAt(_pos);
- }
-
- public void reset(int pos) {
- _pos = pos;
- }
-
- public StringBuilder getScratchBuffer() {
- _scratch.setLength(0);
- return _scratch;
- }
-
- public ParseContext getContext() {
- return _ctx;
- }
-
- public DateFormat getDateTimeFormat(TemporalConfig.Type type) {
- DateFormat df = _dateTimeFmts.get(type);
- if(df == null) {
- df = createParseDateFormat(type, _ctx);
- _dateTimeFmts.put(type, df);
- }
- return df;
- }
-
- @Override
- public String toString() {
- return "[char " + _pos + "] '" + _str + "'";
- }
- }
-
-
- static final class Token
- {
- private final TokenType _type;
- private final Object _val;
- private final String _valStr;
- private final Value.Type _valType;
- private final DateFormat _sdf;
-
- private Token(TokenType type, String val) {
- this(type, val, val);
- }
-
- private Token(TokenType type, Object val, String valStr) {
- this(type, val, valStr, null, null);
- }
-
- private Token(TokenType type, Object val, String valStr, Value.Type valType) {
- this(type, val, valStr, valType, null);
- }
-
- private Token(TokenType type, Object val, String valStr, Value.Type valType,
- DateFormat sdf) {
- _type = type;
- _val = ((val != null) ? val : valStr);
- _valStr = valStr;
- _valType = valType;
- _sdf = sdf;
- }
-
- public TokenType getType() {
- return _type;
- }
-
- public Object getValue() {
- return _val;
- }
-
- public String getValueStr() {
- return _valStr;
- }
-
- public Value.Type getValueType() {
- return _valType;
- }
-
- public DateFormat getDateFormat() {
- return _sdf;
- }
-
- @Override
- public String toString() {
- if(_type == TokenType.SPACE) {
- return "' '";
- }
- String str = "[" + _type + "] '" + _val + "'";
- if(_valType != null) {
- str += " (" + _valType + ")";
- }
- return str;
- }
- }
-
- /**
- * Special date/time format which will parse time-only strings "correctly"
- * according to how access handles time-only values.
- */
- private static final class TimeFormat extends DateFormat
- {
- private static final long serialVersionUID = 0L;
-
- private final DateFormat _parseDelegate;
- private final DateFormat _fmtDelegate;
- private final String _baseDate;
-
- private TimeFormat(DateFormat parseDelegate, DateFormat fmtDelegate,
- String baseDate)
- {
- _parseDelegate = parseDelegate;
- _fmtDelegate = fmtDelegate;
- _baseDate = baseDate;
- }
-
- @Override
- public StringBuffer format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition) {
- return _fmtDelegate.format(date, toAppendTo, fieldPosition);
- }
-
- @Override
- public Date parse(String source, ParsePosition pos) {
- // we parse as a full date/time in order to get the correct "base date"
- // used by access
- return _parseDelegate.parse(_baseDate + source, pos);
- }
-
- @Override
- public Calendar getCalendar() {
- return _fmtDelegate.getCalendar();
- }
-
- @Override
- public TimeZone getTimeZone() {
- return _fmtDelegate.getTimeZone();
- }
- }
-
- }
|