You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ExpressionTokenizer.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669
  1. /*
  2. Copyright (c) 2016 James Ahlborn
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.impl.expr;
  14. import java.math.BigDecimal;
  15. import java.text.DateFormat;
  16. import java.text.FieldPosition;
  17. import java.text.ParsePosition;
  18. import java.util.AbstractMap;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.Calendar;
  22. import java.util.Date;
  23. import java.util.EnumMap;
  24. import java.util.HashSet;
  25. import java.util.List;
  26. import java.util.Map;
  27. import java.util.Set;
  28. import java.util.TimeZone;
  29. import static com.healthmarketscience.jackcess.impl.expr.Expressionator.*;
  30. import com.healthmarketscience.jackcess.expr.LocaleContext;
  31. import com.healthmarketscience.jackcess.expr.ParseException;
  32. import com.healthmarketscience.jackcess.expr.TemporalConfig;
  33. import com.healthmarketscience.jackcess.expr.Value;
  34. /**
  35. *
  36. * @author James Ahlborn
  37. */
  38. class ExpressionTokenizer
  39. {
  // sentinel returned by ExprBuf.peekNext() when no characters remain
  private static final int EOF = -1;

  // quote chars which introduce the various quoted constructs handled by
  // tokenize(): string literals (double or single quoted), bracketed object
  // names, and date/time literals
  static final char QUOTED_STR_CHAR = '"';
  private static final char SINGLE_QUOTED_STR_CHAR = '\'';
  private static final char OBJ_NAME_START_CHAR = '[';
  private static final char OBJ_NAME_END_CHAR = ']';
  private static final char DATE_LIT_QUOTE_CHAR = '#';
  // a leading '=' gets special handling in default value expressions
  private static final char EQUALS_CHAR = '=';

  // am/pm suffixes (matched case-insensitively at the end of a date/time
  // literal) and their common length
  private static final int AMPM_SUFFIX_LEN = 3;
  private static final String AM_SUFFIX = " am";
  private static final String PM_SUFFIX = " pm";
  // access times are based on this date (not the UTC base)
  private static final String BASE_DATE = "12/30/1899";
  // the date format pattern in which BASE_DATE is written
  private static final String BASE_DATE_FMT = "M/d/yyyy";

  // character classification flags stored in CHAR_FLAGS
  private static final byte IS_OP_FLAG = 0x01;
  private static final byte IS_COMP_FLAG = 0x02;
  private static final byte IS_DELIM_FLAG = 0x04;
  private static final byte IS_SPACE_FLAG = 0x08;
  private static final byte IS_QUOTE_FLAG = 0x10;
  /**
   * The kinds of tokens which the tokenizer emits.
   */
  enum TokenType {
    /** a bracketed object name */
    OBJ_NAME,
    /** a quoted string, date/time, or number literal */
    LITERAL,
    /** an operator (simple or comparison) */
    OP,
    /** a single character delimiter */
    DELIM,
    /** a bare (unquoted) word */
    STRING,
    /** a run of whitespace, normalized to a single space */
    SPACE;
  }
  // classification flags for chars in the range [0, 128), indexed by char
  // value (getCharFlag() reports 0 for any char outside of this range)
  private static final byte[] CHAR_FLAGS = new byte[128];
  // the only two character combinations which form a comparison operator
  private static final Set<String> TWO_CHAR_COMP_OPS = new HashSet<String>(
      Arrays.asList("<=", ">=", "<>"));

  static {
    // each special char is assigned exactly one flag, which is what allows
    // tokenize() to switch directly on the flag value
    setCharFlag(IS_OP_FLAG, '+', '-', '*', '/', '\\', '^', '&');
    setCharFlag(IS_COMP_FLAG, '<', '>', '=');
    setCharFlag(IS_DELIM_FLAG, '.', '!', ',', '(', ')');
    setCharFlag(IS_SPACE_FLAG, ' ', '\n', '\r', '\t');
    setCharFlag(IS_QUOTE_FLAG, '"', '#', '[', ']', '\'');
  }

  // only static members, never instantiated
  private ExpressionTokenizer() {}
  72. /**
  73. * Tokenizes an expression string of the given type and (optionally) in the
  74. * context of the relevant database.
  75. */
  76. static List<Token> tokenize(Type exprType, String exprStr,
  77. ParseContext context) {
  78. if(exprStr != null) {
  79. exprStr = exprStr.trim();
  80. }
  81. if((exprStr == null) || (exprStr.length() == 0)) {
  82. return null;
  83. }
  84. List<Token> tokens = new ArrayList<Token>();
  85. ExprBuf buf = new ExprBuf(exprStr, context);
  86. while(buf.hasNext()) {
  87. char c = buf.next();
  88. byte charFlag = getCharFlag(c);
  89. if(charFlag != 0) {
  90. // what could it be?
  91. switch(charFlag) {
  92. case IS_OP_FLAG:
  93. // all simple operator chars are single character operators
  94. tokens.add(new Token(TokenType.OP, String.valueOf(c)));
  95. break;
  96. case IS_COMP_FLAG:
  97. // special case for default values
  98. if((exprType == Type.DEFAULT_VALUE) && (c == EQUALS_CHAR) &&
  99. (buf.prevPos() == 0)) {
  100. // a leading equals sign indicates how a default value should be
  101. // evaluated
  102. tokens.add(new Token(TokenType.OP, String.valueOf(c)));
  103. continue;
  104. }
  105. tokens.add(new Token(TokenType.OP, parseCompOp(c, buf)));
  106. break;
  107. case IS_DELIM_FLAG:
  108. // all delimiter chars are single character symbols
  109. tokens.add(new Token(TokenType.DELIM, String.valueOf(c)));
  110. break;
  111. case IS_SPACE_FLAG:
  112. // normalize whitespace into single space
  113. consumeWhitespace(buf);
  114. tokens.add(new Token(TokenType.SPACE, " "));
  115. break;
  116. case IS_QUOTE_FLAG:
  117. switch(c) {
  118. case QUOTED_STR_CHAR:
  119. case SINGLE_QUOTED_STR_CHAR:
  120. tokens.add(new Token(TokenType.LITERAL, null,
  121. parseQuotedString(buf, c), Value.Type.STRING));
  122. break;
  123. case DATE_LIT_QUOTE_CHAR:
  124. tokens.add(parseDateLiteral(buf));
  125. break;
  126. case OBJ_NAME_START_CHAR:
  127. tokens.add(new Token(TokenType.OBJ_NAME, parseObjNameString(buf)));
  128. break;
  129. default:
  130. throw new ParseException(
  131. "Invalid leading quote character " + c + " " + buf);
  132. }
  133. break;
  134. default:
  135. throw new RuntimeException("unknown char flag " + charFlag);
  136. }
  137. } else {
  138. if(isDigit(c)) {
  139. Token numLit = maybeParseNumberLiteral(c, buf);
  140. if(numLit != null) {
  141. tokens.add(numLit);
  142. continue;
  143. }
  144. }
  145. // standalone word of some sort
  146. String str = parseBareString(c, buf, exprType);
  147. tokens.add(new Token(TokenType.STRING, str));
  148. }
  149. }
  150. return tokens;
  151. }
  152. private static byte getCharFlag(char c) {
  153. return ((c < 128) ? CHAR_FLAGS[c] : 0);
  154. }
  155. private static boolean isSpecialChar(char c) {
  156. return (getCharFlag(c) != 0);
  157. }
  158. private static String parseCompOp(char firstChar, ExprBuf buf) {
  159. String opStr = String.valueOf(firstChar);
  160. int c = buf.peekNext();
  161. if((c != EOF) && hasFlag(getCharFlag((char)c), IS_COMP_FLAG)) {
  162. // is the combo a valid comparison operator?
  163. String tmpStr = opStr + (char)c;
  164. if(TWO_CHAR_COMP_OPS.contains(tmpStr)) {
  165. opStr = tmpStr;
  166. buf.next();
  167. }
  168. }
  169. return opStr;
  170. }
  171. private static void consumeWhitespace(ExprBuf buf) {
  172. int c = EOF;
  173. while(((c = buf.peekNext()) != EOF) &&
  174. hasFlag(getCharFlag((char)c), IS_SPACE_FLAG)) {
  175. buf.next();
  176. }
  177. }
  178. private static String parseBareString(char firstChar, ExprBuf buf,
  179. Type exprType) {
  180. StringBuilder sb = buf.getScratchBuffer().append(firstChar);
  181. byte stopFlags = (IS_OP_FLAG | IS_DELIM_FLAG | IS_SPACE_FLAG);
  182. if(exprType == Type.FIELD_VALIDATOR) {
  183. stopFlags |= IS_COMP_FLAG;
  184. }
  185. while(buf.hasNext()) {
  186. char c = buf.next();
  187. byte charFlag = getCharFlag(c);
  188. if(hasFlag(charFlag, stopFlags)) {
  189. buf.popPrev();
  190. break;
  191. }
  192. sb.append(c);
  193. }
  194. return sb.toString();
  195. }
  196. private static String parseQuotedString(ExprBuf buf, char quoteChar) {
  197. return parseStringUntil(buf, quoteChar, null, true);
  198. }
  199. private static String parseObjNameString(ExprBuf buf) {
  200. return parseStringUntil(buf, OBJ_NAME_END_CHAR, OBJ_NAME_START_CHAR, false);
  201. }
  202. private static String parseDateLiteralString(ExprBuf buf) {
  203. return parseStringUntil(buf, DATE_LIT_QUOTE_CHAR, null, false);
  204. }
  205. private static String parseStringUntil(ExprBuf buf, char endChar,
  206. Character startChar,
  207. boolean allowDoubledEscape)
  208. {
  209. StringBuilder sb = buf.getScratchBuffer();
  210. boolean complete = false;
  211. while(buf.hasNext()) {
  212. char c = buf.next();
  213. if(c == endChar) {
  214. if(allowDoubledEscape && (buf.peekNext() == endChar)) {
  215. sb.append(endChar);
  216. buf.next();
  217. } else {
  218. complete = true;
  219. break;
  220. }
  221. } else if((startChar != null) &&
  222. (startChar == c)) {
  223. throw new ParseException("Missing closing '" + endChar +
  224. "' for quoted string " + buf);
  225. }
  226. sb.append(c);
  227. }
  228. if(!complete) {
  229. throw new ParseException("Missing closing '" + endChar +
  230. "' for quoted string " + buf);
  231. }
  232. return sb.toString();
  233. }
  234. private static Token parseDateLiteral(ExprBuf buf)
  235. {
  236. String dateStr = parseDateLiteralString(buf);
  237. TemporalConfig.Type type = determineDateType(
  238. dateStr, buf.getContext());
  239. if(type == null) {
  240. throw new ParseException("Invalid date/time literal " + dateStr +
  241. " " + buf);
  242. }
  243. // note that although we may parse in the time "24" format, we will
  244. // display as the default time format
  245. DateFormat parseDf = buf.getParseDateTimeFormat(type);
  246. try {
  247. return new Token(TokenType.LITERAL, parseComplete(parseDf, dateStr),
  248. dateStr, type.getValueType());
  249. } catch(java.text.ParseException pe) {
  250. throw new ParseException(
  251. "Invalid date/time literal " + dateStr + " " + buf, pe);
  252. }
  253. }
  254. static TemporalConfig.Type determineDateType(
  255. String dateStr, LocaleContext ctx)
  256. {
  257. TemporalConfig cfg = ctx.getTemporalConfig();
  258. boolean hasDate = (dateStr.indexOf(cfg.getDateSeparator()) >= 0);
  259. boolean hasTime = (dateStr.indexOf(cfg.getTimeSeparator()) >= 0);
  260. boolean hasAmPm = false;
  261. if(hasTime) {
  262. int strLen = dateStr.length();
  263. hasAmPm = ((strLen >= AMPM_SUFFIX_LEN) &&
  264. (dateStr.regionMatches(true, strLen - AMPM_SUFFIX_LEN,
  265. AM_SUFFIX, 0, AMPM_SUFFIX_LEN) ||
  266. dateStr.regionMatches(true, strLen - AMPM_SUFFIX_LEN,
  267. PM_SUFFIX, 0, AMPM_SUFFIX_LEN)));
  268. }
  269. if(hasDate) {
  270. if(hasTime) {
  271. return (hasAmPm ? TemporalConfig.Type.DATE_TIME_12 :
  272. TemporalConfig.Type.DATE_TIME_24);
  273. }
  274. return TemporalConfig.Type.DATE;
  275. } else if(hasTime) {
  276. return (hasAmPm ? TemporalConfig.Type.TIME_12 :
  277. TemporalConfig.Type.TIME_24);
  278. }
  279. return null;
  280. }
  281. static DateFormat createParseDateTimeFormat(TemporalConfig.Type type,
  282. LocaleContext ctx)
  283. {
  284. switch(type) {
  285. case TIME:
  286. return createParseTimeFormat(TemporalConfig.Type.DATE_TIME, ctx);
  287. case TIME_12:
  288. return createParseTimeFormat(TemporalConfig.Type.DATE_TIME_12, ctx);
  289. case TIME_24:
  290. return createParseTimeFormat(TemporalConfig.Type.DATE_TIME_24, ctx);
  291. default:
  292. // use normal formatter
  293. }
  294. TemporalConfig cfg = ctx.getTemporalConfig();
  295. return ctx.createDateFormat(cfg.getDateTimeFormat(type));
  296. }
  297. private static DateFormat createParseTimeFormat(TemporalConfig.Type parseType,
  298. LocaleContext ctx)
  299. {
  300. TemporalConfig cfg = ctx.getTemporalConfig();
  301. // we need to use a special DateFormat impl which manipulates the parsed
  302. // time-only value so it becomes the right Date value
  303. String baseDate = getBaseDatePrefix(ctx);
  304. DateFormat parseDf = ctx.createDateFormat(
  305. cfg.getDateTimeFormat(parseType));
  306. return new ParseTimeFormat(parseDf, baseDate);
  307. }
  308. private static String getBaseDatePrefix(LocaleContext ctx) {
  309. String dateFmt = ctx.getTemporalConfig().getDateFormat();
  310. String baseDate = BASE_DATE;
  311. if(!BASE_DATE_FMT.equals(dateFmt)) {
  312. try {
  313. // need to reformat the base date to the relevant date format
  314. DateFormat parseDf = ctx.createDateFormat(BASE_DATE_FMT);
  315. DateFormat df = ctx.createDateFormat(dateFmt);
  316. baseDate = df.format(parseComplete(parseDf, baseDate));
  317. } catch(Exception e) {
  318. throw new ParseException("Could not parse base date", e);
  319. }
  320. }
  321. return baseDate + " ";
  322. }
  323. private static Token maybeParseNumberLiteral(char firstChar, ExprBuf buf) {
  324. StringBuilder sb = buf.getScratchBuffer().append(firstChar);
  325. boolean hasDigit = isDigit(firstChar);
  326. int startPos = buf.curPos();
  327. boolean foundNum = false;
  328. boolean isFp = false;
  329. int expPos = -1;
  330. try {
  331. int c = EOF;
  332. while((c = buf.peekNext()) != EOF) {
  333. if(isDigit(c)) {
  334. hasDigit = true;
  335. sb.append((char)c);
  336. buf.next();
  337. } else if(c == '.') {
  338. isFp = true;
  339. sb.append((char)c);
  340. buf.next();
  341. } else if(hasDigit && (expPos < 0) && ((c == 'e') || (c == 'E'))) {
  342. isFp = true;
  343. sb.append((char)c);
  344. expPos = sb.length();
  345. buf.next();
  346. } else if((expPos == sb.length()) && ((c == '-') || (c == '+'))) {
  347. sb.append((char)c);
  348. buf.next();
  349. } else if(isSpecialChar((char)c)) {
  350. break;
  351. } else {
  352. // found a non-number, non-special string
  353. return null;
  354. }
  355. }
  356. if(!hasDigit) {
  357. // no digits, no number
  358. return null;
  359. }
  360. String numStr = sb.toString();
  361. try {
  362. Number num = null;
  363. Value.Type numType = null;
  364. if(!isFp) {
  365. try {
  366. // try to parse as int. if that fails, fall back to BigDecimal
  367. // (this will handle the case of int overflow)
  368. num = Integer.valueOf(numStr);
  369. numType = Value.Type.LONG;
  370. } catch(NumberFormatException ne) {
  371. // fallback to decimal
  372. }
  373. }
  374. if(num == null) {
  375. num = new BigDecimal(numStr);
  376. numType = Value.Type.BIG_DEC;
  377. }
  378. foundNum = true;
  379. return new Token(TokenType.LITERAL, num, numStr, numType);
  380. } catch(NumberFormatException ne) {
  381. throw new ParseException(
  382. "Invalid number literal " + numStr + " " + buf, ne);
  383. }
  384. } finally {
  385. if(!foundNum) {
  386. buf.reset(startPos);
  387. }
  388. }
  389. }
  390. private static boolean hasFlag(byte charFlag, byte flag) {
  391. return ((charFlag & flag) != 0);
  392. }
  393. private static void setCharFlag(byte flag, char... chars) {
  394. for(char c : chars) {
  395. CHAR_FLAGS[c] |= flag;
  396. }
  397. }
  398. private static boolean isDigit(int c) {
  399. return ((c >= '0') && (c <= '9'));
  400. }
  401. static <K,V> Map.Entry<K,V> newEntry(K a, V b) {
  402. return new AbstractMap.SimpleImmutableEntry<K,V>(a, b);
  403. }
  404. static Date parseComplete(DateFormat df, String str)
  405. throws java.text.ParseException
  406. {
  407. // the java parsers will parse "successfully" even if there is leftover
  408. // information. we only want to consider a parse operation successful if
  409. // it parses the entire string (ignoring surrounding whitespace)
  410. str = str.trim();
  411. ParsePosition pp = new ParsePosition(0);
  412. Object d = df.parse(str, pp);
  413. if(pp.getIndex() < str.length()) {
  414. throw new java.text.ParseException("Failed parsing '" + str + "'",
  415. pp.getIndex());
  416. }
  417. return (Date)d;
  418. }
  419. private static final class ExprBuf
  420. {
  421. private final String _str;
  422. private final ParseContext _ctx;
  423. private int _pos;
  424. private final Map<TemporalConfig.Type,DateFormat> _dateTimeFmts =
  425. new EnumMap<TemporalConfig.Type,DateFormat>(TemporalConfig.Type.class);
  426. private final StringBuilder _scratch = new StringBuilder();
  427. private ExprBuf(String str, ParseContext ctx) {
  428. _str = str;
  429. _ctx = ctx;
  430. }
  431. private int len() {
  432. return _str.length();
  433. }
  434. public int curPos() {
  435. return _pos;
  436. }
  437. public int prevPos() {
  438. return _pos - 1;
  439. }
  440. public boolean hasNext() {
  441. return _pos < len();
  442. }
  443. public char next() {
  444. return _str.charAt(_pos++);
  445. }
  446. public void popPrev() {
  447. --_pos;
  448. }
  449. public int peekNext() {
  450. if(!hasNext()) {
  451. return EOF;
  452. }
  453. return _str.charAt(_pos);
  454. }
  455. public void reset(int pos) {
  456. _pos = pos;
  457. }
  458. public StringBuilder getScratchBuffer() {
  459. _scratch.setLength(0);
  460. return _scratch;
  461. }
  462. public ParseContext getContext() {
  463. return _ctx;
  464. }
  465. public DateFormat getParseDateTimeFormat(TemporalConfig.Type type) {
  466. DateFormat df = _dateTimeFmts.get(type);
  467. if(df == null) {
  468. df = createParseDateTimeFormat(type, _ctx);
  469. _dateTimeFmts.put(type, df);
  470. }
  471. return df;
  472. }
  473. @Override
  474. public String toString() {
  475. return "[char " + _pos + "] '" + _str + "'";
  476. }
  477. }
  478. static final class Token
  479. {
  480. private final TokenType _type;
  481. private final Object _val;
  482. private final String _valStr;
  483. private final Value.Type _valType;
  484. private Token(TokenType type, String val) {
  485. this(type, val, val);
  486. }
  487. private Token(TokenType type, Object val, String valStr) {
  488. this(type, val, valStr, null);
  489. }
  490. private Token(TokenType type, Object val, String valStr, Value.Type valType) {
  491. _type = type;
  492. _val = ((val != null) ? val : valStr);
  493. _valStr = valStr;
  494. _valType = valType;
  495. }
  496. public TokenType getType() {
  497. return _type;
  498. }
  499. public Object getValue() {
  500. return _val;
  501. }
  502. public String getValueStr() {
  503. return _valStr;
  504. }
  505. public Value.Type getValueType() {
  506. return _valType;
  507. }
  508. @Override
  509. public String toString() {
  510. if(_type == TokenType.SPACE) {
  511. return "' '";
  512. }
  513. String str = "[" + _type + "] '" + _val + "'";
  514. if(_valType != null) {
  515. str += " (" + _valType + ")";
  516. }
  517. return str;
  518. }
  519. }
  520. /**
  521. * Special date/time format which will parse time-only strings "correctly"
  522. * according to how access handles time-only values.
  523. */
  524. private static final class ParseTimeFormat extends DateFormat
  525. {
  526. private static final long serialVersionUID = 0L;
  527. private final DateFormat _parseDelegate;
  528. private final String _baseDate;
  529. private ParseTimeFormat(DateFormat parseDelegate, String baseDate)
  530. {
  531. _parseDelegate = parseDelegate;
  532. _baseDate = baseDate;
  533. }
  534. @Override
  535. public StringBuffer format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition) {
  536. throw new UnsupportedOperationException();
  537. }
  538. @Override
  539. public Date parse(String source, ParsePosition pos) {
  540. // we parse as a full date/time in order to get the correct "base date"
  541. // used by access
  542. return _parseDelegate.parse(_baseDate + source, pos);
  543. }
  544. @Override
  545. public Calendar getCalendar() {
  546. return _parseDelegate.getCalendar();
  547. }
  548. @Override
  549. public TimeZone getTimeZone() {
  550. return _parseDelegate.getTimeZone();
  551. }
  552. }
  553. }