You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ExpressionTokenizer.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
  1. /*
  2. Copyright (c) 2016 James Ahlborn
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.impl.expr;
  14. import java.math.BigDecimal;
  15. import java.text.DateFormat;
  16. import java.text.FieldPosition;
  17. import java.text.ParsePosition;
  18. import java.util.AbstractMap;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.Calendar;
  22. import java.util.Date;
  23. import java.util.HashSet;
  24. import java.util.List;
  25. import java.util.Map;
  26. import java.util.Set;
  27. import java.util.TimeZone;
  28. import static com.healthmarketscience.jackcess.impl.expr.Expressionator.*;
  29. import com.healthmarketscience.jackcess.expr.Value;
  30. import com.healthmarketscience.jackcess.expr.TemporalConfig;
  31. import com.healthmarketscience.jackcess.expr.ParseException;
  32. /**
  33. *
  34. * @author James Ahlborn
  35. */
  36. class ExpressionTokenizer
  37. {
  38. private static final int EOF = -1;
  39. static final char QUOTED_STR_CHAR = '"';
  40. private static final char SINGLE_QUOTED_STR_CHAR = '\'';
  41. private static final char OBJ_NAME_START_CHAR = '[';
  42. private static final char OBJ_NAME_END_CHAR = ']';
  43. private static final char DATE_LIT_QUOTE_CHAR = '#';
  44. private static final char EQUALS_CHAR = '=';
  45. private static final int AMPM_SUFFIX_LEN = 3;
  46. private static final String AM_SUFFIX = " am";
  47. private static final String PM_SUFFIX = " pm";
  48. // access times are based on this date (not the UTC base)
  49. private static final String BASE_DATE = "12/30/1899 ";
  50. private static final String BASE_DATE_FMT = "M/d/yyyy";
  51. private static final byte IS_OP_FLAG = 0x01;
  52. private static final byte IS_COMP_FLAG = 0x02;
  53. private static final byte IS_DELIM_FLAG = 0x04;
  54. private static final byte IS_SPACE_FLAG = 0x08;
  55. private static final byte IS_QUOTE_FLAG = 0x10;
  /**
   * The kinds of tokens which the tokenizer can emit.
   */
  enum TokenType {
    OBJ_NAME,
    LITERAL,
    OP,
    DELIM,
    STRING,
    SPACE
  }
  // classification flags for each character in the 7-bit ASCII range,
  // indexed by character code (0 means "no special meaning")
  private static final byte[] CHAR_FLAGS = new byte[128];

  // the comparison operators which are made up of two characters
  private static final Set<String> TWO_CHAR_COMP_OPS =
    new HashSet<String>(Arrays.asList("<=", ">=", "<>"));

  static {
    // each character below is assigned to exactly one category
    setCharFlag(IS_OP_FLAG,
                '+', '-', '*', '/', '\\', '^', '&');
    setCharFlag(IS_COMP_FLAG,
                '<', '>', '=');
    setCharFlag(IS_DELIM_FLAG,
                '.', '!', ',', '(', ')');
    setCharFlag(IS_SPACE_FLAG,
                ' ', '\n', '\r', '\t');
    setCharFlag(IS_QUOTE_FLAG,
                '"', '#', '[', ']', '\'');
  }
  69. /**
  70. * Tokenizes an expression string of the given type and (optionally) in the
  71. * context of the relevant database.
  72. */
  73. static List<Token> tokenize(Type exprType, String exprStr,
  74. ParseContext context) {
  75. if(exprStr != null) {
  76. exprStr = exprStr.trim();
  77. }
  78. if((exprStr == null) || (exprStr.length() == 0)) {
  79. return null;
  80. }
  81. List<Token> tokens = new ArrayList<Token>();
  82. ExprBuf buf = new ExprBuf(exprStr, context);
  83. while(buf.hasNext()) {
  84. char c = buf.next();
  85. byte charFlag = getCharFlag(c);
  86. if(charFlag != 0) {
  87. // what could it be?
  88. switch(charFlag) {
  89. case IS_OP_FLAG:
  90. // all simple operator chars are single character operators
  91. tokens.add(new Token(TokenType.OP, String.valueOf(c)));
  92. break;
  93. case IS_COMP_FLAG:
  94. // special case for default values
  95. if((exprType == Type.DEFAULT_VALUE) && (c == EQUALS_CHAR) &&
  96. (buf.prevPos() == 0)) {
  97. // a leading equals sign indicates how a default value should be
  98. // evaluated
  99. tokens.add(new Token(TokenType.OP, String.valueOf(c)));
  100. continue;
  101. }
  102. tokens.add(new Token(TokenType.OP, parseCompOp(c, buf)));
  103. break;
  104. case IS_DELIM_FLAG:
  105. // all delimiter chars are single character symbols
  106. tokens.add(new Token(TokenType.DELIM, String.valueOf(c)));
  107. break;
  108. case IS_SPACE_FLAG:
  109. // normalize whitespace into single space
  110. consumeWhitespace(buf);
  111. tokens.add(new Token(TokenType.SPACE, " "));
  112. break;
  113. case IS_QUOTE_FLAG:
  114. switch(c) {
  115. case QUOTED_STR_CHAR:
  116. case SINGLE_QUOTED_STR_CHAR:
  117. tokens.add(new Token(TokenType.LITERAL, null,
  118. parseQuotedString(buf, c), Value.Type.STRING));
  119. break;
  120. case DATE_LIT_QUOTE_CHAR:
  121. tokens.add(parseDateLiteral(buf));
  122. break;
  123. case OBJ_NAME_START_CHAR:
  124. tokens.add(new Token(TokenType.OBJ_NAME, parseObjNameString(buf)));
  125. break;
  126. default:
  127. throw new ParseException(
  128. "Invalid leading quote character " + c + " " + buf);
  129. }
  130. break;
  131. default:
  132. throw new RuntimeException("unknown char flag " + charFlag);
  133. }
  134. } else {
  135. if(isDigit(c)) {
  136. Token numLit = maybeParseNumberLiteral(c, buf);
  137. if(numLit != null) {
  138. tokens.add(numLit);
  139. continue;
  140. }
  141. }
  142. // standalone word of some sort
  143. String str = parseBareString(c, buf, exprType);
  144. tokens.add(new Token(TokenType.STRING, str));
  145. }
  146. }
  147. return tokens;
  148. }
  149. private static byte getCharFlag(char c) {
  150. return ((c < 128) ? CHAR_FLAGS[c] : 0);
  151. }
  152. private static boolean isSpecialChar(char c) {
  153. return (getCharFlag(c) != 0);
  154. }
  155. private static String parseCompOp(char firstChar, ExprBuf buf) {
  156. String opStr = String.valueOf(firstChar);
  157. int c = buf.peekNext();
  158. if((c != EOF) && hasFlag(getCharFlag((char)c), IS_COMP_FLAG)) {
  159. // is the combo a valid comparison operator?
  160. String tmpStr = opStr + (char)c;
  161. if(TWO_CHAR_COMP_OPS.contains(tmpStr)) {
  162. opStr = tmpStr;
  163. buf.next();
  164. }
  165. }
  166. return opStr;
  167. }
  168. private static void consumeWhitespace(ExprBuf buf) {
  169. int c = EOF;
  170. while(((c = buf.peekNext()) != EOF) &&
  171. hasFlag(getCharFlag((char)c), IS_SPACE_FLAG)) {
  172. buf.next();
  173. }
  174. }
  175. private static String parseBareString(char firstChar, ExprBuf buf,
  176. Type exprType) {
  177. StringBuilder sb = buf.getScratchBuffer().append(firstChar);
  178. byte stopFlags = (IS_OP_FLAG | IS_DELIM_FLAG | IS_SPACE_FLAG);
  179. if(exprType == Type.FIELD_VALIDATOR) {
  180. stopFlags |= IS_COMP_FLAG;
  181. }
  182. while(buf.hasNext()) {
  183. char c = buf.next();
  184. byte charFlag = getCharFlag(c);
  185. if(hasFlag(charFlag, stopFlags)) {
  186. buf.popPrev();
  187. break;
  188. }
  189. sb.append(c);
  190. }
  191. return sb.toString();
  192. }
  193. private static String parseQuotedString(ExprBuf buf, char quoteChar) {
  194. return parseStringUntil(buf, quoteChar, null, true);
  195. }
  196. private static String parseObjNameString(ExprBuf buf) {
  197. return parseStringUntil(buf, OBJ_NAME_END_CHAR, OBJ_NAME_START_CHAR, false);
  198. }
  199. private static String parseDateLiteralString(ExprBuf buf) {
  200. return parseStringUntil(buf, DATE_LIT_QUOTE_CHAR, null, false);
  201. }
  202. private static String parseStringUntil(ExprBuf buf, char endChar,
  203. Character startChar,
  204. boolean allowDoubledEscape)
  205. {
  206. StringBuilder sb = buf.getScratchBuffer();
  207. boolean complete = false;
  208. while(buf.hasNext()) {
  209. char c = buf.next();
  210. if(c == endChar) {
  211. if(allowDoubledEscape && (buf.peekNext() == endChar)) {
  212. sb.append(endChar);
  213. buf.next();
  214. } else {
  215. complete = true;
  216. break;
  217. }
  218. } else if((startChar != null) &&
  219. (startChar == c)) {
  220. throw new ParseException("Missing closing '" + endChar +
  221. "' for quoted string " + buf);
  222. }
  223. sb.append(c);
  224. }
  225. if(!complete) {
  226. throw new ParseException("Missing closing '" + endChar +
  227. "' for quoted string " + buf);
  228. }
  229. return sb.toString();
  230. }
  231. private static Token parseDateLiteral(ExprBuf buf)
  232. {
  233. TemporalConfig cfg = buf.getTemporalConfig();
  234. String dateStr = parseDateLiteralString(buf);
  235. boolean hasDate = (dateStr.indexOf(cfg.getDateSeparator()) >= 0);
  236. boolean hasTime = (dateStr.indexOf(cfg.getTimeSeparator()) >= 0);
  237. boolean hasAmPm = false;
  238. if(hasTime) {
  239. int strLen = dateStr.length();
  240. hasAmPm = ((strLen >= AMPM_SUFFIX_LEN) &&
  241. (dateStr.regionMatches(true, strLen - AMPM_SUFFIX_LEN,
  242. AM_SUFFIX, 0, AMPM_SUFFIX_LEN) ||
  243. dateStr.regionMatches(true, strLen - AMPM_SUFFIX_LEN,
  244. PM_SUFFIX, 0, AMPM_SUFFIX_LEN)));
  245. }
  246. DateFormat sdf = null;
  247. Value.Type valType = null;
  248. if(hasDate && hasTime) {
  249. sdf = (hasAmPm ? buf.getDateTimeFormat12() : buf.getDateTimeFormat24());
  250. valType = Value.Type.DATE_TIME;
  251. } else if(hasDate) {
  252. sdf = buf.getDateFormat();
  253. valType = Value.Type.DATE;
  254. } else if(hasTime) {
  255. sdf = (hasAmPm ? buf.getTimeFormat12() : buf.getTimeFormat24());
  256. valType = Value.Type.TIME;
  257. } else {
  258. throw new ParseException("Invalid date time literal " + dateStr +
  259. " " + buf);
  260. }
  261. try {
  262. return new Token(TokenType.LITERAL, sdf.parse(dateStr), dateStr, valType,
  263. sdf);
  264. } catch(java.text.ParseException pe) {
  265. throw new ParseException(
  266. "Invalid date time literal " + dateStr + " " + buf, pe);
  267. }
  268. }
  269. private static Token maybeParseNumberLiteral(char firstChar, ExprBuf buf) {
  270. StringBuilder sb = buf.getScratchBuffer().append(firstChar);
  271. boolean hasDigit = isDigit(firstChar);
  272. int startPos = buf.curPos();
  273. boolean foundNum = false;
  274. boolean isFp = false;
  275. int expPos = -1;
  276. try {
  277. int c = EOF;
  278. while((c = buf.peekNext()) != EOF) {
  279. if(isDigit(c)) {
  280. hasDigit = true;
  281. sb.append((char)c);
  282. buf.next();
  283. } else if(c == '.') {
  284. isFp = true;
  285. sb.append((char)c);
  286. buf.next();
  287. } else if(hasDigit && (expPos < 0) && ((c == 'e') || (c == 'E'))) {
  288. isFp = true;
  289. sb.append((char)c);
  290. expPos = sb.length();
  291. buf.next();
  292. } else if((expPos == sb.length()) && ((c == '-') || (c == '+'))) {
  293. sb.append((char)c);
  294. buf.next();
  295. } else if(isSpecialChar((char)c)) {
  296. break;
  297. } else {
  298. // found a non-number, non-special string
  299. return null;
  300. }
  301. }
  302. if(!hasDigit) {
  303. // no digits, no number
  304. return null;
  305. }
  306. String numStr = sb.toString();
  307. try {
  308. Number num = null;
  309. Value.Type numType = null;
  310. if(!isFp) {
  311. try {
  312. // try to parse as int. if that fails, fall back to BigDecimal
  313. // (this will handle the case of int overflow)
  314. num = Integer.valueOf(numStr);
  315. numType = Value.Type.LONG;
  316. } catch(NumberFormatException ne) {
  317. // fallback to decimal
  318. }
  319. }
  320. if(num == null) {
  321. num = new BigDecimal(numStr);
  322. numType = Value.Type.BIG_DEC;
  323. }
  324. foundNum = true;
  325. return new Token(TokenType.LITERAL, num, numStr, numType);
  326. } catch(NumberFormatException ne) {
  327. throw new ParseException(
  328. "Invalid number literal " + numStr + " " + buf, ne);
  329. }
  330. } finally {
  331. if(!foundNum) {
  332. buf.reset(startPos);
  333. }
  334. }
  335. }
  336. private static boolean hasFlag(byte charFlag, byte flag) {
  337. return ((charFlag & flag) != 0);
  338. }
  339. private static void setCharFlag(byte flag, char... chars) {
  340. for(char c : chars) {
  341. CHAR_FLAGS[c] |= flag;
  342. }
  343. }
  344. private static boolean isDigit(int c) {
  345. return ((c >= '0') && (c <= '9'));
  346. }
  347. static <K,V> Map.Entry<K,V> newEntry(K a, V b) {
  348. return new AbstractMap.SimpleImmutableEntry<K,V>(a, b);
  349. }
  350. private static final class ExprBuf
  351. {
  352. private final String _str;
  353. private final ParseContext _ctx;
  354. private int _pos;
  355. private DateFormat _dateFmt;
  356. private DateFormat _timeFmt12;
  357. private DateFormat _dateTimeFmt12;
  358. private DateFormat _timeFmt24;
  359. private DateFormat _dateTimeFmt24;
  360. private String _baseDate;
  361. private final StringBuilder _scratch = new StringBuilder();
  362. private ExprBuf(String str, ParseContext ctx) {
  363. _str = str;
  364. _ctx = ctx;
  365. }
  366. private int len() {
  367. return _str.length();
  368. }
  369. public int curPos() {
  370. return _pos;
  371. }
  372. public int prevPos() {
  373. return _pos - 1;
  374. }
  375. public boolean hasNext() {
  376. return _pos < len();
  377. }
  378. public char next() {
  379. return _str.charAt(_pos++);
  380. }
  381. public void popPrev() {
  382. --_pos;
  383. }
  384. public int peekNext() {
  385. if(!hasNext()) {
  386. return EOF;
  387. }
  388. return _str.charAt(_pos);
  389. }
  390. public void reset(int pos) {
  391. _pos = pos;
  392. }
  393. public StringBuilder getScratchBuffer() {
  394. _scratch.setLength(0);
  395. return _scratch;
  396. }
  397. public TemporalConfig getTemporalConfig() {
  398. return _ctx.getTemporalConfig();
  399. }
  400. public DateFormat getDateFormat() {
  401. if(_dateFmt == null) {
  402. _dateFmt = _ctx.createDateFormat(getTemporalConfig().getDateFormat());
  403. }
  404. return _dateFmt;
  405. }
  406. public DateFormat getTimeFormat12() {
  407. if(_timeFmt12 == null) {
  408. _timeFmt12 = new TimeFormat(
  409. getDateTimeFormat12(), _ctx.createDateFormat(
  410. getTemporalConfig().getTimeFormat12()),
  411. getBaseDate());
  412. }
  413. return _timeFmt12;
  414. }
  415. public DateFormat getDateTimeFormat12() {
  416. if(_dateTimeFmt12 == null) {
  417. _dateTimeFmt12 = _ctx.createDateFormat(
  418. getTemporalConfig().getDateTimeFormat12());
  419. }
  420. return _dateTimeFmt12;
  421. }
  422. public DateFormat getTimeFormat24() {
  423. if(_timeFmt24 == null) {
  424. _timeFmt24 = new TimeFormat(
  425. getDateTimeFormat24(), _ctx.createDateFormat(
  426. getTemporalConfig().getTimeFormat24()),
  427. getBaseDate());
  428. }
  429. return _timeFmt24;
  430. }
  431. public DateFormat getDateTimeFormat24() {
  432. if(_dateTimeFmt24 == null) {
  433. _dateTimeFmt24 = _ctx.createDateFormat(
  434. getTemporalConfig().getDateTimeFormat24());
  435. }
  436. return _dateTimeFmt24;
  437. }
  438. private String getBaseDate() {
  439. if(_baseDate == null) {
  440. String dateFmt = getTemporalConfig().getDateFormat();
  441. String baseDate = BASE_DATE;
  442. if(!BASE_DATE_FMT.equals(dateFmt)) {
  443. try {
  444. // need to reformat the base date to the relevant date format
  445. DateFormat df = _ctx.createDateFormat(BASE_DATE_FMT);
  446. baseDate = getDateFormat().format(df.parse(baseDate));
  447. } catch(Exception e) {
  448. throw new ParseException("Could not parse base date", e);
  449. }
  450. }
  451. _baseDate = baseDate + " ";
  452. }
  453. return _baseDate;
  454. }
  455. @Override
  456. public String toString() {
  457. return "[char " + _pos + "] '" + _str + "'";
  458. }
  459. }
  460. static final class Token
  461. {
  462. private final TokenType _type;
  463. private final Object _val;
  464. private final String _valStr;
  465. private final Value.Type _valType;
  466. private final DateFormat _sdf;
  467. private Token(TokenType type, String val) {
  468. this(type, val, val);
  469. }
  470. private Token(TokenType type, Object val, String valStr) {
  471. this(type, val, valStr, null, null);
  472. }
  473. private Token(TokenType type, Object val, String valStr, Value.Type valType) {
  474. this(type, val, valStr, valType, null);
  475. }
  476. private Token(TokenType type, Object val, String valStr, Value.Type valType,
  477. DateFormat sdf) {
  478. _type = type;
  479. _val = ((val != null) ? val : valStr);
  480. _valStr = valStr;
  481. _valType = valType;
  482. _sdf = sdf;
  483. }
  484. public TokenType getType() {
  485. return _type;
  486. }
  487. public Object getValue() {
  488. return _val;
  489. }
  490. public String getValueStr() {
  491. return _valStr;
  492. }
  493. public Value.Type getValueType() {
  494. return _valType;
  495. }
  496. public DateFormat getDateFormat() {
  497. return _sdf;
  498. }
  499. @Override
  500. public String toString() {
  501. if(_type == TokenType.SPACE) {
  502. return "' '";
  503. }
  504. String str = "[" + _type + "] '" + _val + "'";
  505. if(_valType != null) {
  506. str += " (" + _valType + ")";
  507. }
  508. return str;
  509. }
  510. }
  511. private static final class TimeFormat extends DateFormat
  512. {
  513. private static final long serialVersionUID = 0L;
  514. private final DateFormat _parseDelegate;
  515. private final DateFormat _fmtDelegate;
  516. private final String _baseDate;
  517. private TimeFormat(DateFormat parseDelegate, DateFormat fmtDelegate,
  518. String baseDate)
  519. {
  520. _parseDelegate = parseDelegate;
  521. _fmtDelegate = fmtDelegate;
  522. _baseDate = baseDate;
  523. }
  524. @Override
  525. public StringBuffer format(Date date, StringBuffer toAppendTo, FieldPosition fieldPosition) {
  526. return _fmtDelegate.format(date, toAppendTo, fieldPosition);
  527. }
  528. @Override
  529. public Date parse(String source, ParsePosition pos) {
  530. // we parse as a full date/time in order to get the correct "base date"
  531. // used by access
  532. return _parseDelegate.parse(_baseDate + source, pos);
  533. }
  534. @Override
  535. public Calendar getCalendar() {
  536. return _fmtDelegate.getCalendar();
  537. }
  538. @Override
  539. public TimeZone getTimeZone() {
  540. return _fmtDelegate.getTimeZone();
  541. }
  542. }
  543. }