You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ExpressionTokenizer.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. /*
  2. Copyright (c) 2016 James Ahlborn
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package com.healthmarketscience.jackcess.impl.expr;
  14. import java.math.BigDecimal;
  15. import java.time.DateTimeException;
  16. import java.time.LocalDate;
  17. import java.time.LocalDateTime;
  18. import java.time.LocalTime;
  19. import java.time.format.DateTimeFormatter;
  20. import java.time.temporal.TemporalAccessor;
  21. import java.util.AbstractMap;
  22. import java.util.ArrayList;
  23. import java.util.Arrays;
  24. import java.util.EnumMap;
  25. import java.util.HashSet;
  26. import java.util.List;
  27. import java.util.Map;
  28. import java.util.Set;
  29. import static com.healthmarketscience.jackcess.impl.expr.Expressionator.*;
  30. import com.healthmarketscience.jackcess.expr.LocaleContext;
  31. import com.healthmarketscience.jackcess.expr.ParseException;
  32. import com.healthmarketscience.jackcess.expr.TemporalConfig;
  33. import com.healthmarketscience.jackcess.expr.Value;
  34. import com.healthmarketscience.jackcess.impl.ColumnImpl;
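/**
 * Utility which breaks an expression string into a flat list of
 * {@link Token}s (operators, delimiters, literals, object names, bare
 * strings and normalized whitespace).
 *
 * @author James Ahlborn
 */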
  39. class ExpressionTokenizer
  40. {
  /** Value returned by {@code ExprBuf.peekNext()} once the input is used up. */
  private static final int EOF = -1;

  // characters which open/close the various quoted constructs
  static final char QUOTED_STR_CHAR = '"';
  private static final char SINGLE_QUOTED_STR_CHAR = '\'';
  private static final char OBJ_NAME_START_CHAR = '[';
  private static final char OBJ_NAME_END_CHAR = ']';
  private static final char DATE_LIT_QUOTE_CHAR = '#';
  private static final char EQUALS_CHAR = '=';

  // bit flags used to classify the "special" chars tracked in CHAR_FLAGS
  private static final byte IS_OP_FLAG = 1 << 0;
  private static final byte IS_COMP_FLAG = 1 << 1;
  private static final byte IS_DELIM_FLAG = 1 << 2;
  private static final byte IS_SPACE_FLAG = 1 << 3;
  private static final byte IS_QUOTE_FLAG = 1 << 4;

  /** The kinds of token which the tokenizer can produce. */
  enum TokenType {
    OBJ_NAME,
    LITERAL,
    OP,
    DELIM,
    STRING,
    SPACE
  }

  /** Flag lookup table indexed by char value (first 128 chars only). */
  private static final byte[] CHAR_FLAGS = new byte[128];

  /** The comparison operators which are made up of two chars. */
  private static final Set<String> TWO_CHAR_COMP_OPS =
    new HashSet<>(Arrays.asList("<=", ">=", "<>"));

  static {
    // populate the lookup table, one call per flag
    setCharFlag(IS_OP_FLAG, '+', '-', '*', '/', '\\', '^', '&');
    setCharFlag(IS_COMP_FLAG, '<', '>', '=');
    setCharFlag(IS_DELIM_FLAG, '.', '!', ',', '(', ')');
    setCharFlag(IS_SPACE_FLAG, ' ', '\n', '\r', '\t');
    setCharFlag(IS_QUOTE_FLAG, '"', '#', '[', ']', '\'');
  }

  private ExpressionTokenizer() {
    // static utility methods only, never instantiated
  }
  67. /**
  68. * Tokenizes an expression string of the given type and (optionally) in the
  69. * context of the relevant database.
  70. */
  71. static List<Token> tokenize(Type exprType, String exprStr,
  72. ParseContext context) {
  73. if(exprStr != null) {
  74. exprStr = exprStr.trim();
  75. }
  76. if((exprStr == null) || (exprStr.length() == 0)) {
  77. return null;
  78. }
  79. List<Token> tokens = new ArrayList<Token>();
  80. ExprBuf buf = new ExprBuf(exprStr, context);
  81. while(buf.hasNext()) {
  82. char c = buf.next();
  83. byte charFlag = getCharFlag(c);
  84. if(charFlag != 0) {
  85. // what could it be?
  86. switch(charFlag) {
  87. case IS_OP_FLAG:
  88. // all simple operator chars are single character operators
  89. tokens.add(new Token(TokenType.OP, String.valueOf(c)));
  90. break;
  91. case IS_COMP_FLAG:
  92. // special case for default values
  93. if((exprType == Type.DEFAULT_VALUE) && (c == EQUALS_CHAR) &&
  94. (buf.prevPos() == 0)) {
  95. // a leading equals sign indicates how a default value should be
  96. // evaluated
  97. tokens.add(new Token(TokenType.OP, String.valueOf(c)));
  98. continue;
  99. }
  100. tokens.add(new Token(TokenType.OP, parseCompOp(c, buf)));
  101. break;
  102. case IS_DELIM_FLAG:
  103. // all delimiter chars are single character symbols
  104. tokens.add(new Token(TokenType.DELIM, String.valueOf(c)));
  105. break;
  106. case IS_SPACE_FLAG:
  107. // normalize whitespace into single space
  108. consumeWhitespace(buf);
  109. tokens.add(new Token(TokenType.SPACE, " "));
  110. break;
  111. case IS_QUOTE_FLAG:
  112. switch(c) {
  113. case QUOTED_STR_CHAR:
  114. case SINGLE_QUOTED_STR_CHAR:
  115. tokens.add(new Token(TokenType.LITERAL, null,
  116. parseQuotedString(buf, c), Value.Type.STRING));
  117. break;
  118. case DATE_LIT_QUOTE_CHAR:
  119. tokens.add(parseDateLiteral(buf));
  120. break;
  121. case OBJ_NAME_START_CHAR:
  122. tokens.add(new Token(TokenType.OBJ_NAME, parseObjNameString(buf)));
  123. break;
  124. default:
  125. throw new ParseException(
  126. "Invalid leading quote character " + c + " " + buf);
  127. }
  128. break;
  129. default:
  130. throw new RuntimeException("unknown char flag " + charFlag);
  131. }
  132. } else {
  133. if(isDigit(c)) {
  134. Token numLit = maybeParseNumberLiteral(c, buf);
  135. if(numLit != null) {
  136. tokens.add(numLit);
  137. continue;
  138. }
  139. }
  140. // standalone word of some sort
  141. String str = parseBareString(c, buf, exprType);
  142. tokens.add(new Token(TokenType.STRING, str));
  143. }
  144. }
  145. return tokens;
  146. }
  147. private static byte getCharFlag(char c) {
  148. return ((c < 128) ? CHAR_FLAGS[c] : 0);
  149. }
  150. private static boolean isSpecialChar(char c) {
  151. return (getCharFlag(c) != 0);
  152. }
  153. private static String parseCompOp(char firstChar, ExprBuf buf) {
  154. String opStr = String.valueOf(firstChar);
  155. int c = buf.peekNext();
  156. if((c != EOF) && hasFlag(getCharFlag((char)c), IS_COMP_FLAG)) {
  157. // is the combo a valid comparison operator?
  158. String tmpStr = opStr + (char)c;
  159. if(TWO_CHAR_COMP_OPS.contains(tmpStr)) {
  160. opStr = tmpStr;
  161. buf.next();
  162. }
  163. }
  164. return opStr;
  165. }
  166. private static void consumeWhitespace(ExprBuf buf) {
  167. int c = EOF;
  168. while(((c = buf.peekNext()) != EOF) &&
  169. hasFlag(getCharFlag((char)c), IS_SPACE_FLAG)) {
  170. buf.next();
  171. }
  172. }
  173. private static String parseBareString(char firstChar, ExprBuf buf,
  174. Type exprType) {
  175. StringBuilder sb = buf.getScratchBuffer().append(firstChar);
  176. byte stopFlags = (IS_OP_FLAG | IS_DELIM_FLAG | IS_SPACE_FLAG);
  177. if(exprType == Type.FIELD_VALIDATOR) {
  178. stopFlags |= IS_COMP_FLAG;
  179. }
  180. while(buf.hasNext()) {
  181. char c = buf.next();
  182. byte charFlag = getCharFlag(c);
  183. if(hasFlag(charFlag, stopFlags)) {
  184. buf.popPrev();
  185. break;
  186. }
  187. sb.append(c);
  188. }
  189. return sb.toString();
  190. }
  191. private static String parseQuotedString(ExprBuf buf, char quoteChar) {
  192. return parseStringUntil(buf, null, quoteChar, true);
  193. }
  194. private static String parseObjNameString(ExprBuf buf) {
  195. return parseStringUntil(buf, OBJ_NAME_START_CHAR, OBJ_NAME_END_CHAR, false);
  196. }
  197. private static String parseDateLiteralString(ExprBuf buf) {
  198. return parseStringUntil(buf, null, DATE_LIT_QUOTE_CHAR, false);
  199. }
  200. static String parseStringUntil(ExprBuf buf, Character startChar,
  201. char endChar, boolean allowDoubledEscape)
  202. {
  203. return parseStringUntil(buf, startChar, endChar, allowDoubledEscape,
  204. buf.getScratchBuffer())
  205. .toString();
  206. }
  207. static StringBuilder parseStringUntil(
  208. ExprBuf buf, Character startChar, char endChar, boolean allowDoubledEscape,
  209. StringBuilder sb)
  210. {
  211. boolean complete = false;
  212. while(buf.hasNext()) {
  213. char c = buf.next();
  214. if(c == endChar) {
  215. if(allowDoubledEscape && (buf.peekNext() == endChar)) {
  216. buf.next();
  217. } else {
  218. complete = true;
  219. break;
  220. }
  221. } else if((startChar != null) &&
  222. (startChar == c)) {
  223. throw new ParseException("Missing closing '" + endChar +
  224. "' for quoted string " + buf);
  225. }
  226. sb.append(c);
  227. }
  228. if(!complete) {
  229. throw new ParseException("Missing closing '" + endChar +
  230. "' for quoted string " + buf);
  231. }
  232. return sb;
  233. }
  234. private static Token parseDateLiteral(ExprBuf buf)
  235. {
  236. String dateStr = parseDateLiteralString(buf);
  237. TemporalConfig.Type type = determineDateType(
  238. dateStr, buf.getContext());
  239. if(type == null) {
  240. throw new ParseException("Invalid date/time literal " + dateStr +
  241. " " + buf);
  242. }
  243. // note that although we may parse in the time "24" format, we will
  244. // display as the default time format
  245. DateTimeFormatter parseDf = buf.getParseDateTimeFormat(type);
  246. try {
  247. TemporalAccessor parsedInfo = parseDf.parse(dateStr);
  248. LocalDate ld = ColumnImpl.BASE_LD;
  249. if(type.includesDate()) {
  250. ld = LocalDate.from(parsedInfo);
  251. }
  252. LocalTime lt = ColumnImpl.BASE_LT;
  253. if(type.includesTime()) {
  254. lt = LocalTime.from(parsedInfo);
  255. }
  256. return new Token(TokenType.LITERAL, LocalDateTime.of(ld, lt),
  257. dateStr, type.getValueType());
  258. } catch(DateTimeException de) {
  259. throw new ParseException(
  260. "Invalid date/time literal " + dateStr + " " + buf, de);
  261. }
  262. }
  263. static TemporalConfig.Type determineDateType(
  264. String dateStr, LocaleContext ctx)
  265. {
  266. TemporalConfig cfg = ctx.getTemporalConfig();
  267. boolean hasDate = (dateStr.indexOf(cfg.getDateSeparator()) >= 0);
  268. boolean hasTime = (dateStr.indexOf(cfg.getTimeSeparator()) >= 0);
  269. boolean hasAmPm = false;
  270. if(hasTime) {
  271. String[] amPmStrs = cfg.getAmPmStrings();
  272. String amStr = " " + amPmStrs[0];
  273. String pmStr = " " + amPmStrs[1];
  274. hasAmPm = (hasSuffix(dateStr, amStr) || hasSuffix(dateStr, pmStr));
  275. }
  276. if(hasDate) {
  277. if(hasTime) {
  278. return (hasAmPm ? TemporalConfig.Type.DATE_TIME_12 :
  279. TemporalConfig.Type.DATE_TIME_24);
  280. }
  281. return TemporalConfig.Type.DATE;
  282. } else if(hasTime) {
  283. return (hasAmPm ? TemporalConfig.Type.TIME_12 :
  284. TemporalConfig.Type.TIME_24);
  285. }
  286. return null;
  287. }
  288. private static boolean hasSuffix(String str, String suffStr) {
  289. int strLen = str.length();
  290. int suffStrLen = suffStr.length();
  291. return ((strLen >= suffStrLen) &&
  292. str.regionMatches(true, strLen - suffStrLen,
  293. suffStr, 0, suffStrLen));
  294. }
  295. private static Token maybeParseNumberLiteral(char firstChar, ExprBuf buf) {
  296. StringBuilder sb = buf.getScratchBuffer().append(firstChar);
  297. boolean hasDigit = isDigit(firstChar);
  298. int startPos = buf.curPos();
  299. boolean foundNum = false;
  300. boolean isFp = false;
  301. int expPos = -1;
  302. try {
  303. int c = EOF;
  304. while((c = buf.peekNext()) != EOF) {
  305. if(isDigit(c)) {
  306. hasDigit = true;
  307. sb.append((char)c);
  308. buf.next();
  309. } else if(c == '.') {
  310. isFp = true;
  311. sb.append((char)c);
  312. buf.next();
  313. } else if(hasDigit && (expPos < 0) && ((c == 'e') || (c == 'E'))) {
  314. isFp = true;
  315. sb.append((char)c);
  316. expPos = sb.length();
  317. buf.next();
  318. } else if((expPos == sb.length()) && ((c == '-') || (c == '+'))) {
  319. sb.append((char)c);
  320. buf.next();
  321. } else if(isSpecialChar((char)c)) {
  322. break;
  323. } else {
  324. // found a non-number, non-special string
  325. return null;
  326. }
  327. }
  328. if(!hasDigit) {
  329. // no digits, no number
  330. return null;
  331. }
  332. String numStr = sb.toString();
  333. try {
  334. Number num = null;
  335. Value.Type numType = null;
  336. if(!isFp) {
  337. try {
  338. // try to parse as int. if that fails, fall back to BigDecimal
  339. // (this will handle the case of int overflow)
  340. num = Integer.valueOf(numStr);
  341. numType = Value.Type.LONG;
  342. } catch(NumberFormatException ne) {
  343. // fallback to decimal
  344. }
  345. }
  346. if(num == null) {
  347. num = new BigDecimal(numStr);
  348. numType = Value.Type.BIG_DEC;
  349. }
  350. foundNum = true;
  351. return new Token(TokenType.LITERAL, num, numStr, numType);
  352. } catch(NumberFormatException ne) {
  353. throw new ParseException(
  354. "Invalid number literal " + numStr + " " + buf, ne);
  355. }
  356. } finally {
  357. if(!foundNum) {
  358. buf.reset(startPos);
  359. }
  360. }
  361. }
  362. private static boolean hasFlag(byte charFlag, byte flag) {
  363. return ((charFlag & flag) != 0);
  364. }
  365. private static void setCharFlag(byte flag, char... chars) {
  366. for(char c : chars) {
  367. CHAR_FLAGS[c] |= flag;
  368. }
  369. }
  370. private static boolean isDigit(int c) {
  371. return ((c >= '0') && (c <= '9'));
  372. }
  373. static <K,V> Map.Entry<K,V> newEntry(K a, V b) {
  374. return new AbstractMap.SimpleImmutableEntry<K,V>(a, b);
  375. }
  376. static final class ExprBuf
  377. {
  378. private final String _str;
  379. private final ParseContext _ctx;
  380. private int _pos;
  381. private final Map<TemporalConfig.Type,DateTimeFormatter> _dateTimeFmts =
  382. new EnumMap<TemporalConfig.Type,DateTimeFormatter>(
  383. TemporalConfig.Type.class);
  384. private final StringBuilder _scratch = new StringBuilder();
  385. ExprBuf(String str, ParseContext ctx) {
  386. _str = str;
  387. _ctx = ctx;
  388. }
  389. private int len() {
  390. return _str.length();
  391. }
  392. public int curPos() {
  393. return _pos;
  394. }
  395. public int prevPos() {
  396. return _pos - 1;
  397. }
  398. public boolean hasNext() {
  399. return _pos < len();
  400. }
  401. public char next() {
  402. return _str.charAt(_pos++);
  403. }
  404. public void popPrev() {
  405. --_pos;
  406. }
  407. public int peekNext() {
  408. if(!hasNext()) {
  409. return EOF;
  410. }
  411. return _str.charAt(_pos);
  412. }
  413. public void reset(int pos) {
  414. _pos = pos;
  415. }
  416. public StringBuilder getScratchBuffer() {
  417. _scratch.setLength(0);
  418. return _scratch;
  419. }
  420. public ParseContext getContext() {
  421. return _ctx;
  422. }
  423. public DateTimeFormatter getParseDateTimeFormat(TemporalConfig.Type type) {
  424. DateTimeFormatter df = _dateTimeFmts.get(type);
  425. if(df == null) {
  426. df = _ctx.createDateFormatter(
  427. _ctx.getTemporalConfig().getDateTimeFormat(type));
  428. _dateTimeFmts.put(type, df);
  429. }
  430. return df;
  431. }
  432. @Override
  433. public String toString() {
  434. return "[char " + _pos + "] '" + _str + "'";
  435. }
  436. }
  437. static final class Token
  438. {
  439. private final TokenType _type;
  440. private final Object _val;
  441. private final String _valStr;
  442. private final Value.Type _valType;
  443. private Token(TokenType type, String val) {
  444. this(type, val, val);
  445. }
  446. private Token(TokenType type, Object val, String valStr) {
  447. this(type, val, valStr, null);
  448. }
  449. private Token(TokenType type, Object val, String valStr, Value.Type valType) {
  450. _type = type;
  451. _val = ((val != null) ? val : valStr);
  452. _valStr = valStr;
  453. _valType = valType;
  454. }
  455. public TokenType getType() {
  456. return _type;
  457. }
  458. public Object getValue() {
  459. return _val;
  460. }
  461. public String getValueStr() {
  462. return _valStr;
  463. }
  464. public Value.Type getValueType() {
  465. return _valType;
  466. }
  467. @Override
  468. public String toString() {
  469. if(_type == TokenType.SPACE) {
  470. return "' '";
  471. }
  472. String str = "[" + _type + "] '" + _val + "'";
  473. if(_valType != null) {
  474. str += " (" + _valType + ")";
  475. }
  476. return str;
  477. }
  478. }
  479. }