You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Lex.java 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. /*
  2. * Javassist, a Java-bytecode translator toolkit.
  3. * Copyright (C) 1999-2003 Shigeru Chiba. All Rights Reserved.
  4. *
  5. * The contents of this file are subject to the Mozilla Public License Version
  6. * 1.1 (the "License"); you may not use this file except in compliance with
  7. * the License. Alternatively, the contents of this file may be used under
  8. * the terms of the GNU Lesser General Public License Version 2.1 or later.
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. */
  15. package javassist.compiler;
  /**
   * One node of the singly linked token list maintained by {@link Lex}.
   *
   * <p>The lexer fills in {@code tokenId} for every token. The value fields are
   * only written for the token kinds that carry a value (integer and character
   * constants write {@code longValue}, floating-point constants write
   * {@code doubleValue}, string literals and identifiers write
   * {@code textValue}); for other kinds they keep whatever they held before.
   */
  class Token {
    /** Following token in the look-ahead chain, or {@code null} if there is none. */
    public Token next;

    /** Kind of this token, as returned by the lexer. */
    public int tokenId;

    /** Value of an integer or character constant. */
    public long longValue;

    /** Value of a floating-point constant. */
    public double doubleValue;

    /** Text of a string literal or identifier. */
    public String textValue;
  }
  23. public class Lex implements TokenId {
  24. private int lastChar;
  25. private StringBuffer textBuffer;
  26. private Token currentToken;
  27. private Token lookAheadTokens;
  28. private String input;
  29. private int position, maxlen, lineNumber;
  /**
   * Constructs a lexical analyzer that reads its tokens from the given text.
   *
   * @param s the source text to scan; its length is taken here, so it must
   *          not be {@code null}
   */
  public Lex(String s) {
    // Input cursor.
    this.input = s;
    this.maxlen = s.length();
    this.position = 0;
    this.lineNumber = 0;

    // A negative value marks the one-character push-back slot as empty.
    this.lastChar = -1;

    // Token state: nothing looked ahead yet, one reusable token object.
    this.lookAheadTokens = null;
    this.currentToken = new Token();
    this.textBuffer = new StringBuffer();
  }
  43. public int get() {
  44. if (lookAheadTokens == null)
  45. return get(currentToken);
  46. else {
  47. Token t;
  48. currentToken = t = lookAheadTokens;
  49. lookAheadTokens = lookAheadTokens.next;
  50. return t.tokenId;
  51. }
  52. }
  53. /**
  54. * Looks at the next token.
  55. */
  56. public int lookAhead() {
  57. return lookAhead(0);
  58. }
  59. public int lookAhead(int i) {
  60. Token tk = lookAheadTokens;
  61. if (tk == null) {
  62. lookAheadTokens = tk = currentToken; // reuse an object!
  63. tk.next = null;
  64. get(tk);
  65. }
  66. for (; i-- > 0; tk = tk.next)
  67. if (tk.next == null) {
  68. Token tk2;
  69. tk.next = tk2 = new Token();
  70. get(tk2);
  71. }
  72. currentToken = tk;
  73. return tk.tokenId;
  74. }
  75. public String getString() {
  76. return currentToken.textValue;
  77. }
  78. public long getLong() {
  79. return currentToken.longValue;
  80. }
  81. public double getDouble() {
  82. return currentToken.doubleValue;
  83. }
  84. private int get(Token token) {
  85. int t;
  86. do {
  87. t = readLine(token);
  88. } while (t == '\n');
  89. token.tokenId = t;
  90. return t;
  91. }
  92. private int readLine(Token token) {
  93. int c = getNextNonWhiteChar();
  94. if(c < 0)
  95. return c;
  96. else if(c == '\n') {
  97. ++lineNumber;
  98. return '\n';
  99. }
  100. else if (c == '\'')
  101. return readCharConst(token);
  102. else if (c == '"')
  103. return readStringL(token);
  104. else if ('0' <= c && c <= '9')
  105. return readNumber(c, token);
  106. else if(c == '.'){
  107. c = getc();
  108. if ('0' <= c && c <= '9') {
  109. StringBuffer tbuf = textBuffer;
  110. tbuf.setLength(0);
  111. tbuf.append('.');
  112. return readDouble(tbuf, c, token);
  113. }
  114. else{
  115. ungetc(c);
  116. return readSeparator('.');
  117. }
  118. }
  119. else if ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_'
  120. || c == '$')
  121. return readIdentifier(c, token);
  122. else
  123. return readSeparator(c);
  124. }
  125. private int getNextNonWhiteChar() {
  126. int c;
  127. do {
  128. c = getc();
  129. if (c == '/') {
  130. c = getc();
  131. if (c == '/')
  132. do {
  133. c = getc();
  134. } while (c != '\n' && c != '\r' && c != -1);
  135. else if (c == '*')
  136. while (true) {
  137. c = getc();
  138. if (c == -1)
  139. break;
  140. else if (c == '*')
  141. if ((c = getc()) == '/') {
  142. c = ' ';
  143. break;
  144. }
  145. else
  146. ungetc(c);
  147. }
  148. else {
  149. ungetc(c);
  150. c = '/';
  151. }
  152. }
  153. } while(isBlank(c));
  154. return c;
  155. }
  156. private int readCharConst(Token token) {
  157. int c;
  158. int value = 0;
  159. while ((c = getc()) != '\'')
  160. if (c == '\\')
  161. value = readEscapeChar();
  162. else if (c < 0x20) {
  163. if (c == '\n')
  164. ++lineNumber;
  165. return BadToken;
  166. }
  167. else
  168. value = c;
  169. token.longValue = value;
  170. return CharConstant;
  171. }
  172. private int readEscapeChar() {
  173. int c = getc();
  174. if (c == 'n')
  175. c = '\n';
  176. else if (c == 't')
  177. c = '\t';
  178. else if (c == 'r')
  179. c = '\r';
  180. else if (c == 'f')
  181. c = '\f';
  182. else if (c == '\n')
  183. ++lineNumber;
  184. return c;
  185. }
  186. private int readStringL(Token token) {
  187. int c;
  188. StringBuffer tbuf = textBuffer;
  189. tbuf.setLength(0);
  190. for (;;) {
  191. while ((c = getc()) != '"') {
  192. if (c == '\\')
  193. c = readEscapeChar();
  194. else if (c == '\n' || c < 0) {
  195. ++lineNumber;
  196. return BadToken;
  197. }
  198. tbuf.append((char)c);
  199. }
  200. for (;;) {
  201. c = getc();
  202. if (c == '\n')
  203. ++lineNumber;
  204. else if (!isBlank(c))
  205. break;
  206. }
  207. if (c != '"') {
  208. ungetc(c);
  209. break;
  210. }
  211. }
  212. token.textValue = tbuf.toString();
  213. return StringL;
  214. }
  215. private int readNumber(int c, Token token) {
  216. long value = 0;
  217. int c2 = getc();
  218. if (c == '0')
  219. if (c2 == 'X' || c2 == 'x')
  220. for (;;) {
  221. c = getc();
  222. if ('0' <= c && c <= '9')
  223. value = value * 16 + (long)(c - '0');
  224. else if ('A' <= c && c <= 'F')
  225. value = value * 16 + (long)(c - 'A' + 10);
  226. else if ('a' <= c && c <= 'f')
  227. value = value * 16 + (long)(c - 'a' + 10);
  228. else {
  229. token.longValue = value;
  230. if (c == 'L' || c == 'l')
  231. return LongConstant;
  232. else {
  233. ungetc(c);
  234. return IntConstant;
  235. }
  236. }
  237. }
  238. else if ('0' <= c2 && c2 <= '7') {
  239. value = c2 - '0';
  240. for (;;) {
  241. c = getc();
  242. if ('0' <= c && c <= '7')
  243. value = value * 8 + (long)(c - '0');
  244. else {
  245. token.longValue = value;
  246. if (c == 'L' || c == 'l')
  247. return LongConstant;
  248. else {
  249. ungetc(c);
  250. return IntConstant;
  251. }
  252. }
  253. }
  254. }
  255. value = c - '0';
  256. while ('0' <= c2 && c2 <= '9') {
  257. value = value * 10 + c2 - '0';
  258. c2 = getc();
  259. }
  260. token.longValue = value;
  261. if (c2 == 'F' || c2 == 'f') {
  262. token.doubleValue = (double)value;
  263. return FloatConstant;
  264. }
  265. else if (c2 == 'E' || c2 == 'e' || c2 == '.') {
  266. StringBuffer tbuf = textBuffer;
  267. tbuf.setLength(0);
  268. tbuf.append(value);
  269. return readDouble(tbuf, c2, token);
  270. }
  271. else if (c2 == 'L' || c2 == 'l')
  272. return LongConstant;
  273. else {
  274. ungetc(c2);
  275. return IntConstant;
  276. }
  277. }
  278. private int readDouble(StringBuffer sbuf, int c, Token token) {
  279. if (c != 'E' && c != 'e') {
  280. sbuf.append((char)c);
  281. for (;;) {
  282. c = getc();
  283. if ('0' <= c && c <= '9')
  284. sbuf.append((char)c);
  285. else
  286. break;
  287. }
  288. }
  289. if (c == 'E' || c == 'e') {
  290. sbuf.append((char)c);
  291. c = getc();
  292. if (c == '+' || c == '-') {
  293. sbuf.append((char)c);
  294. c = getc();
  295. }
  296. while ('0' <= c && c <= '9') {
  297. sbuf.append((char)c);
  298. c = getc();
  299. }
  300. }
  301. try {
  302. token.doubleValue = Double.parseDouble(sbuf.toString());
  303. }
  304. catch (NumberFormatException e) {
  305. return BadToken;
  306. }
  307. if (c == 'F' || c == 'f')
  308. return FloatConstant;
  309. else {
  310. ungetc(c);
  311. return DoubleConstant;
  312. }
  313. }
  314. // !"#$%&'( )*+,-./0 12345678 9:;<=>?
  315. private static final int[] equalOps
  316. = { NEQ, 0, 0, 0, MOD_E, AND_E, 0, 0,
  317. 0, MUL_E, PLUS_E, 0, MINUS_E, 0, DIV_E, 0,
  318. 0, 0, 0, 0, 0, 0, 0, 0,
  319. 0, 0, 0, LE, EQ, GE, 0 };
  320. private int readSeparator(int c) {
  321. int c2, c3;
  322. if ('!' <= c && c <= '?') {
  323. int t = equalOps[c - '!'];
  324. if (t == 0)
  325. return c;
  326. else {
  327. c2 = getc();
  328. if (c == c2)
  329. switch (c) {
  330. case '=' :
  331. return EQ;
  332. case '+' :
  333. return PLUSPLUS;
  334. case '-' :
  335. return MINUSMINUS;
  336. case '&' :
  337. return ANDAND;
  338. case '<' :
  339. c3 = getc();
  340. if (c3 == '=')
  341. return LSHIFT_E;
  342. else {
  343. ungetc(c3);
  344. return LSHIFT;
  345. }
  346. case '>' :
  347. c3 = getc();
  348. if (c3 == '=')
  349. return RSHIFT_E;
  350. else if (c3 == '>') {
  351. c3 = getc();
  352. if (c3 == '=')
  353. return ARSHIFT_E;
  354. else {
  355. ungetc(c3);
  356. return ARSHIFT;
  357. }
  358. }
  359. else {
  360. ungetc(c3);
  361. return RSHIFT;
  362. }
  363. default :
  364. break;
  365. }
  366. else if (c2 == '=')
  367. return t;
  368. }
  369. }
  370. else if (c == '^') {
  371. c2 = getc();
  372. if (c2 == '=')
  373. return EXOR_E;
  374. }
  375. else if (c == '|') {
  376. c2 = getc();
  377. if (c2 == '=')
  378. return OR_E;
  379. else if (c2 == '|')
  380. return OROR;
  381. }
  382. else
  383. return c;
  384. ungetc(c2);
  385. return c;
  386. }
  387. private int readIdentifier(int c, Token token) {
  388. StringBuffer tbuf = textBuffer;
  389. tbuf.setLength(0);
  390. do {
  391. tbuf.append((char)c);
  392. c = getc();
  393. } while ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '_'
  394. || c == '$' || '0' <= c && c <= '9');
  395. ungetc(c);
  396. String name = tbuf.toString();
  397. int t = ktable.lookup(name);
  398. if (t >= 0)
  399. return t;
  400. else {
  401. /* tbuf.toString() is executed quickly since it does not
  402. * need memory copy. Using a hand-written extensible
  403. * byte-array class instead of StringBuffer is not a good idea
  404. * for execution speed. Converting a byte array to a String
  405. * object is very slow. Using an extensible char array
  406. * might be OK.
  407. */
  408. token.textValue = name;
  409. return Identifier;
  410. }
  411. }
  /**
   * Maps each reserved word to its token id. {@code readIdentifier} looks
   * every scanned word up here to tell reserved words from identifiers.
   */
  private static final KeywordTable ktable = new KeywordTable();

  static {
    ktable.append("abstract", ABSTRACT);
    ktable.append("boolean", BOOLEAN);
    ktable.append("break", BREAK);
    ktable.append("byte", BYTE);
    ktable.append("case", CASE);
    ktable.append("catch", CATCH);
    ktable.append("char", CHAR);
    ktable.append("class", CLASS);
    ktable.append("const", CONST);
    ktable.append("continue", CONTINUE);
    ktable.append("default", DEFAULT);
    ktable.append("do", DO);
    ktable.append("double", DOUBLE);
    ktable.append("else", ELSE);
    ktable.append("extends", EXTENDS);
    ktable.append("false", FALSE);
    ktable.append("final", FINAL);
    ktable.append("finally", FINALLY);
    ktable.append("float", FLOAT);
    ktable.append("for", FOR);
    ktable.append("goto", GOTO);
    ktable.append("if", IF);
    ktable.append("implements", IMPLEMENTS);
    ktable.append("import", IMPORT);
    ktable.append("instanceof", INSTANCEOF);
    ktable.append("int", INT);
    ktable.append("interface", INTERFACE);
    ktable.append("long", LONG);
    ktable.append("native", NATIVE);
    ktable.append("new", NEW);
    ktable.append("null", NULL);
    ktable.append("package", PACKAGE);
    ktable.append("private", PRIVATE);
    ktable.append("protected", PROTECTED);
    ktable.append("public", PUBLIC);
    ktable.append("return", RETURN);
    ktable.append("short", SHORT);
    ktable.append("static", STATIC);
    // The Java keyword is "strictfp". The table used to register "strict",
    // which made the legal identifier "strict" a reserved word and left
    // "strictfp" unrecognised.
    ktable.append("strictfp", STRICT);
    ktable.append("super", SUPER);
    ktable.append("switch", SWITCH);
    ktable.append("synchronized", SYNCHRONIZED);
    ktable.append("this", THIS);
    ktable.append("throw", THROW);
    ktable.append("throws", THROWS);
    ktable.append("transient", TRANSIENT);
    ktable.append("true", TRUE);
    ktable.append("try", TRY);
    ktable.append("void", VOID);
    ktable.append("volatile", VOLATILE);
    ktable.append("while", WHILE);
  }
  466. private static boolean isBlank(int c) {
  467. return c == ' ' || c == '\t' || c == '\f' || c == '\r'
  468. || c == '\n';
  469. }
  470. private static boolean isDigit(int c) {
  471. return '0' <= c && c <= '9';
  472. }
  473. private void ungetc(int c) {
  474. lastChar = c;
  475. }
  476. private int getc() {
  477. if (lastChar < 0)
  478. if (position < maxlen)
  479. return input.charAt(position++);
  480. else
  481. return -1;
  482. else {
  483. int c = lastChar;
  484. lastChar = -1;
  485. return c;
  486. }
  487. }
  488. }