You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PropertyTokenizer.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.fo.expr;
  19. /**
  20. * Class to tokenize XSL FO property expression.
  21. * This class is heavily based on the epxression tokenizer in James Clark's
  22. * XT, an XSLT processor.
  23. */
  24. class PropertyTokenizer {
  25. static final int TOK_EOF = 0;
  26. static final int TOK_NCNAME = TOK_EOF + 1;
  27. static final int TOK_MULTIPLY = TOK_NCNAME + 1;
  28. static final int TOK_LPAR = TOK_MULTIPLY + 1;
  29. static final int TOK_RPAR = TOK_LPAR + 1;
  30. static final int TOK_LITERAL = TOK_RPAR + 1;
  31. static final int TOK_NUMBER = TOK_LITERAL + 1;
  32. static final int TOK_FUNCTION_LPAR = TOK_NUMBER + 1;
  33. static final int TOK_PLUS = TOK_FUNCTION_LPAR + 1;
  34. static final int TOK_MINUS = TOK_PLUS + 1;
  35. static final int TOK_MOD = TOK_MINUS + 1;
  36. static final int TOK_DIV = TOK_MOD + 1;
  37. static final int TOK_NUMERIC = TOK_DIV + 1;
  38. static final int TOK_COMMA = TOK_NUMERIC + 1;
  39. static final int TOK_PERCENT = TOK_COMMA + 1;
  40. static final int TOK_COLORSPEC = TOK_PERCENT + 1;
  41. static final int TOK_FLOAT = TOK_COLORSPEC + 1;
  42. static final int TOK_INTEGER = TOK_FLOAT + 1;
  43. protected int currentToken = TOK_EOF;
  44. protected String currentTokenValue = null;
  45. protected int currentUnitLength = 0;
  46. private int currentTokenStartIndex = 0;
  47. private /* final */ String expr;
  48. private int exprIndex = 0;
  49. private int exprLength;
  50. private boolean recognizeOperator = false;
  51. /**
  52. * Construct a new PropertyTokenizer object to tokenize the passed
  53. * String.
  54. * @param s The Property expressio to tokenize.
  55. */
  56. PropertyTokenizer(String s) {
  57. this.expr = s;
  58. this.exprLength = s.length();
  59. }
  60. /**
  61. * Return the next token in the expression string.
  62. * This sets the following package visible variables:
  63. * currentToken An enumerated value identifying the recognized token
  64. * currentTokenValue A String containing the token contents
  65. * currentUnitLength If currentToken = TOK_NUMERIC, the number of
  66. * characters in the unit name.
  67. * @throws PropertyException If un unrecognized token is encountered.
  68. */
  69. void next() throws PropertyException {
  70. currentTokenValue = null;
  71. currentTokenStartIndex = exprIndex;
  72. boolean currentMaybeOperator = recognizeOperator;
  73. boolean bSawDecimal;
  74. recognizeOperator = true;
  75. while ( true ) {
  76. if (exprIndex >= exprLength) {
  77. currentToken = TOK_EOF;
  78. return;
  79. }
  80. char c = expr.charAt(exprIndex++);
  81. switch (c) {
  82. case ' ':
  83. case '\t':
  84. case '\r':
  85. case '\n':
  86. currentTokenStartIndex = exprIndex;
  87. break;
  88. case ',':
  89. recognizeOperator = false;
  90. currentToken = TOK_COMMA;
  91. return;
  92. case '+':
  93. recognizeOperator = false;
  94. currentToken = TOK_PLUS;
  95. return;
  96. case '-':
  97. recognizeOperator = false;
  98. currentToken = TOK_MINUS;
  99. return;
  100. case '(':
  101. currentToken = TOK_LPAR;
  102. recognizeOperator = false;
  103. return;
  104. case ')':
  105. currentToken = TOK_RPAR;
  106. return;
  107. case '"':
  108. case '\'':
  109. exprIndex = expr.indexOf(c, exprIndex);
  110. if (exprIndex < 0) {
  111. exprIndex = currentTokenStartIndex + 1;
  112. throw new PropertyException("missing quote");
  113. }
  114. currentTokenValue = expr.substring(currentTokenStartIndex
  115. + 1, exprIndex++);
  116. currentToken = TOK_LITERAL;
  117. return;
  118. case '*':
  119. /*
  120. * if (currentMaybeOperator) {
  121. * recognizeOperator = false;
  122. */
  123. currentToken = TOK_MULTIPLY;
  124. /*
  125. * }
  126. * else
  127. * throw new PropertyException("illegal operator *");
  128. */
  129. return;
  130. case '0':
  131. case '1':
  132. case '2':
  133. case '3':
  134. case '4':
  135. case '5':
  136. case '6':
  137. case '7':
  138. case '8':
  139. case '9':
  140. scanDigits();
  141. if (exprIndex < exprLength && expr.charAt(exprIndex) == '.') {
  142. exprIndex++;
  143. bSawDecimal = true;
  144. if (exprIndex < exprLength
  145. && isDigit(expr.charAt(exprIndex))) {
  146. exprIndex++;
  147. scanDigits();
  148. }
  149. } else {
  150. bSawDecimal = false;
  151. }
  152. if (exprIndex < exprLength && expr.charAt(exprIndex) == '%') {
  153. exprIndex++;
  154. currentToken = TOK_PERCENT;
  155. } else {
  156. // Check for possible unit name following number
  157. currentUnitLength = exprIndex;
  158. scanName();
  159. currentUnitLength = exprIndex - currentUnitLength;
  160. currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
  161. : (bSawDecimal ? TOK_FLOAT : TOK_INTEGER);
  162. }
  163. currentTokenValue = expr.substring(currentTokenStartIndex,
  164. exprIndex);
  165. return;
  166. case '.':
  167. nextDecimalPoint();
  168. return;
  169. case '#': // Start of color value
  170. nextColor();
  171. return;
  172. default:
  173. --exprIndex;
  174. scanName();
  175. if (exprIndex == currentTokenStartIndex) {
  176. throw new PropertyException("illegal character");
  177. }
  178. currentTokenValue = expr.substring(currentTokenStartIndex,
  179. exprIndex);
  180. // if (currentMaybeOperator) {
  181. if (currentTokenValue.equals("mod")) {
  182. currentToken = TOK_MOD;
  183. return;
  184. } else if (currentTokenValue.equals("div")) {
  185. currentToken = TOK_DIV;
  186. return;
  187. }
  188. /*
  189. * else
  190. * throw new PropertyException("unrecognized operator name");
  191. * recognizeOperator = false;
  192. * return;
  193. * }
  194. */
  195. if (followingParen()) {
  196. currentToken = TOK_FUNCTION_LPAR;
  197. recognizeOperator = false;
  198. } else {
  199. currentToken = TOK_NCNAME;
  200. recognizeOperator = false;
  201. }
  202. return;
  203. }
  204. }
  205. }
  206. private void nextDecimalPoint() throws PropertyException {
  207. if (exprIndex < exprLength
  208. && isDigit(expr.charAt(exprIndex))) {
  209. ++exprIndex;
  210. scanDigits();
  211. if (exprIndex < exprLength
  212. && expr.charAt(exprIndex) == '%') {
  213. exprIndex++;
  214. currentToken = TOK_PERCENT;
  215. } else {
  216. // Check for possible unit name following number
  217. currentUnitLength = exprIndex;
  218. scanName();
  219. currentUnitLength = exprIndex - currentUnitLength;
  220. currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
  221. : TOK_FLOAT;
  222. }
  223. currentTokenValue = expr.substring(currentTokenStartIndex,
  224. exprIndex);
  225. return;
  226. }
  227. throw new PropertyException("illegal character '.'");
  228. }
  229. private void nextColor() throws PropertyException {
  230. if (exprIndex < exprLength
  231. && isHexDigit(expr.charAt(exprIndex))) {
  232. ++exprIndex;
  233. scanHexDigits();
  234. int len = exprIndex - currentTokenStartIndex - 1;
  235. if (len % 3 == 0) {
  236. currentToken = TOK_COLORSPEC;
  237. } else {
  238. scanRestOfName();
  239. currentToken = TOK_NCNAME;
  240. }
  241. currentTokenValue = expr.substring(currentTokenStartIndex,
  242. exprIndex);
  243. return;
  244. } else {
  245. throw new PropertyException("illegal character '#'");
  246. }
  247. }
  248. /**
  249. * Attempt to recognize a valid NAME token in the input expression.
  250. */
  251. private void scanName() {
  252. if (exprIndex < exprLength && isNameStartChar(expr.charAt(exprIndex))) {
  253. scanRestOfName();
  254. }
  255. }
  256. private void scanRestOfName() {
  257. while ( ++exprIndex < exprLength ) {
  258. if ( !isNameChar ( expr.charAt ( exprIndex ) ) ) {
  259. break;
  260. }
  261. }
  262. }
  263. /**
  264. * Attempt to recognize a valid sequence of decimal DIGITS in the
  265. * input expression.
  266. */
  267. private void scanDigits() {
  268. while (exprIndex < exprLength && isDigit(expr.charAt(exprIndex))) {
  269. exprIndex++;
  270. }
  271. }
  272. /**
  273. * Attempt to recognize a valid sequence of hexadecimal DIGITS in the
  274. * input expression.
  275. */
  276. private void scanHexDigits() {
  277. while (exprIndex < exprLength && isHexDigit(expr.charAt(exprIndex))) {
  278. exprIndex++;
  279. }
  280. }
  281. /**
  282. * Return a boolean value indicating whether the following non-whitespace
  283. * character is an opening parenthesis.
  284. */
  285. private boolean followingParen() {
  286. for (int i = exprIndex; i < exprLength; i++) {
  287. switch (expr.charAt(i)) {
  288. case '(':
  289. exprIndex = i + 1;
  290. return true;
  291. case ' ':
  292. case '\r':
  293. case '\n':
  294. case '\t':
  295. break;
  296. default:
  297. return false;
  298. }
  299. }
  300. return false;
  301. }
  302. private static final String NAME_START_CHARS
  303. = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  304. private static final String NAME_CHARS = ".-0123456789";
  305. private static final String DIGITS = "0123456789";
  306. private static final String HEX_CHARS = DIGITS + "abcdefABCDEF";
  307. /**
  308. * Return a boolean value indicating whether the argument is a
  309. * decimal digit (0-9).
  310. * @param c The character to check
  311. */
  312. private static boolean isDigit(char c) {
  313. return DIGITS.indexOf(c) >= 0;
  314. }
  315. /**
  316. * Return a boolean value indicating whether the argument is a
  317. * hexadecimal digit (0-9, A-F, a-f).
  318. * @param c The character to check
  319. */
  320. private static boolean isHexDigit(char c) {
  321. return HEX_CHARS.indexOf(c) >= 0;
  322. }
  323. /**
  324. * Return a boolean value indicating whether the argument is whitespace
  325. * as defined by XSL (space, newline, CR, tab).
  326. * @param c The character to check
  327. */
  328. private static boolean isSpace(char c) {
  329. switch (c) {
  330. case ' ':
  331. case '\r':
  332. case '\n':
  333. case '\t':
  334. return true;
  335. default:
  336. return false;
  337. }
  338. }
  339. /**
  340. * Return a boolean value indicating whether the argument is a valid name
  341. * start character, ie. can start a NAME as defined by XSL.
  342. * @param c The character to check
  343. */
  344. private static boolean isNameStartChar(char c) {
  345. return NAME_START_CHARS.indexOf(c) >= 0 || c >= 0x80;
  346. }
  347. /**
  348. * Return a boolean value indicating whether the argument is a valid name
  349. * character, ie. can occur in a NAME as defined by XSL.
  350. * @param c The character to check
  351. */
  352. private static boolean isNameChar(char c) {
  353. return NAME_START_CHARS.indexOf(c) >= 0 || NAME_CHARS.indexOf(c) >= 0
  354. || c >= 0x80;
  355. }
  356. }