You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PropertyTokenizer.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.fo.expr;
  19. /**
  20. * Class to tokenize XSL FO property expression.
  21. * This class is heavily based on the epxression tokenizer in James Clark's
  22. * XT, an XSLT processor.
  23. */
  24. class PropertyTokenizer {
  25. static final int TOK_EOF = 0;
  26. static final int TOK_NCNAME = 1;
  27. static final int TOK_MULTIPLY = TOK_NCNAME + 1;
  28. static final int TOK_LPAR = TOK_MULTIPLY + 1;
  29. static final int TOK_RPAR = TOK_LPAR + 1;
  30. static final int TOK_LITERAL = TOK_RPAR + 1;
  31. static final int TOK_NUMBER = TOK_LITERAL + 1;
  32. static final int TOK_FUNCTION_LPAR = TOK_NUMBER + 1;
  33. static final int TOK_PLUS = TOK_FUNCTION_LPAR + 1;
  34. static final int TOK_MINUS = TOK_PLUS + 1;
  35. static final int TOK_MOD = TOK_MINUS + 1;
  36. static final int TOK_DIV = TOK_MOD + 1;
  37. static final int TOK_NUMERIC = TOK_DIV + 1;
  38. static final int TOK_COMMA = TOK_NUMERIC + 1;
  39. static final int TOK_PERCENT = TOK_COMMA + 1;
  40. static final int TOK_COLORSPEC = TOK_PERCENT + 1;
  41. static final int TOK_FLOAT = TOK_COLORSPEC + 1;
  42. static final int TOK_INTEGER = TOK_FLOAT + 1;
  43. protected int currentToken = TOK_EOF;
  44. protected String currentTokenValue;
  45. protected int currentUnitLength;
  46. private int currentTokenStartIndex;
  47. private /* final */ String expr;
  48. private int exprIndex;
  49. private int exprLength;
  50. /**
  51. * Construct a new PropertyTokenizer object to tokenize the passed
  52. * String.
  53. * @param s The Property expressio to tokenize.
  54. */
  55. PropertyTokenizer(String s) {
  56. this.expr = s;
  57. this.exprLength = s.length();
  58. }
  59. /**
  60. * Parse the next token in the expression string.
  61. * This sets the following package visible variables:
  62. * currentToken An enumerated value identifying the recognized token
  63. * currentTokenValue A String containing the token contents
  64. * currentUnitLength If currentToken = TOK_NUMERIC, the number of
  65. * characters in the unit name.
  66. * @throws PropertyException If un unrecognized token is encountered.
  67. */
  68. void next() throws PropertyException {
  69. currentTokenValue = null;
  70. currentTokenStartIndex = exprIndex;
  71. boolean bSawDecimal;
  72. while (true) {
  73. if (exprIndex >= exprLength) {
  74. currentToken = TOK_EOF;
  75. return;
  76. }
  77. char c = expr.charAt(exprIndex++);
  78. switch (c) {
  79. case ' ':
  80. case '\t':
  81. case '\r':
  82. case '\n':
  83. currentTokenStartIndex = exprIndex;
  84. break;
  85. case ',':
  86. currentToken = TOK_COMMA;
  87. return;
  88. case '+':
  89. currentToken = TOK_PLUS;
  90. return;
  91. case '-':
  92. currentToken = TOK_MINUS;
  93. return;
  94. case '(':
  95. currentToken = TOK_LPAR;
  96. return;
  97. case ')':
  98. currentToken = TOK_RPAR;
  99. return;
  100. case '"':
  101. case '\'':
  102. exprIndex = expr.indexOf(c, exprIndex);
  103. if (exprIndex < 0) {
  104. exprIndex = currentTokenStartIndex + 1;
  105. throw new PropertyException("missing quote");
  106. }
  107. currentTokenValue = expr.substring(currentTokenStartIndex
  108. + 1, exprIndex++);
  109. currentToken = TOK_LITERAL;
  110. return;
  111. case '*':
  112. /*
  113. * if (currentMaybeOperator) {
  114. * recognizeOperator = false;
  115. */
  116. currentToken = TOK_MULTIPLY;
  117. /*
  118. * }
  119. * else
  120. * throw new PropertyException("illegal operator *");
  121. */
  122. return;
  123. case '0':
  124. case '1':
  125. case '2':
  126. case '3':
  127. case '4':
  128. case '5':
  129. case '6':
  130. case '7':
  131. case '8':
  132. case '9':
  133. scanDigits();
  134. if (exprIndex < exprLength && expr.charAt(exprIndex) == '.') {
  135. exprIndex++;
  136. bSawDecimal = true;
  137. if (exprIndex < exprLength
  138. && isDigit(expr.charAt(exprIndex))) {
  139. exprIndex++;
  140. scanDigits();
  141. }
  142. } else {
  143. bSawDecimal = false;
  144. }
  145. if (exprIndex < exprLength && expr.charAt(exprIndex) == '%') {
  146. exprIndex++;
  147. currentToken = TOK_PERCENT;
  148. } else {
  149. // Check for possible unit name following number
  150. currentUnitLength = exprIndex;
  151. scanName();
  152. currentUnitLength = exprIndex - currentUnitLength;
  153. currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
  154. : (bSawDecimal ? TOK_FLOAT : TOK_INTEGER);
  155. }
  156. currentTokenValue = expr.substring(currentTokenStartIndex,
  157. exprIndex);
  158. return;
  159. case '.':
  160. nextDecimalPoint();
  161. return;
  162. case '#': // Start of color value
  163. nextColor();
  164. return;
  165. default:
  166. --exprIndex;
  167. scanName();
  168. if (exprIndex == currentTokenStartIndex) {
  169. throw new PropertyException("illegal character");
  170. }
  171. currentTokenValue = expr.substring(currentTokenStartIndex, exprIndex);
  172. if (currentTokenValue.equals("mod")) {
  173. currentToken = TOK_MOD;
  174. return;
  175. } else if (currentTokenValue.equals("div")) {
  176. currentToken = TOK_DIV;
  177. return;
  178. }
  179. if (followingParen()) {
  180. currentToken = TOK_FUNCTION_LPAR;
  181. } else {
  182. currentToken = TOK_NCNAME;
  183. }
  184. return;
  185. }
  186. }
  187. }
  188. private void nextDecimalPoint() throws PropertyException {
  189. if (exprIndex < exprLength
  190. && isDigit(expr.charAt(exprIndex))) {
  191. ++exprIndex;
  192. scanDigits();
  193. if (exprIndex < exprLength
  194. && expr.charAt(exprIndex) == '%') {
  195. exprIndex++;
  196. currentToken = TOK_PERCENT;
  197. } else {
  198. // Check for possible unit name following number
  199. currentUnitLength = exprIndex;
  200. scanName();
  201. currentUnitLength = exprIndex - currentUnitLength;
  202. currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
  203. : TOK_FLOAT;
  204. }
  205. currentTokenValue = expr.substring(currentTokenStartIndex,
  206. exprIndex);
  207. return;
  208. }
  209. throw new PropertyException("illegal character '.'");
  210. }
  211. private void nextColor() throws PropertyException {
  212. if (exprIndex < exprLength) {
  213. ++exprIndex;
  214. scanHexDigits();
  215. int len = exprIndex - currentTokenStartIndex - 1;
  216. if (len % 3 == 0) {
  217. currentToken = TOK_COLORSPEC;
  218. } else {
  219. //Actually not a color at all, but an NCNAME starting with "#"
  220. scanRestOfName();
  221. currentToken = TOK_NCNAME;
  222. }
  223. currentTokenValue = expr.substring(currentTokenStartIndex,
  224. exprIndex);
  225. return;
  226. } else {
  227. throw new PropertyException("illegal character '#'");
  228. }
  229. }
  230. /**
  231. * Attempt to recognize a valid NAME token in the input expression.
  232. */
  233. private void scanName() {
  234. if (exprIndex < exprLength && isNameStartChar(expr.charAt(exprIndex))) {
  235. scanRestOfName();
  236. }
  237. }
  238. private void scanRestOfName() {
  239. while (++exprIndex < exprLength) {
  240. if (!isNameChar(expr.charAt(exprIndex))) {
  241. break;
  242. }
  243. }
  244. }
  245. /**
  246. * Attempt to recognize a valid sequence of decimal DIGITS in the
  247. * input expression.
  248. */
  249. private void scanDigits() {
  250. while (exprIndex < exprLength && isDigit(expr.charAt(exprIndex))) {
  251. exprIndex++;
  252. }
  253. }
  254. /**
  255. * Attempt to recognize a valid sequence of hexadecimal DIGITS in the
  256. * input expression.
  257. */
  258. private void scanHexDigits() {
  259. while (exprIndex < exprLength && isHexDigit(expr.charAt(exprIndex))) {
  260. exprIndex++;
  261. }
  262. }
  263. /**
  264. * Return a boolean value indicating whether the following non-whitespace
  265. * character is an opening parenthesis.
  266. */
  267. private boolean followingParen() {
  268. for (int i = exprIndex; i < exprLength; i++) {
  269. switch (expr.charAt(i)) {
  270. case '(':
  271. exprIndex = i + 1;
  272. return true;
  273. case ' ':
  274. case '\r':
  275. case '\n':
  276. case '\t':
  277. break;
  278. default:
  279. return false;
  280. }
  281. }
  282. return false;
  283. }
  284. private static final String NAME_START_CHARS
  285. = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  286. private static final String NAME_CHARS = ".-0123456789";
  287. private static final String DIGITS = "0123456789";
  288. private static final String HEX_CHARS = DIGITS + "abcdefABCDEF";
  289. /**
  290. * Return a boolean value indicating whether the argument is a
  291. * decimal digit (0-9).
  292. * @param c The character to check
  293. */
  294. private static boolean isDigit(char c) {
  295. return DIGITS.indexOf(c) >= 0;
  296. }
  297. /**
  298. * Return a boolean value indicating whether the argument is a
  299. * hexadecimal digit (0-9, A-F, a-f).
  300. * @param c The character to check
  301. */
  302. private static boolean isHexDigit(char c) {
  303. return HEX_CHARS.indexOf(c) >= 0;
  304. }
  305. /**
  306. * Return a boolean value indicating whether the argument is whitespace
  307. * as defined by XSL (space, newline, CR, tab).
  308. * @param c The character to check
  309. */
  310. private static boolean isSpace(char c) {
  311. switch (c) {
  312. case ' ':
  313. case '\r':
  314. case '\n':
  315. case '\t':
  316. return true;
  317. default:
  318. return false;
  319. }
  320. }
  321. /**
  322. * Return a boolean value indicating whether the argument is a valid name
  323. * start character, ie. can start a NAME as defined by XSL.
  324. * @param c The character to check
  325. */
  326. private static boolean isNameStartChar(char c) {
  327. return NAME_START_CHARS.indexOf(c) >= 0 || c >= 0x80;
  328. }
  329. /**
  330. * Return a boolean value indicating whether the argument is a valid name
  331. * character, ie. can occur in a NAME as defined by XSL.
  332. * @param c The character to check
  333. */
  334. private static boolean isNameChar(char c) {
  335. return NAME_START_CHARS.indexOf(c) >= 0 || NAME_CHARS.indexOf(c) >= 0
  336. || c >= 0x80;
  337. }
  338. }