You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

LightXMLParser.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /*******************************************************************************
  2. * Copyright (c) 2011 Contributors.
  3. * All rights reserved.
  4. * This program and the accompanying materials are made available
  5. * under the terms of the Eclipse Public License v 2.0
  6. * which accompanies this distribution and is available at
  7. * https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.txt
  8. *
  9. * Contributors:
  10. * Abraham Nevado - Lucierna initial implementation
  11. *******************************************************************************/
  12. package org.aspectj.weaver.loadtime.definition;
  13. import java.io.Reader;
  14. import java.util.ArrayList;
  15. import java.util.HashMap;
  16. import java.util.Map;
  /**
   * Small hand-written XML reader that builds a tree of {@code LightXMLParser} nodes.
   *
   * <p>Each node keeps its element name, its attributes and its child elements. Text content and
   * CDATA sections are consumed so that parsing can continue, but they are not stored. Comments,
   * {@code <?...?>} headers and {@code <!...>} declarations that precede the root element are
   * skipped.
   *
   * <p>An instance holds mutable parsing state (the reader and a one-character push-back slot) and
   * performs no synchronization.
   */
  public class LightXMLParser {

      /**
       * Marker stored in {@link #pushedBackChar} when no character is pushed back. As a consequence a
       * literal NUL character cannot be pushed back.
       */
      private static final char NULL_CHAR = '\0';

      /** Named entities recognised inside attribute values, keyed without {@code &} and {@code ;}. */
      private static final Map<String, char[]> entities = new HashMap<>();

      static {
          entities.put("amp", new char[] { '&' });
          entities.put("quot", new char[] { '"' });
          entities.put("apos", new char[] { '\'' });
          entities.put("lt", new char[] { '<' });
          entities.put("gt", new char[] { '>' });
      }

      /** Attribute name to attribute value (stored as {@code String}) for this element. */
      private Map<String, Object> attributes;

      /** Child elements in document order. */
      private ArrayList<LightXMLParser> children;

      /** Element name, or {@code null} until something has been parsed. */
      private String name;

      /** Single character of look-ahead, or {@link #NULL_CHAR} when empty. */
      private char pushedBackChar;

      /** Source of characters for the parse in progress. */
      private Reader reader;

      /** Creates an empty node with no name, no attributes and no children. */
      public LightXMLParser() {
          this.name = null;
          this.attributes = new HashMap<>();
          this.children = new ArrayList<>();
      }

      /**
       * Returns the child elements of this node.
       *
       * @return the internal list of children (not a copy), in document order
       */
      public ArrayList<LightXMLParser> getChildrens() {
          return this.children;
      }

      /**
       * Returns the element name of this node.
       *
       * @return the name, or {@code null} if nothing has been parsed yet
       */
      public String getName() {
          return this.name;
      }

      /**
       * Returns the attributes of this node.
       *
       * @return the internal attribute map (not a copy); values are {@code String} instances
       */
      public Map<String, Object> getAttributes() {
          return this.attributes;
      }

      /**
       * Parses one XML document from {@code reader} into this node, discarding any previous state.
       *
       * <p>Leading {@code <?...?>} and {@code <!...>} constructs are skipped; the first real element
       * becomes this node and parsing stops when that element is closed. The reader is not closed.
       *
       * @param reader source of the XML text
       * @throws Exception if the input is malformed, ends prematurely, or the reader fails
       */
      public void parseFromReader(Reader reader) throws Exception {
          this.pushedBackChar = NULL_CHAR;
          this.attributes = new HashMap<>();
          this.name = null;
          this.children = new ArrayList<>();
          this.reader = reader;

          while (true) {
              char c = this.skipBlanks();
              // Everything before the root must be a tag of some kind.
              if (c != '<') {
                  throw new Exception("LightParser Exception: Expected < but got: " + c);
              }
              c = this.getNextChar();
              if ((c == '!') || (c == '?')) {
                  // XML header, DOCTYPE or comment: ignore it and look for the next tag.
                  this.skipCommentOrXmlTag(0);
              } else {
                  // First character of the root element name: hand it back and parse the element.
                  this.pushBackChar(c);
                  this.parseNode(this);
                  // Only one root element is read.
                  return;
              }
          }
      }

      /** Consumes spaces, tabs, CR and LF and returns the first character that is none of those. */
      private char skipBlanks() throws Exception {
          while (true) {
              char c = this.getNextChar();
              switch (c) {
              case '\n':
              case '\r':
              case ' ':
              case '\t':
                  break;
              default:
                  return c;
              }
          }
      }

      /**
       * Like {@link #skipBlanks()}, but copies the skipped spaces, tabs and line feeds into
       * {@code result}. Carriage returns are skipped without being copied.
       */
      private char getWhitespaces(StringBuilder result) throws Exception {
          while (true) {
              char c = this.getNextChar();
              switch (c) {
              case ' ':
              case '\t':
              case '\n':
                  result.append(c);
                  break;
              case '\r':
                  // Dropped on purpose: only the other whitespace characters are recorded.
                  break;
              default:
                  return c;
              }
          }
      }

      /** Tells whether {@code c} may appear in an element or attribute name: letters, digits, {@code _-.:}. */
      private static boolean isNameChar(char c) {
          return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || ((c >= '0') && (c <= '9'))
                  || (c == '_') || (c == '-') || (c == '.') || (c == ':');
      }

      /**
       * Appends the longest run of name characters to {@code result}. The first character that is not
       * part of the name is pushed back. The run may be empty.
       */
      private void getNodeName(StringBuilder result) throws Exception {
          while (true) {
              char c = this.getNextChar();
              if (!isNameChar(c)) {
                  this.pushBackChar(c);
                  return;
              }
              result.append(c);
          }
      }

      /**
       * Reads a quoted attribute value (single or double quotes) into {@code string}, expanding
       * {@code &...;} entities. The quotes themselves are consumed and not stored.
       */
      private void getString(StringBuilder string) throws Exception {
          char delimiter = this.getNextChar();
          if ((delimiter != '\'') && (delimiter != '"')) {
              throw new Exception("Parsing error. Expected ' or \" but got: " + delimiter);
          }
          while (true) {
              char c = this.getNextChar();
              if (c == delimiter) {
                  return;
              } else if (c == '&') {
                  this.mapEntity(string);
              } else {
                  string.append(c);
              }
          }
      }

      /**
       * Reads character data into {@code data} until the start of a tag that is neither a comment nor
       * a CDATA section. On return the {@code <} has been consumed and the character following it is
       * pushed back.
       */
      private void getPCData(StringBuilder data) throws Exception {
          while (true) {
              char c = this.getNextChar();
              if (c == '<') {
                  c = this.getNextChar();
                  if (c == '!') {
                      // Embedded comment or CDATA section: handled in place, then keep reading text.
                      this.checkCDATA(data);
                  } else {
                      this.pushBackChar(c);
                      return;
                  }
              } else {
                  data.append(c);
              }
          }
      }

      /**
       * Called right after {@code <!} has been read. If a CDATA section follows, its content is
       * appended to {@code buf}; otherwise the construct is skipped as a comment or declaration.
       *
       * @return {@code true} if a CDATA section was read, {@code false} if something else was skipped
       */
      private boolean checkCDATA(StringBuilder buf) throws Exception {
          char c = this.getNextChar();
          if (c != '[') {
              this.pushBackChar(c);
              this.skipCommentOrXmlTag(0);
              return false;
          }
          if (!this.checkLiteral("CDATA[")) {
              this.skipCommentOrXmlTag(1); // one [ has already been read
              return false;
          }

          // Number of characters of the terminator "]]>" matched so far.
          int delimiterCharsSkipped = 0;
          while (delimiterCharsSkipped < 3) {
              c = this.getNextChar();
              switch (c) {
              case ']':
                  if (delimiterCharsSkipped < 2) {
                      delimiterCharsSkipped++;
                  } else {
                      // "]]]": the oldest bracket is content, the last two may still start "]]>".
                      buf.append(']');
                  }
                  break;
              case '>':
                  if (delimiterCharsSkipped < 2) {
                      // Not a terminator: flush the brackets held back so far plus this '>'.
                      for (int i = 0; i < delimiterCharsSkipped; i++) {
                          buf.append(']');
                      }
                      delimiterCharsSkipped = 0;
                      buf.append('>');
                  } else {
                      delimiterCharsSkipped = 3;
                  }
                  break;
              default:
                  for (int i = 0; i < delimiterCharsSkipped; i++) {
                      buf.append(']');
                  }
                  buf.append(c);
                  delimiterCharsSkipped = 0;
              }
          }
          return true;
      }

      /**
       * Skips the remainder of a {@code <!...>} or {@code <?...>} construct whose opening two
       * characters have already been consumed. Quoted strings and {@code [...]} sections are honoured
       * so that a {@code >} inside them does not end the construct.
       *
       * @param bracketLevel number of {@code [} already read by the caller
       */
      private void skipCommentOrXmlTag(int bracketLevel) throws Exception {
          char delim = NULL_CHAR;
          int level = 1;
          char c;
          if (bracketLevel == 0) {
              c = this.getNextChar();
              if (c == '-') {
                  c = this.getNextChar();
                  if (c == ']') {
                      bracketLevel--;
                  } else if (c == '[') {
                      bracketLevel++;
                  } else if (c == '-') {
                      // "<!--": a regular comment, which has its own terminator rules.
                      this.skipComment();
                      return;
                  }
              } else if (c == '[') {
                  bracketLevel++;
              }
          }
          while (level > 0) {
              c = this.getNextChar();
              if (delim == NULL_CHAR) {
                  if ((c == '"') || (c == '\'')) {
                      delim = c;
                  } else if (bracketLevel <= 0) {
                      // Angle brackets only count outside of [...] sections.
                      if (c == '<') {
                          level++;
                      } else if (c == '>') {
                          level--;
                      }
                  }
                  if (c == '[') {
                      bracketLevel++;
                  } else if (c == ']') {
                      bracketLevel--;
                  }
              } else if (c == delim) {
                  // End of the quoted string.
                  delim = NULL_CHAR;
              }
          }
      }

      /**
       * Parses one element into {@code elt}. On entry the opening {@code <} has been consumed and the
       * next character is the first character of the element name. On return the whole element,
       * including its closing tag, has been consumed.
       *
       * <p>An element may contain either text/CDATA or child elements and comments; text followed by
       * a child element is rejected.
       */
      private void parseNode(LightXMLParser elt) throws Exception {
          StringBuilder buf = new StringBuilder();
          this.getNodeName(buf);
          String name = buf.toString();
          elt.setName(name);

          // Attributes: name = "value" pairs until '>' or '/'.
          char c = this.skipBlanks();
          while ((c != '>') && (c != '/')) {
              emptyBuf(buf);
              this.pushBackChar(c);
              this.getNodeName(buf);
              String key = buf.toString();
              c = this.skipBlanks();
              if (c != '=') {
                  throw new Exception("Parsing error. Expected = but got: " + c);
              }
              // Move to the opening quote and hand it back so getString can validate it.
              this.pushBackChar(this.skipBlanks());
              emptyBuf(buf);
              this.getString(buf);
              elt.setAttribute(key, buf);
              c = this.skipBlanks();
          }

          if (c == '/') {
              // Self-closing element.
              c = this.getNextChar();
              if (c != '>') {
                  throw new Exception("Parsing error. Expected > but got: " + c);
              }
              return;
          }

          // Element body. textConsumed records that text or CDATA was read, even if it was empty,
          // so that an empty CDATA section is not mistaken for "no content, children follow".
          boolean textConsumed = false;
          emptyBuf(buf);
          c = this.getWhitespaces(buf);
          if (c != '<') {
              this.pushBackChar(c);
              this.getPCData(buf);
              textConsumed = true;
          } else {
              while (true) {
                  c = this.getNextChar();
                  if (c == '!') {
                      if (this.checkCDATA(buf)) {
                          this.getPCData(buf);
                          textConsumed = true;
                          break;
                      }
                      // A comment was skipped; see what comes after it.
                      c = this.getWhitespaces(buf);
                      if (c != '<') {
                          this.pushBackChar(c);
                          this.getPCData(buf);
                          textConsumed = true;
                          break;
                      }
                  } else {
                      if (c != '/') {
                          // A child element starts here: whitespace collected so far is irrelevant.
                          emptyBuf(buf);
                      } else {
                          this.pushBackChar(c);
                      }
                      break;
                  }
              }
          }

          if (!textConsumed && (buf.length() == 0)) {
              // Children and comments, until the '/' of the closing tag shows up.
              while (c != '/') {
                  if (c == '!') {
                      for (int i = 0; i < 2; i++) {
                          c = this.getNextChar();
                          if (c != '-') {
                              throw new Exception("Parsing error. Expected element or comment");
                          }
                      }
                      this.skipComment();
                  } else {
                      this.pushBackChar(c);
                      LightXMLParser child = this.createAnotherElement();
                      this.parseNode(child);
                      elt.addChild(child);
                  }
                  c = this.skipBlanks();
                  if (c != '<') {
                      throw new Exception("Parsing error. Expected <, but got: " + c);
                  }
                  c = this.getNextChar();
              }
              this.pushBackChar(c);
          } // The text collected in buf is not kept.

          // Closing tag: "/" name ">".
          c = this.getNextChar();
          if (c != '/') {
              throw new Exception("Parsing error. Expected /, but got: " + c);
          }
          this.pushBackChar(this.skipBlanks());
          if (!this.checkLiteral(name)) {
              throw new Exception("Parsing error. Expected " + name);
          }
          char closing = this.skipBlanks();
          if (closing != '>') {
              throw new Exception("Parsing error. Expected >, but got: " + closing);
          }
      }

      /**
       * Skips the body of a comment whose opening {@code <!--} has been consumed: reads up to and
       * including two consecutive dashes and then requires {@code >}.
       */
      private void skipComment() throws Exception {
          int dashes = 2;
          while (dashes > 0) {
              char ch = this.getNextChar();
              if (ch == '-') {
                  dashes -= 1;
              } else {
                  dashes = 2;
              }
          }
          char nextChar = this.getNextChar();
          if (nextChar != '>') {
              throw new Exception("Parsing error. Expected > but got: " + nextChar);
          }
      }

      /**
       * Reads characters and compares them with {@code literal}, stopping at the first mismatch.
       * Characters read up to and including the mismatch are not restored.
       *
       * @return {@code true} if the input matched the whole literal
       */
      private boolean checkLiteral(String literal) throws Exception {
          int length = literal.length();
          for (int i = 0; i < length; i++) {
              if (this.getNextChar() != literal.charAt(i)) {
                  return false;
              }
          }
          return true;
      }

      /**
       * Returns the pushed-back character if there is one, otherwise the next character of the
       * reader.
       *
       * @throws Exception if the reader is exhausted
       */
      private char getNextChar() throws Exception {
          if (this.pushedBackChar != NULL_CHAR) {
              char c = this.pushedBackChar;
              this.pushedBackChar = NULL_CHAR;
              return c;
          }
          int i = this.reader.read();
          if (i < 0) {
              throw new Exception("Parsing error. Unexpected end of data");
          }
          return (char) i;
      }

      /**
       * Expands an entity reference whose {@code &} has already been consumed and appends the result
       * to {@code buf}. Supports {@code &#N;}, {@code &#xH;} and the names in {@link #entities}.
       *
       * @throws Exception if the reference is empty, not a valid number, not a valid Unicode code
       *         point, or not a known name
       */
      private void mapEntity(StringBuilder buf) throws Exception {
          StringBuilder keyBuf = new StringBuilder();
          while (true) {
              char c = this.getNextChar();
              if (c == ';') {
                  break;
              }
              keyBuf.append(c);
          }
          String key = keyBuf.toString();

          if (key.startsWith("#")) {
              int codePoint;
              try {
                  if (key.startsWith("#x")) {
                      codePoint = Integer.parseInt(key.substring(2), 16);
                  } else {
                      codePoint = Integer.parseInt(key.substring(1), 10);
                  }
              } catch (NumberFormatException e) {
                  throw new Exception("Unknown entity: " + key, e);
              }
              if (!Character.isValidCodePoint(codePoint)) {
                  throw new Exception("Unknown entity: " + key);
              }
              // appendCodePoint emits a surrogate pair for characters above U+FFFF.
              buf.appendCodePoint(codePoint);
          } else {
              // An empty key ("&;") simply is not in the table and is reported like any unknown name.
              char[] value = entities.get(key);
              if (value == null) {
                  throw new Exception("Unknown entity: " + key);
              }
              buf.append(value);
          }
      }

      /** Stores {@code c} so that the next {@link #getNextChar()} returns it; only one slot exists. */
      private void pushBackChar(char c) {
          this.pushedBackChar = c;
      }

      /** Appends {@code child} to this node's children. */
      private void addChild(LightXMLParser child) {
          this.children.add(child);
      }

      /** Stores the string form of {@code value} under {@code name}, replacing any previous value. */
      private void setAttribute(String name, Object value) {
          this.attributes.put(name, value.toString());
      }

      /** Creates the node object used for a child element. */
      private LightXMLParser createAnotherElement() {
          return new LightXMLParser();
      }

      /** Sets the element name of this node. */
      private void setName(String name) {
          this.name = name;
      }

      /** Clears {@code buf} so it can be reused. */
      private void emptyBuf(StringBuilder buf) {
          buf.setLength(0);
      }
  }