You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RegexpFilter.java 25KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. /* *******************************************************************
  2. * Copyright (c) 1999-2001 Xerox Corporation,
  3. * 2002 Palo Alto Research Center, Incorporated (PARC).
  4. * All rights reserved.
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Common Public License v1.0
  7. * which accompanies this distribution and is available at
  8. * http://www.eclipse.org/legal/cpl-v10.html
  9. *
  10. * Contributors:
  11. * Xerox/PARC initial implementation
  12. * ******************************************************************/
  13. // todo: non-distribution license?
  14. package org.aspectj.testing.compare;
  15. import org.aspectj.testing.compare.Regexp;
  16. import java.util.*;
  17. import java.io.*;
  /**
   * Tiny debug logger.
   * Logging is enabled when the system property <code>DEBUG</code> is set
   * (to any value) at class-initialization time; callers may also flip
   * {@link #LOG} directly.
   */
  class D {
      /** true if {@link #log(String)} should emit output */
      public static boolean LOG = debugRequested();

      /**
       * Write a line, prefixed with "## ", to System.err if logging is enabled.
       * @param s the text to log
       */
      public static void log(String s) {
          if (!LOG) {
              return;
          }
          System.err.println("## " + s);
      }

      /**
       * @return true if the DEBUG system property is defined; false if it is
       *         undefined or could not be read for any reason
       */
      private static boolean debugRequested() {
          try {
              return System.getProperty("DEBUG") != null;
          } catch (Throwable ignored) {
              // property access may be refused; treat that as "no debugging"
              return false;
          }
      }
  }
  /**
   * Utility class for handling errors.
   * Callers report a message together with a suggested {@link Action};
   * this default implementation simply performs the suggested action.
   * Subclasses can override {@link #handle(String, Action)} to apply a
   * different policy.
   */
  class ErrHandler {
      /** Shared instance used by the static <code>handleErr</code> methods. */
      public static final ErrHandler DEFAULT = new ErrHandler();

      /** Print the message to System.out, prefixed with "INFO: ". */
      public static final Action INFO = new Action("info") {
          public void invoke(String message) {
              System.out.println("INFO: " + message);
          }
      };

      /** Print the message to System.err, prefixed with "WARNING: ". */
      public static final Action WARN = new Action("warn") {
          public void invoke(String message) {
              System.err.println("WARNING: " + message);
          }
      };

      /** Throw a RuntimeException carrying the message. */
      public static final Action HALT = new Action("halt") {
          public void invoke(String message) {
              throw new RuntimeException(message);
          }
      };

      /** Throw an Error carrying the message. */
      public static final Action ABORT = new Action("abort") {
          public void invoke(String message) {
              throw new Error(message);
          }
      };

      /** Print the message to System.err, then exit the VM with status 1. */
      public static final Action EXIT = new Action("exit") {
          public void invoke(String message) {
              System.err.println(message);
              System.exit(1);
          }
      };

      /** A named response to an error message. */
      abstract static class Action {
          /** lower-cased, trimmed name; also the result of toString() */
          protected final String name;

          private Action(String name) {
              this.name = name.toLowerCase().trim();
          }

          /**
           * Perform this action.
           * @param message the text describing the problem
           */
          abstract public void invoke(String message);

          public String toString() {
              return name;
          }
      }

      /** Same as <code>DEFAULT.handle(message, t)</code>. */
      public static final void handleErr(String message, Throwable t) {
          DEFAULT.handle(message, t);
      }

      /** Same as <code>DEFAULT.handle(message)</code>. */
      public static final void handleErr(String message) {
          DEFAULT.handle(message);
      }

      /** Same as <code>DEFAULT.handle(message, suggestion)</code>. */
      public static final void handleErr(String message, Action suggestion) {
          DEFAULT.handle(message, suggestion);
      }

      /**
       * Handle a message with no particular urgency; suggests {@link #INFO}.
       * @param message the String to pass to the Action
       */
      public void handle(String message) {
          handle(message, INFO);
      }

      /**
       * Handle a failure caused by a Throwable; suggests {@link #HALT}.
       * The text passed on is the message followed by ": ", the throwable's
       * class name, and the throwable's own message. Previously the two parts
       * were concatenated with no separator, which ran the caller's text
       * straight into the class name.
       * @param message the String describing what was being attempted
       * @param t the Throwable that was caught (may be null)
       */
      public void handle(String message, Throwable t) {
          if (null == t) {
              handle(message, HALT);
              return;
          }
          String detail = t.getClass().getName() + ": " + t.getMessage();
          boolean haveMessage = (null != message) && (0 < message.length());
          handle(haveMessage ? message + ": " + detail : detail, HALT);
      }

      /**
       * The default implementation just takes the suggested action.
       * @param message the String to pass to any Action
       * @param suggestion the Action proposed by the caller
       */
      public void handle(String message, Action suggestion) {
          suggestion.invoke(message);
      }
  }
  75. /* old comments, not correct:
  76. * <li>test line against all registered select statements
  77. * to get all the matching (replace) operations (unsupported)</li>
  78. * The algorithm is greedy in that if the user requests a line
  79. * and the default is no-output, it will read lines from the input
  80. * until one is matched (or EOF).
  81. */
  82. /**
  83. * Process files in a minimal version of sed:
  84. * <li>read line using superclass LineNumberReader.readLine()</li>
  85. * <li>Preprocess with case and white space operations</li>
  86. * <li>run all the replace operations on the input, in order</li>
  87. * <li>return the line.</li>
  88. * Using anything but the <code>readLine()</code> method will circumvent
  89. * the regular expression replacement processing.
  90. */
  91. public class RegexpFilter {
  92. protected static final String[] NO_STRINGS = new String[]{};
  93. // ---------------------------------------------- static methods
  94. /**
  95. * Process file (or System.in) like sed.
  96. * This only calls <code>RegexpFilterReader.main(args)</code>.
  97. * @param args same as for init(String[], RegexpFilter)
  98. */
  99. public static void main(String[] args) throws IOException {
  100. RegexpFilterReader.main(args);
  101. }
  102. // todo: move String -> String[] (commas) out into utility
  103. /**
  104. * Initialize a RegexpFilter based on command-line style arguments
  105. * in a single String. (Otherwise, same as
  106. * <code>init(String[], RegexpFilter)</code>)
  107. * The Strings are separated at , (unless \ escaped) and trimmed.
  108. * Note that the escape characters are removed from before the ,.
  109. * @param spec the String to break into String[]
  110. * @param toSet the RegexpFilter to initialize - if null, construct one from
  111. * the file argument or stdin if there is no file argument.
  112. */
  113. public static RegexpFilter init(String arg, RegexpFilter toSet) {
  114. if ((null == arg) || (1 > arg.length())) {
  115. return init(NO_STRINGS, toSet);
  116. }
  117. StringTokenizer st = new StringTokenizer(arg, ",");
  118. Vector result = new Vector();
  119. String last = null;
  120. String next;
  121. while (st.hasMoreTokens()) {
  122. next = st.nextToken();
  123. if (next.endsWith("\\") && (st.hasMoreTokens())) {
  124. next = next.substring(0, next.length()-1);
  125. last = last == null ? next : last + next;
  126. continue;
  127. }
  128. if (null != last) {
  129. next = last + next;
  130. last = null;
  131. }
  132. result.add(next.trim());
  133. }
  134. String[] args = new String[result.size()];
  135. result.copyInto(args);
  136. return RegexpFilter.init(args, toSet);
  137. }
  138. /**
  139. * Initialize a RegexpFilter based on command-line style arguments.
  140. * This is the only way (currently) to set up a RegexpFilter.
  141. * syntax: <code>{file | {-i|-t|-b|-s <pattern>|-s <patternFile>}..}</code>
  142. * (for booleans, use lowercase to enable, uppercase to disable).
  143. * @param args the String[] containing file to input plus any number of...
  144. * <li>-i "ignore": ignore case</li>
  145. * <li>-t "trim" : ignore leading and trailing white space</li>
  146. * <li>-b "blanks": ignore differences in all white space</li>
  147. * <li>-s "{s/pattern/expression/};...":
  148. * replace pattern in lines with expression</li>
  149. * <li>-S <file> : same as s, but read commands from file</li>
  150. * @param toSet the RegexpFilter to initialize - if null, construct one from
  151. * the file argument or stdin if there is no file argument.
  152. */
  153. public static RegexpFilter init(String[] args, RegexpFilter toSet) {
  154. final String syntax = " - syntax: {file | {-i|-t|-b|-s <pattern>|-s <patternFile>}..}";
  155. RegexpFilter result = (null != toSet ? toSet : new RegexpFilter());
  156. if ((null != args) && (0 < args.length)) {
  157. for (int i = 0; i < args.length; i++) {
  158. String arg = args[i];
  159. if ((null == arg) || (1 > arg.length())) continue;
  160. if (arg.startsWith("-")) {
  161. switch (arg.charAt(1)) {
  162. case 'i' : result.ignoreCase = true; break;
  163. case 'I' : result.ignoreCase = false; break;
  164. case 'b' : result.collapseWhitespace = true; break;
  165. case 'B' : result.collapseWhitespace = false; break;
  166. case 't' : result.trimWhitespace = true; break;
  167. case 'T' : result.trimWhitespace = false; break;
  168. case 's' : ++i;
  169. if (i < args.length) {
  170. result.getOperationList().addOperation(args[i]);
  171. } else {
  172. String err = "need arg after -s " + syntax;
  173. ErrHandler.handleErr(err, ErrHandler.WARN);
  174. }
  175. break;
  176. case 'S' : ++i;
  177. if (i < args.length) {
  178. result.getOperationList().addFile(args[i]);
  179. } else {
  180. String err = "need arg after -s " + syntax;
  181. ErrHandler.handleErr(err, ErrHandler.WARN);
  182. }
  183. break;
  184. default:
  185. String err = "unrecognized flag : " + arg + syntax;
  186. ErrHandler.handleErr(err, ErrHandler.WARN);
  187. break;
  188. }
  189. } else if (null != result) {
  190. ErrHandler.handleErr("unexpected arg " + arg + syntax, ErrHandler.WARN);
  191. break;
  192. } else { // unflagged argument, need file - should be input file
  193. File _file = new File(arg);
  194. if (_file.exists() && _file.canRead()) {
  195. result.setFile(_file);
  196. }
  197. }
  198. } // reading args
  199. } // have args
  200. return result;
  201. } // init
  202. // ---------------------------------------------- instance fields
  203. /** ignore case by converting lines to upper case */
  204. protected boolean ignoreCase = false;
  205. /** collapse internal whitespace by converting to space character */
  206. protected boolean collapseWhitespace = true;
  207. /** trim leading and trailing whitespace from lines before comparison */
  208. protected boolean trimWhitespace = false;
  209. /** replace input per replace operations */
  210. protected boolean replace = false;
  211. /** operations to process the file with */
  212. protected OperationList operations;
  213. /** handler for our errors*/
  214. protected ErrHandler handler = ErrHandler.DEFAULT;
  215. /** the File to use */
  216. protected File file = null;
  217. // ---------------------------------------------- constructors
  218. /** no getter/setters yet, so construct using
  219. * <code>static RegexpFilter init(String[],RegexpFilter)</code>
  220. */
  221. protected RegexpFilter() { }
  222. // ---------------------------------------------- instance methods
  223. /**
  224. * Set a file for this RegexpFilter.
  225. * This makes command-line initialization easier.
  226. * @param file the File to set for this RegexpFilter
  227. */
  228. public void setFile(File file) { this.file = file; }
  229. /**
  230. * Return file this RegexpFilter was initialized with.
  231. * @return the File this RegexpFilter was initialized with (may be null).
  232. */
  233. public File getFile() { return file; }
  234. /**
  235. * Lazy construction of operations list
  236. */
  237. protected OperationList getOperationList() {
  238. if (null == operations) {
  239. operations = new OperationList();
  240. replace = true;
  241. }
  242. return operations;
  243. }
  244. /**
  245. * Process line, applying case and whitespace operations
  246. * before delegating to replace.
  247. * @param string the String to proces
  248. * @return the String as processed
  249. */
  250. protected String process(String string) {
  251. String label = "process(\"" + string + "\")";
  252. D.log(label);
  253. if (null == string) return null;
  254. String result = string;
  255. if (ignoreCase) {
  256. result = result.toUpperCase();
  257. }
  258. if (trimWhitespace) {
  259. result = result.trim();
  260. }
  261. if (collapseWhitespace) {
  262. final StringBuffer collapse = new StringBuffer();
  263. StringTokenizer tokens = new StringTokenizer(result);
  264. boolean hasMoreTokens = tokens.hasMoreTokens();
  265. while (hasMoreTokens) {
  266. collapse.append(tokens.nextToken());
  267. hasMoreTokens = tokens.hasMoreTokens();
  268. if (hasMoreTokens) {
  269. collapse.append(" ");
  270. }
  271. }
  272. result = collapse.toString();
  273. }
  274. if (replace) {
  275. result = getOperationList().replace(result);
  276. D.log(label + " result " + result);
  277. }
  278. return result;
  279. }
  280. /**
  281. * container for ReplaceOperations constructs on add,
  282. * runs operations against input.
  283. */
  284. class OperationList {
  285. final ArrayList list;
  286. public OperationList() {
  287. list = new ArrayList();
  288. }
  289. /**
  290. * Run input through all the operations in this list
  291. * and return the result.
  292. * @param input the String to process
  293. * @return the String result of running input through all replace
  294. * operations in order.
  295. */
  296. public String replace(String input) {
  297. if (null == input) return null;
  298. Iterator operations = operations();
  299. while (operations.hasNext()) {
  300. ReplaceOperation operation = (ReplaceOperation) operations.next();
  301. input = operation.replace(input);
  302. }
  303. return input;
  304. }
  305. /**
  306. * Add operations read from file, one per line,
  307. * ignoring empty lines and # or // comments.
  308. * ';' delimits operations within a line as it does
  309. * for addOperation(String), so you must \ escape ;
  310. * in the search or replace segments
  311. */
  312. public void addFile(String path) {
  313. if (null == path) {
  314. handler.handle("null path", ErrHandler.ABORT);
  315. } else {
  316. File file = new File(path);
  317. if (!file.exists() && file.canRead()) {
  318. handler.handle("invalid path: " + path, ErrHandler.ABORT);
  319. } else {
  320. BufferedReader reader = null;
  321. int lineNumber = 0;
  322. String line = null;
  323. try {
  324. reader = new BufferedReader(new FileReader(file));
  325. while (null != (line = reader.readLine())) {
  326. lineNumber++;
  327. int loc = line.indexOf("#");
  328. if (-1 != loc) {
  329. line = line.substring(0,loc);
  330. }
  331. loc = line.indexOf("//");
  332. if (-1 != loc) {
  333. line = line.substring(0,loc);
  334. }
  335. line = line.trim();
  336. if (1 > line.length()) continue;
  337. addOperation(line);
  338. }
  339. } catch (IOException e) {
  340. String message ="Error processing file " + path
  341. + " at line " + lineNumber + ": \"" + line + "\""
  342. + ": " + e.getClass().getName() + ": " + e.getMessage() ;
  343. handler.handle(message, ErrHandler.ABORT);
  344. } finally {
  345. try {
  346. if (reader != null) reader.close();
  347. } catch (IOException e) {
  348. // ignore
  349. }
  350. }
  351. }
  352. }
  353. }
  354. /**
  355. * Add operation to list, emitting warning and returning false if not created.
  356. * Add multiple operations at once by separating with ';'
  357. * (so any ; in search or replace must be escaped).
  358. * @param operation a String acceptable to
  359. * <code>ReplaceOperation.makeReplaceOperation(String, ErrHandler)</code>,
  360. * of the form sX{search}X{replace}X{g};..
  361. * @return false if not all added.
  362. */
  363. public boolean addOperation(String operation) {
  364. StringTokenizer st = new StringTokenizer(operation, ";", false);
  365. String last = null;
  366. ReplaceOperation toAdd;
  367. boolean allAdded = true;
  368. while (st.hasMoreTokens()) {
  369. // grab tokens, accumulating if \ escapes ; delimiter
  370. String next = st.nextToken();
  371. if (next.endsWith("\\") && (st.hasMoreTokens())) {
  372. next = next.substring(0, next.length()-1);
  373. last = (last == null ? next : last + next);
  374. continue;
  375. }
  376. if (null != last) {
  377. next = last + next;
  378. last = null;
  379. }
  380. toAdd = ReplaceOperation.makeReplaceOperation(next, handler);
  381. if (null != toAdd) {
  382. list.add(toAdd);
  383. } else {
  384. String label = "RegexpFilter.OperationList.addOperation(\"" + operation + "\"): ";
  385. handler.handle(label + " input not accepted " , ErrHandler.WARN);
  386. if (allAdded) allAdded = false;
  387. }
  388. }
  389. return allAdded;
  390. }
  391. /**
  392. * @return an Iterator over the list of ReplaceOperation
  393. */
  394. public Iterator operations() {
  395. return list.iterator();
  396. }
  397. } // class OperationList
  398. } // class RegexpFilter
  399. /**
  400. * Encapsulate a search/replace operation which uses a RegExp.
  401. */
  402. class ReplaceOperation {
  403. /**
  404. * This accepts a sed-like substitute command, except that
  405. * the delimiter character may not be used anywhere in the
  406. * search or replace strings, even if escaped. You may use
  407. * any delimiter character.
  408. * Note that although g (replace-globally) is supported as input,
  409. * it is ignored in this implementation.
  410. * @param operation a String of the form sX{search}X{replace}X{g}
  411. */
  412. public static ReplaceOperation makeReplaceOperation(String operation, ErrHandler handler) {
  413. ReplaceOperation result = null;
  414. StringBuffer err = (null == handler ? null : new StringBuffer());
  415. final String syntax = "sX{search}X{replace}X{g}";
  416. // todo: use Point p = isValidOperation(operation);
  417. if (null == operation) {
  418. if (null != err) err.append("null operation");
  419. } else if (5 > operation.length()) {
  420. if (null != err) err.append("empty operation");
  421. } else if (!operation.startsWith("s")) {
  422. if (null != err) err.append("expecting s: " + syntax);
  423. } else {
  424. String sep = operation.substring(1,2);
  425. int mid = operation.indexOf(sep, 2);
  426. if (-1 == mid) {
  427. if (null != handler) err.append("expecting middle \"" + sep + "\": " + syntax);
  428. } else if (mid == 2) {
  429. if (null != handler) err.append("expecting search before middle \"" + sep + "\": " + syntax);
  430. } else {
  431. int end = operation.indexOf(sep, mid+1);
  432. if (-1 == end) {
  433. if (null != handler) err.append("expecting final \"" + sep + "\": " + syntax);
  434. } else {
  435. String search = operation.substring(2,mid);
  436. if (!ReplaceOperation.isValidSearch(search)) {
  437. if (null != handler) err.append("invalid search \"" + search + "\": " + syntax);
  438. } else {
  439. String replace = operation.substring(mid+1,end);
  440. if (!ReplaceOperation.isValidReplace(replace)) {
  441. if (null != handler) err.append("invalid replace \"" + replace + "\": " + syntax);
  442. } else {
  443. result = new ReplaceOperation(search, replace, operation.endsWith("g"), handler);
  444. }
  445. }
  446. }
  447. }
  448. }
  449. if ((0 < err.length()) && (null != handler)) {
  450. err.append(" operation=\"" + operation + "\"");
  451. handler.handle(err.toString(), ErrHandler.HALT);
  452. }
  453. return result;
  454. }
  455. /**
  456. * Return true if the input string represents a valid search operation
  457. * @param replace the String representing a search expression
  458. */
  459. protected static boolean isValidSearch(String search) { // todo: too weak to be useful now
  460. return ((null != search) && (0 < search.length()));
  461. }
  462. /**
  463. * Return Point x=mid, y=end if the input string represents a valid search operation
  464. * @param search the String representing a search expression
  465. protected static Point isValidOperation(String search) {
  466. if (null != search) {
  467. final int length = search.length();
  468. if (5 < length) {
  469. String sep = search.substring(2,3);
  470. int mid = search.indexOf(sep, 3);
  471. if (3 < mid) {
  472. int end = search.indexOf(sep, mid+1);
  473. if ((end == length-1)
  474. || ((end == length-2)
  475. && search.endsWith("g"))) {
  476. return new Point(mid, end);
  477. }
  478. }
  479. }
  480. }
  481. return null;
  482. }
  483. */
  484. /**
  485. * Return true if the input string represents a valid replace operation
  486. * @param replace the String representing a replace expression
  487. */
  488. protected static boolean isValidReplace(String replace) { // todo: too weak to be useful now
  489. boolean result = (null != replace);
  490. return result;
  491. } // isValidReplace
  492. // ------------------------------------------------- instance members
  493. /** If true, repeat replace as often as possible (todo: repeat not supported) */
  494. protected final boolean repeat;
  495. /** search pattern */
  496. protected final String search;
  497. /** replace pattern */
  498. protected final String replace;
  499. /** regexp processor */
  500. protected final Regexp regexp;
  501. /** replace buffer (read-only) */
  502. protected final char[] replaceBuffer;
  503. /** error handler */
  504. protected final ErrHandler handler;
  505. // ------------------------------------------------- constructors
  506. private ReplaceOperation(String search, String replace, boolean repeat, ErrHandler handler) {
  507. this.search = search;
  508. this.replace = replace;
  509. this.replaceBuffer = replace.toCharArray();
  510. this.repeat = repeat;
  511. this.handler = (null != handler ? handler : ErrHandler.DEFAULT);
  512. this.regexp = RegexpFactory.makeRegexp();
  513. try {
  514. this.regexp.setPattern(search);
  515. } catch (Exception e) {
  516. this.handler.handle("setting search=" + search, e);
  517. }
  518. }
  519. /**
  520. * Return true if the input would be matched by the search string of this ReplaceOperation.
  521. * @param input the String to compare
  522. * @return true if the input would be matched by the search string of this ReplaceOperation
  523. */
  524. public boolean matches(String input) {
  525. return ((null != input) && regexp.matches(input));
  526. } // matches
  527. /**
  528. * Replace any search text in input with replacement text,
  529. * returning input if there is no match. More specifically,
  530. * <li> emit unmatched prefix, if any</li>
  531. * <li> emit replacement text as-is, except that
  532. * \[0-9] in the replacement text is replaced
  533. * with the matching subsection of the input text</li>
  534. * <li> emit unmatched suffix, if any</li>
  535. * @param input the String to search and replace
  536. * @throws IllegalArgumentException if null == input
  537. */
  538. public String replace(String input) {
  539. if (null == input) throw new IllegalArgumentException("null input");
  540. String label = "replace(\"" + input + "\") ";
  541. D.log(label);
  542. if (matches(input)) {
  543. StringBuffer buffer = new StringBuffer();
  544. final int length = replaceBuffer.length;
  545. Vector groups = regexp.getGroups(input);
  546. if ((null == groups) || (1 > groups.size())) {
  547. handler.handle(label + "matched but no groups? ");
  548. return input;
  549. }
  550. buffer.setLength(0);
  551. // group 0 is whole; if not same as input, print prefix/suffix
  552. String matchedPart = (String) groups.elementAt(0);
  553. final int matchStart = input.indexOf(matchedPart);
  554. final int matchEnd = matchStart + matchedPart.length();
  555. if (0 < matchStart) {
  556. buffer.append(input.substring(0, matchStart));
  557. }
  558. // true if \ escaping special char, esp. replace \[0-9]
  559. boolean specialChar = false;
  560. for (int i = 0; i < length; i++) {
  561. char c = replaceBuffer[i];
  562. if (specialChar) {
  563. int value = Character.digit(c, 10); // only 0-9 supported
  564. if ((0 <= value) && (value < groups.size())) {
  565. buffer.append((String) groups.elementAt(value));
  566. } else {
  567. buffer.append(c);
  568. }
  569. specialChar = false;
  570. } else if ('\\' != c) {
  571. D.log("." + c);
  572. buffer.append(c);
  573. } else {
  574. specialChar = true;
  575. }
  576. }
  577. if (specialChar) {
  578. handler.handle(label + "\\ without register: " + replace,
  579. ErrHandler.ABORT);
  580. }
  581. if (matchEnd < input.length()) {
  582. buffer.append(input.substring(matchEnd));
  583. }
  584. input = buffer.toString();
  585. }
  586. return input;
  587. } // replace
  588. } // class ReplaceOperation