You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

LinkCheck.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. /* *******************************************************************
  2. * Copyright (c) 2003 Contributors.
  3. * All rights reserved.
  4. * This program and the accompanying materials are made available
  5. * under the terms of the Eclipse Public License v1.0
  6. * which accompanies this distribution and is available at
  7. * http://www.eclipse.org/legal/epl-v10.html
  8. *
  9. * Contributors:
  10. * Wes Isberg initial implementation
  11. * ******************************************************************/
  12. package org.aspectj.testing.util;
  13. import java.io.*;
  14. import java.net.*;
  15. import java.util.*;
  16. import javax.swing.text.MutableAttributeSet;
  17. import javax.swing.text.html.*;
  18. import javax.swing.text.html.HTML.Tag;
  19. import org.aspectj.bridge.*;
  20. import org.aspectj.bridge.IMessageHandler;
  21. import org.aspectj.util.LangUtil;
  22. //import org.aspectj.util.FileUtil;
  23. /**
  24. * Quick and dirty link checker.
  25. * This checks that links into file: and http: targets work,
  26. * and that links out of file: targets work.
  27. */
  28. public class LinkCheck {
  29. /*
  30. * Known issues:
  31. * - main interface only, though API's easily upgraded
  32. * - https MalformedUrlExceptions on redirect
  33. * - Swing won't quit without System.exit
  34. * - single-threaded
  35. */
  /** Pseudo-document URL that {@code main} passes as the source document of the starting link. */
  static final URL COMMAND_LINE;

  static {
    URL pseudoDoc;
    try {
      pseudoDoc = new URL("file://commandLine");
    } catch (Throwable ignored) {
      // best effort: the constant stays null if the literal cannot be turned into a URL
      pseudoDoc = null;
    }
    COMMAND_LINE = pseudoDoc;
  }
  45. /** @param args file {-logFile {file} | -printInfo } */
  46. public static void main(String[] args) {
  47. final String syntax = "java "
  48. + LinkCheck.class.getName()
  49. + " file {-log <file> | -printInfo}..";
  50. if ((null == args) || (0 >= args.length)) {
  51. System.err.println(syntax);
  52. System.exit(1);
  53. }
  54. final String startingURL = "file:///" + args[0].replace('\\', '/');
  55. String logFile = null;
  56. boolean printInfo = false;
  57. for (int i = 1; i < args.length; i++) {
  58. if ("-log".equals(args[i]) && ((i+1) < args.length)) {
  59. logFile = args[++i];
  60. } else if ("-printInfo".equals(args[i])) {
  61. printInfo = true;
  62. } else {
  63. System.err.println(syntax);
  64. System.exit(1);
  65. }
  66. }
  67. final boolean useSystemOut = (null == logFile);
  68. final MessageHandler mh;
  69. final OutputStream out;
  70. if (useSystemOut) {
  71. mh = new MessageHandler();
  72. out = null;
  73. } else {
  74. try {
  75. out = new FileOutputStream(logFile);
  76. } catch (FileNotFoundException e) {
  77. e.printStackTrace();
  78. return;
  79. }
  80. final PrintStream ps = new PrintStream(out, true);
  81. final boolean printAll = printInfo;
  82. mh = new MessageHandler() {
  83. public boolean handleMessage(IMessage message) {
  84. if (printAll || !message.isInfo()) {
  85. ps.println(message.toString());
  86. }
  87. return super.handleMessage(message);
  88. }
  89. };
  90. }
  91. Link.Check exists
  92. = Link.getProtocolChecker(new String[] {"file", "http"});
  93. Link.Check contents
  94. = Link.getProtocolChecker(new String[] {"file"});
  95. LinkCheck me = new LinkCheck(mh, exists, contents);
  96. me.addLinkToCheck(COMMAND_LINE, startingURL); // pwd as base?
  97. try {
  98. String label = "checking URLs from " + startingURL;
  99. if (useSystemOut) {
  100. System.out.println(label);
  101. }
  102. MessageUtil.info("START " + label);
  103. long start = System.currentTimeMillis();
  104. me.run();
  105. long duration = (System.currentTimeMillis() - start)/1000;
  106. long numChecked = me.checkedUrls.size();
  107. if (numChecked > 0) {
  108. float cps = (float) duration / (float) numChecked;
  109. StringBuffer sb = new StringBuffer();
  110. sb.append("DONE. Checked " + numChecked);
  111. sb.append(" URL's in " + duration);
  112. sb.append(" seconds (" + cps);
  113. sb.append(" seconds per URL).");
  114. MessageUtil.info("END " + label + ": " + sb);
  115. if (useSystemOut) {
  116. System.out.println(sb.toString());
  117. }
  118. }
  119. MessageUtil.info(MessageUtil.renderCounts(mh));
  120. try {
  121. if (null != out) {
  122. out.flush();
  123. }
  124. } catch (IOException e) {
  125. // ignore
  126. }
  127. if (useSystemOut && (null != logFile)) {
  128. System.out.println("Find log in " + logFile);
  129. }
  130. } finally {
  131. if (null != out) {
  132. try {
  133. out.close();
  134. } catch (IOException e1) {
  135. }
  136. }
  137. System.exit(mh.numMessages(IMessage.ERROR, true)); // XXX dumb swing
  138. }
  139. }
  140. // private static boolean isCheckedFileType(URL url) {
  141. // if (null == url) {
  142. // return false;
  143. // }
  144. // String file = url.getFile();
  145. // return !FileUtil.hasZipSuffix(file)
  146. // && !file.endsWith(".pdf");
  147. // }
  /** Sink for progress and failure messages, wrapping the handler given to the constructor. */
  private final Messages messages;

  /** Swing HTML parser used to scan fetched documents. XXX untested - stateful */
  private final HTMLEditorKit.Parser parser;

  /** Links queued for checking; drained by {@link #run()}. */
  private final List<Link> linksToCheck;

  /** {@code URL.toString()} of every link that has been checked or accepted unchecked. */
  private final Set<String> checkedUrls;

  /** {@code URL.toString()} of every named anchor found while parsing documents. */
  private final Set<String> validRefs;

  /** {@code URL.toString()} of every fragment link that must match a named anchor. */
  private final Set<String> refsToCheck;

  /** Decides whether a link target is checked for existence. */
  private final Link.Check checkExists;

  /** Decides whether a document's own links are followed. */
  private final Link.Check checkContents;

  /**
   * Creates a checker with empty work queues.
   *
   * <p>The three string collections are only ever tested for membership, appended to, sized and
   * iterated, so they are insertion-ordered hash sets rather than lists: {@code contains} no
   * longer scans every entry, and iteration order is unchanged.
   *
   * @param handler receives all messages; validated with {@code LangUtil.throwIaxIfNull}
   * @param checkExists selects the URLs whose existence is checked; validated likewise
   * @param checkContents selects the URLs whose links are followed; validated likewise
   */
  public LinkCheck(IMessageHandler handler, Link.Check checkExists, Link.Check checkContents) {
    LangUtil.throwIaxIfNull(handler, "handler");
    LangUtil.throwIaxIfNull(checkExists, "checkExists");
    LangUtil.throwIaxIfNull(checkContents, "checkContents");
    this.messages = new Messages(handler);
    this.linksToCheck = new ArrayList<Link>();
    this.checkedUrls = new LinkedHashSet<String>();
    this.refsToCheck = new LinkedHashSet<String>();
    this.validRefs = new LinkedHashSet<String>();
    // HTMLEditorKit.getParser() is not public; the anonymous subclass widens its visibility
    // so the parser can be obtained here.
    this.parser =
        new HTMLEditorKit() {
          @Override
          public HTMLEditorKit.Parser getParser() {
            return super.getParser();
          }
        }.getParser();
    this.checkExists = checkExists;
    this.checkContents = checkContents;
  }
  176. public synchronized void addLinkToCheck(URL doc, String link) {
  177. URL linkURL = makeURL(doc, link);
  178. if (null == linkURL) {
  179. // messages.addingNullLinkFrom(doc);
  180. return;
  181. }
  182. String linkString = linkURL.toString();
  183. if ((null != link) && !checkedUrls.contains(linkString) ) {
  184. if (!checkExists.check(linkURL)) {
  185. checkedUrls.add(linkString);
  186. messages.acceptingUncheckedLink(doc, linkURL);
  187. } else {
  188. Link toAdd = new Link(doc, linkURL);
  189. if (!linksToCheck.contains(toAdd)) { // equals overridden
  190. linksToCheck.add(toAdd);
  191. }
  192. }
  193. }
  194. }
  195. public synchronized void run() {
  196. ArrayList<Link> list = new ArrayList<Link>();
  197. while (0 < linksToCheck.size()) {
  198. messages.checkingLinks(linksToCheck.size());
  199. list.clear();
  200. list.addAll(linksToCheck);
  201. for (Iterator<Link> iter = list.iterator(); iter.hasNext();) {
  202. final Link link = iter.next();
  203. String urlString = link.url.toString();
  204. if (!checkedUrls.contains(urlString)) {
  205. checkedUrls.add(urlString);
  206. messages.checkingLink(link);
  207. checkLink(link);
  208. }
  209. }
  210. linksToCheck.removeAll(list);
  211. }
  212. // now check that all named references are accounted for
  213. for (Iterator<String> iter = refsToCheck.iterator(); iter.hasNext();) {
  214. String ref = iter.next();
  215. if (!validRefs.contains(ref)) {
  216. messages.namedReferenceNotFound(ref);
  217. }
  218. }
  219. }
  220. /** @return null if link known or if unable to create */
  221. private URL makeURL(URL doc, String link) {
  222. if (checkedUrls.contains(link)) {
  223. return null;
  224. }
  225. URL result = null;
  226. try {
  227. result = new URL(link);
  228. } catch (MalformedURLException e) {
  229. if (null == doc) {
  230. messages.malformedUrl(null, link, e);
  231. } else {
  232. try {
  233. URL res = new URL(doc, link);
  234. String resultString = res.toString();
  235. if (checkedUrls.contains(resultString)) {
  236. return null;
  237. }
  238. result = res;
  239. } catch (MalformedURLException me) {
  240. messages.malformedUrl(doc, link, me);
  241. }
  242. }
  243. }
  244. return result;
  245. }
  246. /** @param link a Link with a url we can handle */
  247. private void checkLink(final Link link) {
  248. if (handleAsRef(link)) {
  249. return;
  250. }
  251. URL url = link.url;
  252. InputStream input = null;
  253. try {
  254. URLConnection connection = url.openConnection();
  255. if (null == connection) {
  256. messages.cantOpenConnection(url);
  257. return;
  258. }
  259. // get bad urls to fail on read before skipping by type
  260. input = connection.getInputStream();
  261. String type = connection.getContentType();
  262. if (null == type) {
  263. messages.noContentType(link);
  264. } else if (!type.toLowerCase().startsWith("text/")) {
  265. messages.notTextContentType(link);
  266. } else {
  267. boolean addingLinks = checkContents.check(url);
  268. parser.parse(
  269. new InputStreamReader(input),
  270. new LinkListener(url, addingLinks), true);
  271. }
  272. } catch (IOException e) {
  273. messages.exceptionReading(link, e);
  274. } finally {
  275. if (null != input) {
  276. try {
  277. input.close();
  278. } catch (IOException e1) {
  279. // ignore
  280. }
  281. }
  282. }
  283. }
  284. /** @return true if link is to an internal ...#name */
  285. private boolean handleAsRef(Link link) {
  286. String ref = link.url.getRef();
  287. if (!LangUtil.isEmpty(ref)) {
  288. String refString = link.url.toString(); // XXX canonicalize?
  289. if (!refsToCheck.contains(refString)) {
  290. refsToCheck.add(refString);
  291. // add pseudo-link to force reading of ref'd doc XXX hmm
  292. int refLoc = refString.indexOf("#");
  293. if (-1 == refLoc) {
  294. messages.uncommentedReference(link);
  295. } else {
  296. refString = refString.substring(0, refLoc);
  297. addLinkToCheck(link.doc, refString);
  298. }
  299. return true;
  300. }
  301. }
  302. return false;
  303. }
  304. /** LinkListener callback */
  305. private boolean addKnownNamedAnchor(URL doc, String name) {
  306. String namedRef = "#" + name;
  307. try {
  308. String ref = new URL(doc + namedRef).toString();
  309. if (!validRefs.contains(ref)) {
  310. validRefs.add(ref);
  311. }
  312. return true;
  313. } catch (MalformedURLException e) {
  314. messages.malformedUrl(doc, namedRef, e);
  315. return false;
  316. }
  317. }
  318. private class Messages {
  319. private final IMessageHandler handler;
  320. private Messages(IMessageHandler handler) {
  321. LangUtil.throwIaxIfNull(handler, "handler");
  322. this.handler = handler;
  323. }
  324. private void info(String label, Object more) {
  325. MessageUtil.info(handler, label + " " + more);
  326. }
  327. private void fail(String label, Object more, Throwable thrown) {
  328. MessageUtil.fail(handler, label + " " + more, thrown);
  329. }
  330. private void uncommentedReference(Link link) {
  331. info("uncommentedReference", link); // XXX bug?
  332. }
  333. // private void addingNullLinkFrom(URL doc) {
  334. // info("addingNullLinkFrom", doc);
  335. // }
  336. //
  337. // private void noContentCheck(Link link) {
  338. // info("noContentCheck", link);
  339. // }
  340. private void notTextContentType(Link link) {
  341. info("notTextContentType", link);
  342. }
  343. private void noContentType(Link link) {
  344. info("noContentType", link);
  345. }
  346. private void checkingLinks(int i) {
  347. info("checkingLinks", new Integer(i));
  348. }
  349. private void checkingLink(Link link) {
  350. info("checkingLink", link);
  351. }
  352. private void acceptingUncheckedLink(URL doc, URL link) {
  353. info("acceptingUncheckedLink", "doc=" + doc + " link=" + link);
  354. }
  355. // private void cantHandleRefsYet(Link link) {
  356. // info("cantHandleRefsYet", link.url);
  357. // }
  358. private void namedReferenceNotFound(String ref) {
  359. // XXX find all references to this unfound named reference
  360. fail("namedReferenceNotFound", ref, null);
  361. }
  362. private void malformedUrl(URL doc, String link, MalformedURLException e) {
  363. fail("malformedUrl", "doc=" + doc + " link=" + link, e);
  364. }
  365. private void cantOpenConnection(URL url) {
  366. fail("cantOpenConnection", url, null);
  367. }
  368. private void exceptionReading(Link link, IOException e) {
  369. // only info if redirect from http to https
  370. String m = e.getMessage();
  371. if ((m != null)
  372. && (-1 != m.indexOf("protocol"))
  373. && (-1 != m.indexOf("https"))
  374. && "http".equals(link.url.getProtocol())) {
  375. info("httpsRedirect", link);
  376. return;
  377. }
  378. fail("exceptionReading", link, e);
  379. }
  380. private void nullLink(URL doc, Tag tag) {
  381. // ignore - many tags do not have links
  382. }
  383. private void emptyLink(URL doc, Tag tag) {
  384. fail("emptyLink", "doc=" + doc + " tag=" + tag, null);
  385. }
  386. }
  387. /**
  388. * Register named anchors and add any hrefs to the links to check.
  389. */
  390. private class LinkListener extends HTMLEditorKit.ParserCallback {
  391. private final URL doc;
  392. private final boolean addingLinks;
  393. private LinkListener(URL doc, boolean addingLinks) {
  394. this.doc = doc;
  395. this.addingLinks = addingLinks;
  396. }
  397. public void handleStartTag(
  398. HTML.Tag tag,
  399. MutableAttributeSet attributes,
  400. int position) {
  401. handleSimpleTag(tag, attributes, position);
  402. }
  403. public void handleSimpleTag(
  404. HTML.Tag tag,
  405. MutableAttributeSet attributes,
  406. int position) { // XXX use position to emit context?
  407. boolean isNameAnchor = registerIfNamedAnchor(tag, attributes);
  408. if (!addingLinks) {
  409. return;
  410. }
  411. Object key = HTML.Tag.FRAME == tag
  412. ? HTML.Attribute.SRC
  413. : HTML.Attribute.HREF;
  414. String link = (String) attributes.getAttribute(key);
  415. if (null == link) {
  416. if (!isNameAnchor) {
  417. messages.nullLink(doc, tag);
  418. }
  419. } else if (0 == link.length()) {
  420. if (!isNameAnchor) {
  421. messages.emptyLink(doc, tag);
  422. }
  423. } else {
  424. addLinkToCheck(doc, link);
  425. }
  426. }
  427. private boolean registerIfNamedAnchor(
  428. HTML.Tag tag,
  429. MutableAttributeSet attributes) {
  430. if (HTML.Tag.A.equals(tag)) {
  431. String name
  432. = (String) attributes.getAttribute(HTML.Attribute.NAME);
  433. if (null != name) {
  434. addKnownNamedAnchor(doc, name);
  435. return true;
  436. }
  437. }
  438. return false;
  439. }
  440. }
  441. private static class Link {
  442. private static final Check FALSE_CHECKER = new Check() {
  443. public boolean check(Link link) { return false; }
  444. public boolean check(URL url) { return false; }
  445. };
  446. private static Check getProtocolChecker(String[] protocols) {
  447. final String[] input
  448. = (String[]) LangUtil.safeCopy(protocols, protocols);
  449. if (0 == input.length) {
  450. return FALSE_CHECKER;
  451. }
  452. return new Check() {
  453. final List list = Arrays.asList(input);
  454. public boolean check(URL url) {
  455. return (null != url) && list.contains(url.getProtocol());
  456. }
  457. };
  458. }
  459. private final URL doc;
  460. private final URL url;
  461. private String toString;
  462. private Link(URL doc, URL url) {
  463. LangUtil.throwIaxIfNull(doc, "doc");
  464. LangUtil.throwIaxIfNull(url, "url");
  465. this.doc = doc;
  466. this.url = url;
  467. }
  468. public boolean equals(Object o) {
  469. if (null == o) {
  470. return false;
  471. }
  472. if (this == o) {
  473. return true;
  474. }
  475. if (Link.class != o.getClass()) {
  476. return false; // exact class
  477. }
  478. Link other = (Link) o;
  479. return doc.equals(other) && url.equals(other);
  480. //return toString().equals(o.toString());
  481. }
  482. public int hashCode() { // XXX
  483. return doc.hashCode() + (url.hashCode() >> 4);
  484. // return toString.hashCode();
  485. }
  486. public String toString() {
  487. if (null == toString) {
  488. toString = url + " linked from " + doc;
  489. }
  490. return toString;
  491. }
  492. private static class Check {
  493. public boolean check(Link link) {
  494. return (null != link) && check(link.url);
  495. }
  496. public boolean check(URL url) {
  497. return (null != url);
  498. }
  499. }
  500. }
  501. }