You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

LinkCheck.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. /* *******************************************************************
  2. * Copyright (c) 2003 Contributors.
  3. * All rights reserved.
  4. * This program and the accompanying materials are made available
  5. * under the terms of the Eclipse Public License v1.0
  6. * which accompanies this distribution and is available at
  7. * http://www.eclipse.org/legal/epl-v10.html
  8. *
  9. * Contributors:
  10. * Wes Isberg initial implementation
  11. * ******************************************************************/
  12. package org.aspectj.testing.util;
  13. import java.io.FileNotFoundException;
  14. import java.io.FileOutputStream;
  15. import java.io.IOException;
  16. import java.io.InputStream;
  17. import java.io.InputStreamReader;
  18. import java.io.OutputStream;
  19. import java.io.PrintStream;
  20. import java.net.MalformedURLException;
  21. import java.net.URL;
  22. import java.net.URLConnection;
  23. import java.util.ArrayList;
  24. import java.util.Arrays;
  25. import java.util.Iterator;
  26. import java.util.List;
  27. import javax.swing.text.MutableAttributeSet;
  28. import javax.swing.text.html.HTML;
  29. import javax.swing.text.html.HTML.Tag;
  30. import javax.swing.text.html.HTMLEditorKit;
  31. import org.aspectj.bridge.IMessage;
  32. import org.aspectj.bridge.IMessageHandler;
  33. import org.aspectj.bridge.MessageHandler;
  34. import org.aspectj.bridge.MessageUtil;
  35. import org.aspectj.util.LangUtil;
  36. //import org.aspectj.util.FileUtil;
  37. /**
  38. * Quick and dirty link checker.
  39. * This checks that links into file: and http: targets work,
  40. * and that links out of file: targets work.
  41. */
  42. public class LinkCheck {
  43. /*
  44. * Known issues:
  45. * - main interface only, though API's easily upgraded
  46. * - https MalformedUrlExceptions on redirect
  47. * - Swing won't quit without System.exit
  48. * - single-threaded
  49. */
  /**
   * Pseudo-document used as the "referring document" for the link
   * given on the command line; null if the URL could not be created.
   */
  static final URL COMMAND_LINE;
  static {
    URL placeholder;
    try {
      placeholder = new URL("file://commandLine");
    } catch (Throwable ignored) {
      // deliberately best-effort: leave the constant null on any failure
      placeholder = null;
    }
    COMMAND_LINE = placeholder;
  }
  59. /** @param args file {-logFile {file} | -printInfo } */
  60. public static void main(String[] args) {
  61. final String syntax = "java "
  62. + LinkCheck.class.getName()
  63. + " file {-log <file> | -printInfo}..";
  64. if ((null == args) || (0 >= args.length)) {
  65. System.err.println(syntax);
  66. System.exit(1);
  67. }
  68. final String startingURL = "file:///" + args[0].replace('\\', '/');
  69. String logFile = null;
  70. boolean printInfo = false;
  71. for (int i = 1; i < args.length; i++) {
  72. if ("-log".equals(args[i]) && ((i+1) < args.length)) {
  73. logFile = args[++i];
  74. } else if ("-printInfo".equals(args[i])) {
  75. printInfo = true;
  76. } else {
  77. System.err.println(syntax);
  78. System.exit(1);
  79. }
  80. }
  81. final boolean useSystemOut = (null == logFile);
  82. final MessageHandler mh;
  83. final OutputStream out;
  84. if (useSystemOut) {
  85. mh = new MessageHandler();
  86. out = null;
  87. } else {
  88. try {
  89. out = new FileOutputStream(logFile);
  90. } catch (FileNotFoundException e) {
  91. e.printStackTrace();
  92. return;
  93. }
  94. final PrintStream ps = new PrintStream(out, true);
  95. final boolean printAll = printInfo;
  96. mh = new MessageHandler() {
  97. public boolean handleMessage(IMessage message) {
  98. if (printAll || !message.isInfo()) {
  99. ps.println(message.toString());
  100. }
  101. return super.handleMessage(message);
  102. }
  103. };
  104. }
  105. Link.Check exists
  106. = Link.getProtocolChecker(new String[] {"file", "http"});
  107. Link.Check contents
  108. = Link.getProtocolChecker(new String[] {"file"});
  109. LinkCheck me = new LinkCheck(mh, exists, contents);
  110. me.addLinkToCheck(COMMAND_LINE, startingURL); // pwd as base?
  111. try {
  112. String label = "checking URLs from " + startingURL;
  113. if (useSystemOut) {
  114. System.out.println(label);
  115. }
  116. MessageUtil.info("START " + label);
  117. long start = System.currentTimeMillis();
  118. me.run();
  119. long duration = (System.currentTimeMillis() - start)/1000;
  120. long numChecked = me.checkedUrls.size();
  121. if (numChecked > 0) {
  122. float cps = (float) duration / (float) numChecked;
  123. StringBuffer sb = new StringBuffer();
  124. sb.append("DONE. Checked " + numChecked);
  125. sb.append(" URL's in " + duration);
  126. sb.append(" seconds (" + cps);
  127. sb.append(" seconds per URL).");
  128. MessageUtil.info("END " + label + ": " + sb);
  129. if (useSystemOut) {
  130. System.out.println(sb.toString());
  131. }
  132. }
  133. MessageUtil.info(MessageUtil.renderCounts(mh));
  134. try {
  135. if (null != out) {
  136. out.flush();
  137. }
  138. } catch (IOException e) {
  139. // ignore
  140. }
  141. if (useSystemOut && (null != logFile)) {
  142. System.out.println("Find log in " + logFile);
  143. }
  144. } finally {
  145. if (null != out) {
  146. try {
  147. out.close();
  148. } catch (IOException e1) {
  149. }
  150. }
  151. System.exit(mh.numMessages(IMessage.ERROR, true)); // XXX dumb swing
  152. }
  153. }
  154. // private static boolean isCheckedFileType(URL url) {
  155. // if (null == url) {
  156. // return false;
  157. // }
  158. // String file = url.getFile();
  159. // return !FileUtil.hasZipSuffix(file)
  160. // && !file.endsWith(".pdf");
  161. // }
  /** Reports all progress and failures to the handler given at construction. */
  private final Messages messages;
  /** HTML parser reused for every document read. XXX untested - stateful */
  private final HTMLEditorKit.Parser parser;
  /** Links queued by addLinkToCheck and drained by run(). */
  private final ArrayList<Link> linksToCheck;
  /** URL.toString() of every link already handled. */
  private final ArrayList<String> checkedUrls;
  /** URL.toString() of every named anchor found while parsing. */
  private final ArrayList<String> validRefs;
  /** URL.toString() of every link-with-fragment that must match a named anchor. */
  private final ArrayList<String> refsToCheck;
  /** Decides whether a link target is queued for reading at all. */
  private final Link.Check checkExists;
  /** Decides whether links found inside a document are followed. */
  private final Link.Check checkContents;

  /**
   * @param handler the IMessageHandler receiving all messages - not null
   * @param checkExists selects the URLs to open and read - not null
   * @param checkContents selects the documents whose own links
   *        are added for checking - not null
   * @throws IllegalArgumentException if any parameter is null
   *         (as signalled by LangUtil.throwIaxIfNull)
   */
  public LinkCheck(IMessageHandler handler,
      Link.Check checkExists,
      Link.Check checkContents) {
    LangUtil.throwIaxIfNull(handler, "handler");
    LangUtil.throwIaxIfNull(checkExists, "checkExists");
    LangUtil.throwIaxIfNull(checkContents, "checkContents");
    this.checkExists = checkExists;
    this.checkContents = checkContents;
    this.messages = new Messages(handler);
    this.linksToCheck = new ArrayList<Link>();
    this.checkedUrls = new ArrayList<String>();
    this.refsToCheck = new ArrayList<String>();
    this.validRefs = new ArrayList<String>();
    // The anonymous subclass re-declares getParser() as public so it can
    // be invoked here; the call must stay on the anonymous type itself.
    this.parser = new HTMLEditorKit() {
      public HTMLEditorKit.Parser getParser() {
        return super.getParser();
      }
    }.getParser();
  }
  190. public synchronized void addLinkToCheck(URL doc, String link) {
  191. URL linkURL = makeURL(doc, link);
  192. if (null == linkURL) {
  193. // messages.addingNullLinkFrom(doc);
  194. return;
  195. }
  196. String linkString = linkURL.toString();
  197. if ((null != link) && !checkedUrls.contains(linkString) ) {
  198. if (!checkExists.check(linkURL)) {
  199. checkedUrls.add(linkString);
  200. messages.acceptingUncheckedLink(doc, linkURL);
  201. } else {
  202. Link toAdd = new Link(doc, linkURL);
  203. if (!linksToCheck.contains(toAdd)) { // equals overridden
  204. linksToCheck.add(toAdd);
  205. }
  206. }
  207. }
  208. }
  209. public synchronized void run() {
  210. ArrayList<Link> list = new ArrayList<Link>();
  211. while (0 < linksToCheck.size()) {
  212. messages.checkingLinks(linksToCheck.size());
  213. list.clear();
  214. list.addAll(linksToCheck);
  215. for (Iterator<Link> iter = list.iterator(); iter.hasNext();) {
  216. final Link link = iter.next();
  217. String urlString = link.url.toString();
  218. if (!checkedUrls.contains(urlString)) {
  219. checkedUrls.add(urlString);
  220. messages.checkingLink(link);
  221. checkLink(link);
  222. }
  223. }
  224. linksToCheck.removeAll(list);
  225. }
  226. // now check that all named references are accounted for
  227. for (Iterator<String> iter = refsToCheck.iterator(); iter.hasNext();) {
  228. String ref = iter.next();
  229. if (!validRefs.contains(ref)) {
  230. messages.namedReferenceNotFound(ref);
  231. }
  232. }
  233. }
  234. /** @return null if link known or if unable to create */
  235. private URL makeURL(URL doc, String link) {
  236. if (checkedUrls.contains(link)) {
  237. return null;
  238. }
  239. URL result = null;
  240. try {
  241. result = new URL(link);
  242. } catch (MalformedURLException e) {
  243. if (null == doc) {
  244. messages.malformedUrl(null, link, e);
  245. } else {
  246. try {
  247. URL res = new URL(doc, link);
  248. String resultString = res.toString();
  249. if (checkedUrls.contains(resultString)) {
  250. return null;
  251. }
  252. result = res;
  253. } catch (MalformedURLException me) {
  254. messages.malformedUrl(doc, link, me);
  255. }
  256. }
  257. }
  258. return result;
  259. }
  260. /** @param link a Link with a url we can handle */
  261. private void checkLink(final Link link) {
  262. if (handleAsRef(link)) {
  263. return;
  264. }
  265. URL url = link.url;
  266. InputStream input = null;
  267. try {
  268. URLConnection connection = url.openConnection();
  269. if (null == connection) {
  270. messages.cantOpenConnection(url);
  271. return;
  272. }
  273. // get bad urls to fail on read before skipping by type
  274. input = connection.getInputStream();
  275. String type = connection.getContentType();
  276. if (null == type) {
  277. messages.noContentType(link);
  278. } else if (!type.toLowerCase().startsWith("text/")) {
  279. messages.notTextContentType(link);
  280. } else {
  281. boolean addingLinks = checkContents.check(url);
  282. parser.parse(
  283. new InputStreamReader(input),
  284. new LinkListener(url, addingLinks), true);
  285. }
  286. } catch (IOException e) {
  287. messages.exceptionReading(link, e);
  288. } finally {
  289. if (null != input) {
  290. try {
  291. input.close();
  292. } catch (IOException e1) {
  293. // ignore
  294. }
  295. }
  296. }
  297. }
  298. /** @return true if link is to an internal ...#name */
  299. private boolean handleAsRef(Link link) {
  300. String ref = link.url.getRef();
  301. if (!LangUtil.isEmpty(ref)) {
  302. String refString = link.url.toString(); // XXX canonicalize?
  303. if (!refsToCheck.contains(refString)) {
  304. refsToCheck.add(refString);
  305. // add pseudo-link to force reading of ref'd doc XXX hmm
  306. int refLoc = refString.indexOf("#");
  307. if (-1 == refLoc) {
  308. messages.uncommentedReference(link);
  309. } else {
  310. refString = refString.substring(0, refLoc);
  311. addLinkToCheck(link.doc, refString);
  312. }
  313. return true;
  314. }
  315. }
  316. return false;
  317. }
  318. /** LinkListener callback */
  319. private boolean addKnownNamedAnchor(URL doc, String name) {
  320. String namedRef = "#" + name;
  321. try {
  322. String ref = new URL(doc + namedRef).toString();
  323. if (!validRefs.contains(ref)) {
  324. validRefs.add(ref);
  325. }
  326. return true;
  327. } catch (MalformedURLException e) {
  328. messages.malformedUrl(doc, namedRef, e);
  329. return false;
  330. }
  331. }
  332. private class Messages {
  333. private final IMessageHandler handler;
  334. private Messages(IMessageHandler handler) {
  335. LangUtil.throwIaxIfNull(handler, "handler");
  336. this.handler = handler;
  337. }
  338. private void info(String label, Object more) {
  339. MessageUtil.info(handler, label + " " + more);
  340. }
  341. private void fail(String label, Object more, Throwable thrown) {
  342. MessageUtil.fail(handler, label + " " + more, thrown);
  343. }
  344. private void uncommentedReference(Link link) {
  345. info("uncommentedReference", link); // XXX bug?
  346. }
  347. // private void addingNullLinkFrom(URL doc) {
  348. // info("addingNullLinkFrom", doc);
  349. // }
  350. //
  351. // private void noContentCheck(Link link) {
  352. // info("noContentCheck", link);
  353. // }
  354. private void notTextContentType(Link link) {
  355. info("notTextContentType", link);
  356. }
  357. private void noContentType(Link link) {
  358. info("noContentType", link);
  359. }
  360. private void checkingLinks(int i) {
  361. info("checkingLinks", new Integer(i));
  362. }
  363. private void checkingLink(Link link) {
  364. info("checkingLink", link);
  365. }
  366. private void acceptingUncheckedLink(URL doc, URL link) {
  367. info("acceptingUncheckedLink", "doc=" + doc + " link=" + link);
  368. }
  369. // private void cantHandleRefsYet(Link link) {
  370. // info("cantHandleRefsYet", link.url);
  371. // }
  372. private void namedReferenceNotFound(String ref) {
  373. // XXX find all references to this unfound named reference
  374. fail("namedReferenceNotFound", ref, null);
  375. }
  376. private void malformedUrl(URL doc, String link, MalformedURLException e) {
  377. fail("malformedUrl", "doc=" + doc + " link=" + link, e);
  378. }
  379. private void cantOpenConnection(URL url) {
  380. fail("cantOpenConnection", url, null);
  381. }
  382. private void exceptionReading(Link link, IOException e) {
  383. // only info if redirect from http to https
  384. String m = e.getMessage();
  385. if ((m != null)
  386. && (-1 != m.indexOf("protocol"))
  387. && (-1 != m.indexOf("https"))
  388. && "http".equals(link.url.getProtocol())) {
  389. info("httpsRedirect", link);
  390. return;
  391. }
  392. fail("exceptionReading", link, e);
  393. }
  394. private void nullLink(URL doc, Tag tag) {
  395. // ignore - many tags do not have links
  396. }
  397. private void emptyLink(URL doc, Tag tag) {
  398. fail("emptyLink", "doc=" + doc + " tag=" + tag, null);
  399. }
  400. }
  401. /**
  402. * Register named anchors and add any hrefs to the links to check.
  403. */
  404. private class LinkListener extends HTMLEditorKit.ParserCallback {
  405. private final URL doc;
  406. private final boolean addingLinks;
  407. private LinkListener(URL doc, boolean addingLinks) {
  408. this.doc = doc;
  409. this.addingLinks = addingLinks;
  410. }
  411. public void handleStartTag(
  412. HTML.Tag tag,
  413. MutableAttributeSet attributes,
  414. int position) {
  415. handleSimpleTag(tag, attributes, position);
  416. }
  417. public void handleSimpleTag(
  418. HTML.Tag tag,
  419. MutableAttributeSet attributes,
  420. int position) { // XXX use position to emit context?
  421. boolean isNameAnchor = registerIfNamedAnchor(tag, attributes);
  422. if (!addingLinks) {
  423. return;
  424. }
  425. Object key = HTML.Tag.FRAME == tag
  426. ? HTML.Attribute.SRC
  427. : HTML.Attribute.HREF;
  428. String link = (String) attributes.getAttribute(key);
  429. if (null == link) {
  430. if (!isNameAnchor) {
  431. messages.nullLink(doc, tag);
  432. }
  433. } else if (0 == link.length()) {
  434. if (!isNameAnchor) {
  435. messages.emptyLink(doc, tag);
  436. }
  437. } else {
  438. addLinkToCheck(doc, link);
  439. }
  440. }
  441. private boolean registerIfNamedAnchor(
  442. HTML.Tag tag,
  443. MutableAttributeSet attributes) {
  444. if (HTML.Tag.A.equals(tag)) {
  445. String name
  446. = (String) attributes.getAttribute(HTML.Attribute.NAME);
  447. if (null != name) {
  448. addKnownNamedAnchor(doc, name);
  449. return true;
  450. }
  451. }
  452. return false;
  453. }
  454. }
  455. private static class Link {
  456. private static final Check FALSE_CHECKER = new Check() {
  457. public boolean check(Link link) { return false; }
  458. public boolean check(URL url) { return false; }
  459. };
  460. private static Check getProtocolChecker(String[] protocols) {
  461. final String[] input
  462. = (String[]) LangUtil.safeCopy(protocols, protocols);
  463. if (0 == input.length) {
  464. return FALSE_CHECKER;
  465. }
  466. return new Check() {
  467. final List list = Arrays.asList(input);
  468. public boolean check(URL url) {
  469. return (null != url) && list.contains(url.getProtocol());
  470. }
  471. };
  472. }
  473. private final URL doc;
  474. private final URL url;
  475. private String toString;
  476. private Link(URL doc, URL url) {
  477. LangUtil.throwIaxIfNull(doc, "doc");
  478. LangUtil.throwIaxIfNull(url, "url");
  479. this.doc = doc;
  480. this.url = url;
  481. }
  482. public boolean equals(Object o) {
  483. if (null == o) {
  484. return false;
  485. }
  486. if (this == o) {
  487. return true;
  488. }
  489. if (Link.class != o.getClass()) {
  490. return false; // exact class
  491. }
  492. Link other = (Link) o;
  493. return doc.equals(other) && url.equals(other);
  494. //return toString().equals(o.toString());
  495. }
  496. public int hashCode() { // XXX
  497. return doc.hashCode() + (url.hashCode() >> 4);
  498. // return toString.hashCode();
  499. }
  500. public String toString() {
  501. if (null == toString) {
  502. toString = url + " linked from " + doc;
  503. }
  504. return toString;
  505. }
  506. private static class Check {
  507. public boolean check(Link link) {
  508. return (null != link) && check(link.url);
  509. }
  510. public boolean check(URL url) {
  511. return (null != url);
  512. }
  513. }
  514. }
  515. }