You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

LinkCheck.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. /* *******************************************************************
  2. * Copyright (c) 2003 Contributors.
  3. * All rights reserved.
  4. * This program and the accompanying materials are made available
  5. * under the terms of the Eclipse Public License v1.0
  6. * which accompanies this distribution and is available at
  7. * http://www.eclipse.org/legal/epl-v10.html
  8. *
  9. * Contributors:
  10. * Wes Isberg initial implementation
  11. * ******************************************************************/
  12. package org.aspectj.testing.util;
  13. import java.io.FileNotFoundException;
  14. import java.io.FileOutputStream;
  15. import java.io.IOException;
  16. import java.io.InputStream;
  17. import java.io.InputStreamReader;
  18. import java.io.OutputStream;
  19. import java.io.PrintStream;
  20. import java.net.MalformedURLException;
  21. import java.net.URL;
  22. import java.net.URLConnection;
  23. import java.util.ArrayList;
  24. import java.util.Arrays;
  25. import java.util.Iterator;
  26. import java.util.List;
  27. import javax.swing.text.MutableAttributeSet;
  28. import javax.swing.text.html.HTML;
  29. import javax.swing.text.html.HTML.Tag;
  30. import javax.swing.text.html.HTMLEditorKit;
  31. import org.aspectj.bridge.IMessage;
  32. import org.aspectj.bridge.IMessageHandler;
  33. import org.aspectj.bridge.MessageHandler;
  34. import org.aspectj.bridge.MessageUtil;
  35. import org.aspectj.util.LangUtil;
  36. //import org.aspectj.util.FileUtil;
  37. /**
  38. * Quick and dirty link checker.
  39. * This checks that links into file: and http: targets work,
  40. * and that links out of file: targets work.
  41. */
  42. public class LinkCheck {
  43. /*
  44. * Known issues:
  45. * - main interface only, though API's easily upgraded
  46. * - https MalformedUrlExceptions on redirect
  47. * - Swing won't quit without System.exit
  48. * - single-threaded
  49. */
  50. static final URL COMMAND_LINE;
  51. static {
  52. URL commandLine = null;
  53. try {
  54. commandLine = new URL("file://commandLine");
  55. } catch (Throwable t) {
  56. }
  57. COMMAND_LINE = commandLine;
  58. }
  59. /** @param args file {-logFile {file} | -printInfo } */
  60. public static void main(String[] args) {
  61. final String syntax = "java "
  62. + LinkCheck.class.getName()
  63. + " file {-log <file> | -printInfo}..";
  64. if ((null == args) || (0 >= args.length)) {
  65. System.err.println(syntax);
  66. System.exit(1);
  67. }
  68. final String startingURL = "file:///" + args[0].replace('\\', '/');
  69. String logFile = null;
  70. boolean printInfo = false;
  71. for (int i = 1; i < args.length; i++) {
  72. if ("-log".equals(args[i]) && ((i+1) < args.length)) {
  73. logFile = args[++i];
  74. } else if ("-printInfo".equals(args[i])) {
  75. printInfo = true;
  76. } else {
  77. System.err.println(syntax);
  78. System.exit(1);
  79. }
  80. }
  81. final boolean useSystemOut = (null == logFile);
  82. final MessageHandler mh;
  83. final OutputStream out;
  84. if (useSystemOut) {
  85. mh = new MessageHandler();
  86. out = null;
  87. } else {
  88. try {
  89. out = new FileOutputStream(logFile);
  90. } catch (FileNotFoundException e) {
  91. e.printStackTrace();
  92. return;
  93. }
  94. final PrintStream ps = new PrintStream(out, true);
  95. final boolean printAll = printInfo;
  96. mh = new MessageHandler() {
  97. public boolean handleMessage(IMessage message) {
  98. if (printAll || !message.isInfo()) {
  99. ps.println(message.toString());
  100. }
  101. return super.handleMessage(message);
  102. }
  103. };
  104. }
  105. Link.Check exists
  106. = Link.getProtocolChecker(new String[] {"file", "http"});
  107. Link.Check contents
  108. = Link.getProtocolChecker(new String[] {"file"});
  109. LinkCheck me = new LinkCheck(mh, exists, contents);
  110. me.addLinkToCheck(COMMAND_LINE, startingURL); // pwd as base?
  111. try {
  112. String label = "checking URLs from " + startingURL;
  113. if (useSystemOut) {
  114. System.out.println(label);
  115. }
  116. MessageUtil.info("START " + label);
  117. long start = System.currentTimeMillis();
  118. me.run();
  119. long duration = (System.currentTimeMillis() - start)/1000;
  120. long numChecked = me.checkedUrls.size();
  121. if (numChecked > 0) {
  122. float cps = (float) duration / (float) numChecked;
  123. StringBuffer sb = new StringBuffer();
  124. sb.append("DONE. Checked " + numChecked);
  125. sb.append(" URL's in " + duration);
  126. sb.append(" seconds (" + cps);
  127. sb.append(" seconds per URL).");
  128. MessageUtil.info("END " + label + ": " + sb);
  129. if (useSystemOut) {
  130. System.out.println(sb.toString());
  131. }
  132. }
  133. MessageUtil.info(MessageUtil.renderCounts(mh));
  134. try {
  135. if (null != out) {
  136. out.flush();
  137. }
  138. } catch (IOException e) {
  139. // ignore
  140. }
  141. if (useSystemOut && (null != logFile)) {
  142. System.out.println("Find log in " + logFile);
  143. }
  144. } finally {
  145. if (null != out) {
  146. try {
  147. out.close();
  148. } catch (IOException e1) {
  149. }
  150. }
  151. System.exit(mh.numMessages(IMessage.ERROR, true)); // XXX dumb swing
  152. }
  153. }
  154. // private static boolean isCheckedFileType(URL url) {
  155. // if (null == url) {
  156. // return false;
  157. // }
  158. // String file = url.getFile();
  159. // return !FileUtil.hasZipSuffix(file)
  160. // && !file.endsWith(".pdf");
  161. // }
  162. private final Messages messages;
  163. private final HTMLEditorKit.Parser parser; // XXX untested - stateful
  164. private final ArrayList<Link> linksToCheck;
  165. private final ArrayList<String> checkedUrls; // String (URL.toString)
  166. private final ArrayList<String> validRefs; // String (URL.toString)
  167. private final ArrayList<String> refsToCheck; // String (URL.toString)
  168. private final Link.Check checkExists;
  169. private final Link.Check checkContents;
  /**
   * Create a checker with empty work lists.
   *
   * @param handler receives all messages - not null
   * @param checkExists consulted before a URL is queued - not null
   * @param checkContents consulted before the links inside a document
   *        are followed - not null
   */
  public LinkCheck(IMessageHandler handler,
      Link.Check checkExists,
      Link.Check checkContents) {
    LangUtil.throwIaxIfNull(handler, "handler");
    LangUtil.throwIaxIfNull(checkExists, "checkExists");
    LangUtil.throwIaxIfNull(checkContents, "checkContents");

    this.messages = new Messages(handler);
    this.checkExists = checkExists;
    this.checkContents = checkContents;

    this.linksToCheck = new ArrayList<Link>();
    this.checkedUrls = new ArrayList<String>();
    this.refsToCheck = new ArrayList<String>();
    this.validRefs = new ArrayList<String>();

    // The anonymous subclass re-declares getParser() as public; the call
    // must stay chained on the anonymous type for that to be visible.
    this.parser = new HTMLEditorKit() {
      public HTMLEditorKit.Parser getParser() {
        return super.getParser();
      }
    }.getParser();
  }
  190. public synchronized void addLinkToCheck(URL doc, String link) {
  191. URL linkURL = makeURL(doc, link);
  192. if (null == linkURL) {
  193. // messages.addingNullLinkFrom(doc);
  194. return;
  195. }
  196. String linkString = linkURL.toString();
  197. if ((null != link) && !checkedUrls.contains(linkString) ) {
  198. if (!checkExists.check(linkURL)) {
  199. checkedUrls.add(linkString);
  200. messages.acceptingUncheckedLink(doc, linkURL);
  201. } else {
  202. Link toAdd = new Link(doc, linkURL);
  203. if (!linksToCheck.contains(toAdd)) { // equals overridden
  204. linksToCheck.add(toAdd);
  205. }
  206. }
  207. }
  208. }
  209. public synchronized void run() {
  210. ArrayList<Link> list = new ArrayList<Link>();
  211. while (0 < linksToCheck.size()) {
  212. messages.checkingLinks(linksToCheck.size());
  213. list.clear();
  214. list.addAll(linksToCheck);
  215. for (final Link link : list) {
  216. String urlString = link.url.toString();
  217. if (!checkedUrls.contains(urlString)) {
  218. checkedUrls.add(urlString);
  219. messages.checkingLink(link);
  220. checkLink(link);
  221. }
  222. }
  223. linksToCheck.removeAll(list);
  224. }
  225. // now check that all named references are accounted for
  226. for (String ref : refsToCheck) {
  227. if (!validRefs.contains(ref)) {
  228. messages.namedReferenceNotFound(ref);
  229. }
  230. }
  231. }
  232. /** @return null if link known or if unable to create */
  233. private URL makeURL(URL doc, String link) {
  234. if (checkedUrls.contains(link)) {
  235. return null;
  236. }
  237. URL result = null;
  238. try {
  239. result = new URL(link);
  240. } catch (MalformedURLException e) {
  241. if (null == doc) {
  242. messages.malformedUrl(null, link, e);
  243. } else {
  244. try {
  245. URL res = new URL(doc, link);
  246. String resultString = res.toString();
  247. if (checkedUrls.contains(resultString)) {
  248. return null;
  249. }
  250. result = res;
  251. } catch (MalformedURLException me) {
  252. messages.malformedUrl(doc, link, me);
  253. }
  254. }
  255. }
  256. return result;
  257. }
  258. /** @param link a Link with a url we can handle */
  259. private void checkLink(final Link link) {
  260. if (handleAsRef(link)) {
  261. return;
  262. }
  263. URL url = link.url;
  264. InputStream input = null;
  265. try {
  266. URLConnection connection = url.openConnection();
  267. if (null == connection) {
  268. messages.cantOpenConnection(url);
  269. return;
  270. }
  271. // get bad urls to fail on read before skipping by type
  272. input = connection.getInputStream();
  273. String type = connection.getContentType();
  274. if (null == type) {
  275. messages.noContentType(link);
  276. } else if (!type.toLowerCase().startsWith("text/")) {
  277. messages.notTextContentType(link);
  278. } else {
  279. boolean addingLinks = checkContents.check(url);
  280. parser.parse(
  281. new InputStreamReader(input),
  282. new LinkListener(url, addingLinks), true);
  283. }
  284. } catch (IOException e) {
  285. messages.exceptionReading(link, e);
  286. } finally {
  287. if (null != input) {
  288. try {
  289. input.close();
  290. } catch (IOException e1) {
  291. // ignore
  292. }
  293. }
  294. }
  295. }
  296. /** @return true if link is to an internal ...#name */
  297. private boolean handleAsRef(Link link) {
  298. String ref = link.url.getRef();
  299. if (!LangUtil.isEmpty(ref)) {
  300. String refString = link.url.toString(); // XXX canonicalize?
  301. if (!refsToCheck.contains(refString)) {
  302. refsToCheck.add(refString);
  303. // add pseudo-link to force reading of ref'd doc XXX hmm
  304. int refLoc = refString.indexOf("#");
  305. if (-1 == refLoc) {
  306. messages.uncommentedReference(link);
  307. } else {
  308. refString = refString.substring(0, refLoc);
  309. addLinkToCheck(link.doc, refString);
  310. }
  311. return true;
  312. }
  313. }
  314. return false;
  315. }
  316. /** LinkListener callback */
  317. private boolean addKnownNamedAnchor(URL doc, String name) {
  318. String namedRef = "#" + name;
  319. try {
  320. String ref = new URL(doc + namedRef).toString();
  321. if (!validRefs.contains(ref)) {
  322. validRefs.add(ref);
  323. }
  324. return true;
  325. } catch (MalformedURLException e) {
  326. messages.malformedUrl(doc, namedRef, e);
  327. return false;
  328. }
  329. }
  330. private class Messages {
  331. private final IMessageHandler handler;
  332. private Messages(IMessageHandler handler) {
  333. LangUtil.throwIaxIfNull(handler, "handler");
  334. this.handler = handler;
  335. }
  336. private void info(String label, Object more) {
  337. MessageUtil.info(handler, label + " " + more);
  338. }
  339. private void fail(String label, Object more, Throwable thrown) {
  340. MessageUtil.fail(handler, label + " " + more, thrown);
  341. }
  342. private void uncommentedReference(Link link) {
  343. info("uncommentedReference", link); // XXX bug?
  344. }
  345. // private void addingNullLinkFrom(URL doc) {
  346. // info("addingNullLinkFrom", doc);
  347. // }
  348. //
  349. // private void noContentCheck(Link link) {
  350. // info("noContentCheck", link);
  351. // }
  352. private void notTextContentType(Link link) {
  353. info("notTextContentType", link);
  354. }
  355. private void noContentType(Link link) {
  356. info("noContentType", link);
  357. }
  358. private void checkingLinks(int i) {
  359. info("checkingLinks", new Integer(i));
  360. }
  361. private void checkingLink(Link link) {
  362. info("checkingLink", link);
  363. }
  364. private void acceptingUncheckedLink(URL doc, URL link) {
  365. info("acceptingUncheckedLink", "doc=" + doc + " link=" + link);
  366. }
  367. // private void cantHandleRefsYet(Link link) {
  368. // info("cantHandleRefsYet", link.url);
  369. // }
  370. private void namedReferenceNotFound(String ref) {
  371. // XXX find all references to this unfound named reference
  372. fail("namedReferenceNotFound", ref, null);
  373. }
  374. private void malformedUrl(URL doc, String link, MalformedURLException e) {
  375. fail("malformedUrl", "doc=" + doc + " link=" + link, e);
  376. }
  377. private void cantOpenConnection(URL url) {
  378. fail("cantOpenConnection", url, null);
  379. }
  380. private void exceptionReading(Link link, IOException e) {
  381. // only info if redirect from http to https
  382. String m = e.getMessage();
  383. if ((m != null)
  384. && (m.contains("protocol"))
  385. && (m.contains("https"))
  386. && "http".equals(link.url.getProtocol())) {
  387. info("httpsRedirect", link);
  388. return;
  389. }
  390. fail("exceptionReading", link, e);
  391. }
  392. private void nullLink(URL doc, Tag tag) {
  393. // ignore - many tags do not have links
  394. }
  395. private void emptyLink(URL doc, Tag tag) {
  396. fail("emptyLink", "doc=" + doc + " tag=" + tag, null);
  397. }
  398. }
  399. /**
  400. * Register named anchors and add any hrefs to the links to check.
  401. */
  402. private class LinkListener extends HTMLEditorKit.ParserCallback {
  403. private final URL doc;
  404. private final boolean addingLinks;
  405. private LinkListener(URL doc, boolean addingLinks) {
  406. this.doc = doc;
  407. this.addingLinks = addingLinks;
  408. }
  409. public void handleStartTag(
  410. HTML.Tag tag,
  411. MutableAttributeSet attributes,
  412. int position) {
  413. handleSimpleTag(tag, attributes, position);
  414. }
  415. public void handleSimpleTag(
  416. HTML.Tag tag,
  417. MutableAttributeSet attributes,
  418. int position) { // XXX use position to emit context?
  419. boolean isNameAnchor = registerIfNamedAnchor(tag, attributes);
  420. if (!addingLinks) {
  421. return;
  422. }
  423. Object key = HTML.Tag.FRAME == tag
  424. ? HTML.Attribute.SRC
  425. : HTML.Attribute.HREF;
  426. String link = (String) attributes.getAttribute(key);
  427. if (null == link) {
  428. if (!isNameAnchor) {
  429. messages.nullLink(doc, tag);
  430. }
  431. } else if (0 == link.length()) {
  432. if (!isNameAnchor) {
  433. messages.emptyLink(doc, tag);
  434. }
  435. } else {
  436. addLinkToCheck(doc, link);
  437. }
  438. }
  439. private boolean registerIfNamedAnchor(
  440. HTML.Tag tag,
  441. MutableAttributeSet attributes) {
  442. if (HTML.Tag.A.equals(tag)) {
  443. String name
  444. = (String) attributes.getAttribute(HTML.Attribute.NAME);
  445. if (null != name) {
  446. addKnownNamedAnchor(doc, name);
  447. return true;
  448. }
  449. }
  450. return false;
  451. }
  452. }
  453. private static class Link {
  454. private static final Check FALSE_CHECKER = new Check() {
  455. public boolean check(Link link) { return false; }
  456. public boolean check(URL url) { return false; }
  457. };
  458. private static Check getProtocolChecker(String[] protocols) {
  459. final String[] input
  460. = (String[]) LangUtil.safeCopy(protocols, protocols);
  461. if (0 == input.length) {
  462. return FALSE_CHECKER;
  463. }
  464. return new Check() {
  465. final List list = Arrays.asList(input);
  466. public boolean check(URL url) {
  467. return (null != url) && list.contains(url.getProtocol());
  468. }
  469. };
  470. }
  471. private final URL doc;
  472. private final URL url;
  473. private String toString;
  474. private Link(URL doc, URL url) {
  475. LangUtil.throwIaxIfNull(doc, "doc");
  476. LangUtil.throwIaxIfNull(url, "url");
  477. this.doc = doc;
  478. this.url = url;
  479. }
  480. public boolean equals(Object o) {
  481. if (null == o) {
  482. return false;
  483. }
  484. if (this == o) {
  485. return true;
  486. }
  487. if (Link.class != o.getClass()) {
  488. return false; // exact class
  489. }
  490. Link other = (Link) o;
  491. return doc.equals(other) && url.equals(other);
  492. //return toString().equals(o.toString());
  493. }
  494. public int hashCode() { // XXX
  495. return doc.hashCode() + (url.hashCode() >> 4);
  496. // return toString.hashCode();
  497. }
  498. public String toString() {
  499. if (null == toString) {
  500. toString = url + " linked from " + doc;
  501. }
  502. return toString;
  503. }
  504. private static class Check {
  505. public boolean check(Link link) {
  506. return (null != link) && check(link.url);
  507. }
  508. public boolean check(URL url) {
  509. return (null != url);
  510. }
  511. }
  512. }
  513. }