--- /dev/null
+/* *******************************************************************
+ * Copyright (c) 2003 Contributors.
+ * All rights reserved.
+ * This program and the accompanying materials are made available
+ * under the terms of the Common Public License v1.0
+ * which accompanies this distribution and is available at
+ * http://www.eclipse.org/legal/cpl-v10.html
+ *
+ * Contributors:
+ * Wes Isberg initial implementation
+ * ******************************************************************/
+
+package org.aspectj.testing.util;
+
+import java.io.*;
+import java.net.*;
+import java.util.*;
+
+import javax.swing.text.MutableAttributeSet;
+import javax.swing.text.html.*;
+import javax.swing.text.html.HTML.Tag;
+
+import org.aspectj.bridge.*;
+import org.aspectj.bridge.IMessageHandler;
+import org.aspectj.util.LangUtil;
+import org.aspectj.util.FileUtil;
+
+/**
+ * Quick and dirty link checker.
+ * This checks that links into file: and http: targets work,
+ * and that links out of file: targets work.
+ */
+public class LinkCheck {
+ /*
+ * Known issues:
+ * - main interface only, though API's easily upgraded
+ * - https MalformedUrlExceptions on redirect
+ * - Swing won't quit without System.exit
+ * - single-threaded
+ */
+ static final URL COMMAND_LINE;
+ static {
+ URL commandLine = null;
+ try {
+ commandLine = new URL("file://commandLine");
+ } catch (Throwable t) {
+ }
+ COMMAND_LINE = commandLine;
+ }
+
+ /** @param args file {-logFile {file} | -printInfo } */
+ public static void main(String[] args) {
+ final String syntax = "java "
+ + LinkCheck.class.getName()
+ + " file {-log <file> | -printInfo}..";
+ if ((null == args) || (0 >= args.length)) {
+ System.err.println(syntax);
+ System.exit(1);
+ }
+ final String startingURL = "file:///" + args[0].replace('\\', '/');
+ String logFile = null;
+ boolean printInfo = false;
+ for (int i = 1; i < args.length; i++) {
+ if ("-log".equals(args[i]) && ((i+1) < args.length)) {
+ logFile = args[++i];
+ } else if ("-printInfo".equals(args[i])) {
+ printInfo = true;
+ } else {
+ System.err.println(syntax);
+ System.exit(1);
+ }
+ }
+ final boolean useSystemOut = (null == logFile);
+ final MessageHandler mh;
+ final OutputStream out;
+ if (useSystemOut) {
+ mh = new MessageHandler();
+ out = null;
+ } else {
+
+ try {
+ out = new FileOutputStream(logFile);
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ return;
+ }
+ final PrintStream ps = new PrintStream(out, true);
+ final boolean printAll = printInfo;
+ mh = new MessageHandler() {
+ public boolean handleMessage(IMessage message) {
+ if (printAll || !message.isInfo()) {
+ ps.println(message.toString());
+ }
+ return super.handleMessage(message);
+ }
+
+ };
+ }
+ Link.Check exists
+ = Link.getProtocolChecker(new String[] {"file", "http"});
+ Link.Check contents
+ = Link.getProtocolChecker(new String[] {"file"});
+ LinkCheck me = new LinkCheck(mh, exists, contents);
+ me.addLinkToCheck(COMMAND_LINE, startingURL); // pwd as base?
+ try {
+ String label = "checking URLs from " + startingURL;
+ if (useSystemOut) {
+ System.out.println(label);
+ }
+ MessageUtil.info("START " + label);
+ long start = System.currentTimeMillis();
+ me.run();
+ long duration = (System.currentTimeMillis() - start)/1000;
+ long numChecked = me.checkedUrls.size();
+ if (numChecked > 0) {
+ float cps = (float) duration / (float) numChecked;
+ StringBuffer sb = new StringBuffer();
+ sb.append("DONE. Checked " + numChecked);
+ sb.append(" URL's in " + duration);
+ sb.append(" seconds (" + cps);
+ sb.append(" seconds per URL).");
+ MessageUtil.info("END " + label + ": " + sb);
+ if (useSystemOut) {
+ System.out.println(sb.toString());
+ }
+ }
+ MessageUtil.info(MessageUtil.renderCounts(mh));
+ try {
+ if (null != out) {
+ out.flush();
+ }
+ } catch (IOException e) {
+ // ignore
+ }
+ if (useSystemOut && (null != logFile)) {
+ System.out.println("Find log in " + logFile);
+ }
+ } finally {
+ if (null != out) {
+ try {
+ out.close();
+ } catch (IOException e1) {
+ }
+ }
+ System.exit(mh.numMessages(IMessage.ERROR, true)); // XXX dumb swing
+ }
+ }
+
+ private static boolean isCheckedFileType(URL url) {
+ if (null == url) {
+ return false;
+ }
+ String file = url.getFile();
+ return !FileUtil.hasZipSuffix(file)
+ && !file.endsWith(".pdf");
+ }
+
+ private final Messages messages;
+ private final HTMLEditorKit.Parser parser; // XXX untested - stateful
+ private final ArrayList linksToCheck; // Link
+ private final ArrayList checkedUrls; // String (URL.toString)
+ private final ArrayList validRefs; // String (URL.toString)
+ private final ArrayList refsToCheck; // String (URL.toString)
+
+ private final Link.Check checkExists;
+ private final Link.Check checkContents;
+
+ public LinkCheck(IMessageHandler handler,
+ Link.Check checkExists,
+ Link.Check checkContents) {
+ LangUtil.throwIaxIfNull(handler, "handler");
+ LangUtil.throwIaxIfNull(checkExists, "checkExists");
+ LangUtil.throwIaxIfNull(checkContents, "checkContents");
+ this.messages = new Messages(handler);
+ linksToCheck = new ArrayList();
+ checkedUrls = new ArrayList();
+ refsToCheck = new ArrayList();
+ validRefs = new ArrayList();
+ parser = new HTMLEditorKit() {
+ public HTMLEditorKit.Parser getParser() {
+ return super.getParser();
+ }
+ }
+ .getParser();
+ this.checkExists = checkExists;
+ this.checkContents = checkContents;
+ }
+
+ public synchronized void addLinkToCheck(URL doc, String link) {
+ URL linkURL = makeURL(doc, link);
+ if (null == linkURL) {
+// messages.addingNullLinkFrom(doc);
+ return;
+ }
+ String linkString = linkURL.toString();
+ if ((null != link) && !checkedUrls.contains(linkString) ) {
+ if (!checkExists.check(linkURL)) {
+ checkedUrls.add(linkString);
+ messages.acceptingUncheckedLink(doc, linkURL);
+ } else {
+ Link toAdd = new Link(doc, linkURL);
+ if (!linksToCheck.contains(toAdd)) { // equals overridden
+ linksToCheck.add(toAdd);
+ }
+ }
+ }
+ }
+
+ public synchronized void run() {
+ ArrayList list = new ArrayList();
+ while (0 < linksToCheck.size()) {
+ messages.checkingLinks(linksToCheck.size());
+ list.clear();
+ list.addAll(linksToCheck);
+ for (Iterator iter = list.iterator(); iter.hasNext();) {
+ final Link link = (Link) iter.next();
+ String urlString = link.url.toString();
+ if (!checkedUrls.contains(urlString)) {
+ checkedUrls.add(urlString);
+ messages.checkingLink(link);
+ checkLink(link);
+ }
+ }
+ linksToCheck.removeAll(list);
+ }
+ // now check that all named references are accounted for
+ for (Iterator iter = refsToCheck.iterator(); iter.hasNext();) {
+ String ref = (String) iter.next();
+ if (!validRefs.contains(ref)) {
+ messages.namedReferenceNotFound(ref);
+ }
+ }
+ }
+
+ /** @return null if link known or if unable to create */
+ private URL makeURL(URL doc, String link) {
+ if (checkedUrls.contains(link)) {
+ return null;
+ }
+ URL result = null;
+ try {
+ result = new URL(link);
+ } catch (MalformedURLException e) {
+ if (null == doc) {
+ messages.malformedUrl(null, link, e);
+ } else {
+ try {
+ URL res = new URL(doc, link);
+ String resultString = res.toString();
+ if (checkedUrls.contains(resultString)) {
+ return null;
+ }
+ result = res;
+ } catch (MalformedURLException me) {
+ messages.malformedUrl(doc, link, me);
+ }
+ }
+ }
+ return result;
+ }
+
+ /** @param link a Link with a url we can handle */
+ private void checkLink(final Link link) {
+ if (handleAsRef(link)) {
+ return;
+ }
+ URL url = link.url;
+ InputStream input = null;
+ try {
+ URLConnection connection = url.openConnection();
+ if (null == connection) {
+ messages.cantOpenConnection(url);
+ return;
+ }
+ // get bad urls to fail on read before skipping by type
+ input = connection.getInputStream();
+ String type = connection.getContentType();
+ if (null == type) {
+ messages.noContentType(link);
+ } else if (!type.toLowerCase().startsWith("text/")) {
+ messages.notTextContentType(link);
+ } else {
+ boolean addingLinks = checkContents.check(url);
+ parser.parse(
+ new InputStreamReader(input),
+ new LinkListener(url, addingLinks), true);
+ }
+ } catch (IOException e) {
+ messages.exceptionReading(link, e);
+ } finally {
+ if (null != input) {
+ try {
+ input.close();
+ } catch (IOException e1) {
+ // ignore
+ }
+ }
+ }
+ }
+
+ /** @return true if link is to an internal ...#name */
+ private boolean handleAsRef(Link link) {
+ String ref = link.url.getRef();
+ if (!LangUtil.isEmpty(ref)) {
+ String refString = link.url.toString(); // XXX canonicalize?
+ if (!refsToCheck.contains(refString)) {
+ refsToCheck.add(refString);
+ // add pseudo-link to force reading of ref'd doc XXX hmm
+ int refLoc = refString.indexOf("#");
+ if (-1 == refLoc) {
+ messages.uncommentedReference(link);
+ } else {
+ refString = refString.substring(0, refLoc);
+ addLinkToCheck(link.doc, refString);
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /** LinkListener callback */
+ private boolean addKnownNamedAnchor(URL doc, String name) {
+ String namedRef = "#" + name;
+ try {
+ String ref = new URL(doc + namedRef).toString();
+ if (!validRefs.contains(ref)) {
+ validRefs.add(ref);
+ }
+ return true;
+ } catch (MalformedURLException e) {
+ messages.malformedUrl(doc, namedRef, e);
+ return false;
+ }
+ }
+
+ private class Messages {
+ private final IMessageHandler handler;
+ private Messages(IMessageHandler handler) {
+ LangUtil.throwIaxIfNull(handler, "handler");
+ this.handler = handler;
+ }
+
+ private void info(String label, Object more) {
+ MessageUtil.info(handler, label + " " + more);
+ }
+
+ private void fail(String label, Object more, Throwable thrown) {
+ MessageUtil.fail(handler, label + " " + more, thrown);
+ }
+
+ private void uncommentedReference(Link link) {
+ info("uncommentedReference", link); // XXX bug?
+ }
+
+ private void addingNullLinkFrom(URL doc) {
+ info("addingNullLinkFrom", doc);
+ }
+
+ private void noContentCheck(Link link) {
+ info("noContentCheck", link);
+ }
+
+ private void notTextContentType(Link link) {
+ info("notTextContentType", link);
+ }
+
+ private void noContentType(Link link) {
+ info("noContentType", link);
+ }
+
+ private void checkingLinks(int i) {
+ info("checkingLinks", new Integer(i));
+ }
+
+ private void checkingLink(Link link) {
+ info("checkingLink", link);
+ }
+
+ private void acceptingUncheckedLink(URL doc, URL link) {
+ info("acceptingUncheckedLink", "doc=" + doc + " link=" + link);
+ }
+
+ private void cantHandleRefsYet(Link link) {
+ info("cantHandleRefsYet", link.url);
+ }
+
+ private void namedReferenceNotFound(String ref) {
+ // XXX find all references to this unfound named reference
+ fail("namedReferenceNotFound", ref, null);
+ }
+
+ private void malformedUrl(URL doc, String link, MalformedURLException e) {
+ fail("malformedUrl", "doc=" + doc + " link=" + link, e);
+ }
+
+ private void cantOpenConnection(URL url) {
+ fail("cantOpenConnection", url, null);
+ }
+
+ private void exceptionReading(Link link, IOException e) {
+ // only info if redirect from http to https
+ String m = e.getMessage();
+ if ((m != null)
+ && (-1 != m.indexOf("protocol"))
+ && (-1 != m.indexOf("https"))
+ && "http".equals(link.url.getProtocol())) {
+ info("httpsRedirect", link);
+ return;
+ }
+ fail("exceptionReading", link, e);
+ }
+
+ private void nullLink(URL doc, Tag tag) {
+ // ignore - many tags do not have links
+ }
+
+ private void emptyLink(URL doc, Tag tag) {
+ fail("emptyLink", "doc=" + doc + " tag=" + tag, null);
+ }
+ }
+
+ /**
+ * Register named anchors and add any hrefs to the links to check.
+ */
+ private class LinkListener extends HTMLEditorKit.ParserCallback {
+ private final URL doc;
+ private final boolean addingLinks;
+
+ private LinkListener(URL doc, boolean addingLinks) {
+ this.doc = doc;
+ this.addingLinks = addingLinks;
+ }
+
+ public void handleStartTag(
+ HTML.Tag tag,
+ MutableAttributeSet attributes,
+ int position) {
+ handleSimpleTag(tag, attributes, position);
+ }
+
+ public void handleSimpleTag(
+ HTML.Tag tag,
+ MutableAttributeSet attributes,
+ int position) { // XXX use position to emit context?
+ boolean isNameAnchor = registerIfNamedAnchor(tag, attributes);
+ if (!addingLinks) {
+ return;
+ }
+ Object key = HTML.Tag.FRAME == tag
+ ? HTML.Attribute.SRC
+ : HTML.Attribute.HREF;
+ String link = (String) attributes.getAttribute(key);
+
+ if (null == link) {
+ if (!isNameAnchor) {
+ messages.nullLink(doc, tag);
+ }
+ } else if (0 == link.length()) {
+ if (!isNameAnchor) {
+ messages.emptyLink(doc, tag);
+ }
+ } else {
+ addLinkToCheck(doc, link);
+ }
+ }
+
+ private boolean registerIfNamedAnchor(
+ HTML.Tag tag,
+ MutableAttributeSet attributes) {
+ if (HTML.Tag.A.equals(tag)) {
+ String name
+ = (String) attributes.getAttribute(HTML.Attribute.NAME);
+ if (null != name) {
+ addKnownNamedAnchor(doc, name);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ }
+
+ private static class Link {
+ private static final Check FALSE_CHECKER = new Check() {
+ public boolean check(Link link) { return false; }
+ public boolean check(URL url) { return false; }
+ };
+ private static Check getProtocolChecker(String[] protocols) {
+ final String[] input
+ = (String[]) LangUtil.safeCopy(protocols, protocols);
+ if (0 == input.length) {
+ return FALSE_CHECKER;
+ }
+ return new Check() {
+ final List list = Arrays.asList(input);
+ public boolean check(URL url) {
+ return (null != url) && list.contains(url.getProtocol());
+ }
+ };
+ }
+ private final URL doc;
+ private final URL url;
+ private String toString;
+ private Link(URL doc, URL url) {
+ LangUtil.throwIaxIfNull(doc, "doc");
+ LangUtil.throwIaxIfNull(url, "url");
+ this.doc = doc;
+ this.url = url;
+ }
+ public boolean equals(Object o) {
+ if (null == o) {
+ return false;
+ }
+ if (this == o) {
+ return true;
+ }
+ if (Link.class != o.getClass()) {
+ return false; // exact class
+ }
+ Link other = (Link) o;
+ return doc.equals(other) && url.equals(other);
+ //return toString().equals(o.toString());
+ }
+
+ public int hashCode() { // XXX
+ return doc.hashCode() + (url.hashCode() >> 4);
+// return toString.hashCode();
+ }
+
+ public String toString() {
+ if (null == toString) {
+ toString = url + " linked from " + doc;
+ }
+ return toString;
+ }
+ private static class Check {
+ public boolean check(Link link) {
+ return (null != link) && check(link.url);
+ }
+ public boolean check(URL url) {
+ return (null != url);
+ }
+ }
+ }
+}