|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558 |
- /* *******************************************************************
- * Copyright (c) 2003 Contributors.
- * All rights reserved.
- * This program and the accompanying materials are made available
- * under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * Wes Isberg initial implementation
- * ******************************************************************/
-
- package org.aspectj.testing.util;
-
- import java.io.FileNotFoundException;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.io.OutputStream;
- import java.io.PrintStream;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.net.URLConnection;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Iterator;
- import java.util.List;
-
- import javax.swing.text.MutableAttributeSet;
- import javax.swing.text.html.HTML;
- import javax.swing.text.html.HTML.Tag;
- import javax.swing.text.html.HTMLEditorKit;
-
- import org.aspectj.bridge.IMessage;
- import org.aspectj.bridge.IMessageHandler;
- import org.aspectj.bridge.MessageHandler;
- import org.aspectj.bridge.MessageUtil;
- import org.aspectj.util.LangUtil;
- //import org.aspectj.util.FileUtil;
-
- /**
- * Quick and dirty link checker.
- * This checks that links into file: and http: targets work,
- * and that links out of file: targets work.
- */
- public class LinkCheck {
- /*
- * Known issues:
- * - main interface only, though API's easily upgraded
- * - https MalformedUrlExceptions on redirect
- * - Swing won't quit without System.exit
- * - single-threaded
- */
/**
 * Placeholder document URL; {@code null} if the literal below
 * could not be turned into a URL.
 */
static final URL COMMAND_LINE;
static {
    URL origin;
    try {
        origin = new URL("file://commandLine");
    } catch (Throwable ignored) {
        // any failure simply leaves the constant null
        origin = null;
    }
    COMMAND_LINE = origin;
}
-
- /** @param args file {-logFile {file} | -printInfo } */
- public static void main(String[] args) {
- final String syntax = "java "
- + LinkCheck.class.getName()
- + " file {-log <file> | -printInfo}..";
- if ((null == args) || (0 >= args.length)) {
- System.err.println(syntax);
- System.exit(1);
- }
- final String startingURL = "file:///" + args[0].replace('\\', '/');
- String logFile = null;
- boolean printInfo = false;
- for (int i = 1; i < args.length; i++) {
- if ("-log".equals(args[i]) && ((i+1) < args.length)) {
- logFile = args[++i];
- } else if ("-printInfo".equals(args[i])) {
- printInfo = true;
- } else {
- System.err.println(syntax);
- System.exit(1);
- }
- }
- final boolean useSystemOut = (null == logFile);
- final MessageHandler mh;
- final OutputStream out;
- if (useSystemOut) {
- mh = new MessageHandler();
- out = null;
- } else {
-
- try {
- out = new FileOutputStream(logFile);
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- return;
- }
- final PrintStream ps = new PrintStream(out, true);
- final boolean printAll = printInfo;
- mh = new MessageHandler() {
- public boolean handleMessage(IMessage message) {
- if (printAll || !message.isInfo()) {
- ps.println(message.toString());
- }
- return super.handleMessage(message);
- }
-
- };
- }
- Link.Check exists
- = Link.getProtocolChecker(new String[] {"file", "http"});
- Link.Check contents
- = Link.getProtocolChecker(new String[] {"file"});
- LinkCheck me = new LinkCheck(mh, exists, contents);
- me.addLinkToCheck(COMMAND_LINE, startingURL); // pwd as base?
- try {
- String label = "checking URLs from " + startingURL;
- if (useSystemOut) {
- System.out.println(label);
- }
- MessageUtil.info("START " + label);
- long start = System.currentTimeMillis();
- me.run();
- long duration = (System.currentTimeMillis() - start)/1000;
- long numChecked = me.checkedUrls.size();
- if (numChecked > 0) {
- float cps = (float) duration / (float) numChecked;
- StringBuffer sb = new StringBuffer();
- sb.append("DONE. Checked " + numChecked);
- sb.append(" URL's in " + duration);
- sb.append(" seconds (" + cps);
- sb.append(" seconds per URL).");
- MessageUtil.info("END " + label + ": " + sb);
- if (useSystemOut) {
- System.out.println(sb.toString());
- }
- }
- MessageUtil.info(MessageUtil.renderCounts(mh));
- try {
- if (null != out) {
- out.flush();
- }
- } catch (IOException e) {
- // ignore
- }
- if (useSystemOut && (null != logFile)) {
- System.out.println("Find log in " + logFile);
- }
- } finally {
- if (null != out) {
- try {
- out.close();
- } catch (IOException e1) {
- }
- }
- System.exit(mh.numMessages(IMessage.ERROR, true)); // XXX dumb swing
- }
- }
-
- // private static boolean isCheckedFileType(URL url) {
- // if (null == url) {
- // return false;
- // }
- // String file = url.getFile();
- // return !FileUtil.hasZipSuffix(file)
- // && !file.endsWith(".pdf");
- // }
-
- private final Messages messages;
- private final HTMLEditorKit.Parser parser; // XXX untested - stateful
- private final ArrayList<Link> linksToCheck;
- private final ArrayList<String> checkedUrls; // String (URL.toString)
- private final ArrayList<String> validRefs; // String (URL.toString)
- private final ArrayList<String> refsToCheck; // String (URL.toString)
-
- private final Link.Check checkExists;
- private final Link.Check checkContents;
-
/**
 * Creates a checker with empty work lists.
 *
 * @param handler receives all info and failure messages; checked with
 *        {@code LangUtil.throwIaxIfNull}
 * @param checkExists selects the links that are queued for reading;
 *        checked with {@code LangUtil.throwIaxIfNull}
 * @param checkContents selects the documents whose own links are followed;
 *        checked with {@code LangUtil.throwIaxIfNull}
 */
public LinkCheck(IMessageHandler handler,
        Link.Check checkExists,
        Link.Check checkContents) {
    LangUtil.throwIaxIfNull(handler, "handler");
    LangUtil.throwIaxIfNull(checkExists, "checkExists");
    LangUtil.throwIaxIfNull(checkContents, "checkContents");

    this.checkExists = checkExists;
    this.checkContents = checkContents;
    this.messages = new Messages(handler);

    this.linksToCheck = new ArrayList<Link>();
    this.checkedUrls = new ArrayList<String>();
    this.refsToCheck = new ArrayList<String>();
    this.validRefs = new ArrayList<String>();

    // The anonymous subclass widens getParser() to public; the call must be
    // chained on the anonymous type itself to be accessible from here.
    this.parser = new HTMLEditorKit() {
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }
    }.getParser();
}
-
- public synchronized void addLinkToCheck(URL doc, String link) {
- URL linkURL = makeURL(doc, link);
- if (null == linkURL) {
- // messages.addingNullLinkFrom(doc);
- return;
- }
- String linkString = linkURL.toString();
- if ((null != link) && !checkedUrls.contains(linkString) ) {
- if (!checkExists.check(linkURL)) {
- checkedUrls.add(linkString);
- messages.acceptingUncheckedLink(doc, linkURL);
- } else {
- Link toAdd = new Link(doc, linkURL);
- if (!linksToCheck.contains(toAdd)) { // equals overridden
- linksToCheck.add(toAdd);
- }
- }
- }
- }
-
- public synchronized void run() {
- ArrayList<Link> list = new ArrayList<Link>();
- while (0 < linksToCheck.size()) {
- messages.checkingLinks(linksToCheck.size());
- list.clear();
- list.addAll(linksToCheck);
- for (final Link link : list) {
- String urlString = link.url.toString();
- if (!checkedUrls.contains(urlString)) {
- checkedUrls.add(urlString);
- messages.checkingLink(link);
- checkLink(link);
- }
- }
- linksToCheck.removeAll(list);
- }
- // now check that all named references are accounted for
- for (String ref : refsToCheck) {
- if (!validRefs.contains(ref)) {
- messages.namedReferenceNotFound(ref);
- }
- }
- }
-
- /** @return null if link known or if unable to create */
- private URL makeURL(URL doc, String link) {
- if (checkedUrls.contains(link)) {
- return null;
- }
- URL result = null;
- try {
- result = new URL(link);
- } catch (MalformedURLException e) {
- if (null == doc) {
- messages.malformedUrl(null, link, e);
- } else {
- try {
- URL res = new URL(doc, link);
- String resultString = res.toString();
- if (checkedUrls.contains(resultString)) {
- return null;
- }
- result = res;
- } catch (MalformedURLException me) {
- messages.malformedUrl(doc, link, me);
- }
- }
- }
- return result;
- }
-
- /** @param link a Link with a url we can handle */
- private void checkLink(final Link link) {
- if (handleAsRef(link)) {
- return;
- }
- URL url = link.url;
- InputStream input = null;
- try {
- URLConnection connection = url.openConnection();
- if (null == connection) {
- messages.cantOpenConnection(url);
- return;
- }
- // get bad urls to fail on read before skipping by type
- input = connection.getInputStream();
- String type = connection.getContentType();
- if (null == type) {
- messages.noContentType(link);
- } else if (!type.toLowerCase().startsWith("text/")) {
- messages.notTextContentType(link);
- } else {
- boolean addingLinks = checkContents.check(url);
- parser.parse(
- new InputStreamReader(input),
- new LinkListener(url, addingLinks), true);
- }
- } catch (IOException e) {
- messages.exceptionReading(link, e);
- } finally {
- if (null != input) {
- try {
- input.close();
- } catch (IOException e1) {
- // ignore
- }
- }
- }
- }
-
- /** @return true if link is to an internal ...#name */
- private boolean handleAsRef(Link link) {
- String ref = link.url.getRef();
- if (!LangUtil.isEmpty(ref)) {
- String refString = link.url.toString(); // XXX canonicalize?
- if (!refsToCheck.contains(refString)) {
- refsToCheck.add(refString);
- // add pseudo-link to force reading of ref'd doc XXX hmm
- int refLoc = refString.indexOf("#");
- if (-1 == refLoc) {
- messages.uncommentedReference(link);
- } else {
- refString = refString.substring(0, refLoc);
- addLinkToCheck(link.doc, refString);
- }
- return true;
- }
- }
- return false;
- }
-
- /** LinkListener callback */
- private boolean addKnownNamedAnchor(URL doc, String name) {
- String namedRef = "#" + name;
- try {
- String ref = new URL(doc + namedRef).toString();
- if (!validRefs.contains(ref)) {
- validRefs.add(ref);
- }
- return true;
- } catch (MalformedURLException e) {
- messages.malformedUrl(doc, namedRef, e);
- return false;
- }
- }
-
- private class Messages {
- private final IMessageHandler handler;
- private Messages(IMessageHandler handler) {
- LangUtil.throwIaxIfNull(handler, "handler");
- this.handler = handler;
- }
-
- private void info(String label, Object more) {
- MessageUtil.info(handler, label + " " + more);
- }
-
- private void fail(String label, Object more, Throwable thrown) {
- MessageUtil.fail(handler, label + " " + more, thrown);
- }
-
- private void uncommentedReference(Link link) {
- info("uncommentedReference", link); // XXX bug?
- }
-
- // private void addingNullLinkFrom(URL doc) {
- // info("addingNullLinkFrom", doc);
- // }
- //
- // private void noContentCheck(Link link) {
- // info("noContentCheck", link);
- // }
-
- private void notTextContentType(Link link) {
- info("notTextContentType", link);
- }
-
- private void noContentType(Link link) {
- info("noContentType", link);
- }
-
- private void checkingLinks(int i) {
- info("checkingLinks", new Integer(i));
- }
-
- private void checkingLink(Link link) {
- info("checkingLink", link);
- }
-
- private void acceptingUncheckedLink(URL doc, URL link) {
- info("acceptingUncheckedLink", "doc=" + doc + " link=" + link);
- }
-
- // private void cantHandleRefsYet(Link link) {
- // info("cantHandleRefsYet", link.url);
- // }
-
- private void namedReferenceNotFound(String ref) {
- // XXX find all references to this unfound named reference
- fail("namedReferenceNotFound", ref, null);
- }
-
- private void malformedUrl(URL doc, String link, MalformedURLException e) {
- fail("malformedUrl", "doc=" + doc + " link=" + link, e);
- }
-
- private void cantOpenConnection(URL url) {
- fail("cantOpenConnection", url, null);
- }
-
- private void exceptionReading(Link link, IOException e) {
- // only info if redirect from http to https
- String m = e.getMessage();
- if ((m != null)
- && (m.contains("protocol"))
- && (m.contains("https"))
- && "http".equals(link.url.getProtocol())) {
- info("httpsRedirect", link);
- return;
- }
- fail("exceptionReading", link, e);
- }
-
- private void nullLink(URL doc, Tag tag) {
- // ignore - many tags do not have links
- }
-
- private void emptyLink(URL doc, Tag tag) {
- fail("emptyLink", "doc=" + doc + " tag=" + tag, null);
- }
- }
-
- /**
- * Register named anchors and add any hrefs to the links to check.
- */
- private class LinkListener extends HTMLEditorKit.ParserCallback {
- private final URL doc;
- private final boolean addingLinks;
-
- private LinkListener(URL doc, boolean addingLinks) {
- this.doc = doc;
- this.addingLinks = addingLinks;
- }
-
- public void handleStartTag(
- HTML.Tag tag,
- MutableAttributeSet attributes,
- int position) {
- handleSimpleTag(tag, attributes, position);
- }
-
- public void handleSimpleTag(
- HTML.Tag tag,
- MutableAttributeSet attributes,
- int position) { // XXX use position to emit context?
- boolean isNameAnchor = registerIfNamedAnchor(tag, attributes);
- if (!addingLinks) {
- return;
- }
- Object key = HTML.Tag.FRAME == tag
- ? HTML.Attribute.SRC
- : HTML.Attribute.HREF;
- String link = (String) attributes.getAttribute(key);
-
- if (null == link) {
- if (!isNameAnchor) {
- messages.nullLink(doc, tag);
- }
- } else if (0 == link.length()) {
- if (!isNameAnchor) {
- messages.emptyLink(doc, tag);
- }
- } else {
- addLinkToCheck(doc, link);
- }
- }
-
- private boolean registerIfNamedAnchor(
- HTML.Tag tag,
- MutableAttributeSet attributes) {
- if (HTML.Tag.A.equals(tag)) {
- String name
- = (String) attributes.getAttribute(HTML.Attribute.NAME);
- if (null != name) {
- addKnownNamedAnchor(doc, name);
- return true;
- }
- }
- return false;
- }
-
- }
-
- private static class Link {
- private static final Check FALSE_CHECKER = new Check() {
- public boolean check(Link link) { return false; }
- public boolean check(URL url) { return false; }
- };
- private static Check getProtocolChecker(String[] protocols) {
- final String[] input
- = (String[]) LangUtil.safeCopy(protocols, protocols);
- if (0 == input.length) {
- return FALSE_CHECKER;
- }
- return new Check() {
- final List list = Arrays.asList(input);
- public boolean check(URL url) {
- return (null != url) && list.contains(url.getProtocol());
- }
- };
- }
- private final URL doc;
- private final URL url;
- private String toString;
- private Link(URL doc, URL url) {
- LangUtil.throwIaxIfNull(doc, "doc");
- LangUtil.throwIaxIfNull(url, "url");
- this.doc = doc;
- this.url = url;
- }
- public boolean equals(Object o) {
- if (null == o) {
- return false;
- }
- if (this == o) {
- return true;
- }
- if (Link.class != o.getClass()) {
- return false; // exact class
- }
- Link other = (Link) o;
- return doc.equals(other) && url.equals(other);
- //return toString().equals(o.toString());
- }
-
- public int hashCode() { // XXX
- return doc.hashCode() + (url.hashCode() >> 4);
- // return toString.hashCode();
- }
-
- public String toString() {
- if (null == toString) {
- toString = url + " linked from " + doc;
- }
- return toString;
- }
- private static class Check {
- public boolean check(Link link) {
- return (null != link) && check(link.url);
- }
- public boolean check(URL url) {
- return (null != url);
- }
- }
- }
- }
|