source.dussan.org Git - rspamd.git/commitdiff
Do not try to detect tld urls inside HTML texts as it generates too much false positives.
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 21 Aug 2012 13:12:02 +0000 (17:12 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 21 Aug 2012 13:12:02 +0000 (17:12 +0400)
Add some prototypes for lua.

src/html.c
src/lua/lua_common.h
src/message.c
src/url.c
src/url.h

index 41dd1df7ddbdcd8cd35f6e8813e641207a71c5ab..51d7ccb16a405d8172b495364e28f399a32f1990 100644 (file)
@@ -711,7 +711,7 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
                p ++;
        }
 
-       if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str) && url_str != NULL) {
+       if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str, TRUE) && url_str != NULL) {
                new = memory_pool_alloc0 (task->task_pool, sizeof (struct uri));
                if (new != NULL) {
                        g_strstrip (url_str);
index 0ec0466e027c6da5095f57d18ad7b9ab39d5817f..1a309e0f0dfd66559abab51eb703813e6652da94 100644 (file)
@@ -51,6 +51,16 @@ gint lua_class_tostring (lua_State *L);
  */
 gpointer lua_check_class (lua_State *L, gint index, const gchar *name);
 
+/**
+ * Initialize lua and bindings
+ */
+void init_lua (struct config_file *cfg);
+
+/**
+ * Load and initialize lua plugins
+ */
+gboolean init_lua_filters (struct config_file *cfg);
+
 /**
  * Open libraries functions
  */
index b51114fd0f5cda19bb402b1a44ed336f0d63ba54..2f9aef978c028921fe638bab80e022b76b53d523 100644 (file)
@@ -1116,7 +1116,7 @@ process_message (struct worker_task *task)
 
                while (p < end) {
                        /* Search to the end of url */
-                       if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str)) {
+                       if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str, FALSE)) {
                                if (url_str != NULL) {
                                        subject_url = memory_pool_alloc0 (task->task_pool, sizeof (struct uri));
                                        if (subject_url != NULL) {
index d7f98ae8f251c9708d717ce37b0702e52ccde56e..803bd05fd33713db4690500e56bee2f4395256fc 100644 (file)
--- a/src/url.c
+++ b/src/url.c
@@ -58,11 +58,13 @@ typedef struct url_match_s {
        gboolean add_prefix;
 } url_match_t;
 
+#define URL_FLAG_NOHTML 0x1
 struct url_matcher {
        const gchar *pattern;
        const gchar *prefix;
        gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
        gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+       gint flags;
 };
 
 static gboolean url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
@@ -79,295 +81,295 @@ static gboolean url_email_end (const gchar *begin, const gchar *end, const gchar
 
 struct url_matcher matchers[] = {
                /* Common prefixes */
-               { "file://",            "",                     url_file_start,                 url_file_end    },
-               { "ftp://",                     "",                     url_web_start,                  url_web_end             },
-               { "sftp://",            "",                     url_web_start,                  url_web_end             },
-               { "http://",            "",                     url_web_start,                  url_web_end             },
-               { "https://",           "",                     url_web_start,                  url_web_end             },
-               { "news://",            "",                     url_web_start,                  url_web_end             },
-               { "nntp://",            "",                     url_web_start,                  url_web_end             },
-               { "telnet://",          "",                     url_web_start,                  url_web_end             },
-               { "webcal://",          "",                     url_web_start,                  url_web_end             },
-               { "mailto://",          "",                     url_email_start,                url_email_end   },
-               { "callto://",          "",                     url_web_start,                  url_web_end             },
-               { "h323:",                      "",                     url_web_start,                  url_web_end             },
-               { "sip:",                       "",                     url_web_start,                  url_web_end             },
-               { "www.",                       "http://",      url_web_start,                  url_web_end             },
-               { "ftp.",                       "ftp://",       url_web_start,                  url_web_end             },
+               { "file://",            "",                     url_file_start,                 url_file_end,   0                                       },
+               { "ftp://",                     "",                     url_web_start,                  url_web_end,    0                                       },
+               { "sftp://",            "",                     url_web_start,                  url_web_end,    0                                       },
+               { "http://",            "",                     url_web_start,                  url_web_end,    0                                       },
+               { "https://",           "",                     url_web_start,                  url_web_end,    0                                       },
+               { "news://",            "",                     url_web_start,                  url_web_end,    0                                       },
+               { "nntp://",            "",                     url_web_start,                  url_web_end,    0                                       },
+               { "telnet://",          "",                     url_web_start,                  url_web_end,    0                                       },
+               { "webcal://",          "",                     url_web_start,                  url_web_end,    0                                       },
+               { "mailto://",          "",                     url_email_start,                url_email_end,  0                                       },
+               { "callto://",          "",                     url_web_start,                  url_web_end,    0                                       },
+               { "h323:",                      "",                     url_web_start,                  url_web_end,    0                                       },
+               { "sip:",                       "",                     url_web_start,                  url_web_end,    0                                       },
+               { "www.",                       "http://",      url_web_start,                  url_web_end,    0                                       },
+               { "ftp.",                       "ftp://",       url_web_start,                  url_web_end,    URL_FLAG_NOHTML         },
                /* TLD domains parts */
-               { ".ac",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ad",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ae",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".aero",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".af",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ag",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ai",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".al",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".am",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".an",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ao",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".aq",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ar",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".arpa",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".as",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".asia",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".at",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".au",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".aw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ax",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".az",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ba",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bb",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bd",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".be",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bf",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bh",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bi",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".biz",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".bj",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bo",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".br",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bs",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bt",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bv",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".by",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".bz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ca",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cat",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".cc",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cd",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cf",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ch",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ci",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ck",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cl",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".co",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".com",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".coop",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".cr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cv",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cx",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cy",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".cz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".de",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".dj",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".dk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".dm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".do",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".dz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ec",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".edu",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".ee",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".eg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".er",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".es",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".et",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".eu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".fi",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".fj",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".fk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".fm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".fo",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".fr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ga",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gb",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gd",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ge",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gf",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gh",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gi",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gl",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gov",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".gp",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gq",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gs",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gt",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".gy",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".hk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".hm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".hn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".hr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ht",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".hu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".id",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ie",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".il",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".im",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".in",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".info",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".int",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".io",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".iq",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ir",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".is",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".it",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".je",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".jm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".jo",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".jobs",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".jp",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ke",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".kg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".kh",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ki",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".km",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".kn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".kp",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".kr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".kw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ky",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".kz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".la",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".lb",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".lc",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".li",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".lk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".lr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ls",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".lt",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".lu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".lv",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ly",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ma",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mc",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".md",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".me",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mh",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mil",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".mk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ml",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mo",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mobi",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".mp",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mq",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ms",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mt",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".museum",            "http://",      url_tld_start,                  url_tld_end             },
-               { ".mv",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mx",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".my",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".mz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".na",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".name",                      "http://",      url_tld_start,                  url_tld_end             },
-               { ".nc",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ne",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".net",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".nf",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ng",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ni",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".nl",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".no",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".np",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".nr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".nu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".nz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".om",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".org",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".pa",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pe",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pf",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ph",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pl",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pro",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".ps",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pt",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".pw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".py",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".qa",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".re",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ro",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".rs",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ru",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".rw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sa",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sb",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sc",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sd",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".se",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sh",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".si",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sj",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sl",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".so",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".st",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".su",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sv",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sx",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sy",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".sz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tc",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".td",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tel",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".tf",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".th",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tj",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tl",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".to",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tp",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tr",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".travel",            "http://",      url_tld_start,                  url_tld_end             },
-               { ".tt",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tv",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tw",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".tz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ua",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ug",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".uk",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".us",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".uy",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".uz",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".va",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".vc",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ve",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".vg",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".vi",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".vn",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".vu",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".wf",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".ws",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".xxx",                       "http://",      url_tld_start,                  url_tld_end             },
-               { ".ye",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".yt",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".za",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".zm",                        "http://",      url_tld_start,                  url_tld_end             },
-               { ".zw",                        "http://",      url_tld_start,                  url_tld_end             },
+               { ".ac",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ad",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ae",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".aero",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".af",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ag",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ai",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".al",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".am",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".an",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ao",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".aq",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ar",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".arpa",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".as",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".asia",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".at",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".au",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".aw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ax",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".az",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ba",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bb",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bd",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".be",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bf",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bh",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bi",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".biz",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bj",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bo",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".br",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bs",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bt",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bv",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".by",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".bz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ca",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cat",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cc",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cd",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cf",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ch",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ci",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ck",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cl",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".co",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".com",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".coop",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cv",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cx",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cy",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".cz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".de",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".dj",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".dk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".dm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".do",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".dz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ec",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".edu",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ee",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".eg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".er",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".es",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".et",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".eu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".fi",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".fj",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".fk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".fm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".fo",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".fr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ga",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gb",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gd",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ge",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gf",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gh",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gi",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gl",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gov",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gp",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gq",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gs",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gt",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".gy",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".hk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".hm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".hn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".hr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ht",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".hu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".id",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ie",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".il",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".im",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".in",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".info",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".int",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".io",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".iq",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ir",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".is",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".it",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".je",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".jm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".jo",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".jobs",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".jp",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ke",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".kg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".kh",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ki",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".km",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".kn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".kp",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".kr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".kw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ky",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".kz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".la",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".lb",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".lc",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".li",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".lk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".lr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ls",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".lt",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".lu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".lv",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ly",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ma",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mc",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".md",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".me",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mh",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mil",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ml",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mo",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mobi",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mp",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mq",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ms",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mt",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".museum",            "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mv",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mx",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".my",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".mz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".na",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".name",                      "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".nc",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ne",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".net",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".nf",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ng",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ni",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".nl",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".no",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".np",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".nr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".nu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".nz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".om",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".org",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pa",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pe",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pf",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ph",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pl",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pro",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ps",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pt",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".pw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".py",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".qa",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".re",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ro",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".rs",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ru",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".rw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sa",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sb",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sc",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sd",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".se",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sh",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".si",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sj",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sl",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".so",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".st",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".su",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sv",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sx",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sy",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".sz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tc",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".td",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tel",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tf",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".th",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tj",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tl",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".to",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tp",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tr",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".travel",            "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tt",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tv",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".tz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ua",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ug",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".uk",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".us",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".uy",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".uz",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".va",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".vc",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ve",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".vg",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".vi",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".vn",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".vu",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".wf",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ws",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".xxx",                       "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".ye",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".yt",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".za",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".zm",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
+               { ".zw",                        "http://",      url_tld_start,                  url_tld_end,    URL_FLAG_NOHTML         },
                /* Likely emails */
-               { "@",                          "mailto://",url_email_start,            url_email_end   }
+               { "@",                          "mailto://",url_email_start,            url_email_end,  0                                       }
 };
 
 struct url_match_scanner {
@@ -1504,7 +1506,7 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
                        p = begin;
                }
                while (p < end) {
-                       if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str)) {
+                       if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, is_html)) {
                                if (url_str != NULL) {
                                        new = memory_pool_alloc0 (pool, sizeof (struct uri));
                                        ex = memory_pool_alloc0 (pool, sizeof (struct process_exception));
@@ -1549,7 +1551,7 @@ url_parse_text (memory_pool_t * pool, struct worker_task *task, struct mime_text
 }
 
 gboolean
-url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str)
+url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str, gboolean is_html)
 {
        const gchar                    *end, *pos;
        gint                            idx, l;
@@ -1563,6 +1565,10 @@ url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start,
                }
                else {
                        matcher = &matchers[idx];
+                       if ((matcher->flags & URL_FLAG_NOHTML) && is_html) {
+                               /* Patterns flagged URL_FLAG_NOHTML must not be matched inside HTML text */
+                               return FALSE;
+                       }
                        m.pattern = matcher->pattern;
                        m.prefix = matcher->prefix;
                        m.add_prefix = FALSE;
index f90820e75300b3169f61f40376b3796c6f2b3e1c..ed7cadbf13107630d3db66451f71f9cf1119fbd7 100644 (file)
--- a/src/url.h
+++ b/src/url.h
@@ -101,7 +101,7 @@ enum uri_errno parse_uri(struct uri *uri, gchar *uristring, memory_pool_t *pool)
  * @param url_str storage for url string(or NULL)
  * @return TRUE if url is found in specified text
  */
-gboolean url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str);
+gboolean url_try_text (memory_pool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str, gboolean is_html);
 
 /*
  * Return text representation of url parsing error