url_match_t *match);
gint flags;
- gsize patlen;
};
static gboolean url_file_start (struct url_callback_data *cb,
/*
 * Built-in table of URL matchers.
 *
 * Each initializer lists, in order:
 *   - the pattern text to search for (read elsewhere as `.pattern`);
 *   - a second string -- presumably the scheme prefix to assume when the
 *     pattern itself carries none (e.g. "http://" for "www."); TODO confirm
 *     against the struct url_matcher declaration;
 *   - the start and end callbacks used for this kind of match;
 *   - the flags value (0 or URL_FLAG_NOHTML).
 *
 * The whole table is appended to the scanner's matchers array during
 * initialisation.
 */
struct url_matcher static_matchers[] = {
/* Common prefixes */
{"file://", "", url_file_start, url_file_end,
- 0, 0},
+ 0},
{"file:\\\\", "", url_file_start, url_file_end,
- 0, 0},
+ 0},
{"ftp://", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"ftp:\\\\", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"sftp://", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"http:", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"https:", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"news://", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"nntp://", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"telnet://", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"tel:", "", url_tel_start, url_tel_end,
- 0, 0},
+ 0},
{"webcal://", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"mailto:", "", url_email_start, url_email_end,
- 0, 0},
+ 0},
{"callto:", "", url_tel_start, url_tel_end,
- 0, 0},
+ 0},
{"h323:", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"sip:", "", url_web_start, url_web_end,
- 0, 0},
+ 0},
{"www.", "http://", url_web_start, url_web_end,
- URL_FLAG_NOHTML, 0},
+ URL_FLAG_NOHTML},
{"ftp.", "ftp://", url_web_start, url_web_end,
- URL_FLAG_NOHTML, 0},
+ URL_FLAG_NOHTML},
/* Likely emails */
{"@", "mailto://", url_email_start, url_email_end,
- URL_FLAG_NOHTML, 0}
+ URL_FLAG_NOHTML}
};
struct url_callback_data {
RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
m.pattern = rspamd_multipattern_get_pattern (url_scanner->search_trie,
rspamd_multipattern_get_npatterns (url_scanner->search_trie) - 1);
- m.patlen = strlen (m.pattern);
+
g_array_append_val (url_scanner->matchers, m);
}
static_matchers[i].pattern,
RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
}
-
- static_matchers[i].patlen = strlen (static_matchers[i].pattern);
}
g_array_append_vals (sc->matchers, static_matchers, n);
struct url_matcher *matcher;
const gchar *start, *pos, *p;
struct rspamd_url *url = context;
- gint ndots = 1;
+ gint ndots;
matcher = &g_array_index (url_scanner->matchers, struct url_matcher,
strnum);
+ ndots = 1;
if (matcher->flags & URL_FLAG_STAR_MATCH) {
/* Skip one more tld component */
- ndots = 2;
+ ndots ++;
}
pos = text + match_start;
ndots--;
pos = p + 1;
}
+ else {
+ pos = p;
+ }
p--;
}
'example.com',
'example.co.za',
'example.in.net',
- 'example.kawasaki.jp',
+ 'example.star.kawasaki.jp',
'example.net',
'example.net.in',
- 'example.nom.br',
+ 'example.star.nom.br',
'example.org',
'example.org.ac',
'example.ru.com',
for _, p in ipairs(prefixes) do
local test = rspamd_util.get_tld(p .. d)
if (test ~= d) then
- table.insert(worry, 'util.get_tld:' .. p .. d .. ':' .. test)
+ local opt = string.format('util.get_tld:p=%s;d=%s;got=%s', p, d, test)
+ table.insert(worry, opt)
return
end
local u = rspamd_url.create(pool, p .. d)
test = u:get_tld()
if (test ~= d) then
- table.insert(worry, 'url.get_tld:' .. p .. d .. ':' .. test)
+ local opt = string.format('url.create:p=%s;d=%s;got=%s', p, d, test)
+ table.insert(worry, opt)
return
end
end
end)()
end
if (#worry == 0) then
- return true, "no worry"
+ return true, 1.0, "no worry"
else
- return true, table.concat(worry, ",")
+ return true, 1.0, worry
end
end
})