From 07d38b4fa87e152b7a191048c328755c9e44b619 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 12 Aug 2017 20:25:44 +0100 Subject: [PATCH] [Feature] Allow randomly select User-Agent from a list --- src/plugins/lua/url_redirector.lua | 37 ++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/plugins/lua/url_redirector.lua b/src/plugins/lua/url_redirector.lua index a6e7afc11..aefaf2d75 100644 --- a/src/plugins/lua/url_redirector.lua +++ b/src/plugins/lua/url_redirector.lua @@ -18,6 +18,18 @@ if confighelp then return end +-- Some popular UA +local default_ua = { + 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)', + 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)', + 'Wget/1.9.1', + 'Mozilla/5.0 (Android; Linux armv7l; rv:9.0) Gecko/20111216 Firefox/9.0 Fennec/9.0', + 'Mozilla/5.0 (Windows NT 5.2; RW; rv:7.0a1) Gecko/20091211 SeaMonkey/9.23a1pre', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko', + 'W3C-checklink/4.5 [4.160] libwww-perl/5.823', + 'Lynx/2.8.8dev.3 libwww-FM/2.14 SSL-MM/1.4.1', +} + local redis_params local N = 'url_redirector' local settings = { @@ -28,7 +40,7 @@ local settings = { key_prefix = 'rdr:', -- default hash name check_ssl = false, -- check ssl certificates max_size = 10 * 1024, -- maximum body to process - user_agent = 'Mozilla/5.0 (Maemo; Linux armv7l; rv:10.0.1) Gecko/20100101 Firefox/10.0.1 Fennec/10.0.1', + user_agent = default_ua, redirectors_only = true, -- follow merely redirectors top_urls_key = 'rdr:top_urls', -- key for top urls top_urls_count = 200, -- how many top urls to save @@ -134,11 +146,18 @@ local function resolve_cached(task, orig_url, url, key, param, ntries) cache_url(task, orig_url, url, key, param) else if code == 200 then - rspamd_logger.infox(task, 'found redirect from %s to %s, err code 200', - orig_url, url) + if orig_url == url then + rspamd_logger.infox(task, 'direct url %s, err code 200', + url) + else + rspamd_logger.infox(task, 'found redirect from %s to %s, err code 200', + orig_url, url) + end + cache_url(task, orig_url, url, key, param) + elseif code == 301 or code == 302 then - local loc = headers['Location'] + local loc = headers['location'] rspamd_logger.infox(task, 'found redirect from %s to %s, err code %s', orig_url, loc, code) if loc then @@ -154,6 +173,7 @@ local function resolve_cached(task, orig_url, url, key, param, ntries) resolve_cached(task, orig_url, loc, key, param, ntries + 1) end else + rspamd_logger.infox(task, "no location, headers: %s", headers) cache_url(task, orig_url, url, key, param) end else @@ -164,9 +184,16 @@ local function resolve_cached(task, orig_url, url, key, param, ntries) end end + local ua + if type(settings.user_agent) == 'string' then + ua = settings.user_agent + else + ua = settings.user_agent[math.random(#settings.user_agent)] + end + rspamd_http.request{ headers = { - ['User-Agent'] = settings.user_agent, + ['User-Agent'] = ua, }, url = url, task = task, -- 2.39.5