Browse Source

[Fix] More fixes to extract_specific_urls

tags/2.0
Vsevolod Stakhov 4 years ago
parent
commit
ddbbe07faf
2 changed files with 39 additions and 17 deletions
  1. 1
    0
      lualib/lua_selectors/extractors.lua
  2. 38
    17
      lualib/lua_util.lua

+ 1
- 0
lualib/lua_selectors/extractors.lua View File

@@ -283,6 +283,7 @@ e.g. `get_tld`]],
['get_value'] = function(task, args)
  -- Use the first selector argument as the options table (or a fresh one
  -- when it is absent). Note that a caller-supplied table is updated in place.
  local opts = args[1] or {}
  opts.task, opts.no_cache = task, true

  -- Only the first result of extract_specific_urls is forwarded, tagged with
  -- the 'userdata_list' type string.
  local found = lua_util.extract_specific_urls(opts)
  return found, 'userdata_list'
end,

+ 38
- 17
lualib/lua_util.lua View File

@@ -668,6 +668,18 @@ exports.filter_specific_urls = function (urls, params)
local ntlds, neslds = 0, 0

local res = {}
local nres = 0

-- Store `u` in the enclosing `res` table under the key `str`, unless that key
-- is already taken. `nres` counts the entries that were actually stored.
-- Returns true when the entry was added and false when the key already existed.
local function insert_url(str, u)
  if res[str] then
    return false
  end

  res[str] = u
  nres = nres + 1
  return true
end

local function process_single_url(u)
local esld = u:get_tld()
@@ -682,13 +694,15 @@ exports.filter_specific_urls = function (urls, params)
end
end

local str_hash = tostring(u)

if esld then
if not eslds[esld] then
eslds[esld] = {u}
eslds[esld] = {{str_hash, u}}
neslds = neslds + 1
else
if #eslds[esld] < params.esld_limit then
table.insert(eslds[esld], u)
table.insert(eslds[esld], {str_hash, u})
end
end

@@ -696,21 +710,21 @@ exports.filter_specific_urls = function (urls, params)
local tld = table.concat(fun.totable(fun.tail(parts)), '.')

if not tlds[tld] then
tlds[tld] = {u}
tlds[tld] = {{str_hash, u}}
ntlds = ntlds + 1
else
table.insert(tlds[tld], u)
table.insert(tlds[tld], {str_hash, u})
end

-- Extract priority urls that are proven to be malicious.
-- Special cases: mailto urls and urls reported by is_html_displayed() are
-- never treated as priority ones.
-- The protocol test must be written with `~=`: `not x == 'mailto'` parses as
-- `(not x) == 'mailto'`, which is always false and would disable this branch.
if u:get_protocol() ~= 'mailto' and not u:is_html_displayed() then
  if u:is_obscured() then
    insert_url(str_hash, u)
  elseif u:get_user() then
    insert_url(str_hash, u)
  elseif u:is_subject() or u:is_phished() then
    insert_url(str_hash, u)
  end
end
@@ -722,7 +736,7 @@ exports.filter_specific_urls = function (urls, params)
end

local limit = params.limit
limit = limit - #res
limit = limit - nres
if limit <= 0 then limit = 1 end

if neslds <= limit then
@@ -732,7 +746,8 @@ exports.filter_specific_urls = function (urls, params)

for _,lurls in pairs(eslds) do
if #lurls > 0 then
table.insert(res, table.remove(lurls))
local last = table.remove(lurls)
insert_url(last[1], last[2])
limit = limit - 1
item_found = true
end
@@ -740,8 +755,9 @@ exports.filter_specific_urls = function (urls, params)

until limit <= 0 or not item_found

res = exports.values(res)
if params.task and not params.no_cache then
params.task:cache_set(cache_key, urls)
params.task:cache_set(cache_key, res)
end
return res
end
@@ -750,14 +766,16 @@ exports.filter_specific_urls = function (urls, params)
while limit > 0 do
for _,lurls in pairs(tlds) do
if #lurls > 0 then
table.insert(res, table.remove(lurls))
local last = table.remove(lurls)
insert_url(last[1], last[2])
limit = limit - 1
end
end
end

res = exports.values(res)
if params.task and not params.no_cache then
params.task:cache_set(cache_key, urls)
params.task:cache_set(cache_key, res)
end
return res
end
@@ -774,11 +792,13 @@ exports.filter_specific_urls = function (urls, params)
local tld1 = tlds[tlds_keys[i]]
local tld2 = tlds[tlds_keys[ntlds - i]]
if #tld1 > 0 then
table.insert(res, table.remove(tld1))
local last = table.remove(tld1)
insert_url(last[1], last[2])
limit = limit - 1
end
if #tld2 > 0 then
table.insert(res, table.remove(tld2))
local last = table.remove(tld2)
insert_url(last[1], last[2])
limit = limit - 1
end

@@ -787,8 +807,9 @@ exports.filter_specific_urls = function (urls, params)
end
end

res = exports.values(res)
if params.task and not params.no_cache then
params.task:cache_set(cache_key, urls)
params.task:cache_set(cache_key, res)
end

return res

Loading…
Cancel
Save