Procházet zdrojové kódy

[Project] Add logic for checking of the text parts

tags/1.8.2
Vsevolod Stakhov před 5 roky
rodič
revize
2a0b5e9c41
1 změnil soubory, kde provedl 71 přidání a 2 odebrání
  1. 71
    2
      lualib/lua_fuzzy.lua

+ 71
- 2
lualib/lua_fuzzy.lua Zobrazit soubor

@@ -34,10 +34,12 @@ local policies = {
min_bytes = 1024,
min_height = 500,
min_width = 500,
min_length = 100, -- short words multiplier
min_length = 32,
text_multiplier = 4.0, -- divide min_bytes by 4 for texts
mime_types = {"*"},
mime_types = {"application/*"},
scan_archives = true,
short_text_direct_hash = true,
text_shingles = true,
}
}

@@ -50,7 +52,9 @@ local policy_schema = ts.shape{
min_length = ts.number + ts.string / tonumber,
text_multiplier = ts.number,
mime_types = ts.array_of(ts.string),
scan_archives = ts.bool,
short_text_direct_hash = ts.bool,
text_shingles = ts.bool,
}


@@ -100,4 +104,69 @@ exports.process_rule = function(rule)
return #rules
end

local function check_length(task, part, rule)
local length_ok = true

if rule.min_bytes then
local bytes = part:get_length()
local adjusted_bytes = bytes

if part:is_text() then
if rule.text_multiplier then
adjusted_bytes = bytes * rule.text_multiplier
end
end

if rule.min_bytes > adjusted_bytes then
lua_util.debugm(N, task, 'skip part of length %s (%s adjusted)' ..
'as it has less than %s bytes',
bytes, adjusted_bytes, rule.min_bytes)
length_ok = false
end
end

return length_ok
end

local function check_text_part(task, part, rule, text)
local allow_direct,allow_shingles = false,false

if rule.text_shingles then
-- Check number of words
local wcnt = text:get_words_count()
if rule.min_length and wcnt < rule.min_length then
lua_util.debugm(N, task, 'text has less than %s words: %s',
rule.min_length, wcnt)
allow_shingles = false
else
allow_shingles = true
end

if not rule.short_text_direct_hash and not allow_shingles then
allow_direct = false
else
allow_direct = check_length(task, part, rule)
end

else
allow_direct = check_length(task, part, rule)
end

return allow_direct,allow_shingles
end

exports.check_mime_part = function(task, part, rule_id)
local rule = rules[rule_id]

if not rule then
rspamd_logger.errx(task, 'cannot find rule with id %s', rule_id)

return false,false
end

if part:is_text() then
return check_text_part(task, part, rule, part:get_text())
end
end

return exports

Načítá se…
Zrušit
Uložit