mirror of
https://github.com/rspamd/rspamd.git
synced 2024-09-13 23:56:50 +02:00
135 lines
3.7 KiB
Lua
135 lines
3.7 KiB
Lua
--[[
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
|
|
|
|
-- Detects the language of a message and selects the appropriate statfiles for it
|
|
|
|
-- Statfile labels that the selection rules in this file look up on the
-- classifier (one label per message trait).
local many_recipients_label = "many recipients"
local undisclosed_recipients_label = "undisclosed recipients"
local list_label = "maillist"
local long_subject_label = "long subject"
|
|
|
|
--- Collect the label-specific statfiles that apply to a message.
-- Each rule is evaluated only when the classifier has statfiles registered
-- under the corresponding label; the statfiles of every matching rule are
-- appended to a single result list.
-- @param classifier object providing `get_statfile_by_label(label)`
-- @param task object providing `get_recipients(kind)` and `get_header_raw(name)`
-- @return the collected list when it holds more than one statfile,
--         otherwise nil
local function get_specific_statfiles(classifier, task)
  -- Compatibility shim: `table.foreach` does not exist in newer Lua versions.
  -- It is still installed here because other code in this file calls it.
  if not table.foreach then
    table.foreach = function(t, f)
      for k, v in pairs(t) do f(k, v) end
    end
  end

  local spec_st = {}

  -- Append every value of `statfiles` to the result list.
  local function add_all(statfiles)
    for _, v in pairs(statfiles) do
      spec_st[#spec_st + 1] = v
    end
  end

  -- `#` is used instead of `table.maxn`, which was removed in Lua 5.3.
  -- Assumes `get_recipients` returns a plain sequence -- TODO confirm.
  -- NOTE(review): the argument 2 presumably selects the MIME recipients;
  -- verify against the task API.

  -- More than 5 recipients
  local st_many = classifier:get_statfile_by_label(many_recipients_label)
  if st_many then
    local rcpt = task:get_recipients(2)
    if rcpt and #rcpt > 5 then
      add_all(st_many)
    end
  end

  -- Undisclosed recipients: a recipients table is returned but it is empty.
  -- NOTE(review): if `get_recipients` yields nil when there are no
  -- recipients, this rule never fires -- confirm the intended behaviour.
  local st_undisc = classifier:get_statfile_by_label(undisclosed_recipients_label)
  if st_undisc then
    local rcpt = task:get_recipients(2)
    if rcpt and #rcpt == 0 then
      add_all(st_undisc)
    end
  end

  -- Mailing list: the message carries a List-Unsubscribe header
  local st_maillist = classifier:get_statfile_by_label(list_label)
  if st_maillist then
    if task:get_header_raw('List-Unsubscribe') then
      add_all(st_maillist)
    end
  end

  -- Long subject: raw Subject header longer than 150 bytes
  local st_longsubj = classifier:get_statfile_by_label(long_subject_label)
  if st_longsubj then
    local subj = task:get_header_raw('Subject')
    if subj and string.len(subj) > 150 then
      add_all(st_longsubj)
    end
  end

  -- A lone statfile is deliberately not returned (threshold kept from the
  -- original code; presumably at least two statfiles are needed to classify).
  if #spec_st > 1 then
    return spec_st
  end
  return nil
end
|
|
|
|
--- Statfile selector registered for the 'bayes' classifier.
-- Selection order:
--   1. label-specific statfiles from `get_specific_statfiles`; when learning,
--      these are returned immediately;
--   2. unlabeled statfiles whose 'language' parameter equals the detected
--      message language;
--   3. unlabeled statfiles with no 'language' parameter.
-- A selection is returned only when it holds more than one statfile.
-- @param classifier object providing `get_statfiles()` (and the methods used
--        by `get_specific_statfiles`)
-- @param task object providing `get_text_parts()` (and the methods used by
--        `get_specific_statfiles`)
-- @param is_learn truthy when called for learning
-- @param is_spam not used by this selector
-- @return list of selected statfiles, or nil when fewer than two were found
classifiers['bayes'] = function(classifier, task, is_learn, is_spam)
  -- Return the language of the first text part that reports one, or nil.
  local function detect_language()
    -- Guard against a missing parts table so ipairs does not raise.
    for _, part in ipairs(task:get_text_parts() or {}) do
      local lang = part:get_language()
      if lang then
        return lang
      end
    end
    return nil
  end

  -- Append to `dst` every unlabeled statfile whose 'language' parameter is
  -- accepted by the predicate `accept`.
  local function add_unlabeled(dst, accept)
    for _, st in ipairs(classifier:get_statfiles()) do
      -- Skip labeled statfiles
      if not st:get_label() and accept(st:get_param('language')) then
        dst[#dst + 1] = st
      end
    end
  end

  -- Main procedure
  local selected = {}
  local spec_st = get_specific_statfiles(classifier, task)
  if spec_st then
    if is_learn then
      return spec_st
    end
    -- Merge the label-specific statfiles into the selection
    for _, st in ipairs(spec_st) do
      selected[#selected + 1] = st
    end
  end

  -- Select statfiles by language (kept local: the original leaked a global)
  local language = detect_language()
  if language then
    add_unlabeled(selected, function(st_l)
      return st_l and st_l == language
    end)
    if #selected > 1 then
      return selected
    end
  end

  -- Language not detected, or too few statfiles for it were found: fall back
  -- to statfiles that have no language set. Anything already collected above
  -- stays in the selection.
  add_unlabeled(selected, function(st_l)
    return not st_l
  end)
  if #selected > 1 then
    return selected
  end

  return nil
end
|
|
|