2015-08-07 15:09:14 +02:00
|
|
|
--[[
|
|
|
|
Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
|
|
|
|
|
2016-02-04 10:37:21 +01:00
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
2015-08-07 15:09:14 +02:00
|
|
|
|
2016-02-04 10:37:21 +01:00
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
2015-08-07 15:09:14 +02:00
|
|
|
|
2016-02-04 10:37:21 +01:00
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
2015-08-07 15:09:14 +02:00
|
|
|
]]--
|
|
|
|
|
|
|
|
-- Detects the language of a message and selects the appropriate statfiles for it
|
|
|
|
|
2016-11-13 12:13:47 +01:00
|
|
|
local fun = require "fun"
|
|
|
|
|
2015-08-07 15:09:14 +02:00
|
|
|
-- Labels under which the classifier is asked for the statfiles dedicated to
-- particular kinds of messages (see classifier:get_statfile_by_label calls).
local many_recipients_label        = 'many recipients'
local undisclosed_recipients_label = 'undisclosed recipients'
local list_label                   = 'maillist'
local long_subject_label           = 'long subject'
|
|
|
|
|
|
|
|
-- Collects the statfiles whose label matches a property of the message.
--
-- Every rule first asks the classifier for the statfiles registered under a
-- label and inspects the task only when that lookup returned something.
--
-- @param classifier object providing get_statfile_by_label(label)
-- @param task object providing get_recipients(n) and get_header_raw(name)
-- @return the list of collected statfiles when it holds more than one entry,
--         nil otherwise
local function get_specific_statfiles(classifier, task)
  local matched = {}

  -- Appends everything produced by iterating `statfiles` to `matched`
  local function collect(statfiles)
    fun.each(function(st) table.insert(matched, st) end, statfiles)
  end

  -- More than 5 recipients
  local many = classifier:get_statfile_by_label(many_recipients_label)
  if many then
    local recipients = task:get_recipients(2)
    if recipients and #recipients > 5 then
      collect(many)
    end
  end

  -- Undisclosed: a recipients list is returned but it is empty
  local undisclosed = classifier:get_statfile_by_label(undisclosed_recipients_label)
  if undisclosed then
    local recipients = task:get_recipients(2)
    if recipients and #recipients == 0 then
      collect(undisclosed)
    end
  end

  -- Maillist: the message carries a List-Unsubscribe header
  local maillist = classifier:get_statfile_by_label(list_label)
  if maillist and task:get_header_raw('List-Unsubscribe') then
    collect(maillist)
  end

  -- Long subject: raw Subject header longer than 150 bytes
  local long_subject = classifier:get_statfile_by_label(long_subject_label)
  if long_subject then
    local subject = task:get_header_raw('Subject')
    if subject and string.len(subject) > 150 then
      collect(long_subject)
    end
  end

  -- A single collected statfile is not reported; at least two are required
  if #matched > 1 then
    return matched
  end

  return nil
end
|
|
|
|
|
2016-11-14 14:54:21 +01:00
|
|
|
-- Statfile selector stored under the 'bayes' key of `classifiers`.
--
-- Selection order:
--   1. label-specific statfiles from get_specific_statfiles(); when is_learn
--      is truthy they are returned immediately, otherwise they seed the result;
--   2. unlabeled statfiles whose 'language' param equals the language reported
--      by the first text part that has one;
--   3. unlabeled statfiles that have no 'language' param at all.
-- After steps 2 and 3 the accumulated list is returned as soon as it holds
-- more than one statfile.
--
-- @param classifier object providing get_statfiles() (and the lookups used by
--        get_specific_statfiles)
-- @param task message task providing get_text_parts()
-- @param is_learn truthy to return label-specific statfiles without merging
-- @return list of selected statfiles, or nil when fewer than two were selected
classifiers['bayes'] = function(classifier, task, is_learn)
  -- Returns the language of the first text part that reports one, else nil
  local function first_part_language()
    local text_parts = task:get_text_parts()
    for _, part in ipairs(text_parts) do
      local lang = part:get_language()
      if lang then
        return lang
      end
    end
    return nil
  end

  local selected = {}

  -- Appends to `selected` every unlabeled statfile whose 'language' param
  -- satisfies the `accepts` predicate
  local function add_unlabeled(accepts)
    for _, st in ipairs(classifier:get_statfiles()) do
      -- Skip labeled statfiles
      if not st:get_label() then
        local st_lang = st:get_param('language')
        if accepts(st_lang) then
          table.insert(selected, st)
        end
      end
    end
  end

  -- Main procedure
  local spec_st = get_specific_statfiles(classifier, task)
  if spec_st then
    if is_learn then
      return spec_st
    end
    -- Merge the label-specific statfiles into the result
    fun.each(function(st) table.insert(selected, st) end, spec_st)
  end

  -- Prefer statfiles configured for the detected language
  local language = first_part_language()
  if language then
    add_unlabeled(function(st_lang)
      return st_lang and st_lang == language
    end)
    if #selected > 1 then
      return selected
    end
  end

  -- Language not detected or not enough language-specific statfiles found:
  -- fall back to the statfiles that declare no language
  add_unlabeled(function(st_lang)
    return not st_lang
  end)
  if #selected > 1 then
    return selected
  end

  return nil
end
|
|
|
|
|