diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-06 12:41:20 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-06 12:41:20 +0100 |
commit | 933c82f6ed2f4450d3c0cfad7d35a9750918b74e (patch) | |
tree | 9024f2ac0d015365fe87e5290f088ee35741ea0f | |
parent | cb40d91448fee3cd8069a155e42effd39c8b0f92 (diff) | |
download | rspamd-933c82f6ed2f4450d3c0cfad7d35a9750918b74e.tar.gz rspamd-933c82f6ed2f4450d3c0cfad7d35a9750918b74e.zip |
[Project] Neural: Implement scoring
-rw-r--r-- | src/plugins/lua/neural.lua | 169 |
1 files changed, 83 insertions, 86 deletions
diff --git a/src/plugins/lua/neural.lua b/src/plugins/lua/neural.lua index cca6f647c..fdb138321 100644 --- a/src/plugins/lua/neural.lua +++ b/src/plugins/lua/neural.lua @@ -50,6 +50,26 @@ local default_options = { symbol_ham = 'NEURAL_HAM', } +-- Rule structure: +-- * static config fields (see `default_options`) +-- * prefix - name or defined prefix +-- * settings - table of settings indexed by settings id, -1 is used when no settings defined + +-- Rule settings element defines elements for specific settings id: +-- * symbols - static symbols profile (defined by config or extracted from symcache) +-- * name - name of settings id +-- * digest - digest of all symbols +-- * ann - dynamic ANN configuration loaded from Redis +-- * train - train data for ANN (e.g. the currently trained ANN) + +-- Settings ANN table is loaded from Redis and represents dynamic profile for ANN +-- Some elements are directly stored in Redis, ANN is, in turn loaded dynamically +-- * version - version of ANN loaded from redis +-- * ann_key - name of ANN key in Redis +-- * symbols - symbols in THIS PARTICULAR ANN (might be different from set.symbols) +-- * distance - distance between set.symbols and set.ann.symbols +-- * ann - kann object + local settings = { rules = {}, prefix = 'rn', -- Neural network default prefix @@ -212,30 +232,75 @@ local function load_scripts(params) params) end +local function result_to_vector(task, profile) + if not profile.zeros then + -- Fill zeros vector + local zeros = {} + for i=1,meta_functions.count_metatokens() do + zeros[i] = 0.0 + end + for _,_ in ipairs(profile.symbols) do + zeros[#zeros + 1] = 0.0 + end + profile.zeros = zeros + end + + local vec = lua_util.shallowcopy(profile.zeros) + local mt = meta_functions.rspamd_gen_metatokens(task) + + for i,v in ipairs(mt) do + vec[i] = v + end + + task:process_ann_tokens(profile.symbols, vec, #mt) + + return vec +end local function ann_scores_filter(task) for _,rule in pairs(settings.rules) do - local id = '0' - if rule.use_settings then - local sid = task:get_settings_id() - if sid then - id = tostring(sid) - end - end - if rule.per_user then - local r = task:get_principal_recipient() - id = id .. r + local sid = task:get_settings_id() + local ann + local profile + + if sid then + if rule.settings[sid] then + local set = rule.settings[sid] + + if set.ann then + ann = set.ann.ann + profile = set.ann + else + lua_util.debugm(N, task, 'no ann loaded for %s:%s', + rule.prefix, set.name) + end + else + lua_util.debugm(N, task, 'no ann defined in %s for settings id %s', + rule.prefix, sid) + end + else + if rule.settings[-1] then + local set = rule.settings[-1] + + if set.ann then + ann = set.ann.ann + profile = set.ann + else + lua_util.debugm(N, task, 'no ann loaded for %s:%s', + rule.prefix, set.name) + end + else + lua_util.debugm(N, task, 'no default ann for rule %s', + rule.prefix) + end end - if rule.anns[id] and rule.anns[id].ann then - local ann_data = task:get_symbols_tokens() - local mt = meta_functions.rspamd_gen_metatokens(task) - -- Add filtered meta tokens - fun.each(function(e) table.insert(ann_data, e) end, mt) + if ann then + local vec = result_to_vector(task, profile) local score - local out = rule.anns[id].ann:apply1(ann_data) + local out = ann:apply1(vec) score = out[1] local symscore = string.format('%.3f', score) @@ -262,76 +327,6 @@ local function create_ann(n, nlayers) return rspamd_kann.new.kann(t) end -local function create_train_ann(rule, n, id) - local prefix = gen_ann_prefix(rule, id) - if not rule.anns[id] then - rule.anns[id] = {} - end - -- Fix that for flexibe layers number - if rule.anns[id].ann then - if not is_ann_valid(rule, prefix, rule.anns[id].ann) then - rule.anns[id].ann_train = create_ann(n, rule.nlayers) - rule.anns[id].ann = nil - rspamd_logger.infox(rspamd_config, 'invalidate existing ANN, create train ANN %s', prefix) - elseif rule.train.max_usages > 0 and - rule.anns[id].version % rule.train.max_usages == 0 then - -- Forget last ann - rspamd_logger.infox(rspamd_config, 'recreate ANN %s, version %s', prefix, - rule.anns[id].version) - rule.anns[id].ann_train = create_ann(n, rule.nlayers) - else - rule.anns[id].ann_train = rule.anns[id].ann - rspamd_logger.infox(rspamd_config, 'reuse ANN for training %s', prefix) - end - else - rule.anns[id].ann_train = create_ann(n, rule.nlayers) - rspamd_logger.infox(rspamd_config, 'create train ANN %s', prefix) - rule.anns[id].version = 0 - end -end - -local function load_or_invalidate_ann(rule, data, id, ev_base) - local ver = data[2] - local prefix = gen_ann_prefix(rule, id) - - if not ver or not tonumber(ver) then - rspamd_logger.errx(rspamd_config, 'cannot get version for ANN: %s', prefix) - return - end - - local err,ann_data = rspamd_util.zstd_decompress(data[1]) - local ann - - if err or not ann_data then - rspamd_logger.errx(rspamd_config, 'cannot decompress ANN %s: %s', prefix, err) - return - else - ann = rspamd_kann.load(ann_data) - end - - if is_ann_valid(rule, prefix, ann) then - if not rule.anns[id] then rule.anns[id] = {} end - rule.anns[id].ann = ann - rspamd_logger.infox(rspamd_config, 'loaded ANN %s version %s from redis', - prefix, ver) - rule.anns[id].version = tonumber(ver) - else - local function redis_invalidate_cb(_err, _data) - if _err then - rspamd_logger.errx(rspamd_config, 'cannot invalidate ANN %s from redis: %s', prefix, _err) - elseif type(_data) == 'string' then - rspamd_logger.infox(rspamd_config, 'invalidated ANN %s from redis: %s', prefix, _err) - rule.anns[id].version = 0 - end - end - -- Invalidate ANN - rspamd_logger.infox(rspamd_config, 'invalidate ANN %s', prefix) - lua_redis.exec_redis_script(redis_maybe_invalidate_id, - {ev_base = ev_base, is_write = true}, - redis_invalidate_cb, - {prefix}) - end -end local function ann_train_callback(rule, task, score, required_score, id) local train_opts = rule['train'] @@ -901,6 +896,7 @@ local function cleanup_anns(rule, cfg, ev_base) end end end + lua_redis.exec_redis_script(redis_maybe_invalidate_id, {ev_base = ev_base, is_write = true}, invalidate_cb, @@ -1095,6 +1091,7 @@ for k,rule in pairs(settings.rules) do -- We also want to train neural nets when they have enough data rspamd_config:add_periodic(ev_base, 0.0, function(_, _) + -- Clean old ANNs cleanup_anns(rule, cfg, ev_base) return maybe_train_anns(rule, cfg, ev_base, worker) end) |