diff options
author | Andrew Lewis <nerf@judo.za.org> | 2020-12-17 11:28:09 +0200 |
---|---|---|
committer | Andrew Lewis <nerf@judo.za.org> | 2020-12-17 11:28:09 +0200 |
commit | 960b608d352e8c820b0725d898d78959ca59ee7d (patch) | |
tree | 9d9f192e1c3161a804e94e1aed1c0a63b77929c0 /test | |
parent | 5ce6a2d97ff655651e4bba7737b834d866b94c94 (diff) | |
download | rspamd-960b608d352e8c820b0725d898d78959ca59ee7d.tar.gz rspamd-960b608d352e8c820b0725d898d78959ca59ee7d.zip |
[Feature] Add controller endpoint for training neural
- Move neural functions to library
- Parameterise spawn_train
- neural plugin: Fix store_pool_only when autotrain is true
- neural plugin: Use cache_set instead of mempool
- Add test
Diffstat (limited to 'test')
-rw-r--r-- | test/functional/cases/330_neural/001_autotrain.robot (renamed from test/functional/cases/330_neural.robot) | 0 | ||||
-rw-r--r-- | test/functional/cases/330_neural/002_manualtrain.robot | 75 | ||||
-rw-r--r-- | test/functional/configs/neural_noauto.conf | 85 | ||||
-rw-r--r-- | test/functional/lib/rspamd.robot | 4 | ||||
-rw-r--r-- | test/functional/lua/neural.lua | 39 | ||||
-rw-r--r-- | test/functional/util/nn_unpack.lua | 16 |
6 files changed, 217 insertions, 2 deletions
diff --git a/test/functional/cases/330_neural.robot b/test/functional/cases/330_neural/001_autotrain.robot index 8ce342838..8ce342838 100644 --- a/test/functional/cases/330_neural.robot +++ b/test/functional/cases/330_neural/001_autotrain.robot diff --git a/test/functional/cases/330_neural/002_manualtrain.robot b/test/functional/cases/330_neural/002_manualtrain.robot new file mode 100644 index 000000000..22a42120e --- /dev/null +++ b/test/functional/cases/330_neural/002_manualtrain.robot @@ -0,0 +1,75 @@ +*** Settings *** +Suite Setup Neural Setup +Suite Teardown Neural Teardown +Library Process +Library ${TESTDIR}/lib/rspamd.py +Resource ${TESTDIR}/lib/rspamd.robot +Variables ${TESTDIR}/lib/vars.py + +*** Variables *** +${URL_TLD} ${TESTDIR}/../lua/unit/test_tld.dat +${CONFIG} ${TESTDIR}/configs/neural_noauto.conf +${MESSAGE} ${TESTDIR}/messages/spam_message.eml +${REDIS_SCOPE} Suite +${RSPAMD_SCOPE} Suite + +*** Test Cases *** +Collect training vectors & train manually + Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL","SAVE_NN_ROW"]} + Expect Symbol SPAM_SYMBOL + # Save neural inputs for later + ${SPAM_ROW} = Get File ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0] + Remove File ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0] + Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL","SAVE_NN_ROW"]} + Expect Symbol HAM_SYMBOL + # Save neural inputs for later + ${HAM_ROW} = Get File ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0] + Remove File ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0] + ${HAM_ROW} = Run ${RSPAMADM} lua -a ${HAM_ROW} ${TESTDIR}/util/nn_unpack.lua + ${HAM_ROW} = Evaluate json.loads("${HAM_ROW}") + ${SPAM_ROW} = Run ${RSPAMADM} lua -a ${SPAM_ROW} ${TESTDIR}/util/nn_unpack.lua + ${SPAM_ROW} = Evaluate json.loads("${SPAM_ROW}") + ${HAM_VEC} = Evaluate [${HAM_ROW}] * 10 + ${SPAM_VEC} = Evaluate [${SPAM_ROW}] * 10 + ${json1} = Evaluate json.dumps({"spam_vec": ${SPAM_VEC}, "ham_vec": ${HAM_VEC}, "rule": "SHORT"}) + # Save variables for use in inverse training + Set Suite Variable ${HAM_VEC} + Set Suite Variable ${SPAM_VEC} + HTTP POST ${LOCAL_ADDR} ${PORT_CONTROLLER} /plugins/neural/learn ${json1} + Sleep 2s Wait for neural to be loaded + +Check Neural HAM + Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Do Not Expect Symbol NEURAL_SPAM_SHORT + Expect Symbol NEURAL_HAM_SHORT + +Check Neural SPAM + Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Do Not Expect Symbol NEURAL_HAM_SHORT + Expect Symbol NEURAL_SPAM_SHORT + +Train inverse + ${json2} = Evaluate json.dumps({"spam_vec": ${HAM_VEC}, "ham_vec": ${SPAM_VEC}, "rule": "SHORT"}) + HTTP POST ${LOCAL_ADDR} ${PORT_CONTROLLER} /plugins/neural/learn ${json2} + Sleep 2s Wait for neural to be loaded + +Check Neural HAM - inverse + Scan File ${MESSAGE} Settings={symbols_enabled = ["HAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Do Not Expect Symbol NEURAL_HAM_SHORT + Expect Symbol NEURAL_SPAM_SHORT + +Check Neural SPAM - inverse + Scan File ${MESSAGE} Settings={symbols_enabled = ["SPAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]} + Do Not Expect Symbol NEURAL_SPAM_SHORT + Expect Symbol NEURAL_HAM_SHORT + +*** Keywords *** +Neural Setup + ${TMPDIR} = Make Temporary Directory + Set Suite Variable ${TMPDIR} + Run Redis + Generic Setup + +Neural Teardown + Shutdown Process With Children ${REDIS_PID} + Normal Teardown diff --git a/test/functional/configs/neural_noauto.conf b/test/functional/configs/neural_noauto.conf new file mode 100644 index 000000000..55f0a4283 --- /dev/null +++ b/test/functional/configs/neural_noauto.conf @@ -0,0 +1,85 @@ +options = { + url_tld = "${URL_TLD}" + pidfile = "${TMPDIR}/rspamd.pid" + lua_path = "${INSTALLROOT}/share/rspamd/lib/?.lua" + filters = []; + explicit_modules = ["settings"]; +} + +logging = { + type = "file", + level = "debug" + filename = "${TMPDIR}/rspamd.log" + log_usec = true; +} +metric = { + name = "default", + actions = { + reject = 100500, + add_header = 50500, + } + unknown_weight = 1 +} +worker { + type = normal + bind_socket = ${LOCAL_ADDR}:${PORT_NORMAL} + count = 1 + task_timeout = 10s; +} +worker { + type = controller + bind_socket = ${LOCAL_ADDR}:${PORT_CONTROLLER} + count = 1 + secure_ip = ["127.0.0.1", "::1"]; + stats_path = "${TMPDIR}/stats.ucl" +} + +modules { + path = "${TESTDIR}/../../src/plugins/lua/" +} + +lua = "${TESTDIR}/lua/test_coverage.lua"; + +neural { + rules { + SHORT { + train { + learning_rate = 0.001; + max_usages = 2; + spam_score = 1; + ham_score = -1; + max_trains = 10; + max_iterations = 250; + store_pool_only = true; + } + symbol_spam = "NEURAL_SPAM_SHORT"; + symbol_ham = "NEURAL_HAM_SHORT"; + ann_expire = 86400; + watch_interval = 0.5; + } + SHORT_PCA { + train { + learning_rate = 0.001; + max_usages = 2; + spam_score = 1; + ham_score = -1; + max_trains = 10; + max_iterations = 250; + store_pool_only = true; + } + symbol_spam = "NEURAL_SPAM_SHORT_PCA"; + symbol_ham = "NEURAL_HAM_SHORT_PCA"; + ann_expire = 86400; + watch_interval = 0.5; + max_inputs = 2; + } + } + allow_local = true; + +} +redis { + servers = "${REDIS_ADDR}:${REDIS_PORT}"; + expand_keys = true; +} + +lua = "${TESTDIR}/lua/neural.lua"; diff --git a/test/functional/lib/rspamd.robot b/test/functional/lib/rspamd.robot index 53d4e70f9..0b6cc6f38 100644 --- a/test/functional/lib/rspamd.robot +++ b/test/functional/lib/rspamd.robot @@ -209,6 +209,7 @@ Run Rspamd ... ELSE Make Temporary Directory Set Directory Ownership ${tmpdir} ${RSPAMD_USER} ${RSPAMD_GROUP} ${template} = Get File ${CONFIG} + # TODO: stop using this; we have Lupa now FOR ${i} IN @{vargs} ${newvalue} = Replace Variables ${${i}} Set To Dictionary ${d} ${i}=${newvalue} @@ -218,7 +219,8 @@ Run Rspamd Log ${config} Create File ${tmpdir}/rspamd.conf ${config} ${result} = Run Process ${RSPAMD} -u ${RSPAMD_USER} -g ${RSPAMD_GROUP} - ... -c ${tmpdir}/rspamd.conf env:TMPDIR=${tmpdir} env:DBDIR=${tmpdir} env:LD_LIBRARY_PATH=${TESTDIR}/../../contrib/aho-corasick stdout=DEVNULL stderr=DEVNULL + ... -c ${tmpdir}/rspamd.conf env:TMPDIR=${tmpdir} env:DBDIR=${tmpdir} env:LD_LIBRARY_PATH=${TESTDIR}/../../contrib/aho-corasick + ... env:RSPAMD_INSTALLROOT=${INSTALLROOT} stdout=DEVNULL stderr=DEVNULL Run Keyword If ${result.rc} != 0 Log ${result.stderr} Should Be Equal As Integers ${result.rc} 0 Wait Until Keyword Succeeds 10x 1 sec Check Pidfile ${tmpdir}/rspamd.pid timeout=0.5s diff --git a/test/functional/lua/neural.lua b/test/functional/lua/neural.lua index 70857d429..ccdad1b68 100644 --- a/test/functional/lua/neural.lua +++ b/test/functional/lua/neural.lua @@ -1,3 +1,5 @@ +local logger = require "rspamd_logger" + rspamd_config:register_symbol({ name = 'SPAM_SYMBOL', score = 5.0, @@ -21,4 +23,39 @@ rspamd_config:register_symbol({ callback = function() return true, 'Fires always' end -})
\ No newline at end of file +}) + +rspamd_config.SAVE_NN_ROW = { + callback = function(task) + local fname = os.tmpname() + task:cache_set('nn_row_tmpfile', fname) + return true, 1.0, fname + end +} + +rspamd_config.SAVE_NN_ROW_IDEMPOTENT = { + callback = function(task) + local function tohex(str) + return (str:gsub('.', function (c) + return string.format('%02X', string.byte(c)) + end)) + end + local fname = task:cache_get('nn_row_tmpfile') + if not fname then + return + end + local f, err = io.open(fname, 'w') + if not f then + logger.errx(task, err) + return + end + f:write(tohex(task:cache_get('neural_vec_mpack') or '')) + f:close() + return + end, + type = 'idempotent', + flags = 'explicit_disable', + priority = 10, +} + +dofile(rspamd_env.INSTALLROOT .. "/share/rspamd/rules/controller/init.lua") diff --git a/test/functional/util/nn_unpack.lua b/test/functional/util/nn_unpack.lua new file mode 100644 index 000000000..fee98d5a0 --- /dev/null +++ b/test/functional/util/nn_unpack.lua @@ -0,0 +1,16 @@ +local ucl = require "ucl" + +local function unhex(str) + return (str:gsub('..', function (cc) + return string.char(tonumber(cc, 16)) + end)) +end + +local parser = ucl.parser() +local ok, err = parser:parse_string(unhex(arg[1]), 'msgpack') +if not ok then + io.stderr:write(err) + os.exit(1) +end + +print(ucl.to_format(parser:get_object(), 'json-compact')) |