summaryrefslogtreecommitdiffstats
path: root/test
diff options
context:
space:
mode:
author Andrew Lewis <nerf@judo.za.org> 2020-12-17 11:28:09 +0200
committer Andrew Lewis <nerf@judo.za.org> 2020-12-17 11:28:09 +0200
commit 960b608d352e8c820b0725d898d78959ca59ee7d (patch)
tree 9d9f192e1c3161a804e94e1aed1c0a63b77929c0 /test
parent 5ce6a2d97ff655651e4bba7737b834d866b94c94 (diff)
download rspamd-960b608d352e8c820b0725d898d78959ca59ee7d.tar.gz
rspamd-960b608d352e8c820b0725d898d78959ca59ee7d.zip
[Feature] Add controller endpoint for training neural
- Move neural functions to library
- Parameterise spawn_train
- neural plugin: Fix store_pool_only when autotrain is true
- neural plugin: Use cache_set instead of mempool
- Add test
Diffstat (limited to 'test')
-rw-r--r-- test/functional/cases/330_neural/001_autotrain.robot (renamed from test/functional/cases/330_neural.robot) 0
-rw-r--r-- test/functional/cases/330_neural/002_manualtrain.robot 75
-rw-r--r-- test/functional/configs/neural_noauto.conf 85
-rw-r--r-- test/functional/lib/rspamd.robot 4
-rw-r--r-- test/functional/lua/neural.lua 39
-rw-r--r-- test/functional/util/nn_unpack.lua 16
6 files changed, 217 insertions, 2 deletions
diff --git a/test/functional/cases/330_neural.robot b/test/functional/cases/330_neural/001_autotrain.robot
index 8ce342838..8ce342838 100644
--- a/test/functional/cases/330_neural.robot
+++ b/test/functional/cases/330_neural/001_autotrain.robot
diff --git a/test/functional/cases/330_neural/002_manualtrain.robot b/test/functional/cases/330_neural/002_manualtrain.robot
new file mode 100644
index 000000000..22a42120e
--- /dev/null
+++ b/test/functional/cases/330_neural/002_manualtrain.robot
@@ -0,0 +1,75 @@
+*** Settings ***
+Suite Setup     Neural Setup
+Suite Teardown  Neural Teardown
+Library         Process
+Library         ${TESTDIR}/lib/rspamd.py
+Resource        ${TESTDIR}/lib/rspamd.robot
+Variables       ${TESTDIR}/lib/vars.py
+
+*** Variables ***
+${URL_TLD}       ${TESTDIR}/../lua/unit/test_tld.dat
+${CONFIG}        ${TESTDIR}/configs/neural_noauto.conf
+${MESSAGE}       ${TESTDIR}/messages/spam_message.eml
+${REDIS_SCOPE}   Suite
+${RSPAMD_SCOPE}  Suite
+
+*** Test Cases ***
+Collect training vectors & train manually
+  Scan File  ${MESSAGE}  Settings={symbols_enabled = ["SPAM_SYMBOL","SAVE_NN_ROW"]}
+  Expect Symbol  SPAM_SYMBOL
+  # Save neural inputs for later
+  ${SPAM_ROW} =  Get File  ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0]  # the symbol option is the path of the dump file
+  Remove File  ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0]
+  Scan File  ${MESSAGE}  Settings={symbols_enabled = ["HAM_SYMBOL","SAVE_NN_ROW"]}
+  Expect Symbol  HAM_SYMBOL
+  # Save neural inputs for later
+  ${HAM_ROW} =  Get File  ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0]
+  Remove File  ${SCAN_RESULT}[symbols][SAVE_NN_ROW][options][0]
+  ${HAM_ROW} =  Run  ${RSPAMADM} lua -a ${HAM_ROW} ${TESTDIR}/util/nn_unpack.lua  # hex dump -> compact JSON text
+  ${HAM_ROW} =  Evaluate  json.loads("${HAM_ROW}")
+  ${SPAM_ROW} =  Run  ${RSPAMADM} lua -a ${SPAM_ROW} ${TESTDIR}/util/nn_unpack.lua
+  ${SPAM_ROW} =  Evaluate  json.loads("${SPAM_ROW}")
+  ${HAM_VEC} =  Evaluate  [${HAM_ROW}] * 10  # repeat the single row ten times
+  ${SPAM_VEC} =  Evaluate  [${SPAM_ROW}] * 10
+  ${json1} =  Evaluate  json.dumps({"spam_vec": ${SPAM_VEC}, "ham_vec": ${HAM_VEC}, "rule": "SHORT"})
+  # Save variables for use in inverse training
+  Set Suite Variable  ${HAM_VEC}
+  Set Suite Variable  ${SPAM_VEC}
+  HTTP  POST  ${LOCAL_ADDR}  ${PORT_CONTROLLER}  /plugins/neural/learn  ${json1}
+  Sleep  2s  Wait for neural to be loaded
+
+Check Neural HAM
+  Scan File  ${MESSAGE}  Settings={symbols_enabled = ["HAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+  Do Not Expect Symbol  NEURAL_SPAM_SHORT
+  Expect Symbol  NEURAL_HAM_SHORT
+
+Check Neural SPAM
+  Scan File  ${MESSAGE}  Settings={symbols_enabled = ["SPAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+  Do Not Expect Symbol  NEURAL_HAM_SHORT
+  Expect Symbol  NEURAL_SPAM_SHORT
+
+Train inverse
+  ${json2} =  Evaluate  json.dumps({"spam_vec": ${HAM_VEC}, "ham_vec": ${SPAM_VEC}, "rule": "SHORT"})  # spam and ham vectors swapped on purpose
+  HTTP  POST  ${LOCAL_ADDR}  ${PORT_CONTROLLER}  /plugins/neural/learn  ${json2}
+  Sleep  2s  Wait for neural to be loaded
+
+Check Neural HAM - inverse
+  Scan File  ${MESSAGE}  Settings={symbols_enabled = ["HAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+  Do Not Expect Symbol  NEURAL_HAM_SHORT
+  Expect Symbol  NEURAL_SPAM_SHORT
+
+Check Neural SPAM - inverse
+  Scan File  ${MESSAGE}  Settings={symbols_enabled = ["SPAM_SYMBOL"];groups_enabled=["neural"];symbols_disabled = ["NEURAL_LEARN"]}
+  Do Not Expect Symbol  NEURAL_SPAM_SHORT
+  Expect Symbol  NEURAL_HAM_SHORT
+
+*** Keywords ***
+Neural Setup
+  ${TMPDIR} =  Make Temporary Directory
+  Set Suite Variable  ${TMPDIR}
+  Run Redis
+  Generic Setup
+
+Neural Teardown
+  Shutdown Process With Children  ${REDIS_PID}
+  Normal Teardown
diff --git a/test/functional/configs/neural_noauto.conf b/test/functional/configs/neural_noauto.conf
new file mode 100644
index 000000000..55f0a4283
--- /dev/null
+++ b/test/functional/configs/neural_noauto.conf
@@ -0,0 +1,85 @@
+options = { # Global options; the dollar-brace placeholders in this file are substituted by the test harness before rspamd is started
+ url_tld = "${URL_TLD}"
+ pidfile = "${TMPDIR}/rspamd.pid"
+ lua_path = "${INSTALLROOT}/share/rspamd/lib/?.lua"
+ filters = [];
+ explicit_modules = ["settings"];
+}
+
+logging = { # Debug-level logging to a file under the temporary directory, with log_usec enabled
+ type = "file",
+ level = "debug"
+ filename = "${TMPDIR}/rspamd.log"
+ log_usec = true;
+}
+metric = { # NOTE(review): thresholds below are very large, presumably so no action is ever triggered in this suite - confirm
+ name = "default",
+ actions = {
+ reject = 100500,
+ add_header = 50500,
+ }
+ unknown_weight = 1
+}
+worker { # Scanner worker (type normal), single process
+ type = normal
+ bind_socket = ${LOCAL_ADDR}:${PORT_NORMAL}
+ count = 1
+ task_timeout = 10s;
+}
+worker { # Controller worker; the manual-train test POSTs to /plugins/neural/learn on this socket
+ type = controller
+ bind_socket = ${LOCAL_ADDR}:${PORT_CONTROLLER}
+ count = 1
+ secure_ip = ["127.0.0.1", "::1"]; # IPv4 and IPv6 loopback; presumably treated as trusted clients - confirm
+ stats_path = "${TMPDIR}/stats.ucl"
+}
+
+modules { # Lua plugins are loaded from the source tree rather than from an installed location
+ path = "${TESTDIR}/../../src/plugins/lua/"
+}
+
+lua = "${TESTDIR}/lua/test_coverage.lua"; # extra Lua script; a coverage helper judging by its name - confirm
+
+neural { # Two rules with identical train settings; SHORT_PCA additionally sets max_inputs
+ rules {
+ SHORT { # rule name referenced by the test's learn request ("rule": "SHORT")
+ train {
+ learning_rate = 0.001;
+ max_usages = 2;
+ spam_score = 1;
+ ham_score = -1;
+ max_trains = 10;
+ max_iterations = 250;
+ store_pool_only = true; # NOTE(review): presumably makes the plugin only keep the vector on the task instead of auto-training (this is the "noauto" config) - confirm against the neural plugin
+ }
+ symbol_spam = "NEURAL_SPAM_SHORT"; # asserted by the "Check Neural ..." test cases
+ symbol_ham = "NEURAL_HAM_SHORT"; # asserted by the "Check Neural ..." test cases
+ ann_expire = 86400;
+ watch_interval = 0.5; # NOTE(review): presumably seconds between checks for a new network; the test sleeps 2s after learning - confirm
+ }
+ SHORT_PCA { # same settings as SHORT apart from the symbol names and max_inputs
+ train {
+ learning_rate = 0.001;
+ max_usages = 2;
+ spam_score = 1;
+ ham_score = -1;
+ max_trains = 10;
+ max_iterations = 250;
+ store_pool_only = true;
+ }
+ symbol_spam = "NEURAL_SPAM_SHORT_PCA";
+ symbol_ham = "NEURAL_HAM_SHORT_PCA";
+ ann_expire = 86400;
+ watch_interval = 0.5;
+ max_inputs = 2; # NOTE(review): the rule name suggests this limits the input count via PCA - confirm
+ }
+ }
+ allow_local = true; # NOTE(review): presumably lets the module process mail from local addresses, which is what the tests send - confirm
+
+}
+redis { # Redis connection; presumably the instance started by the suite setup keyword "Run Redis" - confirm
+ servers = "${REDIS_ADDR}:${REDIS_PORT}";
+ expand_keys = true;
+}
+
+lua = "${TESTDIR}/lua/neural.lua"; # registers the test symbols used by the suite (SPAM_SYMBOL, SAVE_NN_ROW, ...)
diff --git a/test/functional/lib/rspamd.robot b/test/functional/lib/rspamd.robot
index 53d4e70f9..0b6cc6f38 100644
--- a/test/functional/lib/rspamd.robot
+++ b/test/functional/lib/rspamd.robot
@@ -209,6 +209,7 @@ Run Rspamd
... ELSE Make Temporary Directory
Set Directory Ownership ${tmpdir} ${RSPAMD_USER} ${RSPAMD_GROUP}
${template} = Get File ${CONFIG}
+ # TODO: stop using this; we have Lupa now
FOR ${i} IN @{vargs}
${newvalue} = Replace Variables ${${i}}
Set To Dictionary ${d} ${i}=${newvalue}
@@ -218,7 +219,8 @@ Run Rspamd
Log ${config}
Create File ${tmpdir}/rspamd.conf ${config}
${result} = Run Process ${RSPAMD} -u ${RSPAMD_USER} -g ${RSPAMD_GROUP}
- ... -c ${tmpdir}/rspamd.conf env:TMPDIR=${tmpdir} env:DBDIR=${tmpdir} env:LD_LIBRARY_PATH=${TESTDIR}/../../contrib/aho-corasick stdout=DEVNULL stderr=DEVNULL
+ ... -c ${tmpdir}/rspamd.conf env:TMPDIR=${tmpdir} env:DBDIR=${tmpdir} env:LD_LIBRARY_PATH=${TESTDIR}/../../contrib/aho-corasick
+ ... env:RSPAMD_INSTALLROOT=${INSTALLROOT} stdout=DEVNULL stderr=DEVNULL
Run Keyword If ${result.rc} != 0 Log ${result.stderr}
Should Be Equal As Integers ${result.rc} 0
Wait Until Keyword Succeeds 10x 1 sec Check Pidfile ${tmpdir}/rspamd.pid timeout=0.5s
diff --git a/test/functional/lua/neural.lua b/test/functional/lua/neural.lua
index 70857d429..ccdad1b68 100644
--- a/test/functional/lua/neural.lua
+++ b/test/functional/lua/neural.lua
@@ -1,3 +1,5 @@
+local logger = require "rspamd_logger"
+
rspamd_config:register_symbol({
name = 'SPAM_SYMBOL',
score = 5.0,
@@ -21,4 +23,39 @@ rspamd_config:register_symbol({
callback = function()
return true, 'Fires always'
end
-}) \ No newline at end of file
+})
+
+rspamd_config.SAVE_NN_ROW = { -- Test symbol: picks a temp file name, stores it in the task cache and returns it with the result
+ callback = function(task)
+ local fname = os.tmpname() -- file that SAVE_NN_ROW_IDEMPOTENT opens for writing later
+ task:cache_set('nn_row_tmpfile', fname) -- hand the path over through the task cache
+ return true, 1.0, fname -- the robot test reads this path back from [symbols][SAVE_NN_ROW][options][0]
+ end
+}
+
+rspamd_config.SAVE_NN_ROW_IDEMPOTENT = { -- Writes the cached 'neural_vec_mpack' value, hex-encoded, into the file named by SAVE_NN_ROW
+ callback = function(task)
+ local function tohex(str) -- encode each character of str as two uppercase hex digits
+ return (str:gsub('.', function (c) -- outer parentheses keep only the string and drop gsub's match count
+ return string.format('%02X', string.byte(c))
+ end))
+ end
+ local fname = task:cache_get('nn_row_tmpfile')
+ if not fname then -- no path cached for this task (SAVE_NN_ROW did not run): nothing to write
+ return
+ end
+ local f, err = io.open(fname, 'w')
+ if not f then
+ logger.errx(task, err) -- report the open failure and give up without raising
+ return
+ end
+ f:write(tohex(task:cache_get('neural_vec_mpack') or '')) -- writes an empty file when nothing is cached; NOTE(review): value is presumably the msgpack vector set by the neural plugin - confirm
+ f:close()
+ return
+ end,
+ type = 'idempotent', -- NOTE(review): presumably runs after normal symbols so the vector is already cached - confirm
+ flags = 'explicit_disable',
+ priority = 10,
+}
+
+dofile(rspamd_env.INSTALLROOT .. "/share/rspamd/rules/controller/init.lua") -- run the installed controller init script; NOTE(review): INSTALLROOT presumably comes from the RSPAMD_INSTALLROOT environment variable set by the test harness - confirm
diff --git a/test/functional/util/nn_unpack.lua b/test/functional/util/nn_unpack.lua
new file mode 100644
index 000000000..fee98d5a0
--- /dev/null
+++ b/test/functional/util/nn_unpack.lua
@@ -0,0 +1,16 @@
+local ucl = require "ucl"
+
+local function unhex(str) -- decode a hex string: each pair of characters becomes one byte
+ return (str:gsub('..', function (cc) -- outer parentheses keep only the string and drop gsub's match count
+ return string.char(tonumber(cc, 16)) -- parse the pair as base 16; a non-hex pair makes tonumber return nil and string.char raise
+ end))
+end
+
+local parser = ucl.parser() -- script body: hex-encoded msgpack in arg[1] -> compact JSON on stdout
+local ok, err = parser:parse_string(unhex(arg[1]), 'msgpack') -- NOTE(review): arg[1] is presumably the value passed via "rspamadm lua -a" - confirm
+if not ok then
+ io.stderr:write(err) -- report the parse error on stderr
+ os.exit(1) -- non-zero exit status on parse failure
+end
+
+print(ucl.to_format(parser:get_object(), 'json-compact')) -- emit the parsed object as compact JSON