aboutsummaryrefslogtreecommitdiffstats
path: root/test/functional
diff options
context:
space:
mode:
Diffstat (limited to 'test/functional')
-rw-r--r--test/functional/cases/110_statistics/300-multiclass-redis.robot42
-rw-r--r--test/functional/cases/110_statistics/320-multiclass-peruser.robot31
-rw-r--r--test/functional/cases/110_statistics/multiclass_lib.robot169
-rw-r--r--test/functional/configs/multiclass_bayes.conf129
-rw-r--r--test/functional/lib/rspamd.robot17
-rw-r--r--test/functional/messages/newsletter.eml50
-rw-r--r--test/functional/messages/transactional.eml18
7 files changed, 454 insertions, 2 deletions
diff --git a/test/functional/cases/110_statistics/300-multiclass-redis.robot b/test/functional/cases/110_statistics/300-multiclass-redis.robot
new file mode 100644
index 000000000..278f7e0a0
--- /dev/null
+++ b/test/functional/cases/110_statistics/300-multiclass-redis.robot
@@ -0,0 +1,42 @@
+*** Settings ***
+# Drives the shared keywords from multiclass_lib.robot without a user argument.
+Documentation Multiclass Bayes Classification Tests with Redis Backend
+Suite Setup Rspamd Redis Setup
+Suite Teardown Rspamd Redis Teardown
+Test Setup Set Test Hash Documentation
+Resource multiclass_lib.robot
+
+*** Variables ***
+# multiclass_lib.robot declares RSPAMD_REDIS_SERVER and RSPAMD_STATS_HASH as null;
+# this suite supplies concrete values for both.
+${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
+${RSPAMD_STATS_HASH} siphash
+${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
+
+*** Test Cases ***
+Multiclass Basic Learning and Classification
+ [Documentation] Test basic multiclass learning and classification
+ [Tags] multiclass basic learning
+ Multiclass Basic Learn Test
+
+Multiclass Legacy Compatibility
+ [Documentation] Test that old learn_spam/learn_ham commands still work
+ [Tags] multiclass compatibility legacy
+ Multiclass Legacy Compatibility Test
+
+# Multiclass Relearn Test fails unless Multiclass Basic Learn Test has completed
+# and set RSPAMD_STATS_LEARNTEST to 1, so this case depends on the first one.
+Multiclass Relearn
+ [Documentation] Test reclassifying messages to different classes
+ [Tags] multiclass relearn
+ Multiclass Relearn Test
+
+Multiclass Cross-Class Learning
+ [Documentation] Test learning message as different class than expected
+ [Tags] multiclass cross-learn
+ Multiclass Cross-Learn Test
+
+Multiclass Unlearn
+ [Documentation] Test unlearning (learning message as different class)
+ [Tags] multiclass unlearn
+ Multiclass Unlearn Test
+
+# Only inspects the output of `rspamc stat` for the three BAYES_* symbols.
+Multiclass Statistics
+ [Documentation] Test that statistics show all class information
+ [Tags] multiclass statistics
+ Multiclass Stats Test \ No newline at end of file
diff --git a/test/functional/cases/110_statistics/320-multiclass-peruser.robot b/test/functional/cases/110_statistics/320-multiclass-peruser.robot
new file mode 100644
index 000000000..e8ca34616
--- /dev/null
+++ b/test/functional/cases/110_statistics/320-multiclass-peruser.robot
@@ -0,0 +1,31 @@
+*** Settings ***
+# Per-user variant of the multiclass suite: every shared keyword from
+# multiclass_lib.robot is called with the recipient test@example.com.
+Suite Setup Rspamd Redis Setup
+Suite Teardown Rspamd Redis Teardown
+Test Setup Set Test Hash Documentation
+Resource multiclass_lib.robot
+
+*** Variables ***
+${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
+${REDIS_SCOPE} Suite
+${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
+${RSPAMD_SCOPE} Suite
+${RSPAMD_STATS_BACKEND} redis
+${RSPAMD_STATS_HASH} null
+${RSPAMD_STATS_KEY} null
+# Non-empty value. NOTE(review): presumably reaches the config template as
+# env.STATS_PER_USER, where a non-empty value adds the per_user function - confirm.
+${RSPAMD_STATS_PER_USER} true
+
+*** Test Cases ***
+Multiclass Per-User Basic Learn Test
+ Multiclass Basic Learn Test test@example.com
+
+Multiclass Per-User Legacy Compatibility Test
+ Multiclass Legacy Compatibility Test test@example.com
+
+# Multiclass Relearn Test fails unless the basic learn test above has set
+# RSPAMD_STATS_LEARNTEST to 1.
+Multiclass Per-User Relearn Test
+ Multiclass Relearn Test test@example.com
+
+Multiclass Per-User Cross-Learn Test
+ Multiclass Cross-Learn Test test@example.com
+
+Multiclass Per-User Unlearn Test
+ Multiclass Unlearn Test test@example.com \ No newline at end of file
diff --git a/test/functional/cases/110_statistics/multiclass_lib.robot b/test/functional/cases/110_statistics/multiclass_lib.robot
new file mode 100644
index 000000000..9f70e05fb
--- /dev/null
+++ b/test/functional/cases/110_statistics/multiclass_lib.robot
@@ -0,0 +1,169 @@
+*** Settings ***
+Library OperatingSystem
+Resource lib.robot
+
+*** Variables ***
+# Defaults shared by the multiclass suites. 300-multiclass-redis.robot and
+# 320-multiclass-peruser.robot both declare their own values for several of these.
+${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
+# Messages that the keywords below learn and scan, one per class.
+${MESSAGE_HAM} ${RSPAMD_TESTDIR}/messages/ham.eml
+${MESSAGE_SPAM} ${RSPAMD_TESTDIR}/messages/spam_message.eml
+${MESSAGE_NEWSLETTER} ${RSPAMD_TESTDIR}/messages/newsletter.eml
+${REDIS_SCOPE} Suite
+${RSPAMD_REDIS_SERVER} null
+${RSPAMD_SCOPE} Suite
+${RSPAMD_STATS_BACKEND} redis
+${RSPAMD_STATS_HASH} null
+${RSPAMD_STATS_KEY} null
+# Empty by default; the per-user suite sets it to true.
+${RSPAMD_STATS_PER_USER} ${EMPTY}
+
+*** Keywords ***
+Learn Multiclass
+ [Arguments] ${user} ${class} ${message}
+ # Learns ${message} as ${class} through the controller with rspamc's
+ # learn_class:<class> command and validates the reply with Check Rspamc.
+ # ${user}: recipient passed to rspamc with -d; an empty value omits the option.
+ # ${message}: path of the message file, passed through unchanged.
+ IF "${user}"
+ ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message}
+ ELSE
+ ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message}
+ END
+ Check Rspamc ${result}
+
+Learn Multiclass Legacy
+ [Arguments] ${user} ${class} ${message}
+ # Test backward compatibility with old learn_spam/learn_ham commands:
+ # learns ${message} with rspamc's learn_<class> command and validates the
+ # reply with Check Rspamc.
+ # ${user}: recipient passed to rspamc with -d; an empty value omits the option.
+ # ${message}: path of the message file, passed through unchanged.
+ IF "${user}"
+ ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message}
+ ELSE
+ ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message}
+ END
+ Check Rspamc ${result}
+
+Multiclass Basic Learn Test
+ [Arguments] ${user}=${EMPTY}
+ # Learns one message per class (spam, ham, newsletter), then scans each message
+ # and expects the matching BAYES_* symbol.
+ # ${user}: optional recipient; when non-empty it is used for learning and sent
+ # as Deliver-To on every scan.
+ # RSPAMD_STATS_LEARNTEST stays 0 until every check below has passed;
+ # Multiclass Relearn Test reads it as its precondition.
+ Set Suite Variable ${RSPAMD_STATS_LEARNTEST} 0
+ Set Test Variable ${kwargs} &{EMPTY}
+ IF "${user}"
+ Set To Dictionary ${kwargs} Deliver-To=${user}
+ END
+
+ # Learn all classes
+ Learn Multiclass ${user} spam ${MESSAGE_SPAM}
+ Learn Multiclass ${user} ham ${MESSAGE_HAM}
+ Learn Multiclass ${user} newsletter ${MESSAGE_NEWSLETTER}
+
+ # Test classification
+ Scan File ${MESSAGE_SPAM} &{kwargs}
+ Expect Symbol BAYES_SPAM
+
+ Scan File ${MESSAGE_HAM} &{kwargs}
+ Expect Symbol BAYES_HAM
+
+ Scan File ${MESSAGE_NEWSLETTER} &{kwargs}
+ Expect Symbol BAYES_NEWSLETTER
+
+ Set Suite Variable ${RSPAMD_STATS_LEARNTEST} 1
+
+Multiclass Legacy Compatibility Test
+ [Arguments] ${user}=${EMPTY}
+ # Learns the spam and ham messages with the legacy learn_spam/learn_ham commands
+ # and expects each message to receive its own BAYES_* symbol on a scan.
+ # ${user}: optional recipient; when non-empty it is used for learning and sent
+ # as Deliver-To on every scan.
+ Set Test Variable ${kwargs} &{EMPTY}
+ IF "${user}"
+ Set To Dictionary ${kwargs} Deliver-To=${user}
+ END
+
+ # Test legacy learn_spam and learn_ham commands still work
+ Learn Multiclass Legacy ${user} spam ${MESSAGE_SPAM}
+ Learn Multiclass Legacy ${user} ham ${MESSAGE_HAM}
+
+ # Should still classify correctly
+ Scan File ${MESSAGE_SPAM} &{kwargs}
+ Expect Symbol BAYES_SPAM
+
+ Scan File ${MESSAGE_HAM} &{kwargs}
+ Expect Symbol BAYES_HAM
+
+Multiclass Relearn Test
+ [Arguments] ${user}=${EMPTY}
+ # Re-learns the spam message as ham and checks that the verdict moved away from spam.
+ # ${user}: optional recipient; when non-empty it is used for learning and sent
+ # as Deliver-To on the scan.
+ # Get Variable Value falls back to 0 when Multiclass Basic Learn Test never ran,
+ # so the explicit failure below is reported instead of an undefined-variable error.
+ ${learned} = Get Variable Value ${RSPAMD_STATS_LEARNTEST} 0
+ IF ${learned} == 0
+ Fail Learn test was not run
+ END
+
+ Set Test Variable ${kwargs} &{EMPTY}
+ IF "${user}"
+ Set To Dictionary ${kwargs} Deliver-To=${user}
+ END
+
+ # Relearn spam message as ham
+ Learn Multiclass ${user} ham ${MESSAGE_SPAM}
+
+ # Should now classify as ham or at least not spam
+ Scan File ${MESSAGE_SPAM} &{kwargs}
+ # BAYES_HAM is the best outcome and ends the test early; otherwise it is
+ # enough that BAYES_SPAM is no longer present.
+ ${pass} = Run Keyword And Return Status Expect Symbol BAYES_HAM
+ IF ${pass}
+ Pass Execution Successfully reclassified spam as ham
+ END
+ Do Not Expect Symbol BAYES_SPAM
+
+Multiclass Cross-Learn Test
+ [Arguments] ${user}=${EMPTY}
+ # Learns the newsletter message as ham and expects a scan of that message to
+ # report BAYES_HAM and not BAYES_NEWSLETTER.
+ # ${user}: optional recipient; when non-empty it is used for learning and sent
+ # as Deliver-To on the scan.
+ Set Test Variable ${kwargs} &{EMPTY}
+ IF "${user}"
+ Set To Dictionary ${kwargs} Deliver-To=${user}
+ END
+
+ # Learn newsletter message as ham to test cross-class learning
+ Learn Multiclass ${user} ham ${MESSAGE_NEWSLETTER}
+
+ # Should classify as ham, not newsletter (since we trained it as ham)
+ Scan File ${MESSAGE_NEWSLETTER} &{kwargs}
+ Expect Symbol BAYES_HAM
+ Do Not Expect Symbol BAYES_NEWSLETTER
+
+Multiclass Unlearn Test
+ [Arguments] ${user}=${EMPTY}
+ # Learns the spam message as spam, confirms BAYES_SPAM, then learns the same
+ # message as ham and expects BAYES_SPAM to be absent on the next scan.
+ # ${user}: optional recipient; when non-empty it is used for learning and sent
+ # as Deliver-To on every scan.
+ Set Test Variable ${kwargs} &{EMPTY}
+ IF "${user}"
+ Set To Dictionary ${kwargs} Deliver-To=${user}
+ END
+
+ # First learn spam
+ Learn Multiclass ${user} spam ${MESSAGE_SPAM}
+ Scan File ${MESSAGE_SPAM} &{kwargs}
+ Expect Symbol BAYES_SPAM
+
+ # Then unlearn spam (learn as ham)
+ Learn Multiclass ${user} ham ${MESSAGE_SPAM}
+
+ # Should no longer classify as spam
+ Scan File ${MESSAGE_SPAM} &{kwargs}
+ Do Not Expect Symbol BAYES_SPAM
+
+Check Multiclass Results
+ [Arguments] ${result} ${expected_class}
+ # Asserts on ${result.stdout}: it must contain BAYES_<CLASS> (class name
+ # upper-cased) and, later on the same line, the literal text [<class>].
+ # NOTE(review): no keyword or test case visible here calls this - confirm it is used.
+ # Check that scan result contains expected class information
+ Should Contain ${result.stdout} BAYES_${expected_class.upper()}
+ # Check for multiclass result format [class_name]
+ Should Match Regexp ${result.stdout} BAYES_${expected_class.upper()}.*\\[${expected_class}\\]
+
+Multiclass Stats Test
+ # Runs `rspamc stat` against the controller and requires exit code 0 plus the
+ # names of all three class symbols in stdout.
+ # Check that rspamc stat shows learning counts for all classes
+ ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat
+ # Don't use Check Rspamc for stat command as it expects JSON success format
+ Should Be Equal As Integers ${result.rc} 0
+
+ # Should show statistics for all classes
+ Should Contain ${result.stdout} BAYES_SPAM
+ Should Contain ${result.stdout} BAYES_HAM
+ Should Contain ${result.stdout} BAYES_NEWSLETTER
+
+Multiclass Configuration Migration Test
+ # Runs `rspamc stat` with configs/stats.conf and expects the word "deprecated"
+ # (case-insensitive) on stderr.
+ # NOTE(review): no test case in the visible suites calls this keyword - confirm it is used.
+ # NOTE(review): confirm that rspamc accepts a --config option; it is passed here
+ # without -h, so the command goes to rspamc's default address.
+ # Test that old binary config can be automatically migrated
+ Set Test Variable ${binary_config} ${RSPAMD_TESTDIR}/configs/stats.conf
+
+ # Start with binary config
+ ${result} = Run Rspamc --config ${binary_config} stat
+ Check Rspamc ${result}
+
+ # Should show deprecation warning but work
+ Should Contain ${result.stderr} deprecated ignore_case=True
+
diff --git a/test/functional/configs/multiclass_bayes.conf b/test/functional/configs/multiclass_bayes.conf
new file mode 100644
index 000000000..278aeeee9
--- /dev/null
+++ b/test/functional/configs/multiclass_bayes.conf
@@ -0,0 +1,129 @@
+# Base rspamd settings for the multiclass Bayes functional tests.
+# Values written as {= env.NAME =} are template placeholders.
+options = {
+ filters = ["spf", "dkim", "regexp"]
+ url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat"
+ pidfile = "{= env.TMPDIR =}/rspamd.pid"
+ dns {
+ retransmits = 10;
+ timeout = 2s;
+ # Canned DNS answer: example.net has a TXT record "v=spf1 -all".
+ fake_records = [{
+ name = "example.net";
+ type = txt;
+ replies = ["v=spf1 -all"];
+ }]
+ }
+}
+
+logging = {
+ type = "file",
+ level = "debug"
+ filename = "{= env.TMPDIR =}/rspamd.log"
+}
+
+metric = {
+ name = "default",
+ actions = {
+ # Very large reject score for the default metric.
+ reject = 100500,
+ }
+ unknown_weight = 1
+}
+
+# Scanner worker.
+worker {
+ type = normal
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+ count = 1
+ keypair {
+ pubkey = "{= env.KEY_PUB1 =}";
+ privkey = "{= env.KEY_PVT1 =}";
+ }
+ task_timeout = 60s;
+}
+
+# Controller worker; the learn and stat commands in the robot keywords target
+# ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}.
+worker {
+ type = controller
+ bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+ count = 1
+ keypair {
+ pubkey = "{= env.KEY_PUB1 =}";
+ privkey = "{= env.KEY_PVT1 =}";
+ }
+ secure_ip = ["127.0.0.1", "::1"];
+ stats_path = "{= env.TMPDIR =}/stats.ucl";
+}
+
+# Multi-class Bayes classifier configuration
+# Three classes are declared (spam, ham, newsletter), each with its own statfile
+# and BAYES_* symbol.
+classifier {
+ languages_enabled = true;
+ tokenizer {
+ name = "osb";
+ # Both values are inserted unquoted from the environment; the robot suites
+ # set the hash to siphash or null and leave the key as null.
+ hash = {= env.STATS_HASH =};
+ key = {= env.STATS_KEY =};
+ }
+ backend = "{= env.STATS_BACKEND =}";
+
+ # Multi-class statfiles
+ statfile {
+ class = "spam";
+ symbol = BAYES_SPAM;
+ server = {= env.REDIS_SERVER =}
+ }
+ statfile {
+ class = "ham";
+ symbol = BAYES_HAM;
+ server = {= env.REDIS_SERVER =}
+ }
+ statfile {
+ class = "newsletter";
+ symbol = BAYES_NEWSLETTER;
+ server = {= env.REDIS_SERVER =}
+ }
+
+ # Backend class labels for Redis
+ class_labels = {
+ "spam" = "S";
+ "ham" = "H";
+ "newsletter" = "N";
+ }
+
+ cache {
+ server = {= env.REDIS_SERVER =}
+ }
+
+ # Multi-class autolearn configuration
+ # NOTE(review): the exact meaning of threshold, verdict_mapping, symbols and the
+ # balance settings is defined by the classifier implementation, not shown here.
+ autolearn = {
+ classes = {
+ spam = {
+ threshold = 15.0;
+ verdict_mapping = { spam = true };
+ };
+ ham = {
+ threshold = -5.0;
+ verdict_mapping = { ham = true };
+ };
+ newsletter = {
+ symbols = ["NEWSLETTER_HEADER", "BULK_MAIL", "UNSUBSCRIBE_LINK"];
+ threshold = 8.0;
+ };
+ };
+
+ check_balance = true;
+ max_class_ratio = 0.6;
+ skip_threshold = 0.95;
+ }
+
+ # Minimums are set to 1: the robot keywords learn a single message per class
+ # before scanning.
+ min_learns = 1;
+ min_tokens = 1;
+ min_token_hits = 1;
+ min_prob_strength = 0.05;
+
+ # Emitted only when env.STATS_PER_USER is not empty: the per_user Lua function
+ # returns the task's principal recipient.
+ {% if env.STATS_PER_USER ~= '' %}
+ per_user = <<EOD
+return function(task)
+ return task:get_principal_recipient()
+end
+EOD;
+ {% endif %}
+}
+
+lua = "{= env.TESTDIR =}/lua/test_coverage.lua";
+
+settings {}
diff --git a/test/functional/lib/rspamd.robot b/test/functional/lib/rspamd.robot
index 5d23e3ceb..f61998f46 100644
--- a/test/functional/lib/rspamd.robot
+++ b/test/functional/lib/rspamd.robot
@@ -419,10 +419,23 @@ Run Nginx
${nginx_log} = Get File ${RSPAMD_TMPDIR}/nginx.log
Log ${nginx_log}
+Set Test Hash Documentation
+ # Logs an 8-character MD5 prefix of the current test name together with the
+ # name itself, on the console as well as in the log.
+ # $TEST_NAME is read from the evaluation namespace rather than pasted into the
+ # expression, so a test name containing a quote cannot break the Python code.
+ ${log_tag} = Evaluate __import__('hashlib').md5($TEST_NAME.encode()).hexdigest()[:8]
+ Log TEST CONTEXT: [${log_tag}] ${TEST NAME} console=True
+
Run Rspamc
[Arguments] @{args}
- ${result} = Run Process ${RSPAMC} -t 60 --header Queue-ID\=${TEST NAME}
- ... @{args} env:LD_LIBRARY_PATH=${RSPAMD_TESTDIR}/../../contrib/aho-corasick
+ # Runs ${RSPAMC} with a 60 second timeout and the caller's arguments, logs its
+ # stdout and returns the process result.
+ # --log-tag is always set to an 8-character MD5 prefix of the test name;
+ # --queue-id ${TEST NAME} is added unless the caller already passed --queue-id.
+ # $TEST_NAME and $args are read from the evaluation namespace rather than pasted
+ # into the expressions, so quotes in a test name or argument cannot break them.
+ ${log_tag} = Evaluate __import__('hashlib').md5($TEST_NAME.encode()).hexdigest()[:8]
+ # Check if --queue-id is already provided in the arguments
+ ${has_queue_id} = Evaluate '--queue-id' in ' '.join(map(str, $args))
+ IF ${has_queue_id}
+ ${result} = Run Process ${RSPAMC} -t 60 --log-tag ${log_tag}
+ ... @{args} env:LD_LIBRARY_PATH=${RSPAMD_TESTDIR}/../../contrib/aho-corasick
+ ELSE
+ ${result} = Run Process ${RSPAMC} -t 60 --queue-id ${TEST NAME} --log-tag ${log_tag}
+ ... @{args} env:LD_LIBRARY_PATH=${RSPAMD_TESTDIR}/../../contrib/aho-corasick
+ END
Log ${result.stdout}
[Return] ${result}
diff --git a/test/functional/messages/newsletter.eml b/test/functional/messages/newsletter.eml
new file mode 100644
index 000000000..93c996956
--- /dev/null
+++ b/test/functional/messages/newsletter.eml
@@ -0,0 +1,50 @@
+From: "Marketing Team" <newsletter@example.com>
+To: user@example.org
+Subject: 🎉 Monthly Newsletter - Exclusive Deals & Product Updates!
+Date: Fri, 21 Jul 2023 10:00:00 +0000
+Message-ID: <newsletter-123@example.com>
+MIME-Version: 1.0
+Content-Type: text/html; charset=utf-8
+List-Unsubscribe: <https://example.com/unsubscribe?id=123>
+Precedence: bulk
+X-Mailer: MailChimp/Pro 12.345
+
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ <title>Monthly Newsletter</title>
+</head>
+<body>
+ <h1>🎉 Exclusive Monthly Offers!</h1>
+
+ <p>Dear Valued Subscriber,</p>
+
+ <p>This month we're excited to bring you our <strong>BIGGEST SALE</strong> of the year!</p>
+
+ <h2>🔥 Hot Deals This Month:</h2>
+ <ul>
+ <li>50% OFF all premium products</li>
+ <li>FREE shipping on orders over $50</li>
+ <li>Buy 2 Get 1 FREE on selected items</li>
+ </ul>
+
+ <p><a href="https://example.com/shop?utm_source=newsletter&utm_campaign=monthly">SHOP NOW</a></p>
+
+ <h2>📱 New Product Launch</h2>
+ <p>Check out our revolutionary new gadget that everyone is talking about!</p>
+
+ <h2>🎁 Refer a Friend</h2>
+ <p>Share this newsletter and both you and your friend get $10 credit!</p>
+
+ <hr>
+
+ <p><small>
+ You're receiving this because you subscribed to our newsletter.<br>
+ <a href="https://example.com/unsubscribe?id=123">Unsubscribe here</a> |
+ <a href="https://example.com/preferences">Update preferences</a><br>
+ Marketing Team, Example Corp<br>
+ 123 Business St, City, State 12345
+ </small></p>
+</body>
+</html> \ No newline at end of file
diff --git a/test/functional/messages/transactional.eml b/test/functional/messages/transactional.eml
new file mode 100644
index 000000000..e227aaa77
--- /dev/null
+++ b/test/functional/messages/transactional.eml
@@ -0,0 +1,18 @@
+From: noreply@example.com
+To: user@example.org
+Subject: Password Reset Request
+Date: Fri, 21 Jul 2023 11:00:00 +0000
+Message-ID: <pwd-reset-456@example.com>
+MIME-Version: 1.0
+Content-Type: text/plain
+
+Hello,
+
+You have requested a password reset for your account.
+
+Click here to reset your password: https://example.com/reset?token=abc123
+
+This link expires in 24 hours.
+
+Best regards,
+Security Team \ No newline at end of file