diff options
Diffstat (limited to 'test/functional')
-rw-r--r-- | test/functional/cases/110_statistics/300-multiclass-redis.robot | 42 | ||||
-rw-r--r-- | test/functional/cases/110_statistics/320-multiclass-peruser.robot | 31 | ||||
-rw-r--r-- | test/functional/cases/110_statistics/multiclass_lib.robot | 169 | ||||
-rw-r--r-- | test/functional/configs/multiclass_bayes.conf | 129 | ||||
-rw-r--r-- | test/functional/lib/rspamd.robot | 17 | ||||
-rw-r--r-- | test/functional/messages/newsletter.eml | 50 | ||||
-rw-r--r-- | test/functional/messages/transactional.eml | 18 |
7 files changed, 454 insertions, 2 deletions
diff --git a/test/functional/cases/110_statistics/300-multiclass-redis.robot b/test/functional/cases/110_statistics/300-multiclass-redis.robot new file mode 100644 index 000000000..278f7e0a0 --- /dev/null +++ b/test/functional/cases/110_statistics/300-multiclass-redis.robot @@ -0,0 +1,42 @@ +*** Settings *** +Documentation Multiclass Bayes Classification Tests with Redis Backend +Suite Setup Rspamd Redis Setup +Suite Teardown Rspamd Redis Teardown +Test Setup Set Test Hash Documentation +Resource multiclass_lib.robot + +*** Variables *** +${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT} +${RSPAMD_STATS_HASH} siphash +${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf + +*** Test Cases *** +Multiclass Basic Learning and Classification + [Documentation] Test basic multiclass learning and classification + [Tags] multiclass basic learning + Multiclass Basic Learn Test + +Multiclass Legacy Compatibility + [Documentation] Test that old learn_spam/learn_ham commands still work + [Tags] multiclass compatibility legacy + Multiclass Legacy Compatibility Test + +Multiclass Relearn + [Documentation] Test reclassifying messages to different classes + [Tags] multiclass relearn + Multiclass Relearn Test + +Multiclass Cross-Class Learning + [Documentation] Test learning message as different class than expected + [Tags] multiclass cross-learn + Multiclass Cross-Learn Test + +Multiclass Unlearn + [Documentation] Test unlearning (learning message as different class) + [Tags] multiclass unlearn + Multiclass Unlearn Test + +Multiclass Statistics + [Documentation] Test that statistics show all class information + [Tags] multiclass statistics + Multiclass Stats Test
\ No newline at end of file diff --git a/test/functional/cases/110_statistics/320-multiclass-peruser.robot b/test/functional/cases/110_statistics/320-multiclass-peruser.robot new file mode 100644 index 000000000..e8ca34616 --- /dev/null +++ b/test/functional/cases/110_statistics/320-multiclass-peruser.robot @@ -0,0 +1,31 @@ +*** Settings *** +Suite Setup Rspamd Redis Setup +Suite Teardown Rspamd Redis Teardown +Test Setup Set Test Hash Documentation +Resource multiclass_lib.robot + +*** Variables *** +${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf +${REDIS_SCOPE} Suite +${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT} +${RSPAMD_SCOPE} Suite +${RSPAMD_STATS_BACKEND} redis +${RSPAMD_STATS_HASH} null +${RSPAMD_STATS_KEY} null +${RSPAMD_STATS_PER_USER} true + +*** Test Cases *** +Multiclass Per-User Basic Learn Test + Multiclass Basic Learn Test test@example.com + +Multiclass Per-User Legacy Compatibility Test + Multiclass Legacy Compatibility Test test@example.com + +Multiclass Per-User Relearn Test + Multiclass Relearn Test test@example.com + +Multiclass Per-User Cross-Learn Test + Multiclass Cross-Learn Test test@example.com + +Multiclass Per-User Unlearn Test + Multiclass Unlearn Test test@example.com
\ No newline at end of file diff --git a/test/functional/cases/110_statistics/multiclass_lib.robot b/test/functional/cases/110_statistics/multiclass_lib.robot new file mode 100644 index 000000000..9f70e05fb --- /dev/null +++ b/test/functional/cases/110_statistics/multiclass_lib.robot @@ -0,0 +1,169 @@ +*** Settings *** +Library OperatingSystem +Resource lib.robot + +*** Variables *** +${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf +${MESSAGE_HAM} ${RSPAMD_TESTDIR}/messages/ham.eml +${MESSAGE_SPAM} ${RSPAMD_TESTDIR}/messages/spam_message.eml +${MESSAGE_NEWSLETTER} ${RSPAMD_TESTDIR}/messages/newsletter.eml +${REDIS_SCOPE} Suite +${RSPAMD_REDIS_SERVER} null +${RSPAMD_SCOPE} Suite +${RSPAMD_STATS_BACKEND} redis +${RSPAMD_STATS_HASH} null +${RSPAMD_STATS_KEY} null +${RSPAMD_STATS_PER_USER} ${EMPTY} + +*** Keywords *** +Learn Multiclass + [Arguments] ${user} ${class} ${message} + # Extract filename from message path for queue-id + ${path} ${filename} = Split Path ${message} + IF "${user}" + ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} + ELSE + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} + END + Check Rspamc ${result} + +Learn Multiclass Legacy + [Arguments] ${user} ${class} ${message} + # Test backward compatibility with old learn_spam/learn_ham commands + # Extract filename from message path for queue-id + ${path} ${filename} = Split Path ${message} + IF "${user}" + ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} + ELSE + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} + END + Check Rspamc ${result} + +Multiclass Basic Learn Test + [Arguments] ${user}=${EMPTY} + Set Suite Variable ${RSPAMD_STATS_LEARNTEST} 0 + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Learn all classes + Learn Multiclass ${user} spam ${MESSAGE_SPAM} + Learn Multiclass ${user} ham ${MESSAGE_HAM} + Learn Multiclass ${user} newsletter ${MESSAGE_NEWSLETTER} + + # Test classification + Scan File ${MESSAGE_SPAM} &{kwargs} + Expect Symbol BAYES_SPAM + + Scan File ${MESSAGE_HAM} &{kwargs} + Expect Symbol BAYES_HAM + + Scan File ${MESSAGE_NEWSLETTER} &{kwargs} + Expect Symbol BAYES_NEWSLETTER + + Set Suite Variable ${RSPAMD_STATS_LEARNTEST} 1 + +Multiclass Legacy Compatibility Test + [Arguments] ${user}=${EMPTY} + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Test legacy learn_spam and learn_ham commands still work + Learn Multiclass Legacy ${user} spam ${MESSAGE_SPAM} + Learn Multiclass Legacy ${user} ham ${MESSAGE_HAM} + + # Should still classify correctly + Scan File ${MESSAGE_SPAM} &{kwargs} + Expect Symbol BAYES_SPAM + + Scan File ${MESSAGE_HAM} &{kwargs} + Expect Symbol BAYES_HAM + +Multiclass Relearn Test + [Arguments] ${user}=${EMPTY} + IF ${RSPAMD_STATS_LEARNTEST} == 0 + Fail "Learn test was not run" + END + + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Relearn spam message as ham + Learn Multiclass ${user} ham ${MESSAGE_SPAM} + + # Should now classify as ham or at least not spam + Scan File ${MESSAGE_SPAM} &{kwargs} + ${pass} = Run Keyword And Return Status Expect Symbol BAYES_HAM + IF ${pass} + Pass Execution Successfully reclassified spam as ham + END + Do Not Expect Symbol BAYES_SPAM + +Multiclass Cross-Learn Test + [Arguments] ${user}=${EMPTY} + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Learn newsletter message as ham to test cross-class learning + Learn Multiclass ${user} ham ${MESSAGE_NEWSLETTER} + + # Should classify as ham, not newsletter (since we trained it as ham) + Scan File ${MESSAGE_NEWSLETTER} &{kwargs} + Expect Symbol BAYES_HAM + Do Not Expect Symbol BAYES_NEWSLETTER + +Multiclass Unlearn Test + [Arguments] ${user}=${EMPTY} + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # First learn spam + Learn Multiclass ${user} spam ${MESSAGE_SPAM} + Scan File ${MESSAGE_SPAM} &{kwargs} + Expect Symbol BAYES_SPAM + + # Then unlearn spam (learn as ham) + Learn Multiclass ${user} ham ${MESSAGE_SPAM} + + # Should no longer classify as spam + Scan File ${MESSAGE_SPAM} &{kwargs} + Do Not Expect Symbol BAYES_SPAM + +Check Multiclass Results + [Arguments] ${result} ${expected_class} + # Check that scan result contains expected class information + Should Contain ${result.stdout} BAYES_${expected_class.upper()} + # Check for multiclass result format [class_name] + Should Match Regexp ${result.stdout} BAYES_${expected_class.upper()}.*\\[${expected_class}\\] + +Multiclass Stats Test + # Check that rspamc stat shows learning counts for all classes + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat + # Don't use Check Rspamc for stat command as it expects JSON success format + Should Be Equal As Integers ${result.rc} 0 + + # Should show statistics for all classes + Should Contain ${result.stdout} BAYES_SPAM + Should Contain ${result.stdout} BAYES_HAM + Should Contain ${result.stdout} BAYES_NEWSLETTER + +Multiclass Configuration Migration Test + # Test that old binary config can be automatically migrated + Set Test Variable ${binary_config} ${RSPAMD_TESTDIR}/configs/stats.conf + + # Start with binary config + ${result} = Run Rspamc --config ${binary_config} stat + Check Rspamc ${result} + + # Should show deprecation warning but work + Should Contain ${result.stderr} deprecated ignore_case=True + diff --git a/test/functional/configs/multiclass_bayes.conf b/test/functional/configs/multiclass_bayes.conf new file mode 100644 index 000000000..278aeeee9 --- /dev/null +++ b/test/functional/configs/multiclass_bayes.conf @@ -0,0 +1,129 @@ +options = { + filters = ["spf", "dkim", "regexp"] + url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat" + pidfile = "{= env.TMPDIR =}/rspamd.pid" + dns { + retransmits = 10; + timeout = 2s; + fake_records = [{ + name = "example.net"; + type = txt; + replies = ["v=spf1 -all"]; + }] + } +} + +logging = { + type = "file", + level = "debug" + filename = "{= env.TMPDIR =}/rspamd.log" +} + +metric = { + name = "default", + actions = { + reject = 100500, + } + unknown_weight = 1 +} + +worker { + type = normal + bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}" + count = 1 + keypair { + pubkey = "{= env.KEY_PUB1 =}"; + privkey = "{= env.KEY_PVT1 =}"; + } + task_timeout = 60s; +} + +worker { + type = controller + bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}" + count = 1 + keypair { + pubkey = "{= env.KEY_PUB1 =}"; + privkey = "{= env.KEY_PVT1 =}"; + } + secure_ip = ["127.0.0.1", "::1"]; + stats_path = "{= env.TMPDIR =}/stats.ucl"; +} + +# Multi-class Bayes classifier configuration +classifier { + languages_enabled = true; + tokenizer { + name = "osb"; + hash = {= env.STATS_HASH =}; + key = {= env.STATS_KEY =}; + } + backend = "{= env.STATS_BACKEND =}"; + + # Multi-class statfiles + statfile { + class = "spam"; + symbol = BAYES_SPAM; + server = {= env.REDIS_SERVER =} + } + statfile { + class = "ham"; + symbol = BAYES_HAM; + server = {= env.REDIS_SERVER =} + } + statfile { + class = "newsletter"; + symbol = BAYES_NEWSLETTER; + server = {= env.REDIS_SERVER =} + } + + # Backend class labels for Redis + class_labels = { + "spam" = "S"; + "ham" = "H"; + "newsletter" = "N"; + } + + cache { + server = {= env.REDIS_SERVER =} + } + + # Multi-class autolearn configuration + autolearn = { + classes = { + spam = { + threshold = 15.0; + verdict_mapping = { spam = true }; + }; + ham = { + threshold = -5.0; + verdict_mapping = { ham = true }; + }; + newsletter = { + symbols = ["NEWSLETTER_HEADER", "BULK_MAIL", "UNSUBSCRIBE_LINK"]; + threshold = 8.0; + }; + }; + + check_balance = true; + max_class_ratio = 0.6; + skip_threshold = 0.95; + } + + min_learns = 1; + min_tokens = 1; + min_token_hits = 1; + min_prob_strength = 0.05; + + {% if env.STATS_PER_USER ~= '' %} + per_user = <<EOD +return function(task) + return task:get_principal_recipient() +end +EOD; + {% endif %} +} + +lua = "{= env.TESTDIR =}/lua/test_coverage.lua"; + +settings {} diff --git a/test/functional/lib/rspamd.robot b/test/functional/lib/rspamd.robot index 5d23e3ceb..f61998f46 100644 --- a/test/functional/lib/rspamd.robot +++ b/test/functional/lib/rspamd.robot @@ -419,10 +419,23 @@ Run Nginx ${nginx_log} = Get File ${RSPAMD_TMPDIR}/nginx.log Log ${nginx_log} +Set Test Hash Documentation + ${log_tag} = Evaluate __import__('hashlib').md5('${TEST NAME}'.encode()).hexdigest()[:8] + Log TEST CONTEXT: [${log_tag}] ${TEST NAME} console=True + Run Rspamc [Arguments] @{args} - ${result} = Run Process ${RSPAMC} -t 60 --header Queue-ID\=${TEST NAME} - ... @{args} env:LD_LIBRARY_PATH=${RSPAMD_TESTDIR}/../../contrib/aho-corasick + ${log_tag} = Evaluate __import__('hashlib').md5('${TEST NAME}'.encode()).hexdigest()[:8] + # Check if --queue-id is already provided in the arguments + ${args_str} = Evaluate ' '.join(@{args}) + ${has_queue_id} = Evaluate '--queue-id' in '${args_str}' + IF ${has_queue_id} + ${result} = Run Process ${RSPAMC} -t 60 --log-tag ${log_tag} + ... @{args} env:LD_LIBRARY_PATH=${RSPAMD_TESTDIR}/../../contrib/aho-corasick + ELSE + ${result} = Run Process ${RSPAMC} -t 60 --queue-id ${TEST NAME} --log-tag ${log_tag} + ... @{args} env:LD_LIBRARY_PATH=${RSPAMD_TESTDIR}/../../contrib/aho-corasick + END Log ${result.stdout} [Return] ${result} diff --git a/test/functional/messages/newsletter.eml b/test/functional/messages/newsletter.eml new file mode 100644 index 000000000..93c996956 --- /dev/null +++ b/test/functional/messages/newsletter.eml @@ -0,0 +1,50 @@ +From: "Marketing Team" <newsletter@example.com> +To: user@example.org +Subject: 🎉 Monthly Newsletter - Exclusive Deals & Product Updates! +Date: Thu, 21 Jul 2023 10:00:00 +0000 +Message-ID: <newsletter-123@example.com> +MIME-Version: 1.0 +Content-Type: text/html; charset=utf-8 +List-Unsubscribe: <https://example.com/unsubscribe?id=123> +Precedence: bulk +X-Mailer: MailChimp/Pro 12.345 + +<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> + <title>Monthly Newsletter</title> +</head> +<body> + <h1>🎉 Exclusive Monthly Offers!</h1> + + <p>Dear Valued Subscriber,</p> + + <p>This month we're excited to bring you our <strong>BIGGEST SALE</strong> of the year!</p> + + <h2>🔥 Hot Deals This Month:</h2> + <ul> + <li>50% OFF all premium products</li> + <li>FREE shipping on orders over $50</li> + <li>Buy 2 Get 1 FREE on selected items</li> + </ul> + + <p><a href="https://example.com/shop?utm_source=newsletter&utm_campaign=monthly">SHOP NOW</a></p> + + <h2>📱 New Product Launch</h2> + <p>Check out our revolutionary new gadget that everyone is talking about!</p> + + <h2>🎁 Refer a Friend</h2> + <p>Share this newsletter and both you and your friend get $10 credit!</p> + + <hr> + + <p><small> + You're receiving this because you subscribed to our newsletter.<br> + <a href="https://example.com/unsubscribe?id=123">Unsubscribe here</a> | + <a href="https://example.com/preferences">Update preferences</a><br> + Marketing Team, Example Corp<br> + 123 Business St, City, State 12345 + </small></p> +</body> +</html>
\ No newline at end of file diff --git a/test/functional/messages/transactional.eml b/test/functional/messages/transactional.eml new file mode 100644 index 000000000..e227aaa77 --- /dev/null +++ b/test/functional/messages/transactional.eml @@ -0,0 +1,18 @@ +From: noreply@example.com +To: user@example.org +Subject: Password Reset Request +Date: Thu, 21 Jul 2023 11:00:00 +0000 +Message-ID: <pwd-reset-456@example.com> +MIME-Version: 1.0 +Content-Type: text/plain + +Hello, + +You have requested a password reset for your account. + +Click here to reset your password: https://example.com/reset?token=abc123 + +This link expires in 24 hours. + +Best regards, +Security Team
\ No newline at end of file |