diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-04 16:03:34 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rspamd.com> | 2024-11-04 16:03:34 +0000 |
commit | 3548785ebdc0e9a9aefc782c85bccb08456e8f3f (patch) | |
tree | 7577414613aa45d28c7444eb5ef9ca39ee9b47c1 | |
parent | 80cb50dea482246656a49e54a915bdc343ffe897 (diff) | |
download | rspamd-3548785ebdc0e9a9aefc782c85bccb08456e8f3f.tar.gz rspamd-3548785ebdc0e9a9aefc782c85bccb08456e8f3f.zip |
[Fix] Another story about char sign
Ragel still produces an incorrect state machine if `const char *` is used
to represent characters that are actually UTF-8.
This PR changes all types to `unsigned char *` for consistency.
-rw-r--r-- | src/ragel/smtp_addr_parser.rl | 18 | ||||
-rw-r--r-- | test/lua/unit/smtp_addr.lua | 67 |
2 files changed, 43 insertions, 42 deletions
diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl index b5b4863d3..330f3f01d 100644 --- a/src/ragel/smtp_addr_parser.rl +++ b/src/ragel/smtp_addr_parser.rl @@ -8,33 +8,33 @@ action IP4_end {} action User_start { - addr->user = p; + addr->user = (const char *)p; } action User_end { if (addr->user) { - addr->user_len = p - addr->user; + addr->user_len = (const char *)p - addr->user; } } action Domain_start { - addr->domain = p; + addr->domain = (const char *)p; } action Domain_end { if (addr->domain) { - addr->domain_len = p - addr->domain; + addr->domain_len = (const char *)p - addr->domain; } } action Domain_addr_start { - addr->domain = p; + addr->domain = (const char *)p; addr->flags |= RSPAMD_EMAIL_ADDR_IP; } action Domain_addr_end { if (addr->domain) { - addr->domain_len = p - addr->domain; + addr->domain_len = (const char *)p - addr->domain; } } @@ -64,12 +64,12 @@ } action Addr_start { - addr->addr = p; + addr->addr = (const char *)p; } action Addr_end { if (addr->addr) { - addr->addr_len = p - addr->addr; + addr->addr_len = (const char *)p - addr->addr; } } @@ -87,7 +87,7 @@ int rspamd_smtp_addr_parse (const char *data, size_t len, struct rspamd_email_address *addr) { - const char *p = data, *pe = data + len, *eof; + const unsigned char *p = (const unsigned char *)data, *pe = (const unsigned char *)data + len, *eof; int cs; g_assert (addr != NULL); diff --git a/test/lua/unit/smtp_addr.lua b/test/lua/unit/smtp_addr.lua index 2cb7755f8..ffabd838a 100644 --- a/test/lua/unit/smtp_addr.lua +++ b/test/lua/unit/smtp_addr.lua @@ -5,7 +5,7 @@ context("SMTP address check functions", function() local ffi = require("ffi") local util = require("rspamd_util") local fun = require "fun" - ffi.cdef[[ + ffi.cdef [[ struct rspamd_email_address { const char *raw; const char *addr; @@ -24,29 +24,30 @@ context("SMTP address check functions", function() ]] local cases_valid = { - {'<>', {addr = ''}}, - {'<a@example.com>', {user = 'a', domain = 
'example.com', addr = 'a@example.com'}}, - {'<a-b@example.com>', {user = 'a-b', domain = 'example.com', addr = 'a-b@example.com'}}, - {'<a-b@ex-ample.com>', {user = 'a-b', domain = 'ex-ample.com', addr = 'a-b@ex-ample.com'}}, - {'1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370@example.220-volt.ru', - {user = '1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370', - domain = 'example.220-volt.ru', - addr = '1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370@example.220-volt.ru'}}, - {'notification+kjdm---m7wwd@facebookmail.com', {user = 'notification+kjdm---m7wwd'}}, - {'a@example.com', {user = 'a', domain = 'example.com', addr = 'a@example.com'}}, - {'a+b@example.com', {user = 'a+b', domain = 'example.com', addr = 'a+b@example.com'}}, - {'"a"@example.com', {user = 'a', domain = 'example.com', addr = 'a@example.com'}}, - {'"a+b"@example.com', {user = 'a+b', domain = 'example.com', addr = 'a+b@example.com'}}, - {'"<>"@example.com', {user = '<>', domain = 'example.com', addr = '<>@example.com'}}, - {'<"<>"@example.com>', {user = '<>', domain = 'example.com', addr = '<>@example.com'}}, - {'"\\""@example.com', {user = '"', domain = 'example.com', addr = '"@example.com'}}, - {'"\\"abc"@example.com', {user = '"abc', domain = 'example.com', addr = '"abc@example.com'}}, - {'<@domain1,@domain2,@domain3:abc@example.com>', - {user = 'abc', domain = 'example.com', addr = 'abc@example.com'}}, + { '<>', { addr = '' } }, + { '<a@example.com>', { user = 'a', domain = 'example.com', addr = 'a@example.com' } }, + { '<a-b@example.com>', { user = 'a-b', domain = 'example.com', addr = 'a-b@example.com' } }, + { '<a-b@ex-ample.com>', { user = 'a-b', domain = 'ex-ample.com', addr = 'a-b@ex-ample.com' } }, + { '1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370@example.220-volt.ru', + { user = '1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370', + domain = 'example.220-volt.ru', + addr = '1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370@example.220-volt.ru' } }, + { 
'notification+kjdm---m7wwd@facebookmail.com', { user = 'notification+kjdm---m7wwd' } }, + { 'a@example.com', { user = 'a', domain = 'example.com', addr = 'a@example.com' } }, + { 'a+b@example.com', { user = 'a+b', domain = 'example.com', addr = 'a+b@example.com' } }, + { '"a"@example.com', { user = 'a', domain = 'example.com', addr = 'a@example.com' } }, + { '"a+b"@example.com', { user = 'a+b', domain = 'example.com', addr = 'a+b@example.com' } }, + { '"<>"@example.com', { user = '<>', domain = 'example.com', addr = '<>@example.com' } }, + { '<"<>"@example.com>', { user = '<>', domain = 'example.com', addr = '<>@example.com' } }, + { '"\\""@example.com', { user = '"', domain = 'example.com', addr = '"@example.com' } }, + { '"\\"abc"@example.com', { user = '"abc', domain = 'example.com', addr = '"abc@example.com' } }, + { '<@domain1,@domain2,@domain3:abc@example.com>', + { user = 'abc', domain = 'example.com', addr = 'abc@example.com' } }, + -- SMTP UTF8 + { 'ñ@example.com', { user = 'ñ', domain = 'example.com' } } } - fun.each(function(case) test("Parse valid smtp addr: " .. case[1], function() local st = ffi.C.rspamd_email_address_from_smtp(case[1], #case[1]) @@ -69,17 +70,17 @@ context("SMTP address check functions", function() end) end, cases_valid) - local cases_invalid = { - 'a', - 'a"b"@example.com', - 'a"@example.com', - '"a@example.com', - '<a@example.com', - 'a@example.com>', - '<a@.example.com>', - '<a@example.com>>', - '<a@example.com><>', - } + local cases_invalid = { + 'a', + 'a"b"@example.com', + 'a"@example.com', + '"a@example.com', + '<a@example.com', + 'a@example.com>', + '<a@.example.com>', + '<a@example.com>>', + '<a@example.com><>', + } fun.each(function(case) test("Parse invalid smtp addr: " .. 
case, function() @@ -95,7 +96,7 @@ context("SMTP address check functions", function() local niter = 100000 local total = 0 - for i = 1,niter do + for i = 1, niter do local ncase = string.format(case, i) local t1 = util.get_ticks() local st = ffi.C.rspamd_email_address_from_smtp(ncase, #ncase) |