From fffd9e6f1084f425a9fcda67cad90c2f8a04f264 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 15 Nov 2019 18:51:24 +0000
Subject: [PATCH] [Test] Improve tests

---
 test/lua/unit/base64.lua |   2 +-
 test/lua/unit/utf.lua    | 132 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/test/lua/unit/base64.lua b/test/lua/unit/base64.lua
index dcf235791..43606e91e 100644
--- a/test/lua/unit/base64.lua
+++ b/test/lua/unit/base64.lua
@@ -12,7 +12,7 @@ context("Base64 encoding", function()
     void g_free(void *ptr);
     int memcmp(const void *a1, const void *a2, size_t len);
     double base64_test (bool generic, size_t niters, size_t len, size_t str_len);
-    double rspamd_get_ticks (void);
+    double rspamd_get_ticks (int);
   ]]
 
   ffi.C.rspamd_cryptobox_init()
diff --git a/test/lua/unit/utf.lua b/test/lua/unit/utf.lua
index 2d2c77f67..34217afa4 100644
--- a/test/lua/unit/utf.lua
+++ b/test/lua/unit/utf.lua
@@ -5,6 +5,13 @@ context("UTF8 check functions", function()
   ffi.cdef[[
     unsigned int rspamd_str_lc_utf8 (char *str, unsigned int size);
     unsigned int rspamd_str_lc (char *str, unsigned int size);
+    void rspamd_fast_utf8_library_init (unsigned flags);
+    void ottery_rand_bytes(void *buf, size_t n);
+    double rspamd_get_ticks(int allow);
+    size_t rspamd_fast_utf8_validate (const unsigned char *data, size_t len);
+    size_t rspamd_fast_utf8_validate_ref (const unsigned char *data, size_t len);
+    size_t rspamd_fast_utf8_validate_sse41 (const unsigned char *data, size_t len);
+    size_t rspamd_fast_utf8_validate_avx2 (const unsigned char *data, size_t len);
     char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen, void *);
   ]]
 
@@ -69,4 +76,129 @@ context("UTF8 check functions", function()
       assert_equal(s, c[2])
     end)
   end
+
+  -- Enable sse and avx2
+  ffi.C.rspamd_fast_utf8_library_init(3)
+  local valid_cases = {
+    "a",
+    "\xc3\xb1",
+    "\xe2\x82\xa1",
+    "\xf0\x90\x8c\xbc",
+    "안녕하세요, 세상"
+  }
+  for i,c in ipairs(valid_cases) do
+    test("Unicode validate success: " .. tostring(i), function()
+      local buf = ffi.new("char[?]", #c + 1)
+      ffi.copy(buf, c)
+
+      local ret = ffi.C.rspamd_fast_utf8_validate(buf, #c)
+      assert_equal(ret, 0)
+    end)
+  end
+  local invalid_cases = {
+    "\xc3\x28",
+    "\xa0\xa1",
+    "\xe2\x28\xa1",
+    "\xe2\x82\x28",
+    "\xf0\x28\x8c\xbc",
+    "\xf0\x90\x28\xbc",
+    "\xf0\x28\x8c\x28",
+    "\xc0\x9f",
+    "\xf5\xff\xff\xff",
+    "\xed\xa0\x81",
+    "\xf8\x90\x80\x80\x80",
+    "123456789012345\xed",
+    "123456789012345\xf1",
+    "123456789012345\xc2",
+    "\xC2\x7F"
+  }
+  for i,c in ipairs(invalid_cases) do
+    test("Unicode validate fail: " .. tostring(i), function()
+      local buf = ffi.new("char[?]", #c + 1)
+      ffi.copy(buf, c)
+
+      local ret = ffi.C.rspamd_fast_utf8_validate(buf, #c)
+      assert_not_equal(ret, 0)
+    end)
+  end
+
+  local speed_iters = 10000
+  -- Times `speed_iters` validation passes over a `buflen`-byte buffer and logs
+  -- the average ticks per iteration and per byte.
+  -- `is_valid` selects the sample (valid cases only, or valid followed by
+  -- invalid ones); `impl` is 'ref', 'sse', or anything else for avx2.
+  -- Always returns 0: the validators' results are not checked here.
+  local function test_size(buflen, is_valid, impl)
+    local logger = require "rspamd_logger"
+    local test_str
+    if is_valid then
+      test_str = table.concat(valid_cases)
+    else
+      test_str = table.concat(valid_cases) .. table.concat(invalid_cases)
+    end
+
+    local buf = ffi.new("char[?]", buflen)
+    if #test_str < buflen then
+      -- Repeat the sample often enough to cover the whole buffer
+      test_str = test_str:rep(math.ceil(buflen / #test_str))
+    end
+    -- Pass the length explicitly: ffi.copy(dst, str) would also write the
+    -- string's NUL terminator, i.e. buflen + 1 bytes into a buflen-byte buffer
+    ffi.copy(buf, test_str, buflen)
+
+    local tm = 0
+
+    for _=1,speed_iters do
+      if impl == 'ref' then
+        local t1 = ffi.C.rspamd_get_ticks(1)
+        ffi.C.rspamd_fast_utf8_validate_ref(buf, buflen)
+        local t2 = ffi.C.rspamd_get_ticks(1)
+        tm = tm + (t2 - t1)
+      elseif impl == 'sse' then
+        local t1 = ffi.C.rspamd_get_ticks(1)
+        ffi.C.rspamd_fast_utf8_validate_sse41(buf, buflen)
+        local t2 = ffi.C.rspamd_get_ticks(1)
+        tm = tm + (t2 - t1)
+      else
+        local t1 = ffi.C.rspamd_get_ticks(1)
+        ffi.C.rspamd_fast_utf8_validate_avx2(buf, buflen)
+        local t2 = ffi.C.rspamd_get_ticks(1)
+        tm = tm + (t2 - t1)
+      end
+    end
+
+    logger.messagex("%s utf8 %s check (valid = %s): %s ticks per iter, %s ticks per byte",
+        impl, buflen, is_valid,
+        tm / speed_iters, tm / speed_iters / buflen)
+
+    return 0
+  end
+
+  for _,sz in ipairs({78, 512, 65535}) do
+    test(string.format("Utf8 test %s %d buffer, %s", 'ref', sz, 'valid'), function()
+      local res = test_size(sz, true, 'ref')
+      assert_equal(res, 0)
+    end)
+    test(string.format("Utf8 test %s %d buffer, %s", 'ref', sz, 'invalid'), function()
+      local res = test_size(sz, false, 'ref')
+      assert_equal(res, 0)
+    end)
+    test(string.format("Utf8 test %s %d buffer, %s", 'sse', sz, 'valid'), function()
+      local res = test_size(sz, true, 'sse')
+      assert_equal(res, 0)
+    end)
+    test(string.format("Utf8 test %s %d buffer, %s", 'sse', sz, 'invalid'), function()
+      local res = test_size(sz, false, 'sse')
+      assert_equal(res, 0)
+    end)
+    test(string.format("Utf8 test %s %d buffer, %s", 'avx2', sz, 'valid'), function()
+      local res = test_size(sz, true, 'avx2')
+      assert_equal(res, 0)
+    end)
+    test(string.format("Utf8 test %s %d buffer, %s", 'avx2', sz, 'invalid'), function()
+      local res = test_size(sz, false, 'avx2')
+      assert_equal(res, 0)
+    end)
+  end
+
 end)
\ No newline at end of file
-- 
2.39.5