diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-12-03 11:30:22 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-12-03 11:30:22 +0000 |
commit | b4442b41baab6160e87098f52b94def24b97e066 (patch) | |
tree | 5361f47875c1399c0c8a0836e9c74475af0ae459 /test | |
parent | fe940c7d3d9d72f0196b9cd847dd0160603dcbe9 (diff) | |
download | rspamd-b4442b41baab6160e87098f52b94def24b97e066.tar.gz rspamd-b4442b41baab6160e87098f52b94def24b97e066.zip |
[Minor] Add rspamd_str_make_utf_valid routine + unit tests
Diffstat (limited to 'test')
-rw-r--r-- | test/lua/unit/utf.lua | 72 |
1 files changed, 52 insertions, 20 deletions
diff --git a/test/lua/unit/utf.lua b/test/lua/unit/utf.lua index e22eb2a2f..277d99e41 100644 --- a/test/lua/unit/utf.lua +++ b/test/lua/unit/utf.lua @@ -5,36 +5,68 @@ context("UTF8 check functions", function() ffi.cdef[[ void rspamd_str_lc_utf8 (char *str, unsigned int size); void rspamd_str_lc (char *str, unsigned int size); + char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen); ]] - test("UTF lowercase", function() - local cases = { - {"АбЫрвАлг", "абырвалг"}, - {"АAБBвc", "аaбbвc"} - } - - for _,c in ipairs(cases) do + local cases = { + {"АбЫрвАлг", "абырвалг"}, + {"АAБBвc", "аaбbвc"}, + --{"STRASSE", "straße"}, XXX: NYI + {"KEÇİ", "keçi"}, + } + + for i,c in ipairs(cases) do + test("UTF lowercase " .. tostring(i), function() local buf = ffi.new("char[?]", #c[1] + 1) ffi.copy(buf, c[1]) ffi.C.rspamd_str_lc_utf8(buf, #c[1]) local s = ffi.string(buf) assert_equal(s, c[2]) - end - end) - test("ASCII lowercase", function() - local cases = { - {"AbCdEf", "abcdef"}, - {"A", "a"}, - {"AaAa", "aaaa"}, - {"AaAaAaAa", "aaaaaaaa"} - } - - for _,c in ipairs(cases) do + end) + end + + cases = { + {"AbCdEf", "abcdef"}, + {"A", "a"}, + {"AaAa", "aaaa"}, + {"AaAaAaAa", "aaaaaaaa"} + } + + for i,c in ipairs(cases) do + test("ASCII lowercase " .. tostring(i), function() local buf = ffi.new("char[?]", #c[1] + 1) ffi.copy(buf, c[1]) ffi.C.rspamd_str_lc(buf, #c[1]) local s = ffi.string(buf) assert_equal(s, c[2]) - end - end) + end) + end + + cases = { + {'тест', 'тест'}, + {'\200\213\202', '���'}, + {'тест\200\213\202test', 'тест���test'}, + {'\200\213\202test', '���test'}, + {'\200\213\202test\200\213\202', '���test���'}, + {'тест\200\213\202test\200\213\202', 'тест���test���'}, + {'тест\200\213\202test\200\213\202тест', 'тест���test���тест'}, + } + + local NULL = ffi.new 'void*' + for i,c in ipairs(cases) do + test("Unicode make valid " .. tostring(i), function() + local buf = ffi.new("char[?]", #c[1] + 1) + ffi.copy(buf, c[1]) + + local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL)) + local function to_hex(s) + return (s:gsub('.', function (c) + return string.format('%02X', string.byte(c)) + end)) + end + print(to_hex(s)) + print(to_hex(c[2])) + assert_equal(s, c[2]) + end) + end end)
\ No newline at end of file |