rspamd/test/lua/unit/utf.lua

72 行
2.0 KiB
Lua
原始文件 Blame 歷史記錄

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- Test utf routines
context("UTF8 check functions", function()
-- LuaJIT FFI module: lets the tests below call the C string routines directly.
local ffi = require("ffi")
-- C prototypes of the routines under test. The two lowercasing functions take a
-- mutable buffer plus its byte length; the tests read the result back from that
-- same buffer. rspamd_str_make_utf_valid takes a source buffer and its length and
-- returns a char *; the tests below pass a NULL pointer as dstlen.
ffi.cdef[[
void rspamd_str_lc_utf8 (char *str, unsigned int size);
void rspamd_str_lc (char *str, unsigned int size);
char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen);
]]
-- { input, expected } pairs for rspamd_str_lc_utf8.
-- Inputs mix Cyrillic, Latin and Turkish letters.
local cases = {
  {"АбЫрвАлг", "абырвалг"},
  {"АAБBвc", "аaбbвc"},
  --{"STRASSE", "straße"}, XXX: NYI
  {"KEÇİ", "keçi"},
}
-- Register one test per pair; the pair is unpacked outside the closure so the
-- test body does not depend on `cases`, which is reassigned further down.
for idx = 1, #cases do
  local input, expected = cases[idx][1], cases[idx][2]
  test("UTF lowercase " .. tostring(idx), function()
    -- Byte length of the input plus one spare byte for the terminating NUL
    local cbuf = ffi.new("char[?]", #input + 1)
    ffi.copy(cbuf, input)
    -- The C routine receives the byte length, not the character count
    ffi.C.rspamd_str_lc_utf8(cbuf, #input)
    assert_equal(ffi.string(cbuf), expected)
  end)
end
-- { input, expected } pairs for rspamd_str_lc; inputs are pure ASCII with
-- lengths 6, 1, 4 and 8 bytes.
cases = {
  {"AbCdEf", "abcdef"},
  {"A", "a"},
  {"AaAa", "aaaa"},
  {"AaAaAaAa", "aaaaaaaa"}
}
-- Register one test per pair; the pair is unpacked outside the closure so the
-- test body does not depend on `cases`, which is reassigned further down.
for idx = 1, #cases do
  local input, expected = cases[idx][1], cases[idx][2]
  test("ASCII lowercase " .. tostring(idx), function()
    -- Byte length of the input plus one spare byte for the terminating NUL
    local cbuf = ffi.new("char[?]", #input + 1)
    ffi.copy(cbuf, input)
    ffi.C.rspamd_str_lc(cbuf, #input)
    assert_equal(ffi.string(cbuf), expected)
  end)
end
-- { input, expected } pairs for rspamd_str_make_utf_valid.
-- '\200\213\202' is three bytes that do not form valid UTF-8; each of them is
-- expected to come back as U+FFFD REPLACEMENT CHARACTER, whose UTF-8 encoding
-- is the bytes EF BF BD ('\239\191\189'). The expected values are spelled with
-- decimal escapes so that viewers and editors cannot mangle the raw character.
cases = {
  {'тест', 'тест'},
  {'\200\213\202', '\239\191\189\239\191\189\239\191\189'},
  {'тест\200\213\202test', 'тест\239\191\189\239\191\189\239\191\189test'},
  {'\200\213\202test', '\239\191\189\239\191\189\239\191\189test'},
  {'\200\213\202test\200\213\202',
   '\239\191\189\239\191\189\239\191\189test\239\191\189\239\191\189\239\191\189'},
  {'тест\200\213\202test\200\213\202',
   'тест\239\191\189\239\191\189\239\191\189test\239\191\189\239\191\189\239\191\189'},
  {'тест\200\213\202test\200\213\202тест',
   'тест\239\191\189\239\191\189\239\191\189test\239\191\189\239\191\189\239\191\189тест'},
}
-- Null pointer passed for the dstlen out-parameter (the length is not needed here)
local NULL = ffi.new 'void*'

-- Render a string as upper-case hex, two digits per byte, for diagnostics.
local function to_hex(str)
  return (str:gsub('.', function (ch)
    return string.format('%02X', string.byte(ch))
  end))
end

for i,c in ipairs(cases) do
  test("Unicode make valid " .. tostring(i), function()
    -- Byte length of the input plus one spare byte for the terminating NUL
    local buf = ffi.new("char[?]", #c[1] + 1)
    ffi.copy(buf, c[1])
    -- NOTE(review): the returned char * is never released here; presumably it
    -- is allocated by the C side - confirm whether the test should free it.
    local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL))
    -- Dump actual and expected as hex so byte-level mismatches are visible
    print(to_hex(s))
    print(to_hex(c[2]))
    assert_equal(s, c[2])
  end)
end
end)