-- Test utf routines
--
-- Exercises three C string helpers through the LuaJIT FFI:
--   * rspamd_str_lc_utf8        - in-place lowercase of UTF-8 text
--   * rspamd_str_lc             - in-place lowercase of ASCII text
--   * rspamd_str_make_utf_valid - returns a copy of the input in which the
--                                 invalid bytes are replaced (see cases below)

context("UTF8 check functions", function()
  local ffi = require("ffi")

  ffi.cdef[[
    void rspamd_str_lc_utf8 (char *str, unsigned int size);
    void rspamd_str_lc (char *str, unsigned int size);
    char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen);
  ]]

  --- Copy a Lua string into a freshly allocated, NUL-terminated C buffer.
  -- @param str Lua string to copy
  -- @return char[] cdata of #str + 1 bytes
  local function to_c_buffer(str)
    -- One extra byte: ffi.copy() with a Lua string also writes the
    -- terminating NUL.
    local buf = ffi.new("char[?]", #str + 1)
    ffi.copy(buf, str)
    return buf
  end

  --- Run an in-place `void f(char *str, unsigned int size)` C routine on a
  -- copy of `input` and return the buffer contents up to the first NUL.
  -- @param symbol name of the function to look up in ffi.C (resolved at call
  --               time, i.e. inside the running test)
  -- @param input Lua string handed to the C routine
  -- @return resulting Lua string
  local function lowercase_in_place(symbol, input)
    local buf = to_c_buffer(input)
    ffi.C[symbol](buf, #input)
    return ffi.string(buf)
  end

  --- Render every byte of `str` as two upper-case hex digits.
  -- @param str Lua string
  -- @return hex dump without separators
  local function to_hex(str)
    return (str:gsub('.', function(ch)
      return string.format('%02X', string.byte(ch))
    end))
  end

  -- {input, expected} pairs for the UTF-8 aware lowercase routine
  local utf_lowercase_cases = {
    {"АбЫрвАлг", "абырвалг"},
    {"АAБBвc", "аaбbвc"},
    --{"STRASSE", "straße"}, XXX: NYI
    {"KEÇİ", "keçi"},
  }

  for i, c in ipairs(utf_lowercase_cases) do
    test("UTF lowercase " .. tostring(i), function()
      local s = lowercase_in_place("rspamd_str_lc_utf8", c[1])
      assert_equal(s, c[2])
    end)
  end

  -- {input, expected} pairs for the ASCII-only lowercase routine
  local ascii_lowercase_cases = {
    {"AbCdEf", "abcdef"},
    {"A", "a"},
    {"AaAa", "aaaa"},
    {"AaAaAaAa", "aaaaaaaa"},
  }

  for i, c in ipairs(ascii_lowercase_cases) do
    test("ASCII lowercase " .. tostring(i), function()
      local s = lowercase_in_place("rspamd_str_lc", c[1])
      assert_equal(s, c[2])
    end)
  end

  -- Three bytes that do not form valid UTF-8 ...
  local invalid = '\200\213\202'
  -- ... and what the cases expect in their place: one U+FFFD REPLACEMENT
  -- CHARACTER (UTF-8 bytes EF BF BD) per invalid byte. Decimal escapes are
  -- used so the source stays plain and works on Lua 5.1 / LuaJIT.
  local replaced = ('\239\191\189'):rep(3)

  -- {input, expected} pairs for the UTF-8 sanitising routine
  local make_valid_cases = {
    {'тест', 'тест'},
    {invalid, replaced},
    {'тест' .. invalid .. 'test', 'тест' .. replaced .. 'test'},
    {invalid .. 'test', replaced .. 'test'},
    {invalid .. 'test' .. invalid, replaced .. 'test' .. replaced},
    {'тест' .. invalid .. 'test' .. invalid,
     'тест' .. replaced .. 'test' .. replaced},
    {'тест' .. invalid .. 'test' .. invalid .. 'тест',
     'тест' .. replaced .. 'test' .. replaced .. 'тест'},
  }

  -- Null pointer passed as the optional `dstlen` out-parameter
  local NULL = ffi.new 'void*'

  for i, c in ipairs(make_valid_cases) do
    test("Unicode make valid " .. tostring(i), function()
      local buf = to_c_buffer(c[1])

      -- NOTE(review): the returned char* is only copied by ffi.string() and is
      -- never released here; if the C side heap-allocates the result this
      -- leaks one buffer per case - confirm ownership and free accordingly.
      local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL))

      -- Hex dumps make byte-level differences visible in the test output
      print(to_hex(s))
      print(to_hex(c[2]))
      assert_equal(s, c[2])
    end)
  end
end)