You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

utf.lua 2.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  -- Test utf routines
  --
  -- Exercises three rspamd_str_* C functions through the LuaJIT FFI:
  -- in-place UTF-8 lowercasing, in-place ASCII lowercasing, and repair of
  -- byte sequences that are not valid UTF-8.
  context("UTF8 check functions", function()
    local ffi = require("ffi")

    -- C prototypes of the functions called by the tests below.
    ffi.cdef[[
      void rspamd_str_lc_utf8 (char *str, unsigned int size);
      void rspamd_str_lc (char *str, unsigned int size);
      char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen);
    ]]

    -- Copies a Lua string into a freshly allocated C char array that has one
    -- extra byte beyond the string length, and returns that array.
    local function to_cbuf(str)
      local cbuf = ffi.new("char[?]", #str + 1)
      ffi.copy(cbuf, str)
      return cbuf
    end

    -- Renders every byte of a string as two uppercase hex digits.
    local function to_hex(bytes)
      -- gsub also returns the match count; keep only the resulting string.
      local hex = bytes:gsub('.', function(ch)
        return string.format('%02X', ch:byte())
      end)
      return hex
    end

    -- Registers one test per {input, expected} pair. Each test copies the
    -- input into a C buffer, calls ffi.C[c_func] on that buffer with the
    -- input's byte length, reads the buffer back as a Lua string and
    -- compares it with the expected value. The C symbol is looked up when
    -- the test runs, not when it is registered.
    local function add_lowercase_tests(title_prefix, c_func, pair_list)
      for idx, pair in ipairs(pair_list) do
        test(title_prefix .. tostring(idx), function()
          local input, expected = pair[1], pair[2]
          local cbuf = to_cbuf(input)
          ffi.C[c_func](cbuf, #input)
          local lowered = ffi.string(cbuf)
          assert_equal(lowered, expected)
        end)
      end
    end

    -- Mixed-case non-ASCII inputs and their expected lowercase forms.
    local utf_lower_cases = {
      {"АбЫрвАлг", "абырвалг"},
      {"АAБBвc", "аaбbвc"},
      --{"STRASSE", "straße"}, XXX: NYI
      {"KEÇİ", "keçi"},
    }
    add_lowercase_tests("UTF lowercase ", "rspamd_str_lc_utf8", utf_lower_cases)

    -- Plain ASCII inputs of several lengths.
    local ascii_lower_cases = {
      {"AbCdEf", "abcdef"},
      {"A", "a"},
      {"AaAa", "aaaa"},
      {"AaAaAaAa", "aaaaaaaa"}
    }
    add_lowercase_tests("ASCII lowercase ", "rspamd_str_lc", ascii_lower_cases)

    -- Inputs mixing valid text with the raw bytes \200\213\202; the expected
    -- values carry three '�' characters wherever those three bytes appear.
    local repair_cases = {
      {'тест', 'тест'},
      {'\200\213\202', '���'},
      {'тест\200\213\202test', 'тест���test'},
      {'\200\213\202test', '���test'},
      {'\200\213\202test\200\213\202', '���test���'},
      {'тест\200\213\202test\200\213\202', 'тест���test���'},
      {'тест\200\213\202test\200\213\202тест', 'тест���test���тест'},
    }

    -- Null pointer handed over as the dstlen argument.
    local null_ptr = ffi.new("void*")

    for idx, pair in ipairs(repair_cases) do
      test("Unicode make valid " .. tostring(idx), function()
        local raw, expected = pair[1], pair[2]
        local cbuf = to_cbuf(raw)
        -- NOTE(review): the char* returned here is converted to a Lua string
        -- and never released in this test; confirm who owns that memory.
        local fixed = ffi.string(
          ffi.C.rspamd_str_make_utf_valid(cbuf, #raw, null_ptr))
        -- Hex dumps of the actual and expected bytes, printed before the
        -- comparison.
        print(to_hex(fixed))
        print(to_hex(expected))
        assert_equal(fixed, expected)
      end)
    end
  end)