-- Source: root/test/lua/unit/utf.lua
-- Blob: 75dd3397714a47aa2a535a446a72011276319b03
-- Test utf routines

-- Test suite for three C string helpers reached through the LuaJIT FFI:
--   * rspamd_str_lc_utf8        - lowercases a UTF-8 buffer in place
--   * rspamd_str_lc             - lowercases an ASCII buffer in place
--   * rspamd_str_make_utf_valid - returns a copy with invalid bytes replaced
-- Every case is a pair { input, expected_output }.
context("UTF8 check functions", function()
  local ffi = require("ffi")
  ffi.cdef[[
    unsigned int rspamd_str_lc_utf8 (char *str, unsigned int size);
    unsigned int rspamd_str_lc (char *str, unsigned int size);
    char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen);
  ]]

  --- Copy a Lua string into a fresh, mutable C buffer.
  -- The buffer is one byte longer than the string so that the terminating
  -- NUL written by ffi.copy() for string sources fits as well.
  -- @param str Lua string to copy
  -- @return cdata `char[#str + 1]` holding the bytes of `str`
  local function to_cbuf(str)
    local buf = ffi.new("char[?]", #str + 1)
    ffi.copy(buf, str)
    return buf
  end

  --- Render every byte of a string as two uppercase hex digits.
  -- Used for diagnostics only: raw invalid bytes and U+FFFD are hard to tell
  -- apart in a terminal.
  -- @param str Lua string (treated as raw bytes)
  -- @return hex dump of `str`, e.g. "ab" -> "6162"
  local function to_hex(str)
    -- Extra parentheses drop gsub's second result (the substitution count).
    return (str:gsub('.', function(ch)
      return string.format('%02X', string.byte(ch))
    end))
  end

  -- In-place UTF-8 lowercasing; the C function returns the resulting length,
  -- which is used to slice the buffer.
  local utf_lower_cases = {
    {"АбЫрвАлг", "абырвалг"},
    {"АAБBвc", "аaбbвc"},
    --{"STRASSE", "straße"}, XXX: NYI
    {"KEÇİ", "keçi"},
  }

  for i, case in ipairs(utf_lower_cases) do
    local input, expected = case[1], case[2]
    test("UTF lowercase " .. tostring(i), function()
      local buf = to_cbuf(input)
      local nlen = ffi.C.rspamd_str_lc_utf8(buf, #input)
      assert_equal(ffi.string(buf, nlen), expected)
    end)
  end

  -- In-place ASCII lowercasing; inputs of length 1, 4, 6 and 8 bytes.
  local ascii_lower_cases = {
    {"AbCdEf", "abcdef"},
    {"A", "a"},
    {"AaAa", "aaaa"},
    {"AaAaAaAa", "aaaaaaaa"}
  }

  for i, case in ipairs(ascii_lower_cases) do
    local input, expected = case[1], case[2]
    test("ASCII lowercase " .. tostring(i), function()
      local buf = to_cbuf(input)
      ffi.C.rspamd_str_lc(buf, #input)
      -- The return value is ignored; the result is read up to the NUL
      -- terminator that to_cbuf() left in place.
      assert_equal(ffi.string(buf), expected)
    end)
  end

  -- \200\213\202 are three bytes that do not form valid UTF-8; each one is
  -- expected to come back as a single U+FFFD replacement character, while
  -- the valid text around them must be preserved unchanged.
  local make_valid_cases = {
    {'тест', 'тест'},
    {'\200\213\202', '���'},
    {'тест\200\213\202test', 'тест���test'},
    {'\200\213\202test', '���test'},
    {'\200\213\202test\200\213\202', '���test���'},
    {'тест\200\213\202test\200\213\202', 'тест���test���'},
    {'тест\200\213\202test\200\213\202тест', 'тест���test���тест'},
  }

  -- NULL pointer passed for the optional `dstlen` out-parameter.
  local NULL = ffi.new 'void*'
  for i, case in ipairs(make_valid_cases) do
    local input, expected = case[1], case[2]
    test("Unicode make valid " .. tostring(i), function()
      local buf = to_cbuf(input)

      local res = ffi.C.rspamd_str_make_utf_valid(buf, #input, NULL)
      -- ffi.string() on a NULL pointer would crash the whole runner, so turn
      -- that situation into an ordinary test failure instead.
      assert(res ~= nil, "rspamd_str_make_utf_valid returned NULL")
      -- NOTE(review): `res` is never released here. If the callee transfers
      -- ownership of a heap buffer to the caller, this leaks one small
      -- allocation per case - confirm ownership before adding a free.
      local s = ffi.string(res)

      -- Dump both sides as hex only when they differ, so that passing runs
      -- stay quiet and failures remain debuggable.
      if s ~= expected then
        print(to_hex(s))
        print(to_hex(expected))
      end
      assert_equal(s, expected)
    end)
  end
end)