mirrors
/
rspamd
mirror of https://github.com/vstakhov/rspamd.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
							-- Test utf routines

context("UTF8 check functions", function()
  local ffi = require("ffi")

  -- C routines under test. The symbols are looked up through ffi.C at the
  -- moment a test body runs, not when the tests are registered.
  ffi.cdef[[
    void rspamd_str_lc_utf8 (char *str, unsigned int size);
    void rspamd_str_lc (char *str, unsigned int size);
    char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen);
  ]]

  --- Copy a Lua string into a freshly allocated C char buffer.
  -- The buffer is one byte longer than the string; ffi.copy() with a Lua
  -- string source also copies the terminating zero byte.
  -- @tparam string str source string
  -- @return cdata `char[#str + 1]` buffer holding a copy of `str`
  local function to_cbuf(str)
    local buf = ffi.new("char[?]", #str + 1)
    ffi.copy(buf, str)
    return buf
  end

  --- Render every byte of a string as two uppercase hex digits.
  -- @tparam string str input string
  -- @treturn string hex dump of `str` without separators
  local function to_hex(str)
    -- Extra parentheses drop gsub's second return value (the match count)
    return (str:gsub('.', function(ch)
      return string.format('%02X', string.byte(ch))
    end))
  end

  --- Register one test per case for an in-place lowercasing C routine.
  -- Each test copies the input into a C buffer, calls `ffi.C[c_name]` on it
  -- with the input byte length and compares the buffer, read back as a
  -- zero-terminated string, with the expected value.
  -- @tparam string title test name prefix; the 1-based case index is appended
  -- @tparam string c_name name of the C function to call through ffi.C
  -- @tparam table cases array of `{input, expected}` pairs
  local function check_lowercase(title, c_name, cases)
    for i, case in ipairs(cases) do
      test(title .. " " .. tostring(i), function()
        local input, expected = case[1], case[2]
        local buf = to_cbuf(input)
        ffi.C[c_name](buf, #input)
        assert_equal(ffi.string(buf), expected)
      end)
    end
  end

  check_lowercase("UTF lowercase", "rspamd_str_lc_utf8", {
    {"АбЫрвАлг", "абырвалг"},
    {"АAБBвc", "аaбbвc"},
    --{"STRASSE", "straße"}, XXX: NYI
    {"KEÇİ", "keçi"},
  })

  check_lowercase("ASCII lowercase", "rspamd_str_lc", {
    {"AbCdEf", "abcdef"},
    {"A", "a"},
    {"AaAa", "aaaa"},
    {"AaAaAaAa", "aaaaaaaa"}
  })

  -- {input, expected} pairs for rspamd_str_make_utf_valid: in every expected
  -- value each of the bytes \200, \213 and \202 from the input shows up as
  -- one '�' character, while the remaining text is unchanged.
  local make_valid_cases = {
    {'тест', 'тест'},
    {'\200\213\202', '���'},
    {'тест\200\213\202test', 'тест���test'},
    {'\200\213\202test', '���test'},
    {'\200\213\202test\200\213\202', '���test���'},
    {'тест\200\213\202test\200\213\202', 'тест���test���'},
    {'тест\200\213\202test\200\213\202тест', 'тест���test���тест'},
  }

  -- Null pointer passed as the `dstlen` out-parameter (length is not requested)
  local NULL = ffi.new 'void*'
  for i, case in ipairs(make_valid_cases) do
    test("Unicode make valid " .. tostring(i), function()
      local input, expected = case[1], case[2]
      local buf = to_cbuf(input)

      local res = ffi.C.rspamd_str_make_utf_valid(buf, #input, NULL)
      -- A NULL cdata pointer compares equal to nil; fail with a readable
      -- message instead of handing a null pointer to ffi.string().
      assert(res ~= nil, "rspamd_str_make_utf_valid returned NULL")
      -- NOTE(review): the returned buffer is presumably allocated by the C
      -- side and is never released here — confirm ownership and free it if so.
      local s = ffi.string(res)

      -- Hex dumps of the actual and expected strings for the test log
      print(to_hex(s))
      print(to_hex(expected))
      assert_equal(s, expected)
    end)
  end
end)