local h = u:get_host()
if h then
- local non_latin,total = util.count_non_ascii(h)
+ local parts = rspamd_str_split(h, '.')
- if non_latin ~= total and non_latin > 0 then
- return true, 1.0, h
+ local bad_omographs = 0
+
+ for _,p in ipairs(parts) do
+ local cnlat,ctot = util.count_non_ascii(p)
+
+ if cnlat > 0 and cnlat ~= ctot then
+ bad_omographs = bad_omographs + 1.0 / cnlat
+ end
+ end
+
+ if bad_omographs > 0 then
+ if bad_omographs > 1 then bad_omographs = 1.0 end
+ return true, bad_omographs, h
end
end
end
{
gsize len;
const gchar *str = lua_tolstring (L, 1, &len);
- const gchar *p, *end;
+ const gchar *p, *end, *np;
gint ret = 0, total = 0;
if (str != NULL) {
while (p < end) {
if (*p & 0x80) {
+ np = g_utf8_find_next_char (p, end);
ret ++;
total ++;
+
+ p = (np != p) ? np : p + 1;
+
+ continue;
}
else if (g_ascii_isalpha (*p)) {
total ++;