aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-04-30 16:14:01 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-04-30 16:14:01 +0100
commitd594c736dbddd7120ee3a612bbbdc3b48efdcd1e (patch)
treedcb3485e0c8473bf66ebe6bca73a9a1ec64c9a14 /src
parent07159d72fe43537111cc2f030b986bb34a62a868 (diff)
downloadrspamd-d594c736dbddd7120ee3a612bbbdc3b48efdcd1e.tar.gz
rspamd-d594c736dbddd7120ee3a612bbbdc3b48efdcd1e.zip
[Minor] Allow to check spoofed characters in a single string
Diffstat (limited to 'src')
-rw-r--r--src/lua/lua_util.c71
1 files changed, 50 insertions, 21 deletions
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 93e668251..6c6a4201f 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -361,8 +361,8 @@ LUA_FUNCTION_DEF (util, zstd_decompress);
*/
LUA_FUNCTION_DEF (util, normalize_prob);
/***
- * @function util.is_utf_spoofed(str)
- * Returns true if a string is spoofed
+ * @function util.is_utf_spoofed(str, [str2])
+ * Returns true if a string is spoofed (possibly with another string `str2`)
* @return {boolean} true if a string is spoofed
*/
LUA_FUNCTION_DEF (util, is_utf_spoofed);
@@ -1913,8 +1913,9 @@ lua_util_is_utf_spoofed (lua_State *L)
{
gsize l1, l2;
gint ret, nres = 2;
- const gchar *s1 = lua_tolstring (L, 1, &l1), *s2 = lua_tolstring (L, 2, &l2);
- static USpoofChecker *spc;
+ const gchar *s1 = lua_tolstring (L, 1, &l1),
+ *s2 = lua_tolstring (L, 2, &l2);
+ static USpoofChecker *spc, *spc_sgl;
UErrorCode uc_err = U_ZERO_ERROR;
if (s1 && s2) {
@@ -1930,30 +1931,58 @@ lua_util_is_utf_spoofed (lua_State *L)
}
ret = uspoof_areConfusableUTF8 (spc, s1, l1, s2, l2, &uc_err);
- lua_pushboolean (L, !!(ret != 0));
+ }
+ else if (s1) {
+ /* We have just s1, not s2 */
+ if (spc_sgl == NULL) {
+ USet *allowed = uset_openEmpty ();
- switch (ret) {
- case 0:
- nres = 1;
- break;
- case USPOOF_SINGLE_SCRIPT_CONFUSABLE:
- lua_pushstring (L, "single");
- break;
- case USPOOF_MIXED_SCRIPT_CONFUSABLE:
- lua_pushstring (L, "multiple");
- break;
- case USPOOF_WHOLE_SCRIPT_CONFUSABLE:
- lua_pushstring (L, "whole");
- break;
- default:
- lua_pushstring (L, "unknown");
- break;
+#if U_ICU_VERSION_MAJOR_NUM >= 51
+ uset_addAll (allowed, uspoof_getRecommendedSet (&uc_err));
+ uset_addAll (allowed, uspoof_getInclusionSet (&uc_err));
+#endif
+
+ spc_sgl = uspoof_open (&uc_err);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
+ lua_pushboolean (L, false);
+
+ return 1;
+ }
+
+ uspoof_setAllowedChars (spc_sgl, allowed, &uc_err);
+#if U_ICU_VERSION_MAJOR_NUM >= 51
+ uspoof_setRestrictionLevel (spc_sgl, USPOOF_MODERATELY_RESTRICTIVE);
+#endif
}
+
+ ret = uspoof_checkUTF8 (spc_sgl, s1, l1, NULL, &uc_err);
}
else {
return luaL_error (L, "invalid arguments");
}
+ lua_pushboolean (L, !!(ret != 0));
+
+ switch (ret) {
+ case 0:
+ nres = 1;
+ break;
+ case USPOOF_SINGLE_SCRIPT_CONFUSABLE:
+ lua_pushstring (L, "single");
+ break;
+ case USPOOF_MIXED_SCRIPT_CONFUSABLE:
+ lua_pushstring (L, "multiple");
+ break;
+ case USPOOF_WHOLE_SCRIPT_CONFUSABLE:
+ lua_pushstring (L, "whole");
+ break;
+ default:
+ lua_pushstring (L, "unknown");
+ break;
+ }
+
return nres;
}