]> source.dussan.org Git - rspamd.git/commitdiff
[Rework] Rework url flags handling API
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 21 Apr 2020 13:27:20 +0000 (14:27 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 21 Apr 2020 13:27:20 +0000 (14:27 +0100)
src/libserver/url.c
src/libserver/url.h
src/lua/lua_url.c

index db89073f5d4e6dfa4d24af243152335be33f1d3b..195727c1392cf9373a1a30344555551fec1ba940 100644 (file)
@@ -214,6 +214,35 @@ struct url_matcher static_matchers[] = {
                                0}
 };
 
+struct rspamd_url_flag_name { /* One entry of the flag name <-> RSPAMD_URL_FLAG_* bit table */
+       const gchar *name; /* Textual flag name (string literal) */
+       gint flag; /* RSPAMD_URL_FLAG_* value associated with `name` */
+       gint hash; /* Fast hash of `name`: -1 placeholder here, computed in rspamd_url_init */
+} url_flag_names[] = {
+               {"phished", RSPAMD_URL_FLAG_PHISHED, -1},
+               {"numeric", RSPAMD_URL_FLAG_NUMERIC, -1},
+               {"obscured", RSPAMD_URL_FLAG_OBSCURED, -1},
+               {"redirected", RSPAMD_URL_FLAG_REDIRECTED, -1},
+               {"html_displayed", RSPAMD_URL_FLAG_HTML_DISPLAYED, -1},
+               {"text", RSPAMD_URL_FLAG_FROM_TEXT, -1},
+               {"subject", RSPAMD_URL_FLAG_SUBJECT, -1},
+               {"host_encoded", RSPAMD_URL_FLAG_HOSTENCODED, -1},
+               {"schema_encoded", RSPAMD_URL_FLAG_SCHEMAENCODED, -1},
+               {"path_encoded", RSPAMD_URL_FLAG_PATHENCODED, -1},
+               {"query_encoded", RSPAMD_URL_FLAG_QUERYENCODED, -1},
+               {"missing_slahes", RSPAMD_URL_FLAG_MISSINGSLASHES, -1}, /* NOTE(review): "slahes" spelling matches the name the Lua PUSH_FLAG call used before this change; renaming would alter the Lua-visible key */
+               {"idn", RSPAMD_URL_FLAG_IDN, -1},
+               {"has_port", RSPAMD_URL_FLAG_HAS_PORT, -1},
+               {"has_user", RSPAMD_URL_FLAG_HAS_USER, -1},
+               {"schemaless", RSPAMD_URL_FLAG_SCHEMALESS, -1},
+               {"unnormalised", RSPAMD_URL_FLAG_UNNORMALISED, -1},
+               {"zw_spaces", RSPAMD_URL_FLAG_ZW_SPACES, -1},
+               {"url_displayed", RSPAMD_URL_FLAG_DISPLAY_URL, -1},
+               {"image", RSPAMD_URL_FLAG_IMAGE, -1},
+               {"query", RSPAMD_URL_FLAG_QUERY, -1},
+               {"content", RSPAMD_URL_FLAG_CONTENT, -1}
+};
+
 
 static inline khint_t rspamd_url_hash (struct rspamd_url *u);
 
@@ -610,6 +639,26 @@ rspamd_url_init (const gchar *tld_file)
                                        url_scanner->matchers_strict->len);
                }
        }
+
+       /* Generate hashes for flags */
+       for (gint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+               url_flag_names[i].hash =
+                               rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+                                               url_flag_names[i].name,
+                                               strlen (url_flag_names[i].name), 0);
+       }
+       /* Ensure that we have no hashes collisions O(N^2) but this array is small */
+       for (gint i = 0; i < G_N_ELEMENTS (url_flag_names) - 1; i ++) {
+               for (gint j = i + 1; j < G_N_ELEMENTS (url_flag_names); j ++) {
+                       if (url_flag_names[i].hash == url_flag_names[j].hash) {
+                               msg_err ("collision: both %s and %s map to %d",
+                                               url_flag_names[i].name, url_flag_names[j].name,
+                                               url_flag_names[i].hash);
+                               abort ();
+                       }
+               }
+       }
+
 }
 
 #define SET_U(u, field) do {                                                \
@@ -3991,3 +4040,33 @@ rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url
 
        return false;
 }
+
+/*
+ * Resolves a textual flag name to its RSPAMD_URL_FLAG_* bit.
+ *
+ * On a match the bit is OR-ed into *flag (bits already set are preserved) and
+ * true is returned; otherwise *flag is left untouched and false is returned.
+ * A NULL `str` is treated as an unknown name. The lookup relies on the name
+ * hashes stored in url_flag_names by rspamd_url_init.
+ */
+bool
+rspamd_url_flag_from_string (const gchar *str, gint *flag)
+{
+       gint h;
+
+       if (str == NULL) {
+               /* E.g. lua_tostring () yields NULL for values that are not strings */
+               return false;
+       }
+
+       h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+                       str, strlen (str), 0);
+
+       for (guint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+               /*
+                * The hash is only a cheap prefilter: arbitrary input may collide
+                * with a known name, so confirm the match by comparing the strings.
+                */
+               if (url_flag_names[i].hash == h &&
+                               strcmp (url_flag_names[i].name, str) == 0) {
+                       *flag |= url_flag_names[i].flag;
+
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+
+/*
+ * Returns the name of the first url_flag_names entry that shares a bit with
+ * `flag`, or NULL when no entry matches.
+ */
+const gchar *
+rspamd_url_flag_to_string (int flag)
+{
+       const struct rspamd_url_flag_name *cur = url_flag_names;
+       const struct rspamd_url_flag_name *end =
+                       url_flag_names + G_N_ELEMENTS (url_flag_names);
+
+       while (cur != end) {
+               if ((cur->flag & flag) != 0) {
+                       return cur->name;
+               }
+
+               cur ++;
+       }
+
+       return NULL;
+}
index bb9c57399ac7975a1f908f97bb84d2d743bd6747..2a5892fc5e45fd1c052e91ba6fe1a6f57be04d1e 100644 (file)
@@ -36,6 +36,7 @@ enum rspamd_url_flags {
        RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
        RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
        RSPAMD_URL_FLAG_QUERY = 1u << 20u,
+       RSPAMD_URL_FLAG_CONTENT = 1u << 21u,
 };
 
 struct rspamd_url_tag {
@@ -268,6 +269,21 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto);
  */
 enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str);
 
+/**
+ * Converts string to a url flag
+ * @param str NUL-terminated flag name (e.g. "phished", "numeric")
+ * @param flag in/out bitmask: the matching RSPAMD_URL_FLAG_* bit is OR-ed into
+ * the value already stored there, so callers should initialise it first
+ * @return true if `str` names a known flag, false otherwise (`*flag` is then
+ * left unchanged)
+ */
+bool rspamd_url_flag_from_string (const gchar *str, gint *flag);
+
+/**
+ * Converts url flag to a string
+ * @param flag RSPAMD_URL_FLAG_* value; if several bits are set, only the first
+ * matching name in the internal table order is reported
+ * @return statically allocated flag name, or NULL if no known flag bit is set
+ */
+const gchar * rspamd_url_flag_to_string (int flag);
+
 /* Defines sets of urls indexed by url as is */
 KHASH_DECLARE (rspamd_url_hash, struct rspamd_url *, char);
 KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
index 6540919eaf6d23dff2f105eff52d4a29e15659b3..94cb51dbd9615607b1a08e86f487a8cc6a88d52d 100644 (file)
@@ -728,6 +728,7 @@ lua_url_create (lua_State *L)
        const gchar *text;
        size_t length;
        gboolean own_pool = FALSE;
+       struct rspamd_lua_url *u;
 
        if (lua_type (L, 1) == LUA_TUSERDATA) {
                pool = rspamd_lua_check_mempool (L, 1);
@@ -753,6 +754,26 @@ lua_url_create (lua_State *L)
                if (lua_type (L, -1) != LUA_TUSERDATA) {
                        /* URL is actually not found */
                        lua_pushnil (L);
+
+                       return 1;
+               }
+
+               u = (struct rspamd_lua_url *)lua_touserdata (L, -1);
+
+               if (lua_type (L, 3) == LUA_TTABLE) {
+                       /* Add flags */
+                       for (lua_pushnil (L); lua_next (L, 3); lua_pop (L, 1)) {
+                               int nmask = 0;
+                               const gchar *fname = lua_tostring (L, -1);
+
+                               if (rspamd_url_flag_from_string (fname, &nmask)) {
+                                       u->url->flags |= nmask;
+                               }
+                               else {
+                                       lua_pop (L, 1);
+                                       return luaL_error (L, "invalid flag: %s", fname);
+                               }
+                       }
                }
        }
 
@@ -854,9 +875,9 @@ lua_url_all (lua_State *L)
  * - `image`: URL is from src attribute of img HTML tag
  * @return {table} URL flags
  */
-#define PUSH_FLAG(fl, name) do { \
+#define PUSH_FLAG(fl) do { \
        if (flags & (fl)) { \
-               lua_pushstring (L, (name)); \
+               lua_pushstring (L, rspamd_url_flag_to_string (fl)); \
                lua_pushboolean (L, true); \
                lua_settable (L, -3); \
        } \
@@ -874,26 +895,27 @@ lua_url_get_flags (lua_State *L)
 
                lua_createtable (L, 0, 4);
 
-               PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
-               PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
-               PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
-               PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
-               PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
-               PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
-               PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
-               PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
-               PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
-               PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
-               PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
-               PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
-               PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
-               PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
-               PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
-               PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
-               PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
-               PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
-               PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
-               PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
+               PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC);
+               PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT);
+               PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT);
+               PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES);
+               PUSH_FLAG (RSPAMD_URL_FLAG_IDN);
+               PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT);
+               PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER);
+               PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS);
+               PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED);
+               PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES);
+               PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL);
+               PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE);
+               PUSH_FLAG (RSPAMD_URL_FLAG_CONTENT);
        }
        else {
                return luaL_error (L, "invalid arguments");