From: Vsevolod Stakhov Date: Mon, 23 Apr 2018 12:19:35 +0000 (+0100) Subject: [Project] Start map helpers project X-Git-Tag: 1.7.4~49^2~5 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=3856d40776b033e4f9b249b87bc97b66be40b30e;p=rspamd.git [Project] Start map helpers project --- diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt index 0bf4590e2..aab754195 100644 --- a/src/libutil/CMakeLists.txt +++ b/src/libutil/CMakeLists.txt @@ -9,6 +9,7 @@ SET(LIBRSPAMDUTILSRC ${CMAKE_CURRENT_SOURCE_DIR}/http.c ${CMAKE_CURRENT_SOURCE_DIR}/logger.c ${CMAKE_CURRENT_SOURCE_DIR}/map.c + ${CMAKE_CURRENT_SOURCE_DIR}/map_helpers.c ${CMAKE_CURRENT_SOURCE_DIR}/mem_pool.c ${CMAKE_CURRENT_SOURCE_DIR}/printf.c ${CMAKE_CURRENT_SOURCE_DIR}/radix.c diff --git a/src/libutil/map.c b/src/libutil/map.c index bb9294553..931e9b90e 100644 --- a/src/libutil/map.c +++ b/src/libutil/map.c @@ -24,15 +24,6 @@ #include "rspamd.h" #include "contrib/zstd/zstd.h" -#ifdef WITH_HYPERSCAN -#include "hs.h" -#endif -#ifndef WITH_PCRE2 -#include <pcre.h> -#else -#include <pcre2.h> -#endif - #undef MAP_DEBUG_REFS #ifdef MAP_DEBUG_REFS #define MAP_RETAIN(x, t) do { \ @@ -49,7 +40,6 @@ #define MAP_RELEASE(x, t) REF_RELEASE(x) #endif -static const gchar *hash_fill = "1"; static void free_http_cbdata_common (struct http_callback_data *cbd, gboolean plan_new); static void free_http_cbdata_dtor (gpointer p); static void free_http_cbdata (struct http_callback_data *cbd); @@ -2232,973 +2222,3 @@ err: return NULL; } - -/** - * FSM for parsing lists - */ - -#define MAP_STORE_KEY do { \ - while (g_ascii_isspace (*c) && p > c) { c ++; } \ - key = g_malloc (p - c + 1); \ - rspamd_strlcpy (key, c, p - c + 1); \ - key = g_strchomp (key); \ -} while (0) - -#define MAP_STORE_VALUE do { \ - while (g_ascii_isspace (*c) && p > c) { c ++; } \ - value = g_malloc (p - c + 1); \ - rspamd_strlcpy (value, c, p - c + 1); \ - value = g_strchomp (value); \ -} while (0) - -gchar * -rspamd_parse_kv_list ( - gchar * chunk, - gint len, -
struct map_cb_data *data, - insert_func func, - const gchar *default_value, - gboolean final) -{ - enum { - map_skip_spaces_before_key = 0, - map_read_key, - map_read_key_quoted, - map_read_key_slashed, - map_skip_spaces_after_key, - map_backslash_quoted, - map_backslash_slashed, - map_read_key_after_slash, - map_read_value, - map_read_comment_start, - map_skip_comment, - map_read_eol, - }; - - gchar *c, *p, *key = NULL, *value = NULL, *end; - struct rspamd_map *map = data->map; - - p = chunk; - c = p; - end = p + len; - - while (p < end) { - switch (data->state) { - case map_skip_spaces_before_key: - if (g_ascii_isspace (*p)) { - p ++; - } - else { - if (*p == '"') { - p++; - c = p; - data->state = map_read_key_quoted; - } - else if (*p == '/') { - /* Note that c is on '/' here as '/' is a part of key */ - c = p; - p++; - data->state = map_read_key_slashed; - } - else { - c = p; - data->state = map_read_key; - } - } - break; - case map_read_key: - /* read key */ - /* Check here comments, eol and end of buffer */ - if (*p == '#') { - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_KEY; - func (data->cur_data, key, default_value); - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - } - - key = NULL; - data->state = map_read_comment_start; - } - else if (*p == '\r' || *p == '\n') { - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_KEY; - func (data->cur_data, key, default_value); - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - } - - data->state = map_read_eol; - key = NULL; - } - else if (g_ascii_isspace (*p)) { - if (p - c > 0) { - MAP_STORE_KEY; - data->state = map_skip_spaces_after_key; - } - else { - /* Should not happen */ - g_assert_not_reached (); - } - } - else { - p++; - } - break; - case map_read_key_quoted: - if (*p == '\\') { - data->state = map_backslash_quoted; - p ++; - } - else if (*p == '"') { - /* Allow empty keys in this case */ - if (p - c >= 0) { - 
MAP_STORE_KEY; - data->state = map_skip_spaces_after_key; - } - else { - g_assert_not_reached (); - } - p ++; - } - else { - p ++; - } - break; - case map_read_key_slashed: - if (*p == '\\') { - data->state = map_backslash_slashed; - p ++; - } - else if (*p == '/') { - /* Allow empty keys in this case */ - if (p - c >= 0) { - data->state = map_read_key_after_slash; - } - else { - g_assert_not_reached (); - } - } - else { - p ++; - } - break; - case map_read_key_after_slash: - /* - * This state is equal to reading of key but '/' is not - * treated specially - */ - if (*p == '#') { - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_KEY; - func (data->cur_data, key, default_value); - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - key = NULL; - } - - data->state = map_read_comment_start; - } - else if (*p == '\r' || *p == '\n') { - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_KEY; - func (data->cur_data, key, default_value); - - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - key = NULL; - } - - data->state = map_read_eol; - key = NULL; - } - else if (g_ascii_isspace (*p)) { - if (p - c > 0) { - MAP_STORE_KEY; - data->state = map_skip_spaces_after_key; - } - else { - /* Should not happen */ - g_assert_not_reached (); - } - } - else { - p ++; - } - break; - case map_backslash_quoted: - p ++; - data->state = map_read_key_quoted; - break; - case map_backslash_slashed: - p ++; - data->state = map_read_key_slashed; - break; - case map_skip_spaces_after_key: - if (*p == ' ' || *p == '\t') { - p ++; - } - else { - c = p; - data->state = map_read_value; - } - break; - case map_read_value: - g_assert (key != NULL); - if (*p == '#') { - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_VALUE; - func (data->cur_data, key, value); - msg_debug_map ("insert key value pair: %s -> %s", - key, value); - g_free (key); - g_free (value); - key = NULL; - value = NULL; - } - else { - 
func (data->cur_data, key, default_value); - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - key = NULL; - } - - data->state = map_read_comment_start; - } - else if (*p == '\r' || *p == '\n') { - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_VALUE; - func (data->cur_data, key, value); - msg_debug_map ("insert key value pair: %s -> %s", - key, value); - g_free (key); - g_free (value); - key = NULL; - value = NULL; - } - else { - func (data->cur_data, key, default_value); - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - key = NULL; - } - - data->state = map_read_eol; - key = NULL; - } - else { - p ++; - } - break; - case map_read_comment_start: - if (*p == '#') { - data->state = map_skip_comment; - p ++; - key = NULL; - value = NULL; - } - else { - g_assert_not_reached (); - } - break; - case map_skip_comment: - if (*p == '\r' || *p == '\n') { - data->state = map_read_eol; - } - else { - p ++; - } - break; - case map_read_eol: - /* Skip \r\n and whitespaces */ - if (*p == '\r' || *p == '\n') { - p++; - } - else { - data->state = map_skip_spaces_before_key; - } - break; - default: - g_assert_not_reached (); - break; - } - } - - if (final) { - /* Examine the state */ - switch (data->state) { - case map_read_key: - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_KEY; - func (data->cur_data, key, default_value); - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - key = NULL; - } - break; - case map_read_value: - g_assert (key != NULL); - if (p - c > 0) { - /* Store a single key */ - MAP_STORE_VALUE; - func (data->cur_data, key, value); - msg_debug_map ("insert key value pair: %s -> %s", - key, value); - g_free (key); - g_free (value); - key = NULL; - value = NULL; - } - else { - func (data->cur_data, key, default_value); - msg_debug_map ("insert key only pair: %s -> %s", - key, default_value); - g_free (key); - key = NULL; - } - 
break; - } - } - - return c; -} - -/** - * Radix tree helper function - */ -static void -radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value) -{ - radix_compressed_t *tree = (radix_compressed_t *)st; - rspamd_mempool_t *pool; - gpointer nvalue; - - pool = radix_get_pool (tree); - nvalue = rspamd_mempool_strdup (pool, value); - rspamd_radix_add_iplist (key, ",", tree, nvalue, FALSE); -} - -static void -hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value) -{ - GHashTable *ht = st; - gpointer k, v; - - k = g_strdup (key); - v = g_strdup (value); - g_hash_table_replace (ht, k, v); -} - -/* Helpers */ -gchar * -rspamd_hosts_read ( - gchar * chunk, - gint len, - struct map_cb_data *data, - gboolean final) -{ - if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, g_free, g_free); - } - return rspamd_parse_kv_list ( - chunk, - len, - data, - hash_insert_helper, - hash_fill, - final); -} - -void -rspamd_hosts_fin (struct map_cb_data *data) -{ - struct rspamd_map *map = data->map; - - if (data->prev_data) { - g_hash_table_unref (data->prev_data); - } - if (data->cur_data) { - msg_info_map ("read hash of %d elements", g_hash_table_size - (data->cur_data)); - } -} - -gchar * -rspamd_kv_list_read ( - gchar * chunk, - gint len, - struct map_cb_data *data, - gboolean final) -{ - if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, g_free, g_free); - } - return rspamd_parse_kv_list ( - chunk, - len, - data, - hash_insert_helper, - "", - final); -} - -void -rspamd_kv_list_fin (struct map_cb_data *data) -{ - struct rspamd_map *map = data->map; - - if (data->prev_data) { - g_hash_table_unref (data->prev_data); - } - if (data->cur_data) { - msg_info_map ("read hash of %d elements", g_hash_table_size - (data->cur_data)); - } -} - -gchar * -rspamd_radix_read ( - gchar * chunk, - gint len, - struct map_cb_data *data, 
- gboolean final) -{ - radix_compressed_t *tree; - rspamd_mempool_t *rpool; - struct rspamd_map *map = data->map; - - if (data->cur_data == NULL) { - tree = radix_create_compressed (); - rpool = radix_get_pool (tree); - memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid)); - data->cur_data = tree; - } - return rspamd_parse_kv_list ( - chunk, - len, - data, - radix_tree_insert_helper, - hash_fill, - final); -} - -void -rspamd_radix_fin (struct map_cb_data *data) -{ - struct rspamd_map *map = data->map; - - if (data->prev_data) { - radix_destroy_compressed (data->prev_data); - } - if (data->cur_data) { - msg_info_map ("read radix trie of %z elements: %s", - radix_get_size (data->cur_data), radix_get_info (data->cur_data)); - } -} - -enum rspamd_regexp_map_flags { - RSPAMD_REGEXP_FLAG_UTF = (1 << 0), - RSPAMD_REGEXP_FLAG_MULTIPLE = (1 << 1) -}; - -struct rspamd_regexp_map { - struct rspamd_map *map; - GPtrArray *regexps; - GPtrArray *values; - enum rspamd_regexp_map_flags map_flags; -#ifdef WITH_HYPERSCAN - hs_database_t *hs_db; - hs_scratch_t *hs_scratch; - const gchar **patterns; - gint *flags; - gint *ids; -#endif -}; - -static struct rspamd_regexp_map * -rspamd_regexp_map_create (struct rspamd_map *map, - enum rspamd_regexp_map_flags flags) -{ - struct rspamd_regexp_map *re_map; - - re_map = g_malloc0 (sizeof (*re_map)); - re_map->values = g_ptr_array_new (); - re_map->regexps = g_ptr_array_new (); - re_map->map = map; - re_map->map_flags = flags; - - return re_map; -} - - -static void -rspamd_regexp_map_destroy (struct rspamd_regexp_map *re_map) -{ - rspamd_regexp_t *re; - guint i; - - for (i = 0; i < re_map->regexps->len; i ++) { - re = g_ptr_array_index (re_map->regexps, i); - rspamd_regexp_unref (re); - } - - for (i = 0; i < re_map->values->len; i ++) { - g_free (g_ptr_array_index (re_map->values, i)); - } - - g_ptr_array_free (re_map->regexps, TRUE); - g_ptr_array_free (re_map->values, TRUE); - -#ifdef WITH_HYPERSCAN - if (re_map->hs_scratch) { - 
hs_free_scratch (re_map->hs_scratch); - } - if (re_map->hs_db) { - hs_free_database (re_map->hs_db); - } - if (re_map->patterns) { - g_free (re_map->patterns); - } - if (re_map->flags) { - g_free (re_map->flags); - } - if (re_map->ids) { - g_free (re_map->ids); - } -#endif - - g_free (re_map); -} - -static void -rspamd_re_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) -{ - struct rspamd_regexp_map *re_map = st; - struct rspamd_map *map; - rspamd_regexp_t *re; - GError *err = NULL; - gint pcre_flags; - - map = re_map->map; - re = rspamd_regexp_new (key, NULL, &err); - - if (re == NULL) { - msg_err_map ("cannot parse regexp %s: %e", key, err); - - if (err) { - g_error_free (err); - } - - return; - } - - pcre_flags = rspamd_regexp_get_pcre_flags (re); - -#ifndef WITH_PCRE2 - if (pcre_flags & PCRE_FLAG(UTF8)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; - } -#else - if (pcre_flags & PCRE_FLAG(UTF)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; - } -#endif - - g_ptr_array_add (re_map->regexps, re); - g_ptr_array_add (re_map->values, g_strdup (value)); -} - -static void -rspamd_glob_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) -{ - struct rspamd_regexp_map *re_map = st; - struct rspamd_map *map; - rspamd_regexp_t *re; - gchar *escaped; - GError *err = NULL; - gint pcre_flags; - gsize escaped_len; - - map = re_map->map; - escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len, TRUE); - re = rspamd_regexp_new (escaped, NULL, &err); - g_free (escaped); - - if (re == NULL) { - msg_err_map ("cannot parse regexp %s: %e", key, err); - - if (err) { - g_error_free (err); - } - - return; - } - - pcre_flags = rspamd_regexp_get_pcre_flags (re); - -#ifndef WITH_PCRE2 - if (pcre_flags & PCRE_FLAG(UTF8)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; - } -#else - if (pcre_flags & PCRE_FLAG(UTF)) { - re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; - } -#endif - - g_ptr_array_add (re_map->regexps, re); - 
g_ptr_array_add (re_map->values, g_strdup (value)); -} - -static void -rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) -{ -#ifdef WITH_HYPERSCAN - guint i; - hs_platform_info_t plt; - hs_compile_error_t *err; - struct rspamd_map *map; - rspamd_regexp_t *re; - gint pcre_flags; - - map = re_map->map; - - if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) { - msg_info_map ("disable hyperscan for map %s, ssse3 instructons are not supported by CPU", - map->name); - return; - } - - if (hs_populate_platform (&plt) != HS_SUCCESS) { - msg_err_map ("cannot populate hyperscan platform"); - return; - } - - re_map->patterns = g_new (const gchar *, re_map->regexps->len); - re_map->flags = g_new (gint, re_map->regexps->len); - re_map->ids = g_new (gint, re_map->regexps->len); - - for (i = 0; i < re_map->regexps->len; i ++) { - re = g_ptr_array_index (re_map->regexps, i); - re_map->patterns[i] = rspamd_regexp_get_pattern (re); - re_map->flags[i] = HS_FLAG_SINGLEMATCH; - pcre_flags = rspamd_regexp_get_pcre_flags (re); - -#ifndef WITH_PCRE2 - if (pcre_flags & PCRE_FLAG(UTF8)) { - re_map->flags[i] |= HS_FLAG_UTF8; - } -#else - if (pcre_flags & PCRE_FLAG(UTF)) { - re_map->flags[i] |= HS_FLAG_UTF8; - } -#endif - if (pcre_flags & PCRE_FLAG(CASELESS)) { - re_map->flags[i] |= HS_FLAG_CASELESS; - } - if (pcre_flags & PCRE_FLAG(MULTILINE)) { - re_map->flags[i] |= HS_FLAG_MULTILINE; - } - if (pcre_flags & PCRE_FLAG(DOTALL)) { - re_map->flags[i] |= HS_FLAG_DOTALL; - } - if (rspamd_regexp_get_maxhits (re) == 1) { - re_map->flags[i] |= HS_FLAG_SINGLEMATCH; - } - - re_map->ids[i] = i; - } - - if (re_map->regexps->len > 0 && re_map->patterns) { - if (hs_compile_multi (re_map->patterns, - re_map->flags, - re_map->ids, - re_map->regexps->len, - HS_MODE_BLOCK, - &plt, - &re_map->hs_db, - &err) != HS_SUCCESS) { - - msg_err_map ("cannot create tree of regexp when processing '%s': %s", - err->expression >= 0 ? 
- re_map->patterns[err->expression] : - "unknown regexp", err->message); - re_map->hs_db = NULL; - hs_free_compile_error (err); - - return; - } - - if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) { - msg_err_map ("cannot allocate scratch space for hyperscan"); - hs_free_database (re_map->hs_db); - re_map->hs_db = NULL; - } - } - else { - msg_err_map ("regexp map is empty"); - } -#endif -} - -gchar * -rspamd_regexp_list_read_single ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final) -{ - struct rspamd_regexp_map *re_map; - - if (data->cur_data == NULL) { - re_map = rspamd_regexp_map_create (data->map, 0); - data->cur_data = re_map; - } - - return rspamd_parse_kv_list ( - chunk, - len, - data, - rspamd_re_map_insert_helper, - hash_fill, - final); -} - -gchar * -rspamd_glob_list_read_single ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final) -{ - struct rspamd_regexp_map *re_map; - - if (data->cur_data == NULL) { - re_map = rspamd_regexp_map_create (data->map, 0); - data->cur_data = re_map; - } - - return rspamd_parse_kv_list ( - chunk, - len, - data, - rspamd_glob_map_insert_helper, - hash_fill, - final); -} - -gchar * -rspamd_regexp_list_read_multiple ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final) -{ - struct rspamd_regexp_map *re_map; - - if (data->cur_data == NULL) { - re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_MULTIPLE); - data->cur_data = re_map; - } - - return rspamd_parse_kv_list ( - chunk, - len, - data, - rspamd_re_map_insert_helper, - hash_fill, - final); -} - -void -rspamd_regexp_list_fin (struct map_cb_data *data) -{ - struct rspamd_regexp_map *re_map; - struct rspamd_map *map = data->map; - - if (data->prev_data) { - rspamd_regexp_map_destroy (data->prev_data); - } - if (data->cur_data) { - re_map = data->cur_data; - rspamd_re_map_finalize (re_map); - msg_info_map ("read regexp list of %ud elements", - re_map->regexps->len); - } -} 
- -#ifdef WITH_HYPERSCAN -static int -rspamd_match_hs_single_handler (unsigned int id, unsigned long long from, - unsigned long long to, - unsigned int flags, void *context) -{ - guint *i = context; - /* Always return non-zero as we need a single match here */ - - *i = id; - - return 1; -} -#endif - -gpointer -rspamd_match_regexp_map_single (struct rspamd_regexp_map *map, - const gchar *in, gsize len) -{ - guint i; - rspamd_regexp_t *re; - gint res = 0; - gpointer ret = NULL; - gboolean validated = FALSE; - - g_assert (in != NULL); - - if (map == NULL || len == 0) { - return NULL; - } - - if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) { - if (g_utf8_validate (in, len, NULL)) { - validated = TRUE; - } - } - else { - validated = TRUE; - } - -#ifdef WITH_HYPERSCAN - if (map->hs_db && map->hs_scratch) { - - if (validated) { - - res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch, - rspamd_match_hs_single_handler, (void *)&i); - - if (res == HS_SCAN_TERMINATED) { - res = 1; - ret = g_ptr_array_index (map->values, i); - } - - return ret; - } - } -#endif - - if (!res) { - /* PCRE version */ - for (i = 0; i < map->regexps->len; i ++) { - re = g_ptr_array_index (map->regexps, i); - - if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) { - ret = g_ptr_array_index (map->values, i); - break; - } - } - } - - return ret; -} - -#ifdef WITH_HYPERSCAN -struct rspamd_multiple_cbdata { - GPtrArray *ar; - struct rspamd_regexp_map *map; -}; - -static int -rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from, - unsigned long long to, - unsigned int flags, void *context) -{ - struct rspamd_multiple_cbdata *cbd = context; - - if (id < cbd->map->values->len) { - g_ptr_array_add (cbd->ar, g_ptr_array_index (cbd->map->values, id)); - } - - /* Always return zero as we need all matches here */ - return 0; -} -#endif - -gpointer -rspamd_match_regexp_map_all (struct rspamd_regexp_map *map, - const gchar *in, gsize len) -{ - guint i; - rspamd_regexp_t *re; 
- GPtrArray *ret; - gint res = 0; - gboolean validated = FALSE; - - g_assert (in != NULL); - - if (map == NULL || len == 0) { - return NULL; - } - - if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) { - if (g_utf8_validate (in, len, NULL)) { - validated = TRUE; - } - } - else { - validated = TRUE; - } - - ret = g_ptr_array_new (); - -#ifdef WITH_HYPERSCAN - if (map->hs_db && map->hs_scratch) { - - if (validated) { - struct rspamd_multiple_cbdata cbd; - - cbd.ar = ret; - cbd.map = map; - - if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch, - rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) { - res = 1; - } - } - } -#endif - - if (!res) { - /* PCRE version */ - for (i = 0; i < map->regexps->len; i ++) { - re = g_ptr_array_index (map->regexps, i); - - if (rspamd_regexp_search (re, in, len, NULL, NULL, - !validated, NULL)) { - g_ptr_array_add (ret, g_ptr_array_index (map->values, i)); - } - } - } - - if (ret->len > 0) { - return ret; - } - - g_ptr_array_free (ret, TRUE); - - return NULL; -} diff --git a/src/libutil/map.h b/src/libutil/map.h index 0523c2a20..171c0c55f 100644 --- a/src/libutil/map.h +++ b/src/libutil/map.h @@ -77,97 +77,4 @@ void rspamd_map_watch (struct rspamd_config *cfg, struct event_base *ev_base, */ void rspamd_map_remove_all (struct rspamd_config *cfg); -typedef void (*insert_func) (gpointer st, gconstpointer key, - gconstpointer value); - -/** - * Common callbacks for frequent types of lists - */ - -/** - * Radix list is a list like ip/mask - */ -gchar * rspamd_radix_read ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final); -void rspamd_radix_fin (struct map_cb_data *data); - -/** - * Host list is an ordinal list of hosts or domains - */ -gchar * rspamd_hosts_read ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final); -void rspamd_hosts_fin (struct map_cb_data *data); - -/** - * Kv list is an ordinal list of keys and values separated by whitespace - */ -gchar * rspamd_kv_list_read ( - gchar *chunk, 
- gint len, - struct map_cb_data *data, - gboolean final); -void rspamd_kv_list_fin (struct map_cb_data *data); - -/** - * Regexp list is a list of regular expressions - */ -struct rspamd_regexp_map; - -gchar * rspamd_regexp_list_read_single ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final); -gchar * rspamd_regexp_list_read_multiple ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final); -gchar * rspamd_glob_list_read_single ( - gchar *chunk, - gint len, - struct map_cb_data *data, - gboolean final); -void rspamd_regexp_list_fin (struct map_cb_data *data); - -/** - * FSM for lists parsing (support comments, blank lines and partial replies) - */ -gchar * -rspamd_parse_kv_list ( - gchar * chunk, - gint len, - struct map_cb_data *data, - insert_func func, - const gchar *default_value, - gboolean final); - -/** - * Find a single (any) matching regexp for the specified text or NULL if - * no matches found - * @param map - * @param in - * @param len - * @return - */ -gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map *map, - const gchar *in, gsize len); - -/** - * Find a multiple (all) matching regexp for the specified text or NULL if - * no matches found. Returns GPtrArray that *must* be freed by a caller if not NULL - * @param map - * @param in - * @param len - * @return - */ -gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map *map, - const gchar *in, gsize len); - #endif diff --git a/src/libutil/map_helpers.c b/src/libutil/map_helpers.c new file mode 100644 index 000000000..f06174237 --- /dev/null +++ b/src/libutil/map_helpers.c @@ -0,0 +1,1021 @@ +/*- + * Copyright 2018 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "map_helpers.h" +#include "map_private.h" +#include "khash.h" +#include "radix.h" + +#ifdef WITH_HYPERSCAN +#include "hs.h" +#endif +#ifndef WITH_PCRE2 +#include <pcre.h> +#else +#include <pcre2.h> +#endif + +static const gchar *hash_fill = "1"; + +struct rspamd_map_helper_value { + gsize hits; + gchar value[]; /* Null terminated */ +}; + +struct rspamd_radix_map_helper { + rspamd_mempool_t *pool; + radix_compressed_t *trie; +}; + +KHASH_INIT (rspamd_map_hash, const gchar *, + struct rspamd_map_helper_value *, true, + rspamd_strcase_hash, rspamd_strcase_equal); + +struct rspamd_hash_map_helper { + rspamd_mempool_t *pool; + khash_t(rspamd_map_hash) *htb; +}; + +enum rspamd_regexp_map_flags { + RSPAMD_REGEXP_FLAG_UTF = (1u << 0), + RSPAMD_REGEXP_FLAG_MULTIPLE = (1u << 1) +}; + +struct rspamd_regexp_map_helper { + rspamd_mempool_t *pool; + struct rspamd_map *map; + GPtrArray *regexps; + GPtrArray *values; + enum rspamd_regexp_map_flags map_flags; +#ifdef WITH_HYPERSCAN + hs_database_t *hs_db; + hs_scratch_t *hs_scratch; + const gchar **patterns; + gint *flags; + gint *ids; +#endif +}; + +/** + * FSM for parsing lists + */ + +#define MAP_STORE_KEY do { \ + while (g_ascii_isspace (*c) && p > c) { c ++; } \ + key = g_malloc (p - c + 1); \ + rspamd_strlcpy (key, c, p - c + 1); \ + key = g_strchomp (key); \ +} while (0) + +#define MAP_STORE_VALUE do { \ + while (g_ascii_isspace (*c) && p > c) { c ++; } \ + value = g_malloc (p - c + 1); \ + rspamd_strlcpy (value, c, p - c + 1); \ + value = g_strchomp (value); \ +} while (0) + +gchar * +rspamd_parse_kv_list (
+ gchar * chunk, + gint len, + struct map_cb_data *data, + insert_func func, + const gchar *default_value, + gboolean final) +{ + enum { + map_skip_spaces_before_key = 0, + map_read_key, + map_read_key_quoted, + map_read_key_slashed, + map_skip_spaces_after_key, + map_backslash_quoted, + map_backslash_slashed, + map_read_key_after_slash, + map_read_value, + map_read_comment_start, + map_skip_comment, + map_read_eol, + }; + + gchar *c, *p, *key = NULL, *value = NULL, *end; + struct rspamd_map *map = data->map; + + p = chunk; + c = p; + end = p + len; + + while (p < end) { + switch (data->state) { + case map_skip_spaces_before_key: + if (g_ascii_isspace (*p)) { + p ++; + } + else { + if (*p == '"') { + p++; + c = p; + data->state = map_read_key_quoted; + } + else if (*p == '/') { + /* Note that c is on '/' here as '/' is a part of key */ + c = p; + p++; + data->state = map_read_key_slashed; + } + else { + c = p; + data->state = map_read_key; + } + } + break; + case map_read_key: + /* read key */ + /* Check here comments, eol and end of buffer */ + if (*p == '#') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + } + + key = NULL; + data->state = map_read_comment_start; + } + else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + } + + data->state = map_read_eol; + key = NULL; + } + else if (g_ascii_isspace (*p)) { + if (p - c > 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + /* Should not happen */ + g_assert_not_reached (); + } + } + else { + p++; + } + break; + case map_read_key_quoted: + if (*p == '\\') { + data->state = map_backslash_quoted; + p ++; + } + else if (*p == '"') { + /* Allow empty keys in 
this case */ + if (p - c >= 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + g_assert_not_reached (); + } + p ++; + } + else { + p ++; + } + break; + case map_read_key_slashed: + if (*p == '\\') { + data->state = map_backslash_slashed; + p ++; + } + else if (*p == '/') { + /* Allow empty keys in this case */ + if (p - c >= 0) { + data->state = map_read_key_after_slash; + } + else { + g_assert_not_reached (); + } + } + else { + p ++; + } + break; + case map_read_key_after_slash: + /* + * This state is equal to reading of key but '/' is not + * treated specially + */ + if (*p == '#') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + + data->state = map_read_comment_start; + } + else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + + data->state = map_read_eol; + key = NULL; + } + else if (g_ascii_isspace (*p)) { + if (p - c > 0) { + MAP_STORE_KEY; + data->state = map_skip_spaces_after_key; + } + else { + /* Should not happen */ + g_assert_not_reached (); + } + } + else { + p ++; + } + break; + case map_backslash_quoted: + p ++; + data->state = map_read_key_quoted; + break; + case map_backslash_slashed: + p ++; + data->state = map_read_key_slashed; + break; + case map_skip_spaces_after_key: + if (*p == ' ' || *p == '\t') { + p ++; + } + else { + c = p; + data->state = map_read_value; + } + break; + case map_read_value: + g_assert (key != NULL); + if (*p == '#') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s", + key, value); + g_free (key); + g_free (value); + key = 
NULL; + value = NULL; + } + else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + + data->state = map_read_comment_start; + } + else if (*p == '\r' || *p == '\n') { + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s", + key, value); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } + else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + + data->state = map_read_eol; + key = NULL; + } + else { + p ++; + } + break; + case map_read_comment_start: + if (*p == '#') { + data->state = map_skip_comment; + p ++; + key = NULL; + value = NULL; + } + else { + g_assert_not_reached (); + } + break; + case map_skip_comment: + if (*p == '\r' || *p == '\n') { + data->state = map_read_eol; + } + else { + p ++; + } + break; + case map_read_eol: + /* Skip \r\n and whitespaces */ + if (*p == '\r' || *p == '\n') { + p++; + } + else { + data->state = map_skip_spaces_before_key; + } + break; + default: + g_assert_not_reached (); + break; + } + } + + if (final) { + /* Examine the state */ + switch (data->state) { + case map_read_key: + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_KEY; + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); + g_free (key); + key = NULL; + } + break; + case map_read_value: + g_assert (key != NULL); + if (p - c > 0) { + /* Store a single key */ + MAP_STORE_VALUE; + func (data->cur_data, key, value); + msg_debug_map ("insert key value pair: %s -> %s", + key, value); + g_free (key); + g_free (value); + key = NULL; + value = NULL; + } + else { + func (data->cur_data, key, default_value); + msg_debug_map ("insert key only pair: %s -> %s", + key, default_value); 
+ g_free (key); + key = NULL; + } + break; + } + } + + return c; +} + +/** + * Radix tree helper function + */ +static void +radix_tree_insert_helper (gpointer st, gconstpointer key, gconstpointer value) +{ + radix_compressed_t *tree = (radix_compressed_t *)st; + rspamd_mempool_t *pool; + gpointer nvalue; + + pool = radix_get_pool (tree); + nvalue = rspamd_mempool_strdup (pool, value); + rspamd_radix_add_iplist (key, ",", tree, nvalue, FALSE); +} + +static void +hash_insert_helper (gpointer st, gconstpointer key, gconstpointer value) +{ + GHashTable *ht = st; + gpointer k, v; + + k = g_strdup (key); + v = g_strdup (value); + g_hash_table_replace (ht, k, v); +} + +/* Helpers */ +gchar * +rspamd_hosts_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + if (data->cur_data == NULL) { + data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); + } + return rspamd_parse_kv_list ( + chunk, + len, + data, + hash_insert_helper, + hash_fill, + final); +} + +void +rspamd_hosts_fin (struct map_cb_data *data) +{ + struct rspamd_map *map = data->map; + + if (data->prev_data) { + g_hash_table_unref (data->prev_data); + } + if (data->cur_data) { + msg_info_map ("read hash of %d elements", g_hash_table_size + (data->cur_data)); + } +} + +gchar * +rspamd_kv_list_read ( + gchar * chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + if (data->cur_data == NULL) { + data->cur_data = g_hash_table_new_full (rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); + } + return rspamd_parse_kv_list ( + chunk, + len, + data, + hash_insert_helper, + "", + final); +} + +void +rspamd_kv_list_fin (struct map_cb_data *data) +{ + struct rspamd_map *map = data->map; + + if (data->prev_data) { + g_hash_table_unref (data->prev_data); + } + if (data->cur_data) { + msg_info_map ("read hash of %d elements", g_hash_table_size + (data->cur_data)); + } +} + +gchar * +rspamd_radix_read ( + gchar * chunk, + 
gint len, + struct map_cb_data *data, + gboolean final) +{ + radix_compressed_t *tree; + rspamd_mempool_t *rpool; + struct rspamd_map *map = data->map; + + if (data->cur_data == NULL) { + tree = radix_create_compressed (); + rpool = radix_get_pool (tree); + memcpy (rpool->tag.uid, map->tag, sizeof (rpool->tag.uid)); + data->cur_data = tree; + } + return rspamd_parse_kv_list ( + chunk, + len, + data, + radix_tree_insert_helper, + hash_fill, + final); +} + +void +rspamd_radix_fin (struct map_cb_data *data) +{ + struct rspamd_map *map = data->map; + + if (data->prev_data) { + radix_destroy_compressed (data->prev_data); + } + if (data->cur_data) { + msg_info_map ("read radix trie of %z elements: %s", + radix_get_size (data->cur_data), radix_get_info (data->cur_data)); + } +} + +static struct rspamd_regexp_map_helper * +rspamd_regexp_map_create (struct rspamd_map *map, + enum rspamd_regexp_map_flags flags) +{ + struct rspamd_regexp_map_helper *re_map; + + re_map = g_malloc0 (sizeof (*re_map)); + re_map->values = g_ptr_array_new (); + re_map->regexps = g_ptr_array_new (); + re_map->map = map; + re_map->map_flags = flags; + + return re_map; +} + + +static void +rspamd_regexp_map_destroy (struct rspamd_regexp_map_helper *re_map) +{ + rspamd_regexp_t *re; + guint i; + + for (i = 0; i < re_map->regexps->len; i ++) { + re = g_ptr_array_index (re_map->regexps, i); + rspamd_regexp_unref (re); + } + + for (i = 0; i < re_map->values->len; i ++) { + g_free (g_ptr_array_index (re_map->values, i)); + } + + g_ptr_array_free (re_map->regexps, TRUE); + g_ptr_array_free (re_map->values, TRUE); + +#ifdef WITH_HYPERSCAN + if (re_map->hs_scratch) { + hs_free_scratch (re_map->hs_scratch); + } + if (re_map->hs_db) { + hs_free_database (re_map->hs_db); + } + if (re_map->patterns) { + g_free (re_map->patterns); + } + if (re_map->flags) { + g_free (re_map->flags); + } + if (re_map->ids) { + g_free (re_map->ids); + } +#endif + + g_free (re_map); +} + +static void +rspamd_re_map_insert_helper 
(gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_regexp_map_helper *re_map = st; + struct rspamd_map *map; + rspamd_regexp_t *re; + GError *err = NULL; + gint pcre_flags; + + map = re_map->map; + re = rspamd_regexp_new (key, NULL, &err); + + if (re == NULL) { + msg_err_map ("cannot parse regexp %s: %e", key, err); + + if (err) { + g_error_free (err); + } + + return; + } + + pcre_flags = rspamd_regexp_get_pcre_flags (re); + +#ifndef WITH_PCRE2 + if (pcre_flags & PCRE_FLAG(UTF8)) { + re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; + } +#else + if (pcre_flags & PCRE_FLAG(UTF)) { + re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; + } +#endif + + g_ptr_array_add (re_map->regexps, re); + g_ptr_array_add (re_map->values, g_strdup (value)); +} + +static void +rspamd_glob_map_insert_helper (gpointer st, gconstpointer key, gconstpointer value) +{ + struct rspamd_regexp_map_helper *re_map = st; + struct rspamd_map *map; + rspamd_regexp_t *re; + gchar *escaped; + GError *err = NULL; + gint pcre_flags; + gsize escaped_len; + + map = re_map->map; + escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len, TRUE); + re = rspamd_regexp_new (escaped, NULL, &err); + g_free (escaped); + + if (re == NULL) { + msg_err_map ("cannot parse regexp %s: %e", key, err); + + if (err) { + g_error_free (err); + } + + return; + } + + pcre_flags = rspamd_regexp_get_pcre_flags (re); + +#ifndef WITH_PCRE2 + if (pcre_flags & PCRE_FLAG(UTF8)) { + re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; + } +#else + if (pcre_flags & PCRE_FLAG(UTF)) { + re_map->map_flags |= RSPAMD_REGEXP_FLAG_UTF; + } +#endif + + g_ptr_array_add (re_map->regexps, re); + g_ptr_array_add (re_map->values, g_strdup (value)); +} + +static void +rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map) +{ +#ifdef WITH_HYPERSCAN + guint i; + hs_platform_info_t plt; + hs_compile_error_t *err; + struct rspamd_map *map; + rspamd_regexp_t *re; + gint pcre_flags; + + map = re_map->map; + + if 
(!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) { + msg_info_map ("disable hyperscan for map %s, ssse3 instructons are not supported by CPU", + map->name); + return; + } + + if (hs_populate_platform (&plt) != HS_SUCCESS) { + msg_err_map ("cannot populate hyperscan platform"); + return; + } + + re_map->patterns = g_new (const gchar *, re_map->regexps->len); + re_map->flags = g_new (gint, re_map->regexps->len); + re_map->ids = g_new (gint, re_map->regexps->len); + + for (i = 0; i < re_map->regexps->len; i ++) { + re = g_ptr_array_index (re_map->regexps, i); + re_map->patterns[i] = rspamd_regexp_get_pattern (re); + re_map->flags[i] = HS_FLAG_SINGLEMATCH; + pcre_flags = rspamd_regexp_get_pcre_flags (re); + +#ifndef WITH_PCRE2 + if (pcre_flags & PCRE_FLAG(UTF8)) { + re_map->flags[i] |= HS_FLAG_UTF8; + } +#else + if (pcre_flags & PCRE_FLAG(UTF)) { + re_map->flags[i] |= HS_FLAG_UTF8; + } +#endif + if (pcre_flags & PCRE_FLAG(CASELESS)) { + re_map->flags[i] |= HS_FLAG_CASELESS; + } + if (pcre_flags & PCRE_FLAG(MULTILINE)) { + re_map->flags[i] |= HS_FLAG_MULTILINE; + } + if (pcre_flags & PCRE_FLAG(DOTALL)) { + re_map->flags[i] |= HS_FLAG_DOTALL; + } + if (rspamd_regexp_get_maxhits (re) == 1) { + re_map->flags[i] |= HS_FLAG_SINGLEMATCH; + } + + re_map->ids[i] = i; + } + + if (re_map->regexps->len > 0 && re_map->patterns) { + if (hs_compile_multi (re_map->patterns, + re_map->flags, + re_map->ids, + re_map->regexps->len, + HS_MODE_BLOCK, + &plt, + &re_map->hs_db, + &err) != HS_SUCCESS) { + + msg_err_map ("cannot create tree of regexp when processing '%s': %s", + err->expression >= 0 ? 
+ re_map->patterns[err->expression] : + "unknown regexp", err->message); + re_map->hs_db = NULL; + hs_free_compile_error (err); + + return; + } + + if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) { + msg_err_map ("cannot allocate scratch space for hyperscan"); + hs_free_database (re_map->hs_db); + re_map->hs_db = NULL; + } + } + else { + msg_err_map ("regexp map is empty"); + } +#endif +} + +gchar * +rspamd_regexp_list_read_single ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_regexp_map_helper *re_map; + + if (data->cur_data == NULL) { + re_map = rspamd_regexp_map_create (data->map, 0); + data->cur_data = re_map; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_re_map_insert_helper, + hash_fill, + final); +} + +gchar * +rspamd_glob_list_read_single ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_regexp_map_helper *re_map; + + if (data->cur_data == NULL) { + re_map = rspamd_regexp_map_create (data->map, 0); + data->cur_data = re_map; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_glob_map_insert_helper, + hash_fill, + final); +} + +gchar * +rspamd_regexp_list_read_multiple ( + gchar *chunk, + gint len, + struct map_cb_data *data, + gboolean final) +{ + struct rspamd_regexp_map_helper *re_map; + + if (data->cur_data == NULL) { + re_map = rspamd_regexp_map_create (data->map, RSPAMD_REGEXP_FLAG_MULTIPLE); + data->cur_data = re_map; + } + + return rspamd_parse_kv_list ( + chunk, + len, + data, + rspamd_re_map_insert_helper, + hash_fill, + final); +} + +void +rspamd_regexp_list_fin (struct map_cb_data *data) +{ + struct rspamd_regexp_map_helper *re_map; + struct rspamd_map *map = data->map; + + if (data->prev_data) { + rspamd_regexp_map_destroy (data->prev_data); + } + if (data->cur_data) { + re_map = data->cur_data; + rspamd_re_map_finalize (re_map); + msg_info_map ("read regexp list of %ud elements", + 
re_map->regexps->len); + } +} + +#ifdef WITH_HYPERSCAN +static int +rspamd_match_hs_single_handler (unsigned int id, unsigned long long from, + unsigned long long to, + unsigned int flags, void *context) +{ + guint *i = context; + /* Always return non-zero as we need a single match here */ + + *i = id; + + return 1; +} +#endif + +gpointer +rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map, + const gchar *in, gsize len) +{ + guint i; + rspamd_regexp_t *re; + gint res = 0; + gpointer ret = NULL; + gboolean validated = FALSE; + + g_assert (in != NULL); + + if (map == NULL || len == 0) { + return NULL; + } + + if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) { + if (g_utf8_validate (in, len, NULL)) { + validated = TRUE; + } + } + else { + validated = TRUE; + } + +#ifdef WITH_HYPERSCAN + if (map->hs_db && map->hs_scratch) { + + if (validated) { + + res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch, + rspamd_match_hs_single_handler, (void *)&i); + + if (res == HS_SCAN_TERMINATED) { + res = 1; + ret = g_ptr_array_index (map->values, i); + } + + return ret; + } + } +#endif + + if (!res) { + /* PCRE version */ + for (i = 0; i < map->regexps->len; i ++) { + re = g_ptr_array_index (map->regexps, i); + + if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) { + ret = g_ptr_array_index (map->values, i); + break; + } + } + } + + return ret; +} + +#ifdef WITH_HYPERSCAN +struct rspamd_multiple_cbdata { + GPtrArray *ar; + struct rspamd_regexp_map_helper *map; +}; + +static int +rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from, + unsigned long long to, + unsigned int flags, void *context) +{ + struct rspamd_multiple_cbdata *cbd = context; + + if (id < cbd->map->values->len) { + g_ptr_array_add (cbd->ar, g_ptr_array_index (cbd->map->values, id)); + } + + /* Always return zero as we need all matches here */ + return 0; +} +#endif + +gpointer +rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map, + const gchar 
*in, gsize len)
+{
+	guint i;
+	rspamd_regexp_t *re;
+	GPtrArray *ret;
+	gint res = 0;	/* becomes 1 once hyperscan has completed the scan */
+	gboolean validated = FALSE;	/* TRUE when the input may be treated as valid UTF-8 (or the map is not UTF) */
+
+	g_assert (in != NULL);
+
+	if (map == NULL || len == 0) {
+		return NULL;
+	}
+
+	if (map->map_flags & RSPAMD_REGEXP_FLAG_UTF) {
+		if (g_utf8_validate (in, len, NULL)) {
+			validated = TRUE;
+		}
+	}
+	else {
+		validated = TRUE;
+	}
+
+	ret = g_ptr_array_new ();	/* collects the values of all matching regexps */
+
+#ifdef WITH_HYPERSCAN
+	if (map->hs_db && map->hs_scratch) {
+
+		if (validated) {
+			struct rspamd_multiple_cbdata cbd;
+
+			cbd.ar = ret;
+			cbd.map = map;
+
+			if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
+					rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
+				res = 1;
+			}
+		}
+	}
+#endif
+
+	if (!res) {
+		/* PCRE version: runs when hyperscan was not used or did not return HS_SUCCESS */
+		for (i = 0; i < map->regexps->len; i ++) {
+			re = g_ptr_array_index (map->regexps, i);
+
+			if (rspamd_regexp_search (re, in, len, NULL, NULL,
+					!validated, NULL)) {	/* NOTE(review): !validated is presumably the "raw" switch - confirm */
+				g_ptr_array_add (ret, g_ptr_array_index (map->values, i));
+			}
+		}
+	}
+
+	if (ret->len > 0) {
+		return ret;	/* the caller owns the array, the elements still belong to the map */
+	}
+
+	g_ptr_array_free (ret, TRUE);	/* nothing matched: release the empty array */
+
+	return NULL;
+}
diff --git a/src/libutil/map_helpers.h b/src/libutil/map_helpers.h
new file mode 100644
index 000000000..cbdb80478
--- /dev/null
+++ b/src/libutil/map_helpers.h
@@ -0,0 +1,124 @@
+/*-
+ * Copyright 2018 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MAP_HELPERS_H
+#define RSPAMD_MAP_HELPERS_H
+
+#include "config.h"
+#include "map.h"
+
+/**
+ * @file map_helpers.h
+ *
+ * Declares the helper structures and read/finalize callbacks for the map types
+ */
+
+/**
+ * Common structures, opaque here: only forward declarations are exposed
+ */
+struct rspamd_radix_map_helper;
+struct rspamd_hash_map_helper;
+struct rspamd_regexp_map_helper;
+
+typedef void (*insert_func) (gpointer st, gconstpointer key,
+		gconstpointer value);
+
+/**
+ * Radix list is a list of entries like ip/mask
+ */
+gchar * rspamd_radix_read (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+void rspamd_radix_fin (struct map_cb_data *data);
+
+/**
+ * Host list is an ordinary (plain) list of hosts or domains
+ */
+gchar * rspamd_hosts_read (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+void rspamd_hosts_fin (struct map_cb_data *data);
+
+/**
+ * Kv list is an ordinary list of keys and values separated by whitespace
+ */
+gchar * rspamd_kv_list_read (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+void rspamd_kv_list_fin (struct map_cb_data *data);
+
+/**
+ * Regexp list is a list of regular expressions (or globs), one finalizer for all
+ */
+
+gchar * rspamd_regexp_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+gchar * rspamd_regexp_list_read_multiple (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+gchar * rspamd_glob_list_read_single (
+		gchar *chunk,
+		gint len,
+		struct map_cb_data *data,
+		gboolean final);
+void rspamd_regexp_list_fin (struct map_cb_data *data);
+
+/**
+ * FSM for list parsing (supports comments, blank lines and partial replies)
+ */
+gchar *
+rspamd_parse_kv_list (
+		gchar * chunk,
+		gint len,
+		struct map_cb_data *data,
+		insert_func func,
+		const gchar *default_value,
+		gboolean final);
+
+/**
+ * Find a single (any) matching regexp for the specified text or NULL if
+ * no matches found
+ * @param map
+ * @param in
+ * @param len
+ * 
@return value attached to the matching regexp, or NULL
+ */
+gpointer rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
+		const gchar *in, gsize len);
+
+/**
+ * Find all regexps matching the specified text; returns NULL if nothing
+ * matched. Returns a GPtrArray that *must* be freed by the caller if not NULL
+ * @param map regexp map helper to search (NULL yields NULL)
+ * @param in text to match, must not be NULL
+ * @param len length of the text (zero yields NULL)
+ * @return array with the values attached to the matching regexps, or NULL
+ */
+gpointer rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
+		const gchar *in, gsize len);
+
+#endif