diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-07-22 10:24:21 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-07-22 10:24:21 +0100 |
commit | c4055be5894c40807e24009afef04830f2f74fcb (patch) | |
tree | 589607fa3ac540a8d53e4177f51474af8e6ce025 /src/libutil/multipattern.c | |
parent | 74f6d73a40512220b05a7f0e0d6f5558a58b2146 (diff) | |
download | rspamd-c4055be5894c40807e24009afef04830f2f74fcb.tar.gz rspamd-c4055be5894c40807e24009afef04830f2f74fcb.zip |
[Feature] Dynamically detect if a CPU is incompatible with hyperscan
Diffstat (limited to 'src/libutil/multipattern.c')
-rw-r--r-- | src/libutil/multipattern.c | 286 |
1 files changed, 166 insertions, 120 deletions
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c index c4085e315..4f64016d7 100644 --- a/src/libutil/multipattern.c +++ b/src/libutil/multipattern.c @@ -24,13 +24,15 @@ #ifdef WITH_HYPERSCAN #include "hs.h" -#else -#include "acism.h" #endif +#include "acism.h" #define MAX_SCRATCH 4 static const char *hs_cache_dir = NULL; +#ifdef WITH_HYPERSCAN +static gboolean hs_suitable_cpu = FALSE; +#endif struct rspamd_multipattern { #ifdef WITH_HYPERSCAN @@ -41,10 +43,10 @@ struct rspamd_multipattern { GArray *hs_flags; rspamd_cryptobox_hash_state_t hash_state; guint scratch_used; -#else +#endif ac_trie_t *t; GArray *pats; -#endif + gboolean compiled; guint cnt; enum rspamd_multipattern_flags flags; @@ -57,9 +59,15 @@ rspamd_multipattern_quark (void) } void -rspamd_multipattern_library_init (const gchar *cache_dir) +rspamd_multipattern_library_init (const gchar *cache_dir, + struct rspamd_cryptobox_library_ctx *crypto_ctx) { hs_cache_dir = cache_dir; +#ifdef WITH_HYPERSCAN + if (crypto_ctx->cpu_config & CPUID_SSSE3) { + hs_suitable_cpu = TRUE; + } +#endif } #ifdef WITH_HYPERSCAN @@ -218,7 +226,7 @@ rspamd_multipattern_escape_hyperscan (const gchar *pattern, gsize slen, return res; } -#else +#endif static gchar * rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len, gsize *dst_len) @@ -266,7 +274,7 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len, return res; } -#endif + /* * Escapes special characters from specific pattern */ @@ -277,20 +285,25 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len, { gchar *ret = NULL; #ifdef WITH_HYPERSCAN - if (flags & RSPAMD_MULTIPATTERN_TLD) { - ret = rspamd_multipattern_escape_tld_hyperscan (pattern, len, dst_len); - } - else if (flags & RSPAMD_MULTIPATTERN_RE) { - ret = malloc (len + 1); - *dst_len = rspamd_strlcpy (ret, pattern, len + 1); - } - else if (flags & RSPAMD_MULTIPATTERN_GLOB) { - ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE); - } - else { - ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE); + if (hs_suitable_cpu) { + if (flags & RSPAMD_MULTIPATTERN_TLD) { + ret = rspamd_multipattern_escape_tld_hyperscan (pattern, len, dst_len); + } + else if (flags & RSPAMD_MULTIPATTERN_RE) { + ret = malloc (len + 1); + *dst_len = rspamd_strlcpy (ret, pattern, len + 1); + } + else if (flags & RSPAMD_MULTIPATTERN_GLOB) { + ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE); + } + else { + ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE); + } + + return ret; } -#else +#endif + if (flags & RSPAMD_MULTIPATTERN_TLD) { ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len); } @@ -299,7 +312,6 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len, memcpy (ret, pattern, len); *dst_len = len; } -#endif return ret; } @@ -313,14 +325,18 @@ rspamd_multipattern_create (enum rspamd_multipattern_flags flags) mp->flags = flags; #ifdef WITH_HYPERSCAN - mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *)); - mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint)); - mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint)); - rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); -#else - mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t)); + if (hs_suitable_cpu) { + mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *)); + mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint)); + mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint)); + rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); + + return mp; + } #endif + mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t)); + return mp; } @@ -334,14 +350,18 @@ rspamd_multipattern_create_sized (guint npatterns, mp->flags = flags; #ifdef WITH_HYPERSCAN - mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns); - mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); - mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); - rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); -#else - mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns); + if (hs_suitable_cpu) { + mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns); + mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); + mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns); + rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0); + + return mp; + } #endif + mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns); + return mp; } @@ -365,30 +385,35 @@ rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp, g_assert (!mp->compiled); #ifdef WITH_HYPERSCAN - gchar *np; - gint fl = HS_FLAG_SOM_LEFTMOST; + if (hs_suitable_cpu) { + gchar *np; + gint fl = HS_FLAG_SOM_LEFTMOST; - if (mp->flags & RSPAMD_MULTIPATTERN_ICASE) { - fl |= HS_FLAG_CASELESS; - } - if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) { - fl |= HS_FLAG_UTF8|HS_FLAG_UCP; - } + if (mp->flags & RSPAMD_MULTIPATTERN_ICASE) { + fl |= HS_FLAG_CASELESS; + } + if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) { + fl |= HS_FLAG_UTF8|HS_FLAG_UCP; + } + + g_array_append_val (mp->hs_flags, fl); + np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen); + g_array_append_val (mp->hs_pats, np); + fl = mp->cnt; + g_array_append_val (mp->hs_ids, fl); + rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen); + + mp->cnt ++; - g_array_append_val (mp->hs_flags, fl); - np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen); - g_array_append_val (mp->hs_pats, np); - fl = mp->cnt; - g_array_append_val (mp->hs_ids, fl); - rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen); -#else + return; + } +#endif ac_trie_pat_t pat; pat.ptr = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen); pat.len = dlen; g_array_append_val (mp->pats, pat); -#endif mp->cnt ++; } @@ -476,47 +501,54 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err) g_assert (!mp->compiled); #ifdef WITH_HYPERSCAN - guint i; - hs_platform_info_t plt; - hs_compile_error_t *hs_errors; - guchar hash[rspamd_cryptobox_HASHBYTES]; + if (hs_suitable_cpu) { + guint i; + hs_platform_info_t plt; + hs_compile_error_t *hs_errors; + guchar hash[rspamd_cryptobox_HASHBYTES]; + + if (mp->cnt > 0) { + g_assert (hs_populate_platform (&plt) == HS_SUCCESS); + rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt)); + rspamd_cryptobox_hash_final (&mp->hash_state, hash); + + if (!rspamd_multipattern_try_load_hs (mp, hash)) { + if (hs_compile_multi ((const char *const *)mp->hs_pats->data, + (const unsigned int *)mp->hs_flags->data, + (const unsigned int *)mp->hs_ids->data, + mp->cnt, + HS_MODE_BLOCK, + &plt, + &mp->db, + &hs_errors) != HS_SUCCESS) { + + g_set_error (err, rspamd_multipattern_quark (), EINVAL, + "cannot create tree of regexp when processing '%s': %s", + g_array_index (mp->hs_pats, char *, hs_errors->expression), + hs_errors->message); + hs_free_compile_error (hs_errors); + + return FALSE; + } + } - if (mp->cnt > 0) { - g_assert (hs_populate_platform (&plt) == HS_SUCCESS); - rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt)); - rspamd_cryptobox_hash_final (&mp->hash_state, hash); - - if (!rspamd_multipattern_try_load_hs (mp, hash)) { - if (hs_compile_multi ((const char *const *)mp->hs_pats->data, - (const unsigned int *)mp->hs_flags->data, - (const unsigned int *)mp->hs_ids->data, - mp->cnt, - HS_MODE_BLOCK, - &plt, - &mp->db, - &hs_errors) != HS_SUCCESS) { - - g_set_error (err, rspamd_multipattern_quark (), EINVAL, - "cannot create tree of regexp when processing '%s': %s", - g_array_index (mp->hs_pats, char *, hs_errors->expression), - hs_errors->message); - hs_free_compile_error (hs_errors); - - return FALSE; + rspamd_multipattern_try_save_hs (mp, hash); + + for (i = 0; i < MAX_SCRATCH; i ++) { + g_assert (hs_alloc_scratch (mp->db, &mp->scratch[i]) == HS_SUCCESS); } } - rspamd_multipattern_try_save_hs (mp, hash); + mp->compiled = TRUE; - for (i = 0; i < MAX_SCRATCH; i ++) { - g_assert (hs_alloc_scratch (mp->db, &mp->scratch[i]) == HS_SUCCESS); - } + return TRUE; } -#else +#endif + if (mp->cnt > 0) { mp->t = acism_create ((const ac_trie_pat_t *)mp->pats->data, mp->cnt); } -#endif + mp->compiled = TRUE; return TRUE; @@ -557,7 +589,8 @@ rspamd_multipattern_hs_cb (unsigned int id, return ret; } -#else +#endif + static gint rspamd_multipattern_acism_cb (int strnum, int textpos, void *context) { @@ -574,7 +607,6 @@ rspamd_multipattern_acism_cb (int strnum, int textpos, void *context) return ret; } -#endif gint rspamd_multipattern_lookup (struct rspamd_multipattern *mp, @@ -599,36 +631,44 @@ rspamd_multipattern_lookup (struct rspamd_multipattern *mp, cbd.ret = 0; #ifdef WITH_HYPERSCAN - hs_scratch_t *scr = NULL; - guint i; + if (hs_suitable_cpu) { + hs_scratch_t *scr = NULL; + guint i; - for (i = 0; i < MAX_SCRATCH; i ++) { - if (!(mp->scratch_used & (1 << i))) { - mp->scratch_used |= (1 << i); - scr = mp->scratch[i]; - break; + for (i = 0; i < MAX_SCRATCH; i ++) { + if (!(mp->scratch_used & (1 << i))) { + mp->scratch_used |= (1 << i); + scr = mp->scratch[i]; + break; + } } - } - g_assert (scr != NULL); + g_assert (scr != NULL); - ret = hs_scan (mp->db, in, len, 0, scr, - rspamd_multipattern_hs_cb, &cbd); + ret = hs_scan (mp->db, in, len, 0, scr, + rspamd_multipattern_hs_cb, &cbd); - mp->scratch_used &= ~(1 << i); + mp->scratch_used &= ~(1 << i); - if (ret == HS_SUCCESS) { - ret = 0; - } - else if (ret == HS_SCAN_TERMINATED) { - ret = cbd.ret; + if (ret == HS_SUCCESS) { + ret = 0; + } + else if (ret == HS_SCAN_TERMINATED) { + ret = cbd.ret; + } + + if (pnfound) { + *pnfound = cbd.nfound; + } + + return ret; } -#else +#endif + gint state = 0; ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd, &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE); -#endif if (pnfound) { *pnfound = cbd.nfound; @@ -645,25 +685,30 @@ rspamd_multipattern_destroy (struct rspamd_multipattern *mp) if (mp) { #ifdef WITH_HYPERSCAN - gchar *p; + if (hs_suitable_cpu) { + gchar *p; - if (mp->compiled && mp->cnt > 0) { - for (i = 0; i < MAX_SCRATCH; i ++) { - hs_free_scratch (mp->scratch[i]); + if (mp->compiled && mp->cnt > 0) { + for (i = 0; i < MAX_SCRATCH; i ++) { + hs_free_scratch (mp->scratch[i]); + } + + hs_free_database (mp->db); } - hs_free_database (mp->db); - } + for (i = 0; i < mp->cnt; i ++) { + p = g_array_index (mp->hs_pats, gchar *, i); + g_free (p); + } - for (i = 0; i < mp->cnt; i ++) { - p = g_array_index (mp->hs_pats, gchar *, i); - g_free (p); - } + g_array_free (mp->hs_pats, TRUE); + g_array_free (mp->hs_ids, TRUE); + g_array_free (mp->hs_flags, TRUE); + g_slice_free1 (sizeof (*mp), mp); - g_array_free (mp->hs_pats, TRUE); - g_array_free (mp->hs_ids, TRUE); - g_array_free (mp->hs_flags, TRUE); -#else + return; + } +#endif ac_trie_pat_t pat; if (mp->compiled && mp->cnt > 0) { @@ -676,7 +721,7 @@ rspamd_multipattern_destroy (struct rspamd_multipattern *mp) } g_array_free (mp->pats, TRUE); -#endif + g_slice_free1 (sizeof (*mp), mp); } } @@ -689,15 +734,16 @@ rspamd_multipattern_get_pattern (struct rspamd_multipattern *mp, g_assert (index < mp->cnt); #ifdef WITH_HYPERSCAN - return g_array_index (mp->hs_pats, gchar *, index); -#else + if (hs_suitable_cpu) { + return g_array_index (mp->hs_pats, gchar *, index); + } +#endif ac_trie_pat_t pat; pat = g_array_index (mp->pats, ac_trie_pat_t, index); return pat.ptr; -#endif } guint |