diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-28 13:32:26 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-11-28 13:32:26 +0000 |
commit | 3a995b7221266ab58f8b3fac8fb02e70c23c42a9 (patch) | |
tree | 59e62d57f0abee986ceb81829c806112e8505a2b | |
parent | 71499b6811fdd2df0ecc7581120b83a0935b1370 (diff) | |
download | rspamd-3a995b7221266ab58f8b3fac8fb02e70c23c42a9.tar.gz rspamd-3a995b7221266ab58f8b3fac8fb02e70c23c42a9.zip |
[Minor] Store the fact that we have utf8 only regexps in corpus
-rw-r--r-- | src/libserver/re_cache.c | 9 | ||||
-rw-r--r-- | src/libutil/regexp.c | 2 | ||||
-rw-r--r-- | src/libutil/regexp.h | 1 |
3 files changed, 11 insertions, 1 deletion
```diff
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 1dcd4bbad..bbc19a59a 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -78,11 +78,14 @@ static const guchar rspamd_hs_magic[] = {'r', 's', 'h', 's', 'r', 'e', '1', '1'}
 struct rspamd_re_class {
 	guint64 id;
 	enum rspamd_re_type type;
+	gboolean has_utf8; /* if there are any utf8 regexps */
 	gpointer type_data;
 	gsize type_len;
 	GHashTable *re;
-	gchar hash[rspamd_cryptobox_HASHBYTES + 1];
 	rspamd_cryptobox_hash_state_t *st;
+
+	gchar hash[rspamd_cryptobox_HASHBYTES + 1];
+
 #ifdef WITH_HYPERSCAN
 	hs_database_t *hs_db;
 	hs_scratch_t *hs_scratch;
@@ -298,6 +301,10 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
 		g_hash_table_insert (re_class->re, rspamd_regexp_get_id (nre), nre);
 	}
 
+	if (rspamd_regexp_get_flags (re) & RSPAMD_REGEXP_FLAG_UTF) {
+		re_class->has_utf8 = TRUE;
+	}
+
 	return nre;
 }
 
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index a3246ae9f..4eb0361ec 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -379,6 +379,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
 				break;
 			case 'u':
 				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
+				rspamd_flags |= RSPAMD_REGEXP_FLAG_UTF;
 #ifndef WITH_PCRE2
 				regexp_flags |= PCRE_FLAG(UTF8);
 #else
@@ -392,6 +393,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
 				break;
 			case 'r':
 				rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
+				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
 #ifndef WITH_PCRE2
 				regexp_flags &= ~PCRE_FLAG(UTF8);
 #else
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
index 6b1bd50f9..b982c08f6 100644
--- a/src/libutil/regexp.h
+++ b/src/libutil/regexp.h
@@ -33,6 +33,7 @@
 #define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
 #define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
 #define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
+#define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
 
 struct rspamd_config;
 
```