From 3a995b7221266ab58f8b3fac8fb02e70c23c42a9 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 28 Nov 2018 13:32:26 +0000
Subject: [Minor] Store the fact that we have utf8 only regexps in corpus

---
 src/libserver/re_cache.c | 9 ++++++++-
 src/libutil/regexp.c     | 2 ++
 src/libutil/regexp.h     | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 1dcd4bbad..bbc19a59a 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -78,11 +78,14 @@ static const guchar rspamd_hs_magic[] = {'r', 's', 'h', 's', 'r', 'e', '1', '1'}
 struct rspamd_re_class {
 	guint64 id;
 	enum rspamd_re_type type;
+	gboolean has_utf8; /* if there are any utf8 regexps */
 	gpointer type_data;
 	gsize type_len;
 	GHashTable *re;
-	gchar hash[rspamd_cryptobox_HASHBYTES + 1];
 	rspamd_cryptobox_hash_state_t *st;
+
+	gchar hash[rspamd_cryptobox_HASHBYTES + 1];
+
 #ifdef WITH_HYPERSCAN
 	hs_database_t *hs_db;
 	hs_scratch_t *hs_scratch;
@@ -298,6 +301,10 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
 		g_hash_table_insert (re_class->re, rspamd_regexp_get_id (nre), nre);
 	}
 
+	if (rspamd_regexp_get_flags (re) & RSPAMD_REGEXP_FLAG_UTF) {
+		re_class->has_utf8 = TRUE;
+	}
+
 	return nre;
 }
 
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index a3246ae9f..4eb0361ec 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -379,6 +379,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
 				break;
 			case 'u':
 				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
+				rspamd_flags |= RSPAMD_REGEXP_FLAG_UTF;
 #ifndef WITH_PCRE2
 				regexp_flags |= PCRE_FLAG(UTF8);
 #else
@@ -392,6 +393,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
 				break;
 			case 'r':
 				rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
+				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
 #ifndef WITH_PCRE2
 				regexp_flags &= ~PCRE_FLAG(UTF8);
 #else
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
index 6b1bd50f9..b982c08f6 100644
--- a/src/libutil/regexp.h
+++ b/src/libutil/regexp.h
@@ -33,6 +33,7 @@
 #define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
 #define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
 #define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
+#define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
 
 struct rspamd_config;
 
-- 
cgit v1.2.3