about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-28 13:32:26 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-11-28 13:32:26 +0000
commit 3a995b7221266ab58f8b3fac8fb02e70c23c42a9 (patch)
tree 59e62d57f0abee986ceb81829c806112e8505a2b
parent 71499b6811fdd2df0ecc7581120b83a0935b1370 (diff)
download rspamd-3a995b7221266ab58f8b3fac8fb02e70c23c42a9.tar.gz
rspamd-3a995b7221266ab58f8b3fac8fb02e70c23c42a9.zip
[Minor] Store the fact that we have utf8 only regexps in corpus
-rw-r--r-- src/libserver/re_cache.c 9
-rw-r--r-- src/libutil/regexp.c 2
-rw-r--r-- src/libutil/regexp.h 1
3 files changed, 11 insertions, 1 deletions
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 1dcd4bbad..bbc19a59a 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -78,11 +78,14 @@ static const guchar rspamd_hs_magic[] = {'r', 's', 'h', 's', 'r', 'e', '1', '1'}
struct rspamd_re_class {
guint64 id;
enum rspamd_re_type type;
+ gboolean has_utf8; /* if there are any utf8 regexps */
gpointer type_data;
gsize type_len;
GHashTable *re;
- gchar hash[rspamd_cryptobox_HASHBYTES + 1];
rspamd_cryptobox_hash_state_t *st;
+
+ gchar hash[rspamd_cryptobox_HASHBYTES + 1];
+
#ifdef WITH_HYPERSCAN
hs_database_t *hs_db;
hs_scratch_t *hs_scratch;
@@ -298,6 +301,10 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
g_hash_table_insert (re_class->re, rspamd_regexp_get_id (nre), nre);
}
+ if (rspamd_regexp_get_flags (re) & RSPAMD_REGEXP_FLAG_UTF) {
+ re_class->has_utf8 = TRUE;
+ }
+
return nre;
}
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index a3246ae9f..4eb0361ec 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -379,6 +379,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
break;
case 'u':
rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
+ rspamd_flags |= RSPAMD_REGEXP_FLAG_UTF;
#ifndef WITH_PCRE2
regexp_flags |= PCRE_FLAG(UTF8);
#else
@@ -392,6 +393,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
break;
case 'r':
rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
+ rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
#ifndef WITH_PCRE2
regexp_flags &= ~PCRE_FLAG(UTF8);
#else
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
index 6b1bd50f9..b982c08f6 100644
--- a/src/libutil/regexp.h
+++ b/src/libutil/regexp.h
@@ -33,6 +33,7 @@
#define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
#define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
#define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
+#define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
struct rspamd_config;