source.dussan.org Git - rspamd.git/commitdiff
[Minor] Store the fact that we have utf8 only regexps in corpus
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 28 Nov 2018 13:32:26 +0000 (13:32 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 28 Nov 2018 13:32:26 +0000 (13:32 +0000)
src/libserver/re_cache.c
src/libutil/regexp.c
src/libutil/regexp.h

index 1dcd4bbad9f43169c1db2e15d7325c0f544791d5..bbc19a59a64bbb4b8416c77c22dc7658157229c3 100644 (file)
@@ -78,11 +78,14 @@ static const guchar rspamd_hs_magic[] = {'r', 's', 'h', 's', 'r', 'e', '1', '1'}
 struct rspamd_re_class {
        guint64 id;
        enum rspamd_re_type type;
+       gboolean has_utf8; /* if there are any utf8 regexps */
        gpointer type_data;
        gsize type_len;
        GHashTable *re;
-       gchar hash[rspamd_cryptobox_HASHBYTES + 1];
        rspamd_cryptobox_hash_state_t *st;
+
+       gchar hash[rspamd_cryptobox_HASHBYTES + 1];
+
 #ifdef WITH_HYPERSCAN
        hs_database_t *hs_db;
        hs_scratch_t *hs_scratch;
@@ -298,6 +301,10 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
                g_hash_table_insert (re_class->re, rspamd_regexp_get_id (nre), nre);
        }
 
+       if (rspamd_regexp_get_flags (re) & RSPAMD_REGEXP_FLAG_UTF) {
+               re_class->has_utf8 = TRUE;
+       }
+
        return nre;
 }
 
index a3246ae9f18bcd154b7b0123f47edfc9776670e4..4eb0361ec45aa8d9ef564a750c50f84cfca6ada8 100644 (file)
@@ -379,6 +379,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
                                break;
                        case 'u':
                                rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
+                               rspamd_flags |= RSPAMD_REGEXP_FLAG_UTF;
 #ifndef WITH_PCRE2
                                regexp_flags |= PCRE_FLAG(UTF8);
 #else
@@ -392,6 +393,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
                                break;
                        case 'r':
                                rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
+                               rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
 #ifndef WITH_PCRE2
                                regexp_flags &= ~PCRE_FLAG(UTF8);
 #else
index 6b1bd50f9f15eeaea3f5fd07dcc921853002badd..b982c08f67108fc3bb455d099726ddd2b0680cf0 100644 (file)
@@ -33,6 +33,7 @@
 #define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
 #define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
 #define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
+#define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
 
 struct rspamd_config;