Bladeren bron

[Minor] Store the fact that we have utf8 only regexps in corpus

tags/1.8.3
Vsevolod Stakhov 5 jaren geleden
bovenliggende
commit
3a995b7221
3 gewijzigde bestanden met 11 toevoegingen en 1 verwijdering
  1. 8
    1
      src/libserver/re_cache.c
  2. 2
    0
      src/libutil/regexp.c
  3. 1
    0
      src/libutil/regexp.h

+ 8
- 1
src/libserver/re_cache.c Bestand weergeven

@@ -78,11 +78,14 @@ static const guchar rspamd_hs_magic[] = {'r', 's', 'h', 's', 'r', 'e', '1', '1'}
struct rspamd_re_class {
guint64 id;
enum rspamd_re_type type;
gboolean has_utf8; /* if there are any utf8 regexps */
gpointer type_data;
gsize type_len;
GHashTable *re;
gchar hash[rspamd_cryptobox_HASHBYTES + 1];
rspamd_cryptobox_hash_state_t *st;

gchar hash[rspamd_cryptobox_HASHBYTES + 1];

#ifdef WITH_HYPERSCAN
hs_database_t *hs_db;
hs_scratch_t *hs_scratch;
@@ -298,6 +301,10 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
g_hash_table_insert (re_class->re, rspamd_regexp_get_id (nre), nre);
}

if (rspamd_regexp_get_flags (re) & RSPAMD_REGEXP_FLAG_UTF) {
re_class->has_utf8 = TRUE;
}

return nre;
}


+ 2
- 0
src/libutil/regexp.c Bestand weergeven

@@ -379,6 +379,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
break;
case 'u':
rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
rspamd_flags |= RSPAMD_REGEXP_FLAG_UTF;
#ifndef WITH_PCRE2
regexp_flags |= PCRE_FLAG(UTF8);
#else
@@ -392,6 +393,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
break;
case 'r':
rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
#ifndef WITH_PCRE2
regexp_flags &= ~PCRE_FLAG(UTF8);
#else

+ 1
- 0
src/libutil/regexp.h Bestand weergeven

@@ -33,6 +33,7 @@
/*
 * Regexp flag bits: each constant occupies a distinct bit, so they can be
 * combined with bitwise OR and tested with bitwise AND.
 */
#define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
#define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
#define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
/*
 * UTF regexp marker: rspamd_regexp_new sets it for the 'u' pattern flag and
 * clears it for the 'r' (raw) pattern flag; rspamd_re_cache_add tests it to
 * record that a regexp class contains utf8 regexps.
 */
#define RSPAMD_REGEXP_FLAG_UTF (1 << 6)

struct rspamd_config;


Laden…
Annuleren
Opslaan