Quellcode durchsuchen

[Feature] Dynamically detect if a CPU is incompatible with hyperscan

tags/1.3.0
Vsevolod Stakhov vor 7 Jahren
Ursprung
Commit
c4055be589
6 geänderte Dateien mit 184 neuen und 124 gelöschten Zeilen
  1. 1
    0
      CMakeLists.txt
  2. 0
    1
      src/CMakeLists.txt
  3. 2
    1
      src/libserver/cfg_rcl.c
  4. 12
    1
      src/libserver/cfg_utils.c
  5. 166
    120
      src/libutil/multipattern.c
  6. 3
    1
      src/libutil/multipattern.h

+ 1
- 0
CMakeLists.txt Datei anzeigen

@@ -1163,6 +1163,7 @@ LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rdns)
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES ottery)
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES event)
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES xxhash)
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES rspamd-actrie)
IF(HAVE_FETCH_H)
LIST(APPEND RSPAMD_REQUIRED_LIBRARIES fetch)
ENDIF(HAVE_FETCH_H)

+ 0
- 1
src/CMakeLists.txt Datei anzeigen

@@ -154,7 +154,6 @@ ENDIF()
IF(ENABLE_HIREDIS MATCHES "ON")
TARGET_LINK_LIBRARIES(rspamd rspamd-hiredis)
ENDIF()
TARGET_LINK_LIBRARIES(rspamd rspamd-actrie)

IF (ENABLE_FANN MATCHES "ON")
TARGET_LINK_LIBRARIES(rspamd fann)

+ 2
- 1
src/libserver/cfg_rcl.c Datei anzeigen

@@ -265,7 +265,8 @@ rspamd_rcl_options_handler (rspamd_mempool_t *pool, const ucl_object_t *obj,
section, cfg->cfg_pool, obj,
cfg, err)) {
/* We need to init this early */
rspamd_multipattern_library_init (cfg->hs_cache_dir);
rspamd_multipattern_library_init (cfg->hs_cache_dir,
cfg->libs_ctx->crypto_ctx);

return TRUE;
}

+ 12
- 1
src/libserver/cfg_utils.c Datei anzeigen

@@ -644,7 +644,18 @@ rspamd_config_post_load (struct rspamd_config *cfg,
#endif

rspamd_regexp_library_init ();
rspamd_multipattern_library_init (cfg->hs_cache_dir);
rspamd_multipattern_library_init (cfg->hs_cache_dir,
cfg->libs_ctx->crypto_ctx);

#ifdef WITH_HYPERSCAN
if (!cfg->disable_hyperscan) {
if (!(cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
msg_warn_config ("CPU doesn't have SSSE3 instructions set "
"required for hyperscan, disable it");
cfg->disable_hyperscan = TRUE;
}
}
#endif

if ((def_metric =
g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) {

+ 166
- 120
src/libutil/multipattern.c Datei anzeigen

@@ -24,13 +24,15 @@

#ifdef WITH_HYPERSCAN
#include "hs.h"
#else
#include "acism.h"
#endif
#include "acism.h"

#define MAX_SCRATCH 4

static const char *hs_cache_dir = NULL;
#ifdef WITH_HYPERSCAN
static gboolean hs_suitable_cpu = FALSE;
#endif

struct rspamd_multipattern {
#ifdef WITH_HYPERSCAN
@@ -41,10 +43,10 @@ struct rspamd_multipattern {
GArray *hs_flags;
rspamd_cryptobox_hash_state_t hash_state;
guint scratch_used;
#else
#endif
ac_trie_t *t;
GArray *pats;
#endif
gboolean compiled;
guint cnt;
enum rspamd_multipattern_flags flags;
@@ -57,9 +59,15 @@ rspamd_multipattern_quark (void)
}

void
rspamd_multipattern_library_init (const gchar *cache_dir)
rspamd_multipattern_library_init (const gchar *cache_dir,
struct rspamd_cryptobox_library_ctx *crypto_ctx)
{
hs_cache_dir = cache_dir;
#ifdef WITH_HYPERSCAN
if (crypto_ctx->cpu_config & CPUID_SSSE3) {
hs_suitable_cpu = TRUE;
}
#endif
}

#ifdef WITH_HYPERSCAN
@@ -218,7 +226,7 @@ rspamd_multipattern_escape_hyperscan (const gchar *pattern, gsize slen,
return res;
}

#else
#endif
static gchar *
rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,
gsize *dst_len)
@@ -266,7 +274,7 @@ rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,

return res;
}
#endif
/*
* Escapes special characters from specific pattern
*/
@@ -277,20 +285,25 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
{
gchar *ret = NULL;
#ifdef WITH_HYPERSCAN
if (flags & RSPAMD_MULTIPATTERN_TLD) {
ret = rspamd_multipattern_escape_tld_hyperscan (pattern, len, dst_len);
}
else if (flags & RSPAMD_MULTIPATTERN_RE) {
ret = malloc (len + 1);
*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
}
else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE);
}
else {
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE);
if (hs_suitable_cpu) {
if (flags & RSPAMD_MULTIPATTERN_TLD) {
ret = rspamd_multipattern_escape_tld_hyperscan (pattern, len, dst_len);
}
else if (flags & RSPAMD_MULTIPATTERN_RE) {
ret = malloc (len + 1);
*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
}
else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE);
}
else {
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE);
}

return ret;
}
#else
#endif

if (flags & RSPAMD_MULTIPATTERN_TLD) {
ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len);
}
@@ -299,7 +312,6 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
memcpy (ret, pattern, len);
*dst_len = len;
}
#endif

return ret;
}
@@ -313,14 +325,18 @@ rspamd_multipattern_create (enum rspamd_multipattern_flags flags)
mp->flags = flags;

#ifdef WITH_HYPERSCAN
mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *));
mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint));
mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint));
rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
#else
mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t));
if (hs_suitable_cpu) {
mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *));
mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint));
mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint));
rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);

return mp;
}
#endif

mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t));

return mp;
}

@@ -334,14 +350,18 @@ rspamd_multipattern_create_sized (guint npatterns,
mp->flags = flags;

#ifdef WITH_HYPERSCAN
mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns);
mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
#else
mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns);
if (hs_suitable_cpu) {
mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns);
mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);

return mp;
}
#endif

mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns);

return mp;
}

@@ -365,30 +385,35 @@ rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp,
g_assert (!mp->compiled);

#ifdef WITH_HYPERSCAN
gchar *np;
gint fl = HS_FLAG_SOM_LEFTMOST;
if (hs_suitable_cpu) {
gchar *np;
gint fl = HS_FLAG_SOM_LEFTMOST;

if (mp->flags & RSPAMD_MULTIPATTERN_ICASE) {
fl |= HS_FLAG_CASELESS;
}
if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) {
fl |= HS_FLAG_UTF8|HS_FLAG_UCP;
}
if (mp->flags & RSPAMD_MULTIPATTERN_ICASE) {
fl |= HS_FLAG_CASELESS;
}
if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) {
fl |= HS_FLAG_UTF8|HS_FLAG_UCP;
}

g_array_append_val (mp->hs_flags, fl);
np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
g_array_append_val (mp->hs_pats, np);
fl = mp->cnt;
g_array_append_val (mp->hs_ids, fl);
rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen);

mp->cnt ++;

g_array_append_val (mp->hs_flags, fl);
np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
g_array_append_val (mp->hs_pats, np);
fl = mp->cnt;
g_array_append_val (mp->hs_ids, fl);
rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen);
#else
return;
}
#endif
ac_trie_pat_t pat;

pat.ptr = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
pat.len = dlen;

g_array_append_val (mp->pats, pat);
#endif

mp->cnt ++;
}
@@ -476,47 +501,54 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
g_assert (!mp->compiled);

#ifdef WITH_HYPERSCAN
guint i;
hs_platform_info_t plt;
hs_compile_error_t *hs_errors;
guchar hash[rspamd_cryptobox_HASHBYTES];
if (hs_suitable_cpu) {
guint i;
hs_platform_info_t plt;
hs_compile_error_t *hs_errors;
guchar hash[rspamd_cryptobox_HASHBYTES];

if (mp->cnt > 0) {
g_assert (hs_populate_platform (&plt) == HS_SUCCESS);
rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt));
rspamd_cryptobox_hash_final (&mp->hash_state, hash);

if (!rspamd_multipattern_try_load_hs (mp, hash)) {
if (hs_compile_multi ((const char *const *)mp->hs_pats->data,
(const unsigned int *)mp->hs_flags->data,
(const unsigned int *)mp->hs_ids->data,
mp->cnt,
HS_MODE_BLOCK,
&plt,
&mp->db,
&hs_errors) != HS_SUCCESS) {

g_set_error (err, rspamd_multipattern_quark (), EINVAL,
"cannot create tree of regexp when processing '%s': %s",
g_array_index (mp->hs_pats, char *, hs_errors->expression),
hs_errors->message);
hs_free_compile_error (hs_errors);

return FALSE;
}
}

if (mp->cnt > 0) {
g_assert (hs_populate_platform (&plt) == HS_SUCCESS);
rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt));
rspamd_cryptobox_hash_final (&mp->hash_state, hash);

if (!rspamd_multipattern_try_load_hs (mp, hash)) {
if (hs_compile_multi ((const char *const *)mp->hs_pats->data,
(const unsigned int *)mp->hs_flags->data,
(const unsigned int *)mp->hs_ids->data,
mp->cnt,
HS_MODE_BLOCK,
&plt,
&mp->db,
&hs_errors) != HS_SUCCESS) {

g_set_error (err, rspamd_multipattern_quark (), EINVAL,
"cannot create tree of regexp when processing '%s': %s",
g_array_index (mp->hs_pats, char *, hs_errors->expression),
hs_errors->message);
hs_free_compile_error (hs_errors);

return FALSE;
rspamd_multipattern_try_save_hs (mp, hash);

for (i = 0; i < MAX_SCRATCH; i ++) {
g_assert (hs_alloc_scratch (mp->db, &mp->scratch[i]) == HS_SUCCESS);
}
}

rspamd_multipattern_try_save_hs (mp, hash);
mp->compiled = TRUE;

for (i = 0; i < MAX_SCRATCH; i ++) {
g_assert (hs_alloc_scratch (mp->db, &mp->scratch[i]) == HS_SUCCESS);
}
return TRUE;
}
#else
#endif

if (mp->cnt > 0) {
mp->t = acism_create ((const ac_trie_pat_t *)mp->pats->data, mp->cnt);
}
#endif

mp->compiled = TRUE;

return TRUE;
@@ -557,7 +589,8 @@ rspamd_multipattern_hs_cb (unsigned int id,

return ret;
}
#else
#endif

static gint
rspamd_multipattern_acism_cb (int strnum, int textpos, void *context)
{
@@ -574,7 +607,6 @@ rspamd_multipattern_acism_cb (int strnum, int textpos, void *context)

return ret;
}
#endif

gint
rspamd_multipattern_lookup (struct rspamd_multipattern *mp,
@@ -599,36 +631,44 @@ rspamd_multipattern_lookup (struct rspamd_multipattern *mp,
cbd.ret = 0;

#ifdef WITH_HYPERSCAN
hs_scratch_t *scr = NULL;
guint i;
if (hs_suitable_cpu) {
hs_scratch_t *scr = NULL;
guint i;

for (i = 0; i < MAX_SCRATCH; i ++) {
if (!(mp->scratch_used & (1 << i))) {
mp->scratch_used |= (1 << i);
scr = mp->scratch[i];
break;
for (i = 0; i < MAX_SCRATCH; i ++) {
if (!(mp->scratch_used & (1 << i))) {
mp->scratch_used |= (1 << i);
scr = mp->scratch[i];
break;
}
}
}

g_assert (scr != NULL);
g_assert (scr != NULL);

ret = hs_scan (mp->db, in, len, 0, scr,
rspamd_multipattern_hs_cb, &cbd);
ret = hs_scan (mp->db, in, len, 0, scr,
rspamd_multipattern_hs_cb, &cbd);

mp->scratch_used &= ~(1 << i);
mp->scratch_used &= ~(1 << i);

if (ret == HS_SUCCESS) {
ret = 0;
}
else if (ret == HS_SCAN_TERMINATED) {
ret = cbd.ret;
if (ret == HS_SUCCESS) {
ret = 0;
}
else if (ret == HS_SCAN_TERMINATED) {
ret = cbd.ret;
}

if (pnfound) {
*pnfound = cbd.nfound;
}

return ret;
}
#else
#endif

gint state = 0;

ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
&state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
#endif

if (pnfound) {
*pnfound = cbd.nfound;
@@ -645,25 +685,30 @@ rspamd_multipattern_destroy (struct rspamd_multipattern *mp)

if (mp) {
#ifdef WITH_HYPERSCAN
gchar *p;
if (hs_suitable_cpu) {
gchar *p;

if (mp->compiled && mp->cnt > 0) {
for (i = 0; i < MAX_SCRATCH; i ++) {
hs_free_scratch (mp->scratch[i]);
if (mp->compiled && mp->cnt > 0) {
for (i = 0; i < MAX_SCRATCH; i ++) {
hs_free_scratch (mp->scratch[i]);
}

hs_free_database (mp->db);
}

hs_free_database (mp->db);
}
for (i = 0; i < mp->cnt; i ++) {
p = g_array_index (mp->hs_pats, gchar *, i);
g_free (p);
}

for (i = 0; i < mp->cnt; i ++) {
p = g_array_index (mp->hs_pats, gchar *, i);
g_free (p);
}
g_array_free (mp->hs_pats, TRUE);
g_array_free (mp->hs_ids, TRUE);
g_array_free (mp->hs_flags, TRUE);
g_slice_free1 (sizeof (*mp), mp);

g_array_free (mp->hs_pats, TRUE);
g_array_free (mp->hs_ids, TRUE);
g_array_free (mp->hs_flags, TRUE);
#else
return;
}
#endif
ac_trie_pat_t pat;

if (mp->compiled && mp->cnt > 0) {
@@ -676,7 +721,7 @@ rspamd_multipattern_destroy (struct rspamd_multipattern *mp)
}

g_array_free (mp->pats, TRUE);
#endif
g_slice_free1 (sizeof (*mp), mp);
}
}
@@ -689,15 +734,16 @@ rspamd_multipattern_get_pattern (struct rspamd_multipattern *mp,
g_assert (index < mp->cnt);

#ifdef WITH_HYPERSCAN
return g_array_index (mp->hs_pats, gchar *, index);
#else
if (hs_suitable_cpu) {
return g_array_index (mp->hs_pats, gchar *, index);
}
#endif

ac_trie_pat_t pat;

pat = g_array_index (mp->pats, ac_trie_pat_t, index);

return pat.ptr;
#endif
}

guint

+ 3
- 1
src/libutil/multipattern.h Datei anzeigen

@@ -37,6 +37,7 @@ enum rspamd_multipattern_flags {
};

struct rspamd_multipattern;
struct rspamd_cryptobox_library_ctx;

/**
* Called on pattern match
@@ -60,7 +61,8 @@ typedef gint (*rspamd_multipattern_cb_t) (struct rspamd_multipattern *mp,
* Init multipart library and set the appropriate cache dir
* @param cache_dir
*/
void rspamd_multipattern_library_init (const gchar *cache_dir);
void rspamd_multipattern_library_init (const gchar *cache_dir,
struct rspamd_cryptobox_library_ctx *crypto_ctx);

/**
* Creates empty multipattern structure

Laden…
Abbrechen
Speichern