Browse Source

[Project] Move symbols to khash for speed and inlining

tags/1.7.9
Vsevolod Stakhov 5 years ago
parent
commit
92b92556c6
2 changed files with 82 additions and 38 deletions
  1. 63
    33
      src/libmime/filter.c
  2. 19
    5
      src/libmime/filter.h

+ 63
- 33
src/libmime/filter.c View File

@@ -22,6 +22,20 @@
#include <math.h>
#include "contrib/uthash/utlist.h"

/**
 * Destructor for a metric result, registered on the task memory pool by
 * rspamd_create_metric_result.
 *
 * Releases every per-symbol options table that was created, then the
 * symbols table and the symbol-groups table themselves. The result
 * structure `d` points to is not freed here.
 *
 * @param d pointer to the struct rspamd_metric_result being torn down
 */
static void
rspamd_metric_result_dtor (gpointer d)
{
	struct rspamd_symbol_result cur;
	struct rspamd_metric_result *mres = d;

	/* Options tables are allocated lazily, so some symbols have none */
	kh_foreach_value (mres->symbols, cur, {
		if (cur.options != NULL) {
			kh_destroy (rspamd_options_hash, cur.options);
		}
	});

	/* The two top-level tables are independent of each other */
	kh_destroy (rspamd_symbols_group_hash, mres->sym_groups);
	kh_destroy (rspamd_symbols_hash, mres->symbols);
}

struct rspamd_metric_result *
rspamd_create_metric_result (struct rspamd_task *task)
@@ -37,15 +51,8 @@ rspamd_create_metric_result (struct rspamd_task *task)

metric_res = rspamd_mempool_alloc (task->task_pool,
sizeof (struct rspamd_metric_result));
metric_res->symbols = g_hash_table_new (rspamd_str_hash,
rspamd_str_equal);
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t) g_hash_table_unref,
metric_res->symbols);
metric_res->sym_groups = g_hash_table_new (g_direct_hash, g_direct_equal);
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t) g_hash_table_unref,
metric_res->sym_groups);
metric_res->symbols = kh_init (rspamd_symbols_hash);
metric_res->sym_groups = kh_init (rspamd_symbols_group_hash);
metric_res->grow_factor = 0;
metric_res->score = 0;

@@ -53,6 +60,10 @@ rspamd_create_metric_result (struct rspamd_task *task)
metric_res->actions_limits[i] = task->cfg->actions[i].score;
}

rspamd_mempool_add_destructor (task->task_pool,
rspamd_metric_result_dtor,
metric_res);

return metric_res;
}

@@ -91,8 +102,9 @@ insert_metric_result (struct rspamd_task *task,
struct rspamd_symbol *sdef;
struct rspamd_symbols_group *gr = NULL;
const ucl_object_t *mobj, *sobj;
gint max_shots;
gint max_shots, ret;
guint i;
khiter_t k;
gboolean single = !!(flags & RSPAMD_SYMBOL_INSERT_SINGLE);

metric_res = rspamd_create_metric_result (task);
@@ -116,12 +128,16 @@ insert_metric_result (struct rspamd_task *task,
final_score = (*sdef->weight_ptr) * weight;

PTR_ARRAY_FOREACH (sdef->groups, i, gr) {
gr_score = g_hash_table_lookup (metric_res->sym_groups, gr);
k = kh_get (rspamd_symbols_group_hash,
metric_res->sym_groups, GPOINTER_TO_INT (gr));

if (gr_score == NULL) {
gr_score = rspamd_mempool_alloc (task->task_pool, sizeof (gdouble));
*gr_score = 0;
g_hash_table_insert (metric_res->sym_groups, gr, gr_score);
if (k == kh_end (metric_res->sym_groups)) {
k = kh_put (rspamd_symbols_group_hash, metric_res->sym_groups,
GPOINTER_TO_INT (gr), &ret);
kh_value (metric_res->sym_groups, k) = 0;
}
else {
gr_score = &kh_value (metric_res->sym_groups, k);
}
}
}
@@ -139,7 +155,9 @@ insert_metric_result (struct rspamd_task *task,
}

/* Add metric score */
if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) {
k = kh_get (rspamd_symbols_hash, metric_res->symbols, symbol);
if (k != kh_end (metric_res->symbols)) {
s = &kh_value (metric_res->symbols, k);
if (single) {
max_shots = 1;
}
@@ -157,8 +175,16 @@ insert_metric_result (struct rspamd_task *task,
}

/* Now check for the duplicate options */
if (opt && s->options && g_hash_table_lookup (s->options, opt)) {
single = TRUE;
if (opt && s->options) {
k = kh_get (rspamd_options_hash, s->options, opt);

if (k == kh_end (s->options)) {
single = TRUE;
}
else {
s->nshots ++;
rspamd_task_add_result_option (task, s, opt);
}
}
else {
s->nshots ++;
@@ -170,7 +196,8 @@ insert_metric_result (struct rspamd_task *task,
diff = final_score;
}
else {
if (fabs (s->score) < fabs (final_score) && signbit (s->score) == signbit (final_score)) {
if (fabs (s->score) < fabs (final_score) &&
signbit (s->score) == signbit (final_score)) {
/* Replace less significant weight with a more significant one */
diff = final_score - s->score;
}
@@ -209,7 +236,10 @@ insert_metric_result (struct rspamd_task *task,
}
}
else {
s = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_symbol_result));
k = kh_put (rspamd_symbols_hash, metric_res->symbols,
symbol, &ret);
s = &kh_value (metric_res->symbols, k);
memset (s, 0, sizeof (*s));

/* Handle grow factor */
if (metric_res->grow_factor && final_score > 0) {
@@ -241,7 +271,6 @@ insert_metric_result (struct rspamd_task *task,
}

rspamd_task_add_result_option (task, s, opt);
g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s);
}

msg_debug_task ("symbol %s, score %.2f, factor: %f",
@@ -289,33 +318,34 @@ rspamd_task_add_result_option (struct rspamd_task *task,
{
struct rspamd_symbol_option *opt;
gboolean ret = FALSE;
khiter_t k;
gint r;

if (s && val) {
if (s->options && !(s->sym &&
(s->sym->flags & RSPAMD_SYMBOL_FLAG_ONEPARAM)) &&
g_hash_table_size (s->options) < task->cfg->default_max_shots) {
kh_size (s->options) < task->cfg->default_max_shots) {
/* Append new options */
if (!g_hash_table_lookup (s->options, val)) {
opt = rspamd_mempool_alloc (task->task_pool, sizeof (*opt));
k = kh_get (rspamd_options_hash, s->options, val);

if (k == kh_end (s->options)) {
k = kh_put (rspamd_options_hash, s->options, val, &r);

opt = &kh_value (s->options, k);
opt->option = rspamd_mempool_strdup (task->task_pool, val);
DL_APPEND (s->opts_head, opt);

g_hash_table_insert (s->options, opt->option, opt);
ret = TRUE;
}
}
else {
s->options = g_hash_table_new (rspamd_strcase_hash,
rspamd_strcase_equal);
rspamd_mempool_add_destructor (task->task_pool,
(rspamd_mempool_destruct_t)g_hash_table_unref,
s->options);
opt = rspamd_mempool_alloc (task->task_pool, sizeof (*opt));
s->options = kh_init (rspamd_options_hash);
k = kh_put (rspamd_options_hash, s->options, val, &r);

opt = &kh_value (s->options, k);
opt->option = rspamd_mempool_strdup (task->task_pool, val);
s->opts_head = NULL;
DL_APPEND (s->opts_head, opt);

g_hash_table_insert (s->options, opt->option, opt);
ret = TRUE;
}
}

+ 19
- 5
src/libmime/filter.h View File

@@ -9,6 +9,7 @@
#include "config.h"
#include "symbols_cache.h"
#include "task.h"
#include "khash.h"

struct rspamd_task;
struct rspamd_settings;
@@ -27,12 +28,18 @@ enum rspamd_symbol_result_flags {
/**
* Rspamd symbol
*/
/*
 * Declares the khash type `rspamd_options_hash`: a map keyed by
 * `const char *` (hashed with rspamd_str_hash, compared with
 * rspamd_str_equal) whose values are `struct rspamd_symbol_option`
 * objects held by value inside the table.
 *
 * NOTE(review): the GHashTable used for options in filter.c was created
 * with rspamd_strcase_hash / rspamd_strcase_equal; this declaration uses
 * rspamd_str_hash / rspamd_str_equal - confirm the difference is intended.
 * NOTE(review): filter.c passes &kh_value() of this table to DL_APPEND on
 * opts_head; presumably value addresses can change when the table grows -
 * verify that the linked list cannot be left with stale pointers.
 */
KHASH_INIT (rspamd_options_hash,
const char *,
struct rspamd_symbol_option,
true,
rspamd_str_hash,
rspamd_str_equal);
struct rspamd_symbol_result {
double score; /**< symbol's score */
GHashTable *options; /**< list of symbol's options */
double score; /**< symbol's score */
khash_t(rspamd_options_hash) *options; /**< list of symbol's options */
struct rspamd_symbol_option *opts_head; /**< head of linked list of options */
const gchar *name;
struct rspamd_symbol *sym; /**< symbol configuration */
struct rspamd_symbol *sym; /**< symbol configuration */
guint nshots;
enum rspamd_symbol_result_flags flags;
};
@@ -40,11 +47,18 @@ struct rspamd_symbol_result {
/**
* Result of metric processing
*/
/*
 * Declares the khash type `rspamd_symbols_hash`: a map keyed by
 * `const char *` (hashed with rspamd_str_hash, compared with
 * rspamd_str_equal) whose values are `struct rspamd_symbol_result`
 * objects held by value inside the table.
 */
KHASH_INIT (rspamd_symbols_hash,
const char *,
struct rspamd_symbol_result,
true,
rspamd_str_hash,
rspamd_str_equal);
/*
 * Declares the khash type `rspamd_symbols_group_hash`: an integer-keyed map
 * with `double` values.
 *
 * NOTE(review): filter.c builds the key with GPOINTER_TO_INT (gr) from a
 * `struct rspamd_symbols_group *`; presumably that drops the upper pointer
 * bits on 64-bit targets - confirm two distinct groups cannot share a key.
 */
KHASH_MAP_INIT_INT (rspamd_symbols_group_hash, double);
struct rspamd_metric_result {
double score; /**< total score */
double grow_factor; /**< current grow factor */
GHashTable *symbols; /**< symbols of metric */
GHashTable *sym_groups; /**< groups of symbols */
khash_t(rspamd_symbols_hash) *symbols; /**< symbols of metric */
khash_t(rspamd_symbols_group_hash) *sym_groups; /**< groups of symbols */
gdouble actions_limits[METRIC_ACTION_MAX]; /**< set of actions for this metric */
};


Loading…
Cancel
Save