diff options
-rw-r--r-- | src/libstat/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/libstat/learn_cache/learn_cache.h | 42 | ||||
-rw-r--r-- | src/libstat/learn_cache/redis_cache.c | 73 | ||||
-rw-r--r-- | src/libstat/learn_cache/sqlite3_cache.c | 134 | ||||
-rw-r--r-- | src/libstat/stat_config.c | 36 | ||||
-rw-r--r-- | src/libstat/stat_internal.h | 2 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 13 |
7 files changed, 227 insertions, 79 deletions
diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt index 4a1a848bb..11f48bdc0 100644 --- a/src/libstat/CMakeLists.txt +++ b/src/libstat/CMakeLists.txt @@ -9,11 +9,15 @@ SET(CLASSIFIERSSRC ${CMAKE_CURRENT_SOURCE_DIR}/classifiers/bayes.c) SET(BACKENDSSRC ${CMAKE_CURRENT_SOURCE_DIR}/backends/mmaped_file.c ${CMAKE_CURRENT_SOURCE_DIR}/backends/sqlite3_backend.c) +SET(CACHESSRC ${CMAKE_CURRENT_SOURCE_DIR}/learn_cache/sqlite3_cache.c) + IF(ENABLE_HIREDIS MATCHES "ON") SET(BACKENDSSRC ${BACKENDSSRC} ${CMAKE_CURRENT_SOURCE_DIR}/backends/redis_backend.c) + SET(CACHESSRC ${CACHESSRC} + ${CMAKE_CURRENT_SOURCE_DIR}/learn_cache/redis_cache.c) ENDIF(ENABLE_HIREDIS MATCHES "ON") -SET(CACHESSRC ${CMAKE_CURRENT_SOURCE_DIR}/learn_cache/sqlite3_cache.c) + SET(RSPAMD_STAT ${LIBSTATSRC} ${TOKENIZERSSRC} diff --git a/src/libstat/learn_cache/learn_cache.h b/src/libstat/learn_cache/learn_cache.h index 84851649f..1ebe2864a 100644 --- a/src/libstat/learn_cache/learn_cache.h +++ b/src/libstat/learn_cache/learn_cache.h @@ -33,24 +33,48 @@ struct rspamd_task; struct rspamd_stat_ctx; struct rspamd_config; +struct rspamd_statfile; struct rspamd_stat_cache { const char *name; gpointer (*init)(struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, const ucl_object_t *cf); - gint (*process)(struct rspamd_task *task, + struct rspamd_config *cfg, + struct rspamd_statfile *st, + const ucl_object_t *cf); + gpointer (*runtime)(struct rspamd_task *task, + gpointer ctx); + gint (*check)(struct rspamd_task *task, + gboolean is_spam, + gpointer runtime, + gpointer ctx); + gint (*learn)(struct rspamd_task *task, gboolean is_spam, + gpointer runtime, gpointer ctx); void (*close) (gpointer ctx); gpointer ctx; }; -gpointer rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, - const ucl_object_t *cf); -gint rspamd_stat_cache_sqlite3_process ( - struct rspamd_task *task, - gboolean is_spam, gpointer c); -void rspamd_stat_cache_sqlite3_close (gpointer c); +#define RSPAMD_STAT_CACHE_DEF(name) \ + gpointer rspamd_stat_cache_##name##_init (struct rspamd_stat_ctx *ctx, \ + struct rspamd_config *cfg, \ + struct rspamd_statfile *st, \ + const ucl_object_t *cf); \ + gpointer rspamd_stat_cache_##name##_runtime (struct rspamd_task *task, \ + gpointer ctx); \ + gint rspamd_stat_cache_##name##_check (struct rspamd_task *task, \ + gboolean is_spam, \ + gpointer runtime, \ + gpointer ctx); \ + gint rspamd_stat_cache_##name##_learn (struct rspamd_task *task, \ + gboolean is_spam, \ + gpointer runtime, \ + gpointer ctx); \ + void rspamd_stat_cache_##name##_close (gpointer ctx) + +RSPAMD_STAT_CACHE_DEF(sqlite3); +#ifdef WITH_HIREDIS +RSPAMD_STAT_CACHE_DEF(redis); +#endif #endif /* LEARN_CACHE_H_ */ diff --git a/src/libstat/learn_cache/redis_cache.c b/src/libstat/learn_cache/redis_cache.c new file mode 100644 index 000000000..de88936be --- /dev/null +++ b/src/libstat/learn_cache/redis_cache.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "learn_cache.h" +#include "rspamd.h" +#include "stat_api.h" +#include "stat_internal.h" +#include "cryptobox.h" +#include "ucl.h" +#include "hiredis/hiredis.h" +#include "hiredis/adapters/libevent.h" + +gpointer +rspamd_stat_cache_redis_init (struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st, + const ucl_object_t *cf) +{ + return NULL; +} + +gpointer +rspamd_stat_cache_redis_runtime (struct rspamd_task *task, + gpointer ctx) +{ + return NULL; +} + +gint +rspamd_stat_cache_redis_check (struct rspamd_task *task, + gboolean is_spam, + gpointer runtime, + gpointer c) +{ + return RSPAMD_LEARN_OK; +} + +gint +rspamd_stat_cache_redis_learn (struct rspamd_task *task, + gboolean is_spam, + gpointer runtime, + gpointer c) +{ + return RSPAMD_LEARN_OK; +} + +void +rspamd_stat_cache_redis_close (gpointer c) +{ + +} diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index 299c1ebd9..4d97e3084 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -120,8 +120,9 @@ struct rspamd_stat_sqlite3_ctx { }; gpointer -rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx, +rspamd_stat_cache_sqlite3_init (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg, + struct rspamd_statfile *st, const ucl_object_t *cf) { struct rspamd_stat_sqlite3_ctx *new = NULL; @@ -131,7 +132,6 @@ rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx, sqlite3 *sqlite; GError *err = NULL; - if (cf) { elt = ucl_object_find_key (cf, "path"); @@ -169,69 +169,32 @@ rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx, return new; } -static rspamd_learn_t -rspamd_stat_cache_sqlite3_check (rspamd_mempool_t *pool, - const guchar *h, gsize len, gboolean is_spam, - struct rspamd_stat_sqlite3_ctx *ctx) +gpointer +rspamd_stat_cache_sqlite3_runtime (struct rspamd_task *task, + gpointer ctx) { - gint rc, ret = RSPAMD_LEARN_OK; - gint64 flag; - - rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_START_DEF); - rc = rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_GET_LEARN, (gint64)len, h, &flag); - rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); - - - if (rc == SQLITE_OK) { - /* We have some existing record in the table */ - if (!!flag == !!is_spam) { - /* Already learned */ - - ret = RSPAMD_LEARN_INGORE; - } - else { - /* Need to relearn */ - flag = !!is_spam ? 1 : 0; - - rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_START_IM); - rc = rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_UPDATE_LEARN, flag, (gint64)len, h); - rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); - - return RSPAMD_LEARN_UNLEARN; - } - } - else { - /* Insert result new id */ - flag = !!is_spam ? 1 : 0; - rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_START_IM); - rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_ADD_LEARN, (gint64)len, h, flag); - rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); - } - - return ret; + /* No need of runtime for this type of classifier */ + return NULL; } gint -rspamd_stat_cache_sqlite3_process (struct rspamd_task *task, - gboolean is_spam, gpointer c) +rspamd_stat_cache_sqlite3_check (struct rspamd_task *task, + gboolean is_spam, + gpointer runtime, + gpointer c) { struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c; struct mime_text_part *part; rspamd_cryptobox_hash_state_t st; rspamd_ftok_t *word; - guchar out[rspamd_cryptobox_HASHBYTES]; + guchar *out; guint i, j; + gint rc; + gint64 flag; if (ctx != NULL && ctx->db != NULL) { + out = rspamd_mempool_alloc (task->task_pool, rspamd_cryptobox_HASHBYTES); + rspamd_cryptobox_hash_init (&st, NULL, 0); for (i = 0; i < task->text_parts->len; i ++) { @@ -247,8 +210,69 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task, rspamd_cryptobox_hash_final (&st, out); - return rspamd_stat_cache_sqlite3_check (task->task_pool, - out, sizeof (out), is_spam, ctx); + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_START_DEF); + rc = rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_GET_LEARN, (gint64)rspamd_cryptobox_HASHBYTES, + out, &flag); + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); + + /* Save hash into variables */ + rspamd_mempool_set_variable (task->task_pool, "words_hash", out, NULL); + + if (rc == SQLITE_OK) { + /* We have some existing record in the table */ + if (!!flag == !!is_spam) { + /* Already learned */ + return RSPAMD_LEARN_INGORE; + } + else { + /* Need to relearn */ + return RSPAMD_LEARN_UNLEARN; + } + } + else { + + } + } + + return RSPAMD_LEARN_OK; +} + +gint +rspamd_stat_cache_sqlite3_learn (struct rspamd_task *task, + gboolean is_spam, + gpointer runtime, + gpointer c) +{ + struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c; + gboolean unlearn = !!(task->flags & RSPAMD_TASK_FLAG_UNLEARN); + guchar *h; + gint64 flag; + + h = rspamd_mempool_get_variable (task->task_pool, "words_hash"); + g_assert (h != NULL); + + if (!unlearn) { + /* Insert result new id */ + flag = !!is_spam ? 1 : 0; + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_START_IM); + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_ADD_LEARN, + (gint64)rspamd_cryptobox_HASHBYTES, h, flag); + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); + } + else { + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_START_IM); + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_UPDATE_LEARN, task->task_pool, + (gint64)rspamd_cryptobox_HASHBYTES, h); + rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); } return RSPAMD_LEARN_OK; diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index 1c6989508..50897b082 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -77,13 +77,20 @@ static struct rspamd_stat_backend stat_backends[] = { #endif }; -static struct rspamd_stat_cache stat_caches[] = { - { - .name = RSPAMD_DEFAULT_CACHE, - .init = rspamd_stat_cache_sqlite3_init, - .process = rspamd_stat_cache_sqlite3_process, - .close = rspamd_stat_cache_sqlite3_close +#define RSPAMD_STAT_CACHE_ELT(nam, eltn) { \ + .name = #nam, \ + .init = rspamd_stat_cache_##eltn##_init, \ + .runtime = rspamd_stat_cache_##eltn##_runtime, \ + .check = rspamd_stat_cache_##eltn##_check, \ + .learn = rspamd_stat_cache_##eltn##_learn, \ + .close = rspamd_stat_cache_##eltn##_close \ } + +static struct rspamd_stat_cache stat_caches[] = { + RSPAMD_STAT_CACHE_ELT(sqlite3, sqlite3), +#ifdef WITH_HIREDIS + RSPAMD_STAT_CACHE_ELT(redis, redis), +#endif }; void @@ -158,9 +165,10 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base) } } - cl->cache = rspamd_stat_get_cache (cache_name); - g_assert (cl->cache != NULL); - cl->cachecf = cl->cache->init (stat_ctx, cfg, cache_obj); + if (cache_name == NULL) { + /* We assume that learn cache is the same as backend */ + cache_name = clf->backend; + } curst = clf->statfiles; @@ -174,6 +182,15 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base) msg_debug_config ("added backend %s for symbol %s", bk->name, stf->symbol); + /* XXX: bad hack to pass statfiles configuration to cache */ + if (cl->cache == NULL) { + cl->cache = rspamd_stat_get_cache (cache_name); + g_assert (cl->cache != NULL); + cl->cachecf = cl->cache->init (stat_ctx, cfg, st, cache_obj); + msg_debug_config ("added cache %s for symbol %s", + cl->cache->name, stf->symbol); + } + if (st->bkcf == NULL) { msg_err_config ("cannot init backend %s for statfile %s", clf->backend, stf->symbol); @@ -356,7 +373,6 @@ rspamd_stat_ctx_register_async (rspamd_stat_async_handler handler, { struct rspamd_stat_async_elt *elt; struct rspamd_stat_ctx *st_ctx; - gdouble jittered_time; st_ctx = rspamd_stat_get_ctx (); g_assert (st_ctx != NULL); diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h index 7a3951a0e..892da9e9d 100644 --- a/src/libstat/stat_internal.h +++ b/src/libstat/stat_internal.h @@ -41,9 +41,9 @@ struct rspamd_statfile_runtime { /* Common classifier structure */ struct rspamd_classifier { struct rspamd_stat_ctx *ctx; + GArray *statfiles_ids; struct rspamd_stat_cache *cache; gpointer cachecf; - GArray *statfiles_ids; gulong spam_learns; gulong ham_learns; struct rspamd_classifier_config *cfg; diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 4bf99b98b..864336a61 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -376,6 +376,7 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, { rspamd_learn_t learn_res = RSPAMD_LEARN_OK; struct rspamd_classifier *cl; + gpointer rt; guint i; /* Check whether we have learned that file */ @@ -389,8 +390,9 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, } if (cl->cache && cl->cachecf) { - learn_res = cl->cache->process (task, spam, - cl->cachecf); + rt = cl->cache->runtime (task, cl->cachecf); + learn_res = cl->cache->check (task, spam, + cl->cachecf, rt); } if (learn_res == RSPAMD_LEARN_INGORE) { @@ -558,7 +560,7 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx, { struct rspamd_classifier *cl; struct rspamd_statfile *st; - gpointer bk_run; + gpointer bk_run, cache_run; guint i, j; gint id; gboolean res = TRUE; @@ -572,6 +574,11 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx, continue; } + if (cl->cache) { + cache_run = cl->cache->runtime (task, cl->cachecf); + cl->cache->learn (task, spam, cache_run, cl->cachecf); + } + for (j = 0; j < cl->statfiles_ids->len; j ++) { id = g_array_index (cl->statfiles_ids, gint, j); st = g_ptr_array_index (st_ctx->statfiles, id); |