aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/libstat/CMakeLists.txt 6
-rw-r--r-- src/libstat/learn_cache/learn_cache.h 42
-rw-r--r-- src/libstat/learn_cache/redis_cache.c 73
-rw-r--r-- src/libstat/learn_cache/sqlite3_cache.c 134
-rw-r--r-- src/libstat/stat_config.c 36
-rw-r--r-- src/libstat/stat_internal.h 2
-rw-r--r-- src/libstat/stat_process.c 13
7 files changed, 227 insertions, 79 deletions
diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt
index 4a1a848bb..11f48bdc0 100644
--- a/src/libstat/CMakeLists.txt
+++ b/src/libstat/CMakeLists.txt
@@ -9,11 +9,15 @@ SET(CLASSIFIERSSRC ${CMAKE_CURRENT_SOURCE_DIR}/classifiers/bayes.c)
SET(BACKENDSSRC ${CMAKE_CURRENT_SOURCE_DIR}/backends/mmaped_file.c
${CMAKE_CURRENT_SOURCE_DIR}/backends/sqlite3_backend.c)
+SET(CACHESSRC ${CMAKE_CURRENT_SOURCE_DIR}/learn_cache/sqlite3_cache.c)
+
IF(ENABLE_HIREDIS MATCHES "ON")
SET(BACKENDSSRC ${BACKENDSSRC}
${CMAKE_CURRENT_SOURCE_DIR}/backends/redis_backend.c)
+ SET(CACHESSRC ${CACHESSRC}
+ ${CMAKE_CURRENT_SOURCE_DIR}/learn_cache/redis_cache.c)
ENDIF(ENABLE_HIREDIS MATCHES "ON")
-SET(CACHESSRC ${CMAKE_CURRENT_SOURCE_DIR}/learn_cache/sqlite3_cache.c)
+
SET(RSPAMD_STAT ${LIBSTATSRC}
${TOKENIZERSSRC}
diff --git a/src/libstat/learn_cache/learn_cache.h b/src/libstat/learn_cache/learn_cache.h
index 84851649f..1ebe2864a 100644
--- a/src/libstat/learn_cache/learn_cache.h
+++ b/src/libstat/learn_cache/learn_cache.h
@@ -33,24 +33,48 @@
struct rspamd_task;
struct rspamd_stat_ctx;
struct rspamd_config;
+struct rspamd_statfile;
struct rspamd_stat_cache {
const char *name;
gpointer (*init)(struct rspamd_stat_ctx *ctx,
- struct rspamd_config *cfg, const ucl_object_t *cf);
- gint (*process)(struct rspamd_task *task,
+ struct rspamd_config *cfg,
+ struct rspamd_statfile *st,
+ const ucl_object_t *cf);
+ gpointer (*runtime)(struct rspamd_task *task,
+ gpointer ctx);
+ gint (*check)(struct rspamd_task *task,
+ gboolean is_spam,
+ gpointer runtime,
+ gpointer ctx);
+ gint (*learn)(struct rspamd_task *task,
gboolean is_spam,
+ gpointer runtime,
gpointer ctx);
void (*close) (gpointer ctx);
gpointer ctx;
};
-gpointer rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx,
- struct rspamd_config *cfg,
- const ucl_object_t *cf);
-gint rspamd_stat_cache_sqlite3_process (
- struct rspamd_task *task,
- gboolean is_spam, gpointer c);
-void rspamd_stat_cache_sqlite3_close (gpointer c);
+#define RSPAMD_STAT_CACHE_DEF(name) \
+ gpointer rspamd_stat_cache_##name##_init (struct rspamd_stat_ctx *ctx, \
+ struct rspamd_config *cfg, \
+ struct rspamd_statfile *st, \
+ const ucl_object_t *cf); \
+ gpointer rspamd_stat_cache_##name##_runtime (struct rspamd_task *task, \
+ gpointer ctx); \
+ gint rspamd_stat_cache_##name##_check (struct rspamd_task *task, \
+ gboolean is_spam, \
+ gpointer runtime, \
+ gpointer ctx); \
+ gint rspamd_stat_cache_##name##_learn (struct rspamd_task *task, \
+ gboolean is_spam, \
+ gpointer runtime, \
+ gpointer ctx); \
+ void rspamd_stat_cache_##name##_close (gpointer ctx)
+
+RSPAMD_STAT_CACHE_DEF(sqlite3);
+#ifdef WITH_HIREDIS
+RSPAMD_STAT_CACHE_DEF(redis);
+#endif
#endif /* LEARN_CACHE_H_ */
diff --git a/src/libstat/learn_cache/redis_cache.c b/src/libstat/learn_cache/redis_cache.c
new file mode 100644
index 000000000..de88936be
--- /dev/null
+++ b/src/libstat/learn_cache/redis_cache.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "learn_cache.h"
+#include "rspamd.h"
+#include "stat_api.h"
+#include "stat_internal.h"
+#include "cryptobox.h"
+#include "ucl.h"
+#include "hiredis/hiredis.h"
+#include "hiredis/adapters/libevent.h"
+
+gpointer
+rspamd_stat_cache_redis_init (struct rspamd_stat_ctx *ctx,
+ struct rspamd_config *cfg,
+ struct rspamd_statfile *st,
+ const ucl_object_t *cf)
+{
+ return NULL;
+}
+
+gpointer
+rspamd_stat_cache_redis_runtime (struct rspamd_task *task,
+ gpointer ctx)
+{
+ return NULL;
+}
+
+gint
+rspamd_stat_cache_redis_check (struct rspamd_task *task,
+ gboolean is_spam,
+ gpointer runtime,
+ gpointer c)
+{
+ return RSPAMD_LEARN_OK;
+}
+
+gint
+rspamd_stat_cache_redis_learn (struct rspamd_task *task,
+ gboolean is_spam,
+ gpointer runtime,
+ gpointer c)
+{
+ return RSPAMD_LEARN_OK;
+}
+
+void
+rspamd_stat_cache_redis_close (gpointer c)
+{
+
+}
diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c
index 299c1ebd9..4d97e3084 100644
--- a/src/libstat/learn_cache/sqlite3_cache.c
+++ b/src/libstat/learn_cache/sqlite3_cache.c
@@ -120,8 +120,9 @@ struct rspamd_stat_sqlite3_ctx {
};
gpointer
-rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx,
+rspamd_stat_cache_sqlite3_init (struct rspamd_stat_ctx *ctx,
struct rspamd_config *cfg,
+ struct rspamd_statfile *st,
const ucl_object_t *cf)
{
struct rspamd_stat_sqlite3_ctx *new = NULL;
@@ -131,7 +132,6 @@ rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx,
sqlite3 *sqlite;
GError *err = NULL;
-
if (cf) {
elt = ucl_object_find_key (cf, "path");
@@ -169,69 +169,32 @@ rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx,
return new;
}
-static rspamd_learn_t
-rspamd_stat_cache_sqlite3_check (rspamd_mempool_t *pool,
- const guchar *h, gsize len, gboolean is_spam,
- struct rspamd_stat_sqlite3_ctx *ctx)
+gpointer
+rspamd_stat_cache_sqlite3_runtime (struct rspamd_task *task,
+ gpointer ctx)
{
- gint rc, ret = RSPAMD_LEARN_OK;
- gint64 flag;
-
- rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_TRANSACTION_START_DEF);
- rc = rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_GET_LEARN, (gint64)len, h, &flag);
- rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_TRANSACTION_COMMIT);
-
-
- if (rc == SQLITE_OK) {
- /* We have some existing record in the table */
- if (!!flag == !!is_spam) {
- /* Already learned */
-
- ret = RSPAMD_LEARN_INGORE;
- }
- else {
- /* Need to relearn */
- flag = !!is_spam ? 1 : 0;
-
- rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_TRANSACTION_START_IM);
- rc = rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_UPDATE_LEARN, flag, (gint64)len, h);
- rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_TRANSACTION_COMMIT);
-
- return RSPAMD_LEARN_UNLEARN;
- }
- }
- else {
- /* Insert result new id */
- flag = !!is_spam ? 1 : 0;
- rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_TRANSACTION_START_IM);
- rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_ADD_LEARN, (gint64)len, h, flag);
- rspamd_sqlite3_run_prstmt (pool, ctx->db, ctx->prstmt,
- RSPAMD_STAT_CACHE_TRANSACTION_COMMIT);
- }
-
- return ret;
+ /* No runtime is needed for this type of cache */
+ return NULL;
}
gint
-rspamd_stat_cache_sqlite3_process (struct rspamd_task *task,
- gboolean is_spam, gpointer c)
+rspamd_stat_cache_sqlite3_check (struct rspamd_task *task,
+ gboolean is_spam,
+ gpointer runtime,
+ gpointer c)
{
struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c;
struct mime_text_part *part;
rspamd_cryptobox_hash_state_t st;
rspamd_ftok_t *word;
- guchar out[rspamd_cryptobox_HASHBYTES];
+ guchar *out;
guint i, j;
+ gint rc;
+ gint64 flag;
if (ctx != NULL && ctx->db != NULL) {
+ out = rspamd_mempool_alloc (task->task_pool, rspamd_cryptobox_HASHBYTES);
+
rspamd_cryptobox_hash_init (&st, NULL, 0);
for (i = 0; i < task->text_parts->len; i ++) {
@@ -247,8 +210,69 @@ rspamd_stat_cache_sqlite3_process (struct rspamd_task *task,
rspamd_cryptobox_hash_final (&st, out);
- return rspamd_stat_cache_sqlite3_check (task->task_pool,
- out, sizeof (out), is_spam, ctx);
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_TRANSACTION_START_DEF);
+ rc = rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_GET_LEARN, (gint64)rspamd_cryptobox_HASHBYTES,
+ out, &flag);
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_TRANSACTION_COMMIT);
+
+ /* Save hash as task pool variable "words_hash" for the later learn call */
+ rspamd_mempool_set_variable (task->task_pool, "words_hash", out, NULL);
+
+ if (rc == SQLITE_OK) {
+ /* We have some existing record in the table */
+ if (!!flag == !!is_spam) {
+ /* Already learned */
+ return RSPAMD_LEARN_INGORE;
+ }
+ else {
+ /* Need to relearn */
+ return RSPAMD_LEARN_UNLEARN;
+ }
+ }
+ else {
+
+ }
+ }
+
+ return RSPAMD_LEARN_OK;
+}
+
+gint
+rspamd_stat_cache_sqlite3_learn (struct rspamd_task *task,
+ gboolean is_spam,
+ gpointer runtime,
+ gpointer c)
+{
+ struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c;
+ gboolean unlearn = !!(task->flags & RSPAMD_TASK_FLAG_UNLEARN);
+ guchar *h;
+ gint64 flag;
+
+ h = rspamd_mempool_get_variable (task->task_pool, "words_hash");
+ g_assert (h != NULL);
+
+ flag = !!is_spam ? 1 : 0; /* bound by both the insert and the update below */
+ if (!unlearn) {
+ /* Insert result new id */
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_TRANSACTION_START_IM);
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_ADD_LEARN,
+ (gint64)rspamd_cryptobox_HASHBYTES, h, flag);
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_TRANSACTION_COMMIT);
+ }
+ else {
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_TRANSACTION_START_IM);
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_UPDATE_LEARN, flag, /* relearn: store the new class */
+ (gint64)rspamd_cryptobox_HASHBYTES, h);
+ rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt,
+ RSPAMD_STAT_CACHE_TRANSACTION_COMMIT);
}
return RSPAMD_LEARN_OK;
diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c
index 1c6989508..50897b082 100644
--- a/src/libstat/stat_config.c
+++ b/src/libstat/stat_config.c
@@ -77,13 +77,20 @@ static struct rspamd_stat_backend stat_backends[] = {
#endif
};
-static struct rspamd_stat_cache stat_caches[] = {
- {
- .name = RSPAMD_DEFAULT_CACHE,
- .init = rspamd_stat_cache_sqlite3_init,
- .process = rspamd_stat_cache_sqlite3_process,
- .close = rspamd_stat_cache_sqlite3_close
+#define RSPAMD_STAT_CACHE_ELT(nam, eltn) { \
+ .name = #nam, \
+ .init = rspamd_stat_cache_##eltn##_init, \
+ .runtime = rspamd_stat_cache_##eltn##_runtime, \
+ .check = rspamd_stat_cache_##eltn##_check, \
+ .learn = rspamd_stat_cache_##eltn##_learn, \
+ .close = rspamd_stat_cache_##eltn##_close \
}
+
+static struct rspamd_stat_cache stat_caches[] = {
+ RSPAMD_STAT_CACHE_ELT(sqlite3, sqlite3),
+#ifdef WITH_HIREDIS
+ RSPAMD_STAT_CACHE_ELT(redis, redis),
+#endif
};
void
@@ -158,9 +165,10 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base)
}
}
- cl->cache = rspamd_stat_get_cache (cache_name);
- g_assert (cl->cache != NULL);
- cl->cachecf = cl->cache->init (stat_ctx, cfg, cache_obj);
+ if (cache_name == NULL) {
+ /* We assume that learn cache is the same as backend */
+ cache_name = clf->backend;
+ }
curst = clf->statfiles;
@@ -174,6 +182,15 @@ rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base)
msg_debug_config ("added backend %s for symbol %s",
bk->name, stf->symbol);
+ /* XXX: bad hack to pass statfiles configuration to cache */
+ if (cl->cache == NULL) {
+ cl->cache = rspamd_stat_get_cache (cache_name);
+ g_assert (cl->cache != NULL);
+ cl->cachecf = cl->cache->init (stat_ctx, cfg, st, cache_obj);
+ msg_debug_config ("added cache %s for symbol %s",
+ cl->cache->name, stf->symbol);
+ }
+
if (st->bkcf == NULL) {
msg_err_config ("cannot init backend %s for statfile %s",
clf->backend, stf->symbol);
@@ -356,7 +373,6 @@ rspamd_stat_ctx_register_async (rspamd_stat_async_handler handler,
{
struct rspamd_stat_async_elt *elt;
struct rspamd_stat_ctx *st_ctx;
- gdouble jittered_time;
st_ctx = rspamd_stat_get_ctx ();
g_assert (st_ctx != NULL);
diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h
index 7a3951a0e..892da9e9d 100644
--- a/src/libstat/stat_internal.h
+++ b/src/libstat/stat_internal.h
@@ -41,9 +41,9 @@ struct rspamd_statfile_runtime {
/* Common classifier structure */
struct rspamd_classifier {
struct rspamd_stat_ctx *ctx;
+ GArray *statfiles_ids;
struct rspamd_stat_cache *cache;
gpointer cachecf;
- GArray *statfiles_ids;
gulong spam_learns;
gulong ham_learns;
struct rspamd_classifier_config *cfg;
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 4bf99b98b..864336a61 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -376,6 +376,7 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx,
{
rspamd_learn_t learn_res = RSPAMD_LEARN_OK;
struct rspamd_classifier *cl;
+ gpointer rt;
guint i;
/* Check whether we have learned that file */
@@ -389,8 +390,9 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx,
}
if (cl->cache && cl->cachecf) {
- learn_res = cl->cache->process (task, spam,
- cl->cachecf);
+ rt = cl->cache->runtime (task, cl->cachecf);
+ learn_res = cl->cache->check (task, spam,
+ rt, cl->cachecf); /* (runtime, ctx) order as declared in learn_cache.h */
}
if (learn_res == RSPAMD_LEARN_INGORE) {
@@ -558,7 +560,7 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx,
{
struct rspamd_classifier *cl;
struct rspamd_statfile *st;
- gpointer bk_run;
+ gpointer bk_run, cache_run;
guint i, j;
gint id;
gboolean res = TRUE;
@@ -572,6 +574,11 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx,
continue;
}
+ if (cl->cache && cl->cachecf) { /* cache init may have returned NULL */
+ cache_run = cl->cache->runtime (task, cl->cachecf);
+ cl->cache->learn (task, spam, cache_run, cl->cachecf);
+ }
+
for (j = 0; j < cl->statfiles_ids->len; j ++) {
id = g_array_index (cl->statfiles_ids, gint, j);
st = g_ptr_array_index (st_ctx->statfiles, id);