aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-02-13 16:46:06 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-02-13 16:46:06 +0000
commitf086958b4b6b71952bb47e422aebaaa00c3143ea (patch)
tree56259e331c98ad3e6009ffbc7e3b467721219bc9 /src
parentdf94595caf6890037a1178a8bdcb753653b1133d (diff)
downloadrspamd-f086958b4b6b71952bb47e422aebaaa00c3143ea.tar.gz
rspamd-f086958b4b6b71952bb47e422aebaaa00c3143ea.zip
Add sqlite3 learn cache.
Diffstat (limited to 'src')
-rw-r--r--src/libstat/CMakeLists.txt6
-rw-r--r--src/libstat/learn_cache/learn_cache.h7
-rw-r--r--src/libstat/learn_cache/sqlite3_cache.c203
3 files changed, 214 insertions, 2 deletions
diff --git a/src/libstat/CMakeLists.txt b/src/libstat/CMakeLists.txt
index 17c553989..5c4e2bc74 100644
--- a/src/libstat/CMakeLists.txt
+++ b/src/libstat/CMakeLists.txt
@@ -8,16 +8,20 @@ SET(TOKENIZERSSRC tokenizers/tokenizers.c
SET(CLASSIFIERSSRC classifiers/bayes.c)
SET(BACKENDSSRC backends/mmaped_file.c)
+
+SET(CACHESSRC learn_cache/sqlite3_cache.c)
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC}
${TOKENIZERSSRC}
${CLASSIFIERSSRC}
- ${BACKENDSSRC})
+ ${BACKENDSSRC}
+ ${CACHESSRC})
IF(NOT DEBIAN_BUILD)
SET_TARGET_PROPERTIES(rspamd-stat PROPERTIES VERSION ${RSPAMD_VERSION})
ENDIF(NOT DEBIAN_BUILD)
SET_TARGET_PROPERTIES(rspamd-stat PROPERTIES LINKER_LANGUAGE C COMPILE_FLAGS "-DRSPAMD_LIB")
TARGET_LINK_LIBRARIES(rspamd-stat rspamd-server)
+TARGET_LINK_LIBRARIES(rspamd-stat sqlite3)
IF(CMAKE_COMPILER_IS_GNUCC)
SET_TARGET_PROPERTIES(rspamd-stat PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing")
diff --git a/src/libstat/learn_cache/learn_cache.h b/src/libstat/learn_cache/learn_cache.h
index dd9240a27..bd01d1be1 100644
--- a/src/libstat/learn_cache/learn_cache.h
+++ b/src/libstat/learn_cache/learn_cache.h
@@ -34,10 +34,15 @@ typedef enum rspamd_learn_cache_result {
RSPAMD_LEARN_INGORE
} rspamd_learn_t;
+struct rspamd_task;
+struct rspamd_stat_ctx;
+struct rspamd_config;
+
struct rspamd_stat_cache {
const char *name;
gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg);
- rspamd_learn_t (*process)(GTree *input, gboolean is_spam, gpointer ctx);
+ rspamd_learn_t (*process)(struct rspamd_task *task, gboolean is_spam,
+ gpointer ctx);
gpointer ctx;
};
diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c
new file mode 100644
index 000000000..ef6f005b3
--- /dev/null
+++ b/src/libstat/learn_cache/sqlite3_cache.c
@@ -0,0 +1,203 @@
+/* Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "learn_cache.h"
+#include "main.h"
+#include "stat_api.h"
+#include "stat_internal.h"
+#include "blake2.h"
+#include "ucl.h"
+#include "fstring.h"
+#include "message.h"
+#include <sqlite3.h>
+
+/*
+ * Schema bootstrap script. Inside a single transaction it creates the
+ * `learns` table (row id, learn flag, message digest) and a unique index on
+ * `digest`, each only if it does not exist yet. The script is passed to
+ * sqlite3_exec() by rspamd_stat_cache_sqlite3_init() right after the
+ * database has been opened.
+ */
+const char *create_tables_sql =
+ "BEGIN;"
+ "CREATE TABLE IF NOT EXISTS learns("
+ "id INTEGER PRIMARY KEY,"
+ "flag INTEGER NOT NULL,"
+ "digest TEXT NOT NULL);"
+ "CREATE UNIQUE INDEX IF NOT EXISTS d ON learns(digest);"
+ "COMMIT;";
+
+/*
+ * Path of the cache database file: the string literal RSPAMD_DBDIR (defined
+ * outside this file) concatenated with a fixed file name.
+ */
+#define SQLITE_CACHE_PATH RSPAMD_DBDIR "/learn_cache.sqlite"
+
+/* Runtime state of the sqlite3 learn cache */
+struct rspamd_stat_sqlite3_ctx {
+ sqlite3 *db; /* connection opened by rspamd_stat_cache_sqlite3_init() */
+};
+
+/**
+ * Open the sqlite3 learn cache database, creating the file and its schema
+ * when they are missing.
+ *
+ * The cache is enabled when at least one classifier in `cfg->classifiers`
+ * either has no "cache" option or sets it to the string "sqlite3"
+ * (compared case-insensitively). A "cache" option that is not a string does
+ * not select the sqlite3 cache.
+ *
+ * @param ctx statistics context (not used by this function)
+ * @param cfg configuration whose classifier list is scanned
+ * @return a newly allocated struct rspamd_stat_sqlite3_ctx holding the open
+ * connection, or NULL when no classifier selects the sqlite3 cache or when
+ * the database cannot be opened or initialized
+ */
+gpointer
+rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx,
+		struct rspamd_config *cfg)
+{
+	struct rspamd_stat_sqlite3_ctx *new = NULL;
+	struct rspamd_classifier_config *clf;
+	const ucl_object_t *obj;
+	const gchar *cache_type;
+	GList *cur;
+	sqlite3 *sqlite = NULL;
+	gboolean has_sqlite_cache = FALSE;
+	gint rc;
+
+	cur = cfg->classifiers;
+
+	while (cur) {
+		clf = cur->data;
+
+		obj = ucl_object_find_key (clf->opts, "cache");
+
+		/* Sqlite3 cache is the default learn cache method */
+		if (obj == NULL) {
+			has_sqlite_cache = TRUE;
+			break;
+		}
+
+		/*
+		 * ucl_object_tostring() yields NULL for non-string objects and
+		 * g_ascii_strcasecmp() must not be called with a NULL argument.
+		 */
+		cache_type = ucl_object_tostring (obj);
+
+		if (cache_type != NULL &&
+				g_ascii_strcasecmp (cache_type, "sqlite3") == 0) {
+			has_sqlite_cache = TRUE;
+			break;
+		}
+
+		cur = g_list_next (cur);
+	}
+
+	if (!has_sqlite_cache) {
+		return NULL;
+	}
+
+	if ((rc = sqlite3_open_v2 (SQLITE_CACHE_PATH, &sqlite,
+			SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_NOMUTEX, NULL))
+			!= SQLITE_OK) {
+		msg_err ("Cannot open sqlite db %s: %s", SQLITE_CACHE_PATH,
+				sqlite3_errstr (rc));
+		/*
+		 * A handle is normally returned even when opening fails and has to
+		 * be released; sqlite3_close (NULL) is a harmless no-op.
+		 */
+		sqlite3_close (sqlite);
+
+		return NULL;
+	}
+
+	if ((rc = sqlite3_exec (sqlite, create_tables_sql, NULL, NULL, NULL))
+			!= SQLITE_OK) {
+		sqlite3_close (sqlite);
+		msg_err ("Cannot initialize sqlite db %s: %s", SQLITE_CACHE_PATH,
+				sqlite3_errstr (rc));
+
+		return NULL;
+	}
+
+	new = g_slice_alloc (sizeof (*new));
+	new->db = sqlite;
+
+	return new;
+}
+
+/**
+ * Look up a message digest in the `learns` table and record the new learn
+ * class for it.
+ *
+ * @param h digest bytes used as the lookup key
+ * @param len number of bytes in `h`
+ * @param is_spam class the message is being learned as
+ * @param ctx cache context with an open database connection
+ * @return RSPAMD_LEARN_INGORE when the digest is already stored with the
+ * same class; RSPAMD_LEARN_UNLEARN when it is stored with the opposite class
+ * (the stored flag is then updated); RSPAMD_LEARN_OK when the digest was not
+ * found (it is then inserted) or when the lookup statement cannot be prepared.
+ * Database errors are logged and never reported to the caller.
+ */
+static rspamd_learn_t
+rspamd_stat_cache_sqlite3_check (const guchar *h, gsize len, gboolean is_spam,
+		struct rspamd_stat_sqlite3_ctx *ctx)
+{
+	static const gchar select_sql[] = "SELECT flag FROM learns WHERE digest=?1";
+	static const gchar insert_sql[] = "INSERT INTO learns(digest, flag) VALUES "
+			"(?1, ?2);";
+	static const gchar update_sql[] = "UPDATE learns SET flag=?1 WHERE digest=?2";
+	sqlite3_stmt *st = NULL;
+	gint rc, flag;
+
+	if ((rc = sqlite3_prepare_v2 (ctx->db, select_sql,
+			-1, &st, NULL)) != SQLITE_OK) {
+		msg_err ("Cannot prepare sql %s: %s", select_sql, sqlite3_errstr (rc));
+		return RSPAMD_LEARN_OK;
+	}
+
+	/* The digest buffer outlives the statement, hence SQLITE_STATIC */
+	sqlite3_bind_text (st, 1, (const char *)h, (gint)len, SQLITE_STATIC);
+
+	rc = sqlite3_step (st);
+
+	if (rc == SQLITE_ROW) {
+		/* We have some existing record in the table */
+		flag = sqlite3_column_int (st, 0);
+		sqlite3_finalize (st);
+
+		if ((flag && is_spam) || (!flag && !is_spam)) {
+			/* Already learned */
+			return RSPAMD_LEARN_INGORE;
+		}
+
+		/* Need to relearn: flip the stored class */
+		if ((rc = sqlite3_prepare_v2 (ctx->db, update_sql,
+				-1, &st, NULL)) != SQLITE_OK) {
+			msg_err ("Cannot prepare sql %s: %s", update_sql,
+					sqlite3_errstr (rc));
+		}
+		else {
+			sqlite3_bind_int (st, 1, is_spam ? 1 : 0);
+			sqlite3_bind_text (st, 2, (const char *)h, (gint)len,
+					SQLITE_STATIC);
+
+			if ((rc = sqlite3_step (st)) != SQLITE_DONE) {
+				msg_err ("Cannot execute sql %s: %s", update_sql,
+						sqlite3_errstr (rc));
+			}
+
+			sqlite3_finalize (st);
+		}
+
+		return RSPAMD_LEARN_UNLEARN;
+	}
+
+	sqlite3_finalize (st);
+
+	if (rc != SQLITE_DONE) {
+		/*
+		 * The lookup itself failed. The digest is still treated as unknown
+		 * and an insert is attempted, but the failure is no longer silent.
+		 */
+		msg_err ("Cannot execute sql %s: %s", select_sql, sqlite3_errstr (rc));
+	}
+
+	/* Insert result new id */
+	if ((rc = sqlite3_prepare_v2 (ctx->db, insert_sql,
+			-1, &st, NULL)) != SQLITE_OK) {
+		msg_err ("Cannot prepare sql %s: %s", insert_sql, sqlite3_errstr (rc));
+	}
+	else {
+		sqlite3_bind_text (st, 1, (const char *)h, (gint)len, SQLITE_STATIC);
+		sqlite3_bind_int (st, 2, is_spam ? 1 : 0);
+
+		if ((rc = sqlite3_step (st)) != SQLITE_DONE) {
+			msg_err ("Cannot execute sql %s: %s", insert_sql,
+					sqlite3_errstr (rc));
+		}
+
+		sqlite3_finalize (st);
+	}
+
+	return RSPAMD_LEARN_OK;
+}
+
+/**
+ * Hash all words of the task's text parts with blake2b and check the
+ * resulting digest against the learn cache.
+ *
+ * @param task task whose `text_parts` list supplies the words to hash
+ * @param is_spam class the message is being learned as
+ * @param c cache context returned by rspamd_stat_cache_sqlite3_init()
+ * @return RSPAMD_LEARN_OK when `c` is NULL or has no open database;
+ * otherwise the result of rspamd_stat_cache_sqlite3_check() for the digest
+ */
+rspamd_learn_t
+rspamd_stat_cache_sqlite3_process(struct rspamd_task *task,
+		gboolean is_spam, gpointer c)
+{
+	struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c;
+	struct mime_text_part *part;
+	blake2b_state st;
+	rspamd_fstring_t *word;
+	guchar out[BLAKE2B_OUTBYTES];
+	GList *cur;
+	guint i;
+
+	/* Without a usable cache every message is learned unconditionally */
+	if (ctx == NULL || ctx->db == NULL) {
+		return RSPAMD_LEARN_OK;
+	}
+
+	blake2b_init (&st, sizeof (out));
+
+	for (cur = task->text_parts; cur != NULL; cur = g_list_next (cur)) {
+		part = (struct mime_text_part *)cur->data;
+
+		/*
+		 * Defensive: a part without a words array contributes nothing to
+		 * the digest instead of being dereferenced.
+		 * NOTE(review): whether `words` can be NULL is not visible here.
+		 */
+		if (part == NULL || part->words == NULL) {
+			continue;
+		}
+
+		for (i = 0; i < part->words->len; i ++) {
+			word = &g_array_index (part->words, rspamd_fstring_t, i);
+			blake2b_update (&st, word->begin, word->len);
+		}
+	}
+
+	blake2b_final (&st, out, sizeof (out));
+
+	return rspamd_stat_cache_sqlite3_check (out, sizeof (out), is_spam, ctx);
+}