Browse Source

Add sqlite3 learn cache.

tags/0.9.0
Vsevolod Stakhov 9 years ago
parent
commit
f086958b4b

+ 5
- 1
src/libstat/CMakeLists.txt View File

@@ -8,16 +8,20 @@ SET(TOKENIZERSSRC tokenizers/tokenizers.c
# Per-component source lists that make up the rspamd-stat library
SET(CLASSIFIERSSRC classifiers/bayes.c)
SET(BACKENDSSRC backends/mmaped_file.c)

# Learn cache implementations
SET(CACHESSRC learn_cache/sqlite3_cache.c)
# NOTE(review): the argument list below shows `${BACKENDSSRC})` and
# `${BACKENDSSRC}` back to back; this looks like the removed and the added
# line of the diff hunk rendered together - confirm against the real file.
ADD_LIBRARY(rspamd-stat ${LINK_TYPE} ${LIBSTATSRC}
${TOKENIZERSSRC}
${CLASSIFIERSSRC}
${BACKENDSSRC})
${BACKENDSSRC}
${CACHESSRC})
# The library version property is only set for non-Debian builds
IF(NOT DEBIAN_BUILD)
SET_TARGET_PROPERTIES(rspamd-stat PROPERTIES VERSION ${RSPAMD_VERSION})
ENDIF(NOT DEBIAN_BUILD)
SET_TARGET_PROPERTIES(rspamd-stat PROPERTIES LINKER_LANGUAGE C COMPILE_FLAGS "-DRSPAMD_LIB")
TARGET_LINK_LIBRARIES(rspamd-stat rspamd-server)
# sqlite3 is linked in for the sqlite3 learn cache source listed in CACHESSRC
TARGET_LINK_LIBRARIES(rspamd-stat sqlite3)

IF(CMAKE_COMPILER_IS_GNUCC)
SET_TARGET_PROPERTIES(rspamd-stat PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing")

+ 6
- 1
src/libstat/learn_cache/learn_cache.h View File

@@ -34,10 +34,15 @@ typedef enum rspamd_learn_cache_result {
RSPAMD_LEARN_INGORE
} rspamd_learn_t;

/* Forward declarations: only pointers to these types are used below */
struct rspamd_task;
struct rspamd_stat_ctx;
struct rspamd_config;

/*
 * Descriptor of a learn cache implementation: a name, the two callbacks
 * that implement it and an opaque context pointer.
 */
struct rspamd_stat_cache {
/* Name of the cache implementation */
const char *name;
/*
 * Initialisation callback: receives the stat context and the configuration
 * and returns an opaque pointer (presumably the value stored in `ctx`
 * below - verify against the caller).
 */
gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg);
/*
 * NOTE(review): two declarations of `process` follow. This looks like the
 * removed (GTree based) and the added (task based) line of the diff hunk
 * rendered together; a struct cannot have both - confirm against the real
 * header.
 */
rspamd_learn_t (*process)(GTree *input, gboolean is_spam, gpointer ctx);
rspamd_learn_t (*process)(struct rspamd_task *task, gboolean is_spam,
gpointer ctx);
/* Opaque per-implementation state */
gpointer ctx;
};


+ 203
- 0
src/libstat/learn_cache/sqlite3_cache.c View File

@@ -0,0 +1,203 @@
/* Copyright (c) 2015, Vsevolod Stakhov
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "config.h"
#include "learn_cache.h"
#include "main.h"
#include "stat_api.h"
#include "stat_internal.h"
#include "blake2.h"
#include "ucl.h"
#include "fstring.h"
#include "message.h"
#include <sqlite3.h>

const char *create_tables_sql =
"BEGIN;"
"CREATE TABLE IF NOT EXISTS learns("
"id INTEGER PRIMARY KEY,"
"flag INTEGER NOT NULL,"
"digest TEXT NOT NULL);"
"CREATE UNIQUE INDEX IF NOT EXISTS d ON learns(digest);"
"COMMIT;";

/* Path of the cache database: a fixed file name appended to RSPAMD_DBDIR */
#define SQLITE_CACHE_PATH RSPAMD_DBDIR "/learn_cache.sqlite"

/* State of the sqlite3 learn cache, allocated by the init routine */
struct rspamd_stat_sqlite3_ctx {
/* Database connection used for all cache lookups and updates */
sqlite3 *db;
};

/**
 * Initialise the sqlite3 learn cache.
 *
 * The configured classifiers are scanned; the cache is enabled as soon as one
 * of them either has no "cache" option or sets it to "sqlite3"
 * (case-insensitive). In that case the database at SQLITE_CACHE_PATH is
 * opened (and created if missing) and the schema is bootstrapped.
 *
 * @param ctx statistics context (not used by this implementation)
 * @param cfg configuration whose classifiers list is inspected
 * @return a newly allocated struct rspamd_stat_sqlite3_ctx, or NULL when no
 *         classifier selects this cache or when the database cannot be
 *         opened or initialised (the failure is logged)
 */
gpointer
rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx,
		struct rspamd_config *cfg)
{
	struct rspamd_stat_sqlite3_ctx *new = NULL;
	struct rspamd_classifier_config *clf;
	const ucl_object_t *obj;
	GList *cur;
	sqlite3 *sqlite = NULL;
	gboolean has_sqlite_cache = FALSE;
	gint rc;

	for (cur = cfg->classifiers; cur != NULL; cur = g_list_next (cur)) {
		clf = cur->data;
		obj = ucl_object_find_key (clf->opts, "cache");

		/* Sqlite3 cache is the default learn cache method */
		if (obj == NULL || g_ascii_strcasecmp (ucl_object_tostring (obj),
				"sqlite3") == 0) {
			has_sqlite_cache = TRUE;
			break;
		}
	}

	if (!has_sqlite_cache) {
		return NULL;
	}

	rc = sqlite3_open_v2 (SQLITE_CACHE_PATH, &sqlite,
			SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_NOMUTEX, NULL);

	if (rc != SQLITE_OK) {
		msg_err ("Cannot open sqlite db %s: %s", SQLITE_CACHE_PATH,
				sqlite3_errstr (rc));
		/*
		 * sqlite3_open_v2 normally hands back a connection handle even on
		 * failure and it has to be released; closing NULL is a no-op.
		 */
		sqlite3_close (sqlite);

		return NULL;
	}

	rc = sqlite3_exec (sqlite, create_tables_sql, NULL, NULL, NULL);

	if (rc != SQLITE_OK) {
		sqlite3_close (sqlite);
		msg_err ("Cannot initialize sqlite db %s: %s", SQLITE_CACHE_PATH,
				sqlite3_errstr (rc));

		return NULL;
	}

	new = g_slice_alloc (sizeof (*new));
	new->db = sqlite;

	return new;
}

static rspamd_learn_t
rspamd_stat_cache_sqlite3_check (const guchar *h, gsize len, gboolean is_spam,
struct rspamd_stat_sqlite3_ctx *ctx)
{
static const gchar select_sql[] = "SELECT flag FROM learns WHERE digest=?1";
static const gchar insert_sql[] = "INSERT INTO learns(digest, flag) VALUES "
"(?1, ?2);";
static const gchar update_sql[] = "UPDATE learns SET flag=?1 WHERE digest=?2";
sqlite3_stmt *st = NULL;
gint rc, ret = RSPAMD_LEARN_OK, flag;

if ((rc = sqlite3_prepare_v2 (ctx->db, select_sql,
-1, &st, NULL)) != SQLITE_OK) {
msg_err ("Cannot prepare sql %s: %s", select_sql, sqlite3_errstr (rc));
return RSPAMD_LEARN_OK;
}

sqlite3_bind_text (st, 1, h, len, SQLITE_STATIC);

rc = sqlite3_step (st);

if (rc == SQLITE_ROW) {
/* We have some existing record in the table */
flag = sqlite3_column_int (st, 0);
sqlite3_finalize (st);

if ((flag && is_spam) || (!flag && !is_spam)) {
/* Already learned */
ret = RSPAMD_LEARN_INGORE;
}
else {
/* Need to relearn */
if ((rc = sqlite3_prepare_v2 (ctx->db, update_sql,
-1, &st, NULL)) != SQLITE_OK) {
msg_err ("Cannot prepare sql %s: %s", update_sql,
sqlite3_errstr (rc));
}
else {
sqlite3_bind_int (st, 1, is_spam ? 1 : 0);
sqlite3_bind_text (st, 2, h, len, SQLITE_STATIC);
sqlite3_step (st);
sqlite3_finalize (st);
}

return RSPAMD_LEARN_UNLEARN;
}
}
else {
/* Insert result new id */
sqlite3_finalize (st);
if ((rc = sqlite3_prepare_v2 (ctx->db, insert_sql,
-1, &st, NULL)) != SQLITE_OK) {
msg_err ("Cannot prepare sql %s: %s", insert_sql, sqlite3_errstr (rc));
}
else {
sqlite3_bind_text (st, 1, h, len, SQLITE_STATIC);
sqlite3_bind_int (st, 2, is_spam ? 1 : 0);
sqlite3_step (st);
sqlite3_finalize (st);
}
}

return ret;
}

/**
 * Learn cache entry point for a task.
 *
 * Feeds every word of every text part of the task into a single BLAKE2b
 * hash and checks the resulting digest against the sqlite3 cache.
 *
 * @param task task whose text parts are hashed
 * @param is_spam class the message is being learned as
 * @param c cache state returned by the init routine, may be NULL
 * @return the result of the cache lookup, or RSPAMD_LEARN_OK when the cache
 *         is not available
 */
rspamd_learn_t
rspamd_stat_cache_sqlite3_process(struct rspamd_task *task,
		gboolean is_spam, gpointer c)
{
	struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c;
	struct mime_text_part *part;
	blake2b_state st;
	rspamd_fstring_t *word;
	guchar out[BLAKE2B_OUTBYTES];
	GList *cur;
	guint i;

	if (ctx == NULL || ctx->db == NULL) {
		return RSPAMD_LEARN_OK;
	}

	blake2b_init (&st, sizeof (out));

	for (cur = task->text_parts; cur != NULL; cur = g_list_next (cur)) {
		part = (struct mime_text_part *)cur->data;

		/* Defensive: a part without a word array contributes nothing */
		if (part == NULL || part->words == NULL) {
			continue;
		}

		for (i = 0; i < part->words->len; i ++) {
			word = &g_array_index (part->words, rspamd_fstring_t, i);
			blake2b_update (&st, word->begin, word->len);
		}
	}

	blake2b_final (&st, out, sizeof (out));

	return rspamd_stat_cache_sqlite3_check (out, sizeof (out), is_spam, ctx);
}

Loading…
Cancel
Save