12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907 |
- /*
- * Copyright 2024 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "config.h"
- #include "rspamd.h"
- #include "sqlite3.h"
- #include "libutil/sqlite_utils.h"
- #include "libstat/stat_internal.h"
- #include "libmime/message.h"
- #include "lua/lua_common.h"
- #include "unix-std.h"
-
/* Backend type name; the visible code does not reference this macro directly. */
- #define SQLITE3_BACKEND_TYPE "sqlite3"
/* Schema version, spliced into "PRAGMA user_version=" by create_tables_sql. */
- #define SQLITE3_SCHEMA_VERSION "1"
/* Name given to the id 0 rows of `users` and `languages` by create_tables_sql. */
- #define SQLITE3_DEFAULT "default"
-
/*
 * State of one opened sqlite3 statfile.
 * Allocated in rspamd_sqlite3_opendb(), completed in rspamd_sqlite3_init()
 * and released in rspamd_sqlite3_close().
 */
- struct rspamd_stat_sqlite3_db {
/* Connection returned by rspamd_sqlite3_open_or_create() */
- sqlite3 *sqlite;
/* g_strdup()'ed copy of the database path */
- char *fname;
/* Prepared statements built from prepared_stmts by rspamd_sqlite3_init_prstmt() */
- GArray *prstmt;
/* Lua state taken from cfg->lua_state, used to run the extraction callbacks */
- lua_State *L;
/* Memory pool passed to rspamd_sqlite3_opendb() */
- rspamd_mempool_t *pool;
/* Set after a BEGIN statement is issued, cleared after COMMIT/ROLLBACK */
- gboolean in_transaction;
/* Per-user statistics enabled ("per_user" / "users_enabled" option) */
- gboolean enable_users;
/* Per-language statistics enabled ("per_language" / "languages_enabled" option) */
- gboolean enable_languages;
/* Lua registry reference of the user extraction function, -1 selects the built-in lookup */
- int cbref_user;
/* Lua registry reference of the language extraction function, -1 selects the built-in lookup */
- int cbref_language;
- };
-
/*
 * Per-task runtime created by rspamd_sqlite3_runtime() from the task pool.
 */
- struct rspamd_stat_sqlite3_rt {
/* Task this runtime was created for */
- struct rspamd_task *task;
/* Backend handle shared by all runtimes of the same statfile */
- struct rspamd_stat_sqlite3_db *db;
/* Statfile configuration (its is_spam, symbol and label fields are read here) */
- struct rspamd_statfile_config *cf;
/* Resolved user row id; -1 means "not resolved yet" */
- int64_t user_id;
/* Resolved language row id; -1 means "not resolved yet" */
- int64_t lang_id;
- };
-
/*
 * Schema creation script handed to rspamd_sqlite3_open_or_create().
 * It runs as a single immediate transaction that creates the tables and
 * indexes, stamps the schema version and inserts the default user and
 * language rows with id 0.
 */
- static const char *create_tables_sql =
- "BEGIN IMMEDIATE;"
/* Serialized tokenizer configuration */
- "CREATE TABLE tokenizer(data BLOB);"
- "CREATE TABLE users("
- "id INTEGER PRIMARY KEY,"
- "name TEXT,"
- "learns INTEGER"
- ");"
- "CREATE TABLE languages("
- "id INTEGER PRIMARY KEY,"
- "name TEXT,"
- "learns INTEGER"
- ");"
/* One row per (token, user, language); a conflicting insert replaces the row */
- "CREATE TABLE tokens("
- "token INTEGER NOT NULL,"
- "user INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,"
- "language INTEGER NOT NULL REFERENCES languages(id) ON DELETE CASCADE,"
- "value INTEGER,"
- "modified INTEGER,"
- "CONSTRAINT tid UNIQUE (token, user, language) ON CONFLICT REPLACE"
- ");"
- "CREATE UNIQUE INDEX IF NOT EXISTS un ON users(name);"
- "CREATE INDEX IF NOT EXISTS tok ON tokens(token);"
- "CREATE UNIQUE INDEX IF NOT EXISTS ln ON languages(name);"
- "PRAGMA user_version=" SQLITE3_SCHEMA_VERSION ";"
/* Default user and language share id 0, the fallback id used by the lookup helpers */
- "INSERT INTO users(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);"
- "INSERT INTO languages(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);"
- "COMMIT;";
-
/*
 * Indexes of the prepared statements. Each value is both the slot in
 * prepared_stmts and the statement selector passed to
 * rspamd_sqlite3_run_prstmt().
 */
- enum rspamd_stat_sqlite3_stmt_idx {
/* Transaction control: BEGIN IMMEDIATE / DEFERRED / EXCLUSIVE, COMMIT, ROLLBACK */
- RSPAMD_STAT_BACKEND_TRANSACTION_START_IM = 0,
- RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF,
- RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL,
- RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT,
- RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK,
/* Token lookup with (FULL) or without (SIMPLE) user/language filtering, and token store */
- RSPAMD_STAT_BACKEND_GET_TOKEN_FULL,
- RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE,
- RSPAMD_STAT_BACKEND_SET_TOKEN,
/* Learn counters kept per language and per user */
- RSPAMD_STAT_BACKEND_INC_LEARNS_LANG,
- RSPAMD_STAT_BACKEND_INC_LEARNS_USER,
- RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG,
- RSPAMD_STAT_BACKEND_DEC_LEARNS_USER,
- RSPAMD_STAT_BACKEND_GET_LEARNS,
/* Name -> id lookup and row creation for languages and users */
- RSPAMD_STAT_BACKEND_GET_LANGUAGE,
- RSPAMD_STAT_BACKEND_GET_USER,
- RSPAMD_STAT_BACKEND_INSERT_USER,
- RSPAMD_STAT_BACKEND_INSERT_LANGUAGE,
/* Tokenizer configuration blob */
- RSPAMD_STAT_BACKEND_SAVE_TOKENIZER,
- RSPAMD_STAT_BACKEND_LOAD_TOKENIZER,
/* Row counts reported by rspamd_sqlite3_get_stat() */
- RSPAMD_STAT_BACKEND_NTOKENS,
- RSPAMD_STAT_BACKEND_NLANGUAGES,
- RSPAMD_STAT_BACKEND_NUSERS,
/* Number of statements; used as the size of prepared_stmts */
- RSPAMD_STAT_BACKEND_MAX
- };
-
/*
 * Statement templates, indexed by enum rspamd_stat_sqlite3_stmt_idx and
 * compiled per connection by rspamd_sqlite3_init_prstmt().
 *
 * .args holds one letter per bound parameter and .ret one letter per value
 * handed back to the caller. Judging by the call sites in this file, "I" is
 * a 64-bit integer, "T" a C string and "B" a length plus a data pointer.
 * "L" is used to obtain the id of a freshly inserted row (presumably the
 * last insert rowid -- confirm in libutil/sqlite_utils).
 * .result is the sqlite3 step code the statement is expected to finish with.
 */
- static struct rspamd_sqlite3_prstmt prepared_stmts[RSPAMD_STAT_BACKEND_MAX] =
- {
- [RSPAMD_STAT_BACKEND_TRANSACTION_START_IM] = {
- .idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_IM,
- .sql = "BEGIN IMMEDIATE TRANSACTION;",
- .args = "",
- .stmt = NULL,
- .result = SQLITE_DONE,
- .flags = 0,
- .ret = "",
- },
- [RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF, .sql = "BEGIN DEFERRED TRANSACTION;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
- [RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL, .sql = "BEGIN EXCLUSIVE TRANSACTION;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
- [RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT, .sql = "COMMIT;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
- [RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK, .sql = "ROLLBACK;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
/* Token value for an exact user id and either the given language or language 0 */
- [RSPAMD_STAT_BACKEND_GET_TOKEN_FULL] = {.idx = RSPAMD_STAT_BACKEND_GET_TOKEN_FULL, .sql = "SELECT value FROM tokens "
- "LEFT JOIN languages ON tokens.language=languages.id "
- "LEFT JOIN users ON tokens.user=users.id "
- "WHERE token=?1 AND (users.id=?2) "
- "AND (languages.id=?3 OR languages.id=0);",
- .stmt = NULL,
- .args = "III",
- .result = SQLITE_ROW,
- .flags = 0,
- .ret = "I"},
/* Token value regardless of user and language */
- [RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE] = {.idx = RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE, .sql = "SELECT value FROM tokens WHERE token=?1", .stmt = NULL, .args = "I", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
/* Stores a token value and stamps `modified` with the current unix time */
- [RSPAMD_STAT_BACKEND_SET_TOKEN] = {.idx = RSPAMD_STAT_BACKEND_SET_TOKEN, .sql = "INSERT OR REPLACE INTO tokens (token, user, language, value, modified) "
- "VALUES (?1, ?2, ?3, ?4, strftime('%s','now'))",
- .stmt = NULL,
- .args = "IIII",
- .result = SQLITE_DONE,
- .flags = 0,
- .ret = ""},
- [RSPAMD_STAT_BACKEND_INC_LEARNS_LANG] = {.idx = RSPAMD_STAT_BACKEND_INC_LEARNS_LANG, .sql = "UPDATE languages SET learns=learns + 1 WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
- [RSPAMD_STAT_BACKEND_INC_LEARNS_USER] = {.idx = RSPAMD_STAT_BACKEND_INC_LEARNS_USER, .sql = "UPDATE users SET learns=learns + 1 WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
/* Decrements are clamped at zero by MAX(0, ...) */
- [RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG] = {.idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG, .sql = "UPDATE languages SET learns=MAX(0, learns - 1) WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
- [RSPAMD_STAT_BACKEND_DEC_LEARNS_USER] = {.idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_USER, .sql = "UPDATE users SET learns=MAX(0, learns - 1) WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
/* Total learns are summed over the languages table only */
- [RSPAMD_STAT_BACKEND_GET_LEARNS] = {.idx = RSPAMD_STAT_BACKEND_GET_LEARNS, .sql = "SELECT SUM(MAX(0, learns)) FROM languages", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
- [RSPAMD_STAT_BACKEND_GET_LANGUAGE] = {.idx = RSPAMD_STAT_BACKEND_GET_LANGUAGE, .sql = "SELECT id FROM languages WHERE name=?1", .stmt = NULL, .args = "T", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
- [RSPAMD_STAT_BACKEND_GET_USER] = {.idx = RSPAMD_STAT_BACKEND_GET_USER, .sql = "SELECT id FROM users WHERE name=?1", .stmt = NULL, .args = "T", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
- [RSPAMD_STAT_BACKEND_INSERT_USER] = {.idx = RSPAMD_STAT_BACKEND_INSERT_USER, .sql = "INSERT INTO users (name, learns) VALUES (?1, 0)", .stmt = NULL, .args = "T", .result = SQLITE_DONE, .flags = 0, .ret = "L"},
- [RSPAMD_STAT_BACKEND_INSERT_LANGUAGE] = {.idx = RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, .sql = "INSERT INTO languages (name, learns) VALUES (?1, 0)", .stmt = NULL, .args = "T", .result = SQLITE_DONE, .flags = 0, .ret = "L"},
- [RSPAMD_STAT_BACKEND_SAVE_TOKENIZER] = {.idx = RSPAMD_STAT_BACKEND_SAVE_TOKENIZER, .sql = "INSERT INTO tokenizer(data) VALUES (?1)", .stmt = NULL, .args = "B", .result = SQLITE_DONE, .flags = 0, .ret = ""},
- [RSPAMD_STAT_BACKEND_LOAD_TOKENIZER] = {.idx = RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, .sql = "SELECT data FROM tokenizer", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "B"},
- [RSPAMD_STAT_BACKEND_NTOKENS] = {.idx = RSPAMD_STAT_BACKEND_NTOKENS, .sql = "SELECT COUNT(*) FROM tokens", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
- [RSPAMD_STAT_BACKEND_NLANGUAGES] = {.idx = RSPAMD_STAT_BACKEND_NLANGUAGES, .sql = "SELECT COUNT(*) FROM languages", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
- [RSPAMD_STAT_BACKEND_NUSERS] = {.idx = RSPAMD_STAT_BACKEND_NUSERS, .sql = "SELECT COUNT(*) FROM users", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"}};
-
- static GQuark
- rspamd_sqlite3_backend_quark(void)
- {
- return g_quark_from_static_string("sqlite3-stat-backend");
- }
-
- static int64_t
- rspamd_sqlite3_get_user(struct rspamd_stat_sqlite3_db *db,
- struct rspamd_task *task, gboolean learn)
- {
- int64_t id = 0; /* Default user is 0 */
- int rc, err_idx;
- const char *user = NULL;
- struct rspamd_task **ptask;
- lua_State *L = db->L;
-
- if (db->cbref_user == -1) {
- user = rspamd_task_get_principal_recipient(task);
- }
- else {
- /* Execute lua function to get userdata */
- lua_pushcfunction(L, &rspamd_lua_traceback);
- err_idx = lua_gettop(L);
-
- lua_rawgeti(L, LUA_REGISTRYINDEX, db->cbref_user);
- ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
- *ptask = task;
- rspamd_lua_setclass(L, rspamd_task_classname, -1);
-
- if (lua_pcall(L, 1, 1, err_idx) != 0) {
- msg_err_task("call to user extraction script failed: %s",
- lua_tostring(L, -1));
- }
- else {
- user = rspamd_mempool_strdup(task->task_pool, lua_tostring(L, -1));
- }
-
- /* Result + error function */
- lua_settop(L, err_idx - 1);
- }
-
-
- if (user != NULL) {
- rspamd_mempool_set_variable(task->task_pool, "stat_user",
- (gpointer) user, NULL);
-
- rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
- RSPAMD_STAT_BACKEND_GET_USER, user, &id);
-
- if (rc != SQLITE_OK && learn) {
- /* We need to insert a new user */
- if (!db->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_START_IM);
- db->in_transaction = TRUE;
- }
-
- rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
- RSPAMD_STAT_BACKEND_INSERT_USER, user, &id);
- }
- }
-
- return id;
- }
-
- static int64_t
- rspamd_sqlite3_get_language(struct rspamd_stat_sqlite3_db *db,
- struct rspamd_task *task, gboolean learn)
- {
- int64_t id = 0; /* Default language is 0 */
- int rc, err_idx;
- unsigned int i;
- const char *language = NULL;
- struct rspamd_mime_text_part *tp;
- struct rspamd_task **ptask;
- lua_State *L = db->L;
-
- if (db->cbref_language == -1) {
- PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp)
- {
-
- if (tp->language != NULL && tp->language[0] != '\0' &&
- strcmp(tp->language, "en") != 0) {
- language = tp->language;
- break;
- }
- }
- }
- else {
- /* Execute lua function to get userdata */
- lua_pushcfunction(L, &rspamd_lua_traceback);
- err_idx = lua_gettop(L);
-
- lua_rawgeti(L, LUA_REGISTRYINDEX, db->cbref_language);
- ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
- *ptask = task;
- rspamd_lua_setclass(L, rspamd_task_classname, -1);
-
- if (lua_pcall(L, 1, 1, err_idx) != 0) {
- msg_err_task("call to language extraction script failed: %s",
- lua_tostring(L, -1));
- }
- else {
- language = rspamd_mempool_strdup(task->task_pool,
- lua_tostring(L, -1));
- }
-
- /* Result + error function */
- lua_settop(L, err_idx - 1);
- }
-
-
- /* XXX: We ignore multiple languages but default + extra */
- if (language != NULL) {
- rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
- RSPAMD_STAT_BACKEND_GET_LANGUAGE, language, &id);
-
- if (rc != SQLITE_OK && learn) {
- /* We need to insert a new language */
- if (!db->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_START_IM);
- db->in_transaction = TRUE;
- }
-
- rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
- RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, language, &id);
- }
- }
-
- return id;
- }
-
- static struct rspamd_stat_sqlite3_db *
- rspamd_sqlite3_opendb(rspamd_mempool_t *pool,
- struct rspamd_statfile_config *stcf,
- const char *path, const ucl_object_t *opts,
- gboolean create, GError **err)
- {
- struct rspamd_stat_sqlite3_db *bk;
- struct rspamd_stat_tokenizer *tokenizer;
- gpointer tk_conf;
- gsize sz = 0;
- int64_t sz64 = 0;
- char *tok_conf_encoded;
- int ret, ntries = 0;
- const int max_tries = 100;
- struct timespec sleep_ts = {
- .tv_sec = 0,
- .tv_nsec = 1000000};
-
- bk = g_malloc0(sizeof(*bk));
- bk->sqlite = rspamd_sqlite3_open_or_create(pool, path, create_tables_sql,
- 0, err);
- bk->pool = pool;
-
- if (bk->sqlite == NULL) {
- g_free(bk);
-
- return NULL;
- }
-
- bk->fname = g_strdup(path);
-
- bk->prstmt = rspamd_sqlite3_init_prstmt(bk->sqlite, prepared_stmts,
- RSPAMD_STAT_BACKEND_MAX, err);
-
- if (bk->prstmt == NULL) {
- sqlite3_close(bk->sqlite);
- g_free(bk);
-
- return NULL;
- }
-
- /* Check tokenizer configuration */
- if (rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz64, &tk_conf) != SQLITE_OK ||
- sz64 == 0) {
-
- while ((ret = rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL)) == SQLITE_BUSY &&
- ++ntries <= max_tries) {
- nanosleep(&sleep_ts, NULL);
- }
-
- msg_info_pool("absent tokenizer conf in %s, creating a new one",
- bk->fname);
- g_assert(stcf->clcf->tokenizer != NULL);
- tokenizer = rspamd_stat_get_tokenizer(stcf->clcf->tokenizer->name);
- g_assert(tokenizer != NULL);
- tk_conf = tokenizer->get_config(pool, stcf->clcf->tokenizer, &sz);
-
- /* Encode to base32 */
- tok_conf_encoded = rspamd_encode_base32(tk_conf, sz, RSPAMD_BASE32_DEFAULT);
-
- if (rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_SAVE_TOKENIZER,
- (int64_t) strlen(tok_conf_encoded),
- tok_conf_encoded) != SQLITE_OK) {
- sqlite3_close(bk->sqlite);
- g_free(bk);
- g_free(tok_conf_encoded);
-
- return NULL;
- }
-
- rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
- g_free(tok_conf_encoded);
- }
- else {
- g_free(tk_conf);
- }
-
- return bk;
- }
-
- gpointer
- rspamd_sqlite3_init(struct rspamd_stat_ctx *ctx,
- struct rspamd_config *cfg,
- struct rspamd_statfile *st)
- {
- struct rspamd_classifier_config *clf = st->classifier->cfg;
- struct rspamd_statfile_config *stf = st->stcf;
- const ucl_object_t *filenameo, *lang_enabled, *users_enabled;
- const char *filename, *lua_script;
- struct rspamd_stat_sqlite3_db *bk;
- GError *err = NULL;
-
- filenameo = ucl_object_lookup(stf->opts, "filename");
- if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
- filenameo = ucl_object_lookup(stf->opts, "path");
- if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
- msg_err_config("statfile %s has no filename defined", stf->symbol);
- return NULL;
- }
- }
-
- filename = ucl_object_tostring(filenameo);
-
- if ((bk = rspamd_sqlite3_opendb(cfg->cfg_pool, stf, filename,
- stf->opts, TRUE, &err)) == NULL) {
- msg_err_config("cannot open sqlite3 db %s: %e", filename, err);
- g_error_free(err);
- return NULL;
- }
-
- bk->L = cfg->lua_state;
-
- users_enabled = ucl_object_lookup_any(clf->opts, "per_user",
- "users_enabled", NULL);
- if (users_enabled != NULL) {
- if (ucl_object_type(users_enabled) == UCL_BOOLEAN) {
- bk->enable_users = ucl_object_toboolean(users_enabled);
- bk->cbref_user = -1;
- }
- else if (ucl_object_type(users_enabled) == UCL_STRING) {
- lua_script = ucl_object_tostring(users_enabled);
-
- if (luaL_dostring(cfg->lua_state, lua_script) != 0) {
- msg_err_config("cannot execute lua script for users "
- "extraction: %s",
- lua_tostring(cfg->lua_state, -1));
- }
- else {
- if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) {
- bk->enable_users = TRUE;
- bk->cbref_user = luaL_ref(cfg->lua_state,
- LUA_REGISTRYINDEX);
- }
- else {
- msg_err_config("lua script must return "
- "function(task) and not %s",
- lua_typename(cfg->lua_state, lua_type(
- cfg->lua_state, -1)));
- }
- }
- }
- }
- else {
- bk->enable_users = FALSE;
- }
-
- lang_enabled = ucl_object_lookup_any(clf->opts,
- "per_language", "languages_enabled", NULL);
-
- if (lang_enabled != NULL) {
- if (ucl_object_type(lang_enabled) == UCL_BOOLEAN) {
- bk->enable_languages = ucl_object_toboolean(lang_enabled);
- bk->cbref_language = -1;
- }
- else if (ucl_object_type(lang_enabled) == UCL_STRING) {
- lua_script = ucl_object_tostring(lang_enabled);
-
- if (luaL_dostring(cfg->lua_state, lua_script) != 0) {
- msg_err_config(
- "cannot execute lua script for languages "
- "extraction: %s",
- lua_tostring(cfg->lua_state, -1));
- }
- else {
- if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) {
- bk->enable_languages = TRUE;
- bk->cbref_language = luaL_ref(cfg->lua_state,
- LUA_REGISTRYINDEX);
- }
- else {
- msg_err_config("lua script must return "
- "function(task) and not %s",
- lua_typename(cfg->lua_state,
- lua_type(cfg->lua_state, -1)));
- }
- }
- }
- }
- else {
- bk->enable_languages = FALSE;
- }
-
- if (bk->enable_languages) {
- msg_info_config("enable per language statistics for %s",
- stf->symbol);
- }
-
- if (bk->enable_users) {
- msg_info_config("enable per users statistics for %s",
- stf->symbol);
- }
-
-
- return (gpointer) bk;
- }
-
- void rspamd_sqlite3_close(gpointer p)
- {
- struct rspamd_stat_sqlite3_db *bk = p;
-
- if (bk->sqlite) {
- if (bk->in_transaction) {
- rspamd_sqlite3_run_prstmt(bk->pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
- }
-
- rspamd_sqlite3_close_prstmt(bk->sqlite, bk->prstmt);
- sqlite3_close(bk->sqlite);
- g_free(bk->fname);
- g_free(bk);
- }
- }
-
- gpointer
- rspamd_sqlite3_runtime(struct rspamd_task *task,
- struct rspamd_statfile_config *stcf, gboolean learn, gpointer p, int _id)
- {
- struct rspamd_stat_sqlite3_rt *rt = NULL;
- struct rspamd_stat_sqlite3_db *bk = p;
-
- if (bk) {
- rt = rspamd_mempool_alloc(task->task_pool, sizeof(*rt));
- rt->db = bk;
- rt->task = task;
- rt->user_id = -1;
- rt->lang_id = -1;
- rt->cf = stcf;
- }
-
- return rt;
- }
-
- gboolean
- rspamd_sqlite3_process_tokens(struct rspamd_task *task,
- GPtrArray *tokens,
- int id, gpointer p)
- {
- struct rspamd_stat_sqlite3_db *bk;
- struct rspamd_stat_sqlite3_rt *rt = p;
- int64_t iv = 0;
- unsigned int i;
- rspamd_token_t *tok;
-
- g_assert(p != NULL);
- g_assert(tokens != NULL);
-
- bk = rt->db;
-
- for (i = 0; i < tokens->len; i++) {
- tok = g_ptr_array_index(tokens, i);
-
- if (bk == NULL) {
- /* Statfile is does not exist, so all values are zero */
- tok->values[id] = 0.0f;
- continue;
- }
-
- if (!bk->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF);
- bk->in_transaction = TRUE;
- }
-
- if (rt->user_id == -1) {
- if (bk->enable_users) {
- rt->user_id = rspamd_sqlite3_get_user(bk, task, FALSE);
- }
- else {
- rt->user_id = 0;
- }
- }
-
- if (rt->lang_id == -1) {
- if (bk->enable_languages) {
- rt->lang_id = rspamd_sqlite3_get_language(bk, task, FALSE);
- }
- else {
- rt->lang_id = 0;
- }
- }
-
- if (bk->enable_languages || bk->enable_users) {
- if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_GET_TOKEN_FULL,
- tok->data, rt->user_id, rt->lang_id, &iv) == SQLITE_OK) {
- tok->values[id] = iv;
- }
- else {
- tok->values[id] = 0.0f;
- }
- }
- else {
- if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE,
- tok->data, &iv) == SQLITE_OK) {
- tok->values[id] = iv;
- }
- else {
- tok->values[id] = 0.0f;
- }
- }
-
- if (rt->cf->is_spam) {
- task->flags |= RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS;
- }
- else {
- task->flags |= RSPAMD_TASK_FLAG_HAS_HAM_TOKENS;
- }
- }
-
-
- return TRUE;
- }
-
- gboolean
- rspamd_sqlite3_finalize_process(struct rspamd_task *task, gpointer runtime,
- gpointer ctx)
- {
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
-
- g_assert(rt != NULL);
- bk = rt->db;
-
- if (bk->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
- bk->in_transaction = FALSE;
- }
-
- rt->lang_id = -1;
- rt->user_id = -1;
-
- return TRUE;
- }
-
- gboolean
- rspamd_sqlite3_learn_tokens(struct rspamd_task *task, GPtrArray *tokens,
- int id, gpointer p)
- {
- struct rspamd_stat_sqlite3_db *bk;
- struct rspamd_stat_sqlite3_rt *rt = p;
- int64_t iv = 0;
- unsigned int i;
- rspamd_token_t *tok;
-
- g_assert(tokens != NULL);
- g_assert(p != NULL);
-
- bk = rt->db;
-
- for (i = 0; i < tokens->len; i++) {
- tok = g_ptr_array_index(tokens, i);
- if (bk == NULL) {
- /* Statfile is does not exist, so all values are zero */
- return FALSE;
- }
-
- if (!bk->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_START_IM);
- bk->in_transaction = TRUE;
- }
-
- if (rt->user_id == -1) {
- if (bk->enable_users) {
- rt->user_id = rspamd_sqlite3_get_user(bk, task, TRUE);
- }
- else {
- rt->user_id = 0;
- }
- }
-
- if (rt->lang_id == -1) {
- if (bk->enable_languages) {
- rt->lang_id = rspamd_sqlite3_get_language(bk, task, TRUE);
- }
- else {
- rt->lang_id = 0;
- }
- }
-
- iv = tok->values[id];
-
- if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_SET_TOKEN,
- tok->data, rt->user_id, rt->lang_id, iv) != SQLITE_OK) {
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK);
- bk->in_transaction = FALSE;
-
- return FALSE;
- }
- }
-
- return TRUE;
- }
-
- gboolean
- rspamd_sqlite3_finalize_learn(struct rspamd_task *task, gpointer runtime,
- gpointer ctx, GError **err)
- {
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
- int wal_frames, wal_checkpointed, mode;
-
- g_assert(rt != NULL);
- bk = rt->db;
-
- if (bk->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
- bk->in_transaction = FALSE;
- }
-
- #ifdef SQLITE_OPEN_WAL
- #ifdef SQLITE_CHECKPOINT_TRUNCATE
- mode = SQLITE_CHECKPOINT_TRUNCATE;
- #elif defined(SQLITE_CHECKPOINT_RESTART)
- mode = SQLITE_CHECKPOINT_RESTART;
- #elif defined(SQLITE_CHECKPOINT_FULL)
- mode = SQLITE_CHECKPOINT_FULL;
- #endif
- /* Perform wal checkpoint (might be long) */
- if (sqlite3_wal_checkpoint_v2(bk->sqlite,
- NULL,
- mode,
- &wal_frames,
- &wal_checkpointed) != SQLITE_OK) {
- msg_warn_task("cannot commit checkpoint: %s",
- sqlite3_errmsg(bk->sqlite));
-
- g_set_error(err, rspamd_sqlite3_backend_quark(), 500,
- "cannot commit checkpoint: %s",
- sqlite3_errmsg(bk->sqlite));
- return FALSE;
- }
- #endif
-
- return TRUE;
- }
-
- gulong
- rspamd_sqlite3_total_learns(struct rspamd_task *task, gpointer runtime,
- gpointer ctx)
- {
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
- uint64_t res;
-
- g_assert(rt != NULL);
- bk = rt->db;
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
-
- return res;
- }
-
- gulong
- rspamd_sqlite3_inc_learns(struct rspamd_task *task, gpointer runtime,
- gpointer ctx)
- {
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
- uint64_t res;
-
- g_assert(rt != NULL);
- bk = rt->db;
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_INC_LEARNS_LANG,
- rt->lang_id);
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_INC_LEARNS_USER,
- rt->user_id);
-
- if (bk->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
- bk->in_transaction = FALSE;
- }
-
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
-
- return res;
- }
-
- gulong
- rspamd_sqlite3_dec_learns(struct rspamd_task *task, gpointer runtime,
- gpointer ctx)
- {
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
- uint64_t res;
-
- g_assert(rt != NULL);
- bk = rt->db;
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG,
- rt->lang_id);
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_DEC_LEARNS_USER,
- rt->user_id);
-
- if (bk->in_transaction) {
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
- bk->in_transaction = FALSE;
- }
-
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
-
- return res;
- }
-
- gulong
- rspamd_sqlite3_learns(struct rspamd_task *task, gpointer runtime,
- gpointer ctx)
- {
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
- uint64_t res;
-
- g_assert(rt != NULL);
- bk = rt->db;
- rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
-
- return res;
- }
-
- ucl_object_t *
- rspamd_sqlite3_get_stat(gpointer runtime,
- gpointer ctx)
- {
- ucl_object_t *res = NULL;
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
- rspamd_mempool_t *pool;
- struct stat st;
- int64_t rev;
-
- g_assert(rt != NULL);
- bk = rt->db;
- pool = bk->pool;
-
- (void) stat(bk->fname, &st);
- rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_GET_LEARNS, &rev);
-
- res = ucl_object_typed_new(UCL_OBJECT);
- ucl_object_insert_key(res, ucl_object_fromint(rev), "revision",
- 0, false);
- ucl_object_insert_key(res, ucl_object_fromint(st.st_size), "size",
- 0, false);
- rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_NTOKENS, &rev);
- ucl_object_insert_key(res, ucl_object_fromint(rev), "total", 0, false);
- ucl_object_insert_key(res, ucl_object_fromint(rev), "used", 0, false);
- ucl_object_insert_key(res, ucl_object_fromstring(rt->cf->symbol),
- "symbol", 0, false);
- ucl_object_insert_key(res, ucl_object_fromstring("sqlite3"),
- "type", 0, false);
- rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_NLANGUAGES, &rev);
- ucl_object_insert_key(res, ucl_object_fromint(rev),
- "languages", 0, false);
- rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_NUSERS, &rev);
- ucl_object_insert_key(res, ucl_object_fromint(rev),
- "users", 0, false);
-
- if (rt->cf->label) {
- ucl_object_insert_key(res, ucl_object_fromstring(rt->cf->label),
- "label", 0, false);
- }
-
- return res;
- }
-
- gpointer
- rspamd_sqlite3_load_tokenizer_config(gpointer runtime,
- gsize *len)
- {
- gpointer tk_conf, copied_conf;
- uint64_t sz;
- struct rspamd_stat_sqlite3_rt *rt = runtime;
- struct rspamd_stat_sqlite3_db *bk;
-
- g_assert(rt != NULL);
- bk = rt->db;
-
- g_assert(rspamd_sqlite3_run_prstmt(rt->db->pool, bk->sqlite, bk->prstmt,
- RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz, &tk_conf) == SQLITE_OK);
- g_assert(sz > 0);
- /*
- * Here we can have either decoded or undecoded version of tokenizer config
- * XXX: dirty hack to check if we have osb magic here
- */
- if (sz > 7 && memcmp(tk_conf, "osbtokv", 7) == 0) {
- copied_conf = rspamd_mempool_alloc(rt->task->task_pool, sz);
- memcpy(copied_conf, tk_conf, sz);
- g_free(tk_conf);
- }
- else {
- /* Need to decode */
- copied_conf = rspamd_decode_base32(tk_conf, sz, len, RSPAMD_BASE32_DEFAULT);
- g_free(tk_conf);
- rspamd_mempool_add_destructor(rt->task->task_pool, g_free, copied_conf);
- }
-
- if (len) {
- *len = sz;
- }
-
- return copied_conf;
- }
|