You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

sqlite3_backend.c 27KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "sqlite3.h"
  19. #include "libutil/sqlite_utils.h"
  20. #include "libstat/stat_internal.h"
  21. #include "libmime/message.h"
  22. #include "lua/lua_common.h"
  23. #include "unix-std.h"
  24. #define SQLITE3_BACKEND_TYPE "sqlite3"
  25. #define SQLITE3_SCHEMA_VERSION "1"
  26. #define SQLITE3_DEFAULT "default"
/*
 * State of one opened sqlite3 statistics database.
 * Allocated by rspamd_sqlite3_opendb() and released by rspamd_sqlite3_close().
 */
struct rspamd_stat_sqlite3_db {
    sqlite3 *sqlite;           /* open database handle */
    char *fname;               /* g_strdup()'ed copy of the database path */
    GArray *prstmt;            /* prepared statements, addressed by enum rspamd_stat_sqlite3_stmt_idx */
    lua_State *L;              /* Lua state taken from cfg->lua_state in rspamd_sqlite3_init() */
    rspamd_mempool_t *pool;    /* pool the database was opened with */
    gboolean in_transaction;   /* TRUE between a BEGIN issued by this file and its COMMIT/ROLLBACK */
    gboolean enable_users;     /* per-user statistics ("per_user"/"users_enabled" option) */
    gboolean enable_languages; /* per-language statistics ("per_language"/"languages_enabled" option) */
    int cbref_user;            /* Lua registry ref of the user extraction function; -1 means use the principal recipient */
    int cbref_language;        /* Lua registry ref of the language extraction function; -1 means use the text parts' language */
};
/*
 * Per-task runtime created by rspamd_sqlite3_runtime() from the task pool.
 */
struct rspamd_stat_sqlite3_rt {
    struct rspamd_task *task;          /* task this runtime belongs to */
    struct rspamd_stat_sqlite3_db *db; /* shared database state */
    struct rspamd_statfile_config *cf; /* statfile configuration (symbol, label, is_spam) */
    int64_t user_id;                   /* users.id to use; -1 until resolved while processing/learning tokens */
    int64_t lang_id;                   /* languages.id to use; -1 until resolved while processing/learning tokens */
};
/*
 * Schema script handed to rspamd_sqlite3_open_or_create() in
 * rspamd_sqlite3_opendb(). It creates the tokenizer, users, languages and
 * tokens tables with their indexes, stamps the schema version and inserts the
 * default user and the default language, both with id 0.
 */
static const char *create_tables_sql =
    "BEGIN IMMEDIATE;"
    "CREATE TABLE tokenizer(data BLOB);"
    "CREATE TABLE users("
    "id INTEGER PRIMARY KEY,"
    "name TEXT,"
    "learns INTEGER"
    ");"
    "CREATE TABLE languages("
    "id INTEGER PRIMARY KEY,"
    "name TEXT,"
    "learns INTEGER"
    ");"
    "CREATE TABLE tokens("
    "token INTEGER NOT NULL,"
    "user INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,"
    "language INTEGER NOT NULL REFERENCES languages(id) ON DELETE CASCADE,"
    "value INTEGER,"
    "modified INTEGER,"
    "CONSTRAINT tid UNIQUE (token, user, language) ON CONFLICT REPLACE"
    ");"
    "CREATE UNIQUE INDEX IF NOT EXISTS un ON users(name);"
    "CREATE INDEX IF NOT EXISTS tok ON tokens(token);"
    "CREATE UNIQUE INDEX IF NOT EXISTS ln ON languages(name);"
    "PRAGMA user_version=" SQLITE3_SCHEMA_VERSION ";"
    "INSERT INTO users(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);"
    "INSERT INTO languages(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);"
    "COMMIT;";
/*
 * Indexes of the statements in prepared_stmts[]; the same values are passed
 * to rspamd_sqlite3_run_prstmt() to select the statement to execute.
 */
enum rspamd_stat_sqlite3_stmt_idx {
    /* Transaction control */
    RSPAMD_STAT_BACKEND_TRANSACTION_START_IM = 0,
    RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF,
    RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL,
    RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT,
    RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK,
    /* Token lookup and storage */
    RSPAMD_STAT_BACKEND_GET_TOKEN_FULL,
    RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE,
    RSPAMD_STAT_BACKEND_SET_TOKEN,
    /* Learn counters */
    RSPAMD_STAT_BACKEND_INC_LEARNS_LANG,
    RSPAMD_STAT_BACKEND_INC_LEARNS_USER,
    RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG,
    RSPAMD_STAT_BACKEND_DEC_LEARNS_USER,
    RSPAMD_STAT_BACKEND_GET_LEARNS,
    /* User and language resolution */
    RSPAMD_STAT_BACKEND_GET_LANGUAGE,
    RSPAMD_STAT_BACKEND_GET_USER,
    RSPAMD_STAT_BACKEND_INSERT_USER,
    RSPAMD_STAT_BACKEND_INSERT_LANGUAGE,
    /* Stored tokenizer configuration */
    RSPAMD_STAT_BACKEND_SAVE_TOKENIZER,
    RSPAMD_STAT_BACKEND_LOAD_TOKENIZER,
    /* Row counts used for statistics */
    RSPAMD_STAT_BACKEND_NTOKENS,
    RSPAMD_STAT_BACKEND_NLANGUAGES,
    RSPAMD_STAT_BACKEND_NUSERS,
    /* Number of statements; also the size of prepared_stmts[] */
    RSPAMD_STAT_BACKEND_MAX
};
/*
 * Statement templates given to rspamd_sqlite3_init_prstmt(), one entry per
 * enum rspamd_stat_sqlite3_stmt_idx value.
 *
 * NOTE(review): .args and .ret look like per-value type codes interpreted by
 * rspamd_sqlite3_run_prstmt(). Judging by the call sites in this file, 'I' is
 * an int64_t, 'T' a C string, 'B' a length plus pointer pair and 'L' the id of
 * the inserted row; .result is presumably the step code expected on success.
 * Confirm against libutil/sqlite_utils.
 */
static struct rspamd_sqlite3_prstmt prepared_stmts[RSPAMD_STAT_BACKEND_MAX] =
    {
        [RSPAMD_STAT_BACKEND_TRANSACTION_START_IM] = {
            .idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_IM,
            .sql = "BEGIN IMMEDIATE TRANSACTION;",
            .args = "",
            .stmt = NULL,
            .result = SQLITE_DONE,
            .flags = 0,
            .ret = "",
        },
        [RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF, .sql = "BEGIN DEFERRED TRANSACTION;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
        [RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL, .sql = "BEGIN EXCLUSIVE TRANSACTION;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
        [RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT, .sql = "COMMIT;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
        [RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK, .sql = "ROLLBACK;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""},
        /* Token value for a given user, matching either the given language or the default one (id 0) */
        [RSPAMD_STAT_BACKEND_GET_TOKEN_FULL] = {.idx = RSPAMD_STAT_BACKEND_GET_TOKEN_FULL, .sql = "SELECT value FROM tokens "
                                                                                                  "LEFT JOIN languages ON tokens.language=languages.id "
                                                                                                  "LEFT JOIN users ON tokens.user=users.id "
                                                                                                  "WHERE token=?1 AND (users.id=?2) "
                                                                                                  "AND (languages.id=?3 OR languages.id=0);",
                                                .stmt = NULL,
                                                .args = "III",
                                                .result = SQLITE_ROW,
                                                .flags = 0,
                                                .ret = "I"},
        [RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE] = {.idx = RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE, .sql = "SELECT value FROM tokens WHERE token=?1", .stmt = NULL, .args = "I", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
        [RSPAMD_STAT_BACKEND_SET_TOKEN] = {.idx = RSPAMD_STAT_BACKEND_SET_TOKEN, .sql = "INSERT OR REPLACE INTO tokens (token, user, language, value, modified) "
                                                                                        "VALUES (?1, ?2, ?3, ?4, strftime('%s','now'))",
                                           .stmt = NULL,
                                           .args = "IIII",
                                           .result = SQLITE_DONE,
                                           .flags = 0,
                                           .ret = ""},
        [RSPAMD_STAT_BACKEND_INC_LEARNS_LANG] = {.idx = RSPAMD_STAT_BACKEND_INC_LEARNS_LANG, .sql = "UPDATE languages SET learns=learns + 1 WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
        [RSPAMD_STAT_BACKEND_INC_LEARNS_USER] = {.idx = RSPAMD_STAT_BACKEND_INC_LEARNS_USER, .sql = "UPDATE users SET learns=learns + 1 WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
        /* Decrements never drop a counter below zero */
        [RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG] = {.idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG, .sql = "UPDATE languages SET learns=MAX(0, learns - 1) WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
        [RSPAMD_STAT_BACKEND_DEC_LEARNS_USER] = {.idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_USER, .sql = "UPDATE users SET learns=MAX(0, learns - 1) WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""},
        /* Total learns are summed over the languages table only */
        [RSPAMD_STAT_BACKEND_GET_LEARNS] = {.idx = RSPAMD_STAT_BACKEND_GET_LEARNS, .sql = "SELECT SUM(MAX(0, learns)) FROM languages", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
        [RSPAMD_STAT_BACKEND_GET_LANGUAGE] = {.idx = RSPAMD_STAT_BACKEND_GET_LANGUAGE, .sql = "SELECT id FROM languages WHERE name=?1", .stmt = NULL, .args = "T", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
        [RSPAMD_STAT_BACKEND_GET_USER] = {.idx = RSPAMD_STAT_BACKEND_GET_USER, .sql = "SELECT id FROM users WHERE name=?1", .stmt = NULL, .args = "T", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
        [RSPAMD_STAT_BACKEND_INSERT_USER] = {.idx = RSPAMD_STAT_BACKEND_INSERT_USER, .sql = "INSERT INTO users (name, learns) VALUES (?1, 0)", .stmt = NULL, .args = "T", .result = SQLITE_DONE, .flags = 0, .ret = "L"},
        [RSPAMD_STAT_BACKEND_INSERT_LANGUAGE] = {.idx = RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, .sql = "INSERT INTO languages (name, learns) VALUES (?1, 0)", .stmt = NULL, .args = "T", .result = SQLITE_DONE, .flags = 0, .ret = "L"},
        [RSPAMD_STAT_BACKEND_SAVE_TOKENIZER] = {.idx = RSPAMD_STAT_BACKEND_SAVE_TOKENIZER, .sql = "INSERT INTO tokenizer(data) VALUES (?1)", .stmt = NULL, .args = "B", .result = SQLITE_DONE, .flags = 0, .ret = ""},
        [RSPAMD_STAT_BACKEND_LOAD_TOKENIZER] = {.idx = RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, .sql = "SELECT data FROM tokenizer", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "B"},
        [RSPAMD_STAT_BACKEND_NTOKENS] = {.idx = RSPAMD_STAT_BACKEND_NTOKENS, .sql = "SELECT COUNT(*) FROM tokens", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
        [RSPAMD_STAT_BACKEND_NLANGUAGES] = {.idx = RSPAMD_STAT_BACKEND_NLANGUAGES, .sql = "SELECT COUNT(*) FROM languages", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"},
        [RSPAMD_STAT_BACKEND_NUSERS] = {.idx = RSPAMD_STAT_BACKEND_NUSERS, .sql = "SELECT COUNT(*) FROM users", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"}};
  146. static GQuark
  147. rspamd_sqlite3_backend_quark(void)
  148. {
  149. return g_quark_from_static_string("sqlite3-stat-backend");
  150. }
  151. static int64_t
  152. rspamd_sqlite3_get_user(struct rspamd_stat_sqlite3_db *db,
  153. struct rspamd_task *task, gboolean learn)
  154. {
  155. int64_t id = 0; /* Default user is 0 */
  156. int rc, err_idx;
  157. const char *user = NULL;
  158. struct rspamd_task **ptask;
  159. lua_State *L = db->L;
  160. if (db->cbref_user == -1) {
  161. user = rspamd_task_get_principal_recipient(task);
  162. }
  163. else {
  164. /* Execute lua function to get userdata */
  165. lua_pushcfunction(L, &rspamd_lua_traceback);
  166. err_idx = lua_gettop(L);
  167. lua_rawgeti(L, LUA_REGISTRYINDEX, db->cbref_user);
  168. ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
  169. *ptask = task;
  170. rspamd_lua_setclass(L, rspamd_task_classname, -1);
  171. if (lua_pcall(L, 1, 1, err_idx) != 0) {
  172. msg_err_task("call to user extraction script failed: %s",
  173. lua_tostring(L, -1));
  174. }
  175. else {
  176. user = rspamd_mempool_strdup(task->task_pool, lua_tostring(L, -1));
  177. }
  178. /* Result + error function */
  179. lua_settop(L, err_idx - 1);
  180. }
  181. if (user != NULL) {
  182. rspamd_mempool_set_variable(task->task_pool, "stat_user",
  183. (gpointer) user, NULL);
  184. rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
  185. RSPAMD_STAT_BACKEND_GET_USER, user, &id);
  186. if (rc != SQLITE_OK && learn) {
  187. /* We need to insert a new user */
  188. if (!db->in_transaction) {
  189. rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
  190. RSPAMD_STAT_BACKEND_TRANSACTION_START_IM);
  191. db->in_transaction = TRUE;
  192. }
  193. rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
  194. RSPAMD_STAT_BACKEND_INSERT_USER, user, &id);
  195. }
  196. }
  197. return id;
  198. }
  199. static int64_t
  200. rspamd_sqlite3_get_language(struct rspamd_stat_sqlite3_db *db,
  201. struct rspamd_task *task, gboolean learn)
  202. {
  203. int64_t id = 0; /* Default language is 0 */
  204. int rc, err_idx;
  205. unsigned int i;
  206. const char *language = NULL;
  207. struct rspamd_mime_text_part *tp;
  208. struct rspamd_task **ptask;
  209. lua_State *L = db->L;
  210. if (db->cbref_language == -1) {
  211. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp)
  212. {
  213. if (tp->language != NULL && tp->language[0] != '\0' &&
  214. strcmp(tp->language, "en") != 0) {
  215. language = tp->language;
  216. break;
  217. }
  218. }
  219. }
  220. else {
  221. /* Execute lua function to get userdata */
  222. lua_pushcfunction(L, &rspamd_lua_traceback);
  223. err_idx = lua_gettop(L);
  224. lua_rawgeti(L, LUA_REGISTRYINDEX, db->cbref_language);
  225. ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
  226. *ptask = task;
  227. rspamd_lua_setclass(L, rspamd_task_classname, -1);
  228. if (lua_pcall(L, 1, 1, err_idx) != 0) {
  229. msg_err_task("call to language extraction script failed: %s",
  230. lua_tostring(L, -1));
  231. }
  232. else {
  233. language = rspamd_mempool_strdup(task->task_pool,
  234. lua_tostring(L, -1));
  235. }
  236. /* Result + error function */
  237. lua_settop(L, err_idx - 1);
  238. }
  239. /* XXX: We ignore multiple languages but default + extra */
  240. if (language != NULL) {
  241. rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
  242. RSPAMD_STAT_BACKEND_GET_LANGUAGE, language, &id);
  243. if (rc != SQLITE_OK && learn) {
  244. /* We need to insert a new language */
  245. if (!db->in_transaction) {
  246. rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
  247. RSPAMD_STAT_BACKEND_TRANSACTION_START_IM);
  248. db->in_transaction = TRUE;
  249. }
  250. rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt,
  251. RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, language, &id);
  252. }
  253. }
  254. return id;
  255. }
  256. static struct rspamd_stat_sqlite3_db *
  257. rspamd_sqlite3_opendb(rspamd_mempool_t *pool,
  258. struct rspamd_statfile_config *stcf,
  259. const char *path, const ucl_object_t *opts,
  260. gboolean create, GError **err)
  261. {
  262. struct rspamd_stat_sqlite3_db *bk;
  263. struct rspamd_stat_tokenizer *tokenizer;
  264. gpointer tk_conf;
  265. gsize sz = 0;
  266. int64_t sz64 = 0;
  267. char *tok_conf_encoded;
  268. int ret, ntries = 0;
  269. const int max_tries = 100;
  270. struct timespec sleep_ts = {
  271. .tv_sec = 0,
  272. .tv_nsec = 1000000};
  273. bk = g_malloc0(sizeof(*bk));
  274. bk->sqlite = rspamd_sqlite3_open_or_create(pool, path, create_tables_sql,
  275. 0, err);
  276. bk->pool = pool;
  277. if (bk->sqlite == NULL) {
  278. g_free(bk);
  279. return NULL;
  280. }
  281. bk->fname = g_strdup(path);
  282. bk->prstmt = rspamd_sqlite3_init_prstmt(bk->sqlite, prepared_stmts,
  283. RSPAMD_STAT_BACKEND_MAX, err);
  284. if (bk->prstmt == NULL) {
  285. sqlite3_close(bk->sqlite);
  286. g_free(bk);
  287. return NULL;
  288. }
  289. /* Check tokenizer configuration */
  290. if (rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  291. RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz64, &tk_conf) != SQLITE_OK ||
  292. sz64 == 0) {
  293. while ((ret = rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  294. RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL)) == SQLITE_BUSY &&
  295. ++ntries <= max_tries) {
  296. nanosleep(&sleep_ts, NULL);
  297. }
  298. msg_info_pool("absent tokenizer conf in %s, creating a new one",
  299. bk->fname);
  300. g_assert(stcf->clcf->tokenizer != NULL);
  301. tokenizer = rspamd_stat_get_tokenizer(stcf->clcf->tokenizer->name);
  302. g_assert(tokenizer != NULL);
  303. tk_conf = tokenizer->get_config(pool, stcf->clcf->tokenizer, &sz);
  304. /* Encode to base32 */
  305. tok_conf_encoded = rspamd_encode_base32(tk_conf, sz, RSPAMD_BASE32_DEFAULT);
  306. if (rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  307. RSPAMD_STAT_BACKEND_SAVE_TOKENIZER,
  308. (int64_t) strlen(tok_conf_encoded),
  309. tok_conf_encoded) != SQLITE_OK) {
  310. sqlite3_close(bk->sqlite);
  311. g_free(bk);
  312. g_free(tok_conf_encoded);
  313. return NULL;
  314. }
  315. rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  316. RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
  317. g_free(tok_conf_encoded);
  318. }
  319. else {
  320. g_free(tk_conf);
  321. }
  322. return bk;
  323. }
  324. gpointer
  325. rspamd_sqlite3_init(struct rspamd_stat_ctx *ctx,
  326. struct rspamd_config *cfg,
  327. struct rspamd_statfile *st)
  328. {
  329. struct rspamd_classifier_config *clf = st->classifier->cfg;
  330. struct rspamd_statfile_config *stf = st->stcf;
  331. const ucl_object_t *filenameo, *lang_enabled, *users_enabled;
  332. const char *filename, *lua_script;
  333. struct rspamd_stat_sqlite3_db *bk;
  334. GError *err = NULL;
  335. filenameo = ucl_object_lookup(stf->opts, "filename");
  336. if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
  337. filenameo = ucl_object_lookup(stf->opts, "path");
  338. if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) {
  339. msg_err_config("statfile %s has no filename defined", stf->symbol);
  340. return NULL;
  341. }
  342. }
  343. filename = ucl_object_tostring(filenameo);
  344. if ((bk = rspamd_sqlite3_opendb(cfg->cfg_pool, stf, filename,
  345. stf->opts, TRUE, &err)) == NULL) {
  346. msg_err_config("cannot open sqlite3 db %s: %e", filename, err);
  347. g_error_free(err);
  348. return NULL;
  349. }
  350. bk->L = cfg->lua_state;
  351. users_enabled = ucl_object_lookup_any(clf->opts, "per_user",
  352. "users_enabled", NULL);
  353. if (users_enabled != NULL) {
  354. if (ucl_object_type(users_enabled) == UCL_BOOLEAN) {
  355. bk->enable_users = ucl_object_toboolean(users_enabled);
  356. bk->cbref_user = -1;
  357. }
  358. else if (ucl_object_type(users_enabled) == UCL_STRING) {
  359. lua_script = ucl_object_tostring(users_enabled);
  360. if (luaL_dostring(cfg->lua_state, lua_script) != 0) {
  361. msg_err_config("cannot execute lua script for users "
  362. "extraction: %s",
  363. lua_tostring(cfg->lua_state, -1));
  364. }
  365. else {
  366. if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) {
  367. bk->enable_users = TRUE;
  368. bk->cbref_user = luaL_ref(cfg->lua_state,
  369. LUA_REGISTRYINDEX);
  370. }
  371. else {
  372. msg_err_config("lua script must return "
  373. "function(task) and not %s",
  374. lua_typename(cfg->lua_state, lua_type(
  375. cfg->lua_state, -1)));
  376. }
  377. }
  378. }
  379. }
  380. else {
  381. bk->enable_users = FALSE;
  382. }
  383. lang_enabled = ucl_object_lookup_any(clf->opts,
  384. "per_language", "languages_enabled", NULL);
  385. if (lang_enabled != NULL) {
  386. if (ucl_object_type(lang_enabled) == UCL_BOOLEAN) {
  387. bk->enable_languages = ucl_object_toboolean(lang_enabled);
  388. bk->cbref_language = -1;
  389. }
  390. else if (ucl_object_type(lang_enabled) == UCL_STRING) {
  391. lua_script = ucl_object_tostring(lang_enabled);
  392. if (luaL_dostring(cfg->lua_state, lua_script) != 0) {
  393. msg_err_config(
  394. "cannot execute lua script for languages "
  395. "extraction: %s",
  396. lua_tostring(cfg->lua_state, -1));
  397. }
  398. else {
  399. if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) {
  400. bk->enable_languages = TRUE;
  401. bk->cbref_language = luaL_ref(cfg->lua_state,
  402. LUA_REGISTRYINDEX);
  403. }
  404. else {
  405. msg_err_config("lua script must return "
  406. "function(task) and not %s",
  407. lua_typename(cfg->lua_state,
  408. lua_type(cfg->lua_state, -1)));
  409. }
  410. }
  411. }
  412. }
  413. else {
  414. bk->enable_languages = FALSE;
  415. }
  416. if (bk->enable_languages) {
  417. msg_info_config("enable per language statistics for %s",
  418. stf->symbol);
  419. }
  420. if (bk->enable_users) {
  421. msg_info_config("enable per users statistics for %s",
  422. stf->symbol);
  423. }
  424. return (gpointer) bk;
  425. }
  426. void rspamd_sqlite3_close(gpointer p)
  427. {
  428. struct rspamd_stat_sqlite3_db *bk = p;
  429. if (bk->sqlite) {
  430. if (bk->in_transaction) {
  431. rspamd_sqlite3_run_prstmt(bk->pool, bk->sqlite, bk->prstmt,
  432. RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
  433. }
  434. rspamd_sqlite3_close_prstmt(bk->sqlite, bk->prstmt);
  435. sqlite3_close(bk->sqlite);
  436. g_free(bk->fname);
  437. g_free(bk);
  438. }
  439. }
  440. gpointer
  441. rspamd_sqlite3_runtime(struct rspamd_task *task,
  442. struct rspamd_statfile_config *stcf, gboolean learn, gpointer p, int _id)
  443. {
  444. struct rspamd_stat_sqlite3_rt *rt = NULL;
  445. struct rspamd_stat_sqlite3_db *bk = p;
  446. if (bk) {
  447. rt = rspamd_mempool_alloc(task->task_pool, sizeof(*rt));
  448. rt->db = bk;
  449. rt->task = task;
  450. rt->user_id = -1;
  451. rt->lang_id = -1;
  452. rt->cf = stcf;
  453. }
  454. return rt;
  455. }
  456. gboolean
  457. rspamd_sqlite3_process_tokens(struct rspamd_task *task,
  458. GPtrArray *tokens,
  459. int id, gpointer p)
  460. {
  461. struct rspamd_stat_sqlite3_db *bk;
  462. struct rspamd_stat_sqlite3_rt *rt = p;
  463. int64_t iv = 0;
  464. unsigned int i;
  465. rspamd_token_t *tok;
  466. g_assert(p != NULL);
  467. g_assert(tokens != NULL);
  468. bk = rt->db;
  469. for (i = 0; i < tokens->len; i++) {
  470. tok = g_ptr_array_index(tokens, i);
  471. if (bk == NULL) {
  472. /* Statfile is does not exist, so all values are zero */
  473. tok->values[id] = 0.0f;
  474. continue;
  475. }
  476. if (!bk->in_transaction) {
  477. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  478. RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF);
  479. bk->in_transaction = TRUE;
  480. }
  481. if (rt->user_id == -1) {
  482. if (bk->enable_users) {
  483. rt->user_id = rspamd_sqlite3_get_user(bk, task, FALSE);
  484. }
  485. else {
  486. rt->user_id = 0;
  487. }
  488. }
  489. if (rt->lang_id == -1) {
  490. if (bk->enable_languages) {
  491. rt->lang_id = rspamd_sqlite3_get_language(bk, task, FALSE);
  492. }
  493. else {
  494. rt->lang_id = 0;
  495. }
  496. }
  497. if (bk->enable_languages || bk->enable_users) {
  498. if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  499. RSPAMD_STAT_BACKEND_GET_TOKEN_FULL,
  500. tok->data, rt->user_id, rt->lang_id, &iv) == SQLITE_OK) {
  501. tok->values[id] = iv;
  502. }
  503. else {
  504. tok->values[id] = 0.0f;
  505. }
  506. }
  507. else {
  508. if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  509. RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE,
  510. tok->data, &iv) == SQLITE_OK) {
  511. tok->values[id] = iv;
  512. }
  513. else {
  514. tok->values[id] = 0.0f;
  515. }
  516. }
  517. if (rt->cf->is_spam) {
  518. task->flags |= RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS;
  519. }
  520. else {
  521. task->flags |= RSPAMD_TASK_FLAG_HAS_HAM_TOKENS;
  522. }
  523. }
  524. return TRUE;
  525. }
  526. gboolean
  527. rspamd_sqlite3_finalize_process(struct rspamd_task *task, gpointer runtime,
  528. gpointer ctx)
  529. {
  530. struct rspamd_stat_sqlite3_rt *rt = runtime;
  531. struct rspamd_stat_sqlite3_db *bk;
  532. g_assert(rt != NULL);
  533. bk = rt->db;
  534. if (bk->in_transaction) {
  535. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  536. RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
  537. bk->in_transaction = FALSE;
  538. }
  539. rt->lang_id = -1;
  540. rt->user_id = -1;
  541. return TRUE;
  542. }
  543. gboolean
  544. rspamd_sqlite3_learn_tokens(struct rspamd_task *task, GPtrArray *tokens,
  545. int id, gpointer p)
  546. {
  547. struct rspamd_stat_sqlite3_db *bk;
  548. struct rspamd_stat_sqlite3_rt *rt = p;
  549. int64_t iv = 0;
  550. unsigned int i;
  551. rspamd_token_t *tok;
  552. g_assert(tokens != NULL);
  553. g_assert(p != NULL);
  554. bk = rt->db;
  555. for (i = 0; i < tokens->len; i++) {
  556. tok = g_ptr_array_index(tokens, i);
  557. if (bk == NULL) {
  558. /* Statfile is does not exist, so all values are zero */
  559. return FALSE;
  560. }
  561. if (!bk->in_transaction) {
  562. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  563. RSPAMD_STAT_BACKEND_TRANSACTION_START_IM);
  564. bk->in_transaction = TRUE;
  565. }
  566. if (rt->user_id == -1) {
  567. if (bk->enable_users) {
  568. rt->user_id = rspamd_sqlite3_get_user(bk, task, TRUE);
  569. }
  570. else {
  571. rt->user_id = 0;
  572. }
  573. }
  574. if (rt->lang_id == -1) {
  575. if (bk->enable_languages) {
  576. rt->lang_id = rspamd_sqlite3_get_language(bk, task, TRUE);
  577. }
  578. else {
  579. rt->lang_id = 0;
  580. }
  581. }
  582. iv = tok->values[id];
  583. if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  584. RSPAMD_STAT_BACKEND_SET_TOKEN,
  585. tok->data, rt->user_id, rt->lang_id, iv) != SQLITE_OK) {
  586. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  587. RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK);
  588. bk->in_transaction = FALSE;
  589. return FALSE;
  590. }
  591. }
  592. return TRUE;
  593. }
  594. gboolean
  595. rspamd_sqlite3_finalize_learn(struct rspamd_task *task, gpointer runtime,
  596. gpointer ctx, GError **err)
  597. {
  598. struct rspamd_stat_sqlite3_rt *rt = runtime;
  599. struct rspamd_stat_sqlite3_db *bk;
  600. int wal_frames, wal_checkpointed, mode;
  601. g_assert(rt != NULL);
  602. bk = rt->db;
  603. if (bk->in_transaction) {
  604. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  605. RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
  606. bk->in_transaction = FALSE;
  607. }
  608. #ifdef SQLITE_OPEN_WAL
  609. #ifdef SQLITE_CHECKPOINT_TRUNCATE
  610. mode = SQLITE_CHECKPOINT_TRUNCATE;
  611. #elif defined(SQLITE_CHECKPOINT_RESTART)
  612. mode = SQLITE_CHECKPOINT_RESTART;
  613. #elif defined(SQLITE_CHECKPOINT_FULL)
  614. mode = SQLITE_CHECKPOINT_FULL;
  615. #endif
  616. /* Perform wal checkpoint (might be long) */
  617. if (sqlite3_wal_checkpoint_v2(bk->sqlite,
  618. NULL,
  619. mode,
  620. &wal_frames,
  621. &wal_checkpointed) != SQLITE_OK) {
  622. msg_warn_task("cannot commit checkpoint: %s",
  623. sqlite3_errmsg(bk->sqlite));
  624. g_set_error(err, rspamd_sqlite3_backend_quark(), 500,
  625. "cannot commit checkpoint: %s",
  626. sqlite3_errmsg(bk->sqlite));
  627. return FALSE;
  628. }
  629. #endif
  630. return TRUE;
  631. }
  632. gulong
  633. rspamd_sqlite3_total_learns(struct rspamd_task *task, gpointer runtime,
  634. gpointer ctx)
  635. {
  636. struct rspamd_stat_sqlite3_rt *rt = runtime;
  637. struct rspamd_stat_sqlite3_db *bk;
  638. uint64_t res;
  639. g_assert(rt != NULL);
  640. bk = rt->db;
  641. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  642. RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
  643. return res;
  644. }
  645. gulong
  646. rspamd_sqlite3_inc_learns(struct rspamd_task *task, gpointer runtime,
  647. gpointer ctx)
  648. {
  649. struct rspamd_stat_sqlite3_rt *rt = runtime;
  650. struct rspamd_stat_sqlite3_db *bk;
  651. uint64_t res;
  652. g_assert(rt != NULL);
  653. bk = rt->db;
  654. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  655. RSPAMD_STAT_BACKEND_INC_LEARNS_LANG,
  656. rt->lang_id);
  657. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  658. RSPAMD_STAT_BACKEND_INC_LEARNS_USER,
  659. rt->user_id);
  660. if (bk->in_transaction) {
  661. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  662. RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
  663. bk->in_transaction = FALSE;
  664. }
  665. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  666. RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
  667. return res;
  668. }
  669. gulong
  670. rspamd_sqlite3_dec_learns(struct rspamd_task *task, gpointer runtime,
  671. gpointer ctx)
  672. {
  673. struct rspamd_stat_sqlite3_rt *rt = runtime;
  674. struct rspamd_stat_sqlite3_db *bk;
  675. uint64_t res;
  676. g_assert(rt != NULL);
  677. bk = rt->db;
  678. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  679. RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG,
  680. rt->lang_id);
  681. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  682. RSPAMD_STAT_BACKEND_DEC_LEARNS_USER,
  683. rt->user_id);
  684. if (bk->in_transaction) {
  685. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  686. RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT);
  687. bk->in_transaction = FALSE;
  688. }
  689. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  690. RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
  691. return res;
  692. }
  693. gulong
  694. rspamd_sqlite3_learns(struct rspamd_task *task, gpointer runtime,
  695. gpointer ctx)
  696. {
  697. struct rspamd_stat_sqlite3_rt *rt = runtime;
  698. struct rspamd_stat_sqlite3_db *bk;
  699. uint64_t res;
  700. g_assert(rt != NULL);
  701. bk = rt->db;
  702. rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt,
  703. RSPAMD_STAT_BACKEND_GET_LEARNS, &res);
  704. return res;
  705. }
  706. ucl_object_t *
  707. rspamd_sqlite3_get_stat(gpointer runtime,
  708. gpointer ctx)
  709. {
  710. ucl_object_t *res = NULL;
  711. struct rspamd_stat_sqlite3_rt *rt = runtime;
  712. struct rspamd_stat_sqlite3_db *bk;
  713. rspamd_mempool_t *pool;
  714. struct stat st;
  715. int64_t rev;
  716. g_assert(rt != NULL);
  717. bk = rt->db;
  718. pool = bk->pool;
  719. (void) stat(bk->fname, &st);
  720. rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  721. RSPAMD_STAT_BACKEND_GET_LEARNS, &rev);
  722. res = ucl_object_typed_new(UCL_OBJECT);
  723. ucl_object_insert_key(res, ucl_object_fromint(rev), "revision",
  724. 0, false);
  725. ucl_object_insert_key(res, ucl_object_fromint(st.st_size), "size",
  726. 0, false);
  727. rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  728. RSPAMD_STAT_BACKEND_NTOKENS, &rev);
  729. ucl_object_insert_key(res, ucl_object_fromint(rev), "total", 0, false);
  730. ucl_object_insert_key(res, ucl_object_fromint(rev), "used", 0, false);
  731. ucl_object_insert_key(res, ucl_object_fromstring(rt->cf->symbol),
  732. "symbol", 0, false);
  733. ucl_object_insert_key(res, ucl_object_fromstring("sqlite3"),
  734. "type", 0, false);
  735. rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  736. RSPAMD_STAT_BACKEND_NLANGUAGES, &rev);
  737. ucl_object_insert_key(res, ucl_object_fromint(rev),
  738. "languages", 0, false);
  739. rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt,
  740. RSPAMD_STAT_BACKEND_NUSERS, &rev);
  741. ucl_object_insert_key(res, ucl_object_fromint(rev),
  742. "users", 0, false);
  743. if (rt->cf->label) {
  744. ucl_object_insert_key(res, ucl_object_fromstring(rt->cf->label),
  745. "label", 0, false);
  746. }
  747. return res;
  748. }
  749. gpointer
  750. rspamd_sqlite3_load_tokenizer_config(gpointer runtime,
  751. gsize *len)
  752. {
  753. gpointer tk_conf, copied_conf;
  754. uint64_t sz;
  755. struct rspamd_stat_sqlite3_rt *rt = runtime;
  756. struct rspamd_stat_sqlite3_db *bk;
  757. g_assert(rt != NULL);
  758. bk = rt->db;
  759. g_assert(rspamd_sqlite3_run_prstmt(rt->db->pool, bk->sqlite, bk->prstmt,
  760. RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz, &tk_conf) == SQLITE_OK);
  761. g_assert(sz > 0);
  762. /*
  763. * Here we can have either decoded or undecoded version of tokenizer config
  764. * XXX: dirty hack to check if we have osb magic here
  765. */
  766. if (sz > 7 && memcmp(tk_conf, "osbtokv", 7) == 0) {
  767. copied_conf = rspamd_mempool_alloc(rt->task->task_pool, sz);
  768. memcpy(copied_conf, tk_conf, sz);
  769. g_free(tk_conf);
  770. }
  771. else {
  772. /* Need to decode */
  773. copied_conf = rspamd_decode_base32(tk_conf, sz, len, RSPAMD_BASE32_DEFAULT);
  774. g_free(tk_conf);
  775. rspamd_mempool_add_destructor(rt->task->task_pool, g_free, copied_conf);
  776. }
  777. if (len) {
  778. *len = sz;
  779. }
  780. return copied_conf;
  781. }