You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fuzzy_backend_sqlite.c 27KB


  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "fuzzy_backend.h"
  19. #include "fuzzy_backend_sqlite.h"
  20. #include "unix-std.h"
  21. #include <sqlite3.h>
  22. #include "libutil/sqlite_utils.h"
  23. struct rspamd_fuzzy_backend_sqlite {
  24. sqlite3 *db;
  25. char *path;
  26. gchar id[MEMPOOL_UID_LEN];
  27. gsize count;
  28. gsize expired;
  29. rspamd_mempool_t *pool;
  30. };
  31. static const gdouble sql_sleep_time = 0.1;
  32. static const guint max_retries = 10;
  33. #define msg_err_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
  34. backend->pool->tag.tagname, backend->pool->tag.uid, \
  35. G_STRFUNC, \
  36. __VA_ARGS__)
  37. #define msg_warn_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
  38. backend->pool->tag.tagname, backend->pool->tag.uid, \
  39. G_STRFUNC, \
  40. __VA_ARGS__)
  41. #define msg_info_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
  42. backend->pool->tag.tagname, backend->pool->tag.uid, \
  43. G_STRFUNC, \
  44. __VA_ARGS__)
  45. #define msg_debug_fuzzy_backend(...) rspamd_conditional_debug_fast (NULL, NULL, \
  46. rspamd_fuzzy_sqlite_log_id, backend->pool->tag.tagname, backend->pool->tag.uid, \
  47. G_STRFUNC, \
  48. __VA_ARGS__)
  49. INIT_LOG_MODULE(fuzzy_sqlite)
  /*
   * Schema bootstrap script handed to rspamd_sqlite3_open_or_create ().
   * It creates, inside a single transaction and only when missing:
   *  - digests:  one row per stored hash (flag, digest, value, time);
   *  - shingles: (value, number) pairs pointing at a digests row;
   *  - sources:  per-source version bookkeeping;
   * together with the indexes used by the prepared statements.
   */
  static const char *create_tables_sql =
      "BEGIN;"
      /* Exact hashes */
      "CREATE TABLE IF NOT EXISTS digests("
      " id INTEGER PRIMARY KEY,"
      " flag INTEGER NOT NULL,"
      " digest TEXT NOT NULL,"
      " value INTEGER,"
      " time INTEGER);"
      /* Shingles that reference an exact hash */
      "CREATE TABLE IF NOT EXISTS shingles("
      " value INTEGER NOT NULL,"
      " number INTEGER NOT NULL,"
      " digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE "
      " ON UPDATE CASCADE);"
      /* Update sources and their versions */
      "CREATE TABLE IF NOT EXISTS sources("
      " name TEXT UNIQUE,"
      " version INTEGER,"
      " last INTEGER);"
      /* Indexes */
      "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
      "CREATE INDEX IF NOT EXISTS t ON digests(time);"
      "CREATE INDEX IF NOT EXISTS dgst_id ON shingles(digest_id);"
      "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
      "COMMIT;";
  #if 0
  /*
   * Disabled: stand-alone index creation script. The very same four
   * CREATE INDEX statements are part of create_tables_sql, so this script
   * is compiled out.
   */
  static const char *create_index_sql =
      "BEGIN;"
      "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
      "CREATE INDEX IF NOT EXISTS t ON digests(time);"
      "CREATE INDEX IF NOT EXISTS dgst_id ON shingles(digest_id);"
      "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
      "COMMIT;";
  #endif
  /*
   * Indexes into the prepared_stmts table. The numeric value of each
   * constant must equal the position of the matching table entry
   * (this is asserted when a statement is run).
   */
  enum rspamd_fuzzy_statement_idx {
      /* Transaction control */
      RSPAMD_FUZZY_BACKEND_TRANSACTION_START = 0,
      RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
      RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
      /* Writes to digests / shingles */
      RSPAMD_FUZZY_BACKEND_INSERT,
      RSPAMD_FUZZY_BACKEND_UPDATE,
      RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
      RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
      /* Lookups */
      RSPAMD_FUZZY_BACKEND_CHECK,
      RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
      RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
      /* Removal and maintenance */
      RSPAMD_FUZZY_BACKEND_DELETE,
      RSPAMD_FUZZY_BACKEND_COUNT,
      RSPAMD_FUZZY_BACKEND_EXPIRE,
      RSPAMD_FUZZY_BACKEND_VACUUM,
      RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
      /* Sources / versions */
      RSPAMD_FUZZY_BACKEND_ADD_SOURCE,
      RSPAMD_FUZZY_BACKEND_VERSION,
      RSPAMD_FUZZY_BACKEND_SET_VERSION,
      /* Number of statements, not a statement itself */
      RSPAMD_FUZZY_BACKEND_MAX
  };
  102. static struct rspamd_fuzzy_stmts {
  103. enum rspamd_fuzzy_statement_idx idx;
  104. const gchar *sql;
  105. const gchar *args;
  106. sqlite3_stmt *stmt;
  107. gint result;
  108. } prepared_stmts[RSPAMD_FUZZY_BACKEND_MAX] =
  109. {
  110. {
  111. .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
  112. .sql = "BEGIN TRANSACTION;",
  113. .args = "",
  114. .stmt = NULL,
  115. .result = SQLITE_DONE
  116. },
  117. {
  118. .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
  119. .sql = "COMMIT;",
  120. .args = "",
  121. .stmt = NULL,
  122. .result = SQLITE_DONE
  123. },
  124. {
  125. .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
  126. .sql = "ROLLBACK;",
  127. .args = "",
  128. .stmt = NULL,
  129. .result = SQLITE_DONE
  130. },
  131. {
  132. .idx = RSPAMD_FUZZY_BACKEND_INSERT,
  133. .sql = "INSERT INTO digests(flag, digest, value, time) VALUES"
  134. "(?1, ?2, ?3, strftime('%s','now'));",
  135. .args = "SDI",
  136. .stmt = NULL,
  137. .result = SQLITE_DONE
  138. },
  139. {
  140. .idx = RSPAMD_FUZZY_BACKEND_UPDATE,
  141. .sql = "UPDATE digests SET value = value + ?1, time = strftime('%s','now') WHERE "
  142. "digest==?2;",
  143. .args = "ID",
  144. .stmt = NULL,
  145. .result = SQLITE_DONE
  146. },
  147. {
  148. .idx = RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
  149. .sql = "UPDATE digests SET value = ?1, flag = ?2, time = strftime('%s','now') WHERE "
  150. "digest==?3;",
  151. .args = "IID",
  152. .stmt = NULL,
  153. .result = SQLITE_DONE
  154. },
  155. {
  156. .idx = RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
  157. .sql = "INSERT OR REPLACE INTO shingles(value, number, digest_id) "
  158. "VALUES (?1, ?2, ?3);",
  159. .args = "III",
  160. .stmt = NULL,
  161. .result = SQLITE_DONE
  162. },
  163. {
  164. .idx = RSPAMD_FUZZY_BACKEND_CHECK,
  165. .sql = "SELECT value, time, flag FROM digests WHERE digest==?1;",
  166. .args = "D",
  167. .stmt = NULL,
  168. .result = SQLITE_ROW
  169. },
  170. {
  171. .idx = RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
  172. .sql = "SELECT digest_id FROM shingles WHERE value=?1 AND number=?2",
  173. .args = "IS",
  174. .stmt = NULL,
  175. .result = SQLITE_ROW
  176. },
  177. {
  178. .idx = RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
  179. .sql = "SELECT digest, value, time, flag FROM digests WHERE id=?1",
  180. .args = "I",
  181. .stmt = NULL,
  182. .result = SQLITE_ROW
  183. },
  184. {
  185. .idx = RSPAMD_FUZZY_BACKEND_DELETE,
  186. .sql = "DELETE FROM digests WHERE digest==?1;",
  187. .args = "D",
  188. .stmt = NULL,
  189. .result = SQLITE_DONE
  190. },
  191. {
  192. .idx = RSPAMD_FUZZY_BACKEND_COUNT,
  193. .sql = "SELECT COUNT(*) FROM digests;",
  194. .args = "",
  195. .stmt = NULL,
  196. .result = SQLITE_ROW
  197. },
  198. {
  199. .idx = RSPAMD_FUZZY_BACKEND_EXPIRE,
  200. .sql = "DELETE FROM digests WHERE id IN (SELECT id FROM digests WHERE time < ?1 LIMIT ?2);",
  201. .args = "II",
  202. .stmt = NULL,
  203. .result = SQLITE_DONE
  204. },
  205. {
  206. .idx = RSPAMD_FUZZY_BACKEND_VACUUM,
  207. .sql = "VACUUM;",
  208. .args = "",
  209. .stmt = NULL,
  210. .result = SQLITE_DONE
  211. },
  212. {
  213. .idx = RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
  214. .sql = "DELETE FROM shingles WHERE value=?1 AND number=?2;",
  215. .args = "II",
  216. .stmt = NULL,
  217. .result = SQLITE_DONE
  218. },
  219. {
  220. .idx = RSPAMD_FUZZY_BACKEND_ADD_SOURCE,
  221. .sql = "INSERT OR IGNORE INTO sources(name, version, last) VALUES (?1, ?2, ?3);",
  222. .args = "TII",
  223. .stmt = NULL,
  224. .result = SQLITE_DONE
  225. },
  226. {
  227. .idx = RSPAMD_FUZZY_BACKEND_VERSION,
  228. .sql = "SELECT version FROM sources WHERE name=?1;",
  229. .args = "T",
  230. .stmt = NULL,
  231. .result = SQLITE_ROW
  232. },
  233. {
  234. .idx = RSPAMD_FUZZY_BACKEND_SET_VERSION,
  235. .sql = "INSERT OR REPLACE INTO sources (name, version, last) VALUES (?3, ?1, ?2);",
  236. .args = "IIT",
  237. .stmt = NULL,
  238. .result = SQLITE_DONE
  239. },
  240. };
  241. static GQuark
  242. rspamd_fuzzy_backend_sqlite_quark (void)
  243. {
  244. return g_quark_from_static_string ("fuzzy-backend-sqlite");
  245. }
  246. static gboolean
  247. rspamd_fuzzy_backend_sqlite_prepare_stmts (struct rspamd_fuzzy_backend_sqlite *bk, GError **err)
  248. {
  249. int i;
  250. for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i ++) {
  251. if (prepared_stmts[i].stmt != NULL) {
  252. /* Skip already prepared statements */
  253. continue;
  254. }
  255. if (sqlite3_prepare_v2 (bk->db, prepared_stmts[i].sql, -1,
  256. &prepared_stmts[i].stmt, NULL) != SQLITE_OK) {
  257. g_set_error (err, rspamd_fuzzy_backend_sqlite_quark (),
  258. -1, "Cannot initialize prepared sql `%s`: %s",
  259. prepared_stmts[i].sql, sqlite3_errmsg (bk->db));
  260. return FALSE;
  261. }
  262. }
  263. return TRUE;
  264. }
  265. static int
  266. rspamd_fuzzy_backend_sqlite_cleanup_stmt (struct rspamd_fuzzy_backend_sqlite *backend,
  267. int idx)
  268. {
  269. sqlite3_stmt *stmt;
  270. if (idx < 0 || idx >= RSPAMD_FUZZY_BACKEND_MAX) {
  271. return -1;
  272. }
  273. msg_debug_fuzzy_backend ("resetting `%s`", prepared_stmts[idx].sql);
  274. stmt = prepared_stmts[idx].stmt;
  275. sqlite3_clear_bindings (stmt);
  276. sqlite3_reset (stmt);
  277. return SQLITE_OK;
  278. }
  279. static int
  280. rspamd_fuzzy_backend_sqlite_run_stmt (struct rspamd_fuzzy_backend_sqlite *backend,
  281. gboolean auto_cleanup,
  282. int idx, ...)
  283. {
  284. int retcode;
  285. va_list ap;
  286. sqlite3_stmt *stmt;
  287. int i;
  288. const char *argtypes;
  289. guint retries = 0;
  290. struct timespec ts;
  291. if (idx < 0 || idx >= RSPAMD_FUZZY_BACKEND_MAX) {
  292. return -1;
  293. }
  294. stmt = prepared_stmts[idx].stmt;
  295. g_assert ((int)prepared_stmts[idx].idx == idx);
  296. if (stmt == NULL) {
  297. if ((retcode = sqlite3_prepare_v2 (backend->db, prepared_stmts[idx].sql, -1,
  298. &prepared_stmts[idx].stmt, NULL)) != SQLITE_OK) {
  299. msg_err_fuzzy_backend ("Cannot initialize prepared sql `%s`: %s",
  300. prepared_stmts[idx].sql, sqlite3_errmsg (backend->db));
  301. return retcode;
  302. }
  303. stmt = prepared_stmts[idx].stmt;
  304. }
  305. msg_debug_fuzzy_backend ("executing `%s` %s auto cleanup",
  306. prepared_stmts[idx].sql, auto_cleanup ? "with" : "without");
  307. argtypes = prepared_stmts[idx].args;
  308. sqlite3_clear_bindings (stmt);
  309. sqlite3_reset (stmt);
  310. va_start (ap, idx);
  311. for (i = 0; argtypes[i] != '\0'; i++) {
  312. switch (argtypes[i]) {
  313. case 'T':
  314. sqlite3_bind_text (stmt, i + 1, va_arg (ap, const char*), -1,
  315. SQLITE_STATIC);
  316. break;
  317. case 'I':
  318. sqlite3_bind_int64 (stmt, i + 1, va_arg (ap, gint64));
  319. break;
  320. case 'S':
  321. sqlite3_bind_int (stmt, i + 1, va_arg (ap, gint));
  322. break;
  323. case 'D':
  324. /* Special case for digests variable */
  325. sqlite3_bind_text (stmt, i + 1, va_arg (ap, const char*), 64,
  326. SQLITE_STATIC);
  327. break;
  328. }
  329. }
  330. va_end (ap);
  331. retry:
  332. retcode = sqlite3_step (stmt);
  333. if (retcode == prepared_stmts[idx].result) {
  334. retcode = SQLITE_OK;
  335. }
  336. else {
  337. if ((retcode == SQLITE_BUSY ||
  338. retcode == SQLITE_LOCKED) && retries++ < max_retries) {
  339. double_to_ts (sql_sleep_time, &ts);
  340. nanosleep (&ts, NULL);
  341. goto retry;
  342. }
  343. msg_debug_fuzzy_backend ("failed to execute query %s: %d, %s", prepared_stmts[idx].sql,
  344. retcode, sqlite3_errmsg (backend->db));
  345. }
  346. if (auto_cleanup) {
  347. sqlite3_clear_bindings (stmt);
  348. sqlite3_reset (stmt);
  349. }
  350. return retcode;
  351. }
  352. static void
  353. rspamd_fuzzy_backend_sqlite_close_stmts (struct rspamd_fuzzy_backend_sqlite *bk)
  354. {
  355. int i;
  356. for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i++) {
  357. if (prepared_stmts[i].stmt != NULL) {
  358. sqlite3_finalize (prepared_stmts[i].stmt);
  359. prepared_stmts[i].stmt = NULL;
  360. }
  361. }
  362. return;
  363. }
  364. static gboolean
  365. rspamd_fuzzy_backend_sqlite_run_sql (const gchar *sql, struct rspamd_fuzzy_backend_sqlite *bk,
  366. GError **err)
  367. {
  368. guint retries = 0;
  369. struct timespec ts;
  370. gint ret;
  371. do {
  372. ret = sqlite3_exec (bk->db, sql, NULL, NULL, NULL);
  373. double_to_ts (sql_sleep_time, &ts);
  374. } while (ret == SQLITE_BUSY && retries++ < max_retries &&
  375. nanosleep (&ts, NULL) == 0);
  376. if (ret != SQLITE_OK) {
  377. g_set_error (err, rspamd_fuzzy_backend_sqlite_quark (),
  378. -1, "Cannot execute raw sql `%s`: %s",
  379. sql, sqlite3_errmsg (bk->db));
  380. return FALSE;
  381. }
  382. return TRUE;
  383. }
  384. static struct rspamd_fuzzy_backend_sqlite *
  385. rspamd_fuzzy_backend_sqlite_open_db (const gchar *path, GError **err)
  386. {
  387. struct rspamd_fuzzy_backend_sqlite *bk;
  388. rspamd_cryptobox_hash_state_t st;
  389. guchar hash_out[rspamd_cryptobox_HASHBYTES];
  390. g_assert (path != NULL);
  391. bk = g_malloc0 (sizeof (*bk));
  392. bk->path = g_strdup (path);
  393. bk->expired = 0;
  394. bk->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "fuzzy_backend");
  395. bk->db = rspamd_sqlite3_open_or_create (bk->pool, bk->path,
  396. create_tables_sql, 1, err);
  397. if (bk->db == NULL) {
  398. rspamd_fuzzy_backend_sqlite_close (bk);
  399. return NULL;
  400. }
  401. if (!rspamd_fuzzy_backend_sqlite_prepare_stmts (bk, err)) {
  402. rspamd_fuzzy_backend_sqlite_close (bk);
  403. return NULL;
  404. }
  405. /* Set id for the backend */
  406. rspamd_cryptobox_hash_init (&st, NULL, 0);
  407. rspamd_cryptobox_hash_update (&st, path, strlen (path));
  408. rspamd_cryptobox_hash_final (&st, hash_out);
  409. rspamd_snprintf (bk->id, sizeof (bk->id), "%xs", hash_out);
  410. memcpy (bk->pool->tag.uid, bk->id, sizeof (bk->pool->tag.uid));
  411. return bk;
  412. }
  413. struct rspamd_fuzzy_backend_sqlite *
  414. rspamd_fuzzy_backend_sqlite_open (const gchar *path,
  415. gboolean vacuum,
  416. GError **err)
  417. {
  418. struct rspamd_fuzzy_backend_sqlite *backend;
  419. if (path == NULL) {
  420. g_set_error (err, rspamd_fuzzy_backend_sqlite_quark (),
  421. ENOENT, "Path has not been specified");
  422. return NULL;
  423. }
  424. /* Open database */
  425. if ((backend = rspamd_fuzzy_backend_sqlite_open_db (path, err)) == NULL) {
  426. return NULL;
  427. }
  428. if (rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE, RSPAMD_FUZZY_BACKEND_COUNT)
  429. == SQLITE_OK) {
  430. backend->count = sqlite3_column_int64 (
  431. prepared_stmts[RSPAMD_FUZZY_BACKEND_COUNT].stmt, 0);
  432. }
  433. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_COUNT);
  434. return backend;
  435. }
  /*
   * qsort () comparator for gint64 values, ascending order.
   *
   * The result is computed from two comparisons instead of a subtraction:
   * `ia - ib` can overflow and, once narrowed to gint, may lose its sign or
   * collapse to zero for values that differ by a multiple of 2^32.
   *
   * @param a pointer to the first gint64
   * @param b pointer to the second gint64
   * @return -1, 0 or 1 when *a is less than, equal to or greater than *b
   */
  static gint
  rspamd_fuzzy_backend_sqlite_int64_cmp (const void *a, const void *b)
  {
      const gint64 ia = *(const gint64 *)a, ib = *(const gint64 *)b;

      return (ia > ib) - (ia < ib);
  }
  442. struct rspamd_fuzzy_reply
  443. rspamd_fuzzy_backend_sqlite_check (struct rspamd_fuzzy_backend_sqlite *backend,
  444. const struct rspamd_fuzzy_cmd *cmd, gint64 expire)
  445. {
  446. struct rspamd_fuzzy_reply rep;
  447. const struct rspamd_fuzzy_shingle_cmd *shcmd;
  448. int rc;
  449. gint64 timestamp;
  450. gint64 shingle_values[RSPAMD_SHINGLE_SIZE], i, sel_id, cur_id,
  451. cur_cnt, max_cnt;
  452. memset (&rep, 0, sizeof (rep));
  453. memcpy (rep.digest, cmd->digest, sizeof (rep.digest));
  454. if (backend == NULL) {
  455. return rep;
  456. }
  457. /* Try direct match first of all */
  458. rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  459. RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
  460. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  461. RSPAMD_FUZZY_BACKEND_CHECK,
  462. cmd->digest);
  463. if (rc == SQLITE_OK) {
  464. timestamp = sqlite3_column_int64 (
  465. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 1);
  466. if (time (NULL) - timestamp > expire) {
  467. /* Expire element */
  468. msg_debug_fuzzy_backend ("requested hash has been expired");
  469. }
  470. else {
  471. rep.v1.value = sqlite3_column_int64 (
  472. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 0);
  473. rep.v1.prob = 1.0;
  474. rep.v1.flag = sqlite3_column_int (
  475. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 2);
  476. }
  477. }
  478. else if (cmd->shingles_count > 0) {
  479. /* Fuzzy match */
  480. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
  481. shcmd = (const struct rspamd_fuzzy_shingle_cmd *)cmd;
  482. for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
  483. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  484. RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
  485. shcmd->sgl.hashes[i], i);
  486. if (rc == SQLITE_OK) {
  487. shingle_values[i] = sqlite3_column_int64 (
  488. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE].stmt,
  489. 0);
  490. }
  491. else {
  492. shingle_values[i] = -1;
  493. }
  494. msg_debug_fuzzy_backend ("looking for shingle %L -> %L: %d", i,
  495. shcmd->sgl.hashes[i], rc);
  496. }
  497. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend,
  498. RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE);
  499. qsort (shingle_values, RSPAMD_SHINGLE_SIZE, sizeof (gint64),
  500. rspamd_fuzzy_backend_sqlite_int64_cmp);
  501. sel_id = -1;
  502. cur_id = -1;
  503. cur_cnt = 0;
  504. max_cnt = 0;
  505. for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
  506. if (shingle_values[i] == -1) {
  507. continue;
  508. }
  509. /* We have some value here, so we need to check it */
  510. if (shingle_values[i] == cur_id) {
  511. cur_cnt ++;
  512. }
  513. else {
  514. cur_id = shingle_values[i];
  515. if (cur_cnt >= max_cnt) {
  516. max_cnt = cur_cnt;
  517. sel_id = cur_id;
  518. }
  519. cur_cnt = 0;
  520. }
  521. }
  522. if (cur_cnt > max_cnt) {
  523. max_cnt = cur_cnt;
  524. }
  525. if (sel_id != -1) {
  526. /* We have some id selected here */
  527. rep.v1.prob = (float)max_cnt / (float)RSPAMD_SHINGLE_SIZE;
  528. if (rep.v1.prob > 0.5) {
  529. msg_debug_fuzzy_backend (
  530. "found fuzzy hash with probability %.2f",
  531. rep.v1.prob);
  532. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  533. RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID, sel_id);
  534. if (rc == SQLITE_OK) {
  535. timestamp = sqlite3_column_int64 (
  536. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
  537. 2);
  538. if (time (NULL) - timestamp > expire) {
  539. /* Expire element */
  540. msg_debug_fuzzy_backend (
  541. "requested hash has been expired");
  542. rep.v1.prob = 0.0;
  543. }
  544. else {
  545. rep.ts = timestamp;
  546. memcpy (rep.digest, sqlite3_column_blob (
  547. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
  548. 0), sizeof (rep.digest));
  549. rep.v1.value = sqlite3_column_int64 (
  550. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
  551. 1);
  552. rep.v1.flag = sqlite3_column_int (
  553. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
  554. 3);
  555. }
  556. }
  557. }
  558. else {
  559. /* Otherwise we assume that as error */
  560. rep.v1.value = 0;
  561. }
  562. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend,
  563. RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID);
  564. }
  565. }
  566. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
  567. rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  568. RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
  569. return rep;
  570. }
  571. gboolean
  572. rspamd_fuzzy_backend_sqlite_prepare_update (struct rspamd_fuzzy_backend_sqlite *backend,
  573. const gchar *source)
  574. {
  575. gint rc;
  576. if (backend == NULL) {
  577. return FALSE;
  578. }
  579. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  580. RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
  581. if (rc != SQLITE_OK) {
  582. msg_warn_fuzzy_backend ("cannot start transaction for updates: %s",
  583. sqlite3_errmsg (backend->db));
  584. return FALSE;
  585. }
  586. return TRUE;
  587. }
  588. gboolean
  589. rspamd_fuzzy_backend_sqlite_add (struct rspamd_fuzzy_backend_sqlite *backend,
  590. const struct rspamd_fuzzy_cmd *cmd)
  591. {
  592. int rc, i;
  593. gint64 id, flag;
  594. const struct rspamd_fuzzy_shingle_cmd *shcmd;
  595. if (backend == NULL) {
  596. return FALSE;
  597. }
  598. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  599. RSPAMD_FUZZY_BACKEND_CHECK,
  600. cmd->digest);
  601. if (rc == SQLITE_OK) {
  602. /* Check flag */
  603. flag = sqlite3_column_int64 (
  604. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt,
  605. 2);
  606. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
  607. if (flag == cmd->flag) {
  608. /* We need to increase weight */
  609. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  610. RSPAMD_FUZZY_BACKEND_UPDATE,
  611. (gint64) cmd->value,
  612. cmd->digest);
  613. if (rc != SQLITE_OK) {
  614. msg_warn_fuzzy_backend ("cannot update hash to %d -> "
  615. "%*xs: %s", (gint) cmd->flag,
  616. (gint) sizeof (cmd->digest), cmd->digest,
  617. sqlite3_errmsg (backend->db));
  618. }
  619. }
  620. else {
  621. /* We need to relearn actually */
  622. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  623. RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
  624. (gint64) cmd->value,
  625. (gint64) cmd->flag,
  626. cmd->digest);
  627. if (rc != SQLITE_OK) {
  628. msg_warn_fuzzy_backend ("cannot update hash to %d -> "
  629. "%*xs: %s", (gint) cmd->flag,
  630. (gint) sizeof (cmd->digest), cmd->digest,
  631. sqlite3_errmsg (backend->db));
  632. }
  633. }
  634. }
  635. else {
  636. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
  637. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  638. RSPAMD_FUZZY_BACKEND_INSERT,
  639. (gint) cmd->flag,
  640. cmd->digest,
  641. (gint64) cmd->value);
  642. if (rc == SQLITE_OK) {
  643. if (cmd->shingles_count > 0) {
  644. id = sqlite3_last_insert_rowid (backend->db);
  645. shcmd = (const struct rspamd_fuzzy_shingle_cmd *) cmd;
  646. for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
  647. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  648. RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
  649. shcmd->sgl.hashes[i], (gint64)i, id);
  650. msg_debug_fuzzy_backend ("add shingle %d -> %L: %L",
  651. i,
  652. shcmd->sgl.hashes[i],
  653. id);
  654. if (rc != SQLITE_OK) {
  655. msg_warn_fuzzy_backend ("cannot add shingle %d -> "
  656. "%L: %L: %s", i,
  657. shcmd->sgl.hashes[i],
  658. id, sqlite3_errmsg (backend->db));
  659. }
  660. }
  661. }
  662. }
  663. else {
  664. msg_warn_fuzzy_backend ("cannot add hash to %d -> "
  665. "%*xs: %s", (gint)cmd->flag,
  666. (gint)sizeof (cmd->digest), cmd->digest,
  667. sqlite3_errmsg (backend->db));
  668. }
  669. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend,
  670. RSPAMD_FUZZY_BACKEND_INSERT);
  671. }
  672. return (rc == SQLITE_OK);
  673. }
  674. gboolean
  675. rspamd_fuzzy_backend_sqlite_finish_update (struct rspamd_fuzzy_backend_sqlite *backend,
  676. const gchar *source, gboolean version_bump)
  677. {
  678. gint rc = SQLITE_OK, wal_frames, wal_checkpointed, ver;
  679. /* Get and update version */
  680. if (version_bump) {
  681. ver = rspamd_fuzzy_backend_sqlite_version (backend, source);
  682. ++ver;
  683. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  684. RSPAMD_FUZZY_BACKEND_SET_VERSION,
  685. (gint64)ver, (gint64)time (NULL), source);
  686. }
  687. if (rc == SQLITE_OK) {
  688. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  689. RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
  690. if (rc != SQLITE_OK) {
  691. msg_warn_fuzzy_backend ("cannot commit updates: %s",
  692. sqlite3_errmsg (backend->db));
  693. rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  694. RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
  695. return FALSE;
  696. }
  697. else {
  698. if (!rspamd_sqlite3_sync (backend->db, &wal_frames, &wal_checkpointed)) {
  699. msg_warn_fuzzy_backend ("cannot commit checkpoint: %s",
  700. sqlite3_errmsg (backend->db));
  701. }
  702. else if (wal_checkpointed > 0) {
  703. msg_info_fuzzy_backend ("total number of frames in the wal file: "
  704. "%d, checkpointed: %d", wal_frames, wal_checkpointed);
  705. }
  706. }
  707. }
  708. else {
  709. msg_warn_fuzzy_backend ("cannot update version for %s: %s", source,
  710. sqlite3_errmsg (backend->db));
  711. rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  712. RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
  713. return FALSE;
  714. }
  715. return TRUE;
  716. }
  717. gboolean
  718. rspamd_fuzzy_backend_sqlite_del (struct rspamd_fuzzy_backend_sqlite *backend,
  719. const struct rspamd_fuzzy_cmd *cmd)
  720. {
  721. int rc = -1;
  722. if (backend == NULL) {
  723. return FALSE;
  724. }
  725. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  726. RSPAMD_FUZZY_BACKEND_CHECK,
  727. cmd->digest);
  728. if (rc == SQLITE_OK) {
  729. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
  730. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  731. RSPAMD_FUZZY_BACKEND_DELETE,
  732. cmd->digest);
  733. if (rc != SQLITE_OK) {
  734. msg_warn_fuzzy_backend ("cannot update hash to %d -> "
  735. "%*xs: %s", (gint) cmd->flag,
  736. (gint) sizeof (cmd->digest), cmd->digest,
  737. sqlite3_errmsg (backend->db));
  738. }
  739. }
  740. else {
  741. /* Hash is missing */
  742. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
  743. }
  744. return (rc == SQLITE_OK);
  745. }
  746. gboolean
  747. rspamd_fuzzy_backend_sqlite_sync (struct rspamd_fuzzy_backend_sqlite *backend,
  748. gint64 expire,
  749. gboolean clean_orphaned)
  750. {
  751. struct orphaned_shingle_elt {
  752. gint64 value;
  753. gint64 number;
  754. };
  755. /* Do not do more than 5k ops per step */
  756. const guint64 max_changes = 5000;
  757. gboolean ret = FALSE;
  758. gint64 expire_lim, expired;
  759. gint rc, i, orphaned_cnt = 0;
  760. GError *err = NULL;
  761. static const gchar orphaned_shingles[] = "SELECT shingles.value,shingles.number "
  762. "FROM shingles "
  763. "LEFT JOIN digests ON "
  764. "shingles.digest_id=digests.id WHERE "
  765. "digests.id IS NULL;";
  766. sqlite3_stmt *stmt;
  767. GArray *orphaned;
  768. struct orphaned_shingle_elt orphaned_elt, *pelt;
  769. if (backend == NULL) {
  770. return FALSE;
  771. }
  772. /* Perform expire */
  773. if (expire > 0) {
  774. expire_lim = time (NULL) - expire;
  775. if (expire_lim > 0) {
  776. ret = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  777. RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
  778. if (ret == SQLITE_OK) {
  779. rc = rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  780. RSPAMD_FUZZY_BACKEND_EXPIRE, expire_lim, max_changes);
  781. if (rc == SQLITE_OK) {
  782. expired = sqlite3_changes (backend->db);
  783. if (expired > 0) {
  784. backend->expired += expired;
  785. msg_info_fuzzy_backend ("expired %L hashes", expired);
  786. }
  787. }
  788. else {
  789. msg_warn_fuzzy_backend (
  790. "cannot execute expired statement: %s",
  791. sqlite3_errmsg (backend->db));
  792. }
  793. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend,
  794. RSPAMD_FUZZY_BACKEND_EXPIRE);
  795. ret = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  796. RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
  797. if (ret != SQLITE_OK) {
  798. rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  799. RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
  800. }
  801. }
  802. if (ret != SQLITE_OK) {
  803. msg_warn_fuzzy_backend ("cannot expire db: %s",
  804. sqlite3_errmsg (backend->db));
  805. }
  806. }
  807. }
  808. /* Cleanup database */
  809. if (clean_orphaned) {
  810. ret = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  811. RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
  812. if (ret == SQLITE_OK) {
  813. if ((rc = sqlite3_prepare_v2 (backend->db,
  814. orphaned_shingles,
  815. -1,
  816. &stmt,
  817. NULL)) != SQLITE_OK) {
  818. msg_warn_fuzzy_backend ("cannot cleanup shingles: %s",
  819. sqlite3_errmsg (backend->db));
  820. }
  821. else {
  822. orphaned = g_array_new (FALSE,
  823. FALSE,
  824. sizeof (struct orphaned_shingle_elt));
  825. while (sqlite3_step (stmt) == SQLITE_ROW) {
  826. orphaned_elt.value = sqlite3_column_int64 (stmt, 0);
  827. orphaned_elt.number = sqlite3_column_int64 (stmt, 1);
  828. g_array_append_val (orphaned, orphaned_elt);
  829. if (orphaned->len > max_changes) {
  830. break;
  831. }
  832. }
  833. sqlite3_finalize (stmt);
  834. orphaned_cnt = orphaned->len;
  835. if (orphaned_cnt > 0) {
  836. msg_info_fuzzy_backend (
  837. "going to delete %ud orphaned shingles",
  838. orphaned_cnt);
  839. /* Need to delete orphaned elements */
  840. for (i = 0; i < (gint) orphaned_cnt; i++) {
  841. pelt = &g_array_index (orphaned,
  842. struct orphaned_shingle_elt,
  843. i);
  844. rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  845. RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
  846. pelt->value, pelt->number);
  847. }
  848. }
  849. g_array_free (orphaned, TRUE);
  850. }
  851. ret = rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  852. RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
  853. if (ret == SQLITE_OK) {
  854. msg_info_fuzzy_backend (
  855. "deleted %ud orphaned shingles",
  856. orphaned_cnt);
  857. }
  858. else {
  859. msg_warn_fuzzy_backend (
  860. "cannot synchronize fuzzy backend: %e",
  861. err);
  862. rspamd_fuzzy_backend_sqlite_run_stmt (backend, TRUE,
  863. RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
  864. }
  865. }
  866. }
  867. return ret;
  868. }
  869. void
  870. rspamd_fuzzy_backend_sqlite_close (struct rspamd_fuzzy_backend_sqlite *backend)
  871. {
  872. if (backend != NULL) {
  873. if (backend->db != NULL) {
  874. rspamd_fuzzy_backend_sqlite_close_stmts (backend);
  875. sqlite3_close (backend->db);
  876. }
  877. if (backend->path != NULL) {
  878. g_free (backend->path);
  879. }
  880. if (backend->pool) {
  881. rspamd_mempool_delete (backend->pool);
  882. }
  883. g_free (backend);
  884. }
  885. }
  886. gsize
  887. rspamd_fuzzy_backend_sqlite_count (struct rspamd_fuzzy_backend_sqlite *backend)
  888. {
  889. if (backend) {
  890. if (rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  891. RSPAMD_FUZZY_BACKEND_COUNT) == SQLITE_OK) {
  892. backend->count = sqlite3_column_int64 (
  893. prepared_stmts[RSPAMD_FUZZY_BACKEND_COUNT].stmt, 0);
  894. }
  895. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_COUNT);
  896. return backend->count;
  897. }
  898. return 0;
  899. }
  900. gint
  901. rspamd_fuzzy_backend_sqlite_version (struct rspamd_fuzzy_backend_sqlite *backend,
  902. const gchar *source)
  903. {
  904. gint ret = 0;
  905. if (backend) {
  906. if (rspamd_fuzzy_backend_sqlite_run_stmt (backend, FALSE,
  907. RSPAMD_FUZZY_BACKEND_VERSION, source) == SQLITE_OK) {
  908. ret = sqlite3_column_int64 (
  909. prepared_stmts[RSPAMD_FUZZY_BACKEND_VERSION].stmt, 0);
  910. }
  911. rspamd_fuzzy_backend_sqlite_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_VERSION);
  912. }
  913. return ret;
  914. }
  915. gsize
  916. rspamd_fuzzy_backend_sqlite_expired (struct rspamd_fuzzy_backend_sqlite *backend)
  917. {
  918. return backend != NULL ? backend->expired : 0;
  919. }
  920. const gchar *
  921. rspamd_fuzzy_sqlite_backend_id (struct rspamd_fuzzy_backend_sqlite *backend)
  922. {
  923. return backend != NULL ? backend->id : 0;
  924. }