Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

fuzzy_backend.c 19KB


  1. /* Copyright (c) 2014, Vsevolod Stakhov
  2. * All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. * * Redistributions of source code must retain the above copyright
  7. * notice, this list of conditions and the following disclaimer.
  8. * * Redistributions in binary form must reproduce the above copyright
  9. * notice, this list of conditions and the following disclaimer in the
  10. * documentation and/or other materials provided with the distribution.
  11. *
  12. * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
  13. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  14. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  15. * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  16. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. */
  23. #include "config.h"
  24. #include "main.h"
  25. #include "fuzzy_backend.h"
  26. #include "fuzzy_storage.h"
  27. #include <sqlite3.h>
  28. /* Magic sequence for hashes file */
  29. #define FUZZY_FILE_MAGIC "rsh"
  30. struct rspamd_legacy_fuzzy_node {
  31. gint32 value;
  32. gint32 flag;
  33. guint64 time;
  34. rspamd_fuzzy_t h;
  35. };
  36. struct rspamd_fuzzy_backend {
  37. sqlite3 *db;
  38. char *path;
  39. gsize count;
  40. gsize expired;
  41. };
  /*
   * Schema creation script, executed as raw SQL when a new database is made.
   * `digests` holds one row per hash; `shingles` holds the shingle values that
   * point back to a digest row through `digest_id`.
   */
  const char *create_tables_sql =
      "BEGIN;"
      /* exact hashes */
      "CREATE TABLE digests("
      "id INTEGER PRIMARY KEY,"
      "flag INTEGER NOT NULL,"
      "digest TEXT NOT NULL,"
      "value INTEGER,"
      "time INTEGER);"
      /* shingles attached to a digest */
      "CREATE TABLE shingles("
      "value INTEGER NOT NULL,"
      "number INTEGER NOT NULL,"
      "digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE "
      "ON UPDATE CASCADE);"
      "COMMIT;";
  /*
   * Index creation script, executed as raw SQL. Kept separate from the schema
   * so that bulk loading (legacy conversion) can run before the indexes exist.
   */
  const char *create_index_sql =
      "BEGIN;"
      /* digest lookup, one row per digest */
      "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
      /* expiration scans by time */
      "CREATE INDEX IF NOT EXISTS t ON digests(time);"
      /* shingle lookup by (value, number) */
      "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
      "COMMIT;";
  /*
   * Indexes into the prepared_stmts table; the numeric value of every
   * enumerator is the position of the matching entry in that table.
   */
  enum rspamd_fuzzy_statement_idx {
      /* transaction control */
      RSPAMD_FUZZY_BACKEND_TRANSACTION_START = 0,
      RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
      RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
      /* modification of digests and shingles */
      RSPAMD_FUZZY_BACKEND_INSERT,
      RSPAMD_FUZZY_BACKEND_UPDATE,
      RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
      /* lookups */
      RSPAMD_FUZZY_BACKEND_CHECK,
      RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
      RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
      /* removal and maintenance */
      RSPAMD_FUZZY_BACKEND_DELETE,
      RSPAMD_FUZZY_BACKEND_COUNT,
      RSPAMD_FUZZY_BACKEND_EXPIRE,
      RSPAMD_FUZZY_BACKEND_VACUUM,
      /* number of statements, not a statement itself */
      RSPAMD_FUZZY_BACKEND_MAX
  };
  78. static struct rspamd_fuzzy_stmts {
  79. enum rspamd_fuzzy_statement_idx idx;
  80. const gchar *sql;
  81. const gchar *args;
  82. sqlite3_stmt *stmt;
  83. gint result;
  84. } prepared_stmts[RSPAMD_FUZZY_BACKEND_MAX] =
  85. {
  86. {
  87. .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
  88. .sql = "BEGIN TRANSACTION;",
  89. .args = "",
  90. .stmt = NULL,
  91. .result = SQLITE_DONE
  92. },
  93. {
  94. .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
  95. .sql = "COMMIT;",
  96. .args = "",
  97. .stmt = NULL,
  98. .result = SQLITE_DONE
  99. },
  100. {
  101. .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
  102. .sql = "ROLLBACK;",
  103. .args = "",
  104. .stmt = NULL,
  105. .result = SQLITE_DONE
  106. },
  107. {
  108. .idx = RSPAMD_FUZZY_BACKEND_INSERT,
  109. .sql = "INSERT INTO digests(flag, digest, value, time) VALUES"
  110. "(?1, ?2, ?3, ?4);",
  111. .args = "SDII",
  112. .stmt = NULL,
  113. .result = SQLITE_DONE
  114. },
  115. {
  116. .idx = RSPAMD_FUZZY_BACKEND_UPDATE,
  117. .sql = "UPDATE digests SET value = value + ?1 WHERE "
  118. "digest==?2;",
  119. .args = "ID",
  120. .stmt = NULL,
  121. .result = SQLITE_DONE
  122. },
  123. {
  124. .idx = RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
  125. .sql = "INSERT OR REPLACE INTO shingles(value, number, digest_id) "
  126. "VALUES (?1, ?2, ?3);",
  127. .args = "III",
  128. .stmt = NULL,
  129. .result = SQLITE_DONE
  130. },
  131. {
  132. .idx = RSPAMD_FUZZY_BACKEND_CHECK,
  133. .sql = "SELECT value, time, flag FROM digests WHERE digest==?1;",
  134. .args = "D",
  135. .stmt = NULL,
  136. .result = SQLITE_ROW
  137. },
  138. {
  139. .idx = RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
  140. .sql = "SELECT digest_id FROM shingles WHERE value=?1 AND number=?2",
  141. .args = "IS",
  142. .stmt = NULL,
  143. .result = SQLITE_ROW
  144. },
  145. {
  146. .idx = RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
  147. .sql = "SELECT digest, value, time, flag FROM digests WHERE id=?1",
  148. .args = "I",
  149. .stmt = NULL,
  150. .result = SQLITE_ROW
  151. },
  152. {
  153. .idx = RSPAMD_FUZZY_BACKEND_DELETE,
  154. .sql = "DELETE FROM digests WHERE digest==?1;",
  155. .args = "D",
  156. .stmt = NULL,
  157. .result = SQLITE_DONE
  158. },
  159. {
  160. .idx = RSPAMD_FUZZY_BACKEND_COUNT,
  161. .sql = "SELECT COUNT(*) FROM digests;",
  162. .args = "",
  163. .stmt = NULL,
  164. .result = SQLITE_ROW
  165. },
  166. {
  167. .idx = RSPAMD_FUZZY_BACKEND_EXPIRE,
  168. .sql = "DELETE FROM digests WHERE time < ?1;",
  169. .args = "I",
  170. .stmt = NULL,
  171. .result = SQLITE_DONE
  172. },
  173. {
  174. .idx = RSPAMD_FUZZY_BACKEND_VACUUM,
  175. .sql = "VACUUM;",
  176. .args = "",
  177. .stmt = NULL,
  178. .result = SQLITE_DONE
  179. }
  180. };
  181. static GQuark
  182. rspamd_fuzzy_backend_quark(void)
  183. {
  184. return g_quark_from_static_string ("fuzzy-storage-backend");
  185. }
  186. static gboolean
  187. rspamd_fuzzy_backend_prepare_stmts (struct rspamd_fuzzy_backend *bk, GError **err)
  188. {
  189. int i;
  190. for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i ++) {
  191. if (prepared_stmts[i].stmt != NULL) {
  192. /* Skip already prepared statements */
  193. continue;
  194. }
  195. if (sqlite3_prepare_v2 (bk->db, prepared_stmts[i].sql, -1,
  196. &prepared_stmts[i].stmt, NULL) != SQLITE_OK) {
  197. g_set_error (err, rspamd_fuzzy_backend_quark (),
  198. -1, "Cannot initialize prepared sql `%s`: %s",
  199. prepared_stmts[i].sql, sqlite3_errmsg (bk->db));
  200. return FALSE;
  201. }
  202. }
  203. return TRUE;
  204. }
  205. static int
  206. rspamd_fuzzy_backend_run_stmt (struct rspamd_fuzzy_backend *bk, int idx, ...)
  207. {
  208. int retcode;
  209. va_list ap;
  210. sqlite3_stmt *stmt;
  211. int i;
  212. const char *argtypes;
  213. if (idx < 0 || idx >= RSPAMD_FUZZY_BACKEND_MAX) {
  214. return -1;
  215. }
  216. stmt = prepared_stmts[idx].stmt;
  217. if (stmt == NULL) {
  218. if ((retcode = sqlite3_prepare_v2 (bk->db, prepared_stmts[idx].sql, -1,
  219. &prepared_stmts[idx].stmt, NULL)) != SQLITE_OK) {
  220. msg_err ("Cannot initialize prepared sql `%s`: %s",
  221. prepared_stmts[idx].sql, sqlite3_errmsg (bk->db));
  222. return retcode;
  223. }
  224. stmt = prepared_stmts[idx].stmt;
  225. }
  226. msg_debug ("executing `%s`", prepared_stmts[idx].sql);
  227. argtypes = prepared_stmts[idx].args;
  228. sqlite3_reset (stmt);
  229. va_start (ap, idx);
  230. for (i = 0; argtypes[i] != '\0'; i++) {
  231. switch (argtypes[i]) {
  232. case 'T':
  233. sqlite3_bind_text (stmt, i + 1, va_arg (ap, const char*), -1,
  234. SQLITE_STATIC);
  235. break;
  236. case 'I':
  237. sqlite3_bind_int64 (stmt, i + 1, va_arg (ap, gint64));
  238. break;
  239. case 'S':
  240. sqlite3_bind_int (stmt, i + 1, va_arg (ap, gint));
  241. break;
  242. case 'D':
  243. /* Special case for digests variable */
  244. sqlite3_bind_text (stmt, i + 1, va_arg (ap, const char*), 64,
  245. SQLITE_STATIC);
  246. break;
  247. }
  248. }
  249. va_end (ap);
  250. retcode = sqlite3_step (stmt);
  251. if (retcode == prepared_stmts[idx].result) {
  252. return SQLITE_OK;
  253. }
  254. else if (retcode != SQLITE_DONE) {
  255. msg_debug ("failed to execute query %s: %d, %s", prepared_stmts[idx].sql,
  256. retcode, sqlite3_errmsg (bk->db));
  257. }
  258. return retcode;
  259. }
  260. static void
  261. rspamd_fuzzy_backend_close_stmts (struct rspamd_fuzzy_backend *bk)
  262. {
  263. int i;
  264. for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i++) {
  265. if (prepared_stmts[i].stmt != NULL) {
  266. sqlite3_finalize (prepared_stmts[i].stmt);
  267. prepared_stmts[i].stmt = NULL;
  268. }
  269. }
  270. return;
  271. }
  272. static gboolean
  273. rspamd_fuzzy_backend_run_simple (int idx, struct rspamd_fuzzy_backend *bk,
  274. GError **err)
  275. {
  276. if (rspamd_fuzzy_backend_run_stmt (bk, idx) != SQLITE_OK) {
  277. g_set_error (err, rspamd_fuzzy_backend_quark (),
  278. -1, "Cannot execute sql `%s`: %s",
  279. prepared_stmts[idx].sql,
  280. sqlite3_errmsg (bk->db));
  281. return FALSE;
  282. }
  283. return TRUE;
  284. }
  285. static gboolean
  286. rspamd_fuzzy_backend_run_sql (const gchar *sql, struct rspamd_fuzzy_backend *bk,
  287. GError **err)
  288. {
  289. if (sqlite3_exec (bk->db, sql, NULL, NULL, NULL) != SQLITE_OK) {
  290. g_set_error (err, rspamd_fuzzy_backend_quark (),
  291. -1, "Cannot execute raw sql `%s`: %s",
  292. sql, sqlite3_errmsg (bk->db));
  293. return FALSE;
  294. }
  295. return TRUE;
  296. }
  297. static struct rspamd_fuzzy_backend *
  298. rspamd_fuzzy_backend_create_db (const gchar *path, gboolean add_index,
  299. GError **err)
  300. {
  301. struct rspamd_fuzzy_backend *bk;
  302. sqlite3 *sqlite;
  303. int rc;
  304. if ((rc = sqlite3_open_v2 (path, &sqlite,
  305. SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_NOMUTEX, NULL))
  306. != SQLITE_OK) {
  307. g_set_error (err, rspamd_fuzzy_backend_quark (),
  308. rc, "Cannot open sqlite db %s: %d",
  309. path, rc);
  310. return NULL;
  311. }
  312. bk = g_slice_alloc (sizeof (*bk));
  313. bk->path = g_strdup (path);
  314. bk->db = sqlite;
  315. bk->expired = 0;
  316. bk->count = 0;
  317. /*
  318. * Here we need to run create prior to preparing other statements
  319. */
  320. if (!rspamd_fuzzy_backend_run_sql (create_tables_sql, bk, err)) {
  321. rspamd_fuzzy_backend_close (bk);
  322. return NULL;
  323. }
  324. if (!rspamd_fuzzy_backend_prepare_stmts (bk, err)) {
  325. rspamd_fuzzy_backend_close (bk);
  326. return NULL;
  327. }
  328. if (add_index) {
  329. rspamd_fuzzy_backend_run_sql (create_index_sql, bk, NULL);
  330. }
  331. rspamd_fuzzy_backend_run_simple (RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
  332. bk, NULL);
  333. return bk;
  334. }
  335. static struct rspamd_fuzzy_backend *
  336. rspamd_fuzzy_backend_open_db (const gchar *path, GError **err)
  337. {
  338. struct rspamd_fuzzy_backend *bk;
  339. sqlite3 *sqlite;
  340. int rc;
  341. if ((rc = sqlite3_open_v2 (path, &sqlite,
  342. SQLITE_OPEN_READWRITE|SQLITE_OPEN_NOMUTEX, NULL)) != SQLITE_OK) {
  343. g_set_error (err, rspamd_fuzzy_backend_quark (),
  344. rc, "Cannot open sqlite db %s: %d",
  345. path, rc);
  346. return NULL;
  347. }
  348. bk = g_slice_alloc (sizeof (*bk));
  349. bk->path = g_strdup (path);
  350. bk = g_slice_alloc (sizeof (*bk));
  351. bk->db = sqlite;
  352. bk->expired = 0;
  353. /* Cleanup database */
  354. rspamd_fuzzy_backend_run_simple (RSPAMD_FUZZY_BACKEND_VACUUM, bk, NULL);
  355. if (rspamd_fuzzy_backend_run_stmt (bk, RSPAMD_FUZZY_BACKEND_COUNT)
  356. == SQLITE_OK) {
  357. bk->count = sqlite3_column_int64 (
  358. prepared_stmts[RSPAMD_FUZZY_BACKEND_COUNT].stmt, 0);
  359. }
  360. rspamd_fuzzy_backend_run_simple (RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
  361. bk, NULL);
  362. return bk;
  363. }
  364. /*
  365. * Convert old database to the new format
  366. */
  367. static gboolean
  368. rspamd_fuzzy_backend_convert (const gchar *path, int fd, GError **err)
  369. {
  370. gchar tmpdb[PATH_MAX];
  371. struct rspamd_fuzzy_backend *nbackend;
  372. struct stat st;
  373. gint off;
  374. guint8 *map, *p, *end;
  375. struct rspamd_legacy_fuzzy_node *n;
  376. rspamd_snprintf (tmpdb, sizeof (tmpdb), "%s.converted", path);
  377. (void)unlink (tmpdb);
  378. nbackend = rspamd_fuzzy_backend_create_db (tmpdb, FALSE, err);
  379. if (nbackend == NULL) {
  380. return FALSE;
  381. }
  382. (void)fstat (fd, &st);
  383. (void)lseek (fd, 0, SEEK_SET);
  384. off = sizeof (FUZZY_FILE_MAGIC);
  385. if ((map = mmap (NULL, st.st_size - off, PROT_READ, MAP_SHARED, fd,
  386. 0)) == MAP_FAILED) {
  387. g_set_error (err, rspamd_fuzzy_backend_quark (),
  388. errno, "Cannot mmap file %s: %s",
  389. path, strerror (errno));
  390. rspamd_fuzzy_backend_close (nbackend);
  391. return FALSE;
  392. }
  393. end = map + st.st_size;
  394. p = map + off;
  395. rspamd_fuzzy_backend_run_simple (RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
  396. nbackend, NULL);
  397. while (p < end) {
  398. n = (struct rspamd_legacy_fuzzy_node *)p;
  399. /* Convert node flag, digest, value, time */
  400. if (rspamd_fuzzy_backend_run_stmt (nbackend, RSPAMD_FUZZY_BACKEND_INSERT,
  401. (gint)n->flag, n->h.hash_pipe,
  402. (gint64)n->value, n->time) != SQLITE_OK) {
  403. msg_warn ("Cannot execute init sql %s: %s",
  404. prepared_stmts[RSPAMD_FUZZY_BACKEND_INSERT].sql,
  405. sqlite3_errmsg (nbackend->db));
  406. }
  407. p += sizeof (struct rspamd_legacy_fuzzy_node);
  408. }
  409. munmap (map, st.st_size);
  410. rspamd_fuzzy_backend_run_simple (RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
  411. nbackend, NULL);
  412. rspamd_fuzzy_backend_run_sql (create_index_sql, nbackend, NULL);
  413. rspamd_fuzzy_backend_close (nbackend);
  414. rename (tmpdb, path);
  415. return TRUE;
  416. }
  417. struct rspamd_fuzzy_backend*
  418. rspamd_fuzzy_backend_open (const gchar *path, GError **err)
  419. {
  420. gchar *dir, header[4];
  421. gint fd, r;
  422. struct rspamd_fuzzy_backend *res;
  423. /* First of all we check path for existence */
  424. dir = g_path_get_dirname (path);
  425. if (dir == NULL) {
  426. g_set_error (err, rspamd_fuzzy_backend_quark (),
  427. errno, "Cannot get directory name for %s: %s", path,
  428. strerror (errno));
  429. return NULL;
  430. }
  431. if (access (path, W_OK) == -1 && access (dir, W_OK) == -1) {
  432. g_set_error (err, rspamd_fuzzy_backend_quark (),
  433. errno, "Cannot access directory %s to create database: %s",
  434. dir, strerror (errno));
  435. g_free (dir);
  436. return NULL;
  437. }
  438. g_free (dir);
  439. if ((fd = open (path, O_RDONLY)) == -1) {
  440. if (errno != ENOENT) {
  441. g_set_error (err, rspamd_fuzzy_backend_quark (),
  442. errno, "Cannot open file %s: %s",
  443. path, strerror (errno));
  444. return NULL;
  445. }
  446. }
  447. else {
  448. /* Check for legacy format */
  449. if ((r = read (fd, header, sizeof (header))) == sizeof (header)) {
  450. if (memcmp (header, FUZZY_FILE_MAGIC, sizeof (header) - 1) == 0) {
  451. msg_info ("Trying to convert old fuzzy database");
  452. if (!rspamd_fuzzy_backend_convert (path, fd, err)) {
  453. close (fd);
  454. return NULL;
  455. }
  456. msg_info ("Old database converted");
  457. }
  458. close (fd);
  459. }
  460. }
  461. /* Open database */
  462. if ((res = rspamd_fuzzy_backend_open_db (path, err)) == NULL) {
  463. GError *tmp = NULL;
  464. if ((res = rspamd_fuzzy_backend_create_db (path, TRUE, &tmp)) == NULL) {
  465. g_clear_error (err);
  466. g_propagate_error (err, tmp);
  467. return NULL;
  468. }
  469. g_clear_error (err);
  470. }
  471. return res;
  472. }
  473. static gint
  474. rspamd_fuzzy_backend_int64_cmp (const void *a, const void *b)
  475. {
  476. gint64 ia = *(gint64 *)a, ib = *(gint64 *)b;
  477. return (ia - ib);
  478. }
  479. struct rspamd_fuzzy_reply
  480. rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend,
  481. const struct rspamd_fuzzy_cmd *cmd, gint64 expire)
  482. {
  483. struct rspamd_fuzzy_reply rep = {0, 0, 0, 0.0};
  484. const struct rspamd_fuzzy_shingle_cmd *shcmd;
  485. int rc;
  486. gint64 timestamp;
  487. gint64 shingle_values[RSPAMD_SHINGLE_SIZE], i, sel_id, cur_id,
  488. cur_cnt, max_cnt;
  489. const char *digest;
  490. /* Try direct match first of all */
  491. rc = rspamd_fuzzy_backend_run_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK,
  492. cmd->digest);
  493. if (rc == SQLITE_OK) {
  494. timestamp = sqlite3_column_int64 (
  495. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 1);
  496. if (time (NULL) - timestamp > expire) {
  497. /* Expire element */
  498. msg_debug ("requested hash has been expired");
  499. rspamd_fuzzy_backend_run_stmt (backend, RSPAMD_FUZZY_BACKEND_DELETE,
  500. cmd->digest);
  501. backend->expired ++;
  502. }
  503. else {
  504. rep.value = sqlite3_column_int64 (
  505. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 0);
  506. rep.prob = 1.0;
  507. rep.flag = sqlite3_column_int (
  508. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 2);
  509. }
  510. }
  511. else if (cmd->shingles_count > 0) {
  512. /* Fuzzy match */
  513. shcmd = (const struct rspamd_fuzzy_shingle_cmd *)cmd;
  514. for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
  515. rc = rspamd_fuzzy_backend_run_stmt (backend,
  516. RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
  517. shcmd->sgl.hashes[i], i);
  518. if (rc == SQLITE_OK) {
  519. shingle_values[i] = sqlite3_column_int64 (
  520. prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE].stmt,
  521. 0);
  522. }
  523. else {
  524. shingle_values[i] = -1;
  525. }
  526. msg_debug ("looking for shingle %d -> %L: %d", i, shcmd->sgl.hashes[i], rc);
  527. }
  528. qsort (shingle_values, RSPAMD_SHINGLE_SIZE, sizeof (gint64),
  529. rspamd_fuzzy_backend_int64_cmp);
  530. sel_id = -1;
  531. cur_id = -1;
  532. cur_cnt = 0;
  533. max_cnt = 0;
  534. for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
  535. if (shingle_values[i] == -1) {
  536. continue;
  537. }
  538. /* We have some value here, so we need to check it */
  539. if (shingle_values[i] == cur_id) {
  540. cur_cnt ++;
  541. }
  542. else {
  543. cur_id = shingle_values[i];
  544. if (cur_cnt >= max_cnt) {
  545. max_cnt = cur_cnt;
  546. sel_id = cur_id;
  547. }
  548. cur_cnt = 0;
  549. }
  550. }
  551. if (cur_cnt > max_cnt) {
  552. max_cnt = cur_cnt;
  553. }
  554. if (sel_id != -1) {
  555. /* We have some id selected here */
  556. rep.prob = (gdouble)max_cnt / (gdouble)RSPAMD_SHINGLE_SIZE;
  557. msg_debug ("found fuzzy hash with probability %.2f", rep.prob);
  558. rc = rspamd_fuzzy_backend_run_stmt (backend,
  559. RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID, sel_id);
  560. if (rc == SQLITE_OK) {
  561. digest = sqlite3_column_text (
  562. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt, 0);
  563. timestamp = sqlite3_column_int64 (
  564. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt, 2);
  565. if (time (NULL) - timestamp > expire) {
  566. /* Expire element */
  567. msg_debug ("requested hash has been expired");
  568. backend->expired ++;
  569. rspamd_fuzzy_backend_run_stmt (backend, RSPAMD_FUZZY_BACKEND_DELETE,
  570. digest);
  571. rep.prob = 0.0;
  572. }
  573. else {
  574. rep.value = sqlite3_column_int64 (
  575. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt, 1);
  576. rep.flag = sqlite3_column_int (
  577. prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt, 3);
  578. }
  579. }
  580. }
  581. }
  582. return rep;
  583. }
  584. gboolean
  585. rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
  586. const struct rspamd_fuzzy_cmd *cmd)
  587. {
  588. int rc, i;
  589. gint64 id;
  590. const struct rspamd_fuzzy_shingle_cmd *shcmd;
  591. rc = rspamd_fuzzy_backend_run_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK,
  592. cmd->digest);
  593. if (rc == SQLITE_OK) {
  594. /* We need to increase weight */
  595. rc = rspamd_fuzzy_backend_run_stmt (backend, RSPAMD_FUZZY_BACKEND_UPDATE,
  596. (gint64)cmd->value, cmd->digest);
  597. }
  598. else {
  599. rc = rspamd_fuzzy_backend_run_stmt (backend, RSPAMD_FUZZY_BACKEND_INSERT,
  600. (gint)cmd->flag, cmd->digest, (gint64)cmd->value, (gint64)time (NULL));
  601. if (rc == SQLITE_OK) {
  602. backend->count ++;
  603. if (cmd->shingles_count > 0) {
  604. id = sqlite3_last_insert_rowid (backend->db);
  605. shcmd = (const struct rspamd_fuzzy_shingle_cmd *)cmd;
  606. for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
  607. rspamd_fuzzy_backend_run_stmt (backend,
  608. RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
  609. shcmd->sgl.hashes[i], i, id);
  610. msg_debug ("add shingle %d -> %L: %d", i, shcmd->sgl.hashes[i], id);
  611. }
  612. }
  613. }
  614. }
  615. return (rc == SQLITE_OK);
  616. }
  617. gboolean
  618. rspamd_fuzzy_backend_del (struct rspamd_fuzzy_backend *backend,
  619. const struct rspamd_fuzzy_cmd *cmd)
  620. {
  621. int rc;
  622. rc = rspamd_fuzzy_backend_run_stmt (backend, RSPAMD_FUZZY_BACKEND_DELETE,
  623. cmd->digest);
  624. backend->count -= sqlite3_changes (backend->db);
  625. return (rc == SQLITE_OK);
  626. }
  627. gboolean
  628. rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend, gint64 expire)
  629. {
  630. gboolean ret = FALSE;
  631. gint64 expire_lim, expired;
  632. gint rc;
  633. GError *err = NULL;
  634. /* Perform expire */
  635. if (expire > 0) {
  636. expire_lim = time (NULL) - expire;
  637. if (expire_lim > 0) {
  638. rc = rspamd_fuzzy_backend_run_stmt (backend,
  639. RSPAMD_FUZZY_BACKEND_EXPIRE, expire_lim);
  640. if (rc == SQLITE_OK) {
  641. expired = sqlite3_changes (backend->db);
  642. if (expired > 0) {
  643. backend->expired += expired;
  644. msg_info ("expired %L hashes", expired);
  645. }
  646. }
  647. else {
  648. msg_warn ("cannot execute expired statement: %s",
  649. sqlite3_errmsg (backend->db));
  650. }
  651. }
  652. }
  653. ret = rspamd_fuzzy_backend_run_simple (RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
  654. backend, &err);
  655. if (ret) {
  656. ret = rspamd_fuzzy_backend_run_simple (RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
  657. backend, NULL);
  658. }
  659. else {
  660. msg_warn ("cannot synchronise fuzzy backend: %e", err);
  661. g_error_free (err);
  662. }
  663. return ret;
  664. }
  665. void
  666. rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend)
  667. {
  668. if (backend != NULL) {
  669. if (backend->db != NULL) {
  670. rspamd_fuzzy_backend_close_stmts (backend);
  671. sqlite3_close (backend->db);
  672. }
  673. if (backend->path != NULL) {
  674. g_free (backend->path);
  675. }
  676. g_slice_free1 (sizeof (*backend), backend);
  677. }
  678. }
  679. gsize
  680. rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend)
  681. {
  682. return backend->count;
  683. }
  684. gsize
  685. rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend)
  686. {
  687. return backend->expired;
  688. }