You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fuzzy_backend.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "fuzzy_backend.h"
  18. #include "fuzzy_backend_sqlite.h"
  19. #include "fuzzy_backend_redis.h"
  20. #include "cfg_file.h"
  21. #include "fuzzy_wire.h"
  22. #define DEFAULT_EXPIRE 172800L
  23. enum rspamd_fuzzy_backend_type {
  24. RSPAMD_FUZZY_BACKEND_SQLITE = 0,
  25. RSPAMD_FUZZY_BACKEND_REDIS = 1,
  26. };
  27. static void* rspamd_fuzzy_backend_init_sqlite (struct rspamd_fuzzy_backend *bk,
  28. const ucl_object_t *obj, struct rspamd_config *cfg, GError **err);
  29. static void rspamd_fuzzy_backend_check_sqlite (struct rspamd_fuzzy_backend *bk,
  30. const struct rspamd_fuzzy_cmd *cmd,
  31. rspamd_fuzzy_check_cb cb, void *ud,
  32. void *subr_ud);
  33. static void rspamd_fuzzy_backend_update_sqlite (struct rspamd_fuzzy_backend *bk,
  34. GArray *updates, const gchar *src,
  35. rspamd_fuzzy_update_cb cb, void *ud,
  36. void *subr_ud);
  37. static void rspamd_fuzzy_backend_count_sqlite (struct rspamd_fuzzy_backend *bk,
  38. rspamd_fuzzy_count_cb cb, void *ud,
  39. void *subr_ud);
  40. static void rspamd_fuzzy_backend_version_sqlite (struct rspamd_fuzzy_backend *bk,
  41. const gchar *src,
  42. rspamd_fuzzy_version_cb cb, void *ud,
  43. void *subr_ud);
  44. static const gchar* rspamd_fuzzy_backend_id_sqlite (struct rspamd_fuzzy_backend *bk,
  45. void *subr_ud);
  46. static void rspamd_fuzzy_backend_expire_sqlite (struct rspamd_fuzzy_backend *bk,
  47. void *subr_ud);
  48. static void rspamd_fuzzy_backend_close_sqlite (struct rspamd_fuzzy_backend *bk,
  49. void *subr_ud);
  50. struct rspamd_fuzzy_backend_subr {
  51. void* (*init) (struct rspamd_fuzzy_backend *bk, const ucl_object_t *obj,
  52. struct rspamd_config *cfg,
  53. GError **err);
  54. void (*check) (struct rspamd_fuzzy_backend *bk,
  55. const struct rspamd_fuzzy_cmd *cmd,
  56. rspamd_fuzzy_check_cb cb, void *ud,
  57. void *subr_ud);
  58. void (*update) (struct rspamd_fuzzy_backend *bk,
  59. GArray *updates, const gchar *src,
  60. rspamd_fuzzy_update_cb cb, void *ud,
  61. void *subr_ud);
  62. void (*count) (struct rspamd_fuzzy_backend *bk,
  63. rspamd_fuzzy_count_cb cb, void *ud,
  64. void *subr_ud);
  65. void (*version) (struct rspamd_fuzzy_backend *bk,
  66. const gchar *src,
  67. rspamd_fuzzy_version_cb cb, void *ud,
  68. void *subr_ud);
  69. const gchar* (*id) (struct rspamd_fuzzy_backend *bk, void *subr_ud);
  70. void (*periodic) (struct rspamd_fuzzy_backend *bk, void *subr_ud);
  71. void (*close) (struct rspamd_fuzzy_backend *bk, void *subr_ud);
  72. };
  73. static const struct rspamd_fuzzy_backend_subr fuzzy_subrs[] = {
  74. [RSPAMD_FUZZY_BACKEND_SQLITE] = {
  75. .init = rspamd_fuzzy_backend_init_sqlite,
  76. .check = rspamd_fuzzy_backend_check_sqlite,
  77. .update = rspamd_fuzzy_backend_update_sqlite,
  78. .count = rspamd_fuzzy_backend_count_sqlite,
  79. .version = rspamd_fuzzy_backend_version_sqlite,
  80. .id = rspamd_fuzzy_backend_id_sqlite,
  81. .periodic = rspamd_fuzzy_backend_expire_sqlite,
  82. .close = rspamd_fuzzy_backend_close_sqlite,
  83. },
  84. [RSPAMD_FUZZY_BACKEND_REDIS] = {
  85. .init = rspamd_fuzzy_backend_init_redis,
  86. .check = rspamd_fuzzy_backend_check_redis,
  87. .update = rspamd_fuzzy_backend_update_redis,
  88. .count = rspamd_fuzzy_backend_count_redis,
  89. .version = rspamd_fuzzy_backend_version_redis,
  90. .id = rspamd_fuzzy_backend_id_redis,
  91. .periodic = rspamd_fuzzy_backend_expire_redis,
  92. .close = rspamd_fuzzy_backend_close_redis,
  93. }
  94. };
  95. struct rspamd_fuzzy_backend {
  96. enum rspamd_fuzzy_backend_type type;
  97. gdouble expire;
  98. gdouble sync;
  99. struct ev_loop *event_loop;
  100. rspamd_fuzzy_periodic_cb periodic_cb;
  101. void *periodic_ud;
  102. const struct rspamd_fuzzy_backend_subr *subr;
  103. void *subr_ud;
  104. ev_timer periodic_event;
  105. };
  106. static GQuark
  107. rspamd_fuzzy_backend_quark (void)
  108. {
  109. return g_quark_from_static_string ("fuzzy-backend");
  110. }
  111. static void*
  112. rspamd_fuzzy_backend_init_sqlite (struct rspamd_fuzzy_backend *bk,
  113. const ucl_object_t *obj, struct rspamd_config *cfg, GError **err)
  114. {
  115. const ucl_object_t *elt;
  116. elt = ucl_object_lookup_any (obj, "hashfile", "hash_file", "file",
  117. "database", NULL);
  118. if (elt == NULL || ucl_object_type (elt) != UCL_STRING) {
  119. g_set_error (err, rspamd_fuzzy_backend_quark (),
  120. EINVAL, "missing sqlite3 path");
  121. return NULL;
  122. }
  123. return rspamd_fuzzy_backend_sqlite_open (ucl_object_tostring (elt),
  124. FALSE, err);
  125. }
  126. static void
  127. rspamd_fuzzy_backend_check_sqlite (struct rspamd_fuzzy_backend *bk,
  128. const struct rspamd_fuzzy_cmd *cmd,
  129. rspamd_fuzzy_check_cb cb, void *ud,
  130. void *subr_ud)
  131. {
  132. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  133. struct rspamd_fuzzy_reply rep;
  134. rep = rspamd_fuzzy_backend_sqlite_check (sq, cmd, bk->expire);
  135. if (cb) {
  136. cb (&rep, ud);
  137. }
  138. }
  139. static void
  140. rspamd_fuzzy_backend_update_sqlite (struct rspamd_fuzzy_backend *bk,
  141. GArray *updates, const gchar *src,
  142. rspamd_fuzzy_update_cb cb, void *ud,
  143. void *subr_ud)
  144. {
  145. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  146. gboolean success = FALSE;
  147. guint i;
  148. struct fuzzy_peer_cmd *io_cmd;
  149. struct rspamd_fuzzy_cmd *cmd;
  150. gpointer ptr;
  151. guint nupdates = 0, nadded = 0, ndeleted = 0, nextended = 0, nignored = 0;
  152. if (rspamd_fuzzy_backend_sqlite_prepare_update (sq, src)) {
  153. for (i = 0; i < updates->len; i ++) {
  154. io_cmd = &g_array_index (updates, struct fuzzy_peer_cmd, i);
  155. if (io_cmd->is_shingle) {
  156. cmd = &io_cmd->cmd.shingle.basic;
  157. ptr = &io_cmd->cmd.shingle;
  158. }
  159. else {
  160. cmd = &io_cmd->cmd.normal;
  161. ptr = &io_cmd->cmd.normal;
  162. }
  163. if (cmd->cmd == FUZZY_WRITE) {
  164. rspamd_fuzzy_backend_sqlite_add (sq, ptr);
  165. nadded ++;
  166. nupdates ++;
  167. }
  168. else if (cmd->cmd == FUZZY_DEL) {
  169. rspamd_fuzzy_backend_sqlite_del (sq, ptr);
  170. ndeleted ++;
  171. nupdates ++;
  172. }
  173. else {
  174. if (cmd->cmd == FUZZY_REFRESH) {
  175. nextended ++;
  176. }
  177. else {
  178. nignored ++;
  179. }
  180. }
  181. }
  182. if (rspamd_fuzzy_backend_sqlite_finish_update (sq, src,
  183. nupdates > 0)) {
  184. success = TRUE;
  185. }
  186. }
  187. if (cb) {
  188. cb (success, nadded, ndeleted, nextended, nignored, ud);
  189. }
  190. }
  191. static void
  192. rspamd_fuzzy_backend_count_sqlite (struct rspamd_fuzzy_backend *bk,
  193. rspamd_fuzzy_count_cb cb, void *ud,
  194. void *subr_ud)
  195. {
  196. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  197. guint64 nhashes;
  198. nhashes = rspamd_fuzzy_backend_sqlite_count (sq);
  199. if (cb) {
  200. cb (nhashes, ud);
  201. }
  202. }
  203. static void
  204. rspamd_fuzzy_backend_version_sqlite (struct rspamd_fuzzy_backend *bk,
  205. const gchar *src,
  206. rspamd_fuzzy_version_cb cb, void *ud,
  207. void *subr_ud)
  208. {
  209. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  210. guint64 rev;
  211. rev = rspamd_fuzzy_backend_sqlite_version (sq, src);
  212. if (cb) {
  213. cb (rev, ud);
  214. }
  215. }
  216. static const gchar*
  217. rspamd_fuzzy_backend_id_sqlite (struct rspamd_fuzzy_backend *bk,
  218. void *subr_ud)
  219. {
  220. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  221. return rspamd_fuzzy_sqlite_backend_id (sq);
  222. }
  223. static void
  224. rspamd_fuzzy_backend_expire_sqlite (struct rspamd_fuzzy_backend *bk,
  225. void *subr_ud)
  226. {
  227. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  228. rspamd_fuzzy_backend_sqlite_sync (sq, bk->expire, TRUE);
  229. }
  230. static void
  231. rspamd_fuzzy_backend_close_sqlite (struct rspamd_fuzzy_backend *bk,
  232. void *subr_ud)
  233. {
  234. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  235. rspamd_fuzzy_backend_sqlite_close (sq);
  236. }
  237. struct rspamd_fuzzy_backend *
  238. rspamd_fuzzy_backend_create (struct ev_loop *ev_base,
  239. const ucl_object_t *config,
  240. struct rspamd_config *cfg,
  241. GError **err)
  242. {
  243. struct rspamd_fuzzy_backend *bk;
  244. enum rspamd_fuzzy_backend_type type = RSPAMD_FUZZY_BACKEND_SQLITE;
  245. const ucl_object_t *elt;
  246. gdouble expire = DEFAULT_EXPIRE;
  247. if (config != NULL) {
  248. elt = ucl_object_lookup (config, "backend");
  249. if (elt != NULL && ucl_object_type (elt) == UCL_STRING) {
  250. if (strcmp (ucl_object_tostring (elt), "sqlite") == 0) {
  251. type = RSPAMD_FUZZY_BACKEND_SQLITE;
  252. }
  253. else if (strcmp (ucl_object_tostring (elt), "redis") == 0) {
  254. type = RSPAMD_FUZZY_BACKEND_REDIS;
  255. }
  256. else {
  257. g_set_error (err, rspamd_fuzzy_backend_quark (),
  258. EINVAL, "invalid backend type: %s",
  259. ucl_object_tostring (elt));
  260. return NULL;
  261. }
  262. }
  263. elt = ucl_object_lookup (config, "expire");
  264. if (elt != NULL) {
  265. expire = ucl_object_todouble (elt);
  266. }
  267. }
  268. bk = g_malloc0 (sizeof (*bk));
  269. bk->event_loop = ev_base;
  270. bk->expire = expire;
  271. bk->type = type;
  272. bk->subr = &fuzzy_subrs[type];
  273. if ((bk->subr_ud = bk->subr->init (bk, config, cfg, err)) == NULL) {
  274. g_free (bk);
  275. return NULL;
  276. }
  277. return bk;
  278. }
  279. void
  280. rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *bk,
  281. const struct rspamd_fuzzy_cmd *cmd,
  282. rspamd_fuzzy_check_cb cb, void *ud)
  283. {
  284. g_assert (bk != NULL);
  285. bk->subr->check (bk, cmd, cb, ud, bk->subr_ud);
  286. }
  287. static guint
  288. rspamd_fuzzy_digest_hash (gconstpointer key)
  289. {
  290. guint ret;
  291. /* Distributed uniformly already */
  292. memcpy (&ret, key, sizeof (ret));
  293. return ret;
  294. }
  295. static gboolean
  296. rspamd_fuzzy_digest_equal (gconstpointer v, gconstpointer v2)
  297. {
  298. return memcmp (v, v2, rspamd_cryptobox_HASHBYTES) == 0;
  299. }
  300. static void
  301. rspamd_fuzzy_backend_deduplicate_queue (GArray *updates)
  302. {
  303. GHashTable *seen = g_hash_table_new (rspamd_fuzzy_digest_hash,
  304. rspamd_fuzzy_digest_equal);
  305. struct fuzzy_peer_cmd *io_cmd, *found;
  306. struct rspamd_fuzzy_cmd *cmd;
  307. guchar *digest;
  308. guint i;
  309. for (i = 0; i < updates->len; i ++) {
  310. io_cmd = &g_array_index (updates, struct fuzzy_peer_cmd, i);
  311. if (io_cmd->is_shingle) {
  312. cmd = &io_cmd->cmd.shingle.basic;
  313. }
  314. else {
  315. cmd = &io_cmd->cmd.normal;
  316. }
  317. digest = cmd->digest;
  318. found = g_hash_table_lookup (seen, digest);
  319. if (found == NULL) {
  320. /* Add to the seen list, if not a duplicate (huh?) */
  321. if (cmd->cmd != FUZZY_DUP) {
  322. g_hash_table_insert (seen, digest, io_cmd);
  323. }
  324. }
  325. else {
  326. if (found->cmd.normal.flag != cmd->flag) {
  327. /* TODO: deal with flags better at some point */
  328. continue;
  329. }
  330. /* Apply heuristic */
  331. switch (cmd->cmd) {
  332. case FUZZY_WRITE:
  333. if (found->cmd.normal.cmd == FUZZY_WRITE) {
  334. /* Already seen */
  335. found->cmd.normal.value += cmd->value;
  336. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  337. }
  338. else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
  339. /* Seen refresh command, remove it as write has higher priority */
  340. g_hash_table_replace (seen, digest, io_cmd);
  341. found->cmd.normal.cmd = FUZZY_DUP;
  342. }
  343. else if (found->cmd.normal.cmd == FUZZY_DEL) {
  344. /* Request delete + add, weird, but ignore add */
  345. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  346. }
  347. break;
  348. case FUZZY_REFRESH:
  349. if (found->cmd.normal.cmd == FUZZY_WRITE) {
  350. /* No need to expire, handled by addition */
  351. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  352. }
  353. else if (found->cmd.normal.cmd == FUZZY_DEL) {
  354. /* Request delete + expire, ignore expire */
  355. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  356. }
  357. else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
  358. /* Already handled */
  359. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  360. }
  361. break;
  362. case FUZZY_DEL:
  363. /* Delete has priority over all other commands */
  364. g_hash_table_replace (seen, digest, io_cmd);
  365. found->cmd.normal.cmd = FUZZY_DUP;
  366. break;
  367. default:
  368. break;
  369. }
  370. }
  371. }
  372. g_hash_table_unref (seen);
  373. }
  374. void
  375. rspamd_fuzzy_backend_process_updates (struct rspamd_fuzzy_backend *bk,
  376. GArray *updates, const gchar *src, rspamd_fuzzy_update_cb cb,
  377. void *ud)
  378. {
  379. g_assert (bk != NULL);
  380. g_assert (updates != NULL);
  381. if (updates) {
  382. rspamd_fuzzy_backend_deduplicate_queue (updates);
  383. bk->subr->update (bk, updates, src, cb, ud, bk->subr_ud);
  384. }
  385. else if (cb) {
  386. cb (TRUE, 0, 0, 0, 0, ud);
  387. }
  388. }
  389. void
  390. rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *bk,
  391. rspamd_fuzzy_count_cb cb, void *ud)
  392. {
  393. g_assert (bk != NULL);
  394. bk->subr->count (bk, cb, ud, bk->subr_ud);
  395. }
  396. void
  397. rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *bk,
  398. const gchar *src,
  399. rspamd_fuzzy_version_cb cb, void *ud)
  400. {
  401. g_assert (bk != NULL);
  402. bk->subr->version (bk, src, cb, ud, bk->subr_ud);
  403. }
  404. const gchar *
  405. rspamd_fuzzy_backend_id (struct rspamd_fuzzy_backend *bk)
  406. {
  407. g_assert (bk != NULL);
  408. if (bk->subr->id) {
  409. return bk->subr->id (bk, bk->subr_ud);
  410. }
  411. return NULL;
  412. }
  413. static inline void
  414. rspamd_fuzzy_backend_periodic_sync (struct rspamd_fuzzy_backend *bk)
  415. {
  416. if (bk->periodic_cb) {
  417. if (bk->periodic_cb (bk->periodic_ud)) {
  418. if (bk->subr->periodic) {
  419. bk->subr->periodic (bk, bk->subr_ud);
  420. }
  421. }
  422. }
  423. else {
  424. if (bk->subr->periodic) {
  425. bk->subr->periodic (bk, bk->subr_ud);
  426. }
  427. }
  428. }
  429. static void
  430. rspamd_fuzzy_backend_periodic_cb (EV_P_ ev_timer *w, int revents)
  431. {
  432. struct rspamd_fuzzy_backend *bk = (struct rspamd_fuzzy_backend *)w->data;
  433. gdouble jittered;
  434. jittered = rspamd_time_jitter (bk->sync, bk->sync / 2.0);
  435. w->repeat = jittered;
  436. rspamd_fuzzy_backend_periodic_sync (bk);
  437. ev_timer_again (EV_A_ w);
  438. }
  439. void
  440. rspamd_fuzzy_backend_start_update (struct rspamd_fuzzy_backend *bk,
  441. gdouble timeout,
  442. rspamd_fuzzy_periodic_cb cb,
  443. void *ud)
  444. {
  445. gdouble jittered;
  446. g_assert (bk != NULL);
  447. if (bk->subr->periodic) {
  448. if (bk->sync > 0.0) {
  449. ev_timer_stop (bk->event_loop, &bk->periodic_event);
  450. }
  451. if (cb) {
  452. bk->periodic_cb = cb;
  453. bk->periodic_ud = ud;
  454. }
  455. rspamd_fuzzy_backend_periodic_sync (bk);
  456. bk->sync = timeout;
  457. jittered = rspamd_time_jitter (timeout, timeout / 2.0);
  458. bk->periodic_event.data = bk;
  459. ev_timer_init (&bk->periodic_event, rspamd_fuzzy_backend_periodic_cb,
  460. jittered, 0.0);
  461. ev_timer_start (bk->event_loop, &bk->periodic_event);
  462. }
  463. }
  464. void
  465. rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *bk)
  466. {
  467. g_assert (bk != NULL);
  468. if (bk->sync > 0.0) {
  469. rspamd_fuzzy_backend_periodic_sync (bk);
  470. ev_timer_stop (bk->event_loop, &bk->periodic_event);
  471. }
  472. bk->subr->close (bk, bk->subr_ud);
  473. g_free (bk);
  474. }
  475. struct ev_loop*
  476. rspamd_fuzzy_backend_event_base (struct rspamd_fuzzy_backend *backend)
  477. {
  478. return backend->event_loop;
  479. }
  480. gdouble
  481. rspamd_fuzzy_backend_get_expire (struct rspamd_fuzzy_backend *backend)
  482. {
  483. return backend->expire;
  484. }