Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

fuzzy_backend.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "fuzzy_backend.h"
  18. #include "fuzzy_backend_sqlite.h"
  19. #include "fuzzy_backend_redis.h"
  20. #include "cfg_file.h"
  21. #include "fuzzy_wire.h"
  22. #define DEFAULT_EXPIRE 172800L
  23. enum rspamd_fuzzy_backend_type {
  24. RSPAMD_FUZZY_BACKEND_SQLITE = 0,
  25. RSPAMD_FUZZY_BACKEND_REDIS = 1,
  26. };
  27. static void* rspamd_fuzzy_backend_init_sqlite (struct rspamd_fuzzy_backend *bk,
  28. const ucl_object_t *obj, struct rspamd_config *cfg, GError **err);
  29. static void rspamd_fuzzy_backend_check_sqlite (struct rspamd_fuzzy_backend *bk,
  30. const struct rspamd_fuzzy_cmd *cmd,
  31. rspamd_fuzzy_check_cb cb, void *ud,
  32. void *subr_ud);
  33. static void rspamd_fuzzy_backend_update_sqlite (struct rspamd_fuzzy_backend *bk,
  34. GArray *updates, const gchar *src,
  35. rspamd_fuzzy_update_cb cb, void *ud,
  36. void *subr_ud);
  37. static void rspamd_fuzzy_backend_count_sqlite (struct rspamd_fuzzy_backend *bk,
  38. rspamd_fuzzy_count_cb cb, void *ud,
  39. void *subr_ud);
  40. static void rspamd_fuzzy_backend_version_sqlite (struct rspamd_fuzzy_backend *bk,
  41. const gchar *src,
  42. rspamd_fuzzy_version_cb cb, void *ud,
  43. void *subr_ud);
  44. static const gchar* rspamd_fuzzy_backend_id_sqlite (struct rspamd_fuzzy_backend *bk,
  45. void *subr_ud);
  46. static void rspamd_fuzzy_backend_expire_sqlite (struct rspamd_fuzzy_backend *bk,
  47. void *subr_ud);
  48. static void rspamd_fuzzy_backend_close_sqlite (struct rspamd_fuzzy_backend *bk,
  49. void *subr_ud);
  50. struct rspamd_fuzzy_backend_subr {
  51. void* (*init) (struct rspamd_fuzzy_backend *bk, const ucl_object_t *obj,
  52. struct rspamd_config *cfg,
  53. GError **err);
  54. void (*check) (struct rspamd_fuzzy_backend *bk,
  55. const struct rspamd_fuzzy_cmd *cmd,
  56. rspamd_fuzzy_check_cb cb, void *ud,
  57. void *subr_ud);
  58. void (*update) (struct rspamd_fuzzy_backend *bk,
  59. GArray *updates, const gchar *src,
  60. rspamd_fuzzy_update_cb cb, void *ud,
  61. void *subr_ud);
  62. void (*count) (struct rspamd_fuzzy_backend *bk,
  63. rspamd_fuzzy_count_cb cb, void *ud,
  64. void *subr_ud);
  65. void (*version) (struct rspamd_fuzzy_backend *bk,
  66. const gchar *src,
  67. rspamd_fuzzy_version_cb cb, void *ud,
  68. void *subr_ud);
  69. const gchar* (*id) (struct rspamd_fuzzy_backend *bk, void *subr_ud);
  70. void (*periodic) (struct rspamd_fuzzy_backend *bk, void *subr_ud);
  71. void (*close) (struct rspamd_fuzzy_backend *bk, void *subr_ud);
  72. };
  73. static const struct rspamd_fuzzy_backend_subr fuzzy_subrs[] = {
  74. [RSPAMD_FUZZY_BACKEND_SQLITE] = {
  75. .init = rspamd_fuzzy_backend_init_sqlite,
  76. .check = rspamd_fuzzy_backend_check_sqlite,
  77. .update = rspamd_fuzzy_backend_update_sqlite,
  78. .count = rspamd_fuzzy_backend_count_sqlite,
  79. .version = rspamd_fuzzy_backend_version_sqlite,
  80. .id = rspamd_fuzzy_backend_id_sqlite,
  81. .periodic = rspamd_fuzzy_backend_expire_sqlite,
  82. .close = rspamd_fuzzy_backend_close_sqlite,
  83. },
  84. #ifdef WITH_HIREDIS
  85. [RSPAMD_FUZZY_BACKEND_REDIS] = {
  86. .init = rspamd_fuzzy_backend_init_redis,
  87. .check = rspamd_fuzzy_backend_check_redis,
  88. .update = rspamd_fuzzy_backend_update_redis,
  89. .count = rspamd_fuzzy_backend_count_redis,
  90. .version = rspamd_fuzzy_backend_version_redis,
  91. .id = rspamd_fuzzy_backend_id_redis,
  92. .periodic = rspamd_fuzzy_backend_expire_redis,
  93. .close = rspamd_fuzzy_backend_close_redis,
  94. }
  95. #endif
  96. };
  97. struct rspamd_fuzzy_backend {
  98. enum rspamd_fuzzy_backend_type type;
  99. gdouble expire;
  100. gdouble sync;
  101. struct event_base *ev_base;
  102. rspamd_fuzzy_periodic_cb periodic_cb;
  103. void *periodic_ud;
  104. const struct rspamd_fuzzy_backend_subr *subr;
  105. void *subr_ud;
  106. struct event periodic_event;
  107. };
  108. static GQuark
  109. rspamd_fuzzy_backend_quark (void)
  110. {
  111. return g_quark_from_static_string ("fuzzy-backend");
  112. }
  113. static void*
  114. rspamd_fuzzy_backend_init_sqlite (struct rspamd_fuzzy_backend *bk,
  115. const ucl_object_t *obj, struct rspamd_config *cfg, GError **err)
  116. {
  117. const ucl_object_t *elt;
  118. elt = ucl_object_lookup_any (obj, "hashfile", "hash_file", "file",
  119. "database", NULL);
  120. if (elt == NULL || ucl_object_type (elt) != UCL_STRING) {
  121. g_set_error (err, rspamd_fuzzy_backend_quark (),
  122. EINVAL, "missing sqlite3 path");
  123. return NULL;
  124. }
  125. return rspamd_fuzzy_backend_sqlite_open (ucl_object_tostring (elt),
  126. FALSE, err);
  127. }
  128. static void
  129. rspamd_fuzzy_backend_check_sqlite (struct rspamd_fuzzy_backend *bk,
  130. const struct rspamd_fuzzy_cmd *cmd,
  131. rspamd_fuzzy_check_cb cb, void *ud,
  132. void *subr_ud)
  133. {
  134. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  135. struct rspamd_fuzzy_reply rep;
  136. rep = rspamd_fuzzy_backend_sqlite_check (sq, cmd, bk->expire);
  137. if (cb) {
  138. cb (&rep, ud);
  139. }
  140. }
  141. static void
  142. rspamd_fuzzy_backend_update_sqlite (struct rspamd_fuzzy_backend *bk,
  143. GArray *updates, const gchar *src,
  144. rspamd_fuzzy_update_cb cb, void *ud,
  145. void *subr_ud)
  146. {
  147. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  148. gboolean success = FALSE;
  149. guint i;
  150. struct fuzzy_peer_cmd *io_cmd;
  151. struct rspamd_fuzzy_cmd *cmd;
  152. gpointer ptr;
  153. guint nupdates = 0, nadded = 0, ndeleted = 0, nextended = 0, nignored = 0;
  154. if (rspamd_fuzzy_backend_sqlite_prepare_update (sq, src)) {
  155. for (i = 0; i < updates->len; i ++) {
  156. io_cmd = &g_array_index (updates, struct fuzzy_peer_cmd, i);
  157. if (io_cmd->is_shingle) {
  158. cmd = &io_cmd->cmd.shingle.basic;
  159. ptr = &io_cmd->cmd.shingle;
  160. }
  161. else {
  162. cmd = &io_cmd->cmd.normal;
  163. ptr = &io_cmd->cmd.normal;
  164. }
  165. if (cmd->cmd == FUZZY_WRITE) {
  166. rspamd_fuzzy_backend_sqlite_add (sq, ptr);
  167. nadded ++;
  168. nupdates ++;
  169. }
  170. else if (cmd->cmd == FUZZY_DEL) {
  171. rspamd_fuzzy_backend_sqlite_del (sq, ptr);
  172. ndeleted ++;
  173. nupdates ++;
  174. }
  175. else {
  176. if (cmd->cmd == FUZZY_REFRESH) {
  177. nextended ++;
  178. }
  179. else {
  180. nignored ++;
  181. }
  182. }
  183. }
  184. if (rspamd_fuzzy_backend_sqlite_finish_update (sq, src,
  185. nupdates > 0)) {
  186. success = TRUE;
  187. }
  188. }
  189. if (cb) {
  190. cb (success, nadded, ndeleted, nextended, nignored, ud);
  191. }
  192. }
  193. static void
  194. rspamd_fuzzy_backend_count_sqlite (struct rspamd_fuzzy_backend *bk,
  195. rspamd_fuzzy_count_cb cb, void *ud,
  196. void *subr_ud)
  197. {
  198. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  199. guint64 nhashes;
  200. nhashes = rspamd_fuzzy_backend_sqlite_count (sq);
  201. if (cb) {
  202. cb (nhashes, ud);
  203. }
  204. }
  205. static void
  206. rspamd_fuzzy_backend_version_sqlite (struct rspamd_fuzzy_backend *bk,
  207. const gchar *src,
  208. rspamd_fuzzy_version_cb cb, void *ud,
  209. void *subr_ud)
  210. {
  211. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  212. guint64 rev;
  213. rev = rspamd_fuzzy_backend_sqlite_version (sq, src);
  214. if (cb) {
  215. cb (rev, ud);
  216. }
  217. }
  218. static const gchar*
  219. rspamd_fuzzy_backend_id_sqlite (struct rspamd_fuzzy_backend *bk,
  220. void *subr_ud)
  221. {
  222. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  223. return rspamd_fuzzy_sqlite_backend_id (sq);
  224. }
  225. static void
  226. rspamd_fuzzy_backend_expire_sqlite (struct rspamd_fuzzy_backend *bk,
  227. void *subr_ud)
  228. {
  229. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  230. rspamd_fuzzy_backend_sqlite_sync (sq, bk->expire, TRUE);
  231. }
  232. static void
  233. rspamd_fuzzy_backend_close_sqlite (struct rspamd_fuzzy_backend *bk,
  234. void *subr_ud)
  235. {
  236. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  237. rspamd_fuzzy_backend_sqlite_close (sq);
  238. }
  239. struct rspamd_fuzzy_backend *
  240. rspamd_fuzzy_backend_create (struct event_base *ev_base,
  241. const ucl_object_t *config,
  242. struct rspamd_config *cfg,
  243. GError **err)
  244. {
  245. struct rspamd_fuzzy_backend *bk;
  246. enum rspamd_fuzzy_backend_type type = RSPAMD_FUZZY_BACKEND_SQLITE;
  247. const ucl_object_t *elt;
  248. gdouble expire = DEFAULT_EXPIRE;
  249. if (config != NULL) {
  250. elt = ucl_object_lookup (config, "backend");
  251. if (elt != NULL && ucl_object_type (elt) == UCL_STRING) {
  252. if (strcmp (ucl_object_tostring (elt), "sqlite") == 0) {
  253. type = RSPAMD_FUZZY_BACKEND_SQLITE;
  254. }
  255. else if (strcmp (ucl_object_tostring (elt), "redis") == 0) {
  256. type = RSPAMD_FUZZY_BACKEND_REDIS;
  257. }
  258. else {
  259. g_set_error (err, rspamd_fuzzy_backend_quark (),
  260. EINVAL, "invalid backend type: %s",
  261. ucl_object_tostring (elt));
  262. return NULL;
  263. }
  264. }
  265. elt = ucl_object_lookup (config, "expire");
  266. if (elt != NULL) {
  267. expire = ucl_object_todouble (elt);
  268. }
  269. }
  270. bk = g_malloc0 (sizeof (*bk));
  271. bk->ev_base = ev_base;
  272. bk->expire = expire;
  273. bk->type = type;
  274. bk->subr = &fuzzy_subrs[type];
  275. if ((bk->subr_ud = bk->subr->init (bk, config, cfg, err)) == NULL) {
  276. g_free (bk);
  277. return NULL;
  278. }
  279. return bk;
  280. }
  281. void
  282. rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *bk,
  283. const struct rspamd_fuzzy_cmd *cmd,
  284. rspamd_fuzzy_check_cb cb, void *ud)
  285. {
  286. g_assert (bk != NULL);
  287. bk->subr->check (bk, cmd, cb, ud, bk->subr_ud);
  288. }
  289. static guint
  290. rspamd_fuzzy_digest_hash (gconstpointer key)
  291. {
  292. guint ret;
  293. /* Distirbuted uniformly already */
  294. memcpy (&ret, key, sizeof (ret));
  295. return ret;
  296. }
  297. static gboolean
  298. rspamd_fuzzy_digest_equal (gconstpointer v, gconstpointer v2)
  299. {
  300. return memcmp (v, v2, rspamd_cryptobox_HASHBYTES) == 0;
  301. }
  302. static void
  303. rspamd_fuzzy_backend_deduplicate_queue (GArray *updates)
  304. {
  305. GHashTable *seen = g_hash_table_new (rspamd_fuzzy_digest_hash,
  306. rspamd_fuzzy_digest_equal);
  307. struct fuzzy_peer_cmd *io_cmd, *found;
  308. struct rspamd_fuzzy_cmd *cmd;
  309. guchar *digest;
  310. guint i;
  311. for (i = 0; i < updates->len; i ++) {
  312. io_cmd = &g_array_index (updates, struct fuzzy_peer_cmd, i);
  313. if (io_cmd->is_shingle) {
  314. cmd = &io_cmd->cmd.shingle.basic;
  315. }
  316. else {
  317. cmd = &io_cmd->cmd.normal;
  318. }
  319. digest = cmd->digest;
  320. found = g_hash_table_lookup (seen, digest);
  321. if (found == NULL) {
  322. /* Add to the seen list, if not a duplicate (huh?) */
  323. if (cmd->cmd != FUZZY_DUP) {
  324. g_hash_table_insert (seen, digest, io_cmd);
  325. }
  326. }
  327. else {
  328. if (found->cmd.normal.flag != cmd->flag) {
  329. /* TODO: deal with flags better at some point */
  330. continue;
  331. }
  332. /* Apply heuristic */
  333. switch (cmd->cmd) {
  334. case FUZZY_WRITE:
  335. if (found->cmd.normal.cmd == FUZZY_WRITE) {
  336. /* Already seen */
  337. found->cmd.normal.value += cmd->value;
  338. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  339. }
  340. else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
  341. /* Seen refresh command, remove it as write has higher priority */
  342. g_hash_table_replace (seen, digest, io_cmd);
  343. found->cmd.normal.cmd = FUZZY_DUP;
  344. }
  345. else if (found->cmd.normal.cmd == FUZZY_DEL) {
  346. /* Request delete + add, weird, but ignore add */
  347. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  348. }
  349. break;
  350. case FUZZY_REFRESH:
  351. if (found->cmd.normal.cmd == FUZZY_WRITE) {
  352. /* No need to expire, handled by addition */
  353. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  354. }
  355. else if (found->cmd.normal.cmd == FUZZY_DEL) {
  356. /* Request delete + expire, ignore expire */
  357. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  358. }
  359. else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
  360. /* Already handled */
  361. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  362. }
  363. break;
  364. case FUZZY_DEL:
  365. /* Delete has priority over all other commands */
  366. g_hash_table_replace (seen, digest, io_cmd);
  367. found->cmd.normal.cmd = FUZZY_DUP;
  368. break;
  369. default:
  370. break;
  371. }
  372. }
  373. }
  374. g_hash_table_unref (seen);
  375. }
  376. void
  377. rspamd_fuzzy_backend_process_updates (struct rspamd_fuzzy_backend *bk,
  378. GArray *updates, const gchar *src, rspamd_fuzzy_update_cb cb,
  379. void *ud)
  380. {
  381. g_assert (bk != NULL);
  382. g_assert (updates != NULL);
  383. if (updates) {
  384. rspamd_fuzzy_backend_deduplicate_queue (updates);
  385. bk->subr->update (bk, updates, src, cb, ud, bk->subr_ud);
  386. }
  387. else if (cb) {
  388. cb (TRUE, 0, 0, 0, 0, ud);
  389. }
  390. }
  391. void
  392. rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *bk,
  393. rspamd_fuzzy_count_cb cb, void *ud)
  394. {
  395. g_assert (bk != NULL);
  396. bk->subr->count (bk, cb, ud, bk->subr_ud);
  397. }
  398. void
  399. rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *bk,
  400. const gchar *src,
  401. rspamd_fuzzy_version_cb cb, void *ud)
  402. {
  403. g_assert (bk != NULL);
  404. bk->subr->version (bk, src, cb, ud, bk->subr_ud);
  405. }
  406. const gchar *
  407. rspamd_fuzzy_backend_id (struct rspamd_fuzzy_backend *bk)
  408. {
  409. g_assert (bk != NULL);
  410. if (bk->subr->id) {
  411. return bk->subr->id (bk, bk->subr_ud);
  412. }
  413. return NULL;
  414. }
  415. static inline void
  416. rspamd_fuzzy_backend_periodic_sync (struct rspamd_fuzzy_backend *bk)
  417. {
  418. if (bk->periodic_cb) {
  419. if (bk->periodic_cb (bk->periodic_ud)) {
  420. if (bk->subr->periodic) {
  421. bk->subr->periodic (bk, bk->subr_ud);
  422. }
  423. }
  424. }
  425. else {
  426. if (bk->subr->periodic) {
  427. bk->subr->periodic (bk, bk->subr_ud);
  428. }
  429. }
  430. }
  431. static void
  432. rspamd_fuzzy_backend_periodic_cb (gint fd, short what, void *ud)
  433. {
  434. struct rspamd_fuzzy_backend *bk = ud;
  435. gdouble jittered;
  436. struct timeval tv;
  437. jittered = rspamd_time_jitter (bk->sync, bk->sync / 2.0);
  438. double_to_tv (jittered, &tv);
  439. event_del (&bk->periodic_event);
  440. rspamd_fuzzy_backend_periodic_sync (bk);
  441. event_add (&bk->periodic_event, &tv);
  442. }
  443. void
  444. rspamd_fuzzy_backend_start_update (struct rspamd_fuzzy_backend *bk,
  445. gdouble timeout,
  446. rspamd_fuzzy_periodic_cb cb,
  447. void *ud)
  448. {
  449. gdouble jittered;
  450. struct timeval tv;
  451. g_assert (bk != NULL);
  452. if (bk->subr->periodic) {
  453. if (bk->sync > 0.0) {
  454. event_del (&bk->periodic_event);
  455. }
  456. if (cb) {
  457. bk->periodic_cb = cb;
  458. bk->periodic_ud = ud;
  459. }
  460. rspamd_fuzzy_backend_periodic_sync (bk);
  461. bk->sync = timeout;
  462. jittered = rspamd_time_jitter (timeout, timeout / 2.0);
  463. double_to_tv (jittered, &tv);
  464. event_set (&bk->periodic_event, -1, EV_TIMEOUT,
  465. rspamd_fuzzy_backend_periodic_cb, bk);
  466. event_base_set (bk->ev_base, &bk->periodic_event);
  467. event_add (&bk->periodic_event, &tv);
  468. }
  469. }
  470. void
  471. rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *bk)
  472. {
  473. g_assert (bk != NULL);
  474. if (bk->sync > 0.0) {
  475. rspamd_fuzzy_backend_periodic_sync (bk);
  476. event_del (&bk->periodic_event);
  477. }
  478. bk->subr->close (bk, bk->subr_ud);
  479. g_free (bk);
  480. }
  481. struct event_base*
  482. rspamd_fuzzy_backend_event_base (struct rspamd_fuzzy_backend *backend)
  483. {
  484. return backend->ev_base;
  485. }
  486. gdouble
  487. rspamd_fuzzy_backend_get_expire (struct rspamd_fuzzy_backend *backend)
  488. {
  489. return backend->expire;
  490. }