You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fuzzy_backend.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "fuzzy_backend.h"
  18. #include "fuzzy_backend_sqlite.h"
  19. #include "fuzzy_backend_redis.h"
  20. #include "cfg_file.h"
  21. #include "fuzzy_wire.h"
  /*
   * Expiration value used when the configuration carries no "expire" key.
   * NOTE(review): presumably expressed in seconds (172800 s = 48 h) - confirm
   * against the storage backends that consume it.
   */
  #define DEFAULT_EXPIRE 172800L

  /*
   * Storage engines this module can dispatch to. The numeric values are used
   * as indices into the fuzzy_subrs table, so they must stay in sync with it.
   */
  enum rspamd_fuzzy_backend_type {
      RSPAMD_FUZZY_BACKEND_SQLITE = 0,
      RSPAMD_FUZZY_BACKEND_REDIS = 1,
  };
  27. static void *rspamd_fuzzy_backend_init_sqlite(struct rspamd_fuzzy_backend *bk,
  28. const ucl_object_t *obj, struct rspamd_config *cfg, GError **err);
  29. static void rspamd_fuzzy_backend_check_sqlite(struct rspamd_fuzzy_backend *bk,
  30. const struct rspamd_fuzzy_cmd *cmd,
  31. rspamd_fuzzy_check_cb cb, void *ud,
  32. void *subr_ud);
  33. static void rspamd_fuzzy_backend_update_sqlite(struct rspamd_fuzzy_backend *bk,
  34. GArray *updates, const char *src,
  35. rspamd_fuzzy_update_cb cb, void *ud,
  36. void *subr_ud);
  37. static void rspamd_fuzzy_backend_count_sqlite(struct rspamd_fuzzy_backend *bk,
  38. rspamd_fuzzy_count_cb cb, void *ud,
  39. void *subr_ud);
  40. static void rspamd_fuzzy_backend_version_sqlite(struct rspamd_fuzzy_backend *bk,
  41. const char *src,
  42. rspamd_fuzzy_version_cb cb, void *ud,
  43. void *subr_ud);
  44. static const char *rspamd_fuzzy_backend_id_sqlite(struct rspamd_fuzzy_backend *bk,
  45. void *subr_ud);
  46. static void rspamd_fuzzy_backend_expire_sqlite(struct rspamd_fuzzy_backend *bk,
  47. void *subr_ud);
  48. static void rspamd_fuzzy_backend_close_sqlite(struct rspamd_fuzzy_backend *bk,
  49. void *subr_ud);
  50. struct rspamd_fuzzy_backend_subr {
  51. void *(*init)(struct rspamd_fuzzy_backend *bk, const ucl_object_t *obj,
  52. struct rspamd_config *cfg,
  53. GError **err);
  54. void (*check)(struct rspamd_fuzzy_backend *bk,
  55. const struct rspamd_fuzzy_cmd *cmd,
  56. rspamd_fuzzy_check_cb cb, void *ud,
  57. void *subr_ud);
  58. void (*update)(struct rspamd_fuzzy_backend *bk,
  59. GArray *updates, const char *src,
  60. rspamd_fuzzy_update_cb cb, void *ud,
  61. void *subr_ud);
  62. void (*count)(struct rspamd_fuzzy_backend *bk,
  63. rspamd_fuzzy_count_cb cb, void *ud,
  64. void *subr_ud);
  65. void (*version)(struct rspamd_fuzzy_backend *bk,
  66. const char *src,
  67. rspamd_fuzzy_version_cb cb, void *ud,
  68. void *subr_ud);
  69. const char *(*id)(struct rspamd_fuzzy_backend *bk, void *subr_ud);
  70. void (*periodic)(struct rspamd_fuzzy_backend *bk, void *subr_ud);
  71. void (*close)(struct rspamd_fuzzy_backend *bk, void *subr_ud);
  72. };
  73. static const struct rspamd_fuzzy_backend_subr fuzzy_subrs[] = {
  74. [RSPAMD_FUZZY_BACKEND_SQLITE] = {
  75. .init = rspamd_fuzzy_backend_init_sqlite,
  76. .check = rspamd_fuzzy_backend_check_sqlite,
  77. .update = rspamd_fuzzy_backend_update_sqlite,
  78. .count = rspamd_fuzzy_backend_count_sqlite,
  79. .version = rspamd_fuzzy_backend_version_sqlite,
  80. .id = rspamd_fuzzy_backend_id_sqlite,
  81. .periodic = rspamd_fuzzy_backend_expire_sqlite,
  82. .close = rspamd_fuzzy_backend_close_sqlite,
  83. },
  84. [RSPAMD_FUZZY_BACKEND_REDIS] = {
  85. .init = rspamd_fuzzy_backend_init_redis,
  86. .check = rspamd_fuzzy_backend_check_redis,
  87. .update = rspamd_fuzzy_backend_update_redis,
  88. .count = rspamd_fuzzy_backend_count_redis,
  89. .version = rspamd_fuzzy_backend_version_redis,
  90. .id = rspamd_fuzzy_backend_id_redis,
  91. .periodic = rspamd_fuzzy_backend_expire_redis,
  92. .close = rspamd_fuzzy_backend_close_redis,
  93. }};
  94. struct rspamd_fuzzy_backend {
  95. enum rspamd_fuzzy_backend_type type;
  96. double expire;
  97. double sync;
  98. struct ev_loop *event_loop;
  99. rspamd_fuzzy_periodic_cb periodic_cb;
  100. void *periodic_ud;
  101. const struct rspamd_fuzzy_backend_subr *subr;
  102. void *subr_ud;
  103. ev_timer periodic_event;
  104. };
  105. static GQuark
  106. rspamd_fuzzy_backend_quark(void)
  107. {
  108. return g_quark_from_static_string("fuzzy-backend");
  109. }
  110. static void *
  111. rspamd_fuzzy_backend_init_sqlite(struct rspamd_fuzzy_backend *bk,
  112. const ucl_object_t *obj, struct rspamd_config *cfg, GError **err)
  113. {
  114. const ucl_object_t *elt;
  115. elt = ucl_object_lookup_any(obj, "hashfile", "hash_file", "file",
  116. "database", NULL);
  117. if (elt == NULL || ucl_object_type(elt) != UCL_STRING) {
  118. g_set_error(err, rspamd_fuzzy_backend_quark(),
  119. EINVAL, "missing sqlite3 path");
  120. return NULL;
  121. }
  122. return rspamd_fuzzy_backend_sqlite_open(ucl_object_tostring(elt),
  123. FALSE, err);
  124. }
  125. static void
  126. rspamd_fuzzy_backend_check_sqlite(struct rspamd_fuzzy_backend *bk,
  127. const struct rspamd_fuzzy_cmd *cmd,
  128. rspamd_fuzzy_check_cb cb, void *ud,
  129. void *subr_ud)
  130. {
  131. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  132. struct rspamd_fuzzy_reply rep;
  133. rep = rspamd_fuzzy_backend_sqlite_check(sq, cmd, bk->expire);
  134. if (cb) {
  135. cb(&rep, ud);
  136. }
  137. }
  138. static void
  139. rspamd_fuzzy_backend_update_sqlite(struct rspamd_fuzzy_backend *bk,
  140. GArray *updates, const char *src,
  141. rspamd_fuzzy_update_cb cb, void *ud,
  142. void *subr_ud)
  143. {
  144. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  145. gboolean success = FALSE;
  146. unsigned int i;
  147. struct fuzzy_peer_cmd *io_cmd;
  148. struct rspamd_fuzzy_cmd *cmd;
  149. gpointer ptr;
  150. unsigned int nupdates = 0, nadded = 0, ndeleted = 0, nextended = 0, nignored = 0;
  151. if (rspamd_fuzzy_backend_sqlite_prepare_update(sq, src)) {
  152. for (i = 0; i < updates->len; i++) {
  153. io_cmd = &g_array_index(updates, struct fuzzy_peer_cmd, i);
  154. if (io_cmd->is_shingle) {
  155. cmd = &io_cmd->cmd.shingle.basic;
  156. ptr = &io_cmd->cmd.shingle;
  157. }
  158. else {
  159. cmd = &io_cmd->cmd.normal;
  160. ptr = &io_cmd->cmd.normal;
  161. }
  162. if (cmd->cmd == FUZZY_WRITE) {
  163. rspamd_fuzzy_backend_sqlite_add(sq, ptr);
  164. nadded++;
  165. nupdates++;
  166. }
  167. else if (cmd->cmd == FUZZY_DEL) {
  168. rspamd_fuzzy_backend_sqlite_del(sq, ptr);
  169. ndeleted++;
  170. nupdates++;
  171. }
  172. else {
  173. if (cmd->cmd == FUZZY_REFRESH) {
  174. nextended++;
  175. }
  176. else {
  177. nignored++;
  178. }
  179. }
  180. }
  181. if (rspamd_fuzzy_backend_sqlite_finish_update(sq, src,
  182. nupdates > 0)) {
  183. success = TRUE;
  184. }
  185. }
  186. if (cb) {
  187. cb(success, nadded, ndeleted, nextended, nignored, ud);
  188. }
  189. }
  190. static void
  191. rspamd_fuzzy_backend_count_sqlite(struct rspamd_fuzzy_backend *bk,
  192. rspamd_fuzzy_count_cb cb, void *ud,
  193. void *subr_ud)
  194. {
  195. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  196. uint64_t nhashes;
  197. nhashes = rspamd_fuzzy_backend_sqlite_count(sq);
  198. if (cb) {
  199. cb(nhashes, ud);
  200. }
  201. }
  202. static void
  203. rspamd_fuzzy_backend_version_sqlite(struct rspamd_fuzzy_backend *bk,
  204. const char *src,
  205. rspamd_fuzzy_version_cb cb, void *ud,
  206. void *subr_ud)
  207. {
  208. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  209. uint64_t rev;
  210. rev = rspamd_fuzzy_backend_sqlite_version(sq, src);
  211. if (cb) {
  212. cb(rev, ud);
  213. }
  214. }
  /* sqlite `id` hook: returns the identifier string of the sqlite backend */
  static const char *
  rspamd_fuzzy_backend_id_sqlite(struct rspamd_fuzzy_backend *bk,
                                 void *subr_ud)
  {
      struct rspamd_fuzzy_backend_sqlite *db = subr_ud;
      const char *ident = rspamd_fuzzy_sqlite_backend_id(db);

      return ident;
  }
  222. static void
  223. rspamd_fuzzy_backend_expire_sqlite(struct rspamd_fuzzy_backend *bk,
  224. void *subr_ud)
  225. {
  226. struct rspamd_fuzzy_backend_sqlite *sq = subr_ud;
  227. rspamd_fuzzy_backend_sqlite_sync(sq, bk->expire, TRUE);
  228. }
  /* sqlite `close` hook: closes the sqlite handle stored in `subr_ud` */
  static void
  rspamd_fuzzy_backend_close_sqlite(struct rspamd_fuzzy_backend *bk,
                                    void *subr_ud)
  {
      struct rspamd_fuzzy_backend_sqlite *db = subr_ud;

      rspamd_fuzzy_backend_sqlite_close(db);
  }
  236. struct rspamd_fuzzy_backend *
  237. rspamd_fuzzy_backend_create(struct ev_loop *ev_base,
  238. const ucl_object_t *config,
  239. struct rspamd_config *cfg,
  240. GError **err)
  241. {
  242. struct rspamd_fuzzy_backend *bk;
  243. enum rspamd_fuzzy_backend_type type = RSPAMD_FUZZY_BACKEND_SQLITE;
  244. const ucl_object_t *elt;
  245. double expire = DEFAULT_EXPIRE;
  246. if (config != NULL) {
  247. elt = ucl_object_lookup(config, "backend");
  248. if (elt != NULL && ucl_object_type(elt) == UCL_STRING) {
  249. if (strcmp(ucl_object_tostring(elt), "sqlite") == 0) {
  250. type = RSPAMD_FUZZY_BACKEND_SQLITE;
  251. }
  252. else if (strcmp(ucl_object_tostring(elt), "redis") == 0) {
  253. type = RSPAMD_FUZZY_BACKEND_REDIS;
  254. }
  255. else {
  256. g_set_error(err, rspamd_fuzzy_backend_quark(),
  257. EINVAL, "invalid backend type: %s",
  258. ucl_object_tostring(elt));
  259. return NULL;
  260. }
  261. }
  262. elt = ucl_object_lookup(config, "expire");
  263. if (elt != NULL) {
  264. expire = ucl_object_todouble(elt);
  265. }
  266. }
  267. bk = g_malloc0(sizeof(*bk));
  268. bk->event_loop = ev_base;
  269. bk->expire = expire;
  270. bk->type = type;
  271. bk->subr = &fuzzy_subrs[type];
  272. if ((bk->subr_ud = bk->subr->init(bk, config, cfg, err)) == NULL) {
  273. g_free(bk);
  274. return NULL;
  275. }
  276. return bk;
  277. }
  278. void rspamd_fuzzy_backend_check(struct rspamd_fuzzy_backend *bk,
  279. const struct rspamd_fuzzy_cmd *cmd,
  280. rspamd_fuzzy_check_cb cb, void *ud)
  281. {
  282. g_assert(bk != NULL);
  283. bk->subr->check(bk, cmd, cb, ud, bk->subr_ud);
  284. }
  285. static unsigned int
  286. rspamd_fuzzy_digest_hash(gconstpointer key)
  287. {
  288. unsigned int ret;
  289. /* Distributed uniformly already */
  290. memcpy(&ret, key, sizeof(ret));
  291. return ret;
  292. }
  293. static gboolean
  294. rspamd_fuzzy_digest_equal(gconstpointer v, gconstpointer v2)
  295. {
  296. return memcmp(v, v2, rspamd_cryptobox_HASHBYTES) == 0;
  297. }
  298. static void
  299. rspamd_fuzzy_backend_deduplicate_queue(GArray *updates)
  300. {
  301. GHashTable *seen = g_hash_table_new(rspamd_fuzzy_digest_hash,
  302. rspamd_fuzzy_digest_equal);
  303. struct fuzzy_peer_cmd *io_cmd, *found;
  304. struct rspamd_fuzzy_cmd *cmd;
  305. unsigned char *digest;
  306. unsigned int i;
  307. for (i = 0; i < updates->len; i++) {
  308. io_cmd = &g_array_index(updates, struct fuzzy_peer_cmd, i);
  309. if (io_cmd->is_shingle) {
  310. cmd = &io_cmd->cmd.shingle.basic;
  311. }
  312. else {
  313. cmd = &io_cmd->cmd.normal;
  314. }
  315. digest = cmd->digest;
  316. found = g_hash_table_lookup(seen, digest);
  317. if (found == NULL) {
  318. /* Add to the seen list, if not a duplicate (huh?) */
  319. if (cmd->cmd != FUZZY_DUP) {
  320. g_hash_table_insert(seen, digest, io_cmd);
  321. }
  322. }
  323. else {
  324. if (found->cmd.normal.flag != cmd->flag) {
  325. /* TODO: deal with flags better at some point */
  326. continue;
  327. }
  328. /* Apply heuristic */
  329. switch (cmd->cmd) {
  330. case FUZZY_WRITE:
  331. if (found->cmd.normal.cmd == FUZZY_WRITE) {
  332. /* Already seen */
  333. found->cmd.normal.value += cmd->value;
  334. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  335. }
  336. else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
  337. /* Seen refresh command, remove it as write has higher priority */
  338. g_hash_table_replace(seen, digest, io_cmd);
  339. found->cmd.normal.cmd = FUZZY_DUP;
  340. }
  341. else if (found->cmd.normal.cmd == FUZZY_DEL) {
  342. /* Request delete + add, weird, but ignore add */
  343. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  344. }
  345. break;
  346. case FUZZY_REFRESH:
  347. if (found->cmd.normal.cmd == FUZZY_WRITE) {
  348. /* No need to expire, handled by addition */
  349. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  350. }
  351. else if (found->cmd.normal.cmd == FUZZY_DEL) {
  352. /* Request delete + expire, ignore expire */
  353. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  354. }
  355. else if (found->cmd.normal.cmd == FUZZY_REFRESH) {
  356. /* Already handled */
  357. cmd->cmd = FUZZY_DUP; /* Ignore this one */
  358. }
  359. break;
  360. case FUZZY_DEL:
  361. /* Delete has priority over all other commands */
  362. g_hash_table_replace(seen, digest, io_cmd);
  363. found->cmd.normal.cmd = FUZZY_DUP;
  364. break;
  365. default:
  366. break;
  367. }
  368. }
  369. }
  370. g_hash_table_unref(seen);
  371. }
  372. void rspamd_fuzzy_backend_process_updates(struct rspamd_fuzzy_backend *bk,
  373. GArray *updates, const char *src, rspamd_fuzzy_update_cb cb,
  374. void *ud)
  375. {
  376. g_assert(bk != NULL);
  377. g_assert(updates != NULL);
  378. if (updates) {
  379. rspamd_fuzzy_backend_deduplicate_queue(updates);
  380. bk->subr->update(bk, updates, src, cb, ud, bk->subr_ud);
  381. }
  382. else if (cb) {
  383. cb(TRUE, 0, 0, 0, 0, ud);
  384. }
  385. }
  386. void rspamd_fuzzy_backend_count(struct rspamd_fuzzy_backend *bk,
  387. rspamd_fuzzy_count_cb cb, void *ud)
  388. {
  389. g_assert(bk != NULL);
  390. bk->subr->count(bk, cb, ud, bk->subr_ud);
  391. }
  392. void rspamd_fuzzy_backend_version(struct rspamd_fuzzy_backend *bk,
  393. const char *src,
  394. rspamd_fuzzy_version_cb cb, void *ud)
  395. {
  396. g_assert(bk != NULL);
  397. bk->subr->version(bk, src, cb, ud, bk->subr_ud);
  398. }
  399. const char *
  400. rspamd_fuzzy_backend_id(struct rspamd_fuzzy_backend *bk)
  401. {
  402. g_assert(bk != NULL);
  403. if (bk->subr->id) {
  404. return bk->subr->id(bk, bk->subr_ud);
  405. }
  406. return NULL;
  407. }
  408. static inline void
  409. rspamd_fuzzy_backend_periodic_sync(struct rspamd_fuzzy_backend *bk)
  410. {
  411. if (bk->periodic_cb) {
  412. if (bk->periodic_cb(bk->periodic_ud)) {
  413. if (bk->subr->periodic) {
  414. bk->subr->periodic(bk, bk->subr_ud);
  415. }
  416. }
  417. }
  418. else {
  419. if (bk->subr->periodic) {
  420. bk->subr->periodic(bk, bk->subr_ud);
  421. }
  422. }
  423. }
  424. static void
  425. rspamd_fuzzy_backend_periodic_cb(EV_P_ ev_timer *w, int revents)
  426. {
  427. struct rspamd_fuzzy_backend *bk = (struct rspamd_fuzzy_backend *) w->data;
  428. double jittered;
  429. jittered = rspamd_time_jitter(bk->sync, bk->sync / 2.0);
  430. w->repeat = jittered;
  431. rspamd_fuzzy_backend_periodic_sync(bk);
  432. ev_timer_again(EV_A_ w);
  433. }
  434. void rspamd_fuzzy_backend_start_update(struct rspamd_fuzzy_backend *bk,
  435. double timeout,
  436. rspamd_fuzzy_periodic_cb cb,
  437. void *ud)
  438. {
  439. double jittered;
  440. g_assert(bk != NULL);
  441. if (bk->subr->periodic) {
  442. if (bk->sync > 0.0) {
  443. ev_timer_stop(bk->event_loop, &bk->periodic_event);
  444. }
  445. if (cb) {
  446. bk->periodic_cb = cb;
  447. bk->periodic_ud = ud;
  448. }
  449. rspamd_fuzzy_backend_periodic_sync(bk);
  450. bk->sync = timeout;
  451. jittered = rspamd_time_jitter(timeout, timeout / 2.0);
  452. bk->periodic_event.data = bk;
  453. ev_timer_init(&bk->periodic_event, rspamd_fuzzy_backend_periodic_cb,
  454. jittered, 0.0);
  455. ev_timer_start(bk->event_loop, &bk->periodic_event);
  456. }
  457. }
  458. void rspamd_fuzzy_backend_close(struct rspamd_fuzzy_backend *bk)
  459. {
  460. g_assert(bk != NULL);
  461. if (bk->sync > 0.0) {
  462. rspamd_fuzzy_backend_periodic_sync(bk);
  463. ev_timer_stop(bk->event_loop, &bk->periodic_event);
  464. }
  465. bk->subr->close(bk, bk->subr_ud);
  466. g_free(bk);
  467. }
  468. struct ev_loop *
  469. rspamd_fuzzy_backend_event_base(struct rspamd_fuzzy_backend *backend)
  470. {
  471. return backend->event_loop;
  472. }
  473. double
  474. rspamd_fuzzy_backend_get_expire(struct rspamd_fuzzy_backend *backend)
  475. {
  476. return backend->expire;
  477. }