You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

redis_cache.c 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "learn_cache.h"
  18. #include "rspamd.h"
  19. #include "stat_api.h"
  20. #include "stat_internal.h"
  21. #include "cryptobox.h"
  22. #include "ucl.h"
  23. #include "hiredis.h"
  24. #include "adapters/libevent.h"
  /* Request timeout used when the statfile options carry no "timeout" key */
  #define REDIS_DEFAULT_TIMEOUT   0.5
  /* NOTE(review): not referenced in the visible part of this file */
  #define REDIS_STAT_TIMEOUT      30
  /* Port assumed for servers configured without an explicit port */
  #define REDIS_DEFAULT_PORT      6379
  /* Redis hash name used when the statfile options carry no string "key" */
  #define DEFAULT_REDIS_KEY       "learned_ids"
  29. struct rspamd_redis_cache_ctx {
  30. struct rspamd_statfile_config *stcf;
  31. struct upstream_list *read_servers;
  32. struct upstream_list *write_servers;
  33. const gchar *redis_object;
  34. gdouble timeout;
  35. };
  36. struct rspamd_redis_cache_runtime {
  37. struct rspamd_redis_cache_ctx *ctx;
  38. struct rspamd_task *task;
  39. struct upstream *selected;
  40. struct event timeout_event;
  41. redisAsyncContext *redis;
  42. };
  43. static GQuark
  44. rspamd_stat_cache_redis_quark (void)
  45. {
  46. return g_quark_from_static_string ("redis-statistics");
  47. }
  48. /* Called on connection termination */
  49. static void
  50. rspamd_redis_cache_fin (gpointer data)
  51. {
  52. struct rspamd_redis_cache_runtime *rt = data;
  53. event_del (&rt->timeout_event);
  54. redisAsyncFree (rt->redis);
  55. }
  56. static void
  57. rspamd_redis_cache_timeout (gint fd, short what, gpointer d)
  58. {
  59. struct rspamd_redis_cache_runtime *rt = d;
  60. struct rspamd_task *task;
  61. task = rt->task;
  62. msg_err_task ("connection to redis server %s timed out",
  63. rspamd_upstream_name (rt->selected));
  64. rspamd_upstream_fail (rt->selected);
  65. rspamd_session_remove_event (task->s, rspamd_redis_cache_fin, d);
  66. }
  67. /* Called when we have checked the specified message id */
  68. static void
  69. rspamd_stat_cache_redis_get (redisAsyncContext *c, gpointer r, gpointer priv)
  70. {
  71. struct rspamd_redis_cache_runtime *rt = priv;
  72. redisReply *reply = r;
  73. struct rspamd_task *task;
  74. glong val = 0;
  75. task = rt->task;
  76. if (c->err == 0) {
  77. if (G_LIKELY (reply->type == REDIS_REPLY_INTEGER)) {
  78. val = reply->integer;
  79. }
  80. else if (reply->type == REDIS_REPLY_STRING) {
  81. rspamd_strtol (reply->str, reply->len, &val);
  82. }
  83. else {
  84. if (reply->type != REDIS_REPLY_NIL) {
  85. msg_err_task ("bad learned type for %s: %d",
  86. rt->ctx->stcf->symbol, reply->type);
  87. }
  88. val = 0;
  89. }
  90. if ((val > 0 && (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM)) ||
  91. (val < 0 && (task->flags & RSPAMD_TASK_FLAG_LEARN_HAM))) {
  92. /* Already learned */
  93. g_set_error (&task->err, rspamd_stat_quark (), 404,
  94. "<%s> has been already "
  95. "learned as %s, ignore it", task->message_id,
  96. (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM) ? "spam" : "ham");
  97. task->flags |= RSPAMD_TASK_FLAG_ALREADY_LEARNED;
  98. }
  99. else if (val != 0) {
  100. /* Unlearn flag */
  101. task->flags |= RSPAMD_TASK_FLAG_UNLEARN;
  102. }
  103. rspamd_upstream_ok (rt->selected);
  104. }
  105. else {
  106. rspamd_upstream_fail (rt->selected);
  107. }
  108. rspamd_session_remove_event (task->s, rspamd_redis_cache_fin, rt);
  109. }
  110. /* Called when we have learned the specified message id */
  111. static void
  112. rspamd_stat_cache_redis_set (redisAsyncContext *c, gpointer r, gpointer priv)
  113. {
  114. struct rspamd_redis_cache_runtime *rt = priv;
  115. struct rspamd_task *task;
  116. task = rt->task;
  117. if (c->err == 0) {
  118. /* XXX: we ignore results here */
  119. rspamd_upstream_ok (rt->selected);
  120. }
  121. else {
  122. rspamd_upstream_fail (rt->selected);
  123. }
  124. rspamd_session_remove_event (task->s, rspamd_redis_cache_fin, rt);
  125. }
  126. static void
  127. rspamd_stat_cache_redis_generate_id (struct rspamd_task *task)
  128. {
  129. rspamd_cryptobox_hash_state_t st;
  130. rspamd_token_t *tok;
  131. guint i;
  132. guchar out[rspamd_cryptobox_HASHBYTES];
  133. gchar *b32out;
  134. gchar *user = NULL;
  135. rspamd_cryptobox_hash_init (&st, NULL, 0);
  136. user = rspamd_mempool_get_variable (task->task_pool, "stat_user");
  137. /* Use dedicated hash space for per users cache */
  138. if (user != NULL) {
  139. rspamd_cryptobox_hash_update (&st, user, strlen (user));
  140. }
  141. for (i = 0; i < task->tokens->len; i ++) {
  142. tok = g_ptr_array_index (task->tokens, i);
  143. rspamd_cryptobox_hash_update (&st, tok->data, tok->datalen);
  144. }
  145. rspamd_cryptobox_hash_final (&st, out);
  146. b32out = rspamd_encode_base32 (out, sizeof (out));
  147. g_assert (b32out != NULL);
  148. rspamd_mempool_set_variable (task->task_pool, "words_hash", b32out, g_free);
  149. }
  150. gpointer
  151. rspamd_stat_cache_redis_init (struct rspamd_stat_ctx *ctx,
  152. struct rspamd_config *cfg,
  153. struct rspamd_statfile *st,
  154. const ucl_object_t *cf)
  155. {
  156. struct rspamd_redis_cache_ctx *cache_ctx;
  157. struct rspamd_statfile_config *stf = st->stcf;
  158. const ucl_object_t *elt, *relt;
  159. cache_ctx = g_slice_alloc0 (sizeof (*cache_ctx));
  160. elt = ucl_object_find_any_key (stf->opts, "read_servers", "servers", NULL);
  161. if (elt == NULL) {
  162. if (st->classifier->cfg->opts) {
  163. elt = ucl_object_find_any_key (st->classifier->cfg->opts,
  164. "read_servers", "servers", NULL);
  165. }
  166. if (elt == NULL) {
  167. msg_err ("statfile %s has no redis servers needed by cache", stf->symbol);
  168. return NULL;
  169. }
  170. }
  171. relt = elt;
  172. cache_ctx->read_servers = rspamd_upstreams_create (cfg->ups_ctx);
  173. if (!rspamd_upstreams_from_ucl (cache_ctx->read_servers, elt,
  174. REDIS_DEFAULT_PORT, NULL)) {
  175. msg_err ("statfile %s cannot get read servers configuration for the cache",
  176. stf->symbol);
  177. return NULL;
  178. }
  179. elt = ucl_object_find_key (stf->opts, "write_servers");
  180. if (elt == NULL) {
  181. /* Use read servers as write ones */
  182. g_assert (relt != NULL);
  183. cache_ctx->write_servers = rspamd_upstreams_create (cfg->ups_ctx);
  184. if (!rspamd_upstreams_from_ucl (cache_ctx->write_servers, relt,
  185. REDIS_DEFAULT_PORT, NULL)) {
  186. msg_err ("statfile %s cannot get write servers configuration for the cache",
  187. stf->symbol);
  188. return NULL;
  189. }
  190. }
  191. else {
  192. cache_ctx->write_servers = rspamd_upstreams_create (cfg->ups_ctx);
  193. if (!rspamd_upstreams_from_ucl (cache_ctx->write_servers, elt,
  194. REDIS_DEFAULT_PORT, NULL)) {
  195. msg_err ("statfile %s cannot get write servers configuration for the cache",
  196. stf->symbol);
  197. rspamd_upstreams_destroy (cache_ctx->write_servers);
  198. cache_ctx->write_servers = NULL;
  199. }
  200. }
  201. elt = ucl_object_find_key (stf->opts, "key");
  202. if (elt == NULL || ucl_object_type (elt) != UCL_STRING) {
  203. cache_ctx->redis_object = DEFAULT_REDIS_KEY;
  204. }
  205. else {
  206. cache_ctx->redis_object = ucl_object_tostring (elt);
  207. }
  208. elt = ucl_object_find_key (stf->opts, "timeout");
  209. if (elt) {
  210. cache_ctx->timeout = ucl_object_todouble (elt);
  211. }
  212. else {
  213. cache_ctx->timeout = REDIS_DEFAULT_TIMEOUT;
  214. }
  215. cache_ctx->stcf = stf;
  216. return (gpointer)cache_ctx;
  217. }
  218. gpointer
  219. rspamd_stat_cache_redis_runtime (struct rspamd_task *task,
  220. gpointer c, gboolean learn)
  221. {
  222. struct rspamd_redis_cache_ctx *ctx = c;
  223. struct rspamd_redis_cache_runtime *rt;
  224. struct upstream *up;
  225. rspamd_inet_addr_t *addr;
  226. g_assert (ctx != NULL);
  227. if (learn && ctx->write_servers == NULL) {
  228. msg_err_task ("no write servers defined for %s, cannot learn",
  229. ctx->stcf->symbol);
  230. return NULL;
  231. }
  232. if (learn) {
  233. up = rspamd_upstream_get (ctx->write_servers,
  234. RSPAMD_UPSTREAM_MASTER_SLAVE,
  235. NULL,
  236. 0);
  237. }
  238. else {
  239. up = rspamd_upstream_get (ctx->read_servers,
  240. RSPAMD_UPSTREAM_ROUND_ROBIN,
  241. NULL,
  242. 0);
  243. }
  244. if (up == NULL) {
  245. msg_err_task ("no upstreams reachable");
  246. return NULL;
  247. }
  248. rt = rspamd_mempool_alloc0 (task->task_pool, sizeof (*rt));
  249. rt->selected = up;
  250. rt->task = task;
  251. rt->ctx = ctx;
  252. addr = rspamd_upstream_addr (up);
  253. g_assert (addr != NULL);
  254. rt->redis = redisAsyncConnect (rspamd_inet_address_to_string (addr),
  255. rspamd_inet_address_get_port (addr));
  256. g_assert (rt->redis != NULL);
  257. redisLibeventAttach (rt->redis, task->ev_base);
  258. /* Now check stats */
  259. event_set (&rt->timeout_event, -1, EV_TIMEOUT, rspamd_redis_cache_timeout, rt);
  260. event_base_set (task->ev_base, &rt->timeout_event);
  261. if (!learn) {
  262. rspamd_stat_cache_redis_generate_id (task);
  263. }
  264. return rt;
  265. }
  266. gint
  267. rspamd_stat_cache_redis_check (struct rspamd_task *task,
  268. gboolean is_spam,
  269. gpointer runtime)
  270. {
  271. struct rspamd_redis_cache_runtime *rt = runtime;
  272. struct timeval tv;
  273. gchar *h;
  274. h = rspamd_mempool_get_variable (task->task_pool, "words_hash");
  275. g_assert (h != NULL);
  276. double_to_tv (rt->ctx->timeout, &tv);
  277. if (redisAsyncCommand (rt->redis, rspamd_stat_cache_redis_get, rt,
  278. "HGET %s %s",
  279. rt->ctx->redis_object, h) == REDIS_OK) {
  280. rspamd_session_add_event (task->s, rspamd_redis_cache_fin, rt,
  281. rspamd_stat_cache_redis_quark ());
  282. event_add (&rt->timeout_event, &tv);
  283. }
  284. /* We need to return OK every time */
  285. return RSPAMD_LEARN_OK;
  286. }
  287. gint
  288. rspamd_stat_cache_redis_learn (struct rspamd_task *task,
  289. gboolean is_spam,
  290. gpointer runtime)
  291. {
  292. struct rspamd_redis_cache_runtime *rt = runtime;
  293. struct timeval tv;
  294. gchar *h;
  295. gint flag;
  296. h = rspamd_mempool_get_variable (task->task_pool, "words_hash");
  297. g_assert (h != NULL);
  298. double_to_tv (rt->ctx->timeout, &tv);
  299. flag = (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM) ? 1 : -1;
  300. if (redisAsyncCommand (rt->redis, rspamd_stat_cache_redis_set, rt,
  301. "HSET %s %s %d",
  302. rt->ctx->redis_object, h, flag) == REDIS_OK) {
  303. rspamd_session_add_event (task->s, rspamd_redis_cache_fin, rt,
  304. rspamd_stat_cache_redis_quark ());
  305. event_add (&rt->timeout_event, &tv);
  306. }
  307. /* We need to return OK every time */
  308. return RSPAMD_LEARN_OK;
  309. }
  310. void
  311. rspamd_stat_cache_redis_close (gpointer c)
  312. {
  313. }