You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

stat_config.c 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "stat_api.h"
  18. #include "rspamd.h"
  19. #include "cfg_rcl.h"
  20. #include "stat_internal.h"
  21. static struct rspamd_stat_ctx *stat_ctx = NULL;
  22. static struct rspamd_stat_classifier stat_classifiers[] = {
  23. {
  24. .name = "bayes",
  25. .init_func = bayes_init,
  26. .classify_func = bayes_classify,
  27. .learn_spam_func = bayes_learn_spam,
  28. }
  29. };
  30. static struct rspamd_stat_tokenizer stat_tokenizers[] = {
  31. {
  32. .name = "osb-text",
  33. .get_config = rspamd_tokenizer_osb_get_config,
  34. .tokenize_func = rspamd_tokenizer_osb,
  35. },
  36. {
  37. .name = "osb",
  38. .get_config = rspamd_tokenizer_osb_get_config,
  39. .tokenize_func = rspamd_tokenizer_osb,
  40. },
  41. };
  /*
   * Expands to an initializer for one struct rspamd_stat_backend entry.
   * `label` is stringified into the backend name; `impl` selects the
   * rspamd_<impl>_* function family wired into the callbacks.
   */
  #define RSPAMD_STAT_BACKEND_ELT(label, impl) { \
      .name = #label, \
      .init = rspamd_##impl##_init, \
      .runtime = rspamd_##impl##_runtime, \
      .process_tokens = rspamd_##impl##_process_tokens, \
      .finalize_process = rspamd_##impl##_finalize_process, \
      .learn_tokens = rspamd_##impl##_learn_tokens, \
      .finalize_learn = rspamd_##impl##_finalize_learn, \
      .total_learns = rspamd_##impl##_total_learns, \
      .inc_learns = rspamd_##impl##_inc_learns, \
      .dec_learns = rspamd_##impl##_dec_learns, \
      .get_stat = rspamd_##impl##_get_stat, \
      .load_tokenizer_config = rspamd_##impl##_load_tokenizer_config, \
      .close = rspamd_##impl##_close \
  }
  57. static struct rspamd_stat_backend stat_backends[] = {
  58. RSPAMD_STAT_BACKEND_ELT(mmap, mmaped_file),
  59. RSPAMD_STAT_BACKEND_ELT(sqlite3, sqlite3),
  60. #ifdef WITH_HIREDIS
  61. RSPAMD_STAT_BACKEND_ELT(redis, redis)
  62. #endif
  63. };
  /*
   * Expands to an initializer for one struct rspamd_stat_cache entry.
   * `label` is stringified into the cache name; `impl` selects the
   * rspamd_stat_cache_<impl>_* function family wired into the callbacks.
   */
  #define RSPAMD_STAT_CACHE_ELT(label, impl) { \
      .name = #label, \
      .init = rspamd_stat_cache_##impl##_init, \
      .runtime = rspamd_stat_cache_##impl##_runtime, \
      .check = rspamd_stat_cache_##impl##_check, \
      .learn = rspamd_stat_cache_##impl##_learn, \
      .close = rspamd_stat_cache_##impl##_close \
  }
  72. static struct rspamd_stat_cache stat_caches[] = {
  73. RSPAMD_STAT_CACHE_ELT(sqlite3, sqlite3),
  74. #ifdef WITH_HIREDIS
  75. RSPAMD_STAT_CACHE_ELT(redis, redis),
  76. #endif
  77. };
  78. void
  79. rspamd_stat_init (struct rspamd_config *cfg, struct event_base *ev_base)
  80. {
  81. GList *cur, *curst;
  82. struct rspamd_classifier_config *clf;
  83. struct rspamd_statfile_config *stf;
  84. struct rspamd_stat_backend *bk;
  85. struct rspamd_statfile *st;
  86. struct rspamd_classifier *cl;
  87. const ucl_object_t *cache_obj = NULL, *cache_name_obj;
  88. const gchar *cache_name = NULL;
  89. if (stat_ctx == NULL) {
  90. stat_ctx = g_slice_alloc0 (sizeof (*stat_ctx));
  91. }
  92. stat_ctx->backends_subrs = stat_backends;
  93. stat_ctx->backends_count = G_N_ELEMENTS (stat_backends);
  94. stat_ctx->classifiers_subrs = stat_classifiers;
  95. stat_ctx->classifiers_count = G_N_ELEMENTS (stat_classifiers);
  96. stat_ctx->tokenizers_subrs = stat_tokenizers;
  97. stat_ctx->tokenizers_count = G_N_ELEMENTS (stat_tokenizers);
  98. stat_ctx->caches_subrs = stat_caches;
  99. stat_ctx->caches_count = G_N_ELEMENTS (stat_caches);
  100. stat_ctx->cfg = cfg;
  101. stat_ctx->statfiles = g_ptr_array_new ();
  102. stat_ctx->classifiers = g_ptr_array_new ();
  103. stat_ctx->async_elts = g_queue_new ();
  104. stat_ctx->ev_base = ev_base;
  105. REF_RETAIN (stat_ctx->cfg);
  106. /* Create statfiles from the classifiers */
  107. cur = cfg->classifiers;
  108. while (cur) {
  109. clf = cur->data;
  110. bk = rspamd_stat_get_backend (clf->backend);
  111. if (bk == NULL) {
  112. msg_err_config ("cannot get backend of type %s, so disable classifier"
  113. " %s completely", clf->backend, clf->name);
  114. cur = g_list_next (cur);
  115. continue;
  116. }
  117. /* XXX:
  118. * Here we get the first classifier tokenizer config as the only one
  119. * We NO LONGER support multiple tokenizers per rspamd instance
  120. */
  121. if (stat_ctx->tkcf == NULL) {
  122. stat_ctx->tokenizer = rspamd_stat_get_tokenizer (clf->tokenizer->name);
  123. g_assert (stat_ctx->tokenizer != NULL);
  124. stat_ctx->tkcf = stat_ctx->tokenizer->get_config (cfg->cfg_pool,
  125. clf->tokenizer, NULL);
  126. }
  127. cl = g_slice_alloc0 (sizeof (*cl));
  128. cl->cfg = clf;
  129. cl->ctx = stat_ctx;
  130. cl->statfiles_ids = g_array_new (FALSE, FALSE, sizeof (gint));
  131. cl->subrs = rspamd_stat_get_classifier (clf->classifier);
  132. g_assert (cl->subrs != NULL);
  133. cl->subrs->init_func (cfg->cfg_pool, cl);
  134. /* Init classifier cache */
  135. cache_name = NULL;
  136. if (clf->opts) {
  137. cache_obj = ucl_object_find_key (clf->opts, "cache");
  138. cache_name_obj = NULL;
  139. if (cache_obj) {
  140. cache_name_obj = ucl_object_find_any_key (cache_obj,
  141. "name", "type", NULL);
  142. }
  143. if (cache_name_obj) {
  144. cache_name = ucl_object_tostring (cache_name_obj);
  145. }
  146. }
  147. if (cache_name == NULL) {
  148. /* We assume that learn cache is the same as backend */
  149. cache_name = clf->backend;
  150. }
  151. curst = clf->statfiles;
  152. while (curst) {
  153. stf = curst->data;
  154. st = g_slice_alloc0 (sizeof (*st));
  155. st->classifier = cl;
  156. st->stcf = stf;
  157. st->backend = bk;
  158. st->bkcf = bk->init (stat_ctx, cfg, st);
  159. msg_debug_config ("added backend %s for symbol %s",
  160. bk->name, stf->symbol);
  161. /* XXX: bad hack to pass statfiles configuration to cache */
  162. if (cl->cache == NULL) {
  163. cl->cache = rspamd_stat_get_cache (cache_name);
  164. g_assert (cl->cache != NULL);
  165. cl->cachecf = cl->cache->init (stat_ctx, cfg, st, cache_obj);
  166. if (cl->cachecf == NULL) {
  167. msg_err_config ("error adding cache %s for symbol %s",
  168. cl->cache->name, stf->symbol);
  169. cl->cache = NULL;
  170. }
  171. else {
  172. msg_debug_config ("added cache %s for symbol %s",
  173. cl->cache->name, stf->symbol);
  174. }
  175. }
  176. if (st->bkcf == NULL) {
  177. msg_err_config ("cannot init backend %s for statfile %s",
  178. clf->backend, stf->symbol);
  179. g_slice_free1 (sizeof (*st), st);
  180. }
  181. else {
  182. st->id = stat_ctx->statfiles->len;
  183. g_ptr_array_add (stat_ctx->statfiles, st);
  184. g_array_append_val (cl->statfiles_ids, st->id);
  185. }
  186. curst = curst->next;
  187. }
  188. g_ptr_array_add (stat_ctx->classifiers, cl);
  189. cur = cur->next;
  190. }
  191. }
  192. void
  193. rspamd_stat_close (void)
  194. {
  195. struct rspamd_classifier *cl;
  196. struct rspamd_statfile *st;
  197. struct rspamd_stat_ctx *st_ctx;
  198. struct rspamd_stat_async_elt *aelt;
  199. GList *cur;
  200. guint i, j;
  201. gint id;
  202. st_ctx = rspamd_stat_get_ctx ();
  203. g_assert (st_ctx != NULL);
  204. for (i = 0; i < st_ctx->classifiers->len; i ++) {
  205. cl = g_ptr_array_index (st_ctx->classifiers, i);
  206. for (j = 0; j < cl->statfiles_ids->len; j ++) {
  207. id = g_array_index (cl->statfiles_ids, gint, j);
  208. st = g_ptr_array_index (st_ctx->statfiles, id);
  209. st->backend->close (st->bkcf);
  210. g_slice_free1 (sizeof (*st), st);
  211. }
  212. if (cl->cache && cl->cachecf) {
  213. cl->cache->close (cl->cachecf);
  214. }
  215. g_array_free (cl->statfiles_ids, TRUE);
  216. g_slice_free1 (sizeof (*cl), cl);
  217. }
  218. cur = st_ctx->async_elts->head;
  219. while (cur) {
  220. aelt = cur->data;
  221. REF_RELEASE (aelt);
  222. cur = g_list_next (cur);
  223. }
  224. g_queue_free (stat_ctx->async_elts);
  225. g_ptr_array_free (st_ctx->statfiles, TRUE);
  226. g_ptr_array_free (st_ctx->classifiers, TRUE);
  227. REF_RELEASE (stat_ctx->cfg);
  228. g_slice_free1 (sizeof (*st_ctx), st_ctx);
  229. /* Set global var to NULL */
  230. stat_ctx = NULL;
  231. }
  232. struct rspamd_stat_ctx *
  233. rspamd_stat_get_ctx (void)
  234. {
  235. return stat_ctx;
  236. }
  237. struct rspamd_stat_classifier *
  238. rspamd_stat_get_classifier (const gchar *name)
  239. {
  240. guint i;
  241. if (name == NULL || name[0] == '\0') {
  242. name = RSPAMD_DEFAULT_CLASSIFIER;
  243. }
  244. for (i = 0; i < stat_ctx->classifiers_count; i ++) {
  245. if (strcmp (name, stat_ctx->classifiers_subrs[i].name) == 0) {
  246. return &stat_ctx->classifiers_subrs[i];
  247. }
  248. }
  249. return NULL;
  250. }
  251. struct rspamd_stat_backend *
  252. rspamd_stat_get_backend (const gchar *name)
  253. {
  254. guint i;
  255. if (name == NULL || name[0] == '\0') {
  256. name = RSPAMD_DEFAULT_BACKEND;
  257. }
  258. for (i = 0; i < stat_ctx->backends_count; i ++) {
  259. if (strcmp (name, stat_ctx->backends_subrs[i].name) == 0) {
  260. return &stat_ctx->backends_subrs[i];
  261. }
  262. }
  263. return NULL;
  264. }
  265. struct rspamd_stat_tokenizer *
  266. rspamd_stat_get_tokenizer (const gchar *name)
  267. {
  268. guint i;
  269. if (name == NULL || name[0] == '\0') {
  270. name = RSPAMD_DEFAULT_TOKENIZER;
  271. }
  272. for (i = 0; i < stat_ctx->tokenizers_count; i ++) {
  273. if (strcmp (name, stat_ctx->tokenizers_subrs[i].name) == 0) {
  274. return &stat_ctx->tokenizers_subrs[i];
  275. }
  276. }
  277. return NULL;
  278. }
  279. struct rspamd_stat_cache *
  280. rspamd_stat_get_cache (const gchar *name)
  281. {
  282. guint i;
  283. if (name == NULL || name[0] == '\0') {
  284. name = RSPAMD_DEFAULT_CACHE;
  285. }
  286. for (i = 0; i < stat_ctx->caches_count; i++) {
  287. if (strcmp (name, stat_ctx->caches_subrs[i].name) == 0) {
  288. return &stat_ctx->caches_subrs[i];
  289. }
  290. }
  291. return NULL;
  292. }
  293. static void
  294. rspamd_async_elt_dtor (struct rspamd_stat_async_elt *elt)
  295. {
  296. if (elt->cleanup) {
  297. elt->cleanup (elt, elt->ud);
  298. }
  299. event_del (&elt->timer_ev);
  300. g_slice_free1 (sizeof (*elt), elt);
  301. }
  302. static void
  303. rspamd_async_elt_on_timer (gint fd, short what, gpointer d)
  304. {
  305. struct rspamd_stat_async_elt *elt = d;
  306. gdouble jittered_time;
  307. event_del (&elt->timer_ev);
  308. if (elt->enabled) {
  309. elt->handler (elt, elt->ud);
  310. }
  311. jittered_time = rspamd_time_jitter (elt->timeout, 0);
  312. double_to_tv (jittered_time, &elt->tv);
  313. event_add (&elt->timer_ev, &elt->tv);
  314. }
  315. struct rspamd_stat_async_elt*
  316. rspamd_stat_ctx_register_async (rspamd_stat_async_handler handler,
  317. rspamd_stat_async_cleanup cleanup,
  318. gpointer d,
  319. gdouble timeout)
  320. {
  321. struct rspamd_stat_async_elt *elt;
  322. struct rspamd_stat_ctx *st_ctx;
  323. st_ctx = rspamd_stat_get_ctx ();
  324. g_assert (st_ctx != NULL);
  325. elt = g_slice_alloc (sizeof (*elt));
  326. REF_INIT_RETAIN (elt, rspamd_async_elt_dtor);
  327. elt->handler = handler;
  328. elt->cleanup = cleanup;
  329. elt->ud = d;
  330. elt->timeout = timeout;
  331. /* Enabled by default */
  332. elt->enabled = TRUE;
  333. event_set (&elt->timer_ev, -1, EV_TIMEOUT, rspamd_async_elt_on_timer, elt);
  334. event_base_set (st_ctx->ev_base, &elt->timer_ev);
  335. /*
  336. * First we set timeval to zero as we want cb to be executed as
  337. * fast as possible
  338. */
  339. elt->tv.tv_sec = 0;
  340. elt->tv.tv_usec = 0;
  341. event_add (&elt->timer_ev, &elt->tv);
  342. g_queue_push_tail (st_ctx->async_elts, elt);
  343. return elt;
  344. }