You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

filter.c 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "mem_pool.h"
  18. #include "filter.h"
  19. #include "rspamd.h"
  20. #include "message.h"
  21. #include "lua/lua_common.h"
  22. #include <math.h>
  /*
   * NOTE(review): COMMON_PART_FACTOR is not referenced in the visible part of
   * this file; its meaning and unit cannot be determined from here. Confirm it
   * is still needed.
   */
  #define COMMON_PART_FACTOR 95
  24. struct metric_result *
  25. rspamd_create_metric_result (struct rspamd_task *task, const gchar *name)
  26. {
  27. struct metric_result *metric_res;
  28. struct metric *metric;
  29. metric_res = g_hash_table_lookup (task->results, name);
  30. if (metric_res != NULL) {
  31. return metric_res;
  32. }
  33. metric = g_hash_table_lookup (task->cfg->metrics, name);
  34. if (metric == NULL) {
  35. return NULL;
  36. }
  37. metric_res =
  38. rspamd_mempool_alloc (task->task_pool,
  39. sizeof (struct metric_result));
  40. metric_res->symbols = g_hash_table_new (rspamd_str_hash,
  41. rspamd_str_equal);
  42. rspamd_mempool_add_destructor (task->task_pool,
  43. (rspamd_mempool_destruct_t) g_hash_table_unref,
  44. metric_res->symbols);
  45. metric_res->sym_groups = g_hash_table_new (g_direct_hash, g_direct_equal);
  46. rspamd_mempool_add_destructor (task->task_pool,
  47. (rspamd_mempool_destruct_t) g_hash_table_unref,
  48. metric_res->sym_groups);
  49. metric_res->checked = FALSE;
  50. metric_res->metric = metric;
  51. metric_res->grow_factor = 0;
  52. metric_res->score = 0;
  53. g_hash_table_insert (task->results, (gpointer) metric->name,
  54. metric_res);
  55. metric_res->action = METRIC_ACTION_MAX;
  56. return metric_res;
  57. }
  58. static void
  59. insert_metric_result (struct rspamd_task *task,
  60. struct metric *metric,
  61. const gchar *symbol,
  62. double flag,
  63. GList * opts,
  64. gboolean single)
  65. {
  66. struct metric_result *metric_res;
  67. struct symbol *s;
  68. gdouble w, *gr_score = NULL;
  69. struct rspamd_symbol_def *sdef;
  70. struct rspamd_symbols_group *gr = NULL;
  71. const ucl_object_t *mobj, *sobj;
  72. metric_res = rspamd_create_metric_result (task, metric->name);
  73. sdef = g_hash_table_lookup (metric->symbols, symbol);
  74. if (sdef == NULL) {
  75. w = 0.0;
  76. }
  77. else {
  78. w = (*sdef->weight_ptr) * flag;
  79. gr = sdef->gr;
  80. if (gr != NULL) {
  81. gr_score = g_hash_table_lookup (metric_res->sym_groups, gr);
  82. if (gr_score == NULL) {
  83. gr_score = rspamd_mempool_alloc (task->task_pool, sizeof (gdouble));
  84. *gr_score = 0;
  85. g_hash_table_insert (metric_res->sym_groups, gr, gr_score);
  86. }
  87. }
  88. }
  89. if (task->settings) {
  90. mobj = ucl_object_find_key (task->settings, metric->name);
  91. if (mobj) {
  92. gdouble corr;
  93. sobj = ucl_object_find_key (mobj, symbol);
  94. if (sobj != NULL && ucl_object_todouble_safe (sobj, &corr)) {
  95. msg_debug ("settings: changed weight of symbol %s from %.2f to %.2f",
  96. symbol, w, corr);
  97. w = corr * flag;
  98. }
  99. }
  100. }
  101. /* XXX: does not take grow factor into account */
  102. if (gr != NULL && gr_score != NULL && gr->max_score > 0.0) {
  103. if (*gr_score >= gr->max_score) {
  104. msg_info_task ("maximum group score %.2f for group %s has been reached,"
  105. " ignoring symbol %s with weight %.2f", gr->max_score,
  106. gr->name, symbol, w);
  107. return;
  108. }
  109. else if (*gr_score + w > gr->max_score) {
  110. w = gr->max_score - *gr_score;
  111. }
  112. *gr_score += w;
  113. }
  114. /* Add metric score */
  115. if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) {
  116. if (sdef && (sdef->flags & RSPAMD_SYMBOL_FLAG_ONESHOT)) {
  117. /*
  118. * For one shot symbols we do not need to add them again, so
  119. * we just force single behaviour here
  120. */
  121. single = TRUE;
  122. }
  123. if (s->options && opts && opts != s->options) {
  124. /* Append new options */
  125. s->options = g_list_concat (s->options, g_list_copy (opts));
  126. /*
  127. * Note that there is no need to add new destructor of GList as elements of appended
  128. * GList are used directly, so just free initial GList
  129. */
  130. }
  131. else if (opts) {
  132. s->options = g_list_copy (opts);
  133. rspamd_mempool_add_destructor (task->task_pool,
  134. (rspamd_mempool_destruct_t) g_list_free, s->options);
  135. }
  136. if (!single) {
  137. /* Handle grow factor */
  138. if (metric_res->grow_factor && w > 0) {
  139. w *= metric_res->grow_factor;
  140. metric_res->grow_factor *= metric->grow_factor;
  141. }
  142. s->score += w;
  143. metric_res->score += w;
  144. }
  145. else {
  146. if (fabs (s->score) < fabs (w)) {
  147. /* Replace less weight with a bigger one */
  148. metric_res->score = metric_res->score - s->score + w;
  149. s->score = w;
  150. }
  151. }
  152. }
  153. else {
  154. s = rspamd_mempool_alloc (task->task_pool, sizeof (struct symbol));
  155. /* Handle grow factor */
  156. if (metric_res->grow_factor && w > 0) {
  157. w *= metric_res->grow_factor;
  158. metric_res->grow_factor *= metric->grow_factor;
  159. }
  160. else if (w > 0) {
  161. metric_res->grow_factor = metric->grow_factor;
  162. }
  163. s->score = w;
  164. s->name = symbol;
  165. s->def = sdef;
  166. metric_res->score += w;
  167. if (opts) {
  168. s->options = g_list_copy (opts);
  169. rspamd_mempool_add_destructor (task->task_pool,
  170. (rspamd_mempool_destruct_t) g_list_free, s->options);
  171. }
  172. else {
  173. s->options = NULL;
  174. }
  175. g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s);
  176. }
  177. msg_debug ("symbol %s, score %.2f, metric %s, factor: %f",
  178. symbol,
  179. s->score,
  180. metric->name,
  181. w);
  182. }
  /*
   * Lock taken by insert_result_common() around result insertion.
   * It is file-local in both branches: old GLib uses a static GStaticMutex,
   * newer GLib uses G_LOCK_DEFINE_STATIC so that the lock symbol does not get
   * external linkage.
   */
  #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
  static GStaticMutex result_mtx = G_STATIC_MUTEX_INIT;
  #else
  G_LOCK_DEFINE_STATIC (result_mtx);
  #endif
  188. static void
  189. insert_result_common (struct rspamd_task *task,
  190. const gchar *symbol,
  191. double flag,
  192. GList * opts,
  193. gboolean single)
  194. {
  195. struct metric *metric;
  196. GList *cur, *metric_list;
  197. /* Avoid concurrenting inserting of results */
  198. #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
  199. g_static_mutex_lock (&result_mtx);
  200. #else
  201. G_LOCK (result_mtx);
  202. #endif
  203. metric_list = g_hash_table_lookup (task->cfg->metrics_symbols, symbol);
  204. if (metric_list) {
  205. cur = metric_list;
  206. while (cur) {
  207. metric = cur->data;
  208. insert_metric_result (task, metric, symbol, flag, opts, single);
  209. cur = g_list_next (cur);
  210. }
  211. }
  212. else {
  213. /* Insert symbol to default metric */
  214. insert_metric_result (task,
  215. task->cfg->default_metric,
  216. symbol,
  217. flag,
  218. opts,
  219. single);
  220. }
  221. /* Process cache item */
  222. if (task->cfg->cache) {
  223. rspamd_symbols_cache_inc_frequency (task->cfg->cache, symbol);
  224. }
  225. if (opts != NULL) {
  226. /* XXX: it is not wise to destroy them here */
  227. g_list_free (opts);
  228. }
  229. #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
  230. g_static_mutex_unlock (&result_mtx);
  231. #else
  232. G_UNLOCK (result_mtx);
  233. #endif
  234. }
  235. /* Insert result that may be increased on next insertions */
  236. void
  237. rspamd_task_insert_result (struct rspamd_task *task,
  238. const gchar *symbol,
  239. double flag,
  240. GList * opts)
  241. {
  242. insert_result_common (task, symbol, flag, opts, task->cfg->one_shot_mode);
  243. }
  244. /* Insert result as a single option */
  245. void
  246. rspamd_task_insert_result_single (struct rspamd_task *task,
  247. const gchar *symbol,
  248. double flag,
  249. GList * opts)
  250. {
  251. insert_result_common (task, symbol, flag, opts, TRUE);
  252. }
  253. gboolean
  254. rspamd_action_from_str (const gchar *data, gint *result)
  255. {
  256. if (g_ascii_strncasecmp (data, "reject", sizeof ("reject") - 1) == 0) {
  257. *result = METRIC_ACTION_REJECT;
  258. }
  259. else if (g_ascii_strncasecmp (data, "greylist",
  260. sizeof ("greylist") - 1) == 0) {
  261. *result = METRIC_ACTION_GREYLIST;
  262. }
  263. else if (g_ascii_strncasecmp (data, "add_header", sizeof ("add_header") -
  264. 1) == 0) {
  265. *result = METRIC_ACTION_ADD_HEADER;
  266. }
  267. else if (g_ascii_strncasecmp (data, "rewrite_subject",
  268. sizeof ("rewrite_subject") - 1) == 0) {
  269. *result = METRIC_ACTION_REWRITE_SUBJECT;
  270. }
  271. else if (g_ascii_strncasecmp (data, "add header", sizeof ("add header") -
  272. 1) == 0) {
  273. *result = METRIC_ACTION_ADD_HEADER;
  274. }
  275. else if (g_ascii_strncasecmp (data, "rewrite subject",
  276. sizeof ("rewrite subject") - 1) == 0) {
  277. *result = METRIC_ACTION_REWRITE_SUBJECT;
  278. }
  279. else if (g_ascii_strncasecmp (data, "soft_reject",
  280. sizeof ("soft_reject") - 1) == 0) {
  281. *result = METRIC_ACTION_SOFT_REJECT;
  282. }
  283. else if (g_ascii_strncasecmp (data, "soft reject",
  284. sizeof ("soft reject") - 1) == 0) {
  285. *result = METRIC_ACTION_SOFT_REJECT;
  286. }
  287. else if (g_ascii_strncasecmp (data, "no_action",
  288. sizeof ("soft_reject") - 1) == 0) {
  289. *result = METRIC_ACTION_NOACTION;
  290. }
  291. else if (g_ascii_strncasecmp (data, "no action",
  292. sizeof ("soft reject") - 1) == 0) {
  293. *result = METRIC_ACTION_NOACTION;
  294. }
  295. else {
  296. return FALSE;
  297. }
  298. return TRUE;
  299. }
  300. const gchar *
  301. rspamd_action_to_str (enum rspamd_metric_action action)
  302. {
  303. switch (action) {
  304. case METRIC_ACTION_REJECT:
  305. return "reject";
  306. case METRIC_ACTION_SOFT_REJECT:
  307. return "soft reject";
  308. case METRIC_ACTION_REWRITE_SUBJECT:
  309. return "rewrite subject";
  310. case METRIC_ACTION_ADD_HEADER:
  311. return "add header";
  312. case METRIC_ACTION_GREYLIST:
  313. return "greylist";
  314. case METRIC_ACTION_NOACTION:
  315. return "no action";
  316. case METRIC_ACTION_MAX:
  317. return "invalid max action";
  318. }
  319. return "unknown action";
  320. }
  321. static double
  322. get_specific_action_score (struct rspamd_task *task,
  323. const ucl_object_t *metric,
  324. struct metric_action *action)
  325. {
  326. const ucl_object_t *act, *sact;
  327. const gchar *act_name;
  328. double score;
  329. if (metric) {
  330. act = ucl_object_find_key (metric, "actions");
  331. if (act) {
  332. act_name = rspamd_action_to_str (action->action);
  333. sact = ucl_object_find_key (act, act_name);
  334. if (sact != NULL && ucl_object_todouble_safe (sact, &score)) {
  335. msg_debug_task ("found override score %.2f for action %s in settings",
  336. score, act_name);
  337. return score;
  338. }
  339. }
  340. }
  341. return action->score;
  342. }
  343. gint
  344. rspamd_check_action_metric (struct rspamd_task *task,
  345. double score, double *rscore, struct metric *metric)
  346. {
  347. struct metric_action *action, *selected_action = NULL;
  348. double max_score = 0;
  349. const ucl_object_t *ms = NULL;
  350. int i;
  351. if (task->settings) {
  352. ms = ucl_object_find_key (task->settings, metric->name);
  353. }
  354. for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i++) {
  355. double sc;
  356. action = &metric->actions[i];
  357. sc = get_specific_action_score (task, ms, action);
  358. if (sc < 0) {
  359. continue;
  360. }
  361. if (score >= sc && sc > max_score) {
  362. selected_action = action;
  363. max_score = sc;
  364. }
  365. if (rscore != NULL && i == METRIC_ACTION_REJECT) {
  366. *rscore = sc;
  367. }
  368. }
  369. if (selected_action) {
  370. return selected_action->action;
  371. }
  372. return METRIC_ACTION_NOACTION;
  373. }