You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

scan_result.c 25KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "mem_pool.h"
  18. #include "scan_result.h"
  19. #include "rspamd.h"
  20. #include "message.h"
  21. #include "lua/lua_common.h"
  22. #include "libserver/cfg_file_private.h"
  23. #include "libmime/scan_result_private.h"
  24. #include "contrib/fastutf8/fastutf8.h"
  25. #include <math.h>
  26. #include "contrib/uthash/utlist.h"
  /*
   * Debug logging shortcut for this file: forwards the formatted message to
   * rspamd_conditional_debug_fast() using rspamd_metric_log_id and the "metric"
   * module name, tagged with task->task_pool->tag.uid and the calling function
   * name (G_STRFUNC). The expansion references `task`, so the macro can only be
   * used where a variable with that name is in scope.
   */
  27. #define msg_debug_metric(...) rspamd_conditional_debug_fast (NULL, NULL, \
  28. rspamd_metric_log_id, "metric", task->task_pool->tag.uid, \
  29. G_STRFUNC, \
  30. __VA_ARGS__)
  31. INIT_LOG_MODULE(metric)
  32. /*
   * Running estimate of the number of symbols per scan result: updated by
   * rspamd_scan_result_dtor() and read (via .mean) by
   * rspamd_create_metric_result() to pre-size the symbols hash.
   */
  33. static struct rspamd_counter_data symbols_count;
  34. static void
  35. rspamd_scan_result_dtor (gpointer d)
  36. {
  37. struct rspamd_scan_result *r = (struct rspamd_scan_result *)d;
  38. struct rspamd_symbol_result *sres;
  39. rspamd_set_counter_ema (&symbols_count, kh_size (r->symbols), 0.5);
  40. if (r->symbol_cbref != -1) {
  41. luaL_unref (r->task->cfg->lua_state, LUA_REGISTRYINDEX, r->symbol_cbref);
  42. }
  43. kh_foreach_value (r->symbols, sres, {
  44. if (sres->options) {
  45. kh_destroy (rspamd_options_hash, sres->options);
  46. }
  47. });
  48. kh_destroy (rspamd_symbols_hash, r->symbols);
  49. kh_destroy (rspamd_symbols_group_hash, r->sym_groups);
  50. }
  51. struct rspamd_scan_result *
  52. rspamd_create_metric_result (struct rspamd_task *task,
  53. const gchar *name, gint lua_sym_cbref)
  54. {
  55. struct rspamd_scan_result *metric_res;
  56. guint i;
  57. metric_res = rspamd_mempool_alloc0 (task->task_pool,
  58. sizeof (struct rspamd_scan_result));
  59. metric_res->symbols = kh_init (rspamd_symbols_hash);
  60. metric_res->sym_groups = kh_init (rspamd_symbols_group_hash);
  61. if (name) {
  62. metric_res->name = rspamd_mempool_strdup (task->task_pool, name);
  63. }
  64. else {
  65. metric_res->name = NULL;
  66. }
  67. metric_res->symbol_cbref = lua_sym_cbref;
  68. metric_res->task = task;
  69. /* Optimize allocation */
  70. kh_resize (rspamd_symbols_group_hash, metric_res->sym_groups, 4);
  71. if (symbols_count.mean > 4) {
  72. kh_resize (rspamd_symbols_hash, metric_res->symbols, symbols_count.mean);
  73. }
  74. else {
  75. kh_resize (rspamd_symbols_hash, metric_res->symbols, 4);
  76. }
  77. if (task->cfg) {
  78. struct rspamd_action *act, *tmp;
  79. metric_res->actions_config = rspamd_mempool_alloc0 (task->task_pool,
  80. sizeof (struct rspamd_action_config) * HASH_COUNT (task->cfg->actions));
  81. i = 0;
  82. HASH_ITER (hh, task->cfg->actions, act, tmp) {
  83. metric_res->actions_config[i].flags = RSPAMD_ACTION_RESULT_DEFAULT;
  84. if (!(act->flags & RSPAMD_ACTION_NO_THRESHOLD)) {
  85. metric_res->actions_config[i].cur_limit = act->threshold;
  86. }
  87. else {
  88. metric_res->actions_config[i].flags |= RSPAMD_ACTION_RESULT_NO_THRESHOLD;
  89. }
  90. metric_res->actions_config[i].action = act;
  91. i ++;
  92. }
  93. metric_res->nactions = i;
  94. }
  95. rspamd_mempool_add_destructor (task->task_pool,
  96. rspamd_scan_result_dtor,
  97. metric_res);
  98. DL_APPEND (task->result, metric_res);
  99. return metric_res;
  100. }
  101. static inline int
  102. rspamd_pr_sort (const struct rspamd_passthrough_result *pra,
  103. const struct rspamd_passthrough_result *prb)
  104. {
  105. return prb->priority - pra->priority;
  106. }
  107. bool
  108. rspamd_add_passthrough_result (struct rspamd_task *task,
  109. struct rspamd_action *action,
  110. guint priority,
  111. double target_score,
  112. const gchar *message,
  113. const gchar *module,
  114. uint flags,
  115. struct rspamd_scan_result *scan_result)
  116. {
  117. struct rspamd_passthrough_result *pr;
  118. if (scan_result == NULL) {
  119. scan_result = task->result;
  120. }
  121. /* Find the specific action config */
  122. struct rspamd_action_config *action_config = NULL;
  123. for (unsigned int i = 0; i < scan_result->nactions; i ++) {
  124. struct rspamd_action_config *cur = &scan_result->actions_config[i];
  125. /* We assume that all action pointers are static */
  126. if (cur->action == action) {
  127. action_config = cur;
  128. break;
  129. }
  130. }
  131. if (action_config && (action_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
  132. msg_info_task ("<%s>: NOT set pre-result to '%s' %s(%.2f): '%s' from %s(%d); action is disabled",
  133. MESSAGE_FIELD_CHECK (task, message_id), action->name,
  134. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  135. target_score,
  136. message, module, priority);
  137. return false;
  138. }
  139. pr = rspamd_mempool_alloc (task->task_pool, sizeof (*pr));
  140. pr->action = action;
  141. pr->priority = priority;
  142. pr->message = message;
  143. pr->module = module;
  144. pr->target_score = target_score;
  145. pr->flags = flags;
  146. DL_APPEND (scan_result->passthrough_result, pr);
  147. DL_SORT (scan_result->passthrough_result, rspamd_pr_sort);
  148. if (!isnan (target_score)) {
  149. msg_info_task ("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)",
  150. MESSAGE_FIELD_CHECK (task, message_id), action->name,
  151. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  152. target_score,
  153. message, module, priority);
  154. }
  155. else {
  156. msg_info_task ("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)",
  157. MESSAGE_FIELD_CHECK (task, message_id), action->name,
  158. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  159. message, module, priority);
  160. }
  161. scan_result->nresults ++;
  162. return true;
  163. }
  164. static inline gdouble
  165. rspamd_check_group_score (struct rspamd_task *task,
  166. const gchar *symbol,
  167. struct rspamd_symbols_group *gr,
  168. gdouble *group_score,
  169. gdouble w)
  170. {
  171. if (gr != NULL && group_score && gr->max_score > 0.0 && w > 0.0) {
  172. if (*group_score >= gr->max_score && w > 0) {
  173. msg_info_task ("maximum group score %.2f for group %s has been reached,"
  174. " ignoring symbol %s with weight %.2f", gr->max_score,
  175. gr->name, symbol, w);
  176. return NAN;
  177. }
  178. else if (*group_score + w > gr->max_score) {
  179. w = gr->max_score - *group_score;
  180. }
  181. }
  182. return w;
  183. }
  184. #ifndef DBL_EPSILON
  185. #define DBL_EPSILON 2.2204460492503131e-16
  186. #endif
  187. static struct rspamd_symbol_result *
  188. insert_metric_result (struct rspamd_task *task,
  189. const gchar *symbol,
  190. double weight,
  191. const gchar *opt,
  192. struct rspamd_scan_result *metric_res,
  193. enum rspamd_symbol_insert_flags flags,
  194. bool *new_sym)
  195. {
  196. struct rspamd_symbol_result *s = NULL;
  197. gdouble final_score, *gr_score = NULL, next_gf = 1.0, diff;
  198. struct rspamd_symbol *sdef;
  199. struct rspamd_symbols_group *gr = NULL;
  200. const ucl_object_t *mobj, *sobj;
  201. gint max_shots, ret;
  202. guint i;
  203. khiter_t k;
  204. gboolean single = !!(flags & RSPAMD_SYMBOL_INSERT_SINGLE);
  205. gchar *sym_cpy;
  206. if (!isfinite (weight)) {
  207. msg_warn_task ("detected %s score for symbol %s, replace it with zero",
  208. isnan (weight) ? "NaN" : "infinity", symbol);
  209. weight = 0.0;
  210. }
  211. msg_debug_metric ("want to insert symbol %s, initial weight %.2f",
  212. symbol, weight);
  213. sdef = g_hash_table_lookup (task->cfg->symbols, symbol);
  214. if (sdef == NULL) {
  215. if (flags & RSPAMD_SYMBOL_INSERT_ENFORCE) {
  216. final_score = 1.0 * weight; /* Enforce static weight to 1.0 */
  217. }
  218. else {
  219. final_score = 0.0;
  220. }
  221. msg_debug_metric ("no symbol definition for %s; final multiplier %.2f",
  222. symbol, final_score);
  223. }
  224. else {
  225. if (sdef->cache_item) {
  226. /* Check if we can insert this symbol at all */
  227. if (!rspamd_symcache_is_item_allowed (task, sdef->cache_item, FALSE)) {
  228. msg_debug_metric ("symbol %s is not allowed to be inserted due to settings",
  229. symbol);
  230. return NULL;
  231. }
  232. }
  233. final_score = (*sdef->weight_ptr) * weight;
  234. PTR_ARRAY_FOREACH (sdef->groups, i, gr) {
  235. k = kh_get (rspamd_symbols_group_hash, metric_res->sym_groups, gr);
  236. if (k == kh_end (metric_res->sym_groups)) {
  237. k = kh_put (rspamd_symbols_group_hash, metric_res->sym_groups,
  238. gr, &ret);
  239. kh_value (metric_res->sym_groups, k) = 0;
  240. }
  241. }
  242. msg_debug_metric ("metric multiplier for %s is %.2f",
  243. symbol, *sdef->weight_ptr);
  244. }
  245. if (task->settings) {
  246. gdouble corr;
  247. mobj = ucl_object_lookup (task->settings, "scores");
  248. if (!mobj) {
  249. /* Legacy */
  250. mobj = task->settings;
  251. }
  252. else {
  253. msg_debug_metric ("found scores in the settings");
  254. }
  255. sobj = ucl_object_lookup (mobj, symbol);
  256. if (sobj != NULL && ucl_object_todouble_safe (sobj, &corr)) {
  257. msg_debug_metric ("settings: changed weight of symbol %s from %.2f "
  258. "to %.2f * %.2f",
  259. symbol, final_score, corr, weight);
  260. final_score = corr * weight;
  261. }
  262. }
  263. k = kh_get (rspamd_symbols_hash, metric_res->symbols, symbol);
  264. if (k != kh_end (metric_res->symbols)) {
  265. /* Existing metric score */
  266. s = kh_value (metric_res->symbols, k);
  267. if (single) {
  268. max_shots = 1;
  269. }
  270. else {
  271. if (sdef) {
  272. max_shots = sdef->nshots;
  273. }
  274. else {
  275. max_shots = task->cfg->default_max_shots;
  276. }
  277. }
  278. msg_debug_metric ("nshots: %d for symbol %s", max_shots, symbol);
  279. if (!single && (max_shots > 0 && (s->nshots >= max_shots))) {
  280. single = TRUE;
  281. }
  282. s->nshots ++;
  283. if (opt) {
  284. rspamd_task_add_result_option (task, s, opt, strlen (opt));
  285. }
  286. /* Adjust diff */
  287. if (!single) {
  288. diff = final_score;
  289. msg_debug_metric ("symbol %s can be inserted multiple times: %.2f weight",
  290. symbol, diff);
  291. }
  292. else {
  293. if (fabs (s->score) < fabs (final_score) &&
  294. signbit (s->score) == signbit (final_score)) {
  295. /* Replace less significant weight with a more significant one */
  296. diff = final_score - s->score;
  297. msg_debug_metric ("symbol %s can be inserted single time;"
  298. " weight adjusted %.2f + %.2f",
  299. symbol, s->score, diff);
  300. }
  301. else {
  302. diff = 0;
  303. }
  304. }
  305. if (diff) {
  306. /* Handle grow factor */
  307. if (metric_res->grow_factor && diff > 0) {
  308. diff *= metric_res->grow_factor;
  309. next_gf *= task->cfg->grow_factor;
  310. }
  311. else if (diff > 0) {
  312. next_gf = task->cfg->grow_factor;
  313. }
  314. msg_debug_metric ("adjust grow factor to %.2f for symbol %s (%.2f final)",
  315. next_gf, symbol, diff);
  316. if (sdef) {
  317. PTR_ARRAY_FOREACH (sdef->groups, i, gr) {
  318. gdouble cur_diff;
  319. k = kh_get (rspamd_symbols_group_hash,
  320. metric_res->sym_groups, gr);
  321. g_assert (k != kh_end (metric_res->sym_groups));
  322. gr_score = &kh_value (metric_res->sym_groups, k);
  323. cur_diff = rspamd_check_group_score (task, symbol, gr,
  324. gr_score, diff);
  325. if (isnan (cur_diff)) {
  326. /* Limit reached, do not add result */
  327. msg_debug_metric (
  328. "group limit %.2f is reached for %s when inserting symbol %s;"
  329. " drop score %.2f",
  330. *gr_score, gr->name, symbol, diff);
  331. diff = NAN;
  332. break;
  333. }
  334. else if (gr_score) {
  335. *gr_score += cur_diff;
  336. if (cur_diff < diff) {
  337. /* Reduce */
  338. msg_debug_metric (
  339. "group limit %.2f is reached for %s when inserting symbol %s;"
  340. " reduce score %.2f - %.2f",
  341. *gr_score, gr->name, symbol, diff, cur_diff);
  342. diff = cur_diff;
  343. }
  344. }
  345. }
  346. }
  347. if (!isnan (diff)) {
  348. metric_res->score += diff;
  349. metric_res->grow_factor = next_gf;
  350. if (single) {
  351. msg_debug_metric ("final score for single symbol %s = %.2f; %.2f diff",
  352. symbol, final_score, diff);
  353. s->score = final_score;
  354. } else {
  355. msg_debug_metric ("increase final score for multiple symbol %s += %.2f = %.2f",
  356. symbol, s->score, diff);
  357. s->score += diff;
  358. }
  359. }
  360. }
  361. }
  362. else {
  363. /* New result */
  364. if (new_sym) {
  365. *new_sym = true;
  366. }
  367. sym_cpy = rspamd_mempool_strdup (task->task_pool, symbol);
  368. k = kh_put (rspamd_symbols_hash, metric_res->symbols,
  369. sym_cpy, &ret);
  370. g_assert (ret > 0);
  371. s = rspamd_mempool_alloc0 (task->task_pool, sizeof (*s));
  372. kh_value (metric_res->symbols, k) = s;
  373. /* Handle grow factor */
  374. if (metric_res->grow_factor && final_score > 0) {
  375. final_score *= metric_res->grow_factor;
  376. next_gf *= task->cfg->grow_factor;
  377. }
  378. else if (final_score > 0) {
  379. next_gf = task->cfg->grow_factor;
  380. }
  381. msg_debug_metric ("adjust grow factor to %.2f for symbol %s (%.2f final)",
  382. next_gf, symbol, final_score);
  383. s->name = sym_cpy;
  384. s->sym = sdef;
  385. s->nshots = 1;
  386. if (sdef) {
  387. /* Check group limits */
  388. PTR_ARRAY_FOREACH (sdef->groups, i, gr) {
  389. gdouble cur_score;
  390. k = kh_get (rspamd_symbols_group_hash, metric_res->sym_groups, gr);
  391. g_assert (k != kh_end (metric_res->sym_groups));
  392. gr_score = &kh_value (metric_res->sym_groups, k);
  393. cur_score = rspamd_check_group_score (task, symbol, gr,
  394. gr_score, final_score);
  395. if (isnan (cur_score)) {
  396. /* Limit reached, do not add result */
  397. msg_debug_metric (
  398. "group limit %.2f is reached for %s when inserting symbol %s;"
  399. " drop score %.2f",
  400. *gr_score, gr->name, symbol, final_score);
  401. final_score = NAN;
  402. break;
  403. } else if (gr_score) {
  404. *gr_score += cur_score;
  405. if (cur_score < final_score) {
  406. /* Reduce */
  407. msg_debug_metric (
  408. "group limit %.2f is reached for %s when inserting symbol %s;"
  409. " reduce score %.2f - %.2f",
  410. *gr_score, gr->name, symbol, final_score, cur_score);
  411. final_score = cur_score;
  412. }
  413. }
  414. }
  415. }
  416. if (!isnan (final_score)) {
  417. const double epsilon = DBL_EPSILON;
  418. metric_res->score += final_score;
  419. metric_res->grow_factor = next_gf;
  420. s->score = final_score;
  421. if (final_score > epsilon) {
  422. metric_res->npositive ++;
  423. metric_res->positive_score += final_score;
  424. }
  425. else if (final_score < -epsilon) {
  426. metric_res->nnegative ++;
  427. metric_res->negative_score += fabs (final_score);
  428. }
  429. }
  430. else {
  431. s->score = 0;
  432. }
  433. if (opt) {
  434. rspamd_task_add_result_option (task, s, opt, strlen (opt));
  435. }
  436. }
  437. msg_debug_metric ("final insertion for symbol %s, score %.2f, factor: %f",
  438. symbol,
  439. s->score,
  440. final_score);
  441. metric_res->nresults ++;
  442. return s;
  443. }
  444. struct rspamd_symbol_result *
  445. rspamd_task_insert_result_full (struct rspamd_task *task,
  446. const gchar *symbol,
  447. double weight,
  448. const gchar *opt,
  449. enum rspamd_symbol_insert_flags flags,
  450. struct rspamd_scan_result *result)
  451. {
  452. struct rspamd_symbol_result *s = NULL, *ret = NULL;
  453. struct rspamd_scan_result *mres;
  454. if (task->processed_stages & (RSPAMD_TASK_STAGE_IDEMPOTENT >> 1)) {
  455. msg_err_task ("cannot insert symbol %s on idempotent phase",
  456. symbol);
  457. return NULL;
  458. }
  459. if (result == NULL) {
  460. /* Insert everywhere */
  461. DL_FOREACH (task->result, mres) {
  462. if (mres->symbol_cbref != -1) {
  463. /* Check if we can insert this symbol to this symbol result */
  464. GError *err = NULL;
  465. lua_State *L = (lua_State *) task->cfg->lua_state;
  466. if (!rspamd_lua_universal_pcall (L, mres->symbol_cbref,
  467. G_STRLOC, 1, "uss", &err,
  468. "rspamd{task}", task, symbol, mres->name ? mres->name : "default")) {
  469. msg_warn_task ("cannot call for symbol_cbref for result %s: %e",
  470. mres->name ? mres->name : "default", err);
  471. g_error_free (err);
  472. continue;
  473. }
  474. else {
  475. if (!lua_toboolean (L, -1)) {
  476. /* Skip symbol */
  477. msg_debug_metric ("skip symbol %s for result %s due to Lua return value",
  478. symbol, mres->name);
  479. lua_pop (L, 1); /* Remove result */
  480. continue;
  481. }
  482. lua_pop (L, 1); /* Remove result */
  483. }
  484. }
  485. bool new_symbol = false;
  486. s = insert_metric_result (task,
  487. symbol,
  488. weight,
  489. opt,
  490. mres,
  491. flags,
  492. &new_symbol);
  493. if (mres->name == NULL) {
  494. /* Default result */
  495. ret = s;
  496. /* Process cache item */
  497. if (s && task->cfg->cache && s->sym) {
  498. rspamd_symcache_inc_frequency (task->cfg->cache,
  499. s->sym->cache_item);
  500. }
  501. }
  502. else if (new_symbol) {
  503. /* O(N) but we normally don't have any shadow results */
  504. LL_APPEND (ret, s);
  505. }
  506. }
  507. }
  508. else {
  509. /* Specific insertion */
  510. s = insert_metric_result (task,
  511. symbol,
  512. weight,
  513. opt,
  514. result,
  515. flags,
  516. NULL);
  517. ret = s;
  518. if (result->name == NULL) {
  519. /* Process cache item */
  520. if (s && task->cfg->cache && s->sym) {
  521. rspamd_symcache_inc_frequency (task->cfg->cache,
  522. s->sym->cache_item);
  523. }
  524. }
  525. }
  526. return ret;
  527. }
  528. static gchar *
  529. rspamd_task_option_safe_copy (struct rspamd_task *task,
  530. const gchar *val,
  531. gsize vlen,
  532. gsize *outlen)
  533. {
  534. const gchar *p, *end;
  535. p = val;
  536. end = val + vlen;
  537. vlen = 0; /* Reuse */
  538. while (p < end) {
  539. if (*p & 0x80) {
  540. UChar32 uc;
  541. gint off = 0;
  542. U8_NEXT (p, off, end - p, uc);
  543. if (uc > 0) {
  544. if (u_isprint (uc)) {
  545. vlen += off;
  546. }
  547. else {
  548. /* We will replace it with 0xFFFD */
  549. vlen += MAX (off, 3);
  550. }
  551. }
  552. else {
  553. vlen += MAX (off, 3);
  554. }
  555. p += off;
  556. }
  557. else if (!g_ascii_isprint (*p)) {
  558. /* Another 0xFFFD */
  559. vlen += 3;
  560. p ++;
  561. }
  562. else {
  563. p ++;
  564. vlen ++;
  565. }
  566. }
  567. gchar *dest, *d;
  568. dest = rspamd_mempool_alloc (task->task_pool, vlen + 1);
  569. d = dest;
  570. p = val;
  571. while (p < end) {
  572. if (*p & 0x80) {
  573. UChar32 uc;
  574. gint off = 0;
  575. U8_NEXT (p, off, end - p, uc);
  576. if (uc > 0) {
  577. if (u_isprint (uc)) {
  578. memcpy (d, p, off);
  579. d += off;
  580. }
  581. else {
  582. /* We will replace it with 0xFFFD */
  583. *d++ = '\357';
  584. *d++ = '\277';
  585. *d++ = '\275';
  586. }
  587. }
  588. else {
  589. *d++ = '\357';
  590. *d++ = '\277';
  591. *d++ = '\275';
  592. }
  593. p += off;
  594. }
  595. else if (!g_ascii_isprint (*p)) {
  596. /* Another 0xFFFD */
  597. *d++ = '\357';
  598. *d++ = '\277';
  599. *d++ = '\275';
  600. p ++;
  601. }
  602. else {
  603. *d++ = *p++;
  604. }
  605. }
  606. *d = '\0';
  607. *(outlen) = d - dest;
  608. return dest;
  609. }
  610. gboolean
  611. rspamd_task_add_result_option (struct rspamd_task *task,
  612. struct rspamd_symbol_result *s,
  613. const gchar *val,
  614. gsize vlen)
  615. {
  616. struct rspamd_symbol_option *opt, srch;
  617. gboolean ret = FALSE;
  618. gchar *opt_cpy = NULL;
  619. gsize cpy_len;
  620. khiter_t k;
  621. gint r;
  622. struct rspamd_symbol_result *cur;
  623. if (s && val) {
  624. /*
  625. * Here we assume that this function is all the time called with the
  626. * symbol from the default result, not some shadow result, or
  627. * the option insertion will be wrong
  628. */
  629. LL_FOREACH (s, cur) {
  630. if (cur->opts_len < 0) {
  631. /* Cannot add more options, give up */
  632. msg_debug_task ("cannot add more options to symbol %s when adding option %s",
  633. cur->name, val);
  634. ret = FALSE;
  635. continue;
  636. }
  637. if (!cur->options) {
  638. cur->options = kh_init (rspamd_options_hash);
  639. }
  640. if (vlen + cur->opts_len > task->cfg->max_opts_len) {
  641. /* Add truncated option */
  642. msg_info_task ("cannot add more options to symbol %s when adding option %s",
  643. cur->name, val);
  644. val = "...";
  645. vlen = 3;
  646. cur->opts_len = -1;
  647. }
  648. if (!(cur->sym && (cur->sym->flags & RSPAMD_SYMBOL_FLAG_ONEPARAM)) &&
  649. kh_size (cur->options) < task->cfg->default_max_shots) {
  650. srch.option = (gchar *) val;
  651. srch.optlen = vlen;
  652. k = kh_get (rspamd_options_hash, cur->options, &srch);
  653. if (k == kh_end (cur->options)) {
  654. opt_cpy = rspamd_task_option_safe_copy (task, val, vlen, &cpy_len);
  655. if (cpy_len != vlen) {
  656. srch.option = (gchar *) opt_cpy;
  657. srch.optlen = cpy_len;
  658. k = kh_get (rspamd_options_hash, cur->options, &srch);
  659. }
  660. /* Append new options */
  661. if (k == kh_end (cur->options)) {
  662. opt = rspamd_mempool_alloc0 (task->task_pool, sizeof(*opt));
  663. opt->optlen = cpy_len;
  664. opt->option = opt_cpy;
  665. kh_put (rspamd_options_hash, cur->options, opt, &r);
  666. DL_APPEND (cur->opts_head, opt);
  667. if (s == cur) {
  668. ret = TRUE;
  669. }
  670. }
  671. }
  672. }
  673. else {
  674. /* Skip addition */
  675. if (s == cur) {
  676. ret = FALSE;
  677. }
  678. }
  679. if (ret && cur->opts_len >= 0) {
  680. cur->opts_len += vlen;
  681. }
  682. }
  683. }
  684. else if (!val) {
  685. ret = TRUE;
  686. }
  687. task->result->nresults ++;
  688. return ret;
  689. }
  690. static struct rspamd_action_config *
  691. rspamd_find_action_config_for_action (struct rspamd_scan_result *scan_result,
  692. struct rspamd_action *act)
  693. {
  694. for (unsigned int i = 0; i < scan_result->nactions; i ++) {
  695. struct rspamd_action_config *cur = &scan_result->actions_config[i];
  696. if (act == cur->action) {
  697. return cur;
  698. }
  699. }
  700. return NULL;
  701. }
  702. struct rspamd_action *
  703. rspamd_check_action_metric (struct rspamd_task *task,
  704. struct rspamd_passthrough_result **ppr,
  705. struct rspamd_scan_result *scan_result)
  706. {
  707. struct rspamd_action_config *action_lim,
  708. *noaction = NULL;
  709. struct rspamd_action *selected_action = NULL, *least_action = NULL;
  710. struct rspamd_passthrough_result *pr, *sel_pr = NULL;
  711. double max_score = -(G_MAXDOUBLE), sc;
  712. gboolean seen_least = FALSE;
  713. if (scan_result == NULL) {
  714. scan_result = task->result;
  715. }
  716. if (scan_result->passthrough_result != NULL) {
  717. DL_FOREACH (scan_result->passthrough_result, pr) {
  718. struct rspamd_action_config *act_config =
  719. rspamd_find_action_config_for_action (scan_result, pr->action);
  720. /* Skip disabled actions */
  721. if (act_config && (act_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
  722. continue;
  723. }
  724. if (!seen_least || !(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
  725. sc = pr->target_score;
  726. selected_action = pr->action;
  727. if (!(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
  728. if (!isnan (sc)) {
  729. if (pr->action->action_type == METRIC_ACTION_NOACTION) {
  730. scan_result->score = MIN (sc, scan_result->score);
  731. }
  732. else {
  733. scan_result->score = sc;
  734. }
  735. }
  736. if (ppr) {
  737. *ppr = pr;
  738. }
  739. return selected_action;
  740. }
  741. else {
  742. seen_least = true;
  743. least_action = selected_action;
  744. if (isnan (sc)) {
  745. if (selected_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
  746. /*
  747. * In this case, we have a passthrough action that
  748. * is `least` action, however, there is no threshold
  749. * on it.
  750. *
  751. * Hence, we imply the following logic:
  752. *
  753. * - we leave score unchanged
  754. * - we apply passthrough no threshold action unless
  755. * score based action *is not* reject, otherwise
  756. * we apply reject action
  757. */
  758. }
  759. else {
  760. sc = selected_action->threshold;
  761. max_score = sc;
  762. sel_pr = pr;
  763. }
  764. }
  765. else {
  766. max_score = sc;
  767. sel_pr = pr;
  768. }
  769. }
  770. }
  771. }
  772. }
  773. /*
  774. * Select result by score
  775. */
  776. for (size_t i = scan_result->nactions - 1; i != (size_t)-1; i--) {
  777. action_lim = &scan_result->actions_config[i];
  778. sc = action_lim->cur_limit;
  779. if (action_lim->action->action_type == METRIC_ACTION_NOACTION) {
  780. noaction = action_lim;
  781. }
  782. if ((action_lim->flags & (RSPAMD_ACTION_RESULT_DISABLED|RSPAMD_ACTION_RESULT_NO_THRESHOLD))) {
  783. continue;
  784. }
  785. if (isnan (sc) ||
  786. (action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD|RSPAMD_ACTION_HAM))) {
  787. continue;
  788. }
  789. if (scan_result->score >= sc && sc > max_score) {
  790. selected_action = action_lim->action;
  791. max_score = sc;
  792. }
  793. }
  794. if (selected_action == NULL) {
  795. selected_action = noaction->action;
  796. }
  797. if (selected_action) {
  798. if (seen_least) {
  799. /* Adjust least action */
  800. if (least_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
  801. if (selected_action->action_type != METRIC_ACTION_REJECT &&
  802. selected_action->action_type != METRIC_ACTION_DISCARD) {
  803. /* Override score based action with least action */
  804. selected_action = least_action;
  805. if (ppr) {
  806. *ppr = sel_pr;
  807. }
  808. }
  809. }
  810. else {
  811. /* Adjust score if needed */
  812. if (max_score > scan_result->score) {
  813. if (ppr) {
  814. *ppr = sel_pr;
  815. }
  816. scan_result->score = max_score;
  817. }
  818. }
  819. }
  820. return selected_action;
  821. }
  822. if (ppr) {
  823. *ppr = sel_pr;
  824. }
  825. return noaction->action;
  826. }
  827. struct rspamd_symbol_result *
  828. rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym,
  829. struct rspamd_scan_result *result)
  830. {
  831. struct rspamd_symbol_result *res = NULL;
  832. khiter_t k;
  833. if (result == NULL) {
  834. /* Use default result */
  835. result = task->result;
  836. }
  837. k = kh_get(rspamd_symbols_hash, result->symbols, sym);
  838. if (k != kh_end (result->symbols)) {
  839. res = kh_value (result->symbols, k);
  840. }
  841. return res;
  842. }
  843. struct rspamd_symbol_result* rspamd_task_remove_symbol_result (
  844. struct rspamd_task *task,
  845. const gchar *symbol,
  846. struct rspamd_scan_result *result)
  847. {
  848. struct rspamd_symbol_result *res = NULL;
  849. khiter_t k;
  850. if (result == NULL) {
  851. /* Use default result */
  852. result = task->result;
  853. }
  854. k = kh_get (rspamd_symbols_hash, result->symbols, symbol);
  855. if (k != kh_end (result->symbols)) {
  856. res = kh_value (result->symbols, k);
  857. if (!isnan (res->score)) {
  858. /* Remove score from the result */
  859. result->score -= res->score;
  860. /* Also check the group limit */
  861. if (result->sym_groups && res->sym) {
  862. struct rspamd_symbol_group *gr;
  863. gint i;
  864. khiter_t k_groups;
  865. PTR_ARRAY_FOREACH (res->sym->groups, i, gr) {
  866. gdouble *gr_score;
  867. k_groups = kh_get (rspamd_symbols_group_hash,
  868. result->sym_groups, gr);
  869. if (k_groups != kh_end (result->sym_groups)) {
  870. gr_score = &kh_value (result->sym_groups, k_groups);
  871. if (gr_score) {
  872. *gr_score -= res->score;
  873. }
  874. }
  875. }
  876. }
  877. }
  878. kh_del (rspamd_symbols_hash, result->symbols, k);
  879. }
  880. else {
  881. return NULL;
  882. }
  883. return res;
  884. }
  885. void
  886. rspamd_task_symbol_result_foreach (struct rspamd_task *task,
  887. struct rspamd_scan_result *result, GHFunc func,
  888. gpointer ud)
  889. {
  890. const gchar *kk;
  891. struct rspamd_symbol_result *res;
  892. if (result == NULL) {
  893. /* Use default result */
  894. result = task->result;
  895. }
  896. if (func) {
  897. kh_foreach (result->symbols, kk, res, {
  898. func ((gpointer)kk, (gpointer)res, ud);
  899. });
  900. }
  901. }
  902. struct rspamd_scan_result *
  903. rspamd_find_metric_result (struct rspamd_task *task,
  904. const gchar *name)
  905. {
  906. struct rspamd_scan_result *res;
  907. if (name == NULL) {
  908. return task->result;
  909. }
  910. else if (strcmp (name, "default") == 0) {
  911. return task->result;
  912. }
  913. DL_FOREACH (task->result, res) {
  914. if (res->name && strcmp (res->name, name) == 0) {
  915. return res;
  916. }
  917. }
  918. return NULL;
  919. }