Du kannst nicht mehr als 25 Themen auswählen Themen müssen mit entweder einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

scan_result.c 27KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "mem_pool.h"
  18. #include "scan_result.h"
  19. #include "rspamd.h"
  20. #include "message.h"
  21. #include "lua/lua_common.h"
  22. #include "libserver/cfg_file_private.h"
  23. #include "libmime/scan_result_private.h"
  24. #include "contrib/fastutf8/fastutf8.h"
  25. #include <math.h>
  26. #include "contrib/uthash/utlist.h"
  27. #define msg_debug_metric(...) rspamd_conditional_debug_fast(NULL, NULL, \
  28. rspamd_metric_log_id, "metric", task->task_pool->tag.uid, \
  29. RSPAMD_LOG_FUNC, \
  30. __VA_ARGS__)
  31. INIT_LOG_MODULE(metric)
  32. /* Average symbols count to optimize hash allocation */
  33. static struct rspamd_counter_data symbols_count;
  34. static void
  35. rspamd_scan_result_dtor(gpointer d)
  36. {
  37. struct rspamd_scan_result *r = (struct rspamd_scan_result *) d;
  38. struct rspamd_symbol_result *sres;
  39. rspamd_set_counter_ema(&symbols_count, kh_size(r->symbols), 0.5);
  40. if (r->symbol_cbref != -1) {
  41. luaL_unref(r->task->cfg->lua_state, LUA_REGISTRYINDEX, r->symbol_cbref);
  42. }
  43. kh_foreach_value(r->symbols, sres, {
  44. if (sres->options) {
  45. kh_destroy(rspamd_options_hash, sres->options);
  46. }
  47. });
  48. kh_destroy(rspamd_symbols_hash, r->symbols);
  49. kh_destroy(rspamd_symbols_group_hash, r->sym_groups);
  50. }
  51. static void
  52. rspamd_metric_actions_foreach_cb(int i, struct rspamd_action *act, void *cbd)
  53. {
  54. struct rspamd_scan_result *metric_res = (struct rspamd_scan_result *) cbd;
  55. metric_res->actions_config[i].flags = RSPAMD_ACTION_RESULT_DEFAULT;
  56. if (!(act->flags & RSPAMD_ACTION_NO_THRESHOLD)) {
  57. metric_res->actions_config[i].cur_limit = act->threshold;
  58. }
  59. else {
  60. metric_res->actions_config[i].flags |= RSPAMD_ACTION_RESULT_NO_THRESHOLD;
  61. }
  62. metric_res->actions_config[i].action = act;
  63. }
  64. struct rspamd_scan_result *
  65. rspamd_create_metric_result(struct rspamd_task *task,
  66. const char *name, int lua_sym_cbref)
  67. {
  68. struct rspamd_scan_result *metric_res;
  69. metric_res = rspamd_mempool_alloc0(task->task_pool,
  70. sizeof(struct rspamd_scan_result));
  71. metric_res->symbols = kh_init(rspamd_symbols_hash);
  72. metric_res->sym_groups = kh_init(rspamd_symbols_group_hash);
  73. if (name) {
  74. metric_res->name = rspamd_mempool_strdup(task->task_pool, name);
  75. }
  76. else {
  77. metric_res->name = NULL;
  78. }
  79. metric_res->symbol_cbref = lua_sym_cbref;
  80. metric_res->task = task;
  81. /* Optimize allocation */
  82. kh_resize(rspamd_symbols_group_hash, metric_res->sym_groups, 4);
  83. if (symbols_count.mean > 4) {
  84. kh_resize(rspamd_symbols_hash, metric_res->symbols, symbols_count.mean);
  85. }
  86. else {
  87. kh_resize(rspamd_symbols_hash, metric_res->symbols, 4);
  88. }
  89. if (task->cfg) {
  90. size_t nact = rspamd_config_actions_size(task->cfg);
  91. metric_res->actions_config = rspamd_mempool_alloc0(task->task_pool,
  92. sizeof(struct rspamd_action_config) * nact);
  93. rspamd_config_actions_foreach_enumerate(task->cfg, rspamd_metric_actions_foreach_cb, metric_res);
  94. metric_res->nactions = nact;
  95. }
  96. rspamd_mempool_add_destructor(task->task_pool,
  97. rspamd_scan_result_dtor,
  98. metric_res);
  99. DL_APPEND(task->result, metric_res);
  100. return metric_res;
  101. }
  102. static inline int
  103. rspamd_pr_sort(const struct rspamd_passthrough_result *pra,
  104. const struct rspamd_passthrough_result *prb)
  105. {
  106. return prb->priority - pra->priority;
  107. }
  108. bool rspamd_add_passthrough_result(struct rspamd_task *task,
  109. struct rspamd_action *action,
  110. unsigned int priority,
  111. double target_score,
  112. const char *message,
  113. const char *module,
  114. uint flags,
  115. struct rspamd_scan_result *scan_result)
  116. {
  117. struct rspamd_passthrough_result *pr;
  118. if (scan_result == NULL) {
  119. scan_result = task->result;
  120. }
  121. /* Find the specific action config */
  122. struct rspamd_action_config *action_config = NULL;
  123. for (unsigned int i = 0; i < scan_result->nactions; i++) {
  124. struct rspamd_action_config *cur = &scan_result->actions_config[i];
  125. /* We assume that all action pointers are static */
  126. if (cur->action == action) {
  127. action_config = cur;
  128. break;
  129. }
  130. }
  131. if (action_config && (action_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
  132. msg_info_task("<%s>: NOT set pre-result to '%s' %s(%.2f): '%s' from %s(%d); action is disabled",
  133. MESSAGE_FIELD_CHECK(task, message_id), action->name,
  134. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  135. target_score,
  136. message, module, priority);
  137. return false;
  138. }
  139. pr = rspamd_mempool_alloc(task->task_pool, sizeof(*pr));
  140. pr->action = action;
  141. pr->priority = priority;
  142. pr->message = message;
  143. pr->module = module;
  144. pr->target_score = target_score;
  145. pr->flags = flags;
  146. DL_APPEND(scan_result->passthrough_result, pr);
  147. DL_SORT(scan_result->passthrough_result, rspamd_pr_sort);
  148. if (!isnan(target_score)) {
  149. msg_info_task("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)",
  150. MESSAGE_FIELD_CHECK(task, message_id), action->name,
  151. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  152. target_score,
  153. message, module, priority);
  154. }
  155. else {
  156. msg_info_task("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)",
  157. MESSAGE_FIELD_CHECK(task, message_id), action->name,
  158. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  159. message, module, priority);
  160. }
  161. scan_result->nresults++;
  162. return true;
  163. }
  164. static inline double
  165. rspamd_check_group_score(struct rspamd_task *task,
  166. const char *symbol,
  167. struct rspamd_symbols_group *gr,
  168. double *group_score,
  169. double w)
  170. {
  171. if (gr != NULL && group_score && gr->max_score > 0.0 && w > 0.0) {
  172. if (*group_score >= gr->max_score && w > 0) {
  173. msg_info_task("maximum group score %.2f for group %s has been reached,"
  174. " ignoring symbol %s with weight %.2f",
  175. gr->max_score,
  176. gr->name, symbol, w);
  177. return NAN;
  178. }
  179. else if (*group_score + w > gr->max_score) {
  180. w = gr->max_score - *group_score;
  181. }
  182. }
  183. return w;
  184. }
  185. #ifndef DBL_EPSILON
  186. #define DBL_EPSILON 2.2204460492503131e-16
  187. #endif
  188. static struct rspamd_symbol_result *
  189. insert_metric_result(struct rspamd_task *task,
  190. const char *symbol,
  191. double weight,
  192. const char *opt,
  193. struct rspamd_scan_result *metric_res,
  194. enum rspamd_symbol_insert_flags flags,
  195. bool *new_sym)
  196. {
  197. struct rspamd_symbol_result *symbol_result = NULL;
  198. double final_score, *gr_score = NULL, diff;
  199. struct rspamd_symbol *sdef;
  200. struct rspamd_symbols_group *gr = NULL;
  201. const ucl_object_t *mobj, *sobj;
  202. int max_shots = G_MAXINT, ret;
  203. unsigned int i;
  204. khiter_t k;
  205. gboolean single = !!(flags & RSPAMD_SYMBOL_INSERT_SINGLE);
  206. char *sym_cpy;
  207. if (!isfinite(weight)) {
  208. msg_warn_task("detected %s score for symbol %s, replace it with zero",
  209. isnan(weight) ? "NaN" : "infinity", symbol);
  210. weight = 0.0;
  211. }
  212. msg_debug_metric("want to insert symbol %s, initial weight %.2f",
  213. symbol, weight);
  214. sdef = g_hash_table_lookup(task->cfg->symbols, symbol);
  215. if (sdef == NULL) {
  216. if (flags & RSPAMD_SYMBOL_INSERT_ENFORCE) {
  217. final_score = 1.0 * weight; /* Enforce static weight to 1.0 */
  218. }
  219. else {
  220. final_score = 0.0;
  221. }
  222. msg_debug_metric("no symbol definition for %s; final multiplier %.2f",
  223. symbol, final_score);
  224. }
  225. else {
  226. if (sdef->cache_item) {
  227. /* Check if we can insert this symbol at all */
  228. if (!rspamd_symcache_is_item_allowed(task, sdef->cache_item, FALSE)) {
  229. msg_debug_metric("symbol %s is not allowed to be inserted due to settings",
  230. symbol);
  231. return NULL;
  232. }
  233. }
  234. final_score = (*sdef->weight_ptr) * weight;
  235. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  236. {
  237. k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr);
  238. if (k == kh_end(metric_res->sym_groups)) {
  239. k = kh_put(rspamd_symbols_group_hash, metric_res->sym_groups,
  240. gr, &ret);
  241. kh_value(metric_res->sym_groups, k) = 0;
  242. }
  243. }
  244. msg_debug_metric("metric multiplier for %s is %.2f",
  245. symbol, *sdef->weight_ptr);
  246. }
  247. if (task->settings) {
  248. double corr;
  249. mobj = ucl_object_lookup(task->settings, "scores");
  250. if (!mobj) {
  251. /* Legacy */
  252. mobj = task->settings;
  253. }
  254. else {
  255. msg_debug_metric("found scores in the settings");
  256. }
  257. sobj = ucl_object_lookup(mobj, symbol);
  258. if (sobj != NULL && ucl_object_todouble_safe(sobj, &corr)) {
  259. msg_debug_metric("settings: changed weight of symbol %s from %.2f "
  260. "to %.2f * %.2f",
  261. symbol, final_score, corr, weight);
  262. final_score = corr * weight;
  263. }
  264. }
  265. k = kh_get(rspamd_symbols_hash, metric_res->symbols, symbol);
  266. if (k != kh_end(metric_res->symbols)) {
  267. /* Existing metric score */
  268. symbol_result = kh_value(metric_res->symbols, k);
  269. if (single) {
  270. max_shots = 1;
  271. }
  272. else {
  273. if (sdef) {
  274. if (sdef->groups) {
  275. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  276. {
  277. if (gr->flags & RSPAMD_SYMBOL_GROUP_ONE_SHOT) {
  278. max_shots = 1;
  279. }
  280. }
  281. }
  282. max_shots = MIN(max_shots, sdef->nshots);
  283. }
  284. else {
  285. max_shots = task->cfg->default_max_shots;
  286. }
  287. }
  288. msg_debug_metric("nshots: %d for symbol %s", max_shots, symbol);
  289. if (!single && (max_shots > 0 && (symbol_result->nshots >= max_shots))) {
  290. single = TRUE;
  291. }
  292. symbol_result->nshots++;
  293. if (opt) {
  294. rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt));
  295. }
  296. /* Adjust diff */
  297. if (!single) {
  298. diff = final_score;
  299. msg_debug_metric("symbol %s can be inserted multiple times: %.2f weight",
  300. symbol, diff);
  301. }
  302. else {
  303. if (fabs(symbol_result->score) < fabs(final_score) &&
  304. signbit(symbol_result->score) == signbit(final_score)) {
  305. /* Replace less significant weight with a more significant one */
  306. diff = final_score - symbol_result->score;
  307. msg_debug_metric("symbol %s can be inserted single time;"
  308. " weight adjusted %.2f + %.2f",
  309. symbol, symbol_result->score, diff);
  310. }
  311. else {
  312. diff = 0;
  313. }
  314. }
  315. if (diff) {
  316. if (sdef) {
  317. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  318. {
  319. double cur_diff;
  320. k = kh_get(rspamd_symbols_group_hash,
  321. metric_res->sym_groups, gr);
  322. g_assert(k != kh_end(metric_res->sym_groups));
  323. gr_score = &kh_value(metric_res->sym_groups, k);
  324. cur_diff = rspamd_check_group_score(task, symbol, gr,
  325. gr_score, diff);
  326. if (isnan(cur_diff)) {
  327. /* Limit reached, do not add result */
  328. msg_debug_metric(
  329. "group limit %.2f is reached for %s when inserting symbol %s;"
  330. " drop score %.2f",
  331. *gr_score, gr->name, symbol, diff);
  332. diff = NAN;
  333. break;
  334. }
  335. else if (gr_score) {
  336. *gr_score += cur_diff;
  337. if (cur_diff < diff) {
  338. /* Reduce */
  339. msg_debug_metric(
  340. "group limit %.2f is reached for %s when inserting symbol %s;"
  341. " reduce score %.2f - %.2f",
  342. *gr_score, gr->name, symbol, diff, cur_diff);
  343. diff = cur_diff;
  344. }
  345. }
  346. }
  347. }
  348. if (!isnan(diff)) {
  349. if (single) {
  350. msg_debug_metric("final score for single symbol %s = %.2f; %.2f diff",
  351. symbol, final_score, diff);
  352. symbol_result->score = final_score;
  353. }
  354. else {
  355. msg_debug_metric("increase final score for multiple symbol %s += %.2f = %.2f",
  356. symbol, symbol_result->score, diff);
  357. symbol_result->score += diff;
  358. }
  359. }
  360. }
  361. }
  362. else {
  363. /* New result */
  364. if (new_sym) {
  365. *new_sym = true;
  366. }
  367. sym_cpy = rspamd_mempool_strdup(task->task_pool, symbol);
  368. k = kh_put(rspamd_symbols_hash, metric_res->symbols,
  369. sym_cpy, &ret);
  370. g_assert(ret > 0);
  371. symbol_result = rspamd_mempool_alloc0(task->task_pool, sizeof(*symbol_result));
  372. kh_value(metric_res->symbols, k) = symbol_result;
  373. symbol_result->name = sym_cpy;
  374. symbol_result->sym = sdef;
  375. symbol_result->nshots = 1;
  376. if (sdef) {
  377. /* Check group limits */
  378. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  379. {
  380. double cur_score;
  381. k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr);
  382. g_assert(k != kh_end(metric_res->sym_groups));
  383. gr_score = &kh_value(metric_res->sym_groups, k);
  384. cur_score = rspamd_check_group_score(task, symbol, gr,
  385. gr_score, final_score);
  386. if (isnan(cur_score)) {
  387. /* Limit reached, do not add result */
  388. msg_debug_metric(
  389. "group limit %.2f is reached for %s when inserting symbol %s;"
  390. " drop score %.2f",
  391. *gr_score, gr->name, symbol, final_score);
  392. final_score = NAN;
  393. break;
  394. }
  395. else if (gr_score) {
  396. *gr_score += cur_score;
  397. if (cur_score < final_score) {
  398. /* Reduce */
  399. msg_debug_metric(
  400. "group limit %.2f is reached for %s when inserting symbol %s;"
  401. " reduce score %.2f - %.2f",
  402. *gr_score, gr->name, symbol, final_score, cur_score);
  403. final_score = cur_score;
  404. }
  405. }
  406. }
  407. }
  408. if (!isnan(final_score)) {
  409. const double epsilon = DBL_EPSILON;
  410. metric_res->score += final_score;
  411. symbol_result->score = final_score;
  412. if (final_score > epsilon) {
  413. metric_res->npositive++;
  414. metric_res->positive_score += final_score;
  415. }
  416. else if (final_score < -epsilon) {
  417. metric_res->nnegative++;
  418. metric_res->negative_score += fabs(final_score);
  419. }
  420. }
  421. else {
  422. symbol_result->score = 0;
  423. }
  424. if (opt) {
  425. rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt));
  426. }
  427. }
  428. msg_debug_metric("final insertion for symbol %s, score %.2f, factor: %f",
  429. symbol,
  430. symbol_result->score,
  431. final_score);
  432. metric_res->nresults++;
  433. return symbol_result;
  434. }
  435. struct rspamd_symbol_result *
  436. rspamd_task_insert_result_full(struct rspamd_task *task,
  437. const char *symbol,
  438. double weight,
  439. const char *opt,
  440. enum rspamd_symbol_insert_flags flags,
  441. struct rspamd_scan_result *result)
  442. {
  443. struct rspamd_symbol_result *symbol_result = NULL, *ret = NULL;
  444. struct rspamd_scan_result *mres;
  445. /*
  446. * We allow symbols to be inserted for skipped tasks, as it might be a
  447. * race condition before some symbol is finished and skip flag being set.
  448. */
  449. if (!RSPAMD_TASK_IS_SKIPPED(task) && (task->processed_stages & (RSPAMD_TASK_STAGE_IDEMPOTENT >> 1))) {
  450. msg_err_task("cannot insert symbol %s on idempotent phase",
  451. symbol);
  452. return NULL;
  453. }
  454. if (result == NULL) {
  455. /* Insert everywhere */
  456. DL_FOREACH(task->result, mres)
  457. {
  458. if (mres->symbol_cbref != -1) {
  459. /* Check if we can insert this symbol to this symbol result */
  460. GError *err = NULL;
  461. lua_State *L = (lua_State *) task->cfg->lua_state;
  462. if (!rspamd_lua_universal_pcall(L, mres->symbol_cbref,
  463. G_STRLOC, 1, "uss", &err,
  464. rspamd_task_classname, task, symbol, mres->name ? mres->name : "default")) {
  465. msg_warn_task("cannot call for symbol_cbref for result %s: %e",
  466. mres->name ? mres->name : "default", err);
  467. g_error_free(err);
  468. continue;
  469. }
  470. else {
  471. if (!lua_toboolean(L, -1)) {
  472. /* Skip symbol */
  473. msg_debug_metric("skip symbol %s for result %s due to Lua return value",
  474. symbol, mres->name);
  475. lua_pop(L, 1); /* Remove result */
  476. continue;
  477. }
  478. lua_pop(L, 1); /* Remove result */
  479. }
  480. }
  481. bool new_symbol = false;
  482. symbol_result = insert_metric_result(task,
  483. symbol,
  484. weight,
  485. opt,
  486. mres,
  487. flags,
  488. &new_symbol);
  489. if (mres->name == NULL) {
  490. /* Default result */
  491. ret = symbol_result;
  492. /* Process cache item */
  493. if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) {
  494. rspamd_symcache_inc_frequency(task->cfg->cache,
  495. symbol_result->sym->cache_item,
  496. symbol_result->sym->name);
  497. }
  498. }
  499. else if (new_symbol) {
  500. /* O(N) but we normally don't have any shadow results */
  501. LL_APPEND(ret, symbol_result);
  502. }
  503. }
  504. }
  505. else {
  506. /* Specific insertion */
  507. symbol_result = insert_metric_result(task,
  508. symbol,
  509. weight,
  510. opt,
  511. result,
  512. flags,
  513. NULL);
  514. ret = symbol_result;
  515. if (result->name == NULL) {
  516. /* Process cache item */
  517. if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) {
  518. rspamd_symcache_inc_frequency(task->cfg->cache,
  519. symbol_result->sym->cache_item,
  520. symbol_result->sym->name);
  521. }
  522. }
  523. }
  524. return ret;
  525. }
  526. static char *
  527. rspamd_task_option_safe_copy(struct rspamd_task *task,
  528. const char *val,
  529. gsize vlen,
  530. gsize *outlen)
  531. {
  532. const char *p, *end;
  533. p = val;
  534. end = val + vlen;
  535. vlen = 0; /* Reuse */
  536. while (p < end) {
  537. if (*p & 0x80) {
  538. UChar32 uc;
  539. int off = 0;
  540. U8_NEXT(p, off, end - p, uc);
  541. if (uc > 0) {
  542. if (u_isprint(uc)) {
  543. vlen += off;
  544. }
  545. else {
  546. /* We will replace it with 0xFFFD */
  547. vlen += MAX(off, 3);
  548. }
  549. }
  550. else {
  551. vlen += MAX(off, 3);
  552. }
  553. p += off;
  554. }
  555. else if (!g_ascii_isprint(*p)) {
  556. /* Another 0xFFFD */
  557. vlen += 3;
  558. p++;
  559. }
  560. else {
  561. p++;
  562. vlen++;
  563. }
  564. }
  565. char *dest, *d;
  566. dest = rspamd_mempool_alloc(task->task_pool, vlen + 1);
  567. d = dest;
  568. p = val;
  569. while (p < end) {
  570. if (*p & 0x80) {
  571. UChar32 uc;
  572. int off = 0;
  573. U8_NEXT(p, off, end - p, uc);
  574. if (uc > 0) {
  575. if (u_isprint(uc)) {
  576. memcpy(d, p, off);
  577. d += off;
  578. }
  579. else {
  580. /* We will replace it with 0xFFFD */
  581. *d++ = '\357';
  582. *d++ = '\277';
  583. *d++ = '\275';
  584. }
  585. }
  586. else {
  587. *d++ = '\357';
  588. *d++ = '\277';
  589. *d++ = '\275';
  590. }
  591. p += off;
  592. }
  593. else if (!g_ascii_isprint(*p)) {
  594. /* Another 0xFFFD */
  595. *d++ = '\357';
  596. *d++ = '\277';
  597. *d++ = '\275';
  598. p++;
  599. }
  600. else {
  601. *d++ = *p++;
  602. }
  603. }
  604. *d = '\0';
  605. *(outlen) = d - dest;
  606. return dest;
  607. }
  608. gboolean
  609. rspamd_task_add_result_option(struct rspamd_task *task,
  610. struct rspamd_symbol_result *s,
  611. const char *val,
  612. gsize vlen)
  613. {
  614. struct rspamd_symbol_option *opt, srch;
  615. gboolean ret = FALSE;
  616. char *opt_cpy = NULL;
  617. gsize cpy_len;
  618. khiter_t k;
  619. int r;
  620. struct rspamd_symbol_result *cur;
  621. if (s && val) {
  622. /*
  623. * Here we assume that this function is all the time called with the
  624. * symbol from the default result, not some shadow result, or
  625. * the option insertion will be wrong
  626. */
  627. LL_FOREACH(s, cur)
  628. {
  629. if (cur->opts_len < 0) {
  630. /* Cannot add more options, give up */
  631. msg_debug_task("cannot add more options to symbol %s when adding option %s",
  632. cur->name, val);
  633. ret = FALSE;
  634. continue;
  635. }
  636. if (!cur->options) {
  637. cur->options = kh_init(rspamd_options_hash);
  638. }
  639. if (vlen + cur->opts_len > task->cfg->max_opts_len) {
  640. /* Add truncated option */
  641. msg_info_task("cannot add more options to symbol %s when adding option %s",
  642. cur->name, val);
  643. val = "...";
  644. vlen = 3;
  645. cur->opts_len = -1;
  646. }
  647. if (!(cur->sym && (cur->sym->flags & RSPAMD_SYMBOL_FLAG_ONEPARAM))) {
  648. srch.option = (char *) val;
  649. srch.optlen = vlen;
  650. k = kh_get(rspamd_options_hash, cur->options, &srch);
  651. if (k == kh_end(cur->options)) {
  652. opt_cpy = rspamd_task_option_safe_copy(task, val, vlen, &cpy_len);
  653. if (cpy_len != vlen) {
  654. srch.option = (char *) opt_cpy;
  655. srch.optlen = cpy_len;
  656. k = kh_get(rspamd_options_hash, cur->options, &srch);
  657. }
  658. /* Append new options */
  659. if (k == kh_end(cur->options)) {
  660. opt = rspamd_mempool_alloc0(task->task_pool, sizeof(*opt));
  661. opt->optlen = cpy_len;
  662. opt->option = opt_cpy;
  663. kh_put(rspamd_options_hash, cur->options, opt, &r);
  664. DL_APPEND(cur->opts_head, opt);
  665. if (s == cur) {
  666. ret = TRUE;
  667. }
  668. }
  669. }
  670. }
  671. else {
  672. /* Skip addition */
  673. if (s == cur) {
  674. ret = FALSE;
  675. }
  676. }
  677. if (ret && cur->opts_len >= 0) {
  678. cur->opts_len += vlen;
  679. }
  680. }
  681. }
  682. else if (!val) {
  683. ret = TRUE;
  684. }
  685. task->result->nresults++;
  686. return ret;
  687. }
  688. struct rspamd_action_config *
  689. rspamd_find_action_config_for_action(struct rspamd_scan_result *scan_result,
  690. struct rspamd_action *act)
  691. {
  692. for (unsigned int i = 0; i < scan_result->nactions; i++) {
  693. struct rspamd_action_config *cur = &scan_result->actions_config[i];
  694. if (act == cur->action) {
  695. return cur;
  696. }
  697. }
  698. return NULL;
  699. }
  700. struct rspamd_action *
  701. rspamd_check_action_metric(struct rspamd_task *task,
  702. struct rspamd_passthrough_result **ppr,
  703. struct rspamd_scan_result *scan_result)
  704. {
  705. struct rspamd_action_config *action_lim,
  706. *noaction = NULL;
  707. struct rspamd_action *selected_action = NULL, *least_action = NULL;
  708. struct rspamd_passthrough_result *pr, *sel_pr = NULL;
  709. double max_score = -(G_MAXDOUBLE), sc;
  710. gboolean seen_least = FALSE;
  711. if (scan_result == NULL) {
  712. scan_result = task->result;
  713. }
  714. if (scan_result->passthrough_result != NULL) {
  715. DL_FOREACH(scan_result->passthrough_result, pr)
  716. {
  717. struct rspamd_action_config *act_config =
  718. rspamd_find_action_config_for_action(scan_result, pr->action);
  719. /* Skip disabled actions */
  720. if (act_config && (act_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
  721. continue;
  722. }
  723. if (!seen_least || !(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
  724. sc = pr->target_score;
  725. selected_action = pr->action;
  726. if (!(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
  727. if (!isnan(sc)) {
  728. if (pr->action->action_type == METRIC_ACTION_NOACTION) {
  729. scan_result->score = MIN(sc, scan_result->score);
  730. }
  731. else {
  732. scan_result->score = sc;
  733. }
  734. }
  735. if (ppr) {
  736. *ppr = pr;
  737. }
  738. return selected_action;
  739. }
  740. else {
  741. seen_least = true;
  742. least_action = selected_action;
  743. if (isnan(sc)) {
  744. if (selected_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
  745. /*
  746. * In this case, we have a passthrough action that
  747. * is `least` action, however, there is no threshold
  748. * on it.
  749. *
  750. * Hence, we imply the following logic:
  751. *
  752. * - we leave score unchanged
  753. * - we apply passthrough no threshold action unless
  754. * score based action *is not* reject, otherwise
  755. * we apply reject action
  756. */
  757. }
  758. else {
  759. sc = selected_action->threshold;
  760. max_score = sc;
  761. sel_pr = pr;
  762. }
  763. }
  764. else {
  765. max_score = sc;
  766. sel_pr = pr;
  767. }
  768. }
  769. }
  770. }
  771. }
  772. /*
  773. * Select result by score
  774. */
  775. for (size_t i = scan_result->nactions - 1; i != (size_t) -1; i--) {
  776. action_lim = &scan_result->actions_config[i];
  777. sc = action_lim->cur_limit;
  778. if (action_lim->action->action_type == METRIC_ACTION_NOACTION) {
  779. noaction = action_lim;
  780. }
  781. if ((action_lim->flags & (RSPAMD_ACTION_RESULT_DISABLED | RSPAMD_ACTION_RESULT_NO_THRESHOLD))) {
  782. continue;
  783. }
  784. if (isnan(sc) ||
  785. (action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
  786. continue;
  787. }
  788. if (scan_result->score >= sc && sc > max_score) {
  789. selected_action = action_lim->action;
  790. max_score = sc;
  791. }
  792. }
  793. if (selected_action == NULL) {
  794. selected_action = noaction->action;
  795. }
  796. if (selected_action) {
  797. if (seen_least) {
  798. /* Adjust least action */
  799. if (least_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
  800. if (selected_action->action_type != METRIC_ACTION_REJECT &&
  801. selected_action->action_type != METRIC_ACTION_DISCARD) {
  802. /* Override score based action with least action */
  803. selected_action = least_action;
  804. if (ppr) {
  805. *ppr = sel_pr;
  806. }
  807. }
  808. }
  809. else {
  810. /* Adjust score if needed */
  811. if (max_score > scan_result->score) {
  812. if (ppr) {
  813. *ppr = sel_pr;
  814. }
  815. scan_result->score = max_score;
  816. }
  817. }
  818. }
  819. return selected_action;
  820. }
  821. if (ppr) {
  822. *ppr = sel_pr;
  823. }
  824. return noaction->action;
  825. }
  826. struct rspamd_symbol_result *
  827. rspamd_task_find_symbol_result(struct rspamd_task *task, const char *sym,
  828. struct rspamd_scan_result *result)
  829. {
  830. struct rspamd_symbol_result *res = NULL;
  831. khiter_t k;
  832. if (result == NULL) {
  833. /* Use default result */
  834. result = task->result;
  835. }
  836. k = kh_get(rspamd_symbols_hash, result->symbols, sym);
  837. if (k != kh_end(result->symbols)) {
  838. res = kh_value(result->symbols, k);
  839. }
  840. return res;
  841. }
  842. struct rspamd_symbol_result *rspamd_task_remove_symbol_result(
  843. struct rspamd_task *task,
  844. const char *symbol,
  845. struct rspamd_scan_result *result)
  846. {
  847. struct rspamd_symbol_result *res = NULL;
  848. khiter_t k;
  849. if (result == NULL) {
  850. /* Use default result */
  851. result = task->result;
  852. }
  853. k = kh_get(rspamd_symbols_hash, result->symbols, symbol);
  854. if (k != kh_end(result->symbols)) {
  855. res = kh_value(result->symbols, k);
  856. if (!isnan(res->score)) {
  857. /* Remove score from the result */
  858. result->score -= res->score;
  859. /* Also check the group limit */
  860. if (result->sym_groups && res->sym) {
  861. struct rspamd_symbol_group *gr;
  862. int i;
  863. khiter_t k_groups;
  864. PTR_ARRAY_FOREACH(res->sym->groups, i, gr)
  865. {
  866. double *gr_score;
  867. k_groups = kh_get(rspamd_symbols_group_hash,
  868. result->sym_groups, gr);
  869. if (k_groups != kh_end(result->sym_groups)) {
  870. gr_score = &kh_value(result->sym_groups, k_groups);
  871. if (gr_score) {
  872. *gr_score -= res->score;
  873. }
  874. }
  875. }
  876. }
  877. }
  878. kh_del(rspamd_symbols_hash, result->symbols, k);
  879. }
  880. else {
  881. return NULL;
  882. }
  883. return res;
  884. }
  885. void rspamd_task_symbol_result_foreach(struct rspamd_task *task,
  886. struct rspamd_scan_result *result, GHFunc func,
  887. gpointer ud)
  888. {
  889. const char *kk;
  890. struct rspamd_symbol_result *res;
  891. if (result == NULL) {
  892. /* Use default result */
  893. result = task->result;
  894. }
  895. if (func) {
  896. kh_foreach(result->symbols, kk, res, {
  897. func((gpointer) kk, (gpointer) res, ud);
  898. });
  899. }
  900. }
  901. struct rspamd_scan_result *
  902. rspamd_find_metric_result(struct rspamd_task *task,
  903. const char *name)
  904. {
  905. struct rspamd_scan_result *res;
  906. if (name == NULL || strcmp(name, "default") == 0) {
  907. return task->result;
  908. }
  909. DL_FOREACH(task->result, res)
  910. {
  911. if (res->name && strcmp(res->name, name) == 0) {
  912. return res;
  913. }
  914. }
  915. return NULL;
  916. }
  917. void rspamd_task_result_adjust_grow_factor(struct rspamd_task *task,
  918. struct rspamd_scan_result *result,
  919. double grow_factor)
  920. {
  921. const char *kk;
  922. struct rspamd_symbol_result *res;
  923. double final_grow_factor = grow_factor;
  924. double max_limit = G_MINDOUBLE;
  925. if (grow_factor > 1.0) {
  926. for (unsigned int i = 0; i < result->nactions; i++) {
  927. struct rspamd_action_config *cur = &result->actions_config[i];
  928. if (cur->cur_limit > 0 && max_limit < cur->cur_limit) {
  929. max_limit = cur->cur_limit;
  930. }
  931. }
  932. /* Adjust factor by selecting all symbols and checking those with positive scores */
  933. kh_foreach(result->symbols, kk, res, {
  934. if (res->score > 0) {
  935. double mult = grow_factor - 1.0;
  936. /* We adjust the factor by the ratio of the score to the max limit */
  937. if (max_limit > 0 && !isnan(res->score)) {
  938. mult *= res->score / max_limit;
  939. final_grow_factor *= 1.0 + mult;
  940. }
  941. }
  942. });
  943. /* At this stage we know that we have some grow factor to apply */
  944. if (final_grow_factor > 1.0) {
  945. msg_info_task("calculated final grow factor for task: %.3f (%.2f the original one)",
  946. final_grow_factor, grow_factor);
  947. kh_foreach(result->symbols, kk, res, {
  948. if (res->score > 0) {
  949. result->score -= res->score;
  950. res->score *= final_grow_factor;
  951. result->score += res->score;
  952. }
  953. });
  954. }
  955. }
  956. }