You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

scan_result.c 27KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "mem_pool.h"
  18. #include "scan_result.h"
  19. #include "rspamd.h"
  20. #include "message.h"
  21. #include "lua/lua_common.h"
  22. #include "libserver/cfg_file_private.h"
  23. #include "libmime/scan_result_private.h"
  24. #include "contrib/fastutf8/fastutf8.h"
  25. #include <math.h>
  26. #include "contrib/uthash/utlist.h"
  /*
   * Debug logging helper bound to the "metric" log module.
   * The expansion reads `task->task_pool->tag.uid`, so a variable named
   * `task` must be in scope wherever this macro is used.
   */
  27. #define msg_debug_metric(...) rspamd_conditional_debug_fast(NULL, NULL, \
  28. rspamd_metric_log_id, "metric", task->task_pool->tag.uid, \
  29. RSPAMD_LOG_FUNC, \
  30. __VA_ARGS__)
  /* Provides the `rspamd_metric_log_id` referenced by the macro above (presumably; confirm against the logger header) */
  31. INIT_LOG_MODULE(metric)
  32. /* Average symbols count to optimize hash allocation */
  /* Updated in rspamd_scan_result_dtor() and read in rspamd_create_metric_result() */
  33. static struct rspamd_counter_data symbols_count;
  34. static void
  35. rspamd_scan_result_dtor(gpointer d)
  36. {
  37. struct rspamd_scan_result *r = (struct rspamd_scan_result *) d;
  38. struct rspamd_symbol_result *sres;
  39. rspamd_set_counter_ema(&symbols_count, kh_size(r->symbols), 0.5);
  40. if (r->symbol_cbref != -1) {
  41. luaL_unref(r->task->cfg->lua_state, LUA_REGISTRYINDEX, r->symbol_cbref);
  42. }
  43. kh_foreach_value(r->symbols, sres, {
  44. if (sres->options) {
  45. kh_destroy(rspamd_options_hash, sres->options);
  46. }
  47. });
  48. kh_destroy(rspamd_symbols_hash, r->symbols);
  49. kh_destroy(rspamd_symbols_group_hash, r->sym_groups);
  50. }
  51. static void
  52. rspamd_metric_actions_foreach_cb(int i, struct rspamd_action *act, void *cbd)
  53. {
  54. struct rspamd_scan_result *metric_res = (struct rspamd_scan_result *) cbd;
  55. metric_res->actions_config[i].flags = RSPAMD_ACTION_RESULT_DEFAULT;
  56. if (!(act->flags & RSPAMD_ACTION_NO_THRESHOLD)) {
  57. metric_res->actions_config[i].cur_limit = act->threshold;
  58. }
  59. else {
  60. metric_res->actions_config[i].flags |= RSPAMD_ACTION_RESULT_NO_THRESHOLD;
  61. }
  62. metric_res->actions_config[i].action = act;
  63. }
  64. struct rspamd_scan_result *
  65. rspamd_create_metric_result(struct rspamd_task *task,
  66. const char *name, int lua_sym_cbref)
  67. {
  68. struct rspamd_scan_result *metric_res;
  69. metric_res = rspamd_mempool_alloc0(task->task_pool,
  70. sizeof(struct rspamd_scan_result));
  71. metric_res->symbols = kh_init(rspamd_symbols_hash);
  72. metric_res->sym_groups = kh_init(rspamd_symbols_group_hash);
  73. if (name) {
  74. metric_res->name = rspamd_mempool_strdup(task->task_pool, name);
  75. }
  76. else {
  77. metric_res->name = NULL;
  78. }
  79. metric_res->symbol_cbref = lua_sym_cbref;
  80. metric_res->task = task;
  81. /* Optimize allocation */
  82. kh_resize(rspamd_symbols_group_hash, metric_res->sym_groups, 4);
  83. if (symbols_count.mean > 4) {
  84. kh_resize(rspamd_symbols_hash, metric_res->symbols, symbols_count.mean);
  85. }
  86. else {
  87. kh_resize(rspamd_symbols_hash, metric_res->symbols, 4);
  88. }
  89. if (task->cfg) {
  90. size_t nact = rspamd_config_actions_size(task->cfg);
  91. metric_res->actions_config = rspamd_mempool_alloc0(task->task_pool,
  92. sizeof(struct rspamd_action_config) * nact);
  93. rspamd_config_actions_foreach_enumerate(task->cfg, rspamd_metric_actions_foreach_cb, metric_res);
  94. metric_res->nactions = nact;
  95. }
  96. rspamd_mempool_add_destructor(task->task_pool,
  97. rspamd_scan_result_dtor,
  98. metric_res);
  99. DL_APPEND(task->result, metric_res);
  100. return metric_res;
  101. }
  102. static inline int
  103. rspamd_pr_sort(const struct rspamd_passthrough_result *pra,
  104. const struct rspamd_passthrough_result *prb)
  105. {
  106. return prb->priority - pra->priority;
  107. }
  108. bool rspamd_add_passthrough_result(struct rspamd_task *task,
  109. struct rspamd_action *action,
  110. unsigned int priority,
  111. double target_score,
  112. const char *message,
  113. const char *module,
  114. uint flags,
  115. struct rspamd_scan_result *scan_result)
  116. {
  117. struct rspamd_passthrough_result *pr;
  118. if (scan_result == NULL) {
  119. scan_result = task->result;
  120. }
  121. /* Find the specific action config */
  122. struct rspamd_action_config *action_config = NULL;
  123. for (unsigned int i = 0; i < scan_result->nactions; i++) {
  124. struct rspamd_action_config *cur = &scan_result->actions_config[i];
  125. /* We assume that all action pointers are static */
  126. if (cur->action == action) {
  127. action_config = cur;
  128. break;
  129. }
  130. }
  131. if (action_config && (action_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
  132. msg_info_task("<%s>: NOT set pre-result to '%s' %s(%.2f): '%s' from %s(%d); action is disabled",
  133. MESSAGE_FIELD_CHECK(task, message_id), action->name,
  134. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  135. target_score,
  136. message, module, priority);
  137. return false;
  138. }
  139. pr = rspamd_mempool_alloc(task->task_pool, sizeof(*pr));
  140. pr->action = action;
  141. pr->priority = priority;
  142. pr->message = message;
  143. pr->module = module;
  144. pr->target_score = target_score;
  145. pr->flags = flags;
  146. DL_APPEND(scan_result->passthrough_result, pr);
  147. DL_SORT(scan_result->passthrough_result, rspamd_pr_sort);
  148. if (!isnan(target_score)) {
  149. msg_info_task("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)",
  150. MESSAGE_FIELD_CHECK(task, message_id), action->name,
  151. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  152. target_score,
  153. message, module, priority);
  154. }
  155. else {
  156. msg_info_task("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)",
  157. MESSAGE_FIELD_CHECK(task, message_id), action->name,
  158. flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
  159. message, module, priority);
  160. }
  161. scan_result->nresults++;
  162. return true;
  163. }
  164. static inline double
  165. rspamd_check_group_score(struct rspamd_task *task,
  166. const char *symbol,
  167. struct rspamd_symbols_group *gr,
  168. double *group_score,
  169. double w)
  170. {
  171. if (gr != NULL && group_score && gr->max_score > 0.0 && w > 0.0) {
  172. if (*group_score >= gr->max_score && w > 0) {
  173. msg_info_task("maximum group score %.2f for group %s has been reached,"
  174. " ignoring symbol %s with weight %.2f",
  175. gr->max_score,
  176. gr->name, symbol, w);
  177. return NAN;
  178. }
  179. else if (*group_score + w > gr->max_score) {
  180. w = gr->max_score - *group_score;
  181. }
  182. }
  183. return w;
  184. }
  /* Fallback value used when no previously included header defines DBL_EPSILON */
  185. #ifndef DBL_EPSILON
  186. #define DBL_EPSILON 2.2204460492503131e-16
  187. #endif
  /*
   * Insert `symbol` into `metric_res` or update an already inserted one.
   *
   * task       - task being processed (config, settings, memory pool)
   * symbol     - symbol name; copied into the task pool on first insertion
   * weight     - dynamic weight; non-finite values are replaced with 0
   * opt        - optional option string attached to the symbol result
   * metric_res - scan result to modify
   * flags      - RSPAMD_SYMBOL_INSERT_SINGLE and RSPAMD_SYMBOL_INSERT_ENFORCE
   *              are examined here
   * new_sym    - optional; set to true when the symbol was not present before
   *              (it is never reset to false by this function)
   *
   * Returns the symbol result, or NULL when the symbol has a cache item that
   * is not allowed for this task.
   */
  188. static struct rspamd_symbol_result *
  189. insert_metric_result(struct rspamd_task *task,
  190. const char *symbol,
  191. double weight,
  192. const char *opt,
  193. struct rspamd_scan_result *metric_res,
  194. enum rspamd_symbol_insert_flags flags,
  195. bool *new_sym)
  196. {
  197. struct rspamd_symbol_result *symbol_result = NULL;
  198. double final_score, *gr_score = NULL, next_gf = 1.0, diff;
  199. struct rspamd_symbol *sdef;
  200. struct rspamd_symbols_group *gr = NULL;
  201. const ucl_object_t *mobj, *sobj;
  202. int max_shots = G_MAXINT, ret;
  203. unsigned int i;
  204. khiter_t k;
  205. gboolean single = !!(flags & RSPAMD_SYMBOL_INSERT_SINGLE);
  206. char *sym_cpy;
  /* NaN and infinite weights are neutralised instead of being propagated */
  207. if (!isfinite(weight)) {
  208. msg_warn_task("detected %s score for symbol %s, replace it with zero",
  209. isnan(weight) ? "NaN" : "infinity", symbol);
  210. weight = 0.0;
  211. }
  212. msg_debug_metric("want to insert symbol %s, initial weight %.2f",
  213. symbol, weight);
  /* Look up the static symbol definition from the configuration */
  214. sdef = g_hash_table_lookup(task->cfg->symbols, symbol);
  215. if (sdef == NULL) {
  /* Unknown symbol: it only scores when the caller enforces insertion */
  216. if (flags & RSPAMD_SYMBOL_INSERT_ENFORCE) {
  217. final_score = 1.0 * weight; /* Enforce static weight to 1.0 */
  218. }
  219. else {
  220. final_score = 0.0;
  221. }
  222. msg_debug_metric("no symbol definition for %s; final multiplier %.2f",
  223. symbol, final_score);
  224. }
  225. else {
  226. if (sdef->cache_item) {
  227. /* Check if we can insert this symbol at all */
  228. if (!rspamd_symcache_is_item_allowed(task, sdef->cache_item, FALSE)) {
  229. msg_debug_metric("symbol %s is not allowed to be inserted due to settings",
  230. symbol);
  231. return NULL;
  232. }
  233. }
  234. final_score = (*sdef->weight_ptr) * weight;
  /* Make sure each group of the symbol has an entry in sym_groups (starting at 0) */
  235. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  236. {
  237. k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr);
  238. if (k == kh_end(metric_res->sym_groups)) {
  239. k = kh_put(rspamd_symbols_group_hash, metric_res->sym_groups,
  240. gr, &ret);
  241. kh_value(metric_res->sym_groups, k) = 0;
  242. }
  243. }
  244. msg_debug_metric("metric multiplier for %s is %.2f",
  245. symbol, *sdef->weight_ptr);
  246. }
  /* Per-task settings may override the static multiplier of the symbol */
  247. if (task->settings) {
  248. double corr;
  249. mobj = ucl_object_lookup(task->settings, "scores");
  250. if (!mobj) {
  251. /* Legacy */
  252. mobj = task->settings;
  253. }
  254. else {
  255. msg_debug_metric("found scores in the settings");
  256. }
  257. sobj = ucl_object_lookup(mobj, symbol);
  258. if (sobj != NULL && ucl_object_todouble_safe(sobj, &corr)) {
  259. msg_debug_metric("settings: changed weight of symbol %s from %.2f "
  260. "to %.2f * %.2f",
  261. symbol, final_score, corr, weight);
  262. final_score = corr * weight;
  263. }
  264. }
  265. k = kh_get(rspamd_symbols_hash, metric_res->symbols, symbol);
  266. if (k != kh_end(metric_res->symbols)) {
  267. /* Existing metric score */
  268. symbol_result = kh_value(metric_res->symbols, k);
  /* Work out how many times this symbol is allowed to add its score */
  269. if (single) {
  270. max_shots = 1;
  271. }
  272. else {
  273. if (sdef) {
  274. if (sdef->groups) {
  275. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  276. {
  277. if (gr->flags & RSPAMD_SYMBOL_GROUP_ONE_SHOT) {
  278. max_shots = 1;
  279. }
  280. }
  281. }
  282. max_shots = MIN(max_shots, sdef->nshots);
  283. }
  284. else {
  285. max_shots = task->cfg->default_max_shots;
  286. }
  287. }
  288. msg_debug_metric("nshots: %d for symbol %s", max_shots, symbol);
  /* Once the shot limit is reached the symbol is handled as a single-shot one */
  289. if (!single && (max_shots > 0 && (symbol_result->nshots >= max_shots))) {
  290. single = TRUE;
  291. }
  292. symbol_result->nshots++;
  293. if (opt) {
  294. rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt));
  295. }
  296. /* Adjust diff */
  297. if (!single) {
  298. diff = final_score;
  299. msg_debug_metric("symbol %s can be inserted multiple times: %.2f weight",
  300. symbol, diff);
  301. }
  302. else {
  /* Single shot: only a same-sign score of larger magnitude replaces the old one */
  303. if (fabs(symbol_result->score) < fabs(final_score) &&
  304. signbit(symbol_result->score) == signbit(final_score)) {
  305. /* Replace less significant weight with a more significant one */
  306. diff = final_score - symbol_result->score;
  307. msg_debug_metric("symbol %s can be inserted single time;"
  308. " weight adjusted %.2f + %.2f",
  309. symbol, symbol_result->score, diff);
  310. }
  311. else {
  312. diff = 0;
  313. }
  314. }
  315. if (diff) {
  316. /* Handle grow factor */
  317. if (metric_res->grow_factor && diff > 0) {
  318. diff *= metric_res->grow_factor;
  319. next_gf *= task->cfg->grow_factor;
  320. }
  321. else if (diff > 0) {
  322. next_gf = task->cfg->grow_factor;
  323. }
  324. msg_debug_metric("adjust grow factor to %.2f for symbol %s (%.2f final)",
  325. next_gf, symbol, diff);
  326. if (sdef) {
  /* Apply group limits; a NaN diff means that nothing must be added at all */
  327. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  328. {
  329. double cur_diff;
  330. k = kh_get(rspamd_symbols_group_hash,
  331. metric_res->sym_groups, gr);
  332. g_assert(k != kh_end(metric_res->sym_groups));
  333. gr_score = &kh_value(metric_res->sym_groups, k);
  334. cur_diff = rspamd_check_group_score(task, symbol, gr,
  335. gr_score, diff);
  336. if (isnan(cur_diff)) {
  337. /* Limit reached, do not add result */
  338. msg_debug_metric(
  339. "group limit %.2f is reached for %s when inserting symbol %s;"
  340. " drop score %.2f",
  341. *gr_score, gr->name, symbol, diff);
  342. diff = NAN;
  343. break;
  344. }
  345. else if (gr_score) {
  346. *gr_score += cur_diff;
  347. if (cur_diff < diff) {
  348. /* Reduce */
  349. msg_debug_metric(
  350. "group limit %.2f is reached for %s when inserting symbol %s;"
  351. " reduce score %.2f - %.2f",
  352. *gr_score, gr->name, symbol, diff, cur_diff);
  353. diff = cur_diff;
  354. }
  355. }
  356. }
  357. }
  358. if (!isnan(diff)) {
  359. metric_res->score += diff;
  360. metric_res->grow_factor = next_gf;
  /*
   * NOTE(review): in the single case the symbol score becomes final_score,
   * while the result score above was changed by the (possibly scaled or
   * capped) diff.
   */
  361. if (single) {
  362. msg_debug_metric("final score for single symbol %s = %.2f; %.2f diff",
  363. symbol, final_score, diff);
  364. symbol_result->score = final_score;
  365. }
  366. else {
  367. msg_debug_metric("increase final score for multiple symbol %s += %.2f = %.2f",
  368. symbol, symbol_result->score, diff);
  369. symbol_result->score += diff;
  370. }
  371. }
  372. }
  373. }
  374. else {
  375. /* New result */
  376. if (new_sym) {
  377. *new_sym = true;
  378. }
  /* The hash key must outlive the caller's string, hence the pool copy */
  379. sym_cpy = rspamd_mempool_strdup(task->task_pool, symbol);
  380. k = kh_put(rspamd_symbols_hash, metric_res->symbols,
  381. sym_cpy, &ret);
  382. g_assert(ret > 0);
  383. symbol_result = rspamd_mempool_alloc0(task->task_pool, sizeof(*symbol_result));
  384. kh_value(metric_res->symbols, k) = symbol_result;
  385. /* Handle grow factor */
  386. if (metric_res->grow_factor && final_score > 0) {
  387. final_score *= metric_res->grow_factor;
  388. next_gf *= task->cfg->grow_factor;
  389. }
  390. else if (final_score > 0) {
  391. next_gf = task->cfg->grow_factor;
  392. }
  393. msg_debug_metric("adjust grow factor to %.2f for symbol %s (%.2f final)",
  394. next_gf, symbol, final_score);
  395. symbol_result->name = sym_cpy;
  396. symbol_result->sym = sdef;
  397. symbol_result->nshots = 1;
  398. if (sdef) {
  399. /* Check group limits */
  400. PTR_ARRAY_FOREACH(sdef->groups, i, gr)
  401. {
  402. double cur_score;
  403. k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr);
  404. g_assert(k != kh_end(metric_res->sym_groups));
  405. gr_score = &kh_value(metric_res->sym_groups, k);
  406. cur_score = rspamd_check_group_score(task, symbol, gr,
  407. gr_score, final_score);
  408. if (isnan(cur_score)) {
  409. /* Limit reached, do not add result */
  410. msg_debug_metric(
  411. "group limit %.2f is reached for %s when inserting symbol %s;"
  412. " drop score %.2f",
  413. *gr_score, gr->name, symbol, final_score);
  414. final_score = NAN;
  415. break;
  416. }
  417. else if (gr_score) {
  418. *gr_score += cur_score;
  419. if (cur_score < final_score) {
  420. /* Reduce */
  421. msg_debug_metric(
  422. "group limit %.2f is reached for %s when inserting symbol %s;"
  423. " reduce score %.2f - %.2f",
  424. *gr_score, gr->name, symbol, final_score, cur_score);
  425. final_score = cur_score;
  426. }
  427. }
  428. }
  429. }
  /* Account the score and the positive/negative statistics; a dropped symbol keeps score 0 */
  430. if (!isnan(final_score)) {
  431. const double epsilon = DBL_EPSILON;
  432. metric_res->score += final_score;
  433. metric_res->grow_factor = next_gf;
  434. symbol_result->score = final_score;
  435. if (final_score > epsilon) {
  436. metric_res->npositive++;
  437. metric_res->positive_score += final_score;
  438. }
  439. else if (final_score < -epsilon) {
  440. metric_res->nnegative++;
  441. metric_res->negative_score += fabs(final_score);
  442. }
  443. }
  444. else {
  445. symbol_result->score = 0;
  446. }
  447. if (opt) {
  448. rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt));
  449. }
  450. }
  /* NOTE(review): the value logged as "factor" is final_score, not a grow factor */
  451. msg_debug_metric("final insertion for symbol %s, score %.2f, factor: %f",
  452. symbol,
  453. symbol_result->score,
  454. final_score);
  455. metric_res->nresults++;
  456. return symbol_result;
  457. }
  458. struct rspamd_symbol_result *
  459. rspamd_task_insert_result_full(struct rspamd_task *task,
  460. const char *symbol,
  461. double weight,
  462. const char *opt,
  463. enum rspamd_symbol_insert_flags flags,
  464. struct rspamd_scan_result *result)
  465. {
  466. struct rspamd_symbol_result *symbol_result = NULL, *ret = NULL;
  467. struct rspamd_scan_result *mres;
  468. /*
  469. * We allow symbols to be inserted for skipped tasks, as it might be a
  470. * race condition before some symbol is finished and skip flag being set.
  471. */
  472. if (!RSPAMD_TASK_IS_SKIPPED(task) && (task->processed_stages & (RSPAMD_TASK_STAGE_IDEMPOTENT >> 1))) {
  473. msg_err_task("cannot insert symbol %s on idempotent phase",
  474. symbol);
  475. return NULL;
  476. }
  477. if (result == NULL) {
  478. /* Insert everywhere */
  479. DL_FOREACH(task->result, mres)
  480. {
  481. if (mres->symbol_cbref != -1) {
  482. /* Check if we can insert this symbol to this symbol result */
  483. GError *err = NULL;
  484. lua_State *L = (lua_State *) task->cfg->lua_state;
  485. if (!rspamd_lua_universal_pcall(L, mres->symbol_cbref,
  486. G_STRLOC, 1, "uss", &err,
  487. rspamd_task_classname, task, symbol, mres->name ? mres->name : "default")) {
  488. msg_warn_task("cannot call for symbol_cbref for result %s: %e",
  489. mres->name ? mres->name : "default", err);
  490. g_error_free(err);
  491. continue;
  492. }
  493. else {
  494. if (!lua_toboolean(L, -1)) {
  495. /* Skip symbol */
  496. msg_debug_metric("skip symbol %s for result %s due to Lua return value",
  497. symbol, mres->name);
  498. lua_pop(L, 1); /* Remove result */
  499. continue;
  500. }
  501. lua_pop(L, 1); /* Remove result */
  502. }
  503. }
  504. bool new_symbol = false;
  505. symbol_result = insert_metric_result(task,
  506. symbol,
  507. weight,
  508. opt,
  509. mres,
  510. flags,
  511. &new_symbol);
  512. if (mres->name == NULL) {
  513. /* Default result */
  514. ret = symbol_result;
  515. /* Process cache item */
  516. if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) {
  517. rspamd_symcache_inc_frequency(task->cfg->cache,
  518. symbol_result->sym->cache_item,
  519. symbol_result->sym->name);
  520. }
  521. }
  522. else if (new_symbol) {
  523. /* O(N) but we normally don't have any shadow results */
  524. LL_APPEND(ret, symbol_result);
  525. }
  526. }
  527. }
  528. else {
  529. /* Specific insertion */
  530. symbol_result = insert_metric_result(task,
  531. symbol,
  532. weight,
  533. opt,
  534. result,
  535. flags,
  536. NULL);
  537. ret = symbol_result;
  538. if (result->name == NULL) {
  539. /* Process cache item */
  540. if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) {
  541. rspamd_symcache_inc_frequency(task->cfg->cache,
  542. symbol_result->sym->cache_item,
  543. symbol_result->sym->name);
  544. }
  545. }
  546. }
  547. return ret;
  548. }
  549. static char *
  550. rspamd_task_option_safe_copy(struct rspamd_task *task,
  551. const char *val,
  552. gsize vlen,
  553. gsize *outlen)
  554. {
  555. const char *p, *end;
  556. p = val;
  557. end = val + vlen;
  558. vlen = 0; /* Reuse */
  559. while (p < end) {
  560. if (*p & 0x80) {
  561. UChar32 uc;
  562. int off = 0;
  563. U8_NEXT(p, off, end - p, uc);
  564. if (uc > 0) {
  565. if (u_isprint(uc)) {
  566. vlen += off;
  567. }
  568. else {
  569. /* We will replace it with 0xFFFD */
  570. vlen += MAX(off, 3);
  571. }
  572. }
  573. else {
  574. vlen += MAX(off, 3);
  575. }
  576. p += off;
  577. }
  578. else if (!g_ascii_isprint(*p)) {
  579. /* Another 0xFFFD */
  580. vlen += 3;
  581. p++;
  582. }
  583. else {
  584. p++;
  585. vlen++;
  586. }
  587. }
  588. char *dest, *d;
  589. dest = rspamd_mempool_alloc(task->task_pool, vlen + 1);
  590. d = dest;
  591. p = val;
  592. while (p < end) {
  593. if (*p & 0x80) {
  594. UChar32 uc;
  595. int off = 0;
  596. U8_NEXT(p, off, end - p, uc);
  597. if (uc > 0) {
  598. if (u_isprint(uc)) {
  599. memcpy(d, p, off);
  600. d += off;
  601. }
  602. else {
  603. /* We will replace it with 0xFFFD */
  604. *d++ = '\357';
  605. *d++ = '\277';
  606. *d++ = '\275';
  607. }
  608. }
  609. else {
  610. *d++ = '\357';
  611. *d++ = '\277';
  612. *d++ = '\275';
  613. }
  614. p += off;
  615. }
  616. else if (!g_ascii_isprint(*p)) {
  617. /* Another 0xFFFD */
  618. *d++ = '\357';
  619. *d++ = '\277';
  620. *d++ = '\275';
  621. p++;
  622. }
  623. else {
  624. *d++ = *p++;
  625. }
  626. }
  627. *d = '\0';
  628. *(outlen) = d - dest;
  629. return dest;
  630. }
  /*
   * Attach the option `val` (of `vlen` bytes) to the symbol result `s` and to
   * every symbol result linked after it (iterated with LL_FOREACH).
   *
   * Returns TRUE when the option was newly added to `s` itself, or when `val`
   * is NULL; FALSE otherwise (including a NULL `s` with a non-NULL `val`).
   *
   * Side effect: `task->result->nresults` is incremented on every call,
   * whatever the outcome.
   */
  631. gboolean
  632. rspamd_task_add_result_option(struct rspamd_task *task,
  633. struct rspamd_symbol_result *s,
  634. const char *val,
  635. gsize vlen)
  636. {
  637. struct rspamd_symbol_option *opt, srch;
  638. gboolean ret = FALSE;
  639. char *opt_cpy = NULL;
  640. gsize cpy_len;
  641. khiter_t k;
  642. int r;
  643. struct rspamd_symbol_result *cur;
  644. if (s && val) {
  645. /*
  646. * Here we assume that this function is all the time called with the
  647. * symbol from the default result, not some shadow result, or
  648. * the option insertion will be wrong
  649. */
  650. LL_FOREACH(s, cur)
  651. {
  /* A negative opts_len marks a symbol whose options were already truncated (set below) */
  652. if (cur->opts_len < 0) {
  653. /* Cannot add more options, give up */
  /* NOTE(review): val is logged with %s although its length is passed separately; confirm it is always NUL-terminated */
  654. msg_debug_task("cannot add more options to symbol %s when adding option %s",
  655. cur->name, val);
  656. ret = FALSE;
  657. continue;
  658. }
  /* The options table is created lazily on the first option */
  659. if (!cur->options) {
  660. cur->options = kh_init(rspamd_options_hash);
  661. }
  662. if (vlen + cur->opts_len > task->cfg->max_opts_len) {
  663. /* Add truncated option */
  664. msg_info_task("cannot add more options to symbol %s when adding option %s",
  665. cur->name, val);
  /*
   * NOTE(review): val/vlen are overwritten here and are not restored, so
   * all following entries of the list receive "..." as well.
   */
  666. val = "...";
  667. vlen = 3;
  668. cur->opts_len = -1;
  669. }
  /* Symbols flagged with RSPAMD_SYMBOL_FLAG_ONEPARAM take the else branch and get no option added */
  670. if (!(cur->sym && (cur->sym->flags & RSPAMD_SYMBOL_FLAG_ONEPARAM))) {
  671. srch.option = (char *) val;
  672. srch.optlen = vlen;
  673. k = kh_get(rspamd_options_hash, cur->options, &srch);
  674. if (k == kh_end(cur->options)) {
  675. opt_cpy = rspamd_task_option_safe_copy(task, val, vlen, &cpy_len);
  /* The sanitised copy differs in length: search again using the stored form */
  676. if (cpy_len != vlen) {
  677. srch.option = (char *) opt_cpy;
  678. srch.optlen = cpy_len;
  679. k = kh_get(rspamd_options_hash, cur->options, &srch);
  680. }
  681. /* Append new options */
  682. if (k == kh_end(cur->options)) {
  683. opt = rspamd_mempool_alloc0(task->task_pool, sizeof(*opt));
  684. opt->optlen = cpy_len;
  685. opt->option = opt_cpy;
  686. kh_put(rspamd_options_hash, cur->options, opt, &r);
  687. DL_APPEND(cur->opts_head, opt);
  /* Only the outcome for the first symbol result is reported to the caller */
  688. if (s == cur) {
  689. ret = TRUE;
  690. }
  691. }
  692. }
  693. }
  694. else {
  695. /* Skip addition */
  696. if (s == cur) {
  697. ret = FALSE;
  698. }
  699. }
  /* NOTE(review): ret carries over from earlier iterations, so later entries are accounted based on the result for `s` */
  700. if (ret && cur->opts_len >= 0) {
  701. cur->opts_len += vlen;
  702. }
  703. }
  704. }
  705. else if (!val) {
  706. ret = TRUE;
  707. }
  708. task->result->nresults++;
  709. return ret;
  710. }
  711. struct rspamd_action_config *
  712. rspamd_find_action_config_for_action(struct rspamd_scan_result *scan_result,
  713. struct rspamd_action *act)
  714. {
  715. for (unsigned int i = 0; i < scan_result->nactions; i++) {
  716. struct rspamd_action_config *cur = &scan_result->actions_config[i];
  717. if (act == cur->action) {
  718. return cur;
  719. }
  720. }
  721. return NULL;
  722. }
  723. struct rspamd_action *
  724. rspamd_check_action_metric(struct rspamd_task *task,
  725. struct rspamd_passthrough_result **ppr,
  726. struct rspamd_scan_result *scan_result)
  727. {
  728. struct rspamd_action_config *action_lim,
  729. *noaction = NULL;
  730. struct rspamd_action *selected_action = NULL, *least_action = NULL;
  731. struct rspamd_passthrough_result *pr, *sel_pr = NULL;
  732. double max_score = -(G_MAXDOUBLE), sc;
  733. gboolean seen_least = FALSE;
  734. if (scan_result == NULL) {
  735. scan_result = task->result;
  736. }
  737. if (scan_result->passthrough_result != NULL) {
  738. DL_FOREACH(scan_result->passthrough_result, pr)
  739. {
  740. struct rspamd_action_config *act_config =
  741. rspamd_find_action_config_for_action(scan_result, pr->action);
  742. /* Skip disabled actions */
  743. if (act_config && (act_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
  744. continue;
  745. }
  746. if (!seen_least || !(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
  747. sc = pr->target_score;
  748. selected_action = pr->action;
  749. if (!(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
  750. if (!isnan(sc)) {
  751. if (pr->action->action_type == METRIC_ACTION_NOACTION) {
  752. scan_result->score = MIN(sc, scan_result->score);
  753. }
  754. else {
  755. scan_result->score = sc;
  756. }
  757. }
  758. if (ppr) {
  759. *ppr = pr;
  760. }
  761. return selected_action;
  762. }
  763. else {
  764. seen_least = true;
  765. least_action = selected_action;
  766. if (isnan(sc)) {
  767. if (selected_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
  768. /*
  769. * In this case, we have a passthrough action that
  770. * is `least` action, however, there is no threshold
  771. * on it.
  772. *
  773. * Hence, we imply the following logic:
  774. *
  775. * - we leave score unchanged
  776. * - we apply passthrough no threshold action unless
  777. * score based action *is not* reject, otherwise
  778. * we apply reject action
  779. */
  780. }
  781. else {
  782. sc = selected_action->threshold;
  783. max_score = sc;
  784. sel_pr = pr;
  785. }
  786. }
  787. else {
  788. max_score = sc;
  789. sel_pr = pr;
  790. }
  791. }
  792. }
  793. }
  794. }
  795. /*
  796. * Select result by score
  797. */
  798. for (size_t i = scan_result->nactions - 1; i != (size_t) -1; i--) {
  799. action_lim = &scan_result->actions_config[i];
  800. sc = action_lim->cur_limit;
  801. if (action_lim->action->action_type == METRIC_ACTION_NOACTION) {
  802. noaction = action_lim;
  803. }
  804. if ((action_lim->flags & (RSPAMD_ACTION_RESULT_DISABLED | RSPAMD_ACTION_RESULT_NO_THRESHOLD))) {
  805. continue;
  806. }
  807. if (isnan(sc) ||
  808. (action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
  809. continue;
  810. }
  811. if (scan_result->score >= sc && sc > max_score) {
  812. selected_action = action_lim->action;
  813. max_score = sc;
  814. }
  815. }
  816. if (selected_action == NULL) {
  817. selected_action = noaction->action;
  818. }
  819. if (selected_action) {
  820. if (seen_least) {
  821. /* Adjust least action */
  822. if (least_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
  823. if (selected_action->action_type != METRIC_ACTION_REJECT &&
  824. selected_action->action_type != METRIC_ACTION_DISCARD) {
  825. /* Override score based action with least action */
  826. selected_action = least_action;
  827. if (ppr) {
  828. *ppr = sel_pr;
  829. }
  830. }
  831. }
  832. else {
  833. /* Adjust score if needed */
  834. if (max_score > scan_result->score) {
  835. if (ppr) {
  836. *ppr = sel_pr;
  837. }
  838. scan_result->score = max_score;
  839. }
  840. }
  841. }
  842. return selected_action;
  843. }
  844. if (ppr) {
  845. *ppr = sel_pr;
  846. }
  847. return noaction->action;
  848. }
  849. struct rspamd_symbol_result *
  850. rspamd_task_find_symbol_result(struct rspamd_task *task, const char *sym,
  851. struct rspamd_scan_result *result)
  852. {
  853. struct rspamd_symbol_result *res = NULL;
  854. khiter_t k;
  855. if (result == NULL) {
  856. /* Use default result */
  857. result = task->result;
  858. }
  859. k = kh_get(rspamd_symbols_hash, result->symbols, sym);
  860. if (k != kh_end(result->symbols)) {
  861. res = kh_value(result->symbols, k);
  862. }
  863. return res;
  864. }
  865. struct rspamd_symbol_result *rspamd_task_remove_symbol_result(
  866. struct rspamd_task *task,
  867. const char *symbol,
  868. struct rspamd_scan_result *result)
  869. {
  870. struct rspamd_symbol_result *res = NULL;
  871. khiter_t k;
  872. if (result == NULL) {
  873. /* Use default result */
  874. result = task->result;
  875. }
  876. k = kh_get(rspamd_symbols_hash, result->symbols, symbol);
  877. if (k != kh_end(result->symbols)) {
  878. res = kh_value(result->symbols, k);
  879. if (!isnan(res->score)) {
  880. /* Remove score from the result */
  881. result->score -= res->score;
  882. /* Also check the group limit */
  883. if (result->sym_groups && res->sym) {
  884. struct rspamd_symbol_group *gr;
  885. int i;
  886. khiter_t k_groups;
  887. PTR_ARRAY_FOREACH(res->sym->groups, i, gr)
  888. {
  889. double *gr_score;
  890. k_groups = kh_get(rspamd_symbols_group_hash,
  891. result->sym_groups, gr);
  892. if (k_groups != kh_end(result->sym_groups)) {
  893. gr_score = &kh_value(result->sym_groups, k_groups);
  894. if (gr_score) {
  895. *gr_score -= res->score;
  896. }
  897. }
  898. }
  899. }
  900. }
  901. kh_del(rspamd_symbols_hash, result->symbols, k);
  902. }
  903. else {
  904. return NULL;
  905. }
  906. return res;
  907. }
  908. void rspamd_task_symbol_result_foreach(struct rspamd_task *task,
  909. struct rspamd_scan_result *result, GHFunc func,
  910. gpointer ud)
  911. {
  912. const char *kk;
  913. struct rspamd_symbol_result *res;
  914. if (result == NULL) {
  915. /* Use default result */
  916. result = task->result;
  917. }
  918. if (func) {
  919. kh_foreach(result->symbols, kk, res, {
  920. func((gpointer) kk, (gpointer) res, ud);
  921. });
  922. }
  923. }
  924. struct rspamd_scan_result *
  925. rspamd_find_metric_result(struct rspamd_task *task,
  926. const char *name)
  927. {
  928. struct rspamd_scan_result *res;
  929. if (name == NULL || strcmp(name, "default") == 0) {
  930. return task->result;
  931. }
  932. DL_FOREACH(task->result, res)
  933. {
  934. if (res->name && strcmp(res->name, name) == 0) {
  935. return res;
  936. }
  937. }
  938. return NULL;
  939. }