You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

task.c 40KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "task.h"
  17. #include "rspamd.h"
  18. #include "filter.h"
  19. #include "protocol.h"
  20. #include "message.h"
  21. #include "lua/lua_common.h"
  22. #include "email_addr.h"
  23. #include "composites.h"
  24. #include "stat_api.h"
  25. #include "unix-std.h"
  26. #include "utlist.h"
  27. #include "contrib/zstd/zstd.h"
  28. #include "libserver/mempool_vars_internal.h"
  29. #include "libserver/cfg_file_private.h"
  30. #include "libmime/lang_detection.h"
  31. #include "libmime/filter_private.h"
  32. #include <math.h>
  33. /*
  34. * Do not print more than this number of elements
  35. */
  36. static const int max_log_elts = 7;
  37. static GQuark
  38. rspamd_task_quark (void)
  39. {
  40. return g_quark_from_static_string ("task-error");
  41. }
  42. static void
  43. rspamd_request_header_dtor (gpointer p)
  44. {
  45. GPtrArray *ar = p;
  46. guint i;
  47. rspamd_ftok_t *tok;
  48. if (ar) {
  49. for (i = 0; i < ar->len; i ++) {
  50. tok = g_ptr_array_index (ar, i);
  51. rspamd_fstring_mapped_ftok_free (tok);
  52. }
  53. g_ptr_array_free (ar, TRUE);
  54. }
  55. }
  56. /*
  57. * Create new task
  58. */
  59. struct rspamd_task *
  60. rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
  61. rspamd_mempool_t *pool,
  62. struct rspamd_lang_detector *lang_det,
  63. struct event_base *ev_base)
  64. {
  65. struct rspamd_task *new_task;
  66. new_task = g_malloc0 (sizeof (struct rspamd_task));
  67. new_task->worker = worker;
  68. new_task->lang_det = lang_det;
  69. if (cfg) {
  70. new_task->cfg = cfg;
  71. REF_RETAIN (cfg);
  72. if (cfg->check_all_filters) {
  73. new_task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
  74. }
  75. if (cfg->re_cache) {
  76. new_task->re_rt = rspamd_re_cache_runtime_new (cfg->re_cache);
  77. }
  78. if (new_task->lang_det == NULL && cfg->lang_det != NULL) {
  79. new_task->lang_det = cfg->lang_det;
  80. }
  81. }
  82. new_task->ev_base = ev_base;
  83. #ifdef HAVE_EVENT_NO_CACHE_TIME_FUNC
  84. if (ev_base) {
  85. event_base_update_cache_time (ev_base);
  86. event_base_gettimeofday_cached (ev_base, &new_task->tv);
  87. new_task->time_real = tv_to_double (&new_task->tv);
  88. }
  89. else {
  90. gettimeofday (&new_task->tv, NULL);
  91. new_task->time_real = tv_to_double (&new_task->tv);
  92. }
  93. #else
  94. gettimeofday (&new_task->tv, NULL);
  95. new_task->time_real = tv_to_double (&new_task->tv);
  96. #endif
  97. new_task->time_virtual = rspamd_get_virtual_ticks ();
  98. new_task->time_real_finish = NAN;
  99. new_task->time_virtual_finish = NAN;
  100. if (pool == NULL) {
  101. new_task->task_pool =
  102. rspamd_mempool_new (rspamd_mempool_suggest_size (), "task");
  103. new_task->flags |= RSPAMD_TASK_FLAG_OWN_POOL;
  104. }
  105. else {
  106. new_task->task_pool = pool;
  107. }
  108. new_task->raw_headers = g_hash_table_new_full (rspamd_strcase_hash,
  109. rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
  110. new_task->headers_order = g_queue_new ();
  111. new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
  112. rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
  113. rspamd_request_header_dtor);
  114. rspamd_mempool_add_destructor (new_task->task_pool,
  115. (rspamd_mempool_destruct_t) g_hash_table_unref,
  116. new_task->request_headers);
  117. new_task->reply_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
  118. rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
  119. rspamd_fstring_mapped_ftok_free);
  120. rspamd_mempool_add_destructor (new_task->task_pool,
  121. (rspamd_mempool_destruct_t) g_hash_table_unref,
  122. new_task->reply_headers);
  123. rspamd_mempool_add_destructor (new_task->task_pool,
  124. (rspamd_mempool_destruct_t) g_hash_table_unref,
  125. new_task->raw_headers);
  126. rspamd_mempool_add_destructor (new_task->task_pool,
  127. (rspamd_mempool_destruct_t) g_queue_free,
  128. new_task->headers_order);
  129. new_task->emails = g_hash_table_new (rspamd_email_hash, rspamd_emails_cmp);
  130. rspamd_mempool_add_destructor (new_task->task_pool,
  131. (rspamd_mempool_destruct_t) g_hash_table_unref,
  132. new_task->emails);
  133. new_task->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp);
  134. rspamd_mempool_add_destructor (new_task->task_pool,
  135. (rspamd_mempool_destruct_t) g_hash_table_unref,
  136. new_task->urls);
  137. new_task->parts = g_ptr_array_sized_new (4);
  138. rspamd_mempool_add_destructor (new_task->task_pool,
  139. rspamd_ptr_array_free_hard, new_task->parts);
  140. new_task->text_parts = g_ptr_array_sized_new (2);
  141. rspamd_mempool_add_destructor (new_task->task_pool,
  142. rspamd_ptr_array_free_hard, new_task->text_parts);
  143. new_task->received = g_ptr_array_sized_new (8);
  144. rspamd_mempool_add_destructor (new_task->task_pool,
  145. rspamd_ptr_array_free_hard, new_task->received);
  146. new_task->sock = -1;
  147. new_task->flags |= (RSPAMD_TASK_FLAG_MIME|RSPAMD_TASK_FLAG_JSON);
  148. new_task->result = rspamd_create_metric_result (new_task);
  149. new_task->message_id = new_task->queue_id = "undef";
  150. new_task->messages = ucl_object_typed_new (UCL_OBJECT);
  151. new_task->lua_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
  152. return new_task;
  153. }
  154. static void
  155. rspamd_task_reply (struct rspamd_task *task)
  156. {
  157. if (task->fin_callback) {
  158. task->fin_callback (task, task->fin_arg);
  159. }
  160. else {
  161. rspamd_protocol_write_reply (task);
  162. }
  163. }
  164. /*
  165. * Called if all filters are processed
  166. * @return TRUE if session should be terminated
  167. */
  168. gboolean
  169. rspamd_task_fin (void *arg)
  170. {
  171. struct rspamd_task *task = (struct rspamd_task *) arg;
  172. /* Task is already finished or skipped */
  173. if (RSPAMD_TASK_IS_PROCESSED (task)) {
  174. rspamd_task_reply (task);
  175. return TRUE;
  176. }
  177. if (!rspamd_task_process (task, RSPAMD_TASK_PROCESS_ALL)) {
  178. rspamd_task_reply (task);
  179. return TRUE;
  180. }
  181. if (RSPAMD_TASK_IS_PROCESSED (task)) {
  182. rspamd_task_reply (task);
  183. return TRUE;
  184. }
  185. /* One more iteration */
  186. return FALSE;
  187. }
  /*
   * Called if session was restored inside fin callback.
   * Currently does nothing.
   */
  void
  rspamd_task_restore (void *arg)
  {
      /* XXX: not needed now ? */
      (void) arg;
  }
  196. /*
  197. * Free all structures of worker_task
  198. */
  199. void
  200. rspamd_task_free (struct rspamd_task *task)
  201. {
  202. struct rspamd_mime_part *p;
  203. struct rspamd_mime_text_part *tp;
  204. struct rspamd_email_address *addr;
  205. struct rspamd_lua_cached_entry *entry;
  206. GHashTableIter it;
  207. gpointer k, v;
  208. guint i;
  209. if (task) {
  210. debug_task ("free pointer %p", task);
  211. for (i = 0; i < task->parts->len; i ++) {
  212. p = g_ptr_array_index (task->parts, i);
  213. if (p->raw_headers) {
  214. g_hash_table_unref (p->raw_headers);
  215. }
  216. if (p->headers_order) {
  217. g_queue_free (p->headers_order);
  218. }
  219. if (IS_CT_MULTIPART (p->ct)) {
  220. if (p->specific.mp->children) {
  221. g_ptr_array_free (p->specific.mp->children, TRUE);
  222. }
  223. }
  224. }
  225. for (i = 0; i < task->text_parts->len; i ++) {
  226. tp = g_ptr_array_index (task->text_parts, i);
  227. if (tp->utf_words) {
  228. g_array_free (tp->utf_words, TRUE);
  229. }
  230. if (tp->normalized_hashes) {
  231. g_array_free (tp->normalized_hashes, TRUE);
  232. }
  233. if (tp->languages) {
  234. g_ptr_array_unref (tp->languages);
  235. }
  236. }
  237. if (task->rcpt_envelope) {
  238. for (i = 0; i < task->rcpt_envelope->len; i ++) {
  239. addr = g_ptr_array_index (task->rcpt_envelope, i);
  240. rspamd_email_address_free (addr);
  241. }
  242. g_ptr_array_free (task->rcpt_envelope, TRUE);
  243. }
  244. if (task->from_envelope) {
  245. rspamd_email_address_free (task->from_envelope);
  246. }
  247. if (task->meta_words) {
  248. g_array_free (task->meta_words, TRUE);
  249. }
  250. ucl_object_unref (task->messages);
  251. if (task->re_rt) {
  252. rspamd_re_cache_runtime_destroy (task->re_rt);
  253. }
  254. if (task->http_conn != NULL) {
  255. rspamd_http_connection_reset (task->http_conn);
  256. rspamd_http_connection_unref (task->http_conn);
  257. }
  258. if (task->settings != NULL) {
  259. ucl_object_unref (task->settings);
  260. }
  261. if (task->client_addr) {
  262. rspamd_inet_address_free (task->client_addr);
  263. }
  264. if (task->from_addr) {
  265. rspamd_inet_address_free (task->from_addr);
  266. }
  267. if (task->err) {
  268. g_error_free (task->err);
  269. }
  270. if (rspamd_event_pending (&task->timeout_ev, EV_TIMEOUT)) {
  271. event_del (&task->timeout_ev);
  272. }
  273. if (task->guard_ev) {
  274. event_del (task->guard_ev);
  275. }
  276. if (task->sock != -1) {
  277. close (task->sock);
  278. }
  279. if (task->cfg) {
  280. if (task->lua_cache) {
  281. g_hash_table_iter_init (&it, task->lua_cache);
  282. while (g_hash_table_iter_next (&it, &k, &v)) {
  283. entry = (struct rspamd_lua_cached_entry *)v;
  284. luaL_unref (task->cfg->lua_state,
  285. LUA_REGISTRYINDEX, entry->ref);
  286. }
  287. g_hash_table_unref (task->lua_cache);
  288. }
  289. REF_RELEASE (task->cfg);
  290. }
  291. if (task->flags & RSPAMD_TASK_FLAG_OWN_POOL) {
  292. rspamd_mempool_delete (task->task_pool);
  293. }
  294. g_free (task);
  295. }
  296. }
  297. struct rspamd_task_map {
  298. gpointer begin;
  299. gulong len;
  300. };
  301. static void
  302. rspamd_task_unmapper (gpointer ud)
  303. {
  304. struct rspamd_task_map *m = ud;
  305. munmap (m->begin, m->len);
  306. }
  307. gboolean
  308. rspamd_task_load_message (struct rspamd_task *task,
  309. struct rspamd_http_message *msg, const gchar *start, gsize len)
  310. {
  311. guint control_len, r;
  312. struct ucl_parser *parser;
  313. ucl_object_t *control_obj;
  314. gchar filepath[PATH_MAX], *fp;
  315. gint fd, flen;
  316. gulong offset = 0, shmem_size = 0;
  317. rspamd_ftok_t *tok;
  318. gpointer map;
  319. struct stat st;
  320. struct rspamd_task_map *m;
  321. const gchar *ft;
  322. #ifdef HAVE_SANE_SHMEM
  323. ft = "shm";
  324. #else
  325. ft = "file";
  326. #endif
  327. if (msg) {
  328. rspamd_protocol_handle_headers (task, msg);
  329. }
  330. tok = rspamd_task_get_request_header (task, "shm");
  331. if (tok) {
  332. /* Shared memory part */
  333. r = rspamd_strlcpy (filepath, tok->begin,
  334. MIN (sizeof (filepath), tok->len + 1));
  335. rspamd_url_decode (filepath, filepath, r + 1);
  336. flen = strlen (filepath);
  337. if (filepath[0] == '"' && flen > 2) {
  338. /* We need to unquote filepath */
  339. fp = &filepath[1];
  340. fp[flen - 2] = '\0';
  341. }
  342. else {
  343. fp = &filepath[0];
  344. }
  345. #ifdef HAVE_SANE_SHMEM
  346. fd = shm_open (fp, O_RDONLY, 00600);
  347. #else
  348. fd = open (fp, O_RDONLY, 00600);
  349. #endif
  350. if (fd == -1) {
  351. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  352. "Cannot open %s segment (%s): %s", ft, fp, strerror (errno));
  353. return FALSE;
  354. }
  355. if (fstat (fd, &st) == -1) {
  356. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  357. "Cannot stat %s segment (%s): %s", ft, fp, strerror (errno));
  358. close (fd);
  359. return FALSE;
  360. }
  361. map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  362. if (map == MAP_FAILED) {
  363. close (fd);
  364. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  365. "Cannot mmap %s (%s): %s", ft, fp, strerror (errno));
  366. return FALSE;
  367. }
  368. close (fd);
  369. tok = rspamd_task_get_request_header (task, "shm-offset");
  370. if (tok) {
  371. rspamd_strtoul (tok->begin, tok->len, &offset);
  372. if (offset > (gulong)st.st_size) {
  373. msg_err_task ("invalid offset %ul (%ul available) for shm "
  374. "segment %s", offset, st.st_size, fp);
  375. munmap (map, st.st_size);
  376. return FALSE;
  377. }
  378. }
  379. tok = rspamd_task_get_request_header (task, "shm-length");
  380. shmem_size = st.st_size;
  381. if (tok) {
  382. rspamd_strtoul (tok->begin, tok->len, &shmem_size);
  383. if (shmem_size > (gulong)st.st_size) {
  384. msg_err_task ("invalid length %ul (%ul available) for %s "
  385. "segment %s", shmem_size, st.st_size, ft, fp);
  386. munmap (map, st.st_size);
  387. return FALSE;
  388. }
  389. }
  390. task->msg.begin = ((guchar *)map) + offset;
  391. task->msg.len = shmem_size;
  392. m = rspamd_mempool_alloc (task->task_pool, sizeof (*m));
  393. m->begin = map;
  394. m->len = st.st_size;
  395. msg_info_task ("loaded message from shared memory %s (%ul size, %ul offset)",
  396. fp, shmem_size, offset);
  397. rspamd_mempool_add_destructor (task->task_pool, rspamd_task_unmapper, m);
  398. return TRUE;
  399. }
  400. tok = rspamd_task_get_request_header (task, "file");
  401. if (tok == NULL) {
  402. tok = rspamd_task_get_request_header (task, "path");
  403. }
  404. if (tok) {
  405. debug_task ("want to scan file %T", tok);
  406. r = rspamd_strlcpy (filepath, tok->begin,
  407. MIN (sizeof (filepath), tok->len + 1));
  408. rspamd_url_decode (filepath, filepath, r + 1);
  409. flen = strlen (filepath);
  410. if (filepath[0] == '"' && flen > 2) {
  411. /* We need to unquote filepath */
  412. fp = &filepath[1];
  413. fp[flen - 2] = '\0';
  414. }
  415. else {
  416. fp = &filepath[0];
  417. }
  418. if (stat (fp, &st) == -1) {
  419. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  420. "Invalid file (%s): %s", fp, strerror (errno));
  421. return FALSE;
  422. }
  423. if (G_UNLIKELY (st.st_size == 0)) {
  424. /* Empty file */
  425. task->flags |= RSPAMD_TASK_FLAG_EMPTY;
  426. task->msg.begin = rspamd_mempool_strdup (task->task_pool, "");
  427. task->msg.len = 0;
  428. }
  429. else {
  430. fd = open (fp, O_RDONLY);
  431. if (fd == -1) {
  432. g_set_error (&task->err, rspamd_task_quark (),
  433. RSPAMD_PROTOCOL_ERROR,
  434. "Cannot open file (%s): %s", fp, strerror (errno));
  435. return FALSE;
  436. }
  437. map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  438. if (map == MAP_FAILED) {
  439. close (fd);
  440. g_set_error (&task->err, rspamd_task_quark (),
  441. RSPAMD_PROTOCOL_ERROR,
  442. "Cannot mmap file (%s): %s", fp, strerror (errno));
  443. return FALSE;
  444. }
  445. close (fd);
  446. task->msg.begin = map;
  447. task->msg.len = st.st_size;
  448. m = rspamd_mempool_alloc (task->task_pool, sizeof (*m));
  449. m->begin = map;
  450. m->len = st.st_size;
  451. rspamd_mempool_add_destructor (task->task_pool, rspamd_task_unmapper, m);
  452. }
  453. task->msg.fpath = rspamd_mempool_strdup (task->task_pool, fp);
  454. task->flags |= RSPAMD_TASK_FLAG_FILE;
  455. msg_info_task ("loaded message from file %s", fp);
  456. return TRUE;
  457. }
  458. /* Plain data */
  459. debug_task ("got input of length %z", task->msg.len);
  460. /* Check compression */
  461. tok = rspamd_task_get_request_header (task, "compression");
  462. if (tok) {
  463. /* Need to uncompress */
  464. rspamd_ftok_t t;
  465. t.begin = "zstd";
  466. t.len = 4;
  467. if (rspamd_ftok_casecmp (tok, &t) == 0) {
  468. ZSTD_DStream *zstream;
  469. ZSTD_inBuffer zin;
  470. ZSTD_outBuffer zout;
  471. guchar *out;
  472. gsize outlen, r;
  473. gulong dict_id;
  474. if (!rspamd_libs_reset_decompression (task->cfg->libs_ctx)) {
  475. g_set_error (&task->err, rspamd_task_quark(),
  476. RSPAMD_PROTOCOL_ERROR,
  477. "Cannot decompress, decompressor init failed");
  478. return FALSE;
  479. }
  480. tok = rspamd_task_get_request_header (task, "dictionary");
  481. if (tok != NULL) {
  482. /* We need to use custom dictionary */
  483. if (!rspamd_strtoul (tok->begin, tok->len, &dict_id)) {
  484. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  485. "Non numeric dictionary");
  486. return FALSE;
  487. }
  488. if (!task->cfg->libs_ctx->in_dict) {
  489. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  490. "Unknown dictionary, undefined locally");
  491. return FALSE;
  492. }
  493. if (task->cfg->libs_ctx->in_dict->id != dict_id) {
  494. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  495. "Unknown dictionary, invalid dictionary id");
  496. return FALSE;
  497. }
  498. }
  499. zstream = task->cfg->libs_ctx->in_zstream;
  500. zin.pos = 0;
  501. zin.src = start;
  502. zin.size = len;
  503. if ((outlen = ZSTD_getDecompressedSize (start, len)) == 0) {
  504. outlen = ZSTD_DStreamOutSize ();
  505. }
  506. out = g_malloc (outlen);
  507. zout.dst = out;
  508. zout.pos = 0;
  509. zout.size = outlen;
  510. while (zin.pos < zin.size) {
  511. r = ZSTD_decompressStream (zstream, &zout, &zin);
  512. if (ZSTD_isError (r)) {
  513. g_set_error (&task->err, rspamd_task_quark(),
  514. RSPAMD_PROTOCOL_ERROR,
  515. "Decompression error: %s", ZSTD_getErrorName (r));
  516. return FALSE;
  517. }
  518. if (zout.pos == zout.size) {
  519. /* We need to extend output buffer */
  520. zout.size = zout.size * 1.5 + 1.0;
  521. zout.dst = g_realloc (zout.dst, zout.size);
  522. }
  523. }
  524. rspamd_mempool_add_destructor (task->task_pool, g_free, zout.dst);
  525. task->msg.begin = zout.dst;
  526. task->msg.len = zout.pos;
  527. task->flags |= RSPAMD_TASK_FLAG_COMPRESSED;
  528. msg_info_task ("loaded message from zstd compressed stream; "
  529. "compressed: %ul; uncompressed: %ul",
  530. (gulong)zin.size, (gulong)zout.pos);
  531. }
  532. else {
  533. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  534. "Invalid compression method");
  535. return FALSE;
  536. }
  537. }
  538. else {
  539. task->msg.begin = start;
  540. task->msg.len = len;
  541. }
  542. if (task->msg.len == 0) {
  543. task->flags |= RSPAMD_TASK_FLAG_EMPTY;
  544. }
  545. if (task->flags & RSPAMD_TASK_FLAG_HAS_CONTROL) {
  546. /* We have control chunk, so we need to process it separately */
  547. if (task->msg.len < task->message_len) {
  548. msg_warn_task ("message has invalid message length: %ul and total len: %ul",
  549. task->message_len, task->msg.len);
  550. g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
  551. "Invalid length");
  552. return FALSE;
  553. }
  554. control_len = task->msg.len - task->message_len;
  555. if (control_len > 0) {
  556. parser = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE);
  557. if (!ucl_parser_add_chunk (parser, task->msg.begin, control_len)) {
  558. msg_warn_task ("processing of control chunk failed: %s",
  559. ucl_parser_get_error (parser));
  560. ucl_parser_free (parser);
  561. }
  562. else {
  563. control_obj = ucl_parser_get_object (parser);
  564. ucl_parser_free (parser);
  565. rspamd_protocol_handle_control (task, control_obj);
  566. ucl_object_unref (control_obj);
  567. }
  568. task->msg.begin += control_len;
  569. task->msg.len -= control_len;
  570. }
  571. }
  572. return TRUE;
  573. }
  574. static gint
  575. rspamd_task_select_processing_stage (struct rspamd_task *task, guint stages)
  576. {
  577. gint st, mask;
  578. mask = task->processed_stages;
  579. if (mask == 0) {
  580. st = 0;
  581. }
  582. else {
  583. for (st = 1; mask != 1; st ++) {
  584. mask = (unsigned int)mask >> 1;
  585. }
  586. }
  587. st = 1 << st;
  588. if (stages & st) {
  589. return st;
  590. }
  591. else if (st < RSPAMD_TASK_STAGE_DONE) {
  592. /* We assume that the stage that was not requested is done */
  593. task->processed_stages |= st;
  594. return rspamd_task_select_processing_stage (task, stages);
  595. }
  596. /* We are done */
  597. return RSPAMD_TASK_STAGE_DONE;
  598. }
  599. gboolean
  600. rspamd_task_process (struct rspamd_task *task, guint stages)
  601. {
  602. gint st;
  603. gboolean ret = TRUE;
  604. GError *stat_error = NULL;
  605. /* Avoid nested calls */
  606. if (task->flags & RSPAMD_TASK_FLAG_PROCESSING) {
  607. return TRUE;
  608. }
  609. if (RSPAMD_TASK_IS_PROCESSED (task)) {
  610. return TRUE;
  611. }
  612. task->flags |= RSPAMD_TASK_FLAG_PROCESSING;
  613. st = rspamd_task_select_processing_stage (task, stages);
  614. switch (st) {
  615. case RSPAMD_TASK_STAGE_READ_MESSAGE:
  616. if (!rspamd_message_parse (task)) {
  617. ret = FALSE;
  618. }
  619. break;
  620. case RSPAMD_TASK_STAGE_PRE_FILTERS:
  621. rspamd_symcache_process_symbols (task, task->cfg->cache,
  622. RSPAMD_TASK_STAGE_PRE_FILTERS);
  623. break;
  624. case RSPAMD_TASK_STAGE_PROCESS_MESSAGE:
  625. if (!(task->flags & RSPAMD_TASK_FLAG_SKIP_PROCESS)) {
  626. rspamd_message_process (task);
  627. }
  628. break;
  629. case RSPAMD_TASK_STAGE_FILTERS:
  630. rspamd_symcache_process_symbols (task, task->cfg->cache,
  631. RSPAMD_TASK_STAGE_FILTERS);
  632. break;
  633. case RSPAMD_TASK_STAGE_CLASSIFIERS:
  634. case RSPAMD_TASK_STAGE_CLASSIFIERS_PRE:
  635. case RSPAMD_TASK_STAGE_CLASSIFIERS_POST:
  636. if (!RSPAMD_TASK_IS_EMPTY (task)) {
  637. if (rspamd_stat_classify (task, task->cfg->lua_state, st, &stat_error) ==
  638. RSPAMD_STAT_PROCESS_ERROR) {
  639. msg_err_task ("classify error: %e", stat_error);
  640. g_error_free (stat_error);
  641. }
  642. }
  643. break;
  644. case RSPAMD_TASK_STAGE_COMPOSITES:
  645. rspamd_make_composites (task);
  646. break;
  647. case RSPAMD_TASK_STAGE_POST_FILTERS:
  648. rspamd_symcache_process_symbols (task, task->cfg->cache,
  649. RSPAMD_TASK_STAGE_POST_FILTERS);
  650. if ((task->flags & RSPAMD_TASK_FLAG_LEARN_AUTO) &&
  651. !RSPAMD_TASK_IS_EMPTY (task) &&
  652. !(task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM|RSPAMD_TASK_FLAG_LEARN_HAM))) {
  653. rspamd_stat_check_autolearn (task);
  654. }
  655. break;
  656. case RSPAMD_TASK_STAGE_LEARN:
  657. case RSPAMD_TASK_STAGE_LEARN_PRE:
  658. case RSPAMD_TASK_STAGE_LEARN_POST:
  659. if (task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM|RSPAMD_TASK_FLAG_LEARN_HAM)) {
  660. if (task->err == NULL) {
  661. if (!rspamd_stat_learn (task,
  662. task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM,
  663. task->cfg->lua_state, task->classifier,
  664. st, &stat_error)) {
  665. if (stat_error == NULL) {
  666. g_set_error (&stat_error,
  667. g_quark_from_static_string ("stat"), 500,
  668. "Unknown statistics error, found on stage %s;"
  669. " classifier: %s",
  670. rspamd_task_stage_name (st), task->classifier);
  671. }
  672. if (stat_error->code >= 400) {
  673. msg_err_task ("learn error: %e", stat_error);
  674. }
  675. else {
  676. msg_notice_task ("skip learning: %e", stat_error);
  677. }
  678. if (!(task->flags & RSPAMD_TASK_FLAG_LEARN_AUTO)) {
  679. task->err = stat_error;
  680. task->processed_stages |= RSPAMD_TASK_STAGE_DONE;
  681. }
  682. else {
  683. /* Do not skip idempotent in case of learn error */
  684. if (stat_error) {
  685. g_error_free (stat_error);
  686. }
  687. task->processed_stages |= RSPAMD_TASK_STAGE_LEARN|
  688. RSPAMD_TASK_STAGE_LEARN_PRE|
  689. RSPAMD_TASK_STAGE_LEARN_POST;
  690. }
  691. }
  692. }
  693. }
  694. break;
  695. case RSPAMD_TASK_STAGE_COMPOSITES_POST:
  696. /* Second run of composites processing before idempotent filters */
  697. rspamd_make_composites (task);
  698. break;
  699. case RSPAMD_TASK_STAGE_IDEMPOTENT:
  700. rspamd_symcache_process_symbols (task, task->cfg->cache,
  701. RSPAMD_TASK_STAGE_IDEMPOTENT);
  702. break;
  703. case RSPAMD_TASK_STAGE_DONE:
  704. task->processed_stages |= RSPAMD_TASK_STAGE_DONE;
  705. break;
  706. default:
  707. /* TODO: not implemented stage */
  708. break;
  709. }
  710. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  711. /* Set all bits except idempotent filters */
  712. task->processed_stages |= 0x7FFF;
  713. }
  714. task->flags &= ~RSPAMD_TASK_FLAG_PROCESSING;
  715. if (!ret || RSPAMD_TASK_IS_PROCESSED (task)) {
  716. if (!ret) {
  717. /* Set processed flags */
  718. task->processed_stages |= RSPAMD_TASK_STAGE_DONE;
  719. }
  720. msg_debug_task ("task is processed");
  721. return ret;
  722. }
  723. if (rspamd_session_events_pending (task->s) != 0) {
  724. /* We have events pending, so we consider this stage as incomplete */
  725. msg_debug_task ("need more work on stage %d", st);
  726. }
  727. else {
  728. /* Mark the current stage as done and go to the next stage */
  729. msg_debug_task ("completed stage %d", st);
  730. task->processed_stages |= st;
  731. /* Tail recursion */
  732. return rspamd_task_process (task, stages);
  733. }
  734. return ret;
  735. }
  736. struct rspamd_email_address*
  737. rspamd_task_get_sender (struct rspamd_task *task)
  738. {
  739. return task->from_envelope;
  740. }
  741. static const gchar *
  742. rspamd_task_cache_principal_recipient (struct rspamd_task *task,
  743. const gchar *rcpt, gsize len)
  744. {
  745. gchar *rcpt_lc;
  746. if (rcpt == NULL) {
  747. return NULL;
  748. }
  749. rcpt_lc = rspamd_mempool_alloc (task->task_pool, len + 1);
  750. rspamd_strlcpy (rcpt_lc, rcpt, len + 1);
  751. rspamd_str_lc (rcpt_lc, len);
  752. rspamd_mempool_set_variable (task->task_pool,
  753. RSPAMD_MEMPOOL_PRINCIPAL_RECIPIENT, rcpt_lc, NULL);
  754. return rcpt_lc;
  755. }
  756. const gchar *
  757. rspamd_task_get_principal_recipient (struct rspamd_task *task)
  758. {
  759. const gchar *val;
  760. struct rspamd_email_address *addr;
  761. val = rspamd_mempool_get_variable (task->task_pool,
  762. RSPAMD_MEMPOOL_PRINCIPAL_RECIPIENT);
  763. if (val) {
  764. return val;
  765. }
  766. if (task->deliver_to) {
  767. return rspamd_task_cache_principal_recipient (task, task->deliver_to,
  768. strlen (task->deliver_to));
  769. }
  770. if (task->rcpt_envelope != NULL) {
  771. addr = g_ptr_array_index (task->rcpt_envelope, 0);
  772. if (addr->addr) {
  773. return rspamd_task_cache_principal_recipient (task, addr->addr,
  774. addr->addr_len);
  775. }
  776. }
  777. if (task->rcpt_mime != NULL && task->rcpt_mime->len > 0) {
  778. addr = g_ptr_array_index (task->rcpt_mime, 0);
  779. if (addr->addr) {
  780. return rspamd_task_cache_principal_recipient (task, addr->addr,
  781. addr->addr_len);
  782. }
  783. }
  784. return NULL;
  785. }
  786. gboolean
  787. rspamd_learn_task_spam (struct rspamd_task *task,
  788. gboolean is_spam,
  789. const gchar *classifier,
  790. GError **err)
  791. {
  792. if (is_spam) {
  793. task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM;
  794. }
  795. else {
  796. task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM;
  797. }
  798. task->classifier = classifier;
  799. return TRUE;
  800. }
  801. static gboolean
  802. rspamd_task_log_check_condition (struct rspamd_task *task,
  803. struct rspamd_log_format *lf)
  804. {
  805. gboolean ret = FALSE;
  806. switch (lf->type) {
  807. case RSPAMD_LOG_MID:
  808. if (task->message_id && strcmp (task->message_id, "undef") != 0) {
  809. ret = TRUE;
  810. }
  811. break;
  812. case RSPAMD_LOG_QID:
  813. if (task->queue_id && strcmp (task->queue_id, "undef") != 0) {
  814. ret = TRUE;
  815. }
  816. break;
  817. case RSPAMD_LOG_USER:
  818. if (task->user) {
  819. ret = TRUE;
  820. }
  821. break;
  822. case RSPAMD_LOG_IP:
  823. if (task->from_addr && rspamd_ip_is_valid (task->from_addr)) {
  824. ret = TRUE;
  825. }
  826. break;
  827. case RSPAMD_LOG_SMTP_RCPT:
  828. case RSPAMD_LOG_SMTP_RCPTS:
  829. if (task->rcpt_envelope && task->rcpt_envelope->len > 0) {
  830. ret = TRUE;
  831. }
  832. break;
  833. case RSPAMD_LOG_MIME_RCPT:
  834. case RSPAMD_LOG_MIME_RCPTS:
  835. if (task->rcpt_mime && task->rcpt_mime->len > 0) {
  836. ret = TRUE;
  837. }
  838. break;
  839. case RSPAMD_LOG_SMTP_FROM:
  840. if (task->from_envelope) {
  841. ret = TRUE;
  842. }
  843. break;
  844. case RSPAMD_LOG_MIME_FROM:
  845. if (task->from_mime && task->from_mime->len > 0) {
  846. ret = TRUE;
  847. }
  848. break;
  849. case RSPAMD_LOG_FILENAME:
  850. if (task->msg.fpath) {
  851. ret = TRUE;
  852. }
  853. break;
  854. case RSPAMD_LOG_FORCED_ACTION:
  855. if (task->result->passthrough_result) {
  856. ret = TRUE;
  857. }
  858. break;
  859. default:
  860. ret = TRUE;
  861. break;
  862. }
  863. return ret;
  864. }
  865. /*
  866. * Sort by symbol's score -> name
  867. */
  868. static gint
  869. rspamd_task_compare_log_sym (gconstpointer a, gconstpointer b)
  870. {
  871. const struct rspamd_symbol_result *s1 = *(const struct rspamd_symbol_result **)a,
  872. *s2 = *(const struct rspamd_symbol_result **)b;
  873. gdouble w1, w2;
  874. w1 = fabs (s1->score);
  875. w2 = fabs (s2->score);
  876. if (w1 == w2 && s1->name && s2->name) {
  877. return strcmp (s1->name, s2->name);
  878. }
  879. return (w2 - w1) * 1000.0;
  880. }
  881. static rspamd_ftok_t
  882. rspamd_task_log_metric_res (struct rspamd_task *task,
  883. struct rspamd_log_format *lf)
  884. {
  885. static gchar scorebuf[32];
  886. rspamd_ftok_t res = {.begin = NULL, .len = 0};
  887. struct rspamd_metric_result *mres;
  888. gboolean first = TRUE;
  889. rspamd_fstring_t *symbuf;
  890. struct rspamd_symbol_result *sym;
  891. GPtrArray *sorted_symbols;
  892. struct rspamd_action *act;
  893. guint i, j;
  894. mres = task->result;
  895. act = rspamd_check_action_metric (task);
  896. if (mres != NULL) {
  897. switch (lf->type) {
  898. case RSPAMD_LOG_ISSPAM:
  899. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  900. res.begin = "S";
  901. }
  902. else if (!(act->flags & RSPAMD_ACTION_HAM)) {
  903. res.begin = "T";
  904. }
  905. else {
  906. res.begin = "F";
  907. }
  908. res.len = 1;
  909. break;
  910. case RSPAMD_LOG_ACTION:
  911. res.begin = act->name;
  912. res.len = strlen (res.begin);
  913. break;
  914. case RSPAMD_LOG_SCORES:
  915. res.len = rspamd_snprintf (scorebuf, sizeof (scorebuf), "%.2f/%.2f",
  916. mres->score, rspamd_task_get_required_score (task, mres));
  917. res.begin = scorebuf;
  918. break;
  919. case RSPAMD_LOG_SYMBOLS:
  920. symbuf = rspamd_fstring_sized_new (128);
  921. sorted_symbols = g_ptr_array_sized_new (kh_size (mres->symbols));
  922. kh_foreach_value_ptr (mres->symbols, sym, {
  923. if (!(sym->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
  924. g_ptr_array_add (sorted_symbols, (gpointer)sym);
  925. }
  926. });
  927. g_ptr_array_sort (sorted_symbols, rspamd_task_compare_log_sym);
  928. for (i = 0; i < sorted_symbols->len; i ++) {
  929. sym = g_ptr_array_index (sorted_symbols, i);
  930. if (first) {
  931. rspamd_printf_fstring (&symbuf, "%s", sym->name);
  932. }
  933. else {
  934. rspamd_printf_fstring (&symbuf, ",%s", sym->name);
  935. }
  936. if (lf->flags & RSPAMD_LOG_FMT_FLAG_SYMBOLS_SCORES) {
  937. rspamd_printf_fstring (&symbuf, "(%.2f)", sym->score);
  938. }
  939. if (lf->flags & RSPAMD_LOG_FMT_FLAG_SYMBOLS_PARAMS) {
  940. rspamd_printf_fstring (&symbuf, "{");
  941. if (sym->options) {
  942. struct rspamd_symbol_option *opt;
  943. j = 0;
  944. DL_FOREACH (sym->opts_head, opt) {
  945. rspamd_printf_fstring (&symbuf, "%s;", opt->option);
  946. if (j >= max_log_elts) {
  947. rspamd_printf_fstring (&symbuf, "...;");
  948. break;
  949. }
  950. j ++;
  951. }
  952. }
  953. rspamd_printf_fstring (&symbuf, "}");
  954. }
  955. first = FALSE;
  956. }
  957. g_ptr_array_free (sorted_symbols, TRUE);
  958. rspamd_mempool_add_destructor (task->task_pool,
  959. (rspamd_mempool_destruct_t)rspamd_fstring_free,
  960. symbuf);
  961. res.begin = symbuf->str;
  962. res.len = symbuf->len;
  963. break;
  964. default:
  965. break;
  966. }
  967. }
  968. return res;
  969. }
  970. static rspamd_fstring_t *
  971. rspamd_task_log_write_var (struct rspamd_task *task, rspamd_fstring_t *logbuf,
  972. const rspamd_ftok_t *var, const rspamd_ftok_t *content)
  973. {
  974. rspamd_fstring_t *res = logbuf;
  975. const gchar *p, *c, *end;
  976. if (content == NULL) {
  977. /* Just output variable */
  978. res = rspamd_fstring_append (res, var->begin, var->len);
  979. }
  980. else {
  981. /* Replace $ with variable value */
  982. p = content->begin;
  983. c = p;
  984. end = p + content->len;
  985. while (p < end) {
  986. if (*p == '$') {
  987. if (p > c) {
  988. res = rspamd_fstring_append (res, c, p - c);
  989. }
  990. res = rspamd_fstring_append (res, var->begin, var->len);
  991. p ++;
  992. c = p;
  993. }
  994. else {
  995. p ++;
  996. }
  997. }
  998. if (p > c) {
  999. res = rspamd_fstring_append (res, c, p - c);
  1000. }
  1001. }
  1002. return res;
  1003. }
  1004. static rspamd_fstring_t *
  1005. rspamd_task_write_ialist (struct rspamd_task *task,
  1006. GPtrArray *addrs, gint lim,
  1007. struct rspamd_log_format *lf,
  1008. rspamd_fstring_t *logbuf)
  1009. {
  1010. rspamd_fstring_t *res = logbuf, *varbuf;
  1011. rspamd_ftok_t var = {.begin = NULL, .len = 0};
  1012. struct rspamd_email_address *addr;
  1013. gint i, nchars = 0, cur_chars;
  1014. if (addrs && lim <= 0) {
  1015. lim = addrs->len;
  1016. }
  1017. varbuf = rspamd_fstring_new ();
  1018. PTR_ARRAY_FOREACH (addrs, i, addr) {
  1019. if (i >= lim) {
  1020. break;
  1021. }
  1022. cur_chars = addr->addr_len;
  1023. varbuf = rspamd_fstring_append (varbuf, addr->addr,
  1024. cur_chars);
  1025. nchars += cur_chars;
  1026. if (varbuf->len > 0) {
  1027. if (i != lim - 1) {
  1028. varbuf = rspamd_fstring_append (varbuf, ",", 1);
  1029. }
  1030. }
  1031. if (i >= max_log_elts || nchars >= max_log_elts * 10) {
  1032. varbuf = rspamd_fstring_append (varbuf, "...", 3);
  1033. break;
  1034. }
  1035. }
  1036. if (varbuf->len > 0) {
  1037. var.begin = varbuf->str;
  1038. var.len = varbuf->len;
  1039. res = rspamd_task_log_write_var (task, logbuf,
  1040. &var, (const rspamd_ftok_t *) lf->data);
  1041. }
  1042. rspamd_fstring_free (varbuf);
  1043. return res;
  1044. }
  1045. static rspamd_fstring_t *
  1046. rspamd_task_write_addr_list (struct rspamd_task *task,
  1047. GPtrArray *addrs, gint lim,
  1048. struct rspamd_log_format *lf,
  1049. rspamd_fstring_t *logbuf)
  1050. {
  1051. rspamd_fstring_t *res = logbuf, *varbuf;
  1052. rspamd_ftok_t var = {.begin = NULL, .len = 0};
  1053. struct rspamd_email_address *addr;
  1054. gint i;
  1055. if (lim <= 0) {
  1056. lim = addrs->len;
  1057. }
  1058. varbuf = rspamd_fstring_new ();
  1059. for (i = 0; i < lim; i++) {
  1060. addr = g_ptr_array_index (addrs, i);
  1061. if (addr->addr) {
  1062. varbuf = rspamd_fstring_append (varbuf, addr->addr, addr->addr_len);
  1063. }
  1064. if (varbuf->len > 0) {
  1065. if (i != lim - 1) {
  1066. varbuf = rspamd_fstring_append (varbuf, ",", 1);
  1067. }
  1068. }
  1069. if (i >= max_log_elts) {
  1070. varbuf = rspamd_fstring_append (varbuf, "...", 3);
  1071. break;
  1072. }
  1073. }
  1074. if (varbuf->len > 0) {
  1075. var.begin = varbuf->str;
  1076. var.len = varbuf->len;
  1077. res = rspamd_task_log_write_var (task, logbuf,
  1078. &var, (const rspamd_ftok_t *) lf->data);
  1079. }
  1080. rspamd_fstring_free (varbuf);
  1081. return res;
  1082. }
  1083. static rspamd_fstring_t *
  1084. rspamd_task_log_variable (struct rspamd_task *task,
  1085. struct rspamd_log_format *lf, rspamd_fstring_t *logbuf)
  1086. {
  1087. rspamd_fstring_t *res = logbuf;
  1088. rspamd_ftok_t var = {.begin = NULL, .len = 0};
  1089. static gchar numbuf[128];
  1090. static const gchar undef[] = "undef";
  1091. switch (lf->type) {
  1092. /* String vars */
  1093. case RSPAMD_LOG_MID:
  1094. if (task->message_id) {
  1095. var.begin = task->message_id;
  1096. var.len = strlen (var.begin);
  1097. }
  1098. else {
  1099. var.begin = undef;
  1100. var.len = sizeof (undef) - 1;
  1101. }
  1102. break;
  1103. case RSPAMD_LOG_QID:
  1104. if (task->queue_id) {
  1105. var.begin = task->queue_id;
  1106. var.len = strlen (var.begin);
  1107. }
  1108. else {
  1109. var.begin = undef;
  1110. var.len = sizeof (undef) - 1;
  1111. }
  1112. break;
  1113. case RSPAMD_LOG_USER:
  1114. if (task->user) {
  1115. var.begin = task->user;
  1116. var.len = strlen (var.begin);
  1117. }
  1118. else {
  1119. var.begin = undef;
  1120. var.len = sizeof (undef) - 1;
  1121. }
  1122. break;
  1123. case RSPAMD_LOG_IP:
  1124. if (task->from_addr && rspamd_ip_is_valid (task->from_addr)) {
  1125. var.begin = rspamd_inet_address_to_string (task->from_addr);
  1126. var.len = strlen (var.begin);
  1127. }
  1128. else {
  1129. var.begin = undef;
  1130. var.len = sizeof (undef) - 1;
  1131. }
  1132. break;
  1133. /* Numeric vars */
  1134. case RSPAMD_LOG_LEN:
  1135. var.len = rspamd_snprintf (numbuf, sizeof (numbuf), "%uz",
  1136. task->msg.len);
  1137. var.begin = numbuf;
  1138. break;
  1139. case RSPAMD_LOG_DNS_REQ:
  1140. var.len = rspamd_snprintf (numbuf, sizeof (numbuf), "%uD",
  1141. task->dns_requests);
  1142. var.begin = numbuf;
  1143. break;
  1144. case RSPAMD_LOG_TIME_REAL:
  1145. var.begin = rspamd_log_check_time (task->time_real,
  1146. task->time_real_finish,
  1147. task->cfg->clock_res);
  1148. var.len = strlen (var.begin);
  1149. break;
  1150. case RSPAMD_LOG_TIME_VIRTUAL:
  1151. var.begin = rspamd_log_check_time (task->time_virtual,
  1152. task->time_virtual_finish,
  1153. task->cfg->clock_res);
  1154. var.len = strlen (var.begin);
  1155. break;
  1156. /* InternetAddress vars */
  1157. case RSPAMD_LOG_SMTP_FROM:
  1158. if (task->from_envelope) {
  1159. var.begin = task->from_envelope->addr;
  1160. var.len = task->from_envelope->addr_len;
  1161. }
  1162. break;
  1163. case RSPAMD_LOG_MIME_FROM:
  1164. if (task->from_mime) {
  1165. return rspamd_task_write_ialist (task, task->from_mime, 1, lf,
  1166. logbuf);
  1167. }
  1168. break;
  1169. case RSPAMD_LOG_SMTP_RCPT:
  1170. if (task->rcpt_envelope) {
  1171. return rspamd_task_write_addr_list (task, task->rcpt_envelope, 1, lf,
  1172. logbuf);
  1173. }
  1174. break;
  1175. case RSPAMD_LOG_MIME_RCPT:
  1176. if (task->rcpt_mime) {
  1177. return rspamd_task_write_ialist (task, task->rcpt_mime, 1, lf,
  1178. logbuf);
  1179. }
  1180. break;
  1181. case RSPAMD_LOG_SMTP_RCPTS:
  1182. if (task->rcpt_envelope) {
  1183. return rspamd_task_write_addr_list (task, task->rcpt_envelope, -1, lf,
  1184. logbuf);
  1185. }
  1186. break;
  1187. case RSPAMD_LOG_MIME_RCPTS:
  1188. if (task->rcpt_mime) {
  1189. return rspamd_task_write_ialist (task, task->rcpt_mime, -1, lf,
  1190. logbuf);
  1191. }
  1192. break;
  1193. case RSPAMD_LOG_DIGEST:
  1194. var.len = rspamd_snprintf (numbuf, sizeof (numbuf), "%*xs",
  1195. (gint)sizeof (task->digest), task->digest);
  1196. var.begin = numbuf;
  1197. break;
  1198. case RSPAMD_LOG_FILENAME:
  1199. if (task->msg.fpath) {
  1200. var.len = strlen (task->msg.fpath);
  1201. var.begin = task->msg.fpath;
  1202. }
  1203. else {
  1204. var.begin = undef;
  1205. var.len = sizeof (undef) - 1;
  1206. }
  1207. break;
  1208. case RSPAMD_LOG_FORCED_ACTION:
  1209. if (task->result->passthrough_result) {
  1210. struct rspamd_passthrough_result *pr = task->result->passthrough_result;
  1211. if (!isnan (pr->target_score)) {
  1212. var.len = rspamd_snprintf (numbuf, sizeof (numbuf),
  1213. "%s \"%s\"; score=%.2f (set by %s)",
  1214. pr->action->name,
  1215. pr->message,
  1216. pr->target_score,
  1217. pr->module);
  1218. }
  1219. else {
  1220. var.len = rspamd_snprintf (numbuf, sizeof (numbuf),
  1221. "%s \"%s\"; score=nan (set by %s)",
  1222. pr->action->name,
  1223. pr->message,
  1224. pr->module);
  1225. }
  1226. var.begin = numbuf;
  1227. }
  1228. else {
  1229. var.begin = undef;
  1230. var.len = sizeof (undef) - 1;
  1231. }
  1232. break;
  1233. default:
  1234. var = rspamd_task_log_metric_res (task, lf);
  1235. break;
  1236. }
  1237. if (var.len > 0) {
  1238. res = rspamd_task_log_write_var (task, logbuf,
  1239. &var, (const rspamd_ftok_t *)lf->data);
  1240. }
  1241. return res;
  1242. }
  1243. void
  1244. rspamd_task_write_log (struct rspamd_task *task)
  1245. {
  1246. rspamd_fstring_t *logbuf;
  1247. struct rspamd_log_format *lf;
  1248. struct rspamd_task **ptask;
  1249. const gchar *lua_str;
  1250. gsize lua_str_len;
  1251. lua_State *L;
  1252. g_assert (task != NULL);
  1253. if (task->cfg->log_format == NULL ||
  1254. (task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
  1255. return;
  1256. }
  1257. logbuf = rspamd_fstring_sized_new (1000);
  1258. DL_FOREACH (task->cfg->log_format, lf) {
  1259. switch (lf->type) {
  1260. case RSPAMD_LOG_STRING:
  1261. logbuf = rspamd_fstring_append (logbuf, lf->data, lf->len);
  1262. break;
  1263. case RSPAMD_LOG_LUA:
  1264. L = task->cfg->lua_state;
  1265. lua_rawgeti (L, LUA_REGISTRYINDEX, GPOINTER_TO_INT (lf->data));
  1266. ptask = lua_newuserdata (L, sizeof (*ptask));
  1267. rspamd_lua_setclass (L, "rspamd{task}", -1);
  1268. *ptask = task;
  1269. if (lua_pcall (L, 1, 1, 0) != 0) {
  1270. msg_err_task ("call to log function failed: %s",
  1271. lua_tostring (L, -1));
  1272. lua_pop (L, 1);
  1273. }
  1274. else {
  1275. lua_str = lua_tolstring (L, -1, &lua_str_len);
  1276. if (lua_str != NULL) {
  1277. logbuf = rspamd_fstring_append (logbuf, lua_str, lua_str_len);
  1278. }
  1279. lua_pop (L, 1);
  1280. }
  1281. break;
  1282. default:
  1283. /* We have a variable in log format */
  1284. if (lf->flags & RSPAMD_LOG_FMT_FLAG_CONDITION) {
  1285. if (!rspamd_task_log_check_condition (task, lf)) {
  1286. continue;
  1287. }
  1288. }
  1289. logbuf = rspamd_task_log_variable (task, lf, logbuf);
  1290. break;
  1291. }
  1292. }
  1293. msg_notice_task ("%V", logbuf);
  1294. rspamd_fstring_free (logbuf);
  1295. }
  1296. gdouble
  1297. rspamd_task_get_required_score (struct rspamd_task *task, struct rspamd_metric_result *m)
  1298. {
  1299. gint i;
  1300. if (m == NULL) {
  1301. m = task->result;
  1302. if (m == NULL) {
  1303. return NAN;
  1304. }
  1305. }
  1306. for (i = m->nactions - 1; i >= 0; i --) {
  1307. struct rspamd_action_result *action_lim = &m->actions_limits[i];
  1308. if (!isnan (action_lim->cur_limit) &&
  1309. !(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD|RSPAMD_ACTION_HAM))) {
  1310. return m->actions_limits[i].cur_limit;
  1311. }
  1312. }
  1313. return NAN;
  1314. }
  1315. rspamd_ftok_t *
  1316. rspamd_task_get_request_header (struct rspamd_task *task,
  1317. const gchar *name)
  1318. {
  1319. GPtrArray *ret;
  1320. rspamd_ftok_t srch;
  1321. srch.begin = (gchar *)name;
  1322. srch.len = strlen (name);
  1323. ret = g_hash_table_lookup (task->request_headers, &srch);
  1324. if (ret) {
  1325. return (rspamd_ftok_t *)g_ptr_array_index (ret, 0);
  1326. }
  1327. return NULL;
  1328. }
  1329. GPtrArray*
  1330. rspamd_task_get_request_header_multiple (struct rspamd_task *task,
  1331. const gchar *name)
  1332. {
  1333. GPtrArray *ret;
  1334. rspamd_ftok_t srch;
  1335. srch.begin = (gchar *)name;
  1336. srch.len = strlen (name);
  1337. ret = g_hash_table_lookup (task->request_headers, &srch);
  1338. return ret;
  1339. }
  1340. void
  1341. rspamd_task_add_request_header (struct rspamd_task *task,
  1342. rspamd_ftok_t *name, rspamd_ftok_t *value)
  1343. {
  1344. GPtrArray *ret;
  1345. ret = g_hash_table_lookup (task->request_headers, name);
  1346. if (ret) {
  1347. g_ptr_array_add (ret, value);
  1348. /* We need to free name token */
  1349. rspamd_fstring_mapped_ftok_free (name);
  1350. }
  1351. else {
  1352. ret = g_ptr_array_sized_new (2);
  1353. g_ptr_array_add (ret, value);
  1354. g_hash_table_replace (task->request_headers, name, ret);
  1355. }
  1356. }
  1357. void
  1358. rspamd_task_profile_set (struct rspamd_task *task, const gchar *key,
  1359. gdouble value)
  1360. {
  1361. GHashTable *tbl;
  1362. gdouble *pval;
  1363. if (key == NULL) {
  1364. return;
  1365. }
  1366. tbl = rspamd_mempool_get_variable (task->task_pool, RSPAMD_MEMPOOL_PROFILE);
  1367. if (tbl == NULL) {
  1368. tbl = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
  1369. rspamd_mempool_set_variable (task->task_pool, RSPAMD_MEMPOOL_PROFILE,
  1370. tbl, (rspamd_mempool_destruct_t)g_hash_table_unref);
  1371. }
  1372. pval = g_hash_table_lookup (tbl, key);
  1373. if (pval == NULL) {
  1374. pval = rspamd_mempool_alloc (task->task_pool, sizeof (*pval));
  1375. *pval = value;
  1376. g_hash_table_insert (tbl, (void *)key, pval);
  1377. }
  1378. else {
  1379. *pval = value;
  1380. }
  1381. }
  1382. gdouble*
  1383. rspamd_task_profile_get (struct rspamd_task *task, const gchar *key)
  1384. {
  1385. GHashTable *tbl;
  1386. gdouble *pval = NULL;
  1387. tbl = rspamd_mempool_get_variable (task->task_pool, RSPAMD_MEMPOOL_PROFILE);
  1388. if (tbl != NULL) {
  1389. pval = g_hash_table_lookup (tbl, key);
  1390. }
  1391. return pval;
  1392. }
  1393. gboolean
  1394. rspamd_task_set_finish_time (struct rspamd_task *task)
  1395. {
  1396. struct timeval tv;
  1397. if (isnan (task->time_real_finish)) {
  1398. #ifdef HAVE_EVENT_NO_CACHE_TIME_FUNC
  1399. if (task->ev_base) {
  1400. event_base_update_cache_time (task->ev_base);
  1401. event_base_gettimeofday_cached (task->ev_base, &tv);
  1402. task->time_real_finish = tv_to_double (&tv);
  1403. }
  1404. else {
  1405. gettimeofday (&tv, NULL);
  1406. task->time_real_finish = tv_to_double (&tv);
  1407. }
  1408. #else
  1409. gettimeofday (&tv, NULL);
  1410. task->time_real_finish = tv_to_double (&tv);
  1411. #endif
  1412. task->time_virtual_finish = rspamd_get_virtual_ticks ();
  1413. return TRUE;
  1414. }
  1415. return FALSE;
  1416. }
  1417. const gchar *
  1418. rspamd_task_stage_name (enum rspamd_task_stage stg)
  1419. {
  1420. const gchar *ret = "unknown stage";
  1421. switch (stg) {
  1422. case RSPAMD_TASK_STAGE_CONNECT:
  1423. ret = "connect";
  1424. break;
  1425. case RSPAMD_TASK_STAGE_ENVELOPE:
  1426. ret = "envelope";
  1427. break;
  1428. case RSPAMD_TASK_STAGE_READ_MESSAGE:
  1429. ret = "read_message";
  1430. break;
  1431. case RSPAMD_TASK_STAGE_PRE_FILTERS:
  1432. ret = "prefilters";
  1433. break;
  1434. case RSPAMD_TASK_STAGE_PROCESS_MESSAGE:
  1435. ret = "process_message";
  1436. break;
  1437. case RSPAMD_TASK_STAGE_FILTERS:
  1438. ret = "filters";
  1439. break;
  1440. case RSPAMD_TASK_STAGE_CLASSIFIERS_PRE:
  1441. ret = "classifiers_pre";
  1442. break;
  1443. case RSPAMD_TASK_STAGE_CLASSIFIERS:
  1444. ret = "classifiers";
  1445. break;
  1446. case RSPAMD_TASK_STAGE_CLASSIFIERS_POST:
  1447. ret = "classifiers_post";
  1448. break;
  1449. case RSPAMD_TASK_STAGE_COMPOSITES:
  1450. ret = "composites";
  1451. break;
  1452. case RSPAMD_TASK_STAGE_POST_FILTERS:
  1453. ret = "postfilters";
  1454. break;
  1455. case RSPAMD_TASK_STAGE_LEARN_PRE:
  1456. ret = "learn_pre";
  1457. break;
  1458. case RSPAMD_TASK_STAGE_LEARN:
  1459. ret = "learn";
  1460. break;
  1461. case RSPAMD_TASK_STAGE_LEARN_POST:
  1462. ret = "learn_post";
  1463. break;
  1464. case RSPAMD_TASK_STAGE_COMPOSITES_POST:
  1465. ret = "composites_post";
  1466. break;
  1467. case RSPAMD_TASK_STAGE_IDEMPOTENT:
  1468. ret = "idempotent";
  1469. break;
  1470. case RSPAMD_TASK_STAGE_DONE:
  1471. ret = "done";
  1472. break;
  1473. case RSPAMD_TASK_STAGE_REPLIED:
  1474. ret = "replied";
  1475. break;
  1476. default:
  1477. break;
  1478. }
  1479. return ret;
  1480. }