You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

protocol.c 56KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "message.h"
  19. #include "utlist.h"
  20. #include "libserver/http/http_private.h"
  21. #include "worker_private.h"
  22. #include "libserver/cfg_file_private.h"
  23. #include "libmime/scan_result_private.h"
  24. #include "lua/lua_common.h"
  25. #include "unix-std.h"
  26. #include "protocol_internal.h"
  27. #include "libserver/mempool_vars_internal.h"
  28. #include "contrib/fastutf8/fastutf8.h"
  29. #include "task.h"
  30. #include "lua/lua_classnames.h"
  31. #include <math.h>
  32. #ifdef SYS_ZSTD
  33. #include "zstd.h"
  34. #else
  35. #include "contrib/zstd/zstd.h"
  36. #endif
/*
 * Declare the "protocol" log module. The debug macro below refers to
 * rspamd_protocol_log_id, which is presumably provided by this declaration
 * (NOTE(review): confirm against the INIT_LOG_MODULE definition).
 */
INIT_LOG_MODULE(protocol)

/*
 * Module-local logging shortcuts.
 *
 * Every macro below expands to an expression that reads
 * task->task_pool->tag.uid, so a variable named `task` must be in scope at
 * each call site. G_STRFUNC is passed as well, so the name of the calling
 * function is handed to the logger.
 */

/* Log with G_LOG_LEVEL_CRITICAL under the "protocol" module name */
#define msg_err_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL,               \
														  "protocol", task->task_pool->tag.uid, \
														  G_STRFUNC,                            \
														  __VA_ARGS__)
/* Log with G_LOG_LEVEL_WARNING under the "protocol" module name */
#define msg_warn_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING,                \
														   "protocol", task->task_pool->tag.uid, \
														   G_STRFUNC,                            \
														   __VA_ARGS__)
/* Log with G_LOG_LEVEL_INFO under the "protocol" module name */
#define msg_info_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_INFO,                   \
														   "protocol", task->task_pool->tag.uid, \
														   G_STRFUNC,                            \
														   __VA_ARGS__)
/* Debug logging routed through rspamd_conditional_debug_fast with the module log id */
#define msg_debug_protocol(...) rspamd_conditional_debug_fast(NULL, NULL,                                                  \
															  rspamd_protocol_log_id, "protocol", task->task_pool->tag.uid, \
															  G_STRFUNC,                                                    \
															  __VA_ARGS__)
  54. static GQuark
  55. rspamd_protocol_quark(void)
  56. {
  57. return g_quark_from_static_string("protocol-error");
  58. }
  59. /*
  60. * Remove <> from the fixed string and copy it to the pool
  61. */
  62. static gchar *
  63. rspamd_protocol_escape_braces(struct rspamd_task *task, rspamd_ftok_t *in)
  64. {
  65. guint nchars = 0;
  66. const gchar *p;
  67. rspamd_ftok_t tok;
  68. gboolean has_obrace = FALSE;
  69. g_assert(in != NULL);
  70. g_assert(in->len > 0);
  71. p = in->begin;
  72. while ((g_ascii_isspace(*p) || *p == '<') && nchars < in->len) {
  73. if (*p == '<') {
  74. has_obrace = TRUE;
  75. }
  76. p++;
  77. nchars++;
  78. }
  79. tok.begin = p;
  80. p = in->begin + in->len - 1;
  81. tok.len = in->len - nchars;
  82. while (g_ascii_isspace(*p) && tok.len > 0) {
  83. p--;
  84. tok.len--;
  85. }
  86. if (has_obrace && *p == '>') {
  87. tok.len--;
  88. }
  89. return rspamd_mempool_ftokdup(task->task_pool, &tok);
  90. }
  91. #define COMPARE_CMD(str, cmd, len) (sizeof(cmd) - 1 == (len) && rspamd_lc_cmp((str), (cmd), (len)) == 0)
  92. static gboolean
  93. rspamd_protocol_handle_url(struct rspamd_task *task,
  94. struct rspamd_http_message *msg)
  95. {
  96. GHashTable *query_args;
  97. GHashTableIter it;
  98. struct http_parser_url u;
  99. const gchar *p;
  100. gsize pathlen;
  101. rspamd_ftok_t *key, *value;
  102. gpointer k, v;
  103. if (msg->url == NULL || msg->url->len == 0) {
  104. g_set_error(&task->err, rspamd_protocol_quark(), 400, "missing command");
  105. return FALSE;
  106. }
  107. if (http_parser_parse_url(msg->url->str, msg->url->len, 0, &u) != 0) {
  108. g_set_error(&task->err, rspamd_protocol_quark(), 400, "bad request URL");
  109. return FALSE;
  110. }
  111. if (!(u.field_set & (1 << UF_PATH))) {
  112. g_set_error(&task->err, rspamd_protocol_quark(), 400,
  113. "bad request URL: missing path");
  114. return FALSE;
  115. }
  116. p = msg->url->str + u.field_data[UF_PATH].off;
  117. pathlen = u.field_data[UF_PATH].len;
  118. if (*p == '/') {
  119. p++;
  120. pathlen--;
  121. }
  122. switch (*p) {
  123. case 'c':
  124. case 'C':
  125. /* check */
  126. if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) {
  127. task->cmd = CMD_CHECK_V2;
  128. msg_debug_protocol("got checkv2 command");
  129. }
  130. else if (COMPARE_CMD(p, MSG_CMD_CHECK, pathlen)) {
  131. task->cmd = CMD_CHECK;
  132. msg_debug_protocol("got check command");
  133. }
  134. else {
  135. goto err;
  136. }
  137. break;
  138. case 's':
  139. case 'S':
  140. /* symbols, skip */
  141. if (COMPARE_CMD(p, MSG_CMD_SYMBOLS, pathlen)) {
  142. task->cmd = CMD_CHECK;
  143. msg_debug_protocol("got symbols -> old check command");
  144. }
  145. else if (COMPARE_CMD(p, MSG_CMD_SCAN, pathlen)) {
  146. task->cmd = CMD_CHECK;
  147. msg_debug_protocol("got scan -> old check command");
  148. }
  149. else if (COMPARE_CMD(p, MSG_CMD_SKIP, pathlen)) {
  150. msg_debug_protocol("got skip command");
  151. task->cmd = CMD_SKIP;
  152. }
  153. else {
  154. goto err;
  155. }
  156. break;
  157. case 'p':
  158. case 'P':
  159. /* ping, process */
  160. if (COMPARE_CMD(p, MSG_CMD_PING, pathlen)) {
  161. msg_debug_protocol("got ping command");
  162. task->cmd = CMD_PING;
  163. task->flags |= RSPAMD_TASK_FLAG_SKIP;
  164. task->processed_stages |= RSPAMD_TASK_STAGE_DONE; /* Skip all */
  165. }
  166. else if (COMPARE_CMD(p, MSG_CMD_PROCESS, pathlen)) {
  167. msg_debug_protocol("got process -> old check command");
  168. task->cmd = CMD_CHECK;
  169. }
  170. else {
  171. goto err;
  172. }
  173. break;
  174. case 'r':
  175. case 'R':
  176. /* report, report_ifspam */
  177. if (COMPARE_CMD(p, MSG_CMD_REPORT, pathlen)) {
  178. msg_debug_protocol("got report -> old check command");
  179. task->cmd = CMD_CHECK;
  180. }
  181. else if (COMPARE_CMD(p, MSG_CMD_REPORT_IFSPAM, pathlen)) {
  182. msg_debug_protocol("got reportifspam -> old check command");
  183. task->cmd = CMD_CHECK;
  184. }
  185. else {
  186. goto err;
  187. }
  188. break;
  189. default:
  190. goto err;
  191. }
  192. if (u.field_set & (1u << UF_QUERY)) {
  193. /* In case if we have a query, we need to store it somewhere */
  194. query_args = rspamd_http_message_parse_query(msg);
  195. /* Insert the rest of query params as HTTP headers */
  196. g_hash_table_iter_init(&it, query_args);
  197. while (g_hash_table_iter_next(&it, &k, &v)) {
  198. gchar *key_cpy;
  199. key = k;
  200. value = v;
  201. key_cpy = rspamd_mempool_ftokdup(task->task_pool, key);
  202. rspamd_http_message_add_header_len(msg, key_cpy,
  203. value->begin, value->len);
  204. msg_debug_protocol("added header \"%T\" -> \"%T\" from HTTP query",
  205. key, value);
  206. }
  207. g_hash_table_unref(query_args);
  208. }
  209. return TRUE;
  210. err:
  211. g_set_error(&task->err, rspamd_protocol_quark(), 400, "invalid command");
  212. return FALSE;
  213. }
  214. static void
  215. rspamd_protocol_process_recipients(struct rspamd_task *task,
  216. const rspamd_ftok_t *hdr)
  217. {
  218. enum {
  219. skip_spaces,
  220. quoted_string,
  221. normal_string,
  222. } state = skip_spaces;
  223. const gchar *p, *end, *start_addr;
  224. struct rspamd_email_address *addr;
  225. p = hdr->begin;
  226. end = hdr->begin + hdr->len;
  227. start_addr = NULL;
  228. while (p < end) {
  229. switch (state) {
  230. case skip_spaces:
  231. if (g_ascii_isspace(*p)) {
  232. p++;
  233. }
  234. else if (*p == '"') {
  235. start_addr = p;
  236. p++;
  237. state = quoted_string;
  238. }
  239. else {
  240. state = normal_string;
  241. start_addr = p;
  242. }
  243. break;
  244. case quoted_string:
  245. if (*p == '"') {
  246. state = normal_string;
  247. p++;
  248. }
  249. else if (*p == '\\') {
  250. /* Quoted pair */
  251. p += 2;
  252. }
  253. else {
  254. p++;
  255. }
  256. break;
  257. case normal_string:
  258. if (*p == '"') {
  259. state = quoted_string;
  260. p++;
  261. }
  262. else if (*p == ',' && start_addr != NULL && p > start_addr) {
  263. /* We have finished address, check what we have */
  264. addr = rspamd_email_address_from_smtp(start_addr,
  265. p - start_addr);
  266. if (addr) {
  267. if (task->rcpt_envelope == NULL) {
  268. task->rcpt_envelope = g_ptr_array_sized_new(
  269. 2);
  270. }
  271. g_ptr_array_add(task->rcpt_envelope, addr);
  272. }
  273. else {
  274. msg_err_protocol("bad rcpt address: '%*s'",
  275. (int) (p - start_addr), start_addr);
  276. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  277. }
  278. start_addr = NULL;
  279. p++;
  280. state = skip_spaces;
  281. }
  282. else {
  283. p++;
  284. }
  285. break;
  286. }
  287. }
  288. /* Check remainder */
  289. if (start_addr && p > start_addr) {
  290. switch (state) {
  291. case normal_string:
  292. addr = rspamd_email_address_from_smtp(start_addr, end - start_addr);
  293. if (addr) {
  294. if (task->rcpt_envelope == NULL) {
  295. task->rcpt_envelope = g_ptr_array_sized_new(
  296. 2);
  297. }
  298. g_ptr_array_add(task->rcpt_envelope, addr);
  299. }
  300. else {
  301. msg_err_protocol("bad rcpt address: '%*s'",
  302. (int) (end - start_addr), start_addr);
  303. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  304. }
  305. break;
  306. case skip_spaces:
  307. /* Do nothing */
  308. break;
  309. case quoted_string:
  310. default:
  311. msg_err_protocol("bad state when parsing rcpt address: '%*s'",
  312. (int) (end - start_addr), start_addr);
  313. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  314. }
  315. }
  316. }
  317. #define COMPARE_FLAG_LIT(lit) (len == sizeof(lit) - 1 && memcmp((lit), str, len) == 0)
  318. #define CHECK_PROTOCOL_FLAG(lit, fl) \
  319. do { \
  320. if (!known && COMPARE_FLAG_LIT(lit)) { \
  321. task->protocol_flags |= (fl); \
  322. known = TRUE; \
  323. msg_debug_protocol("add protocol flag %s", lit); \
  324. } \
  325. } while (0)
  326. #define CHECK_TASK_FLAG(lit, fl) \
  327. do { \
  328. if (!known && COMPARE_FLAG_LIT(lit)) { \
  329. task->flags |= (fl); \
  330. known = TRUE; \
  331. msg_debug_protocol("add task flag %s", lit); \
  332. } \
  333. } while (0)
  334. static void
  335. rspamd_protocol_handle_flag(struct rspamd_task *task, const gchar *str,
  336. gsize len)
  337. {
  338. gboolean known = FALSE;
  339. CHECK_TASK_FLAG("pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
  340. CHECK_TASK_FLAG("no_log", RSPAMD_TASK_FLAG_NO_LOG);
  341. CHECK_TASK_FLAG("skip", RSPAMD_TASK_FLAG_SKIP);
  342. CHECK_TASK_FLAG("skip_process", RSPAMD_TASK_FLAG_SKIP_PROCESS);
  343. CHECK_TASK_FLAG("no_stat", RSPAMD_TASK_FLAG_NO_STAT);
  344. CHECK_TASK_FLAG("ssl", RSPAMD_TASK_FLAG_SSL);
  345. CHECK_TASK_FLAG("profile", RSPAMD_TASK_FLAG_PROFILE);
  346. CHECK_PROTOCOL_FLAG("milter", RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
  347. CHECK_PROTOCOL_FLAG("zstd", RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED);
  348. CHECK_PROTOCOL_FLAG("ext_urls", RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
  349. CHECK_PROTOCOL_FLAG("body_block", RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK);
  350. CHECK_PROTOCOL_FLAG("groups", RSPAMD_TASK_PROTOCOL_FLAG_GROUPS);
  351. if (!known) {
  352. msg_warn_protocol("unknown flag: %*s", (gint) len, str);
  353. }
  354. }
  355. #undef COMPARE_FLAG
  356. #undef CHECK_PROTOCOL_FLAG
  357. static void
  358. rspamd_protocol_process_flags(struct rspamd_task *task, const rspamd_ftok_t *hdr)
  359. {
  360. enum {
  361. skip_spaces,
  362. read_flag,
  363. } state = skip_spaces;
  364. const gchar *p, *end, *start;
  365. p = hdr->begin;
  366. end = hdr->begin + hdr->len;
  367. start = NULL;
  368. while (p < end) {
  369. switch (state) {
  370. case skip_spaces:
  371. if (g_ascii_isspace(*p)) {
  372. p++;
  373. }
  374. else {
  375. state = read_flag;
  376. start = p;
  377. }
  378. break;
  379. case read_flag:
  380. if (*p == ',') {
  381. if (p > start) {
  382. rspamd_protocol_handle_flag(task, start, p - start);
  383. }
  384. start = NULL;
  385. state = skip_spaces;
  386. p++;
  387. }
  388. else {
  389. p++;
  390. }
  391. break;
  392. }
  393. }
  394. /* Check remainder */
  395. if (start && end > start && state == read_flag) {
  396. rspamd_protocol_handle_flag(task, start, end - start);
  397. }
  398. }
/*
 * Compare the current header name (hn_tok) with the string literal `name`
 * using rspamd_ftok_casecmp() and open an `if` for the match.
 *
 * The macro expands to two assignments to the local `srch` followed by a
 * bare `if`, so it is NOT a single statement: it must only be used where a
 * statement sequence is allowed, and an `else` written after the following
 * block binds to this `if`.
 */
#define IF_HEADER(name)          \
	srch.begin = (name);         \
	srch.len = sizeof(name) - 1; \
	if (rspamd_ftok_casecmp(hn_tok, &srch) == 0)

/*
 * Process all HTTP headers of a request and fill the task from the ones that
 * are part of the protocol.
 *
 * Each header name and value is duplicated via rspamd_mempool_ftokdup() on
 * the task pool, the name is dispatched on its first character and compared
 * with the known header names, and finally every header (recognised or not)
 * is passed to rspamd_task_add_request_header().
 *
 * After the loop:
 *  - if a settings header was seen and a settings id was also resolved, the
 *    settings element is released and cleared;
 *  - if no IP header was parsed successfully, RSPAMD_TASK_FLAG_NO_IP is set.
 *
 * @param task task to fill
 * @param msg HTTP message whose headers are examined
 * @return always TRUE in the code visible here
 */
gboolean
rspamd_protocol_handle_headers(struct rspamd_task *task,
							   struct rspamd_http_message *msg)
{
	rspamd_ftok_t *hn_tok, *hv_tok, srch;
	gboolean has_ip = FALSE, seen_settings_header = FALSE;
	struct rspamd_http_header *header, *h;
	gchar *ntok;

	/* Outer loop: every value stored in msg->headers; inner loop: every element of that list */
	kh_foreach_value (msg->headers, header, {
		DL_FOREACH (header, h) {
			/* Pool-owned copies of the name and the value, wrapped into pool-allocated tokens */
			ntok = rspamd_mempool_ftokdup (task->task_pool, &h->name);
			hn_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hn_tok));
			hn_tok->begin = ntok;
			hn_tok->len = h->name.len;


			ntok = rspamd_mempool_ftokdup (task->task_pool, &h->value);
			hv_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hv_tok));
			hv_tok->begin = ntok;
			hv_tok->len = h->value.len;

			/*
			 * Dispatch on the first character of the name.
			 * NOTE(review): this reads the first byte without checking
			 * hn_tok->len; presumably header names are never empty - confirm.
			 * Only the 'd', 'q' and 'i' branches log "wrong header" for a
			 * name that matches none of their candidates.
			 */
			switch (*hn_tok->begin) {
			case 'd':
			case 'D':
				IF_HEADER(DELIVER_TO_HEADER)
				{
					/* Value is stored with surrounding spaces and <> stripped */
					task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok);
					msg_debug_protocol("read deliver-to header, value: %s",
									   task->deliver_to);
				}
				else
				{
					msg_debug_protocol("wrong header: %T", hn_tok);
				}
				break;
			case 'h':
			case 'H':
				IF_HEADER(HELO_HEADER)
				{
					task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
					msg_debug_protocol("read helo header, value: %s", task->helo);
				}
				IF_HEADER(HOSTNAME_HEADER)
				{
					task->hostname = rspamd_mempool_ftokdup(task->task_pool,
															hv_tok);
					msg_debug_protocol("read hostname header, value: %s", task->hostname);
				}
				break;
			case 'f':
			case 'F':
				IF_HEADER(FROM_HEADER)
				{
					if (hv_tok->len == 0) {
						/* Replace '' with '<>' to fix parsing issue */
						RSPAMD_FTOK_ASSIGN(hv_tok, "<>");
					}
					task->from_envelope = rspamd_email_address_from_smtp(
						hv_tok->begin,
						hv_tok->len);
					msg_debug_protocol("read from header, value: %T", hv_tok);
					if (!task->from_envelope) {
						msg_err_protocol("bad from header: '%T'", hv_tok);
						task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
					}
				}
				IF_HEADER(FILENAME_HEADER)
				{
					task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool,
															 hv_tok);
					msg_debug_protocol("read filename header, value: %s", task->msg.fpath);
				}
				IF_HEADER(FLAGS_HEADER)
				{
					msg_debug_protocol("read flags header, value: %T", hv_tok);
					rspamd_protocol_process_flags(task, hv_tok);
				}
				break;
			case 'q':
			case 'Q':
				IF_HEADER(QUEUE_ID_HEADER)
				{
					task->queue_id = rspamd_mempool_ftokdup(task->task_pool,
															hv_tok);
					msg_debug_protocol("read queue_id header, value: %s", task->queue_id);
				}
				else
				{
					msg_debug_protocol("wrong header: %T", hn_tok);
				}
				break;
			case 'r':
			case 'R':
				IF_HEADER(RCPT_HEADER)
				{
					rspamd_protocol_process_recipients(task, hv_tok);
					msg_debug_protocol("read rcpt header, value: %T", hv_tok);
				}
				IF_HEADER(RAW_DATA_HEADER)
				{
					/* srch is reused here to compare the VALUE with "yes" */
					srch.begin = "yes";
					srch.len = 3;

					msg_debug_protocol("read raw data header, value: %T", hv_tok);

					if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
						task->flags &= ~RSPAMD_TASK_FLAG_MIME;
						msg_debug_protocol("disable mime parsing");
					}
				}
				break;
			case 'i':
			case 'I':
				IF_HEADER(IP_ADDR_HEADER)
				{
					/* has_ip is set only when the address parses successfully */
					if (!rspamd_parse_inet_address(&task->from_addr,
												   hv_tok->begin, hv_tok->len,
												   RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
						msg_err_protocol("bad ip header: '%T'", hv_tok);
					}
					else {
						msg_debug_protocol("read IP header, value: %T", hv_tok);
						has_ip = TRUE;
					}
				}
				else
				{
					msg_debug_protocol("wrong header: %T", hn_tok);
				}
				break;
			case 'p':
			case 'P':
				IF_HEADER(PASS_HEADER)
				{
					/* Only the value "all" enables the pass-all flag */
					srch.begin = "all";
					srch.len = 3;

					msg_debug_protocol("read pass header, value: %T", hv_tok);

					if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
						task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
						msg_debug_protocol("pass all filters");
					}
				}
				IF_HEADER(PROFILE_HEADER)
				{
					/* Presence of the header is enough, the value is not inspected */
					msg_debug_protocol("read profile header, value: %T", hv_tok);
					task->flags |= RSPAMD_TASK_FLAG_PROFILE;
				}
				break;
			case 's':
			case 'S':
				IF_HEADER(SETTINGS_ID_HEADER)
				{
					msg_debug_protocol("read settings-id header, value: %T", hv_tok);
					task->settings_elt = rspamd_config_find_settings_name_ref(
						task->cfg, hv_tok->begin, hv_tok->len);

					if (task->settings_elt == NULL) {
						/* Unknown id: log it together with the list of configured ids */
						GString *known_ids = g_string_new(NULL);
						struct rspamd_config_settings_elt *cur;

						DL_FOREACH(task->cfg->setting_ids, cur)
						{
							rspamd_printf_gstring(known_ids, "%s(%ud);",
												  cur->name, cur->id);
						}

						msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v",
										  hv_tok,
										  rspamd_config_name_to_id(hv_tok->begin, hv_tok->len),
										  known_ids);

						g_string_free(known_ids, TRUE);
					}
					else {
						msg_debug_protocol("applied settings id %T -> %ud", hv_tok,
										   task->settings_elt->id);
					}
				}
				IF_HEADER(SETTINGS_HEADER)
				{
					/* Only remembered here; the conflict with settings-id is resolved after the loop */
					msg_debug_protocol("read settings header, value: %T", hv_tok);
					seen_settings_header = TRUE;
				}
				break;
			case 'u':
			case 'U':
				IF_HEADER(USER_HEADER)
				{
					/*
					 * We must ignore User header in case of spamc, as SA has
					 * different meaning of this header
					 */
					msg_debug_protocol("read user header, value: %T", hv_tok);
					if (!RSPAMD_TASK_IS_SPAMC(task)) {
						task->auth_user = rspamd_mempool_ftokdup(task->task_pool,
																 hv_tok);
					}
					else {
						msg_info_protocol("ignore user header: legacy SA protocol");
					}
				}
				IF_HEADER(URLS_HEADER)
				{
					msg_debug_protocol("read urls header, value: %T", hv_tok);

					srch.begin = "extended";
					srch.len = 8;

					if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
						task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS;
						msg_debug_protocol("extended urls information");
					}

					/* TODO: add more formats there */
				}
				IF_HEADER(USER_AGENT_HEADER)
				{
					msg_debug_protocol("read user-agent header, value: %T", hv_tok);

					/* A value of exactly "rspamc" marks the request as coming from the local client */
					if (hv_tok->len == 6 &&
						rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) {
						task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT;
					}
				}
				break;
			case 'l':
			case 'L':
				IF_HEADER(NO_LOG_HEADER)
				{
					msg_debug_protocol("read log header, value: %T", hv_tok);
					srch.begin = "no";
					srch.len = 2;

					if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
						task->flags |= RSPAMD_TASK_FLAG_NO_LOG;
					}
				}
				break;
			case 'm':
			case 'M':
				IF_HEADER(MLEN_HEADER)
				{
					/* The length value itself is not parsed here, only the flag is set */
					msg_debug_protocol("read message length header, value: %T",
									   hv_tok);
					task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL;
				}
				IF_HEADER(MTA_TAG_HEADER)
				{
					gchar *mta_tag;
					mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
					rspamd_mempool_set_variable(task->task_pool,
												RSPAMD_MEMPOOL_MTA_TAG,
												mta_tag, NULL);
					msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag);
				}
				IF_HEADER(MTA_NAME_HEADER)
				{
					gchar *mta_name;
					mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
					rspamd_mempool_set_variable(task->task_pool,
												RSPAMD_MEMPOOL_MTA_NAME,
												mta_name, NULL);
					msg_debug_protocol("read MTA-Name header, value: %s", mta_name);
				}
				IF_HEADER(MILTER_HEADER)
				{
					task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER;
					msg_debug_protocol("read Milter header, value: %T", hv_tok);
				}
				break;
			case 't':
			case 'T':
				IF_HEADER(TLS_CIPHER_HEADER)
				{
					task->flags |= RSPAMD_TASK_FLAG_SSL;
					msg_debug_protocol("read TLS cipher header, value: %T", hv_tok);
				}
				break;
			default:
				msg_debug_protocol("generic header: %T", hn_tok);
				break;
			}

			/* Every header is recorded on the task, whether or not it was recognised above */
			rspamd_task_add_request_header (task, hn_tok, hv_tok);
		}
	}); /* End of kh_foreach_value */

	/* An explicit settings header takes precedence over a settings id */
	if (seen_settings_header && task->settings_elt) {
		msg_warn_task("ignore settings id %s as settings header is also presented",
					  task->settings_elt->name);
		REF_RELEASE(task->settings_elt);

		task->settings_elt = NULL;
	}

	if (!has_ip) {
		task->flags |= RSPAMD_TASK_FLAG_NO_IP;
	}

	return TRUE;
}
  685. #define BOOL_TO_FLAG(val, flags, flag) \
  686. do { \
  687. if ((val)) (flags) |= (flag); \
  688. else \
  689. (flags) &= ~(flag); \
  690. } while (0)
  691. gboolean
  692. rspamd_protocol_parse_task_flags(rspamd_mempool_t *pool,
  693. const ucl_object_t *obj,
  694. gpointer ud,
  695. struct rspamd_rcl_section *section,
  696. GError **err)
  697. {
  698. struct rspamd_rcl_struct_parser *pd = ud;
  699. gint *target;
  700. const gchar *key;
  701. gboolean value;
  702. target = (gint *) (((gchar *) pd->user_struct) + pd->offset);
  703. key = ucl_object_key(obj);
  704. value = ucl_object_toboolean(obj);
  705. if (key != NULL) {
  706. if (g_ascii_strcasecmp(key, "pass_all") == 0) {
  707. BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_PASS_ALL);
  708. }
  709. else if (g_ascii_strcasecmp(key, "no_log") == 0) {
  710. BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_NO_LOG);
  711. }
  712. }
  713. return TRUE;
  714. }
  715. static struct rspamd_rcl_sections_map *control_parser = NULL;
  716. RSPAMD_CONSTRUCTOR(rspamd_protocol_control_parser_ctor)
  717. {
  718. struct rspamd_rcl_section *sub = rspamd_rcl_add_section(&control_parser, NULL,
  719. "*",
  720. NULL,
  721. NULL,
  722. UCL_OBJECT,
  723. FALSE,
  724. TRUE);
  725. /* Default handlers */
  726. rspamd_rcl_add_default_handler(sub,
  727. "ip",
  728. rspamd_rcl_parse_struct_addr,
  729. G_STRUCT_OFFSET(struct rspamd_task, from_addr),
  730. 0,
  731. NULL);
  732. rspamd_rcl_add_default_handler(sub,
  733. "from",
  734. rspamd_rcl_parse_struct_mime_addr,
  735. G_STRUCT_OFFSET(struct rspamd_task, from_envelope),
  736. 0,
  737. NULL);
  738. rspamd_rcl_add_default_handler(sub,
  739. "rcpt",
  740. rspamd_rcl_parse_struct_mime_addr,
  741. G_STRUCT_OFFSET(struct rspamd_task, rcpt_envelope),
  742. 0,
  743. NULL);
  744. rspamd_rcl_add_default_handler(sub,
  745. "helo",
  746. rspamd_rcl_parse_struct_string,
  747. G_STRUCT_OFFSET(struct rspamd_task, helo),
  748. 0,
  749. NULL);
  750. rspamd_rcl_add_default_handler(sub,
  751. "user",
  752. rspamd_rcl_parse_struct_string,
  753. G_STRUCT_OFFSET(struct rspamd_task, auth_user),
  754. 0,
  755. NULL);
  756. rspamd_rcl_add_default_handler(sub,
  757. "pass_all",
  758. rspamd_protocol_parse_task_flags,
  759. G_STRUCT_OFFSET(struct rspamd_task, flags),
  760. 0,
  761. NULL);
  762. rspamd_rcl_add_default_handler(sub,
  763. "json",
  764. rspamd_protocol_parse_task_flags,
  765. G_STRUCT_OFFSET(struct rspamd_task, flags),
  766. 0,
  767. NULL);
  768. }
  769. RSPAMD_DESTRUCTOR(rspamd_protocol_control_parser_dtor)
  770. {
  771. rspamd_rcl_sections_free(control_parser);
  772. }
  773. gboolean
  774. rspamd_protocol_handle_control(struct rspamd_task *task,
  775. const ucl_object_t *control)
  776. {
  777. GError *err = NULL;
  778. if (!rspamd_rcl_parse(control_parser, task->cfg, task, task->task_pool,
  779. control, &err)) {
  780. msg_warn_protocol("cannot parse control block: %e", err);
  781. g_error_free(err);
  782. return FALSE;
  783. }
  784. return TRUE;
  785. }
  786. gboolean
  787. rspamd_protocol_handle_request(struct rspamd_task *task,
  788. struct rspamd_http_message *msg)
  789. {
  790. gboolean ret = TRUE;
  791. if (msg->method == HTTP_SYMBOLS) {
  792. msg_debug_protocol("got legacy SYMBOLS method, enable rspamc protocol workaround");
  793. task->cmd = CMD_CHECK_RSPAMC;
  794. }
  795. else if (msg->method == HTTP_CHECK) {
  796. msg_debug_protocol("got legacy CHECK method, enable rspamc protocol workaround");
  797. task->cmd = CMD_CHECK_RSPAMC;
  798. }
  799. else {
  800. ret = rspamd_protocol_handle_url(task, msg);
  801. }
  802. if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) {
  803. msg_debug_protocol("got legacy SA input, enable spamc protocol workaround");
  804. task->cmd = CMD_CHECK_SPAMC;
  805. }
  806. return ret;
  807. }
  808. /* Structure for writing tree data */
  809. struct tree_cb_data {
  810. ucl_object_t *top;
  811. khash_t(rspamd_url_host_hash) * seen;
  812. struct rspamd_task *task;
  813. };
  814. static ucl_object_t *
  815. rspamd_protocol_extended_url(struct rspamd_task *task,
  816. struct rspamd_url *url,
  817. const gchar *encoded, gsize enclen)
  818. {
  819. ucl_object_t *obj, *elt;
  820. obj = ucl_object_typed_new(UCL_OBJECT);
  821. elt = ucl_object_fromstring_common(encoded, enclen, 0);
  822. ucl_object_insert_key(obj, elt, "url", 0, false);
  823. if (url->tldlen > 0) {
  824. elt = ucl_object_fromstring_common(rspamd_url_tld_unsafe(url),
  825. url->tldlen, 0);
  826. ucl_object_insert_key(obj, elt, "tld", 0, false);
  827. }
  828. if (url->hostlen > 0) {
  829. elt = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
  830. url->hostlen, 0);
  831. ucl_object_insert_key(obj, elt, "host", 0, false);
  832. }
  833. ucl_object_t *flags = ucl_object_typed_new(UCL_ARRAY);
  834. for (unsigned int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
  835. if (url->flags & (1u << i)) {
  836. ucl_object_t *fl = ucl_object_fromstring(rspamd_url_flag_to_string(1u << i));
  837. ucl_array_append(flags, fl);
  838. }
  839. }
  840. ucl_object_insert_key(obj, flags, "flags", 0, false);
  841. if (url->ext && url->ext->linked_url) {
  842. encoded = rspamd_url_encode(url->ext->linked_url, &enclen, task->task_pool);
  843. elt = rspamd_protocol_extended_url(task, url->ext->linked_url, encoded,
  844. enclen);
  845. ucl_object_insert_key(obj, elt, "linked_url", 0, false);
  846. }
  847. return obj;
  848. }
  849. /*
  850. * Callback for writing urls
  851. */
  852. static void
  853. urls_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
  854. {
  855. ucl_object_t *obj;
  856. struct rspamd_task *task = cb->task;
  857. const gchar *user_field = "unknown", *encoded = NULL;
  858. gboolean has_user = FALSE;
  859. guint len = 0;
  860. gsize enclen = 0;
  861. if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS)) {
  862. if (url->hostlen > 0) {
  863. if (rspamd_url_host_set_has(cb->seen, url)) {
  864. return;
  865. }
  866. goffset err_offset;
  867. if ((err_offset = rspamd_fast_utf8_validate(rspamd_url_host_unsafe(url),
  868. url->hostlen)) == 0) {
  869. obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
  870. url->hostlen, 0);
  871. }
  872. else {
  873. obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
  874. err_offset - 1, 0);
  875. }
  876. }
  877. else {
  878. return;
  879. }
  880. rspamd_url_host_set_add(cb->seen, url);
  881. }
  882. else {
  883. encoded = rspamd_url_encode(url, &enclen, task->task_pool);
  884. obj = rspamd_protocol_extended_url(task, url, encoded, enclen);
  885. }
  886. ucl_array_append(cb->top, obj);
  887. if (cb->task->cfg->log_urls) {
  888. if (task->auth_user) {
  889. user_field = task->auth_user;
  890. len = strlen(task->auth_user);
  891. has_user = TRUE;
  892. }
  893. else if (task->from_envelope) {
  894. user_field = task->from_envelope->addr;
  895. len = task->from_envelope->addr_len;
  896. }
  897. if (!encoded) {
  898. encoded = rspamd_url_encode(url, &enclen, task->task_pool);
  899. }
  900. msg_notice_task_encrypted("<%s> %s: %*s; ip: %s; URL: %*s",
  901. MESSAGE_FIELD_CHECK(task, message_id),
  902. has_user ? "user" : "from",
  903. len, user_field,
  904. rspamd_inet_address_to_string(task->from_addr),
  905. (gint) enclen, encoded);
  906. }
  907. }
  908. static ucl_object_t *
  909. rspamd_urls_tree_ucl(khash_t(rspamd_url_hash) * set,
  910. struct rspamd_task *task)
  911. {
  912. struct tree_cb_data cb;
  913. ucl_object_t *obj;
  914. struct rspamd_url *u;
  915. obj = ucl_object_typed_new(UCL_ARRAY);
  916. cb.top = obj;
  917. cb.task = task;
  918. cb.seen = kh_init(rspamd_url_host_hash);
  919. kh_foreach_key(set, u, {
  920. if (!(u->protocol & PROTOCOL_MAILTO)) {
  921. urls_protocol_cb(u, &cb);
  922. }
  923. });
  924. kh_destroy(rspamd_url_host_hash, cb.seen);
  925. return obj;
  926. }
  927. static void
  928. emails_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
  929. {
  930. ucl_object_t *obj;
  931. if (url->userlen > 0 && url->hostlen > 0) {
  932. obj = ucl_object_fromlstring(rspamd_url_user_unsafe(url),
  933. url->userlen + url->hostlen + 1);
  934. ucl_array_append(cb->top, obj);
  935. }
  936. }
  937. static ucl_object_t *
  938. rspamd_emails_tree_ucl(khash_t(rspamd_url_hash) * set,
  939. struct rspamd_task *task)
  940. {
  941. struct tree_cb_data cb;
  942. ucl_object_t *obj;
  943. struct rspamd_url *u;
  944. obj = ucl_object_typed_new(UCL_ARRAY);
  945. cb.top = obj;
  946. cb.task = task;
  947. kh_foreach_key(set, u, {
  948. if ((u->protocol & PROTOCOL_MAILTO)) {
  949. emails_protocol_cb(u, &cb);
  950. }
  951. });
  952. return obj;
  953. }
  954. /* Write new subject */
  955. static const gchar *
  956. rspamd_protocol_rewrite_subject(struct rspamd_task *task)
  957. {
  958. GString *subj_buf;
  959. gchar *res;
  960. const gchar *s, *c, *p;
  961. gsize slen = 0;
  962. c = rspamd_mempool_get_variable(task->task_pool, "metric_subject");
  963. if (c == NULL) {
  964. c = task->cfg->subject;
  965. }
  966. if (c == NULL) {
  967. c = SPAM_SUBJECT;
  968. }
  969. p = c;
  970. s = MESSAGE_FIELD_CHECK(task, subject);
  971. if (s) {
  972. slen = strlen(s);
  973. }
  974. subj_buf = g_string_sized_new(strlen(c) + slen);
  975. while (*p) {
  976. if (*p == '%') {
  977. switch (p[1]) {
  978. case 's':
  979. g_string_append_len(subj_buf, c, p - c);
  980. if (s) {
  981. g_string_append_len(subj_buf, s, slen);
  982. }
  983. c = p + 2;
  984. p += 2;
  985. break;
  986. case 'd':
  987. g_string_append_len(subj_buf, c, p - c);
  988. rspamd_printf_gstring(subj_buf, "%.2f", task->result->score);
  989. c = p + 2;
  990. p += 2;
  991. break;
  992. case '%':
  993. g_string_append_len(subj_buf, c, p - c);
  994. g_string_append_c(subj_buf, '%');
  995. c = p + 2;
  996. p += 2;
  997. break;
  998. default:
  999. p++; /* Just % something unknown */
  1000. break;
  1001. }
  1002. }
  1003. else {
  1004. p++;
  1005. }
  1006. }
  1007. if (p > c) {
  1008. g_string_append_len(subj_buf, c, p - c);
  1009. }
  1010. res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len);
  1011. rspamd_mempool_add_destructor(task->task_pool,
  1012. (rspamd_mempool_destruct_t) g_free,
  1013. res);
  1014. g_string_free(subj_buf, TRUE);
  1015. return res;
  1016. }
  1017. static ucl_object_t *
  1018. rspamd_metric_symbol_ucl(struct rspamd_task *task, struct rspamd_symbol_result *sym)
  1019. {
  1020. ucl_object_t *obj = NULL, *ar;
  1021. const gchar *description = NULL;
  1022. struct rspamd_symbol_option *opt;
  1023. if (sym->sym != NULL) {
  1024. description = sym->sym->description;
  1025. }
  1026. obj = ucl_object_typed_new(UCL_OBJECT);
  1027. ucl_object_insert_key(obj, ucl_object_fromstring(sym->name), "name", 0, false);
  1028. ucl_object_insert_key(obj, ucl_object_fromdouble(sym->score), "score", 0, false);
  1029. if (task->cmd == CMD_CHECK_V2) {
  1030. if (sym->sym) {
  1031. ucl_object_insert_key(obj, ucl_object_fromdouble(sym->sym->score), "metric_score", 0, false);
  1032. }
  1033. else {
  1034. ucl_object_insert_key(obj, ucl_object_fromdouble(0.0),
  1035. "metric_score", 0, false);
  1036. }
  1037. }
  1038. if (description) {
  1039. ucl_object_insert_key(obj, ucl_object_fromstring(description),
  1040. "description", 0, false);
  1041. }
  1042. if (sym->options != NULL) {
  1043. ar = ucl_object_typed_new(UCL_ARRAY);
  1044. DL_FOREACH(sym->opts_head, opt)
  1045. {
  1046. ucl_array_append(ar, ucl_object_fromstring_common(opt->option,
  1047. opt->optlen, 0));
  1048. }
  1049. ucl_object_insert_key(obj, ar, "options", 0, false);
  1050. }
  1051. return obj;
  1052. }
  1053. static ucl_object_t *
  1054. rspamd_metric_group_ucl(struct rspamd_task *task,
  1055. struct rspamd_symbols_group *gr, gdouble score)
  1056. {
  1057. ucl_object_t *obj = NULL;
  1058. obj = ucl_object_typed_new(UCL_OBJECT);
  1059. ucl_object_insert_key(obj, ucl_object_fromdouble(score),
  1060. "score", 0, false);
  1061. if (gr->description) {
  1062. ucl_object_insert_key(obj, ucl_object_fromstring(gr->description),
  1063. "description", 0, false);
  1064. }
  1065. return obj;
  1066. }
  1067. static ucl_object_t *
  1068. rspamd_scan_result_ucl(struct rspamd_task *task,
  1069. struct rspamd_scan_result *mres, ucl_object_t *top)
  1070. {
  1071. struct rspamd_symbol_result *sym;
  1072. gboolean is_spam;
  1073. struct rspamd_action *action;
  1074. ucl_object_t *obj = NULL, *sobj;
  1075. const gchar *subject;
  1076. struct rspamd_passthrough_result *pr = NULL;
  1077. action = rspamd_check_action_metric(task, &pr, NULL);
  1078. is_spam = !(action->flags & RSPAMD_ACTION_HAM);
  1079. if (task->cmd == CMD_CHECK) {
  1080. obj = ucl_object_typed_new(UCL_OBJECT);
  1081. ucl_object_insert_key(obj,
  1082. ucl_object_frombool(is_spam),
  1083. "is_spam", 0, false);
  1084. }
  1085. else {
  1086. obj = top;
  1087. }
  1088. if (pr) {
  1089. if (pr->message && !(pr->flags & RSPAMD_PASSTHROUGH_NO_SMTP_MESSAGE)) {
  1090. /* Add smtp message if it does not exist: see #3269 for details */
  1091. if (ucl_object_lookup(task->messages, "smtp_message") == NULL) {
  1092. ucl_object_insert_key(task->messages,
  1093. ucl_object_fromstring_common(pr->message, 0, UCL_STRING_RAW),
  1094. "smtp_message", 0,
  1095. false);
  1096. }
  1097. }
  1098. ucl_object_insert_key(obj,
  1099. ucl_object_fromstring(pr->module),
  1100. "passthrough_module", 0, false);
  1101. }
  1102. ucl_object_insert_key(obj,
  1103. ucl_object_frombool(RSPAMD_TASK_IS_SKIPPED(task)),
  1104. "is_skipped", 0, false);
  1105. if (!isnan(mres->score)) {
  1106. ucl_object_insert_key(obj, ucl_object_fromdouble(mres->score),
  1107. "score", 0, false);
  1108. }
  1109. else {
  1110. ucl_object_insert_key(obj,
  1111. ucl_object_fromdouble(0.0), "score", 0, false);
  1112. }
  1113. ucl_object_insert_key(obj,
  1114. ucl_object_fromdouble(rspamd_task_get_required_score(task, mres)),
  1115. "required_score", 0, false);
  1116. ucl_object_insert_key(obj,
  1117. ucl_object_fromstring(action->name),
  1118. "action", 0, false);
  1119. if (action->action_type == METRIC_ACTION_REWRITE_SUBJECT) {
  1120. subject = rspamd_protocol_rewrite_subject(task);
  1121. if (subject) {
  1122. ucl_object_insert_key(obj, ucl_object_fromstring(subject),
  1123. "subject", 0, false);
  1124. }
  1125. }
  1126. if (action->flags & RSPAMD_ACTION_MILTER) {
  1127. /* Treat milter action specially */
  1128. if (action->action_type == METRIC_ACTION_DISCARD) {
  1129. ucl_object_insert_key(obj, ucl_object_fromstring("discard"),
  1130. "reject", 0, false);
  1131. }
  1132. else if (action->action_type == METRIC_ACTION_QUARANTINE) {
  1133. ucl_object_insert_key(obj, ucl_object_fromstring("quarantine"),
  1134. "reject", 0, false);
  1135. }
  1136. }
  1137. /* Now handle symbols */
  1138. if (task->cmd != CMD_CHECK) {
  1139. /* Insert actions thresholds */
  1140. ucl_object_t *actions_obj = ucl_object_typed_new(UCL_OBJECT);
  1141. for (int i = task->result->nactions - 1; i >= 0; i--) {
  1142. struct rspamd_action_config *action_lim = &task->result->actions_config[i];
  1143. if (!isnan(action_lim->cur_limit) &&
  1144. !(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
  1145. ucl_object_insert_key(actions_obj, ucl_object_fromdouble(action_lim->cur_limit),
  1146. action_lim->action->name, 0, true);
  1147. }
  1148. }
  1149. ucl_object_insert_key(obj, actions_obj, "thresholds", 0, false);
  1150. /* For checkv2 we insert symbols as a separate object */
  1151. obj = ucl_object_typed_new(UCL_OBJECT);
  1152. }
  1153. kh_foreach_value(mres->symbols, sym, {
  1154. if (!(sym->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
  1155. sobj = rspamd_metric_symbol_ucl(task, sym);
  1156. ucl_object_insert_key(obj, sobj, sym->name, 0, false);
  1157. }
  1158. })
  1159. if (task->cmd != CMD_CHECK)
  1160. {
  1161. /* For checkv2 we insert symbols as a separate object */
  1162. ucl_object_insert_key(top, obj, "symbols", 0, false);
  1163. }
  1164. else
  1165. {
  1166. /* For legacy check we just insert it as "default" all together */
  1167. ucl_object_insert_key(top, obj, DEFAULT_METRIC, 0, false);
  1168. }
  1169. /* Handle groups if needed */
  1170. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_GROUPS) {
  1171. struct rspamd_symbols_group *gr;
  1172. gdouble gr_score;
  1173. obj = ucl_object_typed_new(UCL_OBJECT);
  1174. ucl_object_reserve(obj, kh_size(mres->sym_groups));
  1175. kh_foreach(mres->sym_groups, gr, gr_score, {
  1176. if (task->cfg->public_groups_only &&
  1177. !(gr->flags & RSPAMD_SYMBOL_GROUP_PUBLIC)) {
  1178. continue;
  1179. }
  1180. sobj = rspamd_metric_group_ucl(task, gr, gr_score);
  1181. ucl_object_insert_key(obj, sobj, gr->name, 0, false);
  1182. });
  1183. ucl_object_insert_key(top, obj, "groups", 0, false);
  1184. }
  1185. return obj;
  1186. }
  1187. void rspamd_ucl_torspamc_output(const ucl_object_t *top,
  1188. rspamd_fstring_t **out)
  1189. {
  1190. const ucl_object_t *symbols, *score,
  1191. *required_score, *is_spam, *elt, *cur;
  1192. ucl_object_iter_t iter = NULL;
  1193. score = ucl_object_lookup(top, "score");
  1194. required_score = ucl_object_lookup(top, "required_score");
  1195. is_spam = ucl_object_lookup(top, "is_spam");
  1196. rspamd_printf_fstring(out,
  1197. "Metric: default; %s; %.2f / %.2f / 0.0\r\n",
  1198. ucl_object_toboolean(is_spam) ? "True" : "False",
  1199. ucl_object_todouble(score),
  1200. ucl_object_todouble(required_score));
  1201. elt = ucl_object_lookup(top, "action");
  1202. if (elt != NULL) {
  1203. rspamd_printf_fstring(out, "Action: %s\r\n",
  1204. ucl_object_tostring(elt));
  1205. }
  1206. elt = ucl_object_lookup(top, "subject");
  1207. if (elt != NULL) {
  1208. rspamd_printf_fstring(out, "Subject: %s\r\n",
  1209. ucl_object_tostring(elt));
  1210. }
  1211. symbols = ucl_object_lookup(top, "symbols");
  1212. if (symbols != NULL) {
  1213. iter = NULL;
  1214. while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
  1215. if (elt->type == UCL_OBJECT) {
  1216. const ucl_object_t *sym_score;
  1217. sym_score = ucl_object_lookup(elt, "score");
  1218. rspamd_printf_fstring(out, "Symbol: %s(%.2f)\r\n",
  1219. ucl_object_key(elt),
  1220. ucl_object_todouble(sym_score));
  1221. }
  1222. }
  1223. }
  1224. elt = ucl_object_lookup(top, "messages");
  1225. if (elt != NULL) {
  1226. iter = NULL;
  1227. while ((cur = ucl_object_iterate(elt, &iter, true)) != NULL) {
  1228. if (cur->type == UCL_STRING) {
  1229. rspamd_printf_fstring(out, "Message: %s\r\n",
  1230. ucl_object_tostring(cur));
  1231. }
  1232. }
  1233. }
  1234. elt = ucl_object_lookup(top, "message-id");
  1235. if (elt != NULL) {
  1236. rspamd_printf_fstring(out, "Message-ID: %s\r\n",
  1237. ucl_object_tostring(elt));
  1238. }
  1239. }
  1240. void rspamd_ucl_tospamc_output(const ucl_object_t *top,
  1241. rspamd_fstring_t **out)
  1242. {
  1243. const ucl_object_t *symbols, *score,
  1244. *required_score, *is_spam, *elt;
  1245. ucl_object_iter_t iter = NULL;
  1246. rspamd_fstring_t *f;
  1247. score = ucl_object_lookup(top, "score");
  1248. required_score = ucl_object_lookup(top, "required_score");
  1249. is_spam = ucl_object_lookup(top, "is_spam");
  1250. rspamd_printf_fstring(out,
  1251. "Spam: %s ; %.2f / %.2f\r\n\r\n",
  1252. ucl_object_toboolean(is_spam) ? "True" : "False",
  1253. ucl_object_todouble(score),
  1254. ucl_object_todouble(required_score));
  1255. symbols = ucl_object_lookup(top, "symbols");
  1256. if (symbols != NULL) {
  1257. while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
  1258. if (elt->type == UCL_OBJECT) {
  1259. rspamd_printf_fstring(out, "%s,",
  1260. ucl_object_key(elt));
  1261. }
  1262. }
  1263. /* Ugly hack, but the whole spamc is ugly */
  1264. f = *out;
  1265. if (f->str[f->len - 1] == ',') {
  1266. f->len--;
  1267. *out = rspamd_fstring_append(*out, CRLF, 2);
  1268. }
  1269. }
  1270. }
  1271. static void
  1272. rspamd_protocol_output_profiling(struct rspamd_task *task,
  1273. ucl_object_t *top)
  1274. {
  1275. GHashTable *tbl;
  1276. GHashTableIter it;
  1277. gpointer k, v;
  1278. ucl_object_t *prof;
  1279. gdouble val;
  1280. prof = ucl_object_typed_new(UCL_OBJECT);
  1281. tbl = rspamd_mempool_get_variable(task->task_pool, "profile");
  1282. if (tbl) {
  1283. g_hash_table_iter_init(&it, tbl);
  1284. while (g_hash_table_iter_next(&it, &k, &v)) {
  1285. val = *(gdouble *) v;
  1286. ucl_object_insert_key(prof, ucl_object_fromdouble(val),
  1287. (const char *) k, 0, false);
  1288. }
  1289. }
  1290. ucl_object_insert_key(top, prof, "profile", 0, false);
  1291. }
  1292. ucl_object_t *
  1293. rspamd_protocol_write_ucl(struct rspamd_task *task,
  1294. enum rspamd_protocol_flags flags)
  1295. {
  1296. ucl_object_t *top = NULL;
  1297. GString *dkim_sig;
  1298. GList *dkim_sigs;
  1299. const ucl_object_t *milter_reply;
  1300. rspamd_task_set_finish_time(task);
  1301. top = ucl_object_typed_new(UCL_OBJECT);
  1302. rspamd_mempool_add_destructor(task->task_pool,
  1303. (rspamd_mempool_destruct_t) ucl_object_unref, top);
  1304. if (flags & RSPAMD_PROTOCOL_METRICS) {
  1305. rspamd_scan_result_ucl(task, task->result, top);
  1306. }
  1307. if (flags & RSPAMD_PROTOCOL_MESSAGES) {
  1308. if (G_UNLIKELY(task->cfg->compat_messages)) {
  1309. const ucl_object_t *cur;
  1310. ucl_object_t *msg_object;
  1311. ucl_object_iter_t iter = NULL;
  1312. msg_object = ucl_object_typed_new(UCL_ARRAY);
  1313. while ((cur = ucl_object_iterate(task->messages, &iter, true)) != NULL) {
  1314. if (cur->type == UCL_STRING) {
  1315. ucl_array_append(msg_object, ucl_object_ref(cur));
  1316. }
  1317. }
  1318. ucl_object_insert_key(top, msg_object, "messages", 0, false);
  1319. }
  1320. else {
  1321. ucl_object_insert_key(top, ucl_object_ref(task->messages),
  1322. "messages", 0, false);
  1323. }
  1324. }
  1325. if (flags & RSPAMD_PROTOCOL_URLS && task->message) {
  1326. if (kh_size(MESSAGE_FIELD(task, urls)) > 0) {
  1327. ucl_object_insert_key(top,
  1328. rspamd_urls_tree_ucl(MESSAGE_FIELD(task, urls), task),
  1329. "urls", 0, false);
  1330. ucl_object_insert_key(top,
  1331. rspamd_emails_tree_ucl(MESSAGE_FIELD(task, urls), task),
  1332. "emails", 0, false);
  1333. }
  1334. }
  1335. if (flags & RSPAMD_PROTOCOL_EXTRA) {
  1336. if (G_UNLIKELY(RSPAMD_TASK_IS_PROFILING(task))) {
  1337. rspamd_protocol_output_profiling(task, top);
  1338. }
  1339. }
  1340. if (flags & RSPAMD_PROTOCOL_BASIC) {
  1341. ucl_object_insert_key(top,
  1342. ucl_object_fromstring(MESSAGE_FIELD_CHECK(task, message_id)),
  1343. "message-id", 0, false);
  1344. ucl_object_insert_key(top,
  1345. ucl_object_fromdouble(task->time_real_finish - task->task_timestamp),
  1346. "time_real", 0, false);
  1347. }
  1348. if (flags & RSPAMD_PROTOCOL_DKIM) {
  1349. dkim_sigs = rspamd_mempool_get_variable(task->task_pool,
  1350. RSPAMD_MEMPOOL_DKIM_SIGNATURE);
  1351. if (dkim_sigs) {
  1352. if (dkim_sigs->next) {
  1353. /* Multiple DKIM signatures */
  1354. ucl_object_t *ar = ucl_object_typed_new(UCL_ARRAY);
  1355. for (; dkim_sigs != NULL; dkim_sigs = dkim_sigs->next) {
  1356. GString *folded_header;
  1357. dkim_sig = (GString *) dkim_sigs->data;
  1358. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER ||
  1359. !task->message) {
  1360. folded_header = rspamd_header_value_fold(
  1361. "DKIM-Signature", strlen("DKIM-Signature"),
  1362. dkim_sig->str, dkim_sig->len,
  1363. 80, RSPAMD_TASK_NEWLINES_LF, NULL);
  1364. }
  1365. else {
  1366. folded_header = rspamd_header_value_fold(
  1367. "DKIM-Signature", strlen("DKIM-Signature"),
  1368. dkim_sig->str, dkim_sig->len,
  1369. 80,
  1370. MESSAGE_FIELD(task, nlines_type),
  1371. NULL);
  1372. }
  1373. ucl_array_append(ar,
  1374. ucl_object_fromstring_common(folded_header->str,
  1375. folded_header->len, UCL_STRING_RAW));
  1376. g_string_free(folded_header, TRUE);
  1377. }
  1378. ucl_object_insert_key(top,
  1379. ar,
  1380. "dkim-signature", 0,
  1381. false);
  1382. }
  1383. else {
  1384. /* Single DKIM signature */
  1385. GString *folded_header;
  1386. dkim_sig = (GString *) dkim_sigs->data;
  1387. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
  1388. folded_header = rspamd_header_value_fold(
  1389. "DKIM-Signature", strlen("DKIM-Signature"),
  1390. dkim_sig->str, dkim_sig->len,
  1391. 80, RSPAMD_TASK_NEWLINES_LF, NULL);
  1392. }
  1393. else {
  1394. folded_header = rspamd_header_value_fold(
  1395. "DKIM-Signature", strlen("DKIM-Signature"),
  1396. dkim_sig->str, dkim_sig->len,
  1397. 80, MESSAGE_FIELD(task, nlines_type),
  1398. NULL);
  1399. }
  1400. ucl_object_insert_key(top,
  1401. ucl_object_fromstring_common(folded_header->str,
  1402. folded_header->len, UCL_STRING_RAW),
  1403. "dkim-signature", 0, false);
  1404. g_string_free(folded_header, TRUE);
  1405. }
  1406. }
  1407. }
  1408. if (flags & RSPAMD_PROTOCOL_RMILTER) {
  1409. milter_reply = rspamd_mempool_get_variable(task->task_pool,
  1410. RSPAMD_MEMPOOL_MILTER_REPLY);
  1411. if (milter_reply) {
  1412. if (task->cmd != CMD_CHECK) {
  1413. ucl_object_insert_key(top, ucl_object_ref(milter_reply),
  1414. "milter", 0, false);
  1415. }
  1416. else {
  1417. ucl_object_insert_key(top, ucl_object_ref(milter_reply),
  1418. "rmilter", 0, false);
  1419. }
  1420. }
  1421. }
  1422. return top;
  1423. }
  1424. void rspamd_protocol_http_reply(struct rspamd_http_message *msg,
  1425. struct rspamd_task *task, ucl_object_t **pobj, int how)
  1426. {
  1427. struct rspamd_scan_result *metric_res;
  1428. const struct rspamd_re_cache_stat *restat;
  1429. ucl_object_t *top = NULL;
  1430. rspamd_fstring_t *reply;
  1431. gint flags = RSPAMD_PROTOCOL_DEFAULT;
  1432. struct rspamd_action *action;
  1433. /* Removed in 2.0 */
  1434. #if 0
  1435. GHashTableIter hiter;
  1436. gpointer h, v;
  1437. /* Write custom headers */
  1438. g_hash_table_iter_init (&hiter, task->reply_headers);
  1439. while (g_hash_table_iter_next (&hiter, &h, &v)) {
  1440. rspamd_ftok_t *hn = h, *hv = v;
  1441. rspamd_http_message_add_header (msg, hn->begin, hv->begin);
  1442. }
  1443. #endif
  1444. flags |= RSPAMD_PROTOCOL_URLS;
  1445. top = rspamd_protocol_write_ucl(task, flags);
  1446. if (pobj) {
  1447. *pobj = top;
  1448. }
  1449. if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
  1450. rspamd_roll_history_update(task->worker->srv->history, task);
  1451. }
  1452. else {
  1453. msg_debug_protocol("skip history update due to no log flag");
  1454. }
  1455. rspamd_task_write_log(task);
  1456. if (task->cfg->log_flags & RSPAMD_LOG_FLAG_RE_CACHE) {
  1457. restat = rspamd_re_cache_get_stat(task->re_rt);
  1458. g_assert(restat != NULL);
  1459. msg_notice_task(
  1460. "regexp statistics: %ud pcre regexps scanned, %ud regexps matched,"
  1461. " %ud regexps total, %ud regexps cached,"
  1462. " %HL scanned using pcre, %HL scanned total",
  1463. restat->regexp_checked,
  1464. restat->regexp_matched,
  1465. restat->regexp_total,
  1466. restat->regexp_fast_cached,
  1467. restat->bytes_scanned_pcre,
  1468. restat->bytes_scanned);
  1469. }
  1470. reply = rspamd_fstring_sized_new(1000);
  1471. if (msg->method < HTTP_SYMBOLS && !RSPAMD_TASK_IS_SPAMC(task)) {
  1472. msg_debug_protocol("writing json reply");
  1473. rspamd_ucl_emit_fstring(top, how, &reply);
  1474. }
  1475. else {
  1476. if (RSPAMD_TASK_IS_SPAMC(task)) {
  1477. msg_debug_protocol("writing spamc legacy reply to client");
  1478. rspamd_ucl_tospamc_output(top, &reply);
  1479. }
  1480. else {
  1481. msg_debug_protocol("writing rspamc legacy reply to client");
  1482. rspamd_ucl_torspamc_output(top, &reply);
  1483. }
  1484. }
  1485. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK) {
  1486. /* Check if we need to insert a body block */
  1487. if (task->flags & RSPAMD_TASK_FLAG_MESSAGE_REWRITE) {
  1488. GString *hdr_offset = g_string_sized_new(30);
  1489. rspamd_printf_gstring(hdr_offset, "%z", RSPAMD_FSTRING_LEN(reply));
  1490. rspamd_http_message_add_header(msg, MESSAGE_OFFSET_HEADER,
  1491. hdr_offset->str);
  1492. msg_debug_protocol("write body block at position %s",
  1493. hdr_offset->str);
  1494. g_string_free(hdr_offset, TRUE);
  1495. /* In case of milter, we append just body, otherwise - full message */
  1496. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
  1497. const gchar *start;
  1498. goffset len, hdr_off;
  1499. start = task->msg.begin;
  1500. len = task->msg.len;
  1501. hdr_off = MESSAGE_FIELD(task, raw_headers_content).len;
  1502. if (hdr_off < len) {
  1503. start += hdr_off;
  1504. len -= hdr_off;
  1505. /* The problem here is that we need not end of headers, we need
  1506. * start of body.
  1507. *
  1508. * Hence, we need to skip one \r\n till there is anything else in
  1509. * a line.
  1510. */
  1511. if (*start == '\r' && len > 0) {
  1512. start++;
  1513. len--;
  1514. }
  1515. if (*start == '\n' && len > 0) {
  1516. start++;
  1517. len--;
  1518. }
  1519. msg_debug_protocol("milter version of body block size %d",
  1520. (int) len);
  1521. reply = rspamd_fstring_append(reply, start, len);
  1522. }
  1523. }
  1524. else {
  1525. msg_debug_protocol("general version of body block size %d",
  1526. (int) task->msg.len);
  1527. reply = rspamd_fstring_append(reply,
  1528. task->msg.begin, task->msg.len);
  1529. }
  1530. }
  1531. }
  1532. if ((task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED) &&
  1533. rspamd_libs_reset_compression(task->cfg->libs_ctx)) {
  1534. /* We can compress output */
  1535. ZSTD_inBuffer zin;
  1536. ZSTD_outBuffer zout;
  1537. ZSTD_CStream *zstream;
  1538. rspamd_fstring_t *compressed_reply;
  1539. gsize r;
  1540. zstream = task->cfg->libs_ctx->out_zstream;
  1541. compressed_reply = rspamd_fstring_sized_new(ZSTD_compressBound(reply->len));
  1542. zin.pos = 0;
  1543. zin.src = reply->str;
  1544. zin.size = reply->len;
  1545. zout.pos = 0;
  1546. zout.dst = compressed_reply->str;
  1547. zout.size = compressed_reply->allocated;
  1548. while (zin.pos < zin.size) {
  1549. r = ZSTD_compressStream(zstream, &zout, &zin);
  1550. if (ZSTD_isError(r)) {
  1551. msg_err_protocol("cannot compress: %s", ZSTD_getErrorName(r));
  1552. rspamd_fstring_free(compressed_reply);
  1553. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1554. goto end;
  1555. }
  1556. }
  1557. ZSTD_flushStream(zstream, &zout);
  1558. r = ZSTD_endStream(zstream, &zout);
  1559. if (ZSTD_isError(r)) {
  1560. msg_err_protocol("cannot finalize compress: %s", ZSTD_getErrorName(r));
  1561. rspamd_fstring_free(compressed_reply);
  1562. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1563. goto end;
  1564. }
  1565. msg_info_protocol("writing compressed results: %z bytes before "
  1566. "%z bytes after",
  1567. zin.pos, zout.pos);
  1568. compressed_reply->len = zout.pos;
  1569. rspamd_fstring_free(reply);
  1570. rspamd_http_message_set_body_from_fstring_steal(msg, compressed_reply);
  1571. rspamd_http_message_add_header(msg, COMPRESSION_HEADER, "zstd");
  1572. if (task->cfg->libs_ctx->out_dict &&
  1573. task->cfg->libs_ctx->out_dict->id != 0) {
  1574. gchar dict_str[32];
  1575. rspamd_snprintf(dict_str, sizeof(dict_str), "%ud",
  1576. task->cfg->libs_ctx->out_dict->id);
  1577. rspamd_http_message_add_header(msg, "Dictionary", dict_str);
  1578. }
  1579. }
  1580. else {
  1581. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1582. }
  1583. end:
  1584. if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) {
  1585. /* Update stat for default metric */
  1586. msg_debug_protocol("skip stats update due to no_stat flag");
  1587. metric_res = task->result;
  1588. if (metric_res != NULL) {
  1589. action = rspamd_check_action_metric(task, NULL, NULL);
  1590. /* TODO: handle custom actions in stats */
  1591. if (action->action_type == METRIC_ACTION_SOFT_REJECT &&
  1592. (task->flags & RSPAMD_TASK_FLAG_GREYLISTED)) {
  1593. /* Set stat action to greylist to display greylisted messages */
  1594. #ifndef HAVE_ATOMIC_BUILTINS
  1595. task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST]++;
  1596. #else
  1597. __atomic_add_fetch(&task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST],
  1598. 1, __ATOMIC_RELEASE);
  1599. #endif
  1600. }
  1601. else if (action->action_type < METRIC_ACTION_MAX) {
  1602. #ifndef HAVE_ATOMIC_BUILTINS
  1603. task->worker->srv->stat->actions_stat[action->action_type]++;
  1604. #else
  1605. __atomic_add_fetch(&task->worker->srv->stat->actions_stat[action->action_type],
  1606. 1, __ATOMIC_RELEASE);
  1607. #endif
  1608. }
  1609. }
  1610. /* Increase counters */
  1611. #ifndef HAVE_ATOMIC_BUILTINS
  1612. task->worker->srv->stat->messages_scanned++;
  1613. #else
  1614. __atomic_add_fetch(&task->worker->srv->stat->messages_scanned,
  1615. 1, __ATOMIC_RELEASE);
  1616. #endif
  1617. /* Set average processing time */
  1618. uint32_t slot;
  1619. float processing_time = task->time_real_finish - task->task_timestamp;
  1620. #ifndef HAVE_ATOMIC_BUILTINS
  1621. slot = task->worker->srv->stat->avg_time.cur_slot++;
  1622. #else
  1623. slot = __atomic_fetch_add(&task->worker->srv->stat->avg_time.cur_slot,
  1624. 1, __ATOMIC_RELEASE);
  1625. #endif
  1626. slot = slot % MAX_AVG_TIME_SLOTS;
  1627. /* TODO: this should be atomic but it is not supported in C */
  1628. task->worker->srv->stat->avg_time.avg_time[slot] = processing_time;
  1629. }
  1630. }
  1631. void rspamd_protocol_write_log_pipe(struct rspamd_task *task)
  1632. {
  1633. struct rspamd_worker_log_pipe *lp;
  1634. struct rspamd_protocol_log_message_sum *ls;
  1635. lua_State *L = task->cfg->lua_state;
  1636. struct rspamd_scan_result *mres;
  1637. struct rspamd_symbol_result *sym;
  1638. gint id, i;
  1639. uint32_t n = 0, nextra = 0;
  1640. gsize sz;
  1641. GArray *extra;
  1642. struct rspamd_protocol_log_symbol_result er;
  1643. struct rspamd_task **ptask;
  1644. /* Get extra results from lua plugins */
  1645. extra = g_array_new(FALSE, FALSE, sizeof(er));
  1646. lua_getglobal(L, "rspamd_plugins");
  1647. if (lua_istable(L, -1)) {
  1648. lua_pushnil(L);
  1649. while (lua_next(L, -2)) {
  1650. if (lua_istable(L, -1)) {
  1651. lua_pushvalue(L, -2);
  1652. /* stack:
  1653. * -1: copy of key
  1654. * -2: value (module table)
  1655. * -3: key (module name)
  1656. * -4: global
  1657. */
  1658. lua_pushstring(L, "log_callback");
  1659. lua_gettable(L, -3);
  1660. /* stack:
  1661. * -1: func
  1662. * -2: copy of key
  1663. * -3: value (module table)
  1664. * -3: key (module name)
  1665. * -4: global
  1666. */
  1667. if (lua_isfunction(L, -1)) {
  1668. ptask = lua_newuserdata(L, sizeof(*ptask));
  1669. *ptask = task;
  1670. rspamd_lua_setclass(L, rspamd_task_classname, -1);
  1671. /* stack:
  1672. * -1: task
  1673. * -2: func
  1674. * -3: key copy
  1675. * -4: value (module table)
  1676. * -5: key (module name)
  1677. * -6: global
  1678. */
  1679. msg_debug_protocol("calling for %s", lua_tostring(L, -3));
  1680. if (lua_pcall(L, 1, 1, 0) != 0) {
  1681. msg_info_protocol("call to log callback %s failed: %s",
  1682. lua_tostring(L, -2), lua_tostring(L, -1));
  1683. lua_pop(L, 1);
  1684. /* stack:
  1685. * -1: key copy
  1686. * -2: value
  1687. * -3: key
  1688. */
  1689. }
  1690. else {
  1691. /* stack:
  1692. * -1: result
  1693. * -2: key copy
  1694. * -3: value
  1695. * -4: key
  1696. */
  1697. if (lua_istable(L, -1)) {
  1698. /* Another iteration */
  1699. lua_pushnil(L);
  1700. while (lua_next(L, -2)) {
  1701. /* stack:
  1702. * -1: value
  1703. * -2: key
  1704. * -3: result table (pcall)
  1705. * -4: key copy (parent)
  1706. * -5: value (parent)
  1707. * -6: key (parent)
  1708. */
  1709. if (lua_istable(L, -1)) {
  1710. er.id = 0;
  1711. er.score = 0.0;
  1712. lua_rawgeti(L, -1, 1);
  1713. if (lua_isnumber(L, -1)) {
  1714. er.id = lua_tonumber(L, -1);
  1715. }
  1716. lua_rawgeti(L, -2, 2);
  1717. if (lua_isnumber(L, -1)) {
  1718. er.score = lua_tonumber(L, -1);
  1719. }
  1720. /* stack:
  1721. * -1: value[2]
  1722. * -2: value[1]
  1723. * -3: values
  1724. * -4: key
  1725. * -5: result table (pcall)
  1726. * -6: key copy (parent)
  1727. * -7: value (parent)
  1728. * -8: key (parent)
  1729. */
  1730. lua_pop(L, 2); /* Values */
  1731. g_array_append_val(extra, er);
  1732. }
  1733. lua_pop(L, 1); /* Value for lua_next */
  1734. }
  1735. lua_pop(L, 1); /* Table result of pcall */
  1736. }
  1737. else {
  1738. msg_info_protocol("call to log callback %s returned "
  1739. "wrong type: %s",
  1740. lua_tostring(L, -2),
  1741. lua_typename(L, lua_type(L, -1)));
  1742. lua_pop(L, 1); /* Returned error */
  1743. }
  1744. }
  1745. }
  1746. else {
  1747. lua_pop(L, 1);
  1748. /* stack:
  1749. * -1: key copy
  1750. * -2: value
  1751. * -3: key
  1752. */
  1753. }
  1754. }
  1755. lua_pop(L, 2); /* Top table + key copy */
  1756. }
  1757. lua_pop(L, 1); /* rspamd_plugins global */
  1758. }
  1759. else {
  1760. lua_pop(L, 1);
  1761. }
  1762. nextra = extra->len;
  1763. LL_FOREACH(task->cfg->log_pipes, lp)
  1764. {
  1765. if (lp->fd != -1) {
  1766. switch (lp->type) {
  1767. case RSPAMD_LOG_PIPE_SYMBOLS:
  1768. mres = task->result;
  1769. if (mres) {
  1770. n = kh_size(mres->symbols);
  1771. sz = sizeof(*ls) +
  1772. sizeof(struct rspamd_protocol_log_symbol_result) *
  1773. (n + nextra);
  1774. ls = g_malloc0(sz);
  1775. /* Handle settings id */
  1776. if (task->settings_elt) {
  1777. ls->settings_id = task->settings_elt->id;
  1778. }
  1779. else {
  1780. ls->settings_id = 0;
  1781. }
  1782. ls->score = mres->score;
  1783. ls->required_score = rspamd_task_get_required_score(task,
  1784. mres);
  1785. ls->nresults = n;
  1786. ls->nextra = nextra;
  1787. i = 0;
  1788. kh_foreach_value(mres->symbols, sym, {
  1789. id = rspamd_symcache_find_symbol(task->cfg->cache,
  1790. sym->name);
  1791. if (id >= 0) {
  1792. ls->results[i].id = id;
  1793. ls->results[i].score = sym->score;
  1794. }
  1795. else {
  1796. ls->results[i].id = -1;
  1797. ls->results[i].score = 0.0;
  1798. }
  1799. i++;
  1800. });
  1801. memcpy(&ls->results[n], extra->data, nextra * sizeof(er));
  1802. }
  1803. else {
  1804. sz = sizeof(*ls);
  1805. ls = g_malloc0(sz);
  1806. ls->nresults = 0;
  1807. }
  1808. /* We don't really care about return value here */
  1809. if (write(lp->fd, ls, sz) == -1) {
  1810. msg_info_protocol("cannot write to log pipe: %s",
  1811. strerror(errno));
  1812. }
  1813. g_free(ls);
  1814. break;
  1815. default:
  1816. msg_err_protocol("unknown log format %d", lp->type);
  1817. break;
  1818. }
  1819. }
  1820. }
  1821. g_array_free(extra, TRUE);
  1822. }
  1823. void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout)
  1824. {
  1825. struct rspamd_http_message *msg;
  1826. const gchar *ctype = "application/json";
  1827. rspamd_fstring_t *reply;
  1828. msg = rspamd_http_new_message(HTTP_RESPONSE);
  1829. if (rspamd_http_connection_is_encrypted(task->http_conn)) {
  1830. msg_info_protocol("<%s> writing encrypted reply",
  1831. MESSAGE_FIELD_CHECK(task, message_id));
  1832. }
  1833. const rspamd_ftok_t *accept_hdr;
  1834. int out_type = UCL_EMIT_JSON_COMPACT;
  1835. accept_hdr = rspamd_task_get_request_header(task, "Accept");
  1836. if (accept_hdr && rspamd_substring_search(accept_hdr->begin, accept_hdr->len,
  1837. "application/msgpack", sizeof("application/msgpack") - 1) != -1) {
  1838. ctype = "application/msgpack";
  1839. out_type = UCL_EMIT_MSGPACK;
  1840. }
  1841. /* Compatibility */
  1842. if (task->cmd == CMD_CHECK_RSPAMC) {
  1843. msg->method = HTTP_SYMBOLS;
  1844. }
  1845. else if (task->cmd == CMD_CHECK_SPAMC) {
  1846. msg->method = HTTP_SYMBOLS;
  1847. msg->flags |= RSPAMD_HTTP_FLAG_SPAMC;
  1848. }
  1849. if (task->err != NULL) {
  1850. msg_debug_protocol("writing error reply to client");
  1851. ucl_object_t *top = NULL;
  1852. top = ucl_object_typed_new(UCL_OBJECT);
  1853. msg->code = 500 + task->err->code % 100;
  1854. msg->status = rspamd_fstring_new_init(task->err->message,
  1855. strlen(task->err->message));
  1856. ucl_object_insert_key(top, ucl_object_fromstring(task->err->message),
  1857. "error", 0, false);
  1858. ucl_object_insert_key(top,
  1859. ucl_object_fromstring(g_quark_to_string(task->err->domain)),
  1860. "error_domain", 0, false);
  1861. reply = rspamd_fstring_sized_new(256);
  1862. rspamd_ucl_emit_fstring(top, out_type, &reply);
  1863. ucl_object_unref(top);
  1864. /* We also need to validate utf8 */
  1865. if (out_type != UCL_EMIT_MSGPACK && rspamd_fast_utf8_validate(reply->str, reply->len) != 0) {
  1866. gsize valid_len;
  1867. gchar *validated;
  1868. /* We copy reply several times here, but it should be a rare case */
  1869. validated = rspamd_str_make_utf_valid(reply->str, reply->len,
  1870. &valid_len, task->task_pool);
  1871. rspamd_http_message_set_body(msg, validated, valid_len);
  1872. rspamd_fstring_free(reply);
  1873. }
  1874. else {
  1875. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1876. }
  1877. }
  1878. else {
  1879. msg->status = rspamd_fstring_new_init("OK", 2);
  1880. switch (task->cmd) {
  1881. case CMD_CHECK:
  1882. case CMD_CHECK_RSPAMC:
  1883. case CMD_CHECK_SPAMC:
  1884. case CMD_SKIP:
  1885. case CMD_CHECK_V2:
  1886. rspamd_protocol_http_reply(msg, task, NULL, out_type);
  1887. rspamd_protocol_write_log_pipe(task);
  1888. break;
  1889. case CMD_PING:
  1890. msg_debug_protocol("writing pong to client");
  1891. rspamd_http_message_set_body(msg, "pong" CRLF, 6);
  1892. ctype = "text/plain";
  1893. break;
  1894. default:
  1895. msg_err_protocol("BROKEN");
  1896. break;
  1897. }
  1898. }
  1899. ev_now_update(task->event_loop);
  1900. msg->date = ev_time();
  1901. rspamd_http_connection_reset(task->http_conn);
  1902. rspamd_http_connection_write_message(task->http_conn, msg, NULL,
  1903. ctype, task, timeout);
  1904. task->processed_stages |= RSPAMD_TASK_STAGE_REPLIED;
  1905. }