You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

protocol.c 55KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "message.h"
  19. #include "utlist.h"
  20. #include "libserver/http/http_private.h"
  21. #include "worker_private.h"
  22. #include "libserver/cfg_file_private.h"
  23. #include "libmime/scan_result_private.h"
  24. #include "lua/lua_common.h"
  25. #include "unix-std.h"
  26. #include "protocol_internal.h"
  27. #include "libserver/mempool_vars_internal.h"
  28. #include "contrib/fastutf8/fastutf8.h"
  29. #include "task.h"
  30. #include "lua/lua_classnames.h"
  31. #include <math.h>
  32. #ifdef SYS_ZSTD
  33. #include "zstd.h"
  34. #else
  35. #include "contrib/zstd/zstd.h"
  36. #endif
/*
 * Logging setup for this file.
 *
 * NOTE(review): INIT_LOG_MODULE(protocol) presumably defines the
 * `rspamd_protocol_log_id` identifier that msg_debug_protocol references
 * below - confirm against the logger header.
 */
INIT_LOG_MODULE(protocol)

/*
 * Module-local logging helpers.
 *
 * Every macro below expands to an expression that reads
 * `task->task_pool->tag.uid`, so a `task` variable pointing to the current
 * task must be in scope at each call site. G_STRFUNC is evaluated at the call
 * site as well, so the logged function name is the caller's.
 */

/* Log with G_LOG_LEVEL_CRITICAL under the "protocol" module name */
#define msg_err_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL,                     \
														  "protocol", task->task_pool->tag.uid, \
														  G_STRFUNC,                            \
														  __VA_ARGS__)
/* Log with G_LOG_LEVEL_WARNING under the "protocol" module name */
#define msg_warn_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING,                      \
														   "protocol", task->task_pool->tag.uid, \
														   G_STRFUNC,                            \
														   __VA_ARGS__)
/* Log with G_LOG_LEVEL_INFO under the "protocol" module name */
#define msg_info_protocol(...) rspamd_default_log_function(G_LOG_LEVEL_INFO,                         \
														   "protocol", task->task_pool->tag.uid, \
														   G_STRFUNC,                            \
														   __VA_ARGS__)
/* Conditional debug output keyed by rspamd_protocol_log_id */
#define msg_debug_protocol(...) rspamd_conditional_debug_fast(NULL, NULL,                                                 \
															  rspamd_protocol_log_id, "protocol", task->task_pool->tag.uid, \
															  G_STRFUNC,                                                  \
															  __VA_ARGS__)
  54. static GQuark
  55. rspamd_protocol_quark(void)
  56. {
  57. return g_quark_from_static_string("protocol-error");
  58. }
  59. /*
  60. * Remove <> from the fixed string and copy it to the pool
  61. */
  62. static gchar *
  63. rspamd_protocol_escape_braces(struct rspamd_task *task, rspamd_ftok_t *in)
  64. {
  65. guint nchars = 0;
  66. const gchar *p;
  67. rspamd_ftok_t tok;
  68. gboolean has_obrace = FALSE;
  69. g_assert(in != NULL);
  70. g_assert(in->len > 0);
  71. p = in->begin;
  72. while ((g_ascii_isspace(*p) || *p == '<') && nchars < in->len) {
  73. if (*p == '<') {
  74. has_obrace = TRUE;
  75. }
  76. p++;
  77. nchars++;
  78. }
  79. tok.begin = p;
  80. p = in->begin + in->len - 1;
  81. tok.len = in->len - nchars;
  82. while (g_ascii_isspace(*p) && tok.len > 0) {
  83. p--;
  84. tok.len--;
  85. }
  86. if (has_obrace && *p == '>') {
  87. tok.len--;
  88. }
  89. return rspamd_mempool_ftokdup(task->task_pool, &tok);
  90. }
  91. #define COMPARE_CMD(str, cmd, len) (sizeof(cmd) - 1 == (len) && rspamd_lc_cmp((str), (cmd), (len)) == 0)
  92. static gboolean
  93. rspamd_protocol_handle_url(struct rspamd_task *task,
  94. struct rspamd_http_message *msg)
  95. {
  96. GHashTable *query_args;
  97. GHashTableIter it;
  98. struct http_parser_url u;
  99. const gchar *p;
  100. gsize pathlen;
  101. rspamd_ftok_t *key, *value;
  102. gpointer k, v;
  103. if (msg->url == NULL || msg->url->len == 0) {
  104. g_set_error(&task->err, rspamd_protocol_quark(), 400, "missing command");
  105. return FALSE;
  106. }
  107. if (http_parser_parse_url(msg->url->str, msg->url->len, 0, &u) != 0) {
  108. g_set_error(&task->err, rspamd_protocol_quark(), 400, "bad request URL");
  109. return FALSE;
  110. }
  111. if (!(u.field_set & (1 << UF_PATH))) {
  112. g_set_error(&task->err, rspamd_protocol_quark(), 400,
  113. "bad request URL: missing path");
  114. return FALSE;
  115. }
  116. p = msg->url->str + u.field_data[UF_PATH].off;
  117. pathlen = u.field_data[UF_PATH].len;
  118. if (*p == '/') {
  119. p++;
  120. pathlen--;
  121. }
  122. switch (*p) {
  123. case 'c':
  124. case 'C':
  125. /* check */
  126. if (COMPARE_CMD(p, MSG_CMD_CHECK_V2, pathlen)) {
  127. task->cmd = CMD_CHECK_V2;
  128. msg_debug_protocol("got checkv2 command");
  129. }
  130. else if (COMPARE_CMD(p, MSG_CMD_CHECK, pathlen)) {
  131. task->cmd = CMD_CHECK;
  132. msg_debug_protocol("got check command");
  133. }
  134. else {
  135. goto err;
  136. }
  137. break;
  138. case 's':
  139. case 'S':
  140. /* symbols, skip */
  141. if (COMPARE_CMD(p, MSG_CMD_SYMBOLS, pathlen)) {
  142. task->cmd = CMD_CHECK;
  143. msg_debug_protocol("got symbols -> old check command");
  144. }
  145. else if (COMPARE_CMD(p, MSG_CMD_SCAN, pathlen)) {
  146. task->cmd = CMD_CHECK;
  147. msg_debug_protocol("got scan -> old check command");
  148. }
  149. else if (COMPARE_CMD(p, MSG_CMD_SKIP, pathlen)) {
  150. msg_debug_protocol("got skip command");
  151. task->cmd = CMD_SKIP;
  152. }
  153. else {
  154. goto err;
  155. }
  156. break;
  157. case 'p':
  158. case 'P':
  159. /* ping, process */
  160. if (COMPARE_CMD(p, MSG_CMD_PING, pathlen)) {
  161. msg_debug_protocol("got ping command");
  162. task->cmd = CMD_PING;
  163. task->flags |= RSPAMD_TASK_FLAG_SKIP;
  164. task->processed_stages |= RSPAMD_TASK_STAGE_DONE; /* Skip all */
  165. }
  166. else if (COMPARE_CMD(p, MSG_CMD_PROCESS, pathlen)) {
  167. msg_debug_protocol("got process -> old check command");
  168. task->cmd = CMD_CHECK;
  169. }
  170. else {
  171. goto err;
  172. }
  173. break;
  174. case 'r':
  175. case 'R':
  176. /* report, report_ifspam */
  177. if (COMPARE_CMD(p, MSG_CMD_REPORT, pathlen)) {
  178. msg_debug_protocol("got report -> old check command");
  179. task->cmd = CMD_CHECK;
  180. }
  181. else if (COMPARE_CMD(p, MSG_CMD_REPORT_IFSPAM, pathlen)) {
  182. msg_debug_protocol("got reportifspam -> old check command");
  183. task->cmd = CMD_CHECK;
  184. }
  185. else {
  186. goto err;
  187. }
  188. break;
  189. default:
  190. goto err;
  191. }
  192. if (u.field_set & (1u << UF_QUERY)) {
  193. /* In case if we have a query, we need to store it somewhere */
  194. query_args = rspamd_http_message_parse_query(msg);
  195. /* Insert the rest of query params as HTTP headers */
  196. g_hash_table_iter_init(&it, query_args);
  197. while (g_hash_table_iter_next(&it, &k, &v)) {
  198. gchar *key_cpy;
  199. key = k;
  200. value = v;
  201. key_cpy = rspamd_mempool_ftokdup(task->task_pool, key);
  202. rspamd_http_message_add_header_len(msg, key_cpy,
  203. value->begin, value->len);
  204. msg_debug_protocol("added header \"%T\" -> \"%T\" from HTTP query",
  205. key, value);
  206. }
  207. g_hash_table_unref(query_args);
  208. }
  209. return TRUE;
  210. err:
  211. g_set_error(&task->err, rspamd_protocol_quark(), 400, "invalid command");
  212. return FALSE;
  213. }
  214. static void
  215. rspamd_protocol_process_recipients(struct rspamd_task *task,
  216. const rspamd_ftok_t *hdr)
  217. {
  218. enum {
  219. skip_spaces,
  220. quoted_string,
  221. normal_string,
  222. } state = skip_spaces;
  223. const gchar *p, *end, *start_addr;
  224. struct rspamd_email_address *addr;
  225. p = hdr->begin;
  226. end = hdr->begin + hdr->len;
  227. start_addr = NULL;
  228. while (p < end) {
  229. switch (state) {
  230. case skip_spaces:
  231. if (g_ascii_isspace(*p)) {
  232. p++;
  233. }
  234. else if (*p == '"') {
  235. start_addr = p;
  236. p++;
  237. state = quoted_string;
  238. }
  239. else {
  240. state = normal_string;
  241. start_addr = p;
  242. }
  243. break;
  244. case quoted_string:
  245. if (*p == '"') {
  246. state = normal_string;
  247. p++;
  248. }
  249. else if (*p == '\\') {
  250. /* Quoted pair */
  251. p += 2;
  252. }
  253. else {
  254. p++;
  255. }
  256. break;
  257. case normal_string:
  258. if (*p == '"') {
  259. state = quoted_string;
  260. p++;
  261. }
  262. else if (*p == ',' && start_addr != NULL && p > start_addr) {
  263. /* We have finished address, check what we have */
  264. addr = rspamd_email_address_from_smtp(start_addr,
  265. p - start_addr);
  266. if (addr) {
  267. if (task->rcpt_envelope == NULL) {
  268. task->rcpt_envelope = g_ptr_array_sized_new(
  269. 2);
  270. }
  271. g_ptr_array_add(task->rcpt_envelope, addr);
  272. }
  273. else {
  274. msg_err_protocol("bad rcpt address: '%*s'",
  275. (int) (p - start_addr), start_addr);
  276. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  277. }
  278. start_addr = NULL;
  279. p++;
  280. state = skip_spaces;
  281. }
  282. else {
  283. p++;
  284. }
  285. break;
  286. }
  287. }
  288. /* Check remainder */
  289. if (start_addr && p > start_addr) {
  290. switch (state) {
  291. case normal_string:
  292. addr = rspamd_email_address_from_smtp(start_addr, end - start_addr);
  293. if (addr) {
  294. if (task->rcpt_envelope == NULL) {
  295. task->rcpt_envelope = g_ptr_array_sized_new(
  296. 2);
  297. }
  298. g_ptr_array_add(task->rcpt_envelope, addr);
  299. }
  300. else {
  301. msg_err_protocol("bad rcpt address: '%*s'",
  302. (int) (end - start_addr), start_addr);
  303. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  304. }
  305. break;
  306. case skip_spaces:
  307. /* Do nothing */
  308. break;
  309. case quoted_string:
  310. default:
  311. msg_err_protocol("bad state when parsing rcpt address: '%*s'",
  312. (int) (end - start_addr), start_addr);
  313. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  314. }
  315. }
  316. }
  317. #define COMPARE_FLAG_LIT(lit) (len == sizeof(lit) - 1 && memcmp((lit), str, len) == 0)
  318. #define CHECK_PROTOCOL_FLAG(lit, fl) \
  319. do { \
  320. if (!known && COMPARE_FLAG_LIT(lit)) { \
  321. task->protocol_flags |= (fl); \
  322. known = TRUE; \
  323. msg_debug_protocol("add protocol flag %s", lit); \
  324. } \
  325. } while (0)
  326. #define CHECK_TASK_FLAG(lit, fl) \
  327. do { \
  328. if (!known && COMPARE_FLAG_LIT(lit)) { \
  329. task->flags |= (fl); \
  330. known = TRUE; \
  331. msg_debug_protocol("add task flag %s", lit); \
  332. } \
  333. } while (0)
  334. static void
  335. rspamd_protocol_handle_flag(struct rspamd_task *task, const gchar *str,
  336. gsize len)
  337. {
  338. gboolean known = FALSE;
  339. CHECK_TASK_FLAG("pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
  340. CHECK_TASK_FLAG("no_log", RSPAMD_TASK_FLAG_NO_LOG);
  341. CHECK_TASK_FLAG("skip", RSPAMD_TASK_FLAG_SKIP);
  342. CHECK_TASK_FLAG("skip_process", RSPAMD_TASK_FLAG_SKIP_PROCESS);
  343. CHECK_TASK_FLAG("no_stat", RSPAMD_TASK_FLAG_NO_STAT);
  344. CHECK_TASK_FLAG("ssl", RSPAMD_TASK_FLAG_SSL);
  345. CHECK_TASK_FLAG("profile", RSPAMD_TASK_FLAG_PROFILE);
  346. CHECK_PROTOCOL_FLAG("milter", RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
  347. CHECK_PROTOCOL_FLAG("zstd", RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED);
  348. CHECK_PROTOCOL_FLAG("ext_urls", RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
  349. CHECK_PROTOCOL_FLAG("body_block", RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK);
  350. CHECK_PROTOCOL_FLAG("groups", RSPAMD_TASK_PROTOCOL_FLAG_GROUPS);
  351. if (!known) {
  352. msg_warn_protocol("unknown flag: %*s", (gint) len, str);
  353. }
  354. }
  355. #undef COMPARE_FLAG
  356. #undef CHECK_PROTOCOL_FLAG
  357. static void
  358. rspamd_protocol_process_flags(struct rspamd_task *task, const rspamd_ftok_t *hdr)
  359. {
  360. enum {
  361. skip_spaces,
  362. read_flag,
  363. } state = skip_spaces;
  364. const gchar *p, *end, *start;
  365. p = hdr->begin;
  366. end = hdr->begin + hdr->len;
  367. start = NULL;
  368. while (p < end) {
  369. switch (state) {
  370. case skip_spaces:
  371. if (g_ascii_isspace(*p)) {
  372. p++;
  373. }
  374. else {
  375. state = read_flag;
  376. start = p;
  377. }
  378. break;
  379. case read_flag:
  380. if (*p == ',') {
  381. if (p > start) {
  382. rspamd_protocol_handle_flag(task, start, p - start);
  383. }
  384. start = NULL;
  385. state = skip_spaces;
  386. p++;
  387. }
  388. else {
  389. p++;
  390. }
  391. break;
  392. }
  393. }
  394. /* Check remainder */
  395. if (start && end > start && state == read_flag) {
  396. rspamd_protocol_handle_flag(task, start, end - start);
  397. }
  398. }
  399. #define IF_HEADER(name) \
  400. srch.begin = (name); \
  401. srch.len = sizeof(name) - 1; \
  402. if (rspamd_ftok_casecmp(hn_tok, &srch) == 0)
  403. gboolean
  404. rspamd_protocol_handle_headers(struct rspamd_task *task,
  405. struct rspamd_http_message *msg)
  406. {
  407. rspamd_ftok_t *hn_tok, *hv_tok, srch;
  408. gboolean has_ip = FALSE, seen_settings_header = FALSE;
  409. struct rspamd_http_header *header, *h;
  410. gchar *ntok;
  411. kh_foreach_value (msg->headers, header, {
  412. DL_FOREACH (header, h) {
  413. ntok = rspamd_mempool_ftokdup (task->task_pool, &h->name);
  414. hn_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hn_tok));
  415. hn_tok->begin = ntok;
  416. hn_tok->len = h->name.len;
  417. ntok = rspamd_mempool_ftokdup (task->task_pool, &h->value);
  418. hv_tok = rspamd_mempool_alloc (task->task_pool, sizeof (*hv_tok));
  419. hv_tok->begin = ntok;
  420. hv_tok->len = h->value.len;
  421. switch (*hn_tok->begin) {
  422. case 'd':
  423. case 'D':
  424. IF_HEADER(DELIVER_TO_HEADER)
  425. {
  426. task->deliver_to = rspamd_protocol_escape_braces(task, hv_tok);
  427. msg_debug_protocol("read deliver-to header, value: %s",
  428. task->deliver_to);
  429. }
  430. else
  431. {
  432. msg_debug_protocol("wrong header: %T", hn_tok);
  433. }
  434. break;
  435. case 'h':
  436. case 'H':
  437. IF_HEADER(HELO_HEADER)
  438. {
  439. task->helo = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
  440. msg_debug_protocol("read helo header, value: %s", task->helo);
  441. }
  442. IF_HEADER(HOSTNAME_HEADER)
  443. {
  444. task->hostname = rspamd_mempool_ftokdup(task->task_pool,
  445. hv_tok);
  446. msg_debug_protocol("read hostname header, value: %s", task->hostname);
  447. }
  448. break;
  449. case 'f':
  450. case 'F':
  451. IF_HEADER(FROM_HEADER)
  452. {
  453. if (hv_tok->len == 0) {
  454. /* Replace '' with '<>' to fix parsing issue */
  455. RSPAMD_FTOK_ASSIGN(hv_tok, "<>");
  456. }
  457. task->from_envelope = rspamd_email_address_from_smtp(
  458. hv_tok->begin,
  459. hv_tok->len);
  460. msg_debug_protocol("read from header, value: %T", hv_tok);
  461. if (!task->from_envelope) {
  462. msg_err_protocol("bad from header: '%T'", hv_tok);
  463. task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
  464. }
  465. }
  466. IF_HEADER(FILENAME_HEADER)
  467. {
  468. task->msg.fpath = rspamd_mempool_ftokdup(task->task_pool,
  469. hv_tok);
  470. msg_debug_protocol("read filename header, value: %s", task->msg.fpath);
  471. }
  472. IF_HEADER(FLAGS_HEADER)
  473. {
  474. msg_debug_protocol("read flags header, value: %T", hv_tok);
  475. rspamd_protocol_process_flags(task, hv_tok);
  476. }
  477. break;
  478. case 'q':
  479. case 'Q':
  480. IF_HEADER(QUEUE_ID_HEADER)
  481. {
  482. task->queue_id = rspamd_mempool_ftokdup(task->task_pool,
  483. hv_tok);
  484. msg_debug_protocol("read queue_id header, value: %s", task->queue_id);
  485. }
  486. else
  487. {
  488. msg_debug_protocol("wrong header: %T", hn_tok);
  489. }
  490. break;
  491. case 'r':
  492. case 'R':
  493. IF_HEADER(RCPT_HEADER)
  494. {
  495. rspamd_protocol_process_recipients(task, hv_tok);
  496. msg_debug_protocol("read rcpt header, value: %T", hv_tok);
  497. }
  498. IF_HEADER(RAW_DATA_HEADER)
  499. {
  500. srch.begin = "yes";
  501. srch.len = 3;
  502. msg_debug_protocol("read raw data header, value: %T", hv_tok);
  503. if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
  504. task->flags &= ~RSPAMD_TASK_FLAG_MIME;
  505. msg_debug_protocol("disable mime parsing");
  506. }
  507. }
  508. break;
  509. case 'i':
  510. case 'I':
  511. IF_HEADER(IP_ADDR_HEADER)
  512. {
  513. if (!rspamd_parse_inet_address(&task->from_addr,
  514. hv_tok->begin, hv_tok->len,
  515. RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
  516. msg_err_protocol("bad ip header: '%T'", hv_tok);
  517. }
  518. else {
  519. msg_debug_protocol("read IP header, value: %T", hv_tok);
  520. has_ip = TRUE;
  521. }
  522. }
  523. else
  524. {
  525. msg_debug_protocol("wrong header: %T", hn_tok);
  526. }
  527. break;
  528. case 'p':
  529. case 'P':
  530. IF_HEADER(PASS_HEADER)
  531. {
  532. srch.begin = "all";
  533. srch.len = 3;
  534. msg_debug_protocol("read pass header, value: %T", hv_tok);
  535. if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
  536. task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
  537. msg_debug_protocol("pass all filters");
  538. }
  539. }
  540. IF_HEADER(PROFILE_HEADER)
  541. {
  542. msg_debug_protocol("read profile header, value: %T", hv_tok);
  543. task->flags |= RSPAMD_TASK_FLAG_PROFILE;
  544. }
  545. break;
  546. case 's':
  547. case 'S':
  548. IF_HEADER(SETTINGS_ID_HEADER)
  549. {
  550. msg_debug_protocol("read settings-id header, value: %T", hv_tok);
  551. task->settings_elt = rspamd_config_find_settings_name_ref(
  552. task->cfg, hv_tok->begin, hv_tok->len);
  553. if (task->settings_elt == NULL) {
  554. GString *known_ids = g_string_new(NULL);
  555. struct rspamd_config_settings_elt *cur;
  556. DL_FOREACH(task->cfg->setting_ids, cur)
  557. {
  558. rspamd_printf_gstring(known_ids, "%s(%ud);",
  559. cur->name, cur->id);
  560. }
  561. msg_warn_protocol("unknown settings id: %T(%d); known_ids: %v",
  562. hv_tok,
  563. rspamd_config_name_to_id(hv_tok->begin, hv_tok->len),
  564. known_ids);
  565. g_string_free(known_ids, TRUE);
  566. }
  567. else {
  568. msg_debug_protocol("applied settings id %T -> %ud", hv_tok,
  569. task->settings_elt->id);
  570. }
  571. }
  572. IF_HEADER(SETTINGS_HEADER)
  573. {
  574. msg_debug_protocol("read settings header, value: %T", hv_tok);
  575. seen_settings_header = TRUE;
  576. }
  577. break;
  578. case 'u':
  579. case 'U':
  580. IF_HEADER(USER_HEADER)
  581. {
  582. /*
  583. * We must ignore User header in case of spamc, as SA has
  584. * different meaning of this header
  585. */
  586. msg_debug_protocol("read user header, value: %T", hv_tok);
  587. if (!RSPAMD_TASK_IS_SPAMC(task)) {
  588. task->auth_user = rspamd_mempool_ftokdup(task->task_pool,
  589. hv_tok);
  590. }
  591. else {
  592. msg_info_protocol("ignore user header: legacy SA protocol");
  593. }
  594. }
  595. IF_HEADER(URLS_HEADER)
  596. {
  597. msg_debug_protocol("read urls header, value: %T", hv_tok);
  598. srch.begin = "extended";
  599. srch.len = 8;
  600. if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
  601. task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS;
  602. msg_debug_protocol("extended urls information");
  603. }
  604. /* TODO: add more formats there */
  605. }
  606. IF_HEADER(USER_AGENT_HEADER)
  607. {
  608. msg_debug_protocol("read user-agent header, value: %T", hv_tok);
  609. if (hv_tok->len == 6 &&
  610. rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) {
  611. task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT;
  612. }
  613. }
  614. break;
  615. case 'l':
  616. case 'L':
  617. IF_HEADER(NO_LOG_HEADER)
  618. {
  619. msg_debug_protocol("read log header, value: %T", hv_tok);
  620. srch.begin = "no";
  621. srch.len = 2;
  622. if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
  623. task->flags |= RSPAMD_TASK_FLAG_NO_LOG;
  624. }
  625. }
  626. break;
  627. case 'm':
  628. case 'M':
  629. IF_HEADER(MLEN_HEADER)
  630. {
  631. msg_debug_protocol("read message length header, value: %T",
  632. hv_tok);
  633. task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL;
  634. }
  635. IF_HEADER(MTA_TAG_HEADER)
  636. {
  637. gchar *mta_tag;
  638. mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
  639. rspamd_mempool_set_variable(task->task_pool,
  640. RSPAMD_MEMPOOL_MTA_TAG,
  641. mta_tag, NULL);
  642. msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag);
  643. }
  644. IF_HEADER(MTA_NAME_HEADER)
  645. {
  646. gchar *mta_name;
  647. mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
  648. rspamd_mempool_set_variable(task->task_pool,
  649. RSPAMD_MEMPOOL_MTA_NAME,
  650. mta_name, NULL);
  651. msg_debug_protocol("read MTA-Name header, value: %s", mta_name);
  652. }
  653. IF_HEADER(MILTER_HEADER)
  654. {
  655. task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER;
  656. msg_debug_protocol("read Milter header, value: %T", hv_tok);
  657. }
  658. break;
  659. case 't':
  660. case 'T':
  661. IF_HEADER(TLS_CIPHER_HEADER)
  662. {
  663. task->flags |= RSPAMD_TASK_FLAG_SSL;
  664. msg_debug_protocol("read TLS cipher header, value: %T", hv_tok);
  665. }
  666. break;
  667. default:
  668. msg_debug_protocol("generic header: %T", hn_tok);
  669. break;
  670. }
  671. rspamd_task_add_request_header (task, hn_tok, hv_tok);
  672. }
  673. }); /* End of kh_foreach_value */
  674. if (seen_settings_header && task->settings_elt) {
  675. msg_warn_task("ignore settings id %s as settings header is also presented",
  676. task->settings_elt->name);
  677. REF_RELEASE(task->settings_elt);
  678. task->settings_elt = NULL;
  679. }
  680. if (!has_ip) {
  681. task->flags |= RSPAMD_TASK_FLAG_NO_IP;
  682. }
  683. return TRUE;
  684. }
  685. #define BOOL_TO_FLAG(val, flags, flag) \
  686. do { \
  687. if ((val)) (flags) |= (flag); \
  688. else \
  689. (flags) &= ~(flag); \
  690. } while (0)
  691. gboolean
  692. rspamd_protocol_parse_task_flags(rspamd_mempool_t *pool,
  693. const ucl_object_t *obj,
  694. gpointer ud,
  695. struct rspamd_rcl_section *section,
  696. GError **err)
  697. {
  698. struct rspamd_rcl_struct_parser *pd = ud;
  699. gint *target;
  700. const gchar *key;
  701. gboolean value;
  702. target = (gint *) (((gchar *) pd->user_struct) + pd->offset);
  703. key = ucl_object_key(obj);
  704. value = ucl_object_toboolean(obj);
  705. if (key != NULL) {
  706. if (g_ascii_strcasecmp(key, "pass_all") == 0) {
  707. BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_PASS_ALL);
  708. }
  709. else if (g_ascii_strcasecmp(key, "no_log") == 0) {
  710. BOOL_TO_FLAG(value, *target, RSPAMD_TASK_FLAG_NO_LOG);
  711. }
  712. }
  713. return TRUE;
  714. }
  715. static struct rspamd_rcl_sections_map *control_parser = NULL;
  716. RSPAMD_CONSTRUCTOR(rspamd_protocol_control_parser_ctor)
  717. {
  718. struct rspamd_rcl_section *sub = rspamd_rcl_add_section(&control_parser, NULL,
  719. "*",
  720. NULL,
  721. NULL,
  722. UCL_OBJECT,
  723. FALSE,
  724. TRUE);
  725. /* Default handlers */
  726. rspamd_rcl_add_default_handler(sub,
  727. "ip",
  728. rspamd_rcl_parse_struct_addr,
  729. G_STRUCT_OFFSET(struct rspamd_task, from_addr),
  730. 0,
  731. NULL);
  732. rspamd_rcl_add_default_handler(sub,
  733. "from",
  734. rspamd_rcl_parse_struct_mime_addr,
  735. G_STRUCT_OFFSET(struct rspamd_task, from_envelope),
  736. 0,
  737. NULL);
  738. rspamd_rcl_add_default_handler(sub,
  739. "rcpt",
  740. rspamd_rcl_parse_struct_mime_addr,
  741. G_STRUCT_OFFSET(struct rspamd_task, rcpt_envelope),
  742. 0,
  743. NULL);
  744. rspamd_rcl_add_default_handler(sub,
  745. "helo",
  746. rspamd_rcl_parse_struct_string,
  747. G_STRUCT_OFFSET(struct rspamd_task, helo),
  748. 0,
  749. NULL);
  750. rspamd_rcl_add_default_handler(sub,
  751. "user",
  752. rspamd_rcl_parse_struct_string,
  753. G_STRUCT_OFFSET(struct rspamd_task, auth_user),
  754. 0,
  755. NULL);
  756. rspamd_rcl_add_default_handler(sub,
  757. "pass_all",
  758. rspamd_protocol_parse_task_flags,
  759. G_STRUCT_OFFSET(struct rspamd_task, flags),
  760. 0,
  761. NULL);
  762. rspamd_rcl_add_default_handler(sub,
  763. "json",
  764. rspamd_protocol_parse_task_flags,
  765. G_STRUCT_OFFSET(struct rspamd_task, flags),
  766. 0,
  767. NULL);
  768. }
  769. RSPAMD_DESTRUCTOR(rspamd_protocol_control_parser_dtor)
  770. {
  771. rspamd_rcl_sections_free(control_parser);
  772. }
  773. gboolean
  774. rspamd_protocol_handle_control(struct rspamd_task *task,
  775. const ucl_object_t *control)
  776. {
  777. GError *err = NULL;
  778. if (!rspamd_rcl_parse(control_parser, task->cfg, task, task->task_pool,
  779. control, &err)) {
  780. msg_warn_protocol("cannot parse control block: %e", err);
  781. g_error_free(err);
  782. return FALSE;
  783. }
  784. return TRUE;
  785. }
  786. gboolean
  787. rspamd_protocol_handle_request(struct rspamd_task *task,
  788. struct rspamd_http_message *msg)
  789. {
  790. gboolean ret = TRUE;
  791. if (msg->method == HTTP_SYMBOLS) {
  792. msg_debug_protocol("got legacy SYMBOLS method, enable rspamc protocol workaround");
  793. task->cmd = CMD_CHECK_RSPAMC;
  794. }
  795. else if (msg->method == HTTP_CHECK) {
  796. msg_debug_protocol("got legacy CHECK method, enable rspamc protocol workaround");
  797. task->cmd = CMD_CHECK_RSPAMC;
  798. }
  799. else {
  800. ret = rspamd_protocol_handle_url(task, msg);
  801. }
  802. if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) {
  803. msg_debug_protocol("got legacy SA input, enable spamc protocol workaround");
  804. task->cmd = CMD_CHECK_SPAMC;
  805. }
  806. return ret;
  807. }
  808. /* Structure for writing tree data */
  809. struct tree_cb_data {
  810. ucl_object_t *top;
  811. khash_t(rspamd_url_host_hash) * seen;
  812. struct rspamd_task *task;
  813. };
  814. static ucl_object_t *
  815. rspamd_protocol_extended_url(struct rspamd_task *task,
  816. struct rspamd_url *url,
  817. const gchar *encoded, gsize enclen)
  818. {
  819. ucl_object_t *obj, *elt;
  820. obj = ucl_object_typed_new(UCL_OBJECT);
  821. elt = ucl_object_fromstring_common(encoded, enclen, 0);
  822. ucl_object_insert_key(obj, elt, "url", 0, false);
  823. if (url->tldlen > 0) {
  824. elt = ucl_object_fromstring_common(rspamd_url_tld_unsafe(url),
  825. url->tldlen, 0);
  826. ucl_object_insert_key(obj, elt, "tld", 0, false);
  827. }
  828. if (url->hostlen > 0) {
  829. elt = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
  830. url->hostlen, 0);
  831. ucl_object_insert_key(obj, elt, "host", 0, false);
  832. }
  833. ucl_object_t *flags = ucl_object_typed_new(UCL_ARRAY);
  834. for (unsigned int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
  835. if (url->flags & (1u << i)) {
  836. ucl_object_t *fl = ucl_object_fromstring(rspamd_url_flag_to_string(1u << i));
  837. ucl_array_append(flags, fl);
  838. }
  839. }
  840. ucl_object_insert_key(obj, flags, "flags", 0, false);
  841. if (url->ext && url->ext->linked_url) {
  842. encoded = rspamd_url_encode(url->ext->linked_url, &enclen, task->task_pool);
  843. elt = rspamd_protocol_extended_url(task, url->ext->linked_url, encoded,
  844. enclen);
  845. ucl_object_insert_key(obj, elt, "linked_url", 0, false);
  846. }
  847. return obj;
  848. }
  849. /*
  850. * Callback for writing urls
  851. */
  852. static void
  853. urls_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
  854. {
  855. ucl_object_t *obj;
  856. struct rspamd_task *task = cb->task;
  857. const gchar *user_field = "unknown", *encoded = NULL;
  858. gboolean has_user = FALSE;
  859. guint len = 0;
  860. gsize enclen = 0;
  861. if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS)) {
  862. if (url->hostlen > 0) {
  863. if (rspamd_url_host_set_has(cb->seen, url)) {
  864. return;
  865. }
  866. goffset err_offset;
  867. if ((err_offset = rspamd_fast_utf8_validate(rspamd_url_host_unsafe(url),
  868. url->hostlen)) == 0) {
  869. obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
  870. url->hostlen, 0);
  871. }
  872. else {
  873. obj = ucl_object_fromstring_common(rspamd_url_host_unsafe(url),
  874. err_offset - 1, 0);
  875. }
  876. }
  877. else {
  878. return;
  879. }
  880. rspamd_url_host_set_add(cb->seen, url);
  881. }
  882. else {
  883. encoded = rspamd_url_encode(url, &enclen, task->task_pool);
  884. obj = rspamd_protocol_extended_url(task, url, encoded, enclen);
  885. }
  886. ucl_array_append(cb->top, obj);
  887. if (cb->task->cfg->log_urls) {
  888. if (task->auth_user) {
  889. user_field = task->auth_user;
  890. len = strlen(task->auth_user);
  891. has_user = TRUE;
  892. }
  893. else if (task->from_envelope) {
  894. user_field = task->from_envelope->addr;
  895. len = task->from_envelope->addr_len;
  896. }
  897. if (!encoded) {
  898. encoded = rspamd_url_encode(url, &enclen, task->task_pool);
  899. }
  900. msg_notice_task_encrypted("<%s> %s: %*s; ip: %s; URL: %*s",
  901. MESSAGE_FIELD_CHECK(task, message_id),
  902. has_user ? "user" : "from",
  903. len, user_field,
  904. rspamd_inet_address_to_string(task->from_addr),
  905. (gint) enclen, encoded);
  906. }
  907. }
  908. static ucl_object_t *
  909. rspamd_urls_tree_ucl(khash_t(rspamd_url_hash) * set,
  910. struct rspamd_task *task)
  911. {
  912. struct tree_cb_data cb;
  913. ucl_object_t *obj;
  914. struct rspamd_url *u;
  915. obj = ucl_object_typed_new(UCL_ARRAY);
  916. cb.top = obj;
  917. cb.task = task;
  918. cb.seen = kh_init(rspamd_url_host_hash);
  919. kh_foreach_key(set, u, {
  920. if (!(u->protocol & PROTOCOL_MAILTO)) {
  921. urls_protocol_cb(u, &cb);
  922. }
  923. });
  924. kh_destroy(rspamd_url_host_hash, cb.seen);
  925. return obj;
  926. }
  927. static void
  928. emails_protocol_cb(struct rspamd_url *url, struct tree_cb_data *cb)
  929. {
  930. ucl_object_t *obj;
  931. if (url->userlen > 0 && url->hostlen > 0) {
  932. obj = ucl_object_fromlstring(rspamd_url_user_unsafe(url),
  933. url->userlen + url->hostlen + 1);
  934. ucl_array_append(cb->top, obj);
  935. }
  936. }
  937. static ucl_object_t *
  938. rspamd_emails_tree_ucl(khash_t(rspamd_url_hash) * set,
  939. struct rspamd_task *task)
  940. {
  941. struct tree_cb_data cb;
  942. ucl_object_t *obj;
  943. struct rspamd_url *u;
  944. obj = ucl_object_typed_new(UCL_ARRAY);
  945. cb.top = obj;
  946. cb.task = task;
  947. kh_foreach_key(set, u, {
  948. if ((u->protocol & PROTOCOL_MAILTO)) {
  949. emails_protocol_cb(u, &cb);
  950. }
  951. });
  952. return obj;
  953. }
  954. /* Write new subject */
  955. static const gchar *
  956. rspamd_protocol_rewrite_subject(struct rspamd_task *task)
  957. {
  958. GString *subj_buf;
  959. gchar *res;
  960. const gchar *s, *c, *p;
  961. gsize slen = 0;
  962. c = rspamd_mempool_get_variable(task->task_pool, "metric_subject");
  963. if (c == NULL) {
  964. c = task->cfg->subject;
  965. }
  966. if (c == NULL) {
  967. c = SPAM_SUBJECT;
  968. }
  969. p = c;
  970. s = MESSAGE_FIELD_CHECK(task, subject);
  971. if (s) {
  972. slen = strlen(s);
  973. }
  974. subj_buf = g_string_sized_new(strlen(c) + slen);
  975. while (*p) {
  976. if (*p == '%') {
  977. switch (p[1]) {
  978. case 's':
  979. g_string_append_len(subj_buf, c, p - c);
  980. if (s) {
  981. g_string_append_len(subj_buf, s, slen);
  982. }
  983. c = p + 2;
  984. p += 2;
  985. break;
  986. case 'd':
  987. g_string_append_len(subj_buf, c, p - c);
  988. rspamd_printf_gstring(subj_buf, "%.2f", task->result->score);
  989. c = p + 2;
  990. p += 2;
  991. break;
  992. case '%':
  993. g_string_append_len(subj_buf, c, p - c);
  994. g_string_append_c(subj_buf, '%');
  995. c = p + 2;
  996. p += 2;
  997. break;
  998. default:
  999. p++; /* Just % something unknown */
  1000. break;
  1001. }
  1002. }
  1003. else {
  1004. p++;
  1005. }
  1006. }
  1007. if (p > c) {
  1008. g_string_append_len(subj_buf, c, p - c);
  1009. }
  1010. res = rspamd_mime_header_encode(subj_buf->str, subj_buf->len);
  1011. rspamd_mempool_add_destructor(task->task_pool,
  1012. (rspamd_mempool_destruct_t) g_free,
  1013. res);
  1014. g_string_free(subj_buf, TRUE);
  1015. return res;
  1016. }
  1017. static ucl_object_t *
  1018. rspamd_metric_symbol_ucl(struct rspamd_task *task, struct rspamd_symbol_result *sym)
  1019. {
  1020. ucl_object_t *obj = NULL, *ar;
  1021. const gchar *description = NULL;
  1022. struct rspamd_symbol_option *opt;
  1023. if (sym->sym != NULL) {
  1024. description = sym->sym->description;
  1025. }
  1026. obj = ucl_object_typed_new(UCL_OBJECT);
  1027. ucl_object_insert_key(obj, ucl_object_fromstring(sym->name), "name", 0, false);
  1028. ucl_object_insert_key(obj, ucl_object_fromdouble(sym->score), "score", 0, false);
  1029. if (task->cmd == CMD_CHECK_V2) {
  1030. if (sym->sym) {
  1031. ucl_object_insert_key(obj, ucl_object_fromdouble(sym->sym->score), "metric_score", 0, false);
  1032. }
  1033. else {
  1034. ucl_object_insert_key(obj, ucl_object_fromdouble(0.0),
  1035. "metric_score", 0, false);
  1036. }
  1037. }
  1038. if (description) {
  1039. ucl_object_insert_key(obj, ucl_object_fromstring(description),
  1040. "description", 0, false);
  1041. }
  1042. if (sym->options != NULL) {
  1043. ar = ucl_object_typed_new(UCL_ARRAY);
  1044. DL_FOREACH(sym->opts_head, opt)
  1045. {
  1046. ucl_array_append(ar, ucl_object_fromstring_common(opt->option,
  1047. opt->optlen, 0));
  1048. }
  1049. ucl_object_insert_key(obj, ar, "options", 0, false);
  1050. }
  1051. return obj;
  1052. }
  1053. static ucl_object_t *
  1054. rspamd_metric_group_ucl(struct rspamd_task *task,
  1055. struct rspamd_symbols_group *gr, gdouble score)
  1056. {
  1057. ucl_object_t *obj = NULL;
  1058. obj = ucl_object_typed_new(UCL_OBJECT);
  1059. ucl_object_insert_key(obj, ucl_object_fromdouble(score),
  1060. "score", 0, false);
  1061. if (gr->description) {
  1062. ucl_object_insert_key(obj, ucl_object_fromstring(gr->description),
  1063. "description", 0, false);
  1064. }
  1065. return obj;
  1066. }
  1067. static ucl_object_t *
  1068. rspamd_scan_result_ucl(struct rspamd_task *task,
  1069. struct rspamd_scan_result *mres, ucl_object_t *top)
  1070. {
  1071. struct rspamd_symbol_result *sym;
  1072. gboolean is_spam;
  1073. struct rspamd_action *action;
  1074. ucl_object_t *obj = NULL, *sobj;
  1075. const gchar *subject;
  1076. struct rspamd_passthrough_result *pr = NULL;
  1077. action = rspamd_check_action_metric(task, &pr, NULL);
  1078. is_spam = !(action->flags & RSPAMD_ACTION_HAM);
  1079. if (task->cmd == CMD_CHECK) {
  1080. obj = ucl_object_typed_new(UCL_OBJECT);
  1081. ucl_object_insert_key(obj,
  1082. ucl_object_frombool(is_spam),
  1083. "is_spam", 0, false);
  1084. }
  1085. else {
  1086. obj = top;
  1087. }
  1088. if (pr) {
  1089. if (pr->message && !(pr->flags & RSPAMD_PASSTHROUGH_NO_SMTP_MESSAGE)) {
  1090. /* Add smtp message if it does not exist: see #3269 for details */
  1091. if (ucl_object_lookup(task->messages, "smtp_message") == NULL) {
  1092. ucl_object_insert_key(task->messages,
  1093. ucl_object_fromstring_common(pr->message, 0, UCL_STRING_RAW),
  1094. "smtp_message", 0,
  1095. false);
  1096. }
  1097. }
  1098. ucl_object_insert_key(obj,
  1099. ucl_object_fromstring(pr->module),
  1100. "passthrough_module", 0, false);
  1101. }
  1102. ucl_object_insert_key(obj,
  1103. ucl_object_frombool(RSPAMD_TASK_IS_SKIPPED(task)),
  1104. "is_skipped", 0, false);
  1105. if (!isnan(mres->score)) {
  1106. ucl_object_insert_key(obj, ucl_object_fromdouble(mres->score),
  1107. "score", 0, false);
  1108. }
  1109. else {
  1110. ucl_object_insert_key(obj,
  1111. ucl_object_fromdouble(0.0), "score", 0, false);
  1112. }
  1113. ucl_object_insert_key(obj,
  1114. ucl_object_fromdouble(rspamd_task_get_required_score(task, mres)),
  1115. "required_score", 0, false);
  1116. ucl_object_insert_key(obj,
  1117. ucl_object_fromstring(action->name),
  1118. "action", 0, false);
  1119. if (action->action_type == METRIC_ACTION_REWRITE_SUBJECT) {
  1120. subject = rspamd_protocol_rewrite_subject(task);
  1121. if (subject) {
  1122. ucl_object_insert_key(obj, ucl_object_fromstring(subject),
  1123. "subject", 0, false);
  1124. }
  1125. }
  1126. if (action->flags & RSPAMD_ACTION_MILTER) {
  1127. /* Treat milter action specially */
  1128. if (action->action_type == METRIC_ACTION_DISCARD) {
  1129. ucl_object_insert_key(obj, ucl_object_fromstring("discard"),
  1130. "reject", 0, false);
  1131. }
  1132. else if (action->action_type == METRIC_ACTION_QUARANTINE) {
  1133. ucl_object_insert_key(obj, ucl_object_fromstring("quarantine"),
  1134. "reject", 0, false);
  1135. }
  1136. }
  1137. /* Now handle symbols */
  1138. if (task->cmd != CMD_CHECK) {
  1139. /* Insert actions thresholds */
  1140. ucl_object_t *actions_obj = ucl_object_typed_new(UCL_OBJECT);
  1141. for (int i = task->result->nactions - 1; i >= 0; i--) {
  1142. struct rspamd_action_config *action_lim = &task->result->actions_config[i];
  1143. if (!isnan(action_lim->cur_limit) &&
  1144. !(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
  1145. ucl_object_insert_key(actions_obj, ucl_object_fromdouble(action_lim->cur_limit),
  1146. action_lim->action->name, 0, true);
  1147. }
  1148. }
  1149. ucl_object_insert_key(obj, actions_obj, "thresholds", 0, false);
  1150. /* For checkv2 we insert symbols as a separate object */
  1151. obj = ucl_object_typed_new(UCL_OBJECT);
  1152. }
  1153. kh_foreach_value(mres->symbols, sym, {
  1154. if (!(sym->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
  1155. sobj = rspamd_metric_symbol_ucl(task, sym);
  1156. ucl_object_insert_key(obj, sobj, sym->name, 0, false);
  1157. }
  1158. })
  1159. if (task->cmd != CMD_CHECK)
  1160. {
  1161. /* For checkv2 we insert symbols as a separate object */
  1162. ucl_object_insert_key(top, obj, "symbols", 0, false);
  1163. }
  1164. else
  1165. {
  1166. /* For legacy check we just insert it as "default" all together */
  1167. ucl_object_insert_key(top, obj, DEFAULT_METRIC, 0, false);
  1168. }
  1169. /* Handle groups if needed */
  1170. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_GROUPS) {
  1171. struct rspamd_symbols_group *gr;
  1172. gdouble gr_score;
  1173. obj = ucl_object_typed_new(UCL_OBJECT);
  1174. ucl_object_reserve(obj, kh_size(mres->sym_groups));
  1175. kh_foreach(mres->sym_groups, gr, gr_score, {
  1176. if (task->cfg->public_groups_only &&
  1177. !(gr->flags & RSPAMD_SYMBOL_GROUP_PUBLIC)) {
  1178. continue;
  1179. }
  1180. sobj = rspamd_metric_group_ucl(task, gr, gr_score);
  1181. ucl_object_insert_key(obj, sobj, gr->name, 0, false);
  1182. });
  1183. ucl_object_insert_key(top, obj, "groups", 0, false);
  1184. }
  1185. return obj;
  1186. }
  1187. void rspamd_ucl_torspamc_output(const ucl_object_t *top,
  1188. rspamd_fstring_t **out)
  1189. {
  1190. const ucl_object_t *symbols, *score,
  1191. *required_score, *is_spam, *elt, *cur;
  1192. ucl_object_iter_t iter = NULL;
  1193. score = ucl_object_lookup(top, "score");
  1194. required_score = ucl_object_lookup(top, "required_score");
  1195. is_spam = ucl_object_lookup(top, "is_spam");
  1196. rspamd_printf_fstring(out,
  1197. "Metric: default; %s; %.2f / %.2f / 0.0\r\n",
  1198. ucl_object_toboolean(is_spam) ? "True" : "False",
  1199. ucl_object_todouble(score),
  1200. ucl_object_todouble(required_score));
  1201. elt = ucl_object_lookup(top, "action");
  1202. if (elt != NULL) {
  1203. rspamd_printf_fstring(out, "Action: %s\r\n",
  1204. ucl_object_tostring(elt));
  1205. }
  1206. elt = ucl_object_lookup(top, "subject");
  1207. if (elt != NULL) {
  1208. rspamd_printf_fstring(out, "Subject: %s\r\n",
  1209. ucl_object_tostring(elt));
  1210. }
  1211. symbols = ucl_object_lookup(top, "symbols");
  1212. if (symbols != NULL) {
  1213. iter = NULL;
  1214. while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
  1215. if (elt->type == UCL_OBJECT) {
  1216. const ucl_object_t *sym_score;
  1217. sym_score = ucl_object_lookup(elt, "score");
  1218. rspamd_printf_fstring(out, "Symbol: %s(%.2f)\r\n",
  1219. ucl_object_key(elt),
  1220. ucl_object_todouble(sym_score));
  1221. }
  1222. }
  1223. }
  1224. elt = ucl_object_lookup(top, "messages");
  1225. if (elt != NULL) {
  1226. iter = NULL;
  1227. while ((cur = ucl_object_iterate(elt, &iter, true)) != NULL) {
  1228. if (cur->type == UCL_STRING) {
  1229. rspamd_printf_fstring(out, "Message: %s\r\n",
  1230. ucl_object_tostring(cur));
  1231. }
  1232. }
  1233. }
  1234. elt = ucl_object_lookup(top, "message-id");
  1235. if (elt != NULL) {
  1236. rspamd_printf_fstring(out, "Message-ID: %s\r\n",
  1237. ucl_object_tostring(elt));
  1238. }
  1239. }
  1240. void rspamd_ucl_tospamc_output(const ucl_object_t *top,
  1241. rspamd_fstring_t **out)
  1242. {
  1243. const ucl_object_t *symbols, *score,
  1244. *required_score, *is_spam, *elt;
  1245. ucl_object_iter_t iter = NULL;
  1246. rspamd_fstring_t *f;
  1247. score = ucl_object_lookup(top, "score");
  1248. required_score = ucl_object_lookup(top, "required_score");
  1249. is_spam = ucl_object_lookup(top, "is_spam");
  1250. rspamd_printf_fstring(out,
  1251. "Spam: %s ; %.2f / %.2f\r\n\r\n",
  1252. ucl_object_toboolean(is_spam) ? "True" : "False",
  1253. ucl_object_todouble(score),
  1254. ucl_object_todouble(required_score));
  1255. symbols = ucl_object_lookup(top, "symbols");
  1256. if (symbols != NULL) {
  1257. while ((elt = ucl_object_iterate(symbols, &iter, true)) != NULL) {
  1258. if (elt->type == UCL_OBJECT) {
  1259. rspamd_printf_fstring(out, "%s,",
  1260. ucl_object_key(elt));
  1261. }
  1262. }
  1263. /* Ugly hack, but the whole spamc is ugly */
  1264. f = *out;
  1265. if (f->str[f->len - 1] == ',') {
  1266. f->len--;
  1267. *out = rspamd_fstring_append(*out, CRLF, 2);
  1268. }
  1269. }
  1270. }
  1271. static void
  1272. rspamd_protocol_output_profiling(struct rspamd_task *task,
  1273. ucl_object_t *top)
  1274. {
  1275. GHashTable *tbl;
  1276. GHashTableIter it;
  1277. gpointer k, v;
  1278. ucl_object_t *prof;
  1279. gdouble val;
  1280. prof = ucl_object_typed_new(UCL_OBJECT);
  1281. tbl = rspamd_mempool_get_variable(task->task_pool, "profile");
  1282. if (tbl) {
  1283. g_hash_table_iter_init(&it, tbl);
  1284. while (g_hash_table_iter_next(&it, &k, &v)) {
  1285. val = *(gdouble *) v;
  1286. ucl_object_insert_key(prof, ucl_object_fromdouble(val),
  1287. (const char *) k, 0, false);
  1288. }
  1289. }
  1290. ucl_object_insert_key(top, prof, "profile", 0, false);
  1291. }
  1292. ucl_object_t *
  1293. rspamd_protocol_write_ucl(struct rspamd_task *task,
  1294. enum rspamd_protocol_flags flags)
  1295. {
  1296. ucl_object_t *top = NULL;
  1297. GString *dkim_sig;
  1298. GList *dkim_sigs;
  1299. const ucl_object_t *milter_reply;
  1300. rspamd_task_set_finish_time(task);
  1301. top = ucl_object_typed_new(UCL_OBJECT);
  1302. rspamd_mempool_add_destructor(task->task_pool,
  1303. (rspamd_mempool_destruct_t) ucl_object_unref, top);
  1304. if (flags & RSPAMD_PROTOCOL_METRICS) {
  1305. rspamd_scan_result_ucl(task, task->result, top);
  1306. }
  1307. if (flags & RSPAMD_PROTOCOL_MESSAGES) {
  1308. if (G_UNLIKELY(task->cfg->compat_messages)) {
  1309. const ucl_object_t *cur;
  1310. ucl_object_t *msg_object;
  1311. ucl_object_iter_t iter = NULL;
  1312. msg_object = ucl_object_typed_new(UCL_ARRAY);
  1313. while ((cur = ucl_object_iterate(task->messages, &iter, true)) != NULL) {
  1314. if (cur->type == UCL_STRING) {
  1315. ucl_array_append(msg_object, ucl_object_ref(cur));
  1316. }
  1317. }
  1318. ucl_object_insert_key(top, msg_object, "messages", 0, false);
  1319. }
  1320. else {
  1321. ucl_object_insert_key(top, ucl_object_ref(task->messages),
  1322. "messages", 0, false);
  1323. }
  1324. }
  1325. if (flags & RSPAMD_PROTOCOL_URLS && task->message) {
  1326. if (kh_size(MESSAGE_FIELD(task, urls)) > 0) {
  1327. ucl_object_insert_key(top,
  1328. rspamd_urls_tree_ucl(MESSAGE_FIELD(task, urls), task),
  1329. "urls", 0, false);
  1330. ucl_object_insert_key(top,
  1331. rspamd_emails_tree_ucl(MESSAGE_FIELD(task, urls), task),
  1332. "emails", 0, false);
  1333. }
  1334. }
  1335. if (flags & RSPAMD_PROTOCOL_EXTRA) {
  1336. if (G_UNLIKELY(RSPAMD_TASK_IS_PROFILING(task))) {
  1337. rspamd_protocol_output_profiling(task, top);
  1338. }
  1339. }
  1340. if (flags & RSPAMD_PROTOCOL_BASIC) {
  1341. ucl_object_insert_key(top,
  1342. ucl_object_fromstring(MESSAGE_FIELD_CHECK(task, message_id)),
  1343. "message-id", 0, false);
  1344. ucl_object_insert_key(top,
  1345. ucl_object_fromdouble(task->time_real_finish - task->task_timestamp),
  1346. "time_real", 0, false);
  1347. }
  1348. if (flags & RSPAMD_PROTOCOL_DKIM) {
  1349. dkim_sigs = rspamd_mempool_get_variable(task->task_pool,
  1350. RSPAMD_MEMPOOL_DKIM_SIGNATURE);
  1351. if (dkim_sigs) {
  1352. if (dkim_sigs->next) {
  1353. /* Multiple DKIM signatures */
  1354. ucl_object_t *ar = ucl_object_typed_new(UCL_ARRAY);
  1355. for (; dkim_sigs != NULL; dkim_sigs = dkim_sigs->next) {
  1356. GString *folded_header;
  1357. dkim_sig = (GString *) dkim_sigs->data;
  1358. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER ||
  1359. !task->message) {
  1360. folded_header = rspamd_header_value_fold(
  1361. "DKIM-Signature", strlen("DKIM-Signature"),
  1362. dkim_sig->str, dkim_sig->len,
  1363. 80, RSPAMD_TASK_NEWLINES_LF, NULL);
  1364. }
  1365. else {
  1366. folded_header = rspamd_header_value_fold(
  1367. "DKIM-Signature", strlen("DKIM-Signature"),
  1368. dkim_sig->str, dkim_sig->len,
  1369. 80,
  1370. MESSAGE_FIELD(task, nlines_type),
  1371. NULL);
  1372. }
  1373. ucl_array_append(ar,
  1374. ucl_object_fromstring_common(folded_header->str,
  1375. folded_header->len, UCL_STRING_RAW));
  1376. g_string_free(folded_header, TRUE);
  1377. }
  1378. ucl_object_insert_key(top,
  1379. ar,
  1380. "dkim-signature", 0,
  1381. false);
  1382. }
  1383. else {
  1384. /* Single DKIM signature */
  1385. GString *folded_header;
  1386. dkim_sig = (GString *) dkim_sigs->data;
  1387. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
  1388. folded_header = rspamd_header_value_fold(
  1389. "DKIM-Signature", strlen("DKIM-Signature"),
  1390. dkim_sig->str, dkim_sig->len,
  1391. 80, RSPAMD_TASK_NEWLINES_LF, NULL);
  1392. }
  1393. else {
  1394. folded_header = rspamd_header_value_fold(
  1395. "DKIM-Signature", strlen("DKIM-Signature"),
  1396. dkim_sig->str, dkim_sig->len,
  1397. 80, MESSAGE_FIELD(task, nlines_type),
  1398. NULL);
  1399. }
  1400. ucl_object_insert_key(top,
  1401. ucl_object_fromstring_common(folded_header->str,
  1402. folded_header->len, UCL_STRING_RAW),
  1403. "dkim-signature", 0, false);
  1404. g_string_free(folded_header, TRUE);
  1405. }
  1406. }
  1407. }
  1408. if (flags & RSPAMD_PROTOCOL_RMILTER) {
  1409. milter_reply = rspamd_mempool_get_variable(task->task_pool,
  1410. RSPAMD_MEMPOOL_MILTER_REPLY);
  1411. if (milter_reply) {
  1412. if (task->cmd != CMD_CHECK) {
  1413. ucl_object_insert_key(top, ucl_object_ref(milter_reply),
  1414. "milter", 0, false);
  1415. }
  1416. else {
  1417. ucl_object_insert_key(top, ucl_object_ref(milter_reply),
  1418. "rmilter", 0, false);
  1419. }
  1420. }
  1421. }
  1422. return top;
  1423. }
  1424. void rspamd_protocol_http_reply(struct rspamd_http_message *msg,
  1425. struct rspamd_task *task, ucl_object_t **pobj)
  1426. {
  1427. struct rspamd_scan_result *metric_res;
  1428. const struct rspamd_re_cache_stat *restat;
  1429. ucl_object_t *top = NULL;
  1430. rspamd_fstring_t *reply;
  1431. gint flags = RSPAMD_PROTOCOL_DEFAULT;
  1432. struct rspamd_action *action;
  1433. /* Removed in 2.0 */
  1434. #if 0
  1435. GHashTableIter hiter;
  1436. gpointer h, v;
  1437. /* Write custom headers */
  1438. g_hash_table_iter_init (&hiter, task->reply_headers);
  1439. while (g_hash_table_iter_next (&hiter, &h, &v)) {
  1440. rspamd_ftok_t *hn = h, *hv = v;
  1441. rspamd_http_message_add_header (msg, hn->begin, hv->begin);
  1442. }
  1443. #endif
  1444. flags |= RSPAMD_PROTOCOL_URLS;
  1445. top = rspamd_protocol_write_ucl(task, flags);
  1446. if (pobj) {
  1447. *pobj = top;
  1448. }
  1449. if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
  1450. rspamd_roll_history_update(task->worker->srv->history, task);
  1451. }
  1452. else {
  1453. msg_debug_protocol("skip history update due to no log flag");
  1454. }
  1455. rspamd_task_write_log(task);
  1456. if (task->cfg->log_flags & RSPAMD_LOG_FLAG_RE_CACHE) {
  1457. restat = rspamd_re_cache_get_stat(task->re_rt);
  1458. g_assert(restat != NULL);
  1459. msg_notice_task(
  1460. "regexp statistics: %ud pcre regexps scanned, %ud regexps matched,"
  1461. " %ud regexps total, %ud regexps cached,"
  1462. " %HL scanned using pcre, %HL scanned total",
  1463. restat->regexp_checked,
  1464. restat->regexp_matched,
  1465. restat->regexp_total,
  1466. restat->regexp_fast_cached,
  1467. restat->bytes_scanned_pcre,
  1468. restat->bytes_scanned);
  1469. }
  1470. reply = rspamd_fstring_sized_new(1000);
  1471. if (msg->method < HTTP_SYMBOLS && !RSPAMD_TASK_IS_SPAMC(task)) {
  1472. msg_debug_protocol("writing json reply");
  1473. rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply);
  1474. }
  1475. else {
  1476. if (RSPAMD_TASK_IS_SPAMC(task)) {
  1477. msg_debug_protocol("writing spamc legacy reply to client");
  1478. rspamd_ucl_tospamc_output(top, &reply);
  1479. }
  1480. else {
  1481. msg_debug_protocol("writing rspamc legacy reply to client");
  1482. rspamd_ucl_torspamc_output(top, &reply);
  1483. }
  1484. }
  1485. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK) {
  1486. /* Check if we need to insert a body block */
  1487. if (task->flags & RSPAMD_TASK_FLAG_MESSAGE_REWRITE) {
  1488. GString *hdr_offset = g_string_sized_new(30);
  1489. rspamd_printf_gstring(hdr_offset, "%z", RSPAMD_FSTRING_LEN(reply));
  1490. rspamd_http_message_add_header(msg, MESSAGE_OFFSET_HEADER,
  1491. hdr_offset->str);
  1492. msg_debug_protocol("write body block at position %s",
  1493. hdr_offset->str);
  1494. g_string_free(hdr_offset, TRUE);
  1495. /* In case of milter, we append just body, otherwise - full message */
  1496. if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_MILTER) {
  1497. const gchar *start;
  1498. goffset len, hdr_off;
  1499. start = task->msg.begin;
  1500. len = task->msg.len;
  1501. hdr_off = MESSAGE_FIELD(task, raw_headers_content).len;
  1502. if (hdr_off < len) {
  1503. start += hdr_off;
  1504. len -= hdr_off;
  1505. /* The problem here is that we need not end of headers, we need
  1506. * start of body.
  1507. *
  1508. * Hence, we need to skip one \r\n till there is anything else in
  1509. * a line.
  1510. */
  1511. if (*start == '\r' && len > 0) {
  1512. start++;
  1513. len--;
  1514. }
  1515. if (*start == '\n' && len > 0) {
  1516. start++;
  1517. len--;
  1518. }
  1519. msg_debug_protocol("milter version of body block size %d",
  1520. (int) len);
  1521. reply = rspamd_fstring_append(reply, start, len);
  1522. }
  1523. }
  1524. else {
  1525. msg_debug_protocol("general version of body block size %d",
  1526. (int) task->msg.len);
  1527. reply = rspamd_fstring_append(reply,
  1528. task->msg.begin, task->msg.len);
  1529. }
  1530. }
  1531. }
  1532. if ((task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED) &&
  1533. rspamd_libs_reset_compression(task->cfg->libs_ctx)) {
  1534. /* We can compress output */
  1535. ZSTD_inBuffer zin;
  1536. ZSTD_outBuffer zout;
  1537. ZSTD_CStream *zstream;
  1538. rspamd_fstring_t *compressed_reply;
  1539. gsize r;
  1540. zstream = task->cfg->libs_ctx->out_zstream;
  1541. compressed_reply = rspamd_fstring_sized_new(ZSTD_compressBound(reply->len));
  1542. zin.pos = 0;
  1543. zin.src = reply->str;
  1544. zin.size = reply->len;
  1545. zout.pos = 0;
  1546. zout.dst = compressed_reply->str;
  1547. zout.size = compressed_reply->allocated;
  1548. while (zin.pos < zin.size) {
  1549. r = ZSTD_compressStream(zstream, &zout, &zin);
  1550. if (ZSTD_isError(r)) {
  1551. msg_err_protocol("cannot compress: %s", ZSTD_getErrorName(r));
  1552. rspamd_fstring_free(compressed_reply);
  1553. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1554. goto end;
  1555. }
  1556. }
  1557. ZSTD_flushStream(zstream, &zout);
  1558. r = ZSTD_endStream(zstream, &zout);
  1559. if (ZSTD_isError(r)) {
  1560. msg_err_protocol("cannot finalize compress: %s", ZSTD_getErrorName(r));
  1561. rspamd_fstring_free(compressed_reply);
  1562. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1563. goto end;
  1564. }
  1565. msg_info_protocol("writing compressed results: %z bytes before "
  1566. "%z bytes after",
  1567. zin.pos, zout.pos);
  1568. compressed_reply->len = zout.pos;
  1569. rspamd_fstring_free(reply);
  1570. rspamd_http_message_set_body_from_fstring_steal(msg, compressed_reply);
  1571. rspamd_http_message_add_header(msg, COMPRESSION_HEADER, "zstd");
  1572. if (task->cfg->libs_ctx->out_dict &&
  1573. task->cfg->libs_ctx->out_dict->id != 0) {
  1574. gchar dict_str[32];
  1575. rspamd_snprintf(dict_str, sizeof(dict_str), "%ud",
  1576. task->cfg->libs_ctx->out_dict->id);
  1577. rspamd_http_message_add_header(msg, "Dictionary", dict_str);
  1578. }
  1579. }
  1580. else {
  1581. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1582. }
  1583. end:
  1584. if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) {
  1585. /* Update stat for default metric */
  1586. msg_debug_protocol("skip stats update due to no_stat flag");
  1587. metric_res = task->result;
  1588. if (metric_res != NULL) {
  1589. action = rspamd_check_action_metric(task, NULL, NULL);
  1590. /* TODO: handle custom actions in stats */
  1591. if (action->action_type == METRIC_ACTION_SOFT_REJECT &&
  1592. (task->flags & RSPAMD_TASK_FLAG_GREYLISTED)) {
  1593. /* Set stat action to greylist to display greylisted messages */
  1594. #ifndef HAVE_ATOMIC_BUILTINS
  1595. task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST]++;
  1596. #else
  1597. __atomic_add_fetch(&task->worker->srv->stat->actions_stat[METRIC_ACTION_GREYLIST],
  1598. 1, __ATOMIC_RELEASE);
  1599. #endif
  1600. }
  1601. else if (action->action_type < METRIC_ACTION_MAX) {
  1602. #ifndef HAVE_ATOMIC_BUILTINS
  1603. task->worker->srv->stat->actions_stat[action->action_type]++;
  1604. #else
  1605. __atomic_add_fetch(&task->worker->srv->stat->actions_stat[action->action_type],
  1606. 1, __ATOMIC_RELEASE);
  1607. #endif
  1608. }
  1609. }
  1610. /* Increase counters */
  1611. #ifndef HAVE_ATOMIC_BUILTINS
  1612. task->worker->srv->stat->messages_scanned++;
  1613. #else
  1614. __atomic_add_fetch(&task->worker->srv->stat->messages_scanned,
  1615. 1, __ATOMIC_RELEASE);
  1616. #endif
  1617. /* Set average processing time */
  1618. guint32 slot;
  1619. float processing_time = task->time_real_finish - task->task_timestamp;
  1620. #ifndef HAVE_ATOMIC_BUILTINS
  1621. slot = task->worker->srv->stat->avg_time.cur_slot++;
  1622. #else
  1623. slot = __atomic_fetch_add(&task->worker->srv->stat->avg_time.cur_slot,
  1624. 1, __ATOMIC_RELEASE);
  1625. #endif
  1626. slot = slot % MAX_AVG_TIME_SLOTS;
  1627. /* TODO: this should be atomic but it is not supported in C */
  1628. task->worker->srv->stat->avg_time.avg_time[slot] = processing_time;
  1629. }
  1630. }
  1631. void rspamd_protocol_write_log_pipe(struct rspamd_task *task)
  1632. {
  1633. struct rspamd_worker_log_pipe *lp;
  1634. struct rspamd_protocol_log_message_sum *ls;
  1635. lua_State *L = task->cfg->lua_state;
  1636. struct rspamd_scan_result *mres;
  1637. struct rspamd_symbol_result *sym;
  1638. gint id, i;
  1639. guint32 n = 0, nextra = 0;
  1640. gsize sz;
  1641. GArray *extra;
  1642. struct rspamd_protocol_log_symbol_result er;
  1643. struct rspamd_task **ptask;
  1644. /* Get extra results from lua plugins */
  1645. extra = g_array_new(FALSE, FALSE, sizeof(er));
  1646. lua_getglobal(L, "rspamd_plugins");
  1647. if (lua_istable(L, -1)) {
  1648. lua_pushnil(L);
  1649. while (lua_next(L, -2)) {
  1650. if (lua_istable(L, -1)) {
  1651. lua_pushvalue(L, -2);
  1652. /* stack:
  1653. * -1: copy of key
  1654. * -2: value (module table)
  1655. * -3: key (module name)
  1656. * -4: global
  1657. */
  1658. lua_pushstring(L, "log_callback");
  1659. lua_gettable(L, -3);
  1660. /* stack:
  1661. * -1: func
  1662. * -2: copy of key
  1663. * -3: value (module table)
  1664. * -3: key (module name)
  1665. * -4: global
  1666. */
  1667. if (lua_isfunction(L, -1)) {
  1668. ptask = lua_newuserdata(L, sizeof(*ptask));
  1669. *ptask = task;
  1670. rspamd_lua_setclass(L, rspamd_task_classname, -1);
  1671. /* stack:
  1672. * -1: task
  1673. * -2: func
  1674. * -3: key copy
  1675. * -4: value (module table)
  1676. * -5: key (module name)
  1677. * -6: global
  1678. */
  1679. msg_debug_protocol("calling for %s", lua_tostring(L, -3));
  1680. if (lua_pcall(L, 1, 1, 0) != 0) {
  1681. msg_info_protocol("call to log callback %s failed: %s",
  1682. lua_tostring(L, -2), lua_tostring(L, -1));
  1683. lua_pop(L, 1);
  1684. /* stack:
  1685. * -1: key copy
  1686. * -2: value
  1687. * -3: key
  1688. */
  1689. }
  1690. else {
  1691. /* stack:
  1692. * -1: result
  1693. * -2: key copy
  1694. * -3: value
  1695. * -4: key
  1696. */
  1697. if (lua_istable(L, -1)) {
  1698. /* Another iteration */
  1699. lua_pushnil(L);
  1700. while (lua_next(L, -2)) {
  1701. /* stack:
  1702. * -1: value
  1703. * -2: key
  1704. * -3: result table (pcall)
  1705. * -4: key copy (parent)
  1706. * -5: value (parent)
  1707. * -6: key (parent)
  1708. */
  1709. if (lua_istable(L, -1)) {
  1710. er.id = 0;
  1711. er.score = 0.0;
  1712. lua_rawgeti(L, -1, 1);
  1713. if (lua_isnumber(L, -1)) {
  1714. er.id = lua_tonumber(L, -1);
  1715. }
  1716. lua_rawgeti(L, -2, 2);
  1717. if (lua_isnumber(L, -1)) {
  1718. er.score = lua_tonumber(L, -1);
  1719. }
  1720. /* stack:
  1721. * -1: value[2]
  1722. * -2: value[1]
  1723. * -3: values
  1724. * -4: key
  1725. * -5: result table (pcall)
  1726. * -6: key copy (parent)
  1727. * -7: value (parent)
  1728. * -8: key (parent)
  1729. */
  1730. lua_pop(L, 2); /* Values */
  1731. g_array_append_val(extra, er);
  1732. }
  1733. lua_pop(L, 1); /* Value for lua_next */
  1734. }
  1735. lua_pop(L, 1); /* Table result of pcall */
  1736. }
  1737. else {
  1738. msg_info_protocol("call to log callback %s returned "
  1739. "wrong type: %s",
  1740. lua_tostring(L, -2),
  1741. lua_typename(L, lua_type(L, -1)));
  1742. lua_pop(L, 1); /* Returned error */
  1743. }
  1744. }
  1745. }
  1746. else {
  1747. lua_pop(L, 1);
  1748. /* stack:
  1749. * -1: key copy
  1750. * -2: value
  1751. * -3: key
  1752. */
  1753. }
  1754. }
  1755. lua_pop(L, 2); /* Top table + key copy */
  1756. }
  1757. lua_pop(L, 1); /* rspamd_plugins global */
  1758. }
  1759. else {
  1760. lua_pop(L, 1);
  1761. }
  1762. nextra = extra->len;
  1763. LL_FOREACH(task->cfg->log_pipes, lp)
  1764. {
  1765. if (lp->fd != -1) {
  1766. switch (lp->type) {
  1767. case RSPAMD_LOG_PIPE_SYMBOLS:
  1768. mres = task->result;
  1769. if (mres) {
  1770. n = kh_size(mres->symbols);
  1771. sz = sizeof(*ls) +
  1772. sizeof(struct rspamd_protocol_log_symbol_result) *
  1773. (n + nextra);
  1774. ls = g_malloc0(sz);
  1775. /* Handle settings id */
  1776. if (task->settings_elt) {
  1777. ls->settings_id = task->settings_elt->id;
  1778. }
  1779. else {
  1780. ls->settings_id = 0;
  1781. }
  1782. ls->score = mres->score;
  1783. ls->required_score = rspamd_task_get_required_score(task,
  1784. mres);
  1785. ls->nresults = n;
  1786. ls->nextra = nextra;
  1787. i = 0;
  1788. kh_foreach_value(mres->symbols, sym, {
  1789. id = rspamd_symcache_find_symbol(task->cfg->cache,
  1790. sym->name);
  1791. if (id >= 0) {
  1792. ls->results[i].id = id;
  1793. ls->results[i].score = sym->score;
  1794. }
  1795. else {
  1796. ls->results[i].id = -1;
  1797. ls->results[i].score = 0.0;
  1798. }
  1799. i++;
  1800. });
  1801. memcpy(&ls->results[n], extra->data, nextra * sizeof(er));
  1802. }
  1803. else {
  1804. sz = sizeof(*ls);
  1805. ls = g_malloc0(sz);
  1806. ls->nresults = 0;
  1807. }
  1808. /* We don't really care about return value here */
  1809. if (write(lp->fd, ls, sz) == -1) {
  1810. msg_info_protocol("cannot write to log pipe: %s",
  1811. strerror(errno));
  1812. }
  1813. g_free(ls);
  1814. break;
  1815. default:
  1816. msg_err_protocol("unknown log format %d", lp->type);
  1817. break;
  1818. }
  1819. }
  1820. }
  1821. g_array_free(extra, TRUE);
  1822. }
  1823. void rspamd_protocol_write_reply(struct rspamd_task *task, ev_tstamp timeout)
  1824. {
  1825. struct rspamd_http_message *msg;
  1826. const gchar *ctype = "application/json";
  1827. rspamd_fstring_t *reply;
  1828. msg = rspamd_http_new_message(HTTP_RESPONSE);
  1829. if (rspamd_http_connection_is_encrypted(task->http_conn)) {
  1830. msg_info_protocol("<%s> writing encrypted reply",
  1831. MESSAGE_FIELD_CHECK(task, message_id));
  1832. }
  1833. /* Compatibility */
  1834. if (task->cmd == CMD_CHECK_RSPAMC) {
  1835. msg->method = HTTP_SYMBOLS;
  1836. }
  1837. else if (task->cmd == CMD_CHECK_SPAMC) {
  1838. msg->method = HTTP_SYMBOLS;
  1839. msg->flags |= RSPAMD_HTTP_FLAG_SPAMC;
  1840. }
  1841. if (task->err != NULL) {
  1842. msg_debug_protocol("writing error reply to client");
  1843. ucl_object_t *top = NULL;
  1844. top = ucl_object_typed_new(UCL_OBJECT);
  1845. msg->code = 500 + task->err->code % 100;
  1846. msg->status = rspamd_fstring_new_init(task->err->message,
  1847. strlen(task->err->message));
  1848. ucl_object_insert_key(top, ucl_object_fromstring(task->err->message),
  1849. "error", 0, false);
  1850. ucl_object_insert_key(top,
  1851. ucl_object_fromstring(g_quark_to_string(task->err->domain)),
  1852. "error_domain", 0, false);
  1853. reply = rspamd_fstring_sized_new(256);
  1854. rspamd_ucl_emit_fstring(top, UCL_EMIT_JSON_COMPACT, &reply);
  1855. ucl_object_unref(top);
  1856. /* We also need to validate utf8 */
  1857. if (rspamd_fast_utf8_validate(reply->str, reply->len) != 0) {
  1858. gsize valid_len;
  1859. gchar *validated;
  1860. /* We copy reply several times here but it should be a rare case */
  1861. validated = rspamd_str_make_utf_valid(reply->str, reply->len,
  1862. &valid_len, task->task_pool);
  1863. rspamd_http_message_set_body(msg, validated, valid_len);
  1864. rspamd_fstring_free(reply);
  1865. }
  1866. else {
  1867. rspamd_http_message_set_body_from_fstring_steal(msg, reply);
  1868. }
  1869. }
  1870. else {
  1871. msg->status = rspamd_fstring_new_init("OK", 2);
  1872. switch (task->cmd) {
  1873. case CMD_CHECK:
  1874. case CMD_CHECK_RSPAMC:
  1875. case CMD_CHECK_SPAMC:
  1876. case CMD_SKIP:
  1877. case CMD_CHECK_V2:
  1878. rspamd_protocol_http_reply(msg, task, NULL);
  1879. rspamd_protocol_write_log_pipe(task);
  1880. break;
  1881. case CMD_PING:
  1882. msg_debug_protocol("writing pong to client");
  1883. rspamd_http_message_set_body(msg, "pong" CRLF, 6);
  1884. ctype = "text/plain";
  1885. break;
  1886. default:
  1887. msg_err_protocol("BROKEN");
  1888. break;
  1889. }
  1890. }
  1891. ev_now_update(task->event_loop);
  1892. msg->date = ev_time();
  1893. rspamd_http_connection_reset(task->http_conn);
  1894. rspamd_http_connection_write_message(task->http_conn, msg, NULL,
  1895. ctype, task, timeout);
  1896. task->processed_stages |= RSPAMD_TASK_STAGE_REPLIED;
  1897. }