You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

protocol.c 27KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "util.h"
  19. #include "cfg_file.h"
  20. #include "cfg_rcl.h"
  21. #include "message.h"
  22. #include "utlist.h"
  23. #include "http.h"
  24. /* Max line size: reuses the stdio BUFSIZ constant */
  25. #define OUTBUFSIZ BUFSIZ
  26. /*
  27. * Just check if the passed message is spam or not and reply as
  28. * described below
  29. */
  30. #define MSG_CMD_CHECK "check"
  31. /*
  32. * Check if message is spam or not, and return score plus list
  33. * of symbols hit
  34. */
  35. #define MSG_CMD_SYMBOLS "symbols"
  36. /*
  37. * Check if message is spam or not, and return score plus report
  38. */
  39. #define MSG_CMD_REPORT "report"
  40. /*
  41. * Check if message is spam or not, and return score plus report
  42. * if the message is spam
  43. */
  44. #define MSG_CMD_REPORT_IFSPAM "report_ifspam"
  45. /*
  46. * Ignore this message -- client opened connection then changed its mind
  47. */
  48. #define MSG_CMD_SKIP "skip"
  49. /*
  50. * Return a confirmation that spamd is alive
  51. */
  52. #define MSG_CMD_PING "ping"
  53. /*
  54. * Process this message as described above and return modified message
  55. */
  56. #define MSG_CMD_PROCESS "process"
  57. /*
  58. * Learn the specified statfile using the message
  59. */
  60. #define MSG_CMD_LEARN "learn"
  61. /*
  62. * SpamAssassin (spamc) greeting:
  63. */
  64. #define SPAMC_GREETING "SPAMC"
  65. /*
  66. * rspamd (rspamc) greeting:
  67. */
  68. #define RSPAMC_GREETING "RSPAMC"
  69. /*
  70. * Protocol header names (matched case-insensitively by the header parser)
  71. */
  72. #define CONTENT_LENGTH_HEADER "Content-length"
  73. #define HELO_HEADER "Helo"
  74. #define FROM_HEADER "From"
  75. #define IP_ADDR_HEADER "IP"
  76. #define NRCPT_HEADER "Recipient-Number"
  77. #define RCPT_HEADER "Rcpt"
  78. #define SUBJECT_HEADER "Subject"
  79. #define STATFILE_HEADER "Statfile"
  80. #define QUEUE_ID_HEADER "Queue-ID"
  81. #define ERROR_HEADER "Error"
  82. #define USER_HEADER "User"
  83. #define URLS_HEADER "URL-Format" /* value "extended" sets RSPAMD_TASK_FLAG_EXT_URLS */
  84. #define PASS_HEADER "Pass" /* value "all" sets RSPAMD_TASK_FLAG_PASS_ALL */
  85. #define JSON_HEADER "Json" /* boolean value toggles RSPAMD_TASK_FLAG_JSON */
  86. #define HOSTNAME_HEADER "Hostname"
  87. #define DELIVER_TO_HEADER "Deliver-To"
  88. #define NO_LOG_HEADER "Log" /* value "no" sets RSPAMD_TASK_FLAG_NO_LOG */
  89. #define MLEN_HEADER "Message-Length" /* parsed into task->message_len */
  90. static GList *custom_commands = NULL; /* list of struct custom_command, searched by rspamd_protocol_handle_url */
  91. static GQuark
  92. rspamd_protocol_quark (void)
  93. {
  94. return g_quark_from_static_string ("protocol-error");
  95. }
  96. /*
  97. * Remove <> from the fixed string and copy it to the pool
  98. */
  99. static gchar *
  100. rspamd_protocol_escape_braces (struct rspamd_task *task, rspamd_fstring_t *in)
  101. {
  102. guint nchars = 0;
  103. const gchar *p;
  104. rspamd_ftok_t tok;
  105. g_assert (in != NULL);
  106. g_assert (in->len > 0);
  107. p = in->str;
  108. while ((g_ascii_isspace (*p) || *p == '<') && nchars < in->len) {
  109. p++;
  110. nchars ++;
  111. }
  112. tok.begin = p;
  113. p = in->str + in->len - 1;
  114. tok.len = in->len - nchars;
  115. while ((!g_ascii_isspace (*p) && *p !=
  116. '>') && tok.len > 0) {
  117. p--;
  118. tok.len --;
  119. }
  120. return rspamd_mempool_ftokdup (task->task_pool, &tok);
  121. }
  122. static gboolean
  123. rspamd_protocol_handle_url (struct rspamd_task *task,
  124. struct rspamd_http_message *msg)
  125. {
  126. GList *cur;
  127. GHashTable *query_args;
  128. GHashTableIter it;
  129. struct custom_command *cmd;
  130. struct http_parser_url u;
  131. const gchar *p;
  132. gsize pathlen;
  133. rspamd_ftok_t *key, *value;
  134. gpointer k, v;
  135. if (msg->url == NULL || msg->url->len == 0) {
  136. g_set_error (&task->err, rspamd_protocol_quark(), 400, "missing command");
  137. return FALSE;
  138. }
  139. if (http_parser_parse_url (msg->url->str, msg->url->len, 0, &u) != 0) {
  140. g_set_error (&task->err, rspamd_protocol_quark(), 400, "bad request URL");
  141. return FALSE;
  142. }
  143. if (!(u.field_set & (1 << UF_PATH))) {
  144. g_set_error (&task->err, rspamd_protocol_quark(), 400,
  145. "bad request URL: missing path");
  146. return FALSE;
  147. }
  148. p = msg->url->str + u.field_data[UF_PATH].off;
  149. pathlen = u.field_data[UF_PATH].len;
  150. if (*p == '/') {
  151. p ++;
  152. pathlen --;
  153. }
  154. switch (*p) {
  155. case 'c':
  156. case 'C':
  157. /* check */
  158. if (g_ascii_strncasecmp (p, MSG_CMD_CHECK, pathlen) == 0) {
  159. task->cmd = CMD_CHECK;
  160. }
  161. else {
  162. goto err;
  163. }
  164. break;
  165. case 's':
  166. case 'S':
  167. /* symbols, skip */
  168. if (g_ascii_strncasecmp (p, MSG_CMD_SYMBOLS, pathlen) == 0) {
  169. task->cmd = CMD_SYMBOLS;
  170. }
  171. else if (g_ascii_strncasecmp (p, MSG_CMD_SKIP, pathlen) == 0) {
  172. task->cmd = CMD_SKIP;
  173. }
  174. else {
  175. goto err;
  176. }
  177. break;
  178. case 'p':
  179. case 'P':
  180. /* ping, process */
  181. if (g_ascii_strncasecmp (p, MSG_CMD_PING, pathlen) == 0) {
  182. task->cmd = CMD_PING;
  183. }
  184. else if (g_ascii_strncasecmp (p, MSG_CMD_PROCESS, pathlen) == 0) {
  185. task->cmd = CMD_PROCESS;
  186. }
  187. else {
  188. goto err;
  189. }
  190. break;
  191. case 'r':
  192. case 'R':
  193. /* report, report_ifspam */
  194. if (g_ascii_strncasecmp (p, MSG_CMD_REPORT, pathlen) == 0) {
  195. task->cmd = CMD_REPORT;
  196. }
  197. else if (g_ascii_strncasecmp (p, MSG_CMD_REPORT_IFSPAM,
  198. pathlen) == 0) {
  199. task->cmd = CMD_REPORT_IFSPAM;
  200. }
  201. else {
  202. goto err;
  203. }
  204. break;
  205. default:
  206. cur = custom_commands;
  207. while (cur) {
  208. cmd = cur->data;
  209. if (g_ascii_strncasecmp (p, cmd->name, pathlen) == 0) {
  210. task->cmd = CMD_OTHER;
  211. task->custom_cmd = cmd;
  212. break;
  213. }
  214. cur = g_list_next (cur);
  215. }
  216. if (cur == NULL) {
  217. goto err;
  218. }
  219. break;
  220. }
  221. if (u.field_set & (1 << UF_QUERY)) {
  222. /* In case if we have a query, we need to store it somewhere */
  223. query_args = rspamd_http_message_parse_query (msg);
  224. /* Insert the rest of query params as HTTP headers */
  225. g_hash_table_iter_init (&it, query_args);
  226. while (g_hash_table_iter_next (&it, &k, &v)) {
  227. key = k;
  228. value = v;
  229. /* Steal strings */
  230. g_hash_table_iter_steal (&it);
  231. g_hash_table_replace (task->request_headers, key, value);
  232. msg_debug_task ("added header \"%T\" -> \"%T\" from HTTP query",
  233. key, value);
  234. }
  235. g_hash_table_unref (query_args);
  236. }
  237. return TRUE;
  238. err:
  239. g_set_error (&task->err, rspamd_protocol_quark(), 400, "invalid command: %*.s",
  240. (gint)pathlen, p);
  241. return FALSE;
  242. }
  243. #define IF_HEADER(name) \
  244. srch.begin = (name); \
  245. srch.len = sizeof (name) - 1; \
  246. if (rspamd_ftok_casecmp (hn_tok, &srch) == 0)
  247. gboolean
  248. rspamd_protocol_handle_headers (struct rspamd_task *task,
  249. struct rspamd_http_message *msg)
  250. {
  251. rspamd_fstring_t *hn, *hv;
  252. rspamd_ftok_t *hn_tok, *hv_tok, srch;
  253. gboolean fl, has_ip = FALSE;
  254. struct rspamd_http_header *h;
  255. LL_FOREACH (msg->headers, h)
  256. {
  257. hn = rspamd_fstring_new_init (h->name->begin, h->name->len);
  258. hv = rspamd_fstring_new_init (h->value->begin, h->value->len);
  259. hn_tok = rspamd_ftok_map (hn);
  260. hv_tok = rspamd_ftok_map (hv);
  261. g_hash_table_replace (task->request_headers, hn_tok, hv_tok);
  262. switch (*hn_tok->begin) {
  263. case 'd':
  264. case 'D':
  265. IF_HEADER (DELIVER_TO_HEADER) {
  266. task->deliver_to = rspamd_protocol_escape_braces (task, hv);
  267. debug_task ("read deliver-to header, value: %s",
  268. task->deliver_to);
  269. }
  270. else {
  271. debug_task ("wrong header: %V", hn);
  272. }
  273. break;
  274. case 'h':
  275. case 'H':
  276. IF_HEADER (HELO_HEADER) {
  277. task->helo = rspamd_mempool_ftokdup (task->task_pool, hv_tok);
  278. debug_task ("read helo header, value: %s", task->helo);
  279. }
  280. IF_HEADER (HOSTNAME_HEADER) {
  281. task->hostname = rspamd_mempool_ftokdup (task->task_pool,
  282. hv_tok);
  283. debug_task ("read hostname header, value: %s", task->hostname);
  284. }
  285. break;
  286. case 'f':
  287. case 'F':
  288. IF_HEADER (FROM_HEADER) {
  289. if (!rspamd_task_add_sender (task,
  290. rspamd_mempool_ftokdup (task->task_pool, hv_tok))) {
  291. msg_err_task ("bad from header: '%V'", hv);
  292. }
  293. }
  294. else {
  295. debug_task ("wrong header: %V", hn);
  296. }
  297. break;
  298. case 'j':
  299. case 'J':
  300. IF_HEADER (JSON_HEADER) {
  301. fl = rspamd_config_parse_flag (hv->str, hv->len);
  302. if (fl) {
  303. task->flags |= RSPAMD_TASK_FLAG_JSON;
  304. }
  305. else {
  306. task->flags &= ~RSPAMD_TASK_FLAG_JSON;
  307. }
  308. }
  309. else {
  310. debug_task ("wrong header: %V", hn);
  311. }
  312. break;
  313. case 'q':
  314. case 'Q':
  315. IF_HEADER (QUEUE_ID_HEADER) {
  316. task->queue_id = rspamd_mempool_ftokdup (task->task_pool,
  317. hv_tok);
  318. debug_task ("read queue_id header, value: %s", task->queue_id);
  319. }
  320. else {
  321. debug_task ("wrong header: %V", hn);
  322. }
  323. break;
  324. case 'r':
  325. case 'R':
  326. IF_HEADER (RCPT_HEADER) {
  327. if (!rspamd_task_add_recipient (task,
  328. rspamd_mempool_ftokdup (task->task_pool, hv_tok))) {
  329. msg_err_task ("bad from header: '%T'", h->value);
  330. }
  331. debug_task ("read rcpt header, value: %V", hv);
  332. }
  333. else {
  334. debug_task ("wrong header: %V", hn);
  335. }
  336. break;
  337. case 'i':
  338. case 'I':
  339. IF_HEADER (IP_ADDR_HEADER) {
  340. if (!rspamd_parse_inet_address (&task->from_addr, hv->str, hv->len)) {
  341. msg_err_task ("bad ip header: '%V'", hv);
  342. return FALSE;
  343. }
  344. debug_task ("read IP header, value: %V", hv);
  345. has_ip = TRUE;
  346. }
  347. else {
  348. debug_task ("wrong header: %V", hn);
  349. }
  350. break;
  351. case 'p':
  352. case 'P':
  353. IF_HEADER (PASS_HEADER) {
  354. srch.begin = "all";
  355. srch.len = 3;
  356. if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
  357. task->flags |= RSPAMD_TASK_FLAG_PASS_ALL;
  358. debug_task ("pass all filters");
  359. }
  360. }
  361. break;
  362. case 's':
  363. case 'S':
  364. IF_HEADER (SUBJECT_HEADER) {
  365. task->subject = rspamd_mempool_ftokdup (task->task_pool, hv_tok);
  366. }
  367. break;
  368. case 'u':
  369. case 'U':
  370. IF_HEADER (USER_HEADER) {
  371. /*
  372. * We must ignore User header in case of spamc, as SA has
  373. * different meaning of this header
  374. */
  375. if (!RSPAMD_TASK_IS_SPAMC (task)) {
  376. task->user = rspamd_mempool_ftokdup (task->task_pool,
  377. hv_tok);
  378. }
  379. }
  380. IF_HEADER (URLS_HEADER) {
  381. srch.begin = "extended";
  382. srch.len = 8;
  383. if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
  384. task->flags |= RSPAMD_TASK_FLAG_EXT_URLS;
  385. debug_task ("extended urls information");
  386. }
  387. }
  388. break;
  389. case 'l':
  390. case 'L':
  391. IF_HEADER (NO_LOG_HEADER) {
  392. srch.begin = "no";
  393. srch.len = 2;
  394. if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
  395. task->flags |= RSPAMD_TASK_FLAG_NO_LOG;
  396. }
  397. }
  398. break;
  399. case 'm':
  400. case 'M':
  401. IF_HEADER (MLEN_HEADER) {
  402. if (!rspamd_strtoul (hv_tok->begin,
  403. hv_tok->len,
  404. &task->message_len)) {
  405. msg_err_task ("Invalid message length header: %V", hv);
  406. }
  407. else {
  408. task->flags |= RSPAMD_TASK_FLAG_HAS_CONTROL;
  409. }
  410. }
  411. break;
  412. default:
  413. debug_task ("unknown header: %V", hn);
  414. break;
  415. }
  416. }
  417. if (task->hostname == NULL || task->hostname[0] == '\0') {
  418. /* We assume that hostname is either "unknown" or existing */
  419. task->hostname = rspamd_mempool_strdup (task->task_pool, "unknown");
  420. }
  421. if (!has_ip) {
  422. task->flags |= RSPAMD_TASK_FLAG_NO_IP;
  423. }
  424. return TRUE;
  425. }
  426. #define BOOL_TO_FLAG(val, flags, flag) do { \
  427. if ((val)) (flags) |= (flag); \
  428. else (flags) &= ~(flag); \
  429. } while(0)
  430. gboolean
  431. rspamd_protocol_parse_task_flags (rspamd_mempool_t *pool,
  432. const ucl_object_t *obj,
  433. gpointer ud,
  434. struct rspamd_rcl_section *section,
  435. GError **err)
  436. {
  437. struct rspamd_rcl_struct_parser *pd = ud;
  438. gint *target;
  439. const gchar *key;
  440. gboolean value;
  441. target = (gint *)(((gchar *)pd->user_struct) + pd->offset);
  442. key = ucl_object_key (obj);
  443. value = ucl_object_toboolean (obj);
  444. if (key != NULL) {
  445. if (g_ascii_strcasecmp (key, "pass_all") == 0) {
  446. BOOL_TO_FLAG (value, *target, RSPAMD_TASK_FLAG_PASS_ALL);
  447. }
  448. else if (g_ascii_strcasecmp (key, "no_log") == 0) {
  449. BOOL_TO_FLAG (value, *target, RSPAMD_TASK_FLAG_NO_LOG);
  450. }
  451. }
  452. return TRUE;
  453. }
  454. static struct rspamd_rcl_section *control_parser = NULL;
  455. static void
  456. rspamd_protocol_control_parser_init (void)
  457. {
  458. struct rspamd_rcl_section *sub;
  459. if (control_parser == NULL) {
  460. sub = rspamd_rcl_add_section (&control_parser,
  461. "*",
  462. NULL,
  463. NULL,
  464. UCL_OBJECT,
  465. FALSE,
  466. TRUE);
  467. /* Default handlers */
  468. rspamd_rcl_add_default_handler (sub,
  469. "ip",
  470. rspamd_rcl_parse_struct_addr,
  471. G_STRUCT_OFFSET (struct rspamd_task, from_addr),
  472. 0,
  473. NULL);
  474. rspamd_rcl_add_default_handler (sub,
  475. "from",
  476. rspamd_rcl_parse_struct_mime_addr,
  477. G_STRUCT_OFFSET (struct rspamd_task, from_envelope),
  478. 0,
  479. NULL);
  480. rspamd_rcl_add_default_handler (sub,
  481. "rcpt",
  482. rspamd_rcl_parse_struct_mime_addr,
  483. G_STRUCT_OFFSET (struct rspamd_task, rcpt_envelope),
  484. 0,
  485. NULL);
  486. rspamd_rcl_add_default_handler (sub,
  487. "helo",
  488. rspamd_rcl_parse_struct_string,
  489. G_STRUCT_OFFSET (struct rspamd_task, helo),
  490. 0,
  491. NULL);
  492. rspamd_rcl_add_default_handler (sub,
  493. "user",
  494. rspamd_rcl_parse_struct_string,
  495. G_STRUCT_OFFSET (struct rspamd_task, user),
  496. 0,
  497. NULL);
  498. rspamd_rcl_add_default_handler (sub,
  499. "pass_all",
  500. rspamd_protocol_parse_task_flags,
  501. G_STRUCT_OFFSET (struct rspamd_task, flags),
  502. 0,
  503. NULL);
  504. rspamd_rcl_add_default_handler (sub,
  505. "json",
  506. rspamd_protocol_parse_task_flags,
  507. G_STRUCT_OFFSET (struct rspamd_task, flags),
  508. 0,
  509. NULL);
  510. }
  511. }
  512. gboolean
  513. rspamd_protocol_handle_control (struct rspamd_task *task,
  514. const ucl_object_t *control)
  515. {
  516. GError *err = NULL;
  517. rspamd_protocol_control_parser_init ();
  518. if (!rspamd_rcl_parse (control_parser, task, task->task_pool,
  519. control, &err)) {
  520. msg_warn_task ("cannot parse control block: %e", err);
  521. g_error_free (err);
  522. return FALSE;
  523. }
  524. return TRUE;
  525. }
  526. gboolean
  527. rspamd_protocol_handle_request (struct rspamd_task *task,
  528. struct rspamd_http_message *msg)
  529. {
  530. gboolean ret = TRUE;
  531. if (msg->method == HTTP_SYMBOLS) {
  532. task->cmd = CMD_SYMBOLS;
  533. task->flags &= ~RSPAMD_TASK_FLAG_JSON;
  534. }
  535. else if (msg->method == HTTP_CHECK) {
  536. task->cmd = CMD_CHECK;
  537. task->flags &= ~RSPAMD_TASK_FLAG_JSON;
  538. }
  539. else {
  540. task->flags |= RSPAMD_TASK_FLAG_JSON;
  541. ret = rspamd_protocol_handle_url (task, msg);
  542. }
  543. if (msg->flags & RSPAMD_HTTP_FLAG_SPAMC) {
  544. task->flags &= ~RSPAMD_TASK_FLAG_JSON;
  545. task->flags |= RSPAMD_TASK_FLAG_SPAMC;
  546. }
  547. return ret;
  548. }
  549. /* State passed as user data to the URL and e-mail hash table callbacks */
  550. struct tree_cb_data {
  551. ucl_object_t *top; /* UCL array the callbacks append their elements to */
  552. struct rspamd_task *task; /* task whose flags and config are consulted */
  553. };
  554. /*
  555. * Callback for writing urls
  556. */
  557. static void
  558. urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
  559. {
  560. struct tree_cb_data *cb = ud;
  561. struct rspamd_url *url = value;
  562. ucl_object_t *obj, *elt;
  563. struct rspamd_task *task = cb->task;
  564. const gchar *user_field = "unknown";
  565. gboolean has_user = FALSE;
  566. if (!(task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
  567. obj = ucl_object_fromlstring (url->string, url->urllen);
  568. }
  569. else {
  570. obj = ucl_object_typed_new (UCL_OBJECT);
  571. elt = ucl_object_fromlstring (url->string, url->urllen);
  572. ucl_object_insert_key (obj, elt, "url", 0, false);
  573. if (url->surbllen > 0) {
  574. elt = ucl_object_fromlstring (url->surbl, url->surbllen);
  575. ucl_object_insert_key (obj, elt, "surbl", 0, false);
  576. }
  577. if (url->hostlen > 0) {
  578. elt = ucl_object_fromlstring (url->host, url->hostlen);
  579. ucl_object_insert_key (obj, elt, "host", 0, false);
  580. }
  581. elt = ucl_object_frombool (url->flags & RSPAMD_URL_FLAG_PHISHED);
  582. ucl_object_insert_key (obj, elt, "phished", 0, false);
  583. }
  584. ucl_array_append (cb->top, obj);
  585. if (cb->task->cfg->log_urls) {
  586. if (task->user) {
  587. user_field = task->user;
  588. has_user = TRUE;
  589. }
  590. else if (task->from_envelope) {
  591. InternetAddress *ia;
  592. ia = internet_address_list_get_address (task->from_envelope, 0);
  593. if (ia && INTERNET_ADDRESS_IS_MAILBOX (ia)) {
  594. InternetAddressMailbox *iamb = INTERNET_ADDRESS_MAILBOX (ia);
  595. user_field = iamb->addr;
  596. }
  597. }
  598. msg_info_task ("<%s> %s: %s; ip: %s; URL: %*s",
  599. task->message_id,
  600. has_user ? "user" : "from",
  601. user_field,
  602. rspamd_inet_address_to_string (task->from_addr),
  603. url->urllen, url->string);
  604. }
  605. }
  606. static ucl_object_t *
  607. rspamd_urls_tree_ucl (GHashTable *input, struct rspamd_task *task)
  608. {
  609. struct tree_cb_data cb;
  610. ucl_object_t *obj;
  611. obj = ucl_object_typed_new (UCL_ARRAY);
  612. cb.top = obj;
  613. cb.task = task;
  614. g_hash_table_foreach (input, urls_protocol_cb, &cb);
  615. return obj;
  616. }
  617. static void
  618. emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
  619. {
  620. struct tree_cb_data *cb = ud;
  621. struct rspamd_url *url = value;
  622. ucl_object_t *obj;
  623. if (url->userlen > 0 && url->hostlen > 0 &&
  624. url->host == url->user + url->userlen + 1) {
  625. obj = ucl_object_fromlstring (url->user,
  626. url->userlen + url->hostlen + 1);
  627. ucl_array_append (cb->top, obj);
  628. }
  629. }
  630. static ucl_object_t *
  631. rspamd_emails_tree_ucl (GHashTable *input, struct rspamd_task *task)
  632. {
  633. struct tree_cb_data cb;
  634. ucl_object_t *obj;
  635. obj = ucl_object_typed_new (UCL_ARRAY);
  636. cb.top = obj;
  637. cb.task = task;
  638. g_hash_table_foreach (input, emails_protocol_cb, &cb);
  639. return obj;
  640. }
  641. /* Write new subject */
  642. static const gchar *
  643. make_rewritten_subject (struct metric *metric, struct rspamd_task *task)
  644. {
  645. static gchar subj_buf[1024];
  646. gchar *p = subj_buf, *end, *res;
  647. const gchar *s, *c;
  648. end = p + sizeof(subj_buf);
  649. c = metric->subject;
  650. if (c == NULL) {
  651. c = SPAM_SUBJECT;
  652. }
  653. s = g_mime_message_get_subject (task->message);
  654. while (p < end) {
  655. if (*c == '\0') {
  656. *p = '\0';
  657. break;
  658. }
  659. else if (*c == '%' && *(c + 1) == 's') {
  660. p += rspamd_strlcpy (p, (s != NULL) ? s : "", end - p);
  661. c += 2;
  662. }
  663. else {
  664. *p = *c++;
  665. }
  666. p++;
  667. }
  668. res = g_mime_utils_header_encode_text (subj_buf);
  669. rspamd_mempool_add_destructor (task->task_pool,
  670. (rspamd_mempool_destruct_t)g_free,
  671. res);
  672. return res;
  673. }
  674. static ucl_object_t *
  675. rspamd_str_list_ucl (GList *str_list)
  676. {
  677. ucl_object_t *top = NULL, *obj;
  678. GList *cur;
  679. top = ucl_object_typed_new (UCL_ARRAY);
  680. cur = str_list;
  681. while (cur) {
  682. obj = ucl_object_fromstring (cur->data);
  683. ucl_array_append (top, obj);
  684. cur = g_list_next (cur);
  685. }
  686. return top;
  687. }
  688. static ucl_object_t *
  689. rspamd_metric_symbol_ucl (struct rspamd_task *task, struct metric *m,
  690. struct symbol *sym)
  691. {
  692. ucl_object_t *obj = NULL;
  693. const gchar *description = NULL;
  694. if (sym->def != NULL) {
  695. description = sym->def->description;
  696. }
  697. obj = ucl_object_typed_new (UCL_OBJECT);
  698. ucl_object_insert_key (obj, ucl_object_fromstring (
  699. sym->name), "name", 0, false);
  700. ucl_object_insert_key (obj, ucl_object_fromdouble (
  701. sym->score), "score", 0, false);
  702. if (description) {
  703. ucl_object_insert_key (obj, ucl_object_fromstring (
  704. description), "description", 0, false);
  705. }
  706. if (sym->options != NULL) {
  707. ucl_object_insert_key (obj, rspamd_str_list_ucl (
  708. sym->options), "options", 0, false);
  709. }
  710. return obj;
  711. }
  712. static ucl_object_t *
  713. rspamd_metric_result_ucl (struct rspamd_task *task,
  714. struct metric_result *mres)
  715. {
  716. GHashTableIter hiter;
  717. struct symbol *sym;
  718. struct metric *m;
  719. gboolean is_spam;
  720. enum rspamd_metric_action action = METRIC_ACTION_NOACTION;
  721. ucl_object_t *obj = NULL, *sobj;;
  722. gpointer h, v;
  723. const gchar *subject;
  724. m = mres->metric;
  725. mres->action = rspamd_check_action_metric (task, mres->score,
  726. &mres->required_score, m);
  727. action = mres->action;
  728. is_spam = (action == METRIC_ACTION_REJECT);
  729. obj = ucl_object_typed_new (UCL_OBJECT);
  730. ucl_object_insert_key (obj, ucl_object_frombool (is_spam),
  731. "is_spam", 0, false);
  732. ucl_object_insert_key (obj, ucl_object_frombool (RSPAMD_TASK_IS_SKIPPED (task)),
  733. "is_skipped", 0, false);
  734. ucl_object_insert_key (obj, ucl_object_fromdouble (mres->score),
  735. "score", 0, false);
  736. ucl_object_insert_key (obj, ucl_object_fromdouble (mres->required_score),
  737. "required_score", 0, false);
  738. ucl_object_insert_key (obj,
  739. ucl_object_fromstring (rspamd_action_to_str (action)),
  740. "action", 0, false);
  741. if (action == METRIC_ACTION_REWRITE_SUBJECT) {
  742. subject = make_rewritten_subject (m, task);
  743. ucl_object_insert_key (obj, ucl_object_fromstring (subject),
  744. "subject", 0, false);
  745. }
  746. /* Now handle symbols */
  747. g_hash_table_iter_init (&hiter, mres->symbols);
  748. while (g_hash_table_iter_next (&hiter, &h, &v)) {
  749. sym = (struct symbol *)v;
  750. sobj = rspamd_metric_symbol_ucl (task, m, sym);
  751. ucl_object_insert_key (obj, sobj, h, 0, false);
  752. }
  753. return obj;
  754. }
  755. static void
  756. rspamd_ucl_torspamc_output (struct rspamd_task *task,
  757. ucl_object_t *top,
  758. rspamd_fstring_t **out)
  759. {
  760. const ucl_object_t *metric, *score,
  761. *required_score, *is_spam, *elt, *cur;
  762. ucl_object_iter_t iter = NULL;
  763. metric = ucl_object_find_key (top, DEFAULT_METRIC);
  764. if (metric != NULL) {
  765. score = ucl_object_find_key (metric, "score");
  766. required_score = ucl_object_find_key (metric, "required_score");
  767. is_spam = ucl_object_find_key (metric, "is_spam");
  768. rspamd_printf_fstring (out,
  769. "Metric: default; %s; %.2f / %.2f / 0.0\r\n",
  770. ucl_object_toboolean (is_spam) ? "True" : "False",
  771. ucl_object_todouble (score),
  772. ucl_object_todouble (required_score));
  773. elt = ucl_object_find_key (metric, "action");
  774. if (elt != NULL) {
  775. rspamd_printf_fstring (out, "Action: %s\r\n",
  776. ucl_object_tostring (elt));
  777. }
  778. iter = NULL;
  779. while ((elt = ucl_iterate_object (metric, &iter, true)) != NULL) {
  780. if (elt->type == UCL_OBJECT) {
  781. const ucl_object_t *sym_score;
  782. sym_score = ucl_object_find_key (elt, "score");
  783. rspamd_printf_fstring (out, "Symbol: %s(%.2f)\r\n",
  784. ucl_object_key (elt),
  785. ucl_object_todouble (sym_score));
  786. }
  787. }
  788. elt = ucl_object_find_key (metric, "subject");
  789. if (elt != NULL) {
  790. rspamd_printf_fstring (out, "Subject: %s\r\n",
  791. ucl_object_tostring (elt));
  792. }
  793. }
  794. elt = ucl_object_find_key (top, "messages");
  795. if (elt != NULL) {
  796. iter = NULL;
  797. while ((cur = ucl_iterate_object (elt, &iter, true)) != NULL) {
  798. if (cur->type == UCL_STRING) {
  799. rspamd_printf_fstring (out, "Message: %s\r\n",
  800. ucl_object_tostring (cur));
  801. }
  802. }
  803. }
  804. rspamd_printf_fstring (out, "Message-ID: %s\r\n", task->message_id);
  805. }
  806. static void
  807. rspamd_ucl_tospamc_output (struct rspamd_task *task,
  808. ucl_object_t *top,
  809. rspamd_fstring_t **out)
  810. {
  811. const ucl_object_t *metric, *score,
  812. *required_score, *is_spam, *elt;
  813. ucl_object_iter_t iter = NULL;
  814. rspamd_fstring_t *f;
  815. metric = ucl_object_find_key (top, DEFAULT_METRIC);
  816. if (metric != NULL) {
  817. score = ucl_object_find_key (metric, "score");
  818. required_score = ucl_object_find_key (metric, "required_score");
  819. is_spam = ucl_object_find_key (metric, "is_spam");
  820. rspamd_printf_fstring (out,
  821. "Spam: %s ; %.2f / %.2f\r\n\r\n",
  822. ucl_object_toboolean (is_spam) ? "True" : "False",
  823. ucl_object_todouble (score),
  824. ucl_object_todouble (required_score));
  825. while ((elt = ucl_iterate_object (metric, &iter, true)) != NULL) {
  826. if (elt->type == UCL_OBJECT) {
  827. rspamd_printf_fstring (out, "%s,",
  828. ucl_object_key (elt));
  829. }
  830. }
  831. /* Ugly hack, but the whole spamc is ugly */
  832. f = *out;
  833. if (f->str[f->len - 1] == ',') {
  834. f->len --;
  835. *out = rspamd_fstring_append (*out, CRLF, 2);
  836. }
  837. }
  838. }
  839. ucl_object_t *
  840. rspamd_protocol_write_ucl (struct rspamd_task *task)
  841. {
  842. struct metric_result *metric_res;
  843. ucl_object_t *top = NULL, *obj;
  844. GHashTableIter hiter;
  845. gpointer h, v;
  846. g_hash_table_iter_init (&hiter, task->results);
  847. top = ucl_object_typed_new (UCL_OBJECT);
  848. /* Convert results to an ucl object */
  849. while (g_hash_table_iter_next (&hiter, &h, &v)) {
  850. metric_res = (struct metric_result *)v;
  851. obj = rspamd_metric_result_ucl (task, metric_res);
  852. ucl_object_insert_key (top, obj, h, 0, false);
  853. }
  854. if (task->messages != NULL) {
  855. ucl_object_insert_key (top, rspamd_str_list_ucl (
  856. task->messages), "messages", 0, false);
  857. }
  858. if (task->cfg->log_urls || (task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
  859. if (g_hash_table_size (task->urls) > 0) {
  860. ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls,
  861. task), "urls", 0, false);
  862. }
  863. if (g_hash_table_size (task->emails) > 0) {
  864. ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task),
  865. "emails", 0, false);
  866. }
  867. }
  868. ucl_object_insert_key (top, ucl_object_fromstring (task->message_id),
  869. "message-id", 0, false);
  870. return top;
  871. }
  872. void
  873. rspamd_protocol_http_reply (struct rspamd_http_message *msg,
  874. struct rspamd_task *task)
  875. {
  876. struct metric_result *metric_res;
  877. GHashTableIter hiter;
  878. const struct rspamd_re_cache_stat *restat;
  879. gpointer h, v;
  880. ucl_object_t *top = NULL;
  881. gdouble required_score;
  882. gint action;
  883. /* Write custom headers */
  884. g_hash_table_iter_init (&hiter, task->reply_headers);
  885. while (g_hash_table_iter_next (&hiter, &h, &v)) {
  886. rspamd_ftok_t *hn = h, *hv = v;
  887. rspamd_http_message_add_header (msg, hn->begin, hv->begin);
  888. }
  889. top = rspamd_protocol_write_ucl (task);
  890. if (!(task->flags & RSPAMD_TASK_FLAG_NO_LOG)) {
  891. rspamd_roll_history_update (task->worker->srv->history, task);
  892. }
  893. rspamd_task_write_log (task);
  894. if (task->cfg->log_re_cache) {
  895. restat = rspamd_re_cache_get_stat (task->re_rt);
  896. g_assert (restat != NULL);
  897. msg_info_task (
  898. "regexp statistics: %ud pcre regexps scanned, %ud regexps matched,"
  899. " %HL bytes scanned using pcre, %HL bytes scanned total",
  900. restat->regexp_checked,
  901. restat->regexp_matched,
  902. restat->bytes_scanned_pcre,
  903. restat->bytes_scanned);
  904. }
  905. msg->body = rspamd_fstring_sized_new (1000);
  906. if (msg->method < HTTP_SYMBOLS && !RSPAMD_TASK_IS_SPAMC (task)) {
  907. rspamd_ucl_emit_fstring (top, UCL_EMIT_JSON_COMPACT, &msg->body);
  908. }
  909. else {
  910. if (RSPAMD_TASK_IS_SPAMC (task)) {
  911. rspamd_ucl_tospamc_output (task, top, &msg->body);
  912. }
  913. else {
  914. rspamd_ucl_torspamc_output (task, top, &msg->body);
  915. }
  916. }
  917. ucl_object_unref (top);
  918. if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) {
  919. /* Update stat for default metric */
  920. metric_res = g_hash_table_lookup (task->results, DEFAULT_METRIC);
  921. if (metric_res != NULL) {
  922. action = rspamd_check_action_metric (task, metric_res->score, &required_score,
  923. metric_res->metric);
  924. if (action <= METRIC_ACTION_NOACTION) {
  925. #ifndef HAVE_ATOMIC_BUILTINS
  926. task->worker->srv->stat->actions_stat[action]++;
  927. #else
  928. __atomic_add_fetch (&task->worker->srv->stat->actions_stat[action],
  929. 1, __ATOMIC_RELEASE);
  930. #endif
  931. }
  932. }
  933. /* Increase counters */
  934. #ifndef HAVE_ATOMIC_BUILTINS
  935. task->worker->srv->stat->messages_scanned++;
  936. #else
  937. __atomic_add_fetch (&task->worker->srv->stat->messages_scanned,
  938. 1, __ATOMIC_RELEASE);
  939. #endif
  940. }
  941. }
  942. void
  943. rspamd_protocol_write_reply (struct rspamd_task *task)
  944. {
  945. struct rspamd_http_message *msg;
  946. const gchar *ctype = "application/json";
  947. msg = rspamd_http_new_message (HTTP_RESPONSE);
  948. if (rspamd_http_connection_is_encrypted (task->http_conn)) {
  949. msg_info_task ("<%s> writing encrypted reply", task->message_id);
  950. }
  951. if (!RSPAMD_TASK_IS_JSON (task)) {
  952. /* Turn compatibility on */
  953. msg->method = HTTP_SYMBOLS;
  954. }
  955. if (RSPAMD_TASK_IS_SPAMC (task)) {
  956. msg->flags |= RSPAMD_HTTP_FLAG_SPAMC;
  957. }
  958. msg->date = time (NULL);
  959. debug_task ("writing reply to client");
  960. if (task->err != NULL) {
  961. ucl_object_t *top = NULL;
  962. top = ucl_object_typed_new (UCL_OBJECT);
  963. msg->code = 500 + task->err->code % 100;
  964. msg->status = rspamd_fstring_new_init (task->err->message,
  965. strlen (task->err->message));
  966. ucl_object_insert_key (top, ucl_object_fromstring (task->err->message),
  967. "error", 0, false);
  968. ucl_object_insert_key (top,
  969. ucl_object_fromstring (g_quark_to_string (task->err->domain)),
  970. "error_domain", 0, false);
  971. msg->body = rspamd_fstring_sized_new (256);
  972. rspamd_ucl_emit_fstring (top, UCL_EMIT_JSON_COMPACT, &msg->body);
  973. ucl_object_unref (top);
  974. }
  975. else {
  976. msg->status = rspamd_fstring_new_init ("OK", 2);
  977. switch (task->cmd) {
  978. case CMD_REPORT_IFSPAM:
  979. case CMD_REPORT:
  980. case CMD_CHECK:
  981. case CMD_SYMBOLS:
  982. case CMD_PROCESS:
  983. case CMD_SKIP:
  984. rspamd_protocol_http_reply (msg, task);
  985. break;
  986. case CMD_PING:
  987. msg->body = rspamd_fstring_new_init ("pong" CRLF, 6);
  988. ctype = "text/plain";
  989. break;
  990. case CMD_OTHER:
  991. msg_err_task ("BROKEN");
  992. break;
  993. }
  994. }
  995. rspamd_http_connection_reset (task->http_conn);
  996. rspamd_http_connection_write_message (task->http_conn, msg, NULL,
  997. ctype, task, task->sock, &task->tv, task->ev_base);
  998. task->processed_stages |= RSPAMD_TASK_STAGE_REPLIED;
  999. }
  1000. void
  1001. register_protocol_command (const gchar *name, protocol_reply_func func)
  1002. {
  1003. struct custom_command *cmd;
  1004. cmd = g_malloc (sizeof (struct custom_command));
  1005. cmd->name = name;
  1006. cmd->func = func;
  1007. custom_commands = g_list_prepend (custom_commands, cmd);
  1008. }