12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441 |
- /*
- * Copyright 2024 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "mime_headers.h"
- #include "smtp_parsers.h"
- #include "mime_encoding.h"
- #include "received.h"
- #include "contrib/uthash/utlist.h"
- #include "libserver/mempool_vars_internal.h"
- #include "libserver/cfg_file.h"
- #include "libutil/util.h"
- #include <unicode/utf8.h>
-
- KHASH_INIT(rspamd_mime_headers_htb, gchar *, /* instantiates the khash type `rspamd_mime_headers_htb`, keyed by header name */
- struct rspamd_mime_header *, 1, /* each value is a header pointer (head of the per-name chain); 1 = keys carry values */
- rspamd_strcase_hash, rspamd_strcase_equal); /* hash/equality callbacks; presumably case-insensitive judging by the names -- TODO confirm */
-
- struct rspamd_mime_headers_table { /* container for the parsed headers of one message or part */
- khash_t(rspamd_mime_headers_htb) htb; /* embedded by value; its keys/vals/flags arrays are released in rspamd_message_headers_dtor */
- ref_entry_t ref; /* reference counter driven by REF_INIT_RETAIN / REF_RETAIN / REF_RELEASE in this file */
- };
-
- static void
- rspamd_mime_header_check_special(struct rspamd_task *task,
- struct rspamd_mime_header *rh)
- {
- uint64_t h;
- const gchar *p, *end;
- gchar *id;
- gint max_recipients = -1, len;
-
- if (task->cfg) {
- max_recipients = task->cfg->max_recipients;
- }
-
- h = rspamd_icase_hash(rh->name, strlen(rh->name), 0xdeadbabe);
-
- switch (h) {
- case 0x88705DC4D9D61ABULL: /* received */
- if (rspamd_received_header_parse(task, rh->decoded, strlen(rh->decoded), rh)) {
- rh->flags |= RSPAMD_HEADER_RECEIVED;
- }
- break;
- case 0x76F31A09F4352521ULL: /* to */
- MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
- rh->value, strlen(rh->value),
- MESSAGE_FIELD(task, rcpt_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_TO | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
- break;
- case 0x7EB117C1480B76ULL: /* cc */
- MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
- rh->value, strlen(rh->value),
- MESSAGE_FIELD(task, rcpt_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_CC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
- break;
- case 0xE4923E11C4989C8DULL: /* bcc */
- MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
- rh->value, strlen(rh->value),
- MESSAGE_FIELD(task, rcpt_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_BCC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
- break;
- case 0x41E1985EDC1CBDE4ULL: /* from */
- MESSAGE_FIELD(task, from_mime) = rspamd_email_address_from_mime(task->task_pool,
- rh->value, strlen(rh->value),
- MESSAGE_FIELD(task, from_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_FROM | RSPAMD_HEADER_SENDER | RSPAMD_HEADER_UNIQUE;
- break;
- case 0x43A558FC7C240226ULL: /* message-id */ {
-
- rh->flags = RSPAMD_HEADER_MESSAGE_ID | RSPAMD_HEADER_UNIQUE;
- p = rh->decoded;
- len = rspamd_strip_smtp_comments_inplace(rh->decoded, strlen(p));
- rh->decoded[len] = '\0'; /* Zero terminate after stripping */
- /* Strip surrounding spaces */
- rh->decoded = g_strstrip(rh->decoded);
- end = p + len;
-
- if (*p == '<') {
- p++;
- }
-
- if (end > p) {
- gchar *d;
-
- if (*(end - 1) == '>') {
- end--;
- }
-
- id = rspamd_mempool_alloc(task->task_pool, end - p + 1);
- d = id;
-
- while (p < end) {
- if (g_ascii_isgraph(*p)) {
- *d++ = *p++;
- }
- else {
- *d++ = '?';
- p++;
- }
- }
-
- *d = '\0';
-
- MESSAGE_FIELD(task, message_id) = id;
- }
-
- break;
- }
- case 0xB91D3910358E8212ULL: /* subject */
- if (MESSAGE_FIELD(task, subject) == NULL) {
- MESSAGE_FIELD(task, subject) = rh->decoded;
- }
- rh->flags = RSPAMD_HEADER_SUBJECT | RSPAMD_HEADER_UNIQUE;
- break;
- case 0xEE4AA2EAAC61D6F4ULL: /* return-path */
- if (task->from_envelope == NULL) {
- task->from_envelope = rspamd_email_address_from_smtp(rh->decoded,
- strlen(rh->decoded));
- }
- rh->flags = RSPAMD_HEADER_RETURN_PATH | RSPAMD_HEADER_UNIQUE;
- break;
- case 0xB9EEFAD2E93C2161ULL: /* delivered-to */
- if (task->deliver_to == NULL) {
- task->deliver_to = rh->decoded;
- }
- rh->flags = RSPAMD_HEADER_DELIVERED_TO;
- break;
- case 0x2EC3BFF3C393FC10ULL: /* date */
- case 0xAC0DDB1A1D214CAULL: /* sender */
- case 0x54094572367AB695ULL: /* in-reply-to */
- case 0x81CD9E9131AB6A9AULL: /* content-type */
- case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */
- case 0xB3F6704CB3AD6589ULL: /* references */
- rh->flags = RSPAMD_HEADER_UNIQUE;
- break;
- }
- }
-
- static void
- rspamd_mime_header_add(struct rspamd_task *task,
- khash_t(rspamd_mime_headers_htb) * target,
- struct rspamd_mime_header **order_ptr,
- struct rspamd_mime_header *rh,
- gboolean check_special)
- {
- khiter_t k;
- struct rspamd_mime_header *ex;
- int res;
-
- k = kh_put(rspamd_mime_headers_htb, target, rh->name, &res);
-
- if (res == 0) {
- ex = kh_value(target, k);
- DL_APPEND(ex, rh);
- msg_debug_task("append raw header %s: %s", rh->name, rh->value);
- }
- else {
- kh_value(target, k) = rh;
- rh->prev = rh;
- rh->next = NULL;
- msg_debug_task("add new raw header %s: %s", rh->name, rh->value);
- }
-
- LL_PREPEND2(*order_ptr, rh, ord_next);
-
- if (check_special) {
- rspamd_mime_header_check_special(task, rh);
- }
- }
-
-
- /* Convert raw headers to a list of struct raw_header * */
- void rspamd_mime_headers_process(struct rspamd_task *task,
- struct rspamd_mime_headers_table *target,
- struct rspamd_mime_header **order_ptr,
- const gchar *in, gsize len,
- gboolean check_newlines)
- {
- struct rspamd_mime_header *nh = NULL;
- const gchar *p, *c, *end;
- gchar *tmp, *tp;
- gint state = 0, l, next_state = 100, err_state = 100, t_state;
- gboolean valid_folding = FALSE, shift_by_one = FALSE;
- guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
- guint norder = 0;
-
- p = in;
- end = p + len;
- c = p;
- memset(nlines_count, 0, sizeof(nlines_count));
- msg_debug_task("start processing headers");
-
- while (p < end) {
- /* FSM for processing headers */
- switch (state) {
- case 0:
- /* Begin processing headers */
- if (!g_ascii_isalpha(*p)) {
- /* We have some garbage at the beginning of headers, skip this line */
- state = 100;
- next_state = 0;
- }
- else {
- state = 1;
- c = p;
- }
- break;
- case 1:
- /* We got something like header's name */
- if (*p == ':') {
- nh = rspamd_mempool_alloc0(task->task_pool,
- sizeof(struct rspamd_mime_header));
- l = p - c;
- tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
- rspamd_null_safe_copy(c, l, tmp, l + 1);
- nh->name = tmp;
- nh->flags |= RSPAMD_HEADER_EMPTY_SEPARATOR;
- nh->raw_value = c;
- nh->raw_len = p - c; /* Including trailing ':' */
- p++;
- state = 2;
- c = p;
- }
- else if (g_ascii_isspace(*p)) {
- /* Not header but some garbage */
- if (target == MESSAGE_FIELD(task, raw_headers)) {
- /* Do not propagate flag from the attachments */
- task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
- }
- state = 100;
- next_state = 0;
- }
- else {
- p++;
- }
- break;
- case 2:
- /* We got header's name, so skip any \t or spaces */
- if (*p == '\t') {
- nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
- nh->flags |= RSPAMD_HEADER_TAB_SEPARATED;
- p++;
- }
- else if (*p == ' ') {
- nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
- p++;
- }
- else if (*p == '\n' || *p == '\r') {
-
- if (check_newlines) {
- if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
- }
- else if (p + 1 < end && *(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
- }
- else {
- nlines_count[RSPAMD_TASK_NEWLINES_CR]++;
- }
- }
-
- /* Process folding */
- state = 99;
- l = p - c;
- if (l > 0) {
- tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
- rspamd_null_safe_copy(c, l, tmp, l + 1);
- nh->separator = tmp;
- }
- next_state = 3;
- err_state = 5;
- c = p;
- }
- else {
- /* Process value */
- l = p - c;
- if (l >= 0) {
- tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
- rspamd_null_safe_copy(c, l, tmp, l + 1);
- nh->separator = tmp;
- }
- c = p;
- state = 3;
- }
- break;
- case 3:
- if (*p == '\r' || *p == '\n') {
- /* Hold folding */
- if (check_newlines) {
- if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
- }
- else if (p + 1 < end && *(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
- }
- else {
- nlines_count[RSPAMD_TASK_NEWLINES_CR]++;
- }
- }
- state = 99;
- next_state = 3;
- err_state = 4;
- }
- else if (p + 1 == end) {
- state = 4;
- }
- else {
- p++;
- }
- break;
- case 4:
- /* Copy header's value */
-
- /*
- * XXX:
- * The original decision to use here null terminated
- * strings was extremely poor!
- */
- l = p - c;
- tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
- tp = tmp;
- t_state = 0;
- while (l--) {
- if (t_state == 0) {
- /* Before folding */
- if (*c == '\n' || *c == '\r') {
- t_state = 1;
- c++;
- *tp++ = ' ';
- }
- else {
- if (*c != '\0') {
- *tp++ = *c++;
- }
- else {
- c++;
- }
- }
- }
- else if (t_state == 1) {
- /* Inside folding */
- if (g_ascii_isspace(*c)) {
- c++;
- }
- else {
- t_state = 0;
- if (*c != '\0') {
- *tp++ = *c++;
- }
- else {
- c++;
- }
- }
- }
- }
- /* Strip last space that can be added by \r\n parsing */
- if (tp > tmp && *(tp - 1) == ' ') {
- tp--;
- }
-
- *tp = '\0';
- /* Strip the initial spaces that could also be added by folding */
- while (*tmp != '\0' && g_ascii_isspace(*tmp)) {
- tmp++;
- }
-
- if (p + 1 == end) {
- nh->raw_len = end - nh->raw_value;
- }
- else {
- nh->raw_len = p - nh->raw_value;
- }
-
- nh->value = tmp;
-
- gboolean broken_utf = FALSE;
-
- nh->decoded = rspamd_mime_header_decode(task->task_pool,
- nh->value, strlen(tmp), &broken_utf);
-
- if (broken_utf) {
- task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
- }
-
- if (nh->decoded == NULL) {
- /* As we strip comments in place... */
- nh->decoded = rspamd_mempool_strdup(task->task_pool, "");
- }
-
- /* We also validate utf8 and replace all non-valid utf8 chars */
- rspamd_mime_charset_utf_enforce(nh->decoded, strlen(nh->decoded));
- nh->order = norder++;
- rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines);
- nh = NULL;
- state = 0;
- break;
- case 5:
- /* Header has only name, no value */
- nh->value = rspamd_mempool_strdup(task->task_pool, "");
- nh->decoded = rspamd_mempool_strdup(task->task_pool, "");
- nh->raw_len = p - nh->raw_value;
- if (shift_by_one) {
- nh->raw_len++;
- }
- nh->order = norder++;
- rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines);
- nh = NULL;
- state = 0;
- break;
- case 99:
- /* Folding state */
- if (p + 1 == end) {
- state = err_state;
- /* Include the last character into the next header */
- shift_by_one = TRUE;
- }
- else {
- if (*p == '\r' || *p == '\n') {
- p++;
- valid_folding = FALSE;
- }
- else if (*p == '\t' || *p == ' ') {
- /* Valid folding */
- p++;
- valid_folding = TRUE;
- }
- else {
- if (valid_folding) {
- debug_task("go to state: %d->%d", state, next_state);
- state = next_state;
- }
- else {
- /* Fall back */
- debug_task("go to state: %d->%d", state, err_state);
- state = err_state;
- }
- }
- }
- break;
- case 100:
- /* Fail state, skip line */
-
- if (*p == '\r') {
- if (p + 1 < end && *(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
- p++;
- }
- p++;
- state = next_state;
- }
- else if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
-
- if (p + 1 < end && *(p + 1) == '\r') {
- p++;
- }
- p++;
- state = next_state;
- }
- else if (p + 1 == end) {
- state = next_state;
- p++;
- }
- else {
- p++;
- }
- break;
- }
- }
-
- /* Since we have prepended headers, we need to reverse the list to get the actual order */
- LL_REVERSE(*order_ptr);
-
- if (check_newlines) {
- guint max_cnt = 0;
- gint sel = 0;
- rspamd_cryptobox_hash_state_t hs;
- guchar hout[rspamd_cryptobox_HASHBYTES], *hexout;
-
- for (gint i = RSPAMD_TASK_NEWLINES_CR; i < RSPAMD_TASK_NEWLINES_MAX; i++) {
- if (nlines_count[i] > max_cnt) {
- max_cnt = nlines_count[i];
- sel = i;
- }
- }
-
- MESSAGE_FIELD(task, nlines_type) = sel;
-
- rspamd_cryptobox_hash_init(&hs, NULL, 0);
-
- LL_FOREACH(*order_ptr, nh)
- {
- if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) {
- rspamd_cryptobox_hash_update(&hs, nh->name, strlen(nh->name));
- }
- }
-
- rspamd_cryptobox_hash_final(&hs, hout);
- hexout = rspamd_mempool_alloc(task->task_pool, sizeof(hout) * 2 + 1);
- hexout[sizeof(hout) * 2] = '\0';
- rspamd_encode_hex_buf(hout, sizeof(hout), hexout,
- sizeof(hout) * 2 + 1);
- rspamd_mempool_set_variable(task->task_pool,
- RSPAMD_MEMPOOL_HEADERS_HASH,
- hexout, NULL);
- }
- }
-
- static void
- rspamd_mime_header_maybe_save_token(rspamd_mempool_t *pool,
- GString *out,
- GByteArray *token,
- GByteArray *decoded_token,
- rspamd_ftok_t *old_charset,
- rspamd_ftok_t *new_charset)
- {
- if (new_charset->len == 0) {
- g_assert_not_reached();
- }
-
- if (old_charset->len > 0) {
- if (rspamd_ftok_casecmp(new_charset, old_charset) == 0) {
- rspamd_ftok_t srch;
-
- /*
- * Special case for iso-2022-jp:
- * https://github.com/vstakhov/rspamd/issues/1669
- */
- RSPAMD_FTOK_ASSIGN(&srch, "iso-2022-jp");
-
- if (rspamd_ftok_casecmp(new_charset, &srch) != 0) {
- /* We can concatenate buffers, just return */
- return;
- }
- }
- }
-
- /* We need to flush and decode old token to out string */
- if (rspamd_mime_to_utf8_byte_array(token, decoded_token, pool,
- rspamd_mime_detect_charset(new_charset, pool))) {
- g_string_append_len(out, decoded_token->data, decoded_token->len);
- }
-
- /* We also reset buffer */
- g_byte_array_set_size(token, 0);
- /*
- * Propagate charset
- *
- * Here are dragons: we save the original charset to allow buffers concat
- * in the condition at the beginning of the function.
- * However, it will likely cause unnecessary calls for
- * `rspamd_mime_detect_charset` which could be relatively expensive.
- * But we ignore that for now...
- */
- memcpy(old_charset, new_charset, sizeof(*old_charset));
- }
-
- static void
- rspamd_mime_header_sanity_check(GString *str)
- {
- gsize i;
- gchar t;
-
- for (i = 0; i < str->len; i++) {
- t = str->str[i];
- if (!((t & 0x80) || g_ascii_isgraph(t))) {
- if (g_ascii_isspace(t)) {
- /* Replace spaces characters with plain space */
- str->str[i] = ' ';
- }
- else {
- str->str[i] = '?';
- }
- }
- }
- }
-
- gchar *
- rspamd_mime_header_decode(rspamd_mempool_t *pool, const gchar *in,
- gsize inlen, gboolean *invalid_utf)
- {
- GString *out;
- const guchar *c, *p, *end;
- const gchar *tok_start = NULL;
- gsize tok_len = 0, pos;
- GByteArray *token = NULL, *decoded;
- rspamd_ftok_t cur_charset = {0, NULL}, old_charset = {0, NULL};
- gint encoding;
- gssize r;
- guint qmarks = 0;
- gchar *ret;
- enum {
- parse_normal = 0,
- got_eqsign,
- got_encoded_start,
- got_more_qmark,
- skip_spaces,
- } state = parse_normal;
-
- g_assert(in != NULL);
-
- c = in;
- p = in;
- end = in + inlen;
- out = g_string_sized_new(inlen);
- token = g_byte_array_sized_new(80);
- decoded = g_byte_array_sized_new(122);
-
- while (p < end) {
- switch (state) {
- case parse_normal:
- if (*p == '=') {
- g_string_append_len(out, c, p - c);
- c = p;
- state = got_eqsign;
- }
- else if (*p >= 128) {
- gint off = 0;
- UChar32 uc;
- /* Unencoded character */
- g_string_append_len(out, c, p - c);
- /* Check if that's valid UTF8 */
- U8_NEXT(p, off, end - p, uc);
-
- if (uc <= 0) {
- c = p + 1;
- /* 0xFFFD in UTF8 */
- g_string_append_len(out, " ", 3);
- off = 0;
- U8_APPEND_UNSAFE(out->str + out->len - 3,
- off, 0xfffd);
-
- if (invalid_utf) {
- *invalid_utf = TRUE;
- }
- }
- else {
- c = p;
- p = p + off;
- continue; /* To avoid p ++ after this block */
- }
- }
- p++;
- break;
- case got_eqsign:
- if (*p == '?') {
- state = got_encoded_start;
- qmarks = 0;
- }
- else {
- g_string_append_len(out, c, 1);
- c = p;
- state = parse_normal;
- continue; /* Deal with == case */
- }
- p++;
- break;
- case got_encoded_start:
- if (*p == '?') {
- state = got_more_qmark;
- qmarks++;
-
- /* Skip multiple ? signs */
- p++;
- while (p < end && *p == '?') {
- p++;
- }
-
- continue;
- }
- p++;
- break;
- case got_more_qmark:
- if (*p == '=') {
- if (qmarks < 3) {
- state = got_encoded_start;
- }
- else {
- /* Finished encoded boundary */
- if (*c == '"') {
- /* Quoted string, non-RFC conformant but used by retards */
- c++;
- }
- if (rspamd_rfc2047_parser(c, p - c + 1, &encoding,
- &cur_charset.begin, &cur_charset.len,
- &tok_start, &tok_len)) {
- /* We have a token, so we can decode it from `encoding` */
- if (token->len > 0) {
- if (old_charset.len == 0) {
- memcpy(&old_charset, &cur_charset,
- sizeof(old_charset));
- }
-
- rspamd_mime_header_maybe_save_token(pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
-
- qmarks = 0;
- pos = token->len;
- g_byte_array_set_size(token, pos + tok_len);
-
- if (encoding == RSPAMD_RFC2047_QP) {
- r = rspamd_decode_qp2047_buf(tok_start, tok_len,
- token->data + pos, tok_len);
-
- if (r != -1) {
- token->len = pos + r;
- }
- else {
- /* Cannot decode qp */
- token->len -= tok_len;
- }
- }
- else {
- if (rspamd_cryptobox_base64_decode(tok_start, tok_len,
- token->data + pos, &tok_len)) {
- token->len = pos + tok_len;
- }
- else {
- /* Cannot decode */
- token->len -= tok_len;
- }
- }
-
- c = p + 1;
- state = skip_spaces;
- }
- else {
- /* Not encoded-word */
- old_charset.len = 0;
-
- if (token->len > 0) {
- rspamd_mime_header_maybe_save_token(pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
-
- g_string_append_len(out, c, p - c);
- c = p;
- state = parse_normal;
- }
- } /* qmarks >= 3 */
- } /* p == '=' */
- else {
- state = got_encoded_start;
- }
- p++;
- break;
- case skip_spaces:
- if (g_ascii_isspace(*p)) {
- p++;
- }
- else if (*p == '=' && p < end - 1 && p[1] == '?') {
- /* Next boundary, can glue */
- c = p;
- p += 2;
- state = got_encoded_start;
- }
- else {
- /* Need to save spaces and decoded token */
- if (token->len > 0) {
- old_charset.len = 0;
- rspamd_mime_header_maybe_save_token(pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
-
- g_string_append_len(out, c, p - c);
- c = p;
- state = parse_normal;
- }
- break;
- }
- }
-
- /* Leftover */
- switch (state) {
- case skip_spaces:
- if (token->len > 0 && cur_charset.len > 0) {
- old_charset.len = 0;
- rspamd_mime_header_maybe_save_token(pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
- break;
- default:
- /* Just copy leftover */
- if (p > c) {
- g_string_append_len(out, c, p - c);
- }
- break;
- }
-
- g_byte_array_free(token, TRUE);
- g_byte_array_free(decoded, TRUE);
- rspamd_mime_header_sanity_check(out);
- rspamd_mempool_notify_alloc(pool, out->len);
- ret = g_string_free(out, FALSE);
- rspamd_mempool_add_destructor(pool, g_free, ret);
-
- return ret;
- }
-
- gchar *
- rspamd_mime_header_encode(const gchar *in, gsize len)
- {
- const gchar *p = in, *end = in + len;
- gchar *out, encode_buf[80 * sizeof(uint32_t)];
- GString *res;
- gboolean need_encoding = FALSE;
-
- /* Check if we need to encode */
- while (p < end) {
- if ((((guchar) *p) & 0x80) != 0) {
- need_encoding = TRUE;
- break;
- }
- p++;
- }
-
- if (!need_encoding) {
- out = g_malloc(len + 1);
- rspamd_strlcpy(out, in, len + 1);
- }
- else {
- /* Need encode */
- gsize ulen, pos;
- gint r;
- const gchar *prev;
- /* Choose step: =?UTF-8?Q?<qp>?= should be less than 76 chars */
- guint step = (76 - 12) / 3 + 1;
-
- ulen = g_utf8_strlen(in, len);
- res = g_string_sized_new(len * 2 + 1);
- pos = 0;
- prev = in;
- /* Adjust chunk size for unicode average length */
- step *= 1.0 * ulen / (gdouble) len;
-
- while (pos < ulen) {
- p = g_utf8_offset_to_pointer(in, pos);
-
- if (p > prev) {
- /* Encode and print */
- r = rspamd_encode_qp2047_buf(prev, p - prev,
- encode_buf, sizeof(encode_buf));
-
- if (r != -1) {
- if (res->len > 0) {
- rspamd_printf_gstring(res, " =?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- else {
- rspamd_printf_gstring(res, "=?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- }
- }
-
- pos += MIN(step, ulen - pos);
- prev = p;
- }
-
- /* Leftover */
- if (prev < end) {
- r = rspamd_encode_qp2047_buf(prev, end - prev,
- encode_buf, sizeof(encode_buf));
-
- if (r != -1) {
- if (res->len > 0) {
- rspamd_printf_gstring(res, " =?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- else {
- rspamd_printf_gstring(res, "=?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- }
- }
-
- out = g_string_free(res, FALSE);
- }
-
- return out;
- }
-
- gchar *
- rspamd_mime_message_id_generate(const gchar *fqdn)
- {
- GString *out;
- uint64_t rnd, clk;
-
- out = g_string_sized_new(strlen(fqdn) + 22);
- rnd = ottery_rand_uint64();
- clk = rspamd_get_calendar_ticks() * 1e6;
-
- rspamd_printf_gstring(out, "%*bs.%*bs@%s",
- (gint) sizeof(uint64_t) - 3, (guchar *) &clk,
- (gint) sizeof(uint64_t), (gchar *) &rnd,
- fqdn);
-
- return g_string_free(out, FALSE);
- }
-
- struct rspamd_mime_header *
- rspamd_message_get_header_from_hash(struct rspamd_mime_headers_table *hdrs,
- const gchar *field,
- gboolean need_modified)
- {
- if (hdrs == NULL) {
- return NULL;
- }
-
- khiter_t k;
- khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb;
- struct rspamd_mime_header *hdr;
-
- if (htb) {
- k = kh_get(rspamd_mime_headers_htb, htb, (gchar *) field);
-
- if (k == kh_end(htb)) {
- return NULL;
- }
-
- hdr = kh_value(htb, k);
-
- if (!need_modified) {
- if (hdr->flags & RSPAMD_HEADER_NON_EXISTING) {
- return NULL;
- }
-
- return hdr;
- }
- else {
- if (hdr->flags & RSPAMD_HEADER_MODIFIED) {
- return hdr->modified_chain;
- }
-
- return hdr;
- }
- }
-
- return NULL;
- }
-
- struct rspamd_mime_header *
- rspamd_message_get_header_array(struct rspamd_task *task, const gchar *field,
- gboolean need_modified)
- {
- return rspamd_message_get_header_from_hash(
- MESSAGE_FIELD_CHECK(task, raw_headers),
- field, need_modified);
- }
-
- gsize rspamd_mime_headers_count(struct rspamd_mime_headers_table *hdrs)
- {
- if (hdrs) {
- return kh_size(&hdrs->htb);
- }
-
- return 0;
- }
-
- bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *hdrs,
- rspamd_hdr_traverse_func_t func, void *ud)
- {
- const gchar *name;
- struct rspamd_mime_header *hdr;
-
- kh_foreach(&hdrs->htb, name, hdr, {
- if (!func(name, hdr, ud)) {
- return false;
- }
- });
-
- return true;
- }
-
- static void
- rspamd_message_headers_dtor(struct rspamd_mime_headers_table *hdrs)
- {
- if (hdrs) {
- kfree(hdrs->htb.keys);
- kfree(hdrs->htb.vals);
- kfree(hdrs->htb.flags);
- g_free(hdrs);
- }
- }
-
/*
 * Takes one more reference on the headers table (via REF_RETAIN).
 *
 * @param hdrs table to retain
 * @return the same pointer, for call chaining
 */
struct rspamd_mime_headers_table *
rspamd_message_headers_ref(struct rspamd_mime_headers_table *hdrs)
{
    REF_RETAIN(hdrs);

    return hdrs;
}
-
/*
 * Drops one reference on the headers table (via REF_RELEASE).
 * The table is created with rspamd_message_headers_dtor as its destructor;
 * presumably it runs when the last reference goes away -- see the
 * REF_RELEASE definition.
 */
void rspamd_message_headers_unref(struct rspamd_mime_headers_table *hdrs)
{
    REF_RELEASE(hdrs);
}
-
- struct rspamd_mime_headers_table *
- rspamd_message_headers_new(void)
- {
- struct rspamd_mime_headers_table *nhdrs;
-
- nhdrs = g_malloc0(sizeof(*nhdrs));
- REF_INIT_RETAIN(nhdrs, rspamd_message_headers_dtor);
-
- return nhdrs;
- }
-
- gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len)
- {
- /*
- * t - tortoise (destination)
- * h - hare (source)
- */
- char *t = hdr, *h = hdr, *end = (hdr + len);
- enum {
- copy_chars,
- folding_cr,
- folding_lf,
- folding_ws,
- } state = copy_chars;
-
- while (h < end) {
- switch (state) {
- case copy_chars:
- if (*h == '\r') {
- state = folding_cr;
- h++;
- }
- else if (*h == '\n') {
- state = folding_lf;
- h++;
- }
- else {
- *t++ = *h++;
- }
- break;
- case folding_cr:
- if (*h == '\n') {
- state = folding_lf;
- h++;
- }
- else if (g_ascii_isspace(*h)) {
- state = folding_ws;
- h++;
- }
- else {
- /* It is weird, not like a folding, so we need to revert back */
- *t++ = '\r';
- state = copy_chars;
- }
- break;
- case folding_lf:
- if (g_ascii_isspace(*h)) {
- state = folding_ws;
- h++;
- }
- else {
- /* It is weird, not like a folding, so we need to revert back */
- *t++ = '\n';
- state = copy_chars;
- }
- break;
- case folding_ws:
- if (!g_ascii_isspace(*h)) {
- *t++ = ' ';
- state = copy_chars;
- }
- else {
- h++;
- }
- break;
- }
- }
-
- return t - hdr;
- }
-
- void rspamd_message_set_modified_header(struct rspamd_task *task,
- struct rspamd_mime_headers_table *hdrs,
- const gchar *hdr_name,
- const ucl_object_t *obj,
- struct rspamd_mime_header **order_ptr)
- {
- khiter_t k;
- khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb;
- struct rspamd_mime_header *hdr_elt, *existing_chain;
- int i;
-
- if (htb) {
- k = kh_get(rspamd_mime_headers_htb, htb, (gchar *) hdr_name);
-
- if (k == kh_end(htb)) {
- hdr_elt = rspamd_mempool_alloc0(task->task_pool, sizeof(*hdr_elt));
-
- hdr_elt->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_NON_EXISTING;
- hdr_elt->name = rspamd_mempool_strdup(task->task_pool, hdr_name);
-
- int r;
- k = kh_put(rspamd_mime_headers_htb, htb, hdr_elt->name, &r);
-
- kh_value(htb, k) = hdr_elt;
-
- if (order_ptr) {
- /*
- * This iterates over all headers in O(N), but we have no other options here, as the
- * list is already set.
- */
- LL_APPEND2(*order_ptr, hdr_elt, ord_next);
- }
- }
- else {
- hdr_elt = kh_value(htb, k);
- }
- }
- else {
- /* No hash, no modification */
- msg_err_task("internal error: calling for set_modified_header for no headers");
- return;
- }
-
- if (hdr_elt->flags & RSPAMD_HEADER_MODIFIED) {
- existing_chain = hdr_elt->modified_chain;
- }
- else {
- existing_chain = hdr_elt;
- }
-
- const ucl_object_t *elt, *cur;
- ucl_object_iter_t it;
-
- /* First, deal with removed headers, copying the relevant headers with remove flag */
- elt = ucl_object_lookup(obj, "remove");
-
- /*
- * remove: {1, 2 ...}
- * where number is the header's position starting from '1'
- */
- if (elt && ucl_object_type(elt) == UCL_ARRAY) {
- /* First, use a temporary array to keep all headers */
- GPtrArray *existing_ar = g_ptr_array_new();
- struct rspamd_mime_header *cur_hdr;
-
- /* Exclude removed headers */
- LL_FOREACH(existing_chain, cur_hdr)
- {
- if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
- g_ptr_array_add(existing_ar, cur_hdr);
- }
- }
-
- it = NULL;
-
- while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
- if (ucl_object_type(cur) == UCL_INT) {
- int ord = ucl_object_toint(cur);
-
- if (ord == 0) {
- /* Remove all headers in the existing chain */
- PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr)
- {
- cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
- }
- }
- else if (ord > 0) {
- /* Start from the top */
-
- if (ord <= existing_ar->len) {
- cur_hdr = g_ptr_array_index(existing_ar, ord - 1);
- cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
- }
- }
- else {
- /* Start from the bottom; ord < 0 */
- if ((-ord) <= existing_ar->len) {
- cur_hdr = g_ptr_array_index(existing_ar, existing_ar->len + ord);
- cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
- }
- }
- }
- }
-
- /*
- * Next, we return all headers modified to the existing chain
- * This implies an additional copy of all structures but is safe enough to
- * deal with it
- */
- hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
- hdr_elt->modified_chain = NULL;
-
- PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr)
- {
- if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
- struct rspamd_mime_header *nhdr = rspamd_mempool_alloc(
- task->task_pool, sizeof(*nhdr));
- memcpy(nhdr, cur_hdr, sizeof(*nhdr));
- nhdr->modified_chain = NULL;
- nhdr->prev = NULL;
- nhdr->next = NULL;
- nhdr->ord_next = NULL;
-
- DL_APPEND(hdr_elt->modified_chain, nhdr);
- }
- }
-
- g_ptr_array_free(existing_ar, TRUE);
-
- /* End of headers removal logic */
- }
-
- /* We can now deal with headers additions */
- elt = ucl_object_lookup(obj, "add");
- if (elt && ucl_object_type(elt) == UCL_ARRAY) {
- if (!(hdr_elt->flags & RSPAMD_HEADER_MODIFIED)) {
- /* Copy the header itself to the modified chain */
- struct rspamd_mime_header *nhdr;
- hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
- nhdr = rspamd_mempool_alloc(
- task->task_pool, sizeof(*nhdr));
- memcpy(nhdr, hdr_elt, sizeof(*hdr_elt));
- nhdr->modified_chain = NULL;
- nhdr->next = NULL;
- nhdr->ord_next = NULL;
- nhdr->prev = nhdr;
- hdr_elt->modified_chain = nhdr;
- }
-
- /*
- * add: {{1, "foo"}, {-1, "bar"} ...}
- * where number is the header's position starting from '1'
- */
- it = NULL;
-
- while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
- if (ucl_object_type(cur) == UCL_ARRAY) {
- const ucl_object_t *order = ucl_array_find_index(cur, 0),
- *value = ucl_array_find_index(cur, 1);
-
- if (order && value &&
- (ucl_object_type(order) == UCL_INT &&
- ucl_object_type(value) == UCL_STRING)) {
- int ord = ucl_object_toint(order);
- const char *raw_value;
- gsize raw_len;
-
- raw_value = ucl_object_tolstring(value, &raw_len);
-
- if (raw_len == 0) {
- continue;
- }
-
- struct rspamd_mime_header *nhdr = rspamd_mempool_alloc0(
- task->task_pool, sizeof(*nhdr));
-
- nhdr->flags |= RSPAMD_HEADER_ADDED;
- nhdr->name = hdr_elt->name;
- nhdr->value = rspamd_mempool_alloc(task->task_pool,
- raw_len + 1);
- /* Strlcpy will ensure that value will have no embedded \0 */
- rspamd_strlcpy(nhdr->value, raw_value, raw_len + 1);
- gsize value_len = rspamd_message_header_unfold_inplace(nhdr->value, raw_len);
- nhdr->value[value_len] = '\0';
-
- /* Deal with the raw value */
- size_t namelen = strlen(hdr_elt->name);
- char *rawbuf = rspamd_mempool_alloc(task->task_pool, namelen +
- raw_len +
- sizeof(": \r\n"));
- /* Name: value<newline> */
- nhdr->raw_value = rawbuf;
- memcpy(rawbuf, hdr_elt->name, namelen);
- rawbuf += namelen;
- memcpy(rawbuf, ": ", sizeof(": ") - 1);
- nhdr->separator = rspamd_mempool_strdup(task->task_pool, " ");
- rawbuf += sizeof(": ") - 1;
- memcpy(rawbuf, raw_value, raw_len);
- nhdr->raw_len = raw_len;
-
- if (MESSAGE_FIELD(task, nlines_type) == RSPAMD_TASK_NEWLINES_LF) {
- rawbuf[raw_len++] = '\n';
- }
- else {
- rawbuf[raw_len++] = '\r';
-
- if (MESSAGE_FIELD(task, nlines_type) == RSPAMD_TASK_NEWLINES_CRLF) {
- rawbuf[raw_len++] = '\n';
- }
- }
-
- rawbuf[raw_len] = '\0';
-
- nhdr->decoded = rspamd_mime_header_decode(task->task_pool,
- raw_value, nhdr->raw_len,
- NULL);
-
- /* Now find a position to insert a value */
- struct rspamd_mime_header **pos = &hdr_elt->modified_chain;
-
- if (ord == 0) {
- DL_PREPEND(hdr_elt->modified_chain, nhdr);
- }
- else if (ord == -1) {
- DL_APPEND(hdr_elt->modified_chain, nhdr);
- }
- else if (ord > 0) {
- while (ord > 0 && (*pos)) {
- ord--;
- pos = &((*pos)->next);
- }
- if (*pos) {
- /* pos is &(elt)->next */
- nhdr->next = (*pos);
- nhdr->prev = (*pos)->prev;
- (*pos)->prev = nhdr;
- *pos = nhdr;
- }
- else {
- /* Last element */
- DL_APPEND(*pos, nhdr);
- }
- }
- else {
- /* NYI: negative order is not defined */
- msg_err_task("internal error: calling for set_modified_header "
- "with negative add order header");
- }
- }
- else {
- msg_err_task("internal error: calling for set_modified_header "
- "with invalid header");
- }
- }
- }
- }
- }
-
/**
 * Remove parenthesised comments from a buffer, rewriting it in place.
 *
 * Rules applied while scanning `len` bytes of `input`:
 *  - text between a '(' and its matching ')' is dropped; comments may nest
 *    to any depth, including empty and directly adjacent nested comments
 *    such as "((a))" or "(a()b)";
 *  - outside a comment, a backslash is dropped and the byte following it is
 *    copied verbatim (so "\(" yields a literal '(');
 *  - inside a comment, a backslash hides the following byte, so "\)" does
 *    not close the comment and "\(" does not open a nested one;
 *  - an unterminated comment swallows the rest of the buffer;
 *  - a ')' met outside of any comment is copied as ordinary text;
 *  - double quotes get no special treatment.
 *
 * The result is never longer than the input. No terminating NUL is written:
 * the caller must use the returned length.
 *
 * @param input buffer to rewrite (may be NULL only when len is 0)
 * @param len number of bytes to process
 * @return number of bytes kept at the beginning of `input`
 */
gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len)
{
	enum parser_state {
		parse_normal,
		parse_comment,
		parse_quoted_copy,
		parse_quoted_ignore,
	} state = parse_normal;
	gchar *d = input, *start = input, *end;
	/* Current comment nesting level; bounded by len, so it cannot overflow */
	gsize depth = 0;

	if (input == NULL || len == 0) {
		return 0;
	}

	end = input + len;

	while (input < end) {
		gchar t = *input++;

		switch (state) {
		case parse_normal:
			if (t == '(') {
				depth = 1;
				state = parse_comment;
			}
			else if (t == '\\') {
				/* Drop the backslash itself, keep whatever follows it */
				state = parse_quoted_copy;
			}
			else {
				*d++ = t;
			}
			break;
		case parse_comment:
			if (t == '(') {
				depth++;
			}
			else if (t == ')') {
				/* depth >= 1 whenever we are in this state */
				if (--depth == 0) {
					state = parse_normal;
				}
			}
			else if (t == '\\') {
				/* Escaped byte must not affect the nesting level */
				state = parse_quoted_ignore;
			}
			break;
		case parse_quoted_copy:
			*d++ = t;
			state = parse_normal;
			break;
		case parse_quoted_ignore:
			state = parse_comment;
			break;
		}
	}

	return (gsize) (d - start);
}
|