12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316 |
- /*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "mime_headers.h"
- #include "smtp_parsers.h"
- #include "mime_encoding.h"
- #include "received.h"
- #include "contrib/uthash/utlist.h"
- #include "libserver/mempool_vars_internal.h"
- #include "libserver/cfg_file.h"
- #include "libutil/util.h"
- #include <unicode/utf8.h>
-
- KHASH_INIT (rspamd_mime_headers_htb, gchar *,
- struct rspamd_mime_header *, 1,
- rspamd_strcase_hash, rspamd_strcase_equal);
-
- struct rspamd_mime_headers_table {
- khash_t(rspamd_mime_headers_htb) htb;
- ref_entry_t ref;
- };
-
- static void
- rspamd_mime_header_check_special (struct rspamd_task *task,
- struct rspamd_mime_header *rh)
- {
- guint64 h;
- const gchar *p, *end;
- gchar *id;
- gint max_recipients = -1, len;
-
- if (task->cfg) {
- max_recipients = task->cfg->max_recipients;
- }
-
- h = rspamd_icase_hash (rh->name, strlen (rh->name), 0xdeadbabe);
-
- switch (h) {
- case 0x88705DC4D9D61ABULL: /* received */
- if (rspamd_received_header_parse(task, rh->decoded, strlen (rh->decoded), rh)) {
- rh->flags |= RSPAMD_HEADER_RECEIVED;
- }
- break;
- case 0x76F31A09F4352521ULL: /* to */
- MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool,
- rh->value, strlen (rh->value),
- MESSAGE_FIELD (task, rcpt_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
- break;
- case 0x7EB117C1480B76ULL: /* cc */
- MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool,
- rh->value, strlen (rh->value),
- MESSAGE_FIELD (task, rcpt_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
- break;
- case 0xE4923E11C4989C8DULL: /* bcc */
- MESSAGE_FIELD (task, rcpt_mime) = rspamd_email_address_from_mime (task->task_pool,
- rh->value, strlen (rh->value),
- MESSAGE_FIELD (task, rcpt_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
- break;
- case 0x41E1985EDC1CBDE4ULL: /* from */
- MESSAGE_FIELD (task, from_mime) = rspamd_email_address_from_mime (task->task_pool,
- rh->value, strlen (rh->value),
- MESSAGE_FIELD (task, from_mime), max_recipients);
- rh->flags |= RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE;
- break;
- case 0x43A558FC7C240226ULL: /* message-id */ {
-
- rh->flags = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE;
- p = rh->decoded;
- len = rspamd_strip_smtp_comments_inplace(rh->decoded, strlen(p));
- rh->decoded[len] = '\0'; /* Zero terminate after stripping */
- end = p + len;
-
- if (*p == '<') {
- p++;
- }
-
- if (end > p) {
- gchar *d;
-
- if (*(end - 1) == '>') {
- end --;
- }
-
- id = rspamd_mempool_alloc (task->task_pool, end - p + 1);
- d = id;
-
- while (p < end) {
- if (g_ascii_isgraph (*p)) {
- *d++ = *p++;
- }
- else {
- *d++ = '?';
- p++;
- }
- }
-
- *d = '\0';
-
- MESSAGE_FIELD (task, message_id) = id;
- }
-
- break;
- }
- case 0xB91D3910358E8212ULL: /* subject */
- if (MESSAGE_FIELD (task, subject) == NULL) {
- MESSAGE_FIELD (task, subject) = rh->decoded;
- }
- rh->flags = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE;
- break;
- case 0xEE4AA2EAAC61D6F4ULL: /* return-path */
- if (task->from_envelope == NULL) {
- task->from_envelope = rspamd_email_address_from_smtp (rh->decoded,
- strlen (rh->decoded));
- }
- rh->flags = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE;
- break;
- case 0xB9EEFAD2E93C2161ULL: /* delivered-to */
- if (task->deliver_to == NULL) {
- task->deliver_to = rh->decoded;
- }
- rh->flags = RSPAMD_HEADER_DELIVERED_TO;
- break;
- case 0x2EC3BFF3C393FC10ULL: /* date */
- case 0xAC0DDB1A1D214CAULL: /* sender */
- case 0x54094572367AB695ULL: /* in-reply-to */
- case 0x81CD9E9131AB6A9AULL: /* content-type */
- case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */
- case 0xB3F6704CB3AD6589ULL: /* references */
- rh->flags = RSPAMD_HEADER_UNIQUE;
- break;
- }
- }
-
- static void
- rspamd_mime_header_add (struct rspamd_task *task,
- khash_t(rspamd_mime_headers_htb) *target,
- struct rspamd_mime_header **order_ptr,
- struct rspamd_mime_header *rh,
- gboolean check_special)
- {
- khiter_t k;
- struct rspamd_mime_header *ex;
- int res;
-
- k = kh_put (rspamd_mime_headers_htb, target, rh->name, &res);
-
- if (res == 0) {
- ex = kh_value (target, k);
- DL_APPEND (ex, rh);
- msg_debug_task ("append raw header %s: %s", rh->name, rh->value);
- }
- else {
- kh_value (target, k) = rh;
- rh->prev = rh;
- rh->next = NULL;
- msg_debug_task ("add new raw header %s: %s", rh->name, rh->value);
- }
-
- LL_PREPEND2 (*order_ptr, rh, ord_next);
-
- if (check_special) {
- rspamd_mime_header_check_special (task, rh);
- }
- }
-
-
- /* Convert raw headers to a list of struct raw_header * */
- void
- rspamd_mime_headers_process (struct rspamd_task *task,
- struct rspamd_mime_headers_table *target,
- struct rspamd_mime_header **order_ptr,
- const gchar *in, gsize len,
- gboolean check_newlines)
- {
- struct rspamd_mime_header *nh = NULL;
- const gchar *p, *c, *end;
- gchar *tmp, *tp;
- gint state = 0, l, next_state = 100, err_state = 100, t_state;
- gboolean valid_folding = FALSE;
- guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
- guint norder = 0;
-
- p = in;
- end = p + len;
- c = p;
- memset (nlines_count, 0, sizeof (nlines_count));
- msg_debug_task ("start processing headers");
-
- while (p < end) {
- /* FSM for processing headers */
- switch (state) {
- case 0:
- /* Begin processing headers */
- if (!g_ascii_isalpha (*p)) {
- /* We have some garbage at the beginning of headers, skip this line */
- state = 100;
- next_state = 0;
- }
- else {
- state = 1;
- c = p;
- }
- break;
- case 1:
- /* We got something like header's name */
- if (*p == ':') {
- nh = rspamd_mempool_alloc0 (task->task_pool,
- sizeof (struct rspamd_mime_header));
- l = p - c;
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- rspamd_null_safe_copy (c, l, tmp, l + 1);
- nh->name = tmp;
- nh->flags |= RSPAMD_HEADER_EMPTY_SEPARATOR;
- nh->raw_value = c;
- nh->raw_len = p - c; /* Including trailing ':' */
- p++;
- state = 2;
- c = p;
- }
- else if (g_ascii_isspace (*p)) {
- /* Not header but some garbage */
- task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
- state = 100;
- next_state = 0;
- }
- else {
- p++;
- }
- break;
- case 2:
- /* We got header's name, so skip any \t or spaces */
- if (*p == '\t') {
- nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
- nh->flags |= RSPAMD_HEADER_TAB_SEPARATED;
- p++;
- }
- else if (*p == ' ') {
- nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
- p++;
- }
- else if (*p == '\n' || *p == '\r') {
-
- if (check_newlines) {
- if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
- }
- else if (p + 1 < end && *(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
- }
- else {
- nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
- }
- }
-
- /* Process folding */
- state = 99;
- l = p - c;
- if (l > 0) {
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- rspamd_null_safe_copy (c, l, tmp, l + 1);
- nh->separator = tmp;
- }
- next_state = 3;
- err_state = 5;
- c = p;
- }
- else {
- /* Process value */
- l = p - c;
- if (l >= 0) {
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- rspamd_null_safe_copy (c, l, tmp, l + 1);
- nh->separator = tmp;
- }
- c = p;
- state = 3;
- }
- break;
- case 3:
- if (*p == '\r' || *p == '\n') {
- /* Hold folding */
- if (check_newlines) {
- if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
- }
- else if (p + 1 < end && *(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
- }
- else {
- nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
- }
- }
- state = 99;
- next_state = 3;
- err_state = 4;
- }
- else if (p + 1 == end) {
- state = 4;
- }
- else {
- p++;
- }
- break;
- case 4:
- /* Copy header's value */
-
- /*
- * XXX:
- * The original decision to use here null terminated
- * strings was extremely poor!
- */
- l = p - c;
- tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
- tp = tmp;
- t_state = 0;
- while (l--) {
- if (t_state == 0) {
- /* Before folding */
- if (*c == '\n' || *c == '\r') {
- t_state = 1;
- c++;
- *tp++ = ' ';
- }
- else {
- if (*c != '\0') {
- *tp++ = *c++;
- }
- else {
- c++;
- }
- }
- }
- else if (t_state == 1) {
- /* Inside folding */
- if (g_ascii_isspace (*c)) {
- c++;
- }
- else {
- t_state = 0;
- if (*c != '\0') {
- *tp++ = *c++;
- }
- else {
- c++;
- }
- }
- }
- }
- /* Strip last space that can be added by \r\n parsing */
- if (tp > tmp && *(tp - 1) == ' ') {
- tp--;
- }
-
- *tp = '\0';
- /* Strip the initial spaces that could also be added by folding */
- while (*tmp != '\0' && g_ascii_isspace (*tmp)) {
- tmp ++;
- }
-
- if (p + 1 == end) {
- nh->raw_len = end - nh->raw_value;
- }
- else {
- nh->raw_len = p - nh->raw_value;
- }
-
- nh->value = tmp;
-
- gboolean broken_utf = FALSE;
-
- nh->decoded = rspamd_mime_header_decode (task->task_pool,
- nh->value, strlen (tmp), &broken_utf);
-
- if (broken_utf) {
- task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
- }
-
- if (nh->decoded == NULL) {
- /* As we strip comments in place... */
- nh->decoded = rspamd_mempool_strdup (task->task_pool, "");
- }
-
- /* We also validate utf8 and replace all non-valid utf8 chars */
- rspamd_mime_charset_utf_enforce (nh->decoded, strlen (nh->decoded));
- nh->order = norder ++;
- rspamd_mime_header_add (task, &target->htb, order_ptr, nh, check_newlines);
- nh = NULL;
- state = 0;
- break;
- case 5:
- /* Header has only name, no value */
- nh->value = rspamd_mempool_strdup (task->task_pool, "");;
- nh->decoded = rspamd_mempool_strdup (task->task_pool, "");;
- nh->raw_len = p - nh->raw_value;
- nh->order = norder ++;
- rspamd_mime_header_add (task, &target->htb, order_ptr, nh, check_newlines);
- nh = NULL;
- state = 0;
- break;
- case 99:
- /* Folding state */
- if (p + 1 == end) {
- state = err_state;
- }
- else {
- if (*p == '\r' || *p == '\n') {
- p++;
- valid_folding = FALSE;
- }
- else if (*p == '\t' || *p == ' ') {
- /* Valid folding */
- p++;
- valid_folding = TRUE;
- }
- else {
- if (valid_folding) {
- debug_task ("go to state: %d->%d", state, next_state);
- state = next_state;
- }
- else {
- /* Fall back */
- debug_task ("go to state: %d->%d", state, err_state);
- state = err_state;
- }
- }
- }
- break;
- case 100:
- /* Fail state, skip line */
-
- if (*p == '\r') {
- if (p + 1 < end && *(p + 1) == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
- p++;
- }
- p++;
- state = next_state;
- }
- else if (*p == '\n') {
- nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
-
- if (p + 1 < end && *(p + 1) == '\r') {
- p++;
- }
- p++;
- state = next_state;
- }
- else if (p + 1 == end) {
- state = next_state;
- p++;
- }
- else {
- p++;
- }
- break;
- }
- }
-
- /* Since we have prepended headers, we need to reverse the list to get the actual order */
- LL_REVERSE (*order_ptr);
-
- if (check_newlines) {
- guint max_cnt = 0;
- gint sel = 0;
- rspamd_cryptobox_hash_state_t hs;
- guchar hout[rspamd_cryptobox_HASHBYTES], *hexout;
-
- for (gint i = RSPAMD_TASK_NEWLINES_CR; i < RSPAMD_TASK_NEWLINES_MAX; i ++) {
- if (nlines_count[i] > max_cnt) {
- max_cnt = nlines_count[i];
- sel = i;
- }
- }
-
- MESSAGE_FIELD (task, nlines_type) = sel;
-
- rspamd_cryptobox_hash_init (&hs, NULL, 0);
-
- LL_FOREACH (*order_ptr, nh) {
- if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) {
- rspamd_cryptobox_hash_update (&hs, nh->name, strlen (nh->name));
- }
- }
-
- rspamd_cryptobox_hash_final (&hs, hout);
- hexout = rspamd_mempool_alloc (task->task_pool, sizeof (hout) * 2 + 1);
- hexout[sizeof (hout) * 2] = '\0';
- rspamd_encode_hex_buf (hout, sizeof (hout), hexout,
- sizeof (hout) * 2 + 1);
- rspamd_mempool_set_variable (task->task_pool,
- RSPAMD_MEMPOOL_HEADERS_HASH,
- hexout, NULL);
- }
- }
-
- static void
- rspamd_mime_header_maybe_save_token (rspamd_mempool_t *pool,
- GString *out,
- GByteArray *token,
- GByteArray *decoded_token,
- rspamd_ftok_t *old_charset,
- rspamd_ftok_t *new_charset)
- {
- if (new_charset->len == 0) {
- g_assert_not_reached ();
- }
-
- if (old_charset->len > 0) {
- if (rspamd_ftok_casecmp (new_charset, old_charset) == 0) {
- rspamd_ftok_t srch;
-
- /*
- * Special case for iso-2022-jp:
- * https://github.com/vstakhov/rspamd/issues/1669
- */
- RSPAMD_FTOK_ASSIGN (&srch, "iso-2022-jp");
-
- if (rspamd_ftok_casecmp (new_charset, &srch) != 0) {
- /* We can concatenate buffers, just return */
- return;
- }
- }
- }
-
- /* We need to flush and decode old token to out string */
- if (rspamd_mime_to_utf8_byte_array (token, decoded_token, pool,
- rspamd_mime_detect_charset (new_charset, pool))) {
- g_string_append_len (out, decoded_token->data, decoded_token->len);
- }
-
- /* We also reset buffer */
- g_byte_array_set_size (token, 0);
- /*
- * Propagate charset
- *
- * Here are dragons: we save the original charset to allow buffers concat
- * in the condition at the beginning of the function.
- * However, it will likely cause unnecessary calls for
- * `rspamd_mime_detect_charset` which could be relatively expensive.
- * But we ignore that for now...
- */
- memcpy (old_charset, new_charset, sizeof (*old_charset));
- }
-
- static void
- rspamd_mime_header_sanity_check (GString *str)
- {
- gsize i;
- gchar t;
-
- for (i = 0; i < str->len; i ++) {
- t = str->str[i];
- if (!((t & 0x80) || g_ascii_isgraph (t))) {
- if (g_ascii_isspace (t)) {
- /* Replace spaces characters with plain space */
- str->str[i] = ' ';
- }
- else {
- str->str[i] = '?';
- }
- }
- }
- }
-
- gchar *
- rspamd_mime_header_decode (rspamd_mempool_t *pool, const gchar *in,
- gsize inlen, gboolean *invalid_utf)
- {
- GString *out;
- const guchar *c, *p, *end;
- const gchar *tok_start = NULL;
- gsize tok_len = 0, pos;
- GByteArray *token = NULL, *decoded;
- rspamd_ftok_t cur_charset = {0, NULL}, old_charset = {0, NULL};
- gint encoding;
- gssize r;
- guint qmarks = 0;
- gchar *ret;
- enum {
- parse_normal = 0,
- got_eqsign,
- got_encoded_start,
- got_more_qmark,
- skip_spaces,
- } state = parse_normal;
-
- g_assert (in != NULL);
-
- c = in;
- p = in;
- end = in + inlen;
- out = g_string_sized_new (inlen);
- token = g_byte_array_sized_new (80);
- decoded = g_byte_array_sized_new (122);
-
- while (p < end) {
- switch (state) {
- case parse_normal:
- if (*p == '=') {
- g_string_append_len (out, c, p - c);
- c = p;
- state = got_eqsign;
- }
- else if (*p >= 128) {
- gint off = 0;
- UChar32 uc;
- /* Unencoded character */
- g_string_append_len (out, c, p - c);
- /* Check if that's valid UTF8 */
- U8_NEXT (p, off, end - p, uc);
-
- if (uc <= 0) {
- c = p + 1;
- /* 0xFFFD in UTF8 */
- g_string_append_len (out, " ", 3);
- off = 0;
- U8_APPEND_UNSAFE (out->str + out->len - 3,
- off, 0xfffd);
-
- if (invalid_utf) {
- *invalid_utf = TRUE;
- }
- }
- else {
- c = p;
- p = p + off;
- continue; /* To avoid p ++ after this block */
- }
- }
- p ++;
- break;
- case got_eqsign:
- if (*p == '?') {
- state = got_encoded_start;
- qmarks = 0;
- }
- else {
- g_string_append_len (out, c, 1);
- c = p;
- state = parse_normal;
- continue; /* Deal with == case */
- }
- p ++;
- break;
- case got_encoded_start:
- if (*p == '?') {
- state = got_more_qmark;
- qmarks ++;
-
- /* Skip multiple ? signs */
- p ++;
- while (p < end && *p == '?') {
- p ++;
- }
-
- continue;
- }
- p ++;
- break;
- case got_more_qmark:
- if (*p == '=') {
- if (qmarks < 3) {
- state = got_encoded_start;
- }
- else {
- /* Finished encoded boundary */
- if (*c == '"') {
- /* Quoted string, non-RFC conformant but used by retards */
- c ++;
- }
- if (rspamd_rfc2047_parser (c, p - c + 1, &encoding,
- &cur_charset.begin, &cur_charset.len,
- &tok_start, &tok_len)) {
- /* We have a token, so we can decode it from `encoding` */
- if (token->len > 0) {
- if (old_charset.len == 0) {
- memcpy (&old_charset, &cur_charset,
- sizeof (old_charset));
- }
-
- rspamd_mime_header_maybe_save_token (pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
-
- qmarks = 0;
- pos = token->len;
- g_byte_array_set_size (token, pos + tok_len);
-
- if (encoding == RSPAMD_RFC2047_QP) {
- r = rspamd_decode_qp2047_buf (tok_start, tok_len,
- token->data + pos, tok_len);
-
- if (r != -1) {
- token->len = pos + r;
- } else {
- /* Cannot decode qp */
- token->len -= tok_len;
- }
- } else {
- if (rspamd_cryptobox_base64_decode (tok_start, tok_len,
- token->data + pos, &tok_len)) {
- token->len = pos + tok_len;
- } else {
- /* Cannot decode */
- token->len -= tok_len;
- }
- }
-
- c = p + 1;
- state = skip_spaces;
- } else {
- /* Not encoded-word */
- old_charset.len = 0;
-
- if (token->len > 0) {
- rspamd_mime_header_maybe_save_token (pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
-
- g_string_append_len (out, c, p - c);
- c = p;
- state = parse_normal;
- }
- } /* qmarks >= 3 */
- } /* p == '=' */
- else {
- state = got_encoded_start;
- }
- p ++;
- break;
- case skip_spaces:
- if (g_ascii_isspace (*p)) {
- p ++;
- }
- else if (*p == '=' && p < end - 1 && p[1] == '?') {
- /* Next boundary, can glue */
- c = p;
- p += 2;
- state = got_encoded_start;
- }
- else {
- /* Need to save spaces and decoded token */
- if (token->len > 0) {
- old_charset.len = 0;
- rspamd_mime_header_maybe_save_token (pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
-
- g_string_append_len (out, c, p - c);
- c = p;
- state = parse_normal;
- }
- break;
- }
- }
-
- /* Leftover */
- switch (state) {
- case skip_spaces:
- if (token->len > 0 && cur_charset.len > 0) {
- old_charset.len = 0;
- rspamd_mime_header_maybe_save_token (pool, out,
- token, decoded,
- &old_charset, &cur_charset);
- }
- break;
- default:
- /* Just copy leftover */
- if (p > c) {
- g_string_append_len (out, c, p - c);
- }
- break;
- }
-
- g_byte_array_free (token, TRUE);
- g_byte_array_free (decoded, TRUE);
- rspamd_mime_header_sanity_check (out);
- rspamd_mempool_notify_alloc (pool, out->len);
- ret = g_string_free (out, FALSE);
- rspamd_mempool_add_destructor (pool, g_free, ret);
-
- return ret;
- }
-
- gchar *
- rspamd_mime_header_encode (const gchar *in, gsize len)
- {
- const gchar *p = in, *end = in + len;
- gchar *out, encode_buf[80 * sizeof (guint32)];
- GString *res;
- gboolean need_encoding = FALSE;
-
- /* Check if we need to encode */
- while (p < end) {
- if ((((guchar)*p) & 0x80) != 0) {
- need_encoding = TRUE;
- break;
- }
- p ++;
- }
-
- if (!need_encoding) {
- out = g_malloc (len + 1);
- rspamd_strlcpy (out, in, len + 1);
- }
- else {
- /* Need encode */
- gsize ulen, pos;
- gint r;
- const gchar *prev;
- /* Choose step: =?UTF-8?Q?<qp>?= should be less than 76 chars */
- guint step = (76 - 12) / 3 + 1;
-
- ulen = g_utf8_strlen (in, len);
- res = g_string_sized_new (len * 2 + 1);
- pos = 0;
- prev = in;
- /* Adjust chunk size for unicode average length */
- step *= 1.0 * ulen / (gdouble)len;
-
- while (pos < ulen) {
- p = g_utf8_offset_to_pointer (in, pos);
-
- if (p > prev) {
- /* Encode and print */
- r = rspamd_encode_qp2047_buf (prev, p - prev,
- encode_buf, sizeof (encode_buf));
-
- if (r != -1) {
- if (res->len > 0) {
- rspamd_printf_gstring (res, " =?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- else {
- rspamd_printf_gstring (res, "=?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- }
- }
-
- pos += MIN (step, ulen - pos);
- prev = p;
- }
-
- /* Leftover */
- if (prev < end) {
- r = rspamd_encode_qp2047_buf (prev, end - prev,
- encode_buf, sizeof (encode_buf));
-
- if (r != -1) {
- if (res->len > 0) {
- rspamd_printf_gstring (res, " =?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- else {
- rspamd_printf_gstring (res, "=?UTF-8?Q?%*s?=", r,
- encode_buf);
- }
- }
- }
-
- out = g_string_free (res, FALSE);
- }
-
- return out;
- }
-
- gchar *
- rspamd_mime_message_id_generate (const gchar *fqdn)
- {
- GString *out;
- guint64 rnd, clk;
-
- out = g_string_sized_new (strlen (fqdn) + 22);
- rnd = ottery_rand_uint64 ();
- clk = rspamd_get_calendar_ticks () * 1e6;
-
- rspamd_printf_gstring (out, "%*bs.%*bs@%s",
- (gint)sizeof (guint64) - 3, (guchar *)&clk,
- (gint)sizeof (guint64), (gchar *)&rnd,
- fqdn);
-
- return g_string_free (out, FALSE);
- }
-
- struct rspamd_mime_header *
- rspamd_message_get_header_from_hash (struct rspamd_mime_headers_table *hdrs,
- const gchar *field,
- gboolean need_modified)
- {
- khiter_t k;
- khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb;
- struct rspamd_mime_header *hdr;
-
- if (htb) {
- k = kh_get (rspamd_mime_headers_htb, htb, (gchar *) field);
-
- if (k == kh_end (htb)) {
- return NULL;
- }
-
- hdr = kh_value (htb, k);
-
- if (!need_modified) {
- if (hdr->flags & RSPAMD_HEADER_NON_EXISTING) {
- return NULL;
- }
-
- return hdr;
- }
- else {
- if (hdr->flags & RSPAMD_HEADER_MODIFIED) {
- return hdr->modified_chain;
- }
-
- return hdr;
- }
- }
-
- return NULL;
- }
-
- struct rspamd_mime_header *
- rspamd_message_get_header_array (struct rspamd_task *task, const gchar *field,
- gboolean need_modified)
- {
- return rspamd_message_get_header_from_hash(
- MESSAGE_FIELD_CHECK (task, raw_headers),
- field, need_modified);
- }
-
- gsize
- rspamd_mime_headers_count (struct rspamd_mime_headers_table *hdrs)
- {
- if (hdrs) {
- return kh_size (&hdrs->htb);
- }
-
- return 0;
- }
-
- bool
- rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *hdrs,
- rspamd_hdr_traverse_func_t func, void *ud)
- {
- const gchar *name;
- struct rspamd_mime_header *hdr;
-
- kh_foreach(&hdrs->htb, name, hdr, {
- if (!func(name, hdr, ud)) {
- return false;
- }
- });
-
- return true;
- }
-
- static void
- rspamd_message_headers_dtor (struct rspamd_mime_headers_table *hdrs)
- {
- if (hdrs) {
- kfree (hdrs->htb.keys);
- kfree (hdrs->htb.vals);
- kfree (hdrs->htb.flags);
- g_free (hdrs);
- }
- }
-
/*
 * Apply REF_RETAIN to the headers table and hand the same pointer back, so
 * the call can be used inline in an assignment.
 */
struct rspamd_mime_headers_table *
rspamd_message_headers_ref (struct rspamd_mime_headers_table *hdrs)
{
	REF_RETAIN (hdrs);

	return hdrs;
}
-
/*
 * Apply REF_RELEASE to the headers table. The destructor registered in
 * rspamd_message_headers_new() is presumably run once the last reference is
 * gone - see the REF_RELEASE definition.
 */
void
rspamd_message_headers_unref (struct rspamd_mime_headers_table *hdrs)
{
	REF_RELEASE (hdrs);
}
-
- struct rspamd_mime_headers_table *
- rspamd_message_headers_new (void)
- {
- struct rspamd_mime_headers_table *nhdrs;
-
- nhdrs = g_malloc0 (sizeof (*nhdrs));
- REF_INIT_RETAIN (nhdrs, rspamd_message_headers_dtor);
-
- return nhdrs;
- }
-
- void
- rspamd_message_set_modified_header (struct rspamd_task *task,
- struct rspamd_mime_headers_table *hdrs,
- const gchar *hdr_name,
- const ucl_object_t *obj)
- {
- khiter_t k;
- khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb;
- struct rspamd_mime_header *hdr_elt, *existing_chain;
- int i;
-
- if (htb) {
- k = kh_get (rspamd_mime_headers_htb, htb, (gchar *)hdr_name);
-
- if (k == kh_end (htb)) {
- hdr_elt = rspamd_mempool_alloc0 (task->task_pool, sizeof (*hdr_elt));
-
- hdr_elt->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_NON_EXISTING;
- hdr_elt->name = rspamd_mempool_strdup (task->task_pool, hdr_name);
-
- int r;
- k = kh_put (rspamd_mime_headers_htb, htb, hdr_elt->name, &r);
-
- kh_value (htb, k) = hdr_elt;
- }
- else {
- hdr_elt = kh_value (htb, k);
- }
- }
- else {
- /* No hash, no modification */
- msg_err_task ("internal error: calling for set_modified_header for no headers");
- return;
- }
-
- if (hdr_elt->flags & RSPAMD_HEADER_MODIFIED) {
- existing_chain = hdr_elt->modified_chain;
- }
- else {
- existing_chain = hdr_elt;
- }
-
- const ucl_object_t *elt, *cur;
- ucl_object_iter_t it;
-
- /* First, deal with removed headers, copying the relevant headers with remove flag */
- elt = ucl_object_lookup (obj, "remove");
-
- /*
- * remove: {1, 2 ...}
- * where number is the header's position starting from '1'
- */
- if (elt && ucl_object_type (elt) == UCL_ARRAY) {
- /* First, use a temporary array to keep all headers */
- GPtrArray *existing_ar = g_ptr_array_new ();
- struct rspamd_mime_header *cur_hdr;
-
- /* Exclude removed headers */
- LL_FOREACH (existing_chain, cur_hdr) {
- if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
- g_ptr_array_add (existing_ar, cur_hdr);
- }
- }
-
- it = NULL;
-
- while ((cur = ucl_object_iterate (elt, &it, true)) != NULL) {
- if (ucl_object_type (cur) == UCL_INT) {
- int ord = ucl_object_toint (cur);
-
- if (ord == 0) {
- /* Remove all headers in the existing chain */
- PTR_ARRAY_FOREACH (existing_ar, i, cur_hdr) {
- cur_hdr->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_REMOVED;
- }
- }
- else if (ord > 0) {
- /* Start from the top */
-
- if (ord <= existing_ar->len) {
- cur_hdr = g_ptr_array_index (existing_ar, ord - 1);
- cur_hdr->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_REMOVED;
- }
- }
- else {
- /* Start from the bottom; ord < 0 */
- if ((-ord) <= existing_ar->len) {
- cur_hdr = g_ptr_array_index (existing_ar, existing_ar->len + ord);
- cur_hdr->flags |= RSPAMD_HEADER_MODIFIED|RSPAMD_HEADER_REMOVED;
- }
- }
- }
- }
-
- /*
- * Next, we return all headers modified to the existing chain
- * This implies an additional copy of all structures but is safe enough to
- * deal with it
- */
- hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
- hdr_elt->modified_chain = NULL;
- gint new_chain_length = 0;
-
- PTR_ARRAY_FOREACH (existing_ar, i, cur_hdr) {
- if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
- struct rspamd_mime_header *nhdr = rspamd_mempool_alloc (
- task->task_pool, sizeof (*nhdr));
- memcpy (nhdr, cur_hdr, sizeof (*nhdr));
- nhdr->modified_chain = NULL;
- nhdr->prev = NULL;
- nhdr->next = NULL;
- nhdr->ord_next = NULL;
-
- DL_APPEND (hdr_elt->modified_chain, nhdr);
- new_chain_length ++;
- }
- }
-
- g_ptr_array_free (existing_ar, TRUE);
-
- /* End of headers removal logic */
- }
-
- /* We can now deal with headers additions */
- elt = ucl_object_lookup (obj, "add");
- if (elt && ucl_object_type (elt) == UCL_ARRAY) {
- if (!(hdr_elt->flags & RSPAMD_HEADER_MODIFIED)) {
- /* Copy the header itself to the modified chain */
- struct rspamd_mime_header *nhdr;
- hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
- nhdr = rspamd_mempool_alloc (
- task->task_pool, sizeof (*nhdr));
- memcpy (nhdr, hdr_elt, sizeof (*hdr_elt));
- nhdr->modified_chain = NULL;
- nhdr->next = NULL;
- nhdr->ord_next = NULL;
- nhdr->prev = nhdr;
- hdr_elt->modified_chain = nhdr;
- }
-
- /*
- * add: {{1, "foo"}, {-1, "bar"} ...}
- * where number is the header's position starting from '1'
- */
- it = NULL;
-
- while ((cur = ucl_object_iterate (elt, &it, true)) != NULL) {
- if (ucl_object_type (cur) == UCL_ARRAY) {
- const ucl_object_t *order = ucl_array_find_index (cur, 0),
- *value = ucl_array_find_index (cur, 1);
-
- if (order && value &&
- (ucl_object_type (order) == UCL_INT &&
- ucl_object_type (value) == UCL_STRING)) {
- int ord = ucl_object_toint (order);
- const char *raw_value;
- gsize raw_len;
-
- raw_value = ucl_object_tolstring (value, &raw_len);
-
- if (raw_len == 0) {
- continue;
- }
-
- struct rspamd_mime_header *nhdr = rspamd_mempool_alloc0 (
- task->task_pool, sizeof (*nhdr));
-
- nhdr->flags |= RSPAMD_HEADER_ADDED;
- nhdr->name = hdr_elt->name;
- nhdr->value = rspamd_mempool_alloc (task->task_pool,
- raw_len + 1);
- nhdr->raw_len = rspamd_strlcpy (nhdr->value, raw_value,
- raw_len + 1);
- nhdr->raw_value = nhdr->value;
- nhdr->decoded = rspamd_mime_header_decode (task->task_pool,
- raw_value, raw_len, NULL);
-
- /* Now find a position to insert a value */
- struct rspamd_mime_header **pos = &hdr_elt->modified_chain;
-
- if (ord == 0) {
- DL_PREPEND (hdr_elt->modified_chain, nhdr);
- }
- else if (ord == -1) {
- DL_APPEND (hdr_elt->modified_chain, nhdr);
- }
- else if (ord > 0) {
- while (ord > 0 && (*pos)) {
- ord --;
- pos = &((*pos)->next);
- }
- if (*pos) {
- /* pos is &(elt)->next */
- nhdr->next = (*pos);
- nhdr->prev = (*pos)->prev;
- (*pos)->prev = nhdr;
- *pos = nhdr;
- }
- else {
- /* Last element */
- DL_APPEND (*pos, nhdr);
- }
- }
- else {
- /* NYI: negative order is not defined */
- msg_err_task ("internal error: calling for set_modified_header "
- "with negative add order header");
- }
- }
- else {
- msg_err_task ("internal error: calling for set_modified_header "
- "with invalid header");
- }
- }
- }
- }
- }
-
- gsize
- rspamd_strip_smtp_comments_inplace (gchar *input, gsize len)
- {
- enum parser_state {
- parse_normal,
- parse_obrace,
- parse_comment,
- parse_quoted_copy,
- parse_quoted_ignore,
- } state = parse_normal, next_state = parse_normal;
- gchar *d = input, *end = input + len, *start = input;
- gchar t;
- int obraces = 0, ebraces = 0;
-
- while (input < end) {
- t = *input;
- switch (state) {
- case parse_normal:
- if (t == '(') {
- state = parse_obrace;
- }
- else if (t == '\\') {
- state = parse_quoted_copy;
- next_state = parse_normal;
- }
- else {
- *d++ = t;
- }
- input ++;
- break;
- case parse_obrace:
- obraces ++;
- if (t == '(') {
- obraces ++;
- }
- else if (t == ')') {
- ebraces ++;
-
- if (obraces == ebraces) {
- obraces = 0;
- ebraces = 0;
- state = parse_normal;
- }
- }
- else if (t == '\\') {
- state = parse_quoted_ignore;
- next_state = parse_comment;
- }
- else {
- state = parse_comment;
- }
- input ++;
- break;
- case parse_comment:
- if (t == '(') {
- state = parse_obrace;
- }
- else if (t == ')') {
- ebraces ++;
-
- if (obraces == ebraces) {
- obraces = 0;
- ebraces = 0;
- state = parse_normal;
- }
- }
- else if (t == '\\') {
- state = parse_quoted_ignore;
- next_state = parse_comment;
- }
- input ++;
- break;
- case parse_quoted_copy:
- *d++ = t;
- state = next_state;
- input ++;
- break;
- case parse_quoted_ignore:
- state = next_state;
- input ++;
- break;
- }
- }
-
- return (d - start);
- }
|