diff options
Diffstat (limited to 'src/libmime/message.c')
-rw-r--r-- | src/libmime/message.c | 1764 |
1 files changed, 1764 insertions, 0 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c new file mode 100644 index 000000000..4567869e9 --- /dev/null +++ b/src/libmime/message.c @@ -0,0 +1,1764 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "util.h" +#include "main.h" +#include "message.h" +#include "cfg_file.h" +#include "html.h" +#include "images.h" + +#define RECURSION_LIMIT 30 +#define UTF8_CHARSET "UTF-8" + +GByteArray * +strip_html_tags (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, GByteArray * src, gint *stateptr) +{ + uint8_t *p, *rp, *tbegin = NULL, *end, c, lc; + gint br, i = 0, depth = 0, in_q = 0; + gint state = 0; + GByteArray *buf; + GNode *level_ptr = NULL; + gboolean erase = FALSE; + + if (stateptr) + state = *stateptr; + + buf = g_byte_array_sized_new (src->len); + g_byte_array_append (buf, src->data, src->len); + + c = *src->data; + lc = '\0'; + p = src->data; + rp = buf->data; + end = src->data + src->len; + br = 0; + + while (i < (gint)src->len) { + switch (c) { + case '\0': + break; + case '<': + if (g_ascii_isspace (*(p + 1))) { + goto reg_char; + } + if (state == 0) { + lc = '<'; + tbegin = p + 1; + state = 1; + } + else if (state == 1) { + /* Opening bracket without closing one */ + p --; + while (g_ascii_isspace (*p) && p > src->data) { + p --; + } + p ++; + goto unbreak_tag; + } + break; + + case '(': + if (state == 2) { + if (lc != '"' && lc != '\'') { + lc = '('; + br++; + } + } + else if (state == 0 && !erase) { + *(rp++) = c; + } + break; + + case ')': + if (state == 2) { + if (lc != '"' && lc != '\'') { + lc = ')'; + br--; + } + } + else if (state == 0 && !erase) { + *(rp++) = c; + } + break; + + case '>': + if (depth) { + depth--; + break; + } + + if (in_q) { + break; + } +unbreak_tag: + switch (state) { + case 1: /* HTML/XML */ + lc = '>'; + in_q = state = 0; + erase = !add_html_node (task, pool, part, tbegin, p - tbegin, end - tbegin, &level_ptr); + break; + + case 2: /* PHP */ + if (!br && lc != '\"' && *(p - 1) == '?') { + in_q = state = 0; + } + break; + + case 3: + in_q = state = 0; + break; + + case 4: /* JavaScript/CSS/etc... */ + if (p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '-') { + in_q = state = 0; + } + break; + + default: + if (!erase) { + *(rp++) = c; + } + break; + } + break; + + case '"': + case '\'': + if (state == 2 && *(p - 1) != '\\') { + if (lc == c) { + lc = '\0'; + } + else if (lc != '\\') { + lc = c; + } + } + else if (state == 0 && !erase) { + *(rp++) = c; + } + if (state && p != src->data && *(p - 1) != '\\' && (!in_q || *p == in_q)) { + if (in_q) { + in_q = 0; + } + else { + in_q = *p; + } + } + break; + + case '!': + /* JavaScript & Other HTML scripting languages */ + if (state == 1 && *(p - 1) == '<') { + state = 3; + lc = c; + } + else { + if (state == 0 && !erase) { + *(rp++) = c; + } + } + break; + + case '-': + if (state == 3 && p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '!') { + state = 4; + } + else { + goto reg_char; + } + break; + + case '?': + + if (state == 1 && *(p - 1) == '<') { + br = 0; + state = 2; + break; + } + + case 'E': + case 'e': + /* !DOCTYPE exception */ + if (state == 3 && p > src->data + 6 + && g_ascii_tolower (*(p - 1)) == 'p' + && g_ascii_tolower (*(p - 2)) == 'y' + && g_ascii_tolower (*(p - 3)) == 't' && g_ascii_tolower (*(p - 4)) == 'c' && g_ascii_tolower (*(p - 5)) == 'o' && g_ascii_tolower (*(p - 6)) == 'd') { + state = 1; + break; + } + /* fall-through */ + + case 'l': + + /* swm: If we encounter '<?xml' then we shouldn't be in + * state == 2 (PHP). Switch back to HTML. + */ + + if (state == 2 && p > src->data + 2 && *(p - 1) == 'm' && *(p - 2) == 'x') { + state = 1; + break; + } + + /* fall-through */ + default: + reg_char: + if (state == 0 && !erase) { + *(rp++) = c; + } + break; + } + i++; + if (i < (gint)src->len) { + c = *(++p); + } + } + if (rp < buf->data + src->len) { + *rp = '\0'; + g_byte_array_set_size (buf, rp - buf->data); + } + + /* Check tag balancing */ + if (level_ptr && level_ptr->data != NULL) { + part->is_balanced = FALSE; + } + + if (stateptr) { + *stateptr = state; + } + + return buf; +} + +static void +parse_qmail_recv (rspamd_mempool_t * pool, gchar *line, struct received_header *r) +{ + gchar *s, *p, t; + + /* We are interested only with received from network headers */ + if ((p = strstr (line, "from network")) == NULL) { + r->is_error = 2; + return; + } + + p += sizeof ("from network") - 1; + while (g_ascii_isspace (*p) || *p == '[') { + p++; + } + /* format is ip/host */ + s = p; + if (*p) { + while (g_ascii_isdigit (*++p) || *p == '.'); + if (*p != '/') { + r->is_error = 1; + return; + } + else { + *p = '\0'; + r->real_ip = rspamd_mempool_strdup (pool, s); + *p = '/'; + /* Now try to parse hostname */ + s = ++p; + while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') { + p++; + } + t = *p; + *p = '\0'; + r->real_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + } + } +} + +static void +parse_recv_header (rspamd_mempool_t * pool, gchar *line, struct received_header *r) +{ + gchar *p, *s, t, **res = NULL; + enum { + RSPAMD_RECV_STATE_INIT = 0, + RSPAMD_RECV_STATE_FROM, + RSPAMD_RECV_STATE_IP_BLOCK, + RSPAMD_RECV_STATE_BRACES_BLOCK, + RSPAMD_RECV_STATE_BY_BLOCK, + RSPAMD_RECV_STATE_PARSE_IP, + RSPAMD_RECV_STATE_SKIP_SPACES, + RSPAMD_RECV_STATE_ERROR + } state = RSPAMD_RECV_STATE_INIT, + next_state = RSPAMD_RECV_STATE_INIT; + gboolean is_exim = FALSE; + + g_strstrip (line); + p = line; + s = line; + + while (*p) { + switch (state) { + /* Initial state, search for from */ + case RSPAMD_RECV_STATE_INIT: + if (*p == 'f' || *p == 'F') { + if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == 'o' && g_ascii_tolower (*++p) == 'm') { + p++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_FROM; + } + } + else if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') { + state = RSPAMD_RECV_STATE_IP_BLOCK; + } + else { + /* This can be qmail header, parse it separately */ + parse_qmail_recv (pool, line, r); + return; + } + break; + /* Read hostname */ + case RSPAMD_RECV_STATE_FROM: + if (*p == '[') { + /* This should be IP address */ + res = &r->from_ip; + state = RSPAMD_RECV_STATE_PARSE_IP; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + s = ++p; + } + else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') { + p++; + } + else { + t = *p; + *p = '\0'; + r->from_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + break; + /* Try to extract additional info */ + case RSPAMD_RECV_STATE_IP_BLOCK: + /* Try to extract ip or () info or by */ + if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') { + p += 2; + /* Skip spaces after by */ + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BY_BLOCK; + } + else if (*p == '(') { + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + p++; + } + else if (*p == '[') { + /* Got ip before '(' so extract it */ + s = ++p; + res = &r->from_ip; + state = RSPAMD_RECV_STATE_PARSE_IP; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + else { + p++; + } + break; + /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */ + case RSPAMD_RECV_STATE_BRACES_BLOCK: + /* End of block */ + if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || + *p == '_' || *p == ':') { + p++; + } + else if (*p == '[') { + s = ++p; + state = RSPAMD_RECV_STATE_PARSE_IP; + res = &r->real_ip; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else { + if (p > s) { + /* Got some real hostname */ + /* check whether it is helo or p is not space symbol */ + if (!g_ascii_isspace (*p) || *(p + 1) != '[') { + /* Exim style ([ip]:port helo=hostname) */ + if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) { + /* Ip ending */ + is_exim = TRUE; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) { + p ++; + is_exim = TRUE; + if (r->real_hostname == NULL && r->from_hostname != NULL) { + r->real_hostname = r->from_hostname; + } + s = p; + while (*p != ')' && !g_ascii_isspace (*p) && *p != '\0') { + p ++; + } + if (p > s) { + r->from_hostname = rspamd_mempool_alloc (pool, p - s + 1); + rspamd_strlcpy (r->from_hostname, s, p - s + 1); + } + } + else if (p - s == 4 && memcmp (s, "port=", 5) == 0) { + p ++; + is_exim = TRUE; + while (g_ascii_isdigit (*p)) { + p ++; + } + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else if (*p == '=' && is_exim) { + /* Just skip unknown pairs */ + p ++; + while (!g_ascii_isspace (*p) && *p != ')' && *p != '\0') { + p ++; + } + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + } + else { + /* skip all */ + while (*p++ != ')' && *p != '\0'); + state = RSPAMD_RECV_STATE_IP_BLOCK; + } + } + else { + /* Postfix style (hostname [ip]) */ + t = *p; + *p = '\0'; + r->real_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + /* Now parse ip */ + p += 2; + s = p; + res = &r->real_ip; + state = RSPAMD_RECV_STATE_PARSE_IP; + next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; + continue; + } + if (*p == ')') { + p ++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + } + else if (*p == ')') { + p ++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + next_state = RSPAMD_RECV_STATE_IP_BLOCK; + } + else { + r->is_error = 1; + return; + } + } + break; + /* Got by word */ + case RSPAMD_RECV_STATE_BY_BLOCK: + /* Here can be only hostname */ + if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-' + || *p == '_') && p[1] != '\0') { + p++; + } + else { + /* We got something like hostname */ + if (p[1] != '\0') { + t = *p; + *p = '\0'; + r->by_hostname = rspamd_mempool_strdup (pool, s); + *p = t; + } + else { + r->by_hostname = rspamd_mempool_strdup (pool, s); + } + /* Now end of parsing */ + if (is_exim) { + /* Adjust for exim received */ + if (r->real_ip == NULL && r->from_ip != NULL) { + r->real_ip = r->from_ip; + } + else if (r->from_ip == NULL && r->real_ip != NULL) { + r->from_ip = r->real_ip; + if (r->real_hostname == NULL && r->from_hostname != NULL) { + r->real_hostname = r->from_hostname; + } + } + } + return; + } + break; + + /* Extract ip */ + case RSPAMD_RECV_STATE_PARSE_IP: + while (g_ascii_isxdigit (*p) || *p == '.' || *p == ':') { + p ++; + } + if (*p != ']') { + /* Not an ip in fact */ + state = RSPAMD_RECV_STATE_SKIP_SPACES; + p++; + } + else { + *p = '\0'; + *res = rspamd_mempool_strdup (pool, s); + *p = ']'; + p++; + state = RSPAMD_RECV_STATE_SKIP_SPACES; + } + break; + + /* Skip spaces */ + case RSPAMD_RECV_STATE_SKIP_SPACES: + if (!g_ascii_isspace (*p)) { + state = next_state; + s = p; + } + else { + p++; + } + break; + default: + r->is_error = 1; + return; + break; + } + } + + r->is_error = 1; + return; +} + +/* Convert raw headers to a list of struct raw_header * */ +static void +process_raw_headers (struct rspamd_task *task) +{ + struct raw_header *new = NULL, *lp; + gchar *p, *c, *tmp, *tp; + gint state = 0, l, next_state = 100, err_state = 100, t_state; + gboolean valid_folding = FALSE; + + p = task->raw_headers_str; + c = p; + while (*p) { + /* FSM for processing headers */ + switch (state) { + case 0: + /* Begin processing headers */ + if (!g_ascii_isalpha (*p)) { + /* We have some garbage at the beginning of headers, skip this line */ + state = 100; + next_state = 0; + } + else { + state = 1; + c = p; + } + break; + case 1: + /* We got something like header's name */ + if (*p == ':') { + new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct raw_header)); + l = p - c; + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->name = tmp; + new->empty_separator = TRUE; + p ++; + state = 2; + c = p; + } + else if (g_ascii_isspace (*p)) { + /* Not header but some garbage */ + state = 100; + next_state = 0; + } + else { + p ++; + } + break; + case 2: + /* We got header's name, so skip any \t or spaces */ + if (*p == '\t') { + new->tab_separated = TRUE; + new->empty_separator = FALSE; + p ++; + } + else if (*p == ' ') { + new->empty_separator = FALSE; + p ++; + } + else if (*p == '\n' || *p == '\r') { + /* Process folding */ + state = 99; + l = p - c; + if (l > 0) { + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->separator = tmp; + } + next_state = 3; + err_state = 5; + c = p; + } + else { + /* Process value */ + l = p - c; + if (l >= 0) { + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + rspamd_strlcpy (tmp, c, l + 1); + new->separator = tmp; + } + c = p; + state = 3; + } + break; + case 3: + if (*p == '\r' || *p == '\n') { + /* Hold folding */ + state = 99; + next_state = 3; + err_state = 4; + } + else if (*(p + 1) == '\0') { + state = 4; + } + else { + p ++; + } + break; + case 4: + /* Copy header's value */ + l = p - c; + tmp = rspamd_mempool_alloc (task->task_pool, l + 1); + tp = tmp; + t_state = 0; + while (l --) { + if (t_state == 0) { + /* Before folding */ + if (*c == '\n' || *c == '\r') { + t_state = 1; + c ++; + *tp ++ = ' '; + } + else { + *tp ++ = *c ++; + } + } + else if (t_state == 1) { + /* Inside folding */ + if (g_ascii_isspace (*c)) { + c++; + } + else { + t_state = 0; + *tp ++ = *c ++; + } + } + } + /* Strip last space that can be added by \r\n parsing */ + if (*(tp - 1) == ' ') { + tp --; + } + *tp = '\0'; + new->value = tmp; + new->next = NULL; + if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { + while (lp->next != NULL) { + lp = lp->next; + } + lp->next = new; + } + else { + g_hash_table_insert (task->raw_headers, new->name, new); + } + debug_task ("add raw header %s: %s", new->name, new->value); + state = 0; + break; + case 5: + /* Header has only name, no value */ + new->next = NULL; + new->value = ""; + if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { + while (lp->next != NULL) { + lp = lp->next; + } + lp->next = new; + } + else { + g_hash_table_insert (task->raw_headers, new->name, new); + } + state = 0; + debug_task ("add raw header %s: %s", new->name, new->value); + break; + case 99: + /* Folding state */ + if (*(p + 1) == '\0') { + state = err_state; + } + else { + if (*p == '\r' || *p == '\n') { + p ++; + valid_folding = FALSE; + } + else if (*p == '\t' || *p == ' ') { + /* Valid folding */ + p ++; + valid_folding = TRUE; + } + else { + if (valid_folding) { + debug_task ("go to state: %d->%d", state, next_state); + state = next_state; + } + else { + /* Fall back */ + debug_task ("go to state: %d->%d", state, err_state); + state = err_state; + } + } + } + break; + case 100: + /* Fail state, skip line */ + if (*p == '\r') { + if (*(p + 1) == '\n') { + p ++; + } + p ++; + state = next_state; + } + else if (*p == '\n') { + if (*(p + 1) == '\r') { + p ++; + } + p ++; + state = next_state; + } + else if (*(p + 1) == '\0') { + state = next_state; + p ++; + } + else { + p ++; + } + break; + } + } +} + +static void +free_byte_array_callback (void *pointer) +{ + GByteArray *arr = (GByteArray *) pointer; + g_byte_array_free (arr, TRUE); +} + +static GByteArray * +convert_text_to_utf (struct rspamd_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part) +{ + GError *err = NULL; + gsize read_bytes, write_bytes; + const gchar *charset; + gchar *res_str; + GByteArray *result_array; + + if (task->cfg->raw_mode) { + text_part->is_raw = TRUE; + return part_content; + } + + if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) { + text_part->is_raw = TRUE; + return part_content; + } + + if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) { + if (g_utf8_validate (part_content->data, part_content->len, NULL)) { + text_part->is_raw = FALSE; + text_part->is_utf = TRUE; + return part_content; + } + else { + msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id); + text_part->is_raw = TRUE; + return part_content; + } + } + + res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err); + if (res_str == NULL) { + msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem"); + text_part->is_raw = TRUE; + return part_content; + } + + result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); + result_array->data = res_str; + result_array->len = write_bytes; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, res_str); + text_part->is_raw = FALSE; + text_part->is_utf = TRUE; + + return result_array; +} + +static void +process_text_part (struct rspamd_task *task, GByteArray *part_content, GMimeContentType *type, + GMimeObject *part, GMimeObject *parent, gboolean is_empty) +{ + struct mime_text_part *text_part; + const gchar *cd; + + /* Skip attachements */ +#ifndef GMIME24 + cd = g_mime_part_get_content_disposition (GMIME_PART (part)); + if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#else + cd = g_mime_object_get_disposition (GMIME_OBJECT (part)); + if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#endif + + if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) { + debug_task ("got urls from text/html part"); + + text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); + text_part->is_html = TRUE; + if (is_empty) { + text_part->is_empty = TRUE; + text_part->orig = NULL; + text_part->content = NULL; + task->text_parts = g_list_prepend (task->text_parts, text_part); + return; + } + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); + text_part->is_balanced = TRUE; + text_part->html_nodes = NULL; + text_part->parent = parent; + + text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL); + + if (text_part->html_nodes == NULL) { + url_parse_text (task->task_pool, task, text_part, FALSE); + } + else { + decode_entitles (text_part->content->data, &text_part->content->len); + url_parse_text (task->task_pool, task, text_part, FALSE); +#if 0 + url_parse_text (task->task_pool, task, text_part, TRUE); +#endif + } + + fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content); + task->text_parts = g_list_prepend (task->text_parts, text_part); + } + else if (g_mime_content_type_is_type (type, "text", "*")) { + debug_task ("got urls from text/plain part"); + + text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); + text_part->is_html = FALSE; + text_part->parent = parent; + if (is_empty) { + text_part->is_empty = TRUE; + text_part->orig = NULL; + text_part->content = NULL; + task->text_parts = g_list_prepend (task->text_parts, text_part); + return; + } + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); + text_part->content = text_part->orig; + url_parse_text (task->task_pool, task, text_part, FALSE); + fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); + task->text_parts = g_list_prepend (task->text_parts, text_part); + } +} + +#ifdef GMIME24 +static void +mime_foreach_callback (GMimeObject * parent, GMimeObject * part, gpointer user_data) +#else +static void +mime_foreach_callback (GMimeObject * part, gpointer user_data) +#endif +{ + struct rspamd_task *task = (struct rspamd_task *)user_data; + struct mime_part *mime_part; + GMimeContentType *type; + GMimeDataWrapper *wrapper; + GMimeStream *part_stream; + GByteArray *part_content; + + task->parts_count++; + + /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ + + /* find out what class 'part' is... */ + if (GMIME_IS_MESSAGE_PART (part)) { + /* message/rfc822 or message/news */ + GMimeMessage *message; + + /* g_mime_message_foreach_part() won't descend into + child message parts, so if we want to count any + subparts of this child message, we'll have to call + g_mime_message_foreach_part() again here. */ + + message = g_mime_message_part_get_message ((GMimeMessagePart *) part); + if (task->parser_recursion++ < RECURSION_LIMIT) { +#ifdef GMIME24 + g_mime_message_foreach (message, mime_foreach_callback, task); +#else + g_mime_message_foreach_part (message, mime_foreach_callback, task); +#endif + } + else { + msg_err ("endless recursion detected: %d", task->parser_recursion); + return; + } +#ifndef GMIME24 + g_object_unref (message); +#endif + } + else if (GMIME_IS_MESSAGE_PARTIAL (part)) { + /* message/partial */ + + /* this is an incomplete message part, probably a + large message that the sender has broken into + smaller parts and is sending us bit by bit. we + could save some info about it so that we could + piece this back together again once we get all the + parts? */ + } + else if (GMIME_IS_MULTIPART (part)) { + /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ + task->parser_parent_part = part; +#ifndef GMIME24 + debug_task ("detected multipart part"); + /* we'll get to finding out if this is a signed/encrypted multipart later... */ + if (task->parser_recursion++ < RECURSION_LIMIT) { + g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, task); + } + else { + msg_err ("endless recursion detected: %d", task->parser_recursion); + return; + } +#endif + } + else if (GMIME_IS_PART (part)) { + /* a normal leaf part, could be text/plain or image/jpeg etc */ +#ifdef GMIME24 + type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (part)); +#else + type = (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part)); +#endif + if (type == NULL) { + msg_warn ("type of part is unknown, assume text/plain"); + type = g_mime_content_type_new ("text", "plain"); +#ifdef GMIME24 + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, type); +#else + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_mime_content_type_destroy, type); +#endif + } + wrapper = g_mime_part_get_content_object (GMIME_PART (part)); +#ifdef GMIME24 + if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) { +#else + if (wrapper != NULL) { +#endif + part_stream = g_mime_stream_mem_new (); + if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE); + part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream)); + g_object_unref (part_stream); + mime_part = rspamd_mempool_alloc (task->task_pool, sizeof (struct mime_part)); + mime_part->type = type; + mime_part->content = part_content; + mime_part->parent = task->parser_parent_part; + mime_part->filename = g_mime_part_get_filename (GMIME_PART (part)); + debug_task ("found part with content-type: %s/%s", type->type, type->subtype); + task->parts = g_list_prepend (task->parts, mime_part); + /* Skip empty parts */ + process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0)); + } + else { + msg_warn ("write to stream failed: %d, %s", errno, strerror (errno)); + } +#ifndef GMIME24 + g_object_unref (wrapper); +#endif + } + else { + msg_warn ("cannot get wrapper for mime part, type of part: %s/%s", type->type, type->subtype); + } + } + else { + g_assert_not_reached (); + } +} + +static void +destroy_message (void *pointer) +{ + GMimeMessage *msg = pointer; + + msg_debug ("freeing pointer %p", msg); + g_object_unref (msg); +} + +gint +process_message (struct rspamd_task *task) +{ + GMimeMessage *message; + GMimeParser *parser; + GMimeStream *stream; + GByteArray *tmp; + GList *first, *cur; + GMimePart *part; + GMimeDataWrapper *wrapper; + struct received_header *recv; + gchar *mid, *url_str, *p, *end, *url_end; + struct uri *subject_url; + gsize len; + gint rc; + + tmp = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); + tmp->data = task->msg->str; + tmp->len = task->msg->len; + + stream = g_mime_stream_mem_new_with_byte_array (tmp); + /* + * This causes g_mime_stream not to free memory by itself as it is memory allocated by + * pool allocator + */ + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE); + + if (task->is_mime) { + + debug_task ("construct mime parser from string length %d", (gint)task->msg->len); + /* create a new parser object to parse the stream */ + parser = g_mime_parser_new_with_stream (stream); + g_object_unref (stream); + + /* parse the message from the stream */ + message = g_mime_parser_construct_message (parser); + + if (message == NULL) { + msg_warn ("cannot construct mime from stream"); + return -1; + } + + task->message = message; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); + + /* Save message id for future use */ + task->message_id = g_mime_message_get_message_id (task->message); + if (task->message_id == NULL) { + task->message_id = "undef"; + } + + task->parser_recursion = 0; +#ifdef GMIME24 + g_mime_message_foreach (message, mime_foreach_callback, task); +#else + /* + * This is rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback + * so we need to set up parent part by hands + */ + task->parser_parent_part = g_mime_message_get_mime_part (message); + g_object_unref (task->parser_parent_part); + g_mime_message_foreach_part (message, mime_foreach_callback, task); +#endif + + debug_task ("found %d parts in message", task->parts_count); + if (task->queue_id == NULL) { + task->queue_id = "undef"; + } + +#ifdef GMIME24 + task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message)); +#else + task->raw_headers_str = g_mime_message_get_headers (task->message); +#endif + + process_images (task); + + /* Parse received headers */ + first = message_get_header (task->task_pool, message, "Received", FALSE); + cur = first; + while (cur) { + recv = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct received_header)); + parse_recv_header (task->task_pool, cur->data, recv); + task->received = g_list_prepend (task->received, recv); + cur = g_list_next (cur); + } + if (first) { + g_list_free (first); + } + + if (task->raw_headers_str) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, task->raw_headers_str); + process_raw_headers (task); + } + + task->rcpts = g_mime_message_get_all_recipients (message); + if (task->rcpts) { +#ifdef GMIME24 + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, task->rcpts); +#else + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) internet_address_list_destroy, task->rcpts); +#endif + } + + + /* free the parser (and the stream) */ + g_object_unref (parser); + } + else { + /* We got only message, no mime headers or anything like this */ + /* Construct fake message for it */ + task->message = g_mime_message_new (TRUE); + if (task->from) { + g_mime_message_set_sender (task->message, task->from); + } + /* Construct part for it */ + part = g_mime_part_new_with_type ("text", "html"); +#ifdef GMIME24 + wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_CONTENT_ENCODING_8BIT); +#else + wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_PART_ENCODING_8BIT); +#endif + g_mime_part_set_content_object (part, wrapper); + g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part)); + /* Register destructors */ + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, wrapper); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, part); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); + /* Now parse in a normal way */ + task->parser_recursion = 0; +#ifdef GMIME24 + g_mime_message_foreach (task->message, mime_foreach_callback, task); +#else + g_mime_message_foreach_part (task->message, mime_foreach_callback, task); +#endif + /* Generate message ID */ + mid = g_mime_utils_generate_message_id ("localhost.localdomain"); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, mid); + g_mime_message_set_message_id (task->message, mid); + task->message_id = mid; + task->queue_id = mid; + /* Set headers for message */ + if (task->subject) { + g_mime_message_set_subject (task->message, task->subject); + } + + /* Add recipients */ +#ifndef GMIME24 + if (task->rcpt) { + cur = task->rcpt; + while (cur) { + g_mime_message_add_recipient (task->message, GMIME_RECIPIENT_TYPE_TO, NULL, (gchar *)cur->data); + cur = g_list_next (cur); + } + } +#endif + } + + /* Parse urls inside Subject header */ + cur = message_get_header (task->task_pool, task->message, "Subject", FALSE); + if (cur) { + p = cur->data; + len = strlen (p); + end = p + len; + + while (p < end) { + /* Search to the end of url */ + if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str, FALSE)) { + if (url_str != NULL) { + subject_url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri)); + if (subject_url != NULL) { + /* Try to parse url */ + rc = parse_uri (subject_url, url_str, task->task_pool); + if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && + subject_url->hostlen > 0) { + if (subject_url->protocol != PROTOCOL_MAILTO) { + if (!g_tree_lookup (task->urls, subject_url)) { + g_tree_insert (task->urls, subject_url, subject_url); + } + } + } + else if (rc != URI_ERRNO_OK) { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + } + else { + break; + } + p = url_end + 1; + } + /* Free header's list */ + g_list_free (cur); + } + + return 0; +} + +struct gmime_raw_header { + struct raw_header *next; + gchar *name; + gchar *value; +}; + +typedef struct _GMimeHeader { + GHashTable *hash; + GHashTable *writers; + struct raw_header *headers; +} local_GMimeHeader; + + +/* known header field types */ +enum { + HEADER_FROM = 0, + HEADER_REPLY_TO, + HEADER_TO, + HEADER_CC, + HEADER_BCC, + HEADER_SUBJECT, + HEADER_DATE, + HEADER_MESSAGE_ID, + HEADER_UNKNOWN +}; + +/* + * Iterate throught all headers and make a list + */ +#ifndef GMIME24 +static void +header_iterate (rspamd_mempool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong) +{ + while (h) { + if (G_LIKELY (!strong)) { + if (h->value && !g_ascii_strncasecmp (field, h->name, strlen (field))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); + } + else { + *ret = g_list_prepend (*ret, g_strdup (h->value)); + } + } + } + else { + if (h->value && !strncmp (field, h->name, strlen (field))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); + } + else { + *ret = g_list_prepend (*ret, g_strdup (h->value)); + } + } + } + h = (struct gmime_raw_header *)h->next; + } +} +#else +static void +header_iterate (rspamd_mempool_t * pool, GMimeHeaderList * ls, GList ** ret, const gchar *field, gboolean strong) +{ + /* Use iterator in case of gmime 2.4 */ + GMimeHeaderIter *iter; + const gchar *name; + + if (ls == NULL) { + *ret = NULL; + return; + } + + iter = g_mime_header_iter_new (); + if (g_mime_header_list_get_iter (ls, iter) && g_mime_header_iter_first (iter)) { + /* Iterate throught headers */ + while (g_mime_header_iter_is_valid (iter)) { + name = g_mime_header_iter_get_name (iter); + if (G_LIKELY (!strong)) { + if (!g_ascii_strncasecmp (field, name, strlen (name))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); + } + else { + *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); + } + } + } + else { + if (!strncmp (field, name, strlen (name))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); + } + else { + *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); + } + } + } + if (!g_mime_header_iter_next (iter)) { + break; + } + } + } + g_mime_header_iter_free (iter); +} +#endif + + +struct multipart_cb_data { + GList *ret; + rspamd_mempool_t *pool; + const gchar *field; + gboolean try_search; + gboolean strong; + gint rec; +}; + +#define MAX_REC 10 + +static void +#ifdef GMIME24 +multipart_iterate (GMimeObject * parent, GMimeObject * part, gpointer user_data) +#else +multipart_iterate (GMimeObject * part, gpointer user_data) +#endif +{ + struct multipart_cb_data *data = user_data; +#ifndef GMIME24 + struct gmime_raw_header *h; +#endif + GList *l = NULL; + + if (data->try_search && part != NULL && GMIME_IS_PART (part)) { +#ifdef GMIME24 + GMimeHeaderList *ls; + + ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); + header_iterate (data->pool, ls, &l, data->field, data->strong); +#else + h = (struct gmime_raw_header *)part->headers->headers; + header_iterate (data->pool, h, &l, data->field, data->strong); +#endif + if (l == NULL) { + /* Header not found, abandon search results */ + data->try_search = FALSE; + g_list_free (data->ret); + data->ret = NULL; + } + else { + data->ret = g_list_concat (l, data->ret); + } + } + else if (data->try_search && GMIME_IS_MULTIPART (part)) { + /* Maybe endless recursion here ? */ + if (data->rec++ < MAX_REC) { + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, data); + } + else { + msg_info ("maximum recurse limit is over, stop recursing, %d", data->rec); + data->try_search = FALSE; + } + } +} + +static GList * +local_message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) +{ + GList *gret = NULL; + GMimeObject *part; + struct multipart_cb_data cb = { + .try_search = TRUE, + .rec = 0, + .ret = NULL, + }; + cb.pool = pool; + cb.field = field; + cb.strong = strong; + +#ifndef GMIME24 + struct gmime_raw_header *h; + + if (field == NULL) { + return NULL; + } + + msg_debug ("iterate over headers to find header %s", field); + h = (struct gmime_raw_header *) (GMIME_OBJECT (message)->headers->headers); + header_iterate (pool, h, &gret, field, strong); + + if (gret == NULL) { + /* Try to iterate with mime part headers */ + msg_debug ("iterate over headers of mime part to find header %s", field); + part = g_mime_message_get_mime_part (message); + if (part) { + h = (struct gmime_raw_header *)part->headers->headers; + header_iterate (pool, h, &gret, field, strong); + if (gret == NULL && GMIME_IS_MULTIPART (part)) { + msg_debug ("iterate over headers of each multipart's subparts %s", field); + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); + if (cb.ret != NULL) { + gret = cb.ret; + } + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + } + + return gret; +#else + GMimeHeaderList *ls; + + ls = g_mime_object_get_header_list (GMIME_OBJECT (message)); + header_iterate (pool, ls, &gret, field, strong); + if (gret == NULL) { + /* Try to iterate with mime part headers */ + part = g_mime_message_get_mime_part (message); + if (part) { + ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); + header_iterate (pool, ls, &gret, field, strong); + if (gret == NULL && GMIME_IS_MULTIPART (part)) { + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); + if (cb.ret != NULL) { + gret = cb.ret; + } + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + } + + + return gret; +#endif +} + +/** +* g_mime_message_set_date_from_string: Set the message sent-date +* @message: MIME Message +* @string: A string of date +* +* Set the sent-date on a MIME Message. +**/ +void +local_mime_message_set_date_from_string (GMimeMessage * message, const gchar * string) +{ + time_t date; + gint offset = 0; + + date = g_mime_utils_header_decode_date (string, &offset); + g_mime_message_set_date (message, date, offset); +} + +/* + * Replacements for standart gmime functions but converting adresses to IA + */ +static const gchar * +local_message_get_sender (GMimeMessage * message) +{ + gchar *res; + const gchar *from = g_mime_message_get_sender (message); + InternetAddressList *ia; + +#ifndef GMIME24 + ia = internet_address_parse_string (from); +#else + ia = internet_address_list_parse_string (from); +#endif + if (!ia) { + return NULL; + } + res = internet_address_list_to_string (ia, FALSE); +#ifndef GMIME24 + internet_address_list_destroy (ia); +#else + g_object_unref (ia); +#endif + + return res; +} + +static const gchar * +local_message_get_reply_to (GMimeMessage * message) +{ + gchar *res; + const gchar *from = g_mime_message_get_reply_to (message); + InternetAddressList *ia; + +#ifndef GMIME24 + ia = internet_address_parse_string (from); +#else + ia = internet_address_list_parse_string (from); +#endif + if (!ia) { + return NULL; + } + res = internet_address_list_to_string (ia, FALSE); +#ifndef GMIME24 + internet_address_list_destroy (ia); +#else + g_object_unref (ia); +#endif + + return res; +} + +#ifdef GMIME24 + +# define ADD_RECIPIENT_TEMPLATE(type,def) \ +static void \ +local_message_add_recipients_from_string_##type (GMimeMessage *message, const gchar *string, const gchar *value) \ +{ \ + InternetAddressList *il, *new; \ + \ + il = g_mime_message_get_recipients (message, (def)); \ + new = internet_address_list_parse_string (string); \ + internet_address_list_append (il, new); \ +} \ + +ADD_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) + ADD_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) + ADD_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) +# define GET_RECIPIENT_TEMPLATE(type,def) \ +static InternetAddressList* \ +local_message_get_recipients_##type (GMimeMessage *message, const gchar *unused) \ +{ \ + return g_mime_message_get_recipients (message, (def)); \ +} + GET_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) + GET_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) + GET_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) +#endif +/* different declarations for different types of set and get functions */ + typedef const gchar *(*GetFunc) (GMimeMessage * message); + typedef InternetAddressList *(*GetRcptFunc) (GMimeMessage * message, const gchar *type); + typedef GList *(*GetListFunc) (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *type, gboolean strong); + typedef void (*SetFunc) (GMimeMessage * message, const gchar *value); + typedef void (*SetListFunc) (GMimeMessage * message, const gchar *field, const gchar *value); + +/** different types of functions +* +* FUNC_CHARPTR +* - function with no arguments +* - get returns gchar* +* +* FUNC_IA (from Internet Address) +* - function with additional "field" argument from the fieldfunc table, +* - get returns Glist* +* +* FUNC_LIST +* - function with additional "field" argument (given arbitrary header field name) +* - get returns Glist* +**/ + enum { + FUNC_CHARPTR = 0, + FUNC_CHARFREEPTR, + FUNC_IA, + FUNC_LIST + }; + +/** +* fieldfunc struct: structure of MIME fields and corresponding get and set +* functions. +**/ + static struct { + gchar *name; + GetFunc func; + GetRcptFunc rcptfunc; + GetListFunc getlistfunc; + SetFunc setfunc; + SetListFunc setlfunc; + gint functype; + } fieldfunc[] = +{ + { + "From", local_message_get_sender, NULL, NULL, g_mime_message_set_sender, NULL, FUNC_CHARFREEPTR}, { + "Reply-To", local_message_get_reply_to, NULL, NULL, g_mime_message_set_reply_to, NULL, FUNC_CHARFREEPTR}, +#ifndef GMIME24 + { + "To", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Cc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Bcc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Date", (GetFunc) g_mime_message_get_date_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, +#else + { + "To", NULL, local_message_get_recipients_to, NULL, NULL, local_message_add_recipients_from_string_to, FUNC_IA}, { + "Cc", NULL, local_message_get_recipients_cc, NULL, NULL, local_message_add_recipients_from_string_cc, FUNC_IA}, { + "Bcc", NULL, local_message_get_recipients_bcc, NULL, NULL, local_message_add_recipients_from_string_bcc, FUNC_IA}, { + "Date", (GetFunc)g_mime_message_get_date_as_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, +#endif + { + "Subject", g_mime_message_get_subject, NULL, NULL, g_mime_message_set_subject, NULL, FUNC_CHARPTR}, { + "Message-Id", g_mime_message_get_message_id, NULL, NULL, g_mime_message_set_message_id, NULL, FUNC_CHARPTR}, +#ifndef GMIME24 + { + NULL, NULL, NULL, local_message_get_header, NULL, g_mime_message_add_header, FUNC_LIST} +#else + { + NULL, NULL, NULL, local_message_get_header, NULL, (SetListFunc)g_mime_object_append_header, FUNC_LIST} +#endif +}; + +/** +* message_set_header: set header of any type excluding special (Content- and MIME-Version:) +**/ +void +message_set_header (GMimeMessage * message, const gchar *field, const gchar *value) +{ + gint i; + + if (!g_ascii_strcasecmp (field, "MIME-Version:") || !g_ascii_strncasecmp (field, "Content-", 8)) { + return; + } + for (i = 0; i <= HEADER_UNKNOWN; ++i) { + if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { + switch (fieldfunc[i].functype) { + case FUNC_CHARPTR: + (*(fieldfunc[i].setfunc)) (message, value); + break; + case FUNC_IA: + (*(fieldfunc[i].setlfunc)) (message, fieldfunc[i].name, value); + break; + case FUNC_LIST: + (*(fieldfunc[i].setlfunc)) (message, field, value); + break; + } + break; + } + } +} + + +/** +* message_get_header: returns the list of 'any header' values +* (except of unsupported yet Content- and MIME-Version special headers) +* +* You should free the GList list by yourself. +**/ +GList * +message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) +{ + gint i; + gchar *ret = NULL, *ia_string; + GList *gret = NULL; + InternetAddressList *ia_list = NULL, *ia; + + for (i = 0; i <= HEADER_UNKNOWN; ++i) { + if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { + switch (fieldfunc[i].functype) { + case FUNC_CHARFREEPTR: + ret = (gchar *)(*(fieldfunc[i].func)) (message); + break; + case FUNC_CHARPTR: + ret = (gchar *)(*(fieldfunc[i].func)) (message); + break; + case FUNC_IA: + ia_list = (*(fieldfunc[i].rcptfunc)) (message, field); + ia = ia_list; +#ifndef GMIME24 + while (ia && ia->address) { + + ia_string = internet_address_to_string ((InternetAddress *) ia->address, FALSE); + if (pool != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); + } + gret = g_list_prepend (gret, ia_string); + ia = ia->next; + } +#else + i = internet_address_list_length (ia); + while (--i >= 0) { + ia_string = internet_address_to_string (internet_address_list_get_address (ia, i), FALSE); + if (pool != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); + } + gret = g_list_prepend (gret, ia_string); + } +#endif + break; + case FUNC_LIST: + gret = (*(fieldfunc[i].getlistfunc)) (pool, message, field, strong); + break; + } + break; + } + } + if (gret == NULL && ret != NULL) { + if (pool != NULL) { + gret = g_list_prepend (gret, rspamd_mempool_strdup (pool, ret)); + } + else { + gret = g_list_prepend (gret, g_strdup (ret)); + } + } + if (fieldfunc[i].functype == FUNC_CHARFREEPTR && ret) { + g_free (ret); + } + + return gret; +} + +GList* +message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong) +{ + GList *gret = NULL; + struct raw_header *rh; + + rh = g_hash_table_lookup (task->raw_headers, field); + + if (rh == NULL) { + return NULL; + } + + while (rh) { + if (strong) { + if (strcmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + else { + if (g_ascii_strcasecmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + rh = rh->next; + } + + if (gret != NULL) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, gret); + } + + return gret; +} |