123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495 |
- /*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "libmime/content_type.h"
- #include "smtp_parsers.h"
- #include "utlist.h"
-
- void
- rspamd_content_type_add_param (rspamd_mempool_t *pool,
- struct rspamd_content_type *ct,
- const gchar *name_start, const gchar *name_end,
- const gchar *value_start, const gchar *value_end)
- {
- rspamd_ftok_t srch;
- struct rspamd_content_type_param *found = NULL, *nparam;
-
- g_assert (ct != NULL);
-
- srch.begin = name_start;
- srch.len = name_end - name_start;
-
- if (ct->attrs) {
- found = g_hash_table_lookup (ct->attrs, &srch);
- }
- else {
- ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal);
- }
-
- nparam = rspamd_mempool_alloc (pool, sizeof (*nparam));
- nparam->name.begin = name_start;
- nparam->name.len = name_end - name_start;
- nparam->value.begin = value_start;
- nparam->value.len = value_end - value_start;
-
- if (!found) {
- DL_APPEND (found, nparam);
- g_hash_table_insert (ct->attrs, &nparam->name, nparam);
- }
- else {
- DL_APPEND (found, nparam);
- }
-
- RSPAMD_FTOK_ASSIGN (&srch, "charset");
-
- if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
- /* Adjust charset */
- ct->charset.begin = nparam->value.begin;
- ct->charset.len = nparam->value.len;
- }
-
- RSPAMD_FTOK_ASSIGN (&srch, "boundary");
-
- if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
- /* Adjust boundary */
- ct->boundary.begin = nparam->value.begin;
- ct->boundary.len = nparam->value.len;
- }
- }
-
- static struct rspamd_content_type *
- rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
- {
- guint obraces = 0, ebraces = 0, qlen = 0;
- const gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
- struct rspamd_content_type *res = NULL, val;
- gboolean eqsign_seen = FALSE;
- enum {
- parse_type,
- parse_subtype,
- parse_after_subtype,
- parse_param_name,
- parse_param_after_name,
- parse_param_value,
- parse_param_value_after_quote,
- parse_space,
- parse_quoted,
- parse_comment,
- } state = parse_space, next_state = parse_type;
-
- p = in;
- c = p;
- end = p + len;
- memset (&val, 0, sizeof (val));
- val.lc_data = (gchar *)in;
-
- while (p < end) {
- switch (state) {
- case parse_type:
- if (g_ascii_isspace (*p) || *p == ';') {
- /* We have type without subtype */
- val.type.begin = c;
- val.type.len = p - c;
- state = parse_after_subtype;
- } else if (*p == '/') {
- val.type.begin = c;
- val.type.len = p - c;
- state = parse_space;
- next_state = parse_subtype;
- p++;
- } else {
- p++;
- }
- break;
- case parse_subtype:
- if (g_ascii_isspace (*p) || *p == ';') {
- val.subtype.begin = c;
- val.subtype.len = p - c;
- state = parse_after_subtype;
- } else {
- p++;
- }
- break;
- case parse_after_subtype:
- if (*p == ';' || g_ascii_isspace (*p)) {
- p++;
- } else if (*p == '(') {
- c = p;
- state = parse_comment;
- next_state = parse_param_name;
- obraces = 1;
- ebraces = 0;
- pname_start = NULL;
- pname_end = NULL;
- eqsign_seen = FALSE;
- p++;
- } else {
- c = p;
- state = parse_param_name;
- pname_start = NULL;
- pname_end = NULL;
- eqsign_seen = FALSE;
- }
- break;
- case parse_param_name:
- if (*p == '=') {
- pname_start = c;
- pname_end = p;
- state = parse_param_after_name;
- eqsign_seen = TRUE;
- p++;
- } else if (g_ascii_isspace (*p)) {
- pname_start = c;
- pname_end = p;
- state = parse_param_after_name;
- } else {
- p++;
- }
- break;
- case parse_param_after_name:
- if (g_ascii_isspace (*p)) {
- p++;
- } else if (*p == '=') {
- if (eqsign_seen) {
- /* Treat as value start */
- c = p;
- eqsign_seen = FALSE;
- state = parse_space;
- next_state = parse_param_value;
- p++;
- } else {
- eqsign_seen = TRUE;
- p++;
- }
- } else {
- if (eqsign_seen) {
- state = parse_param_value;
- c = p;
- } else {
- /* Invalid parameter without value */
- c = p;
- state = parse_param_name;
- pname_start = NULL;
- pname_end = NULL;
- }
- }
- break;
- case parse_param_value:
- if (*p == '"') {
- p++;
- c = p;
- state = parse_quoted;
- next_state = parse_param_value_after_quote;
- } else if (g_ascii_isspace (*p)) {
- if (pname_start && pname_end && pname_end > pname_start) {
- rspamd_content_type_add_param (pool, &val, pname_start,
- pname_end, c, p);
-
- }
-
- state = parse_space;
- next_state = parse_param_name;
- pname_start = NULL;
- pname_end = NULL;
- } else if (*p == '(') {
- if (pname_start && pname_end && pname_end > pname_start) {
- rspamd_content_type_add_param (pool, &val, pname_start,
- pname_end, c, p);
- }
-
- obraces = 1;
- ebraces = 0;
- p++;
- state = parse_comment;
- next_state = parse_param_name;
- pname_start = NULL;
- pname_end = NULL;
- } else {
- p++;
- }
- break;
- case parse_param_value_after_quote:
- if (pname_start && pname_end && pname_end > pname_start) {
- rspamd_content_type_add_param (pool, &val, pname_start,
- pname_end, c, c + qlen);
- }
-
- if (g_ascii_isspace (*p)) {
- state = parse_space;
- next_state = parse_param_name;
- pname_start = NULL;
- pname_end = NULL;
- } else if (*p == '(') {
- obraces = 1;
- ebraces = 0;
- p++;
- state = parse_comment;
- next_state = parse_param_name;
- pname_start = NULL;
- pname_end = NULL;
- } else {
- state = parse_param_name;
- pname_start = NULL;
- pname_end = NULL;
- c = p;
- }
- break;
- case parse_quoted:
- if (*p == '\\') {
- /* Quoted pair */
- if (p + 1 < end) {
- p += 2;
- } else {
- p++;
- }
- } else if (*p == '"') {
- qlen = p - c;
- state = next_state;
- } else {
- p++;
- }
- break;
- case parse_comment:
- if (*p == '(') {
- obraces++;
- p++;
- } else if (*p == ')') {
- ebraces++;
- p++;
-
- if (ebraces == obraces && p < end) {
- if (g_ascii_isspace (*p)) {
- state = parse_space;
- } else {
- c = p;
- state = next_state;
- }
- }
- } else {
- p++;
- }
- break;
- case parse_space:
- if (g_ascii_isspace (*p)) {
- p++;
- } else if (*p == '(') {
- obraces = 1;
- ebraces = 0;
- p++;
- state = parse_comment;
- } else {
- c = p;
- state = next_state;
- }
- break;
- }
- }
-
- /* Process leftover */
- switch (state) {
- case parse_type:
- val.type.begin = c;
- val.type.len = p - c;
- break;
- case parse_subtype:
- val.subtype.begin = c;
- val.subtype.len = p - c;
- break;
- case parse_param_value:
- if (pname_start && pname_end && pname_end > pname_start) {
- rspamd_content_type_add_param (pool, &val, pname_start,
- pname_end, c, p);
-
- }
- case parse_param_value_after_quote:
- if (pname_start && pname_end && pname_end > pname_start) {
- rspamd_content_type_add_param (pool, &val, pname_start,
- pname_end, c, c + qlen);
- }
- break;
- default:
- break;
- }
-
- if (val.type.len > 0) {
- res = rspamd_mempool_alloc (pool, sizeof (val));
- memcpy (res, &val, sizeof (val));
- }
-
- return res;
- }
-
- struct rspamd_content_type *
- rspamd_content_type_parse (const gchar *in,
- gsize len, rspamd_mempool_t *pool)
- {
- struct rspamd_content_type *res = NULL;
- rspamd_ftok_t srch;
- gchar *lc_data;
-
- lc_data = rspamd_mempool_alloc (pool, len);
- memcpy (lc_data, in, len);
- rspamd_str_lc (lc_data, len);
-
- if ((res = rspamd_content_type_parser (lc_data, len, pool)) != NULL) {
- if (res->attrs) {
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
- }
-
- /* Now do some hacks to work with broken content types */
- if (res->subtype.len == 0) {
- res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
- RSPAMD_FTOK_ASSIGN (&srch, "text");
-
- if (rspamd_ftok_cmp (&res->type, &srch) == 0) {
- /* Workaround for Content-Type: text */
- /* Assume text/plain */
- RSPAMD_FTOK_ASSIGN (&srch, "plain");
- }
- else {
- RSPAMD_FTOK_ASSIGN (&srch, "html");
-
- if (rspamd_ftok_cmp (&res->type, &srch) == 0) {
- /* Workaround for Content-Type: html */
- RSPAMD_FTOK_ASSIGN (&res->type, "text");
- RSPAMD_FTOK_ASSIGN (&res->subtype, "html");
- }
- else {
- RSPAMD_FTOK_ASSIGN (&srch, "application");
-
- if (rspamd_ftok_cmp (&res->type, &srch) == 0) {
- RSPAMD_FTOK_ASSIGN (&res->subtype, "octet-stream");
- }
- }
- }
- }
- else {
- /* Common mistake done by retards */
- RSPAMD_FTOK_ASSIGN (&srch, "alternate");
-
- if (rspamd_ftok_cmp (&res->subtype, &srch) == 0) {
- res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
- RSPAMD_FTOK_ASSIGN (&res->subtype, "alternative");
- }
- }
-
- RSPAMD_FTOK_ASSIGN (&srch, "multipart");
-
- if (rspamd_ftok_cmp (&res->type, &srch) == 0) {
- res->flags |= RSPAMD_CONTENT_TYPE_MULTIPART;
- }
- else {
- RSPAMD_FTOK_ASSIGN (&srch, "text");
-
- if (rspamd_ftok_cmp (&res->type, &srch) == 0) {
- res->flags |= RSPAMD_CONTENT_TYPE_TEXT;
- }
- else {
- RSPAMD_FTOK_ASSIGN (&srch, "message");
-
- if (rspamd_ftok_cmp (&res->type, &srch) == 0) {
- RSPAMD_FTOK_ASSIGN (&srch, "delivery-status");
-
- if (rspamd_ftok_cmp (&res->subtype, &srch) == 0) {
- res->flags |= RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_DSN;
- }
- else {
- res->flags |= RSPAMD_CONTENT_TYPE_MESSAGE;
- }
- }
- }
- }
- }
- else {
- msg_warn_pool ("cannot parse content type: %*s", (gint)len, lc_data);
- }
-
- return res;
- }
-
- void
- rspamd_content_disposition_add_param (rspamd_mempool_t *pool,
- struct rspamd_content_disposition *cd,
- const gchar *name_start, const gchar *name_end,
- const gchar *value_start, const gchar *value_end)
- {
- rspamd_ftok_t srch;
- gchar *decoded;
- struct rspamd_content_type_param *found = NULL, *nparam;
-
- g_assert (cd != NULL);
-
- srch.begin = name_start;
- srch.len = name_end - name_start;
-
- if (cd->attrs) {
- found = g_hash_table_lookup (cd->attrs, &srch);
- }
- else {
- cd->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal);
- }
-
- nparam = rspamd_mempool_alloc (pool, sizeof (*nparam));
- nparam->name.begin = name_start;
- nparam->name.len = name_end - name_start;
- decoded = rspamd_mime_header_decode (pool, value_start, value_end - value_start);
- RSPAMD_FTOK_FROM_STR (&nparam->value, decoded);
-
- if (!found) {
- g_hash_table_insert (cd->attrs, &nparam->name, nparam);
- }
-
- DL_APPEND (found, nparam);
-
- srch.begin = "filename";
- srch.len = 8;
-
- if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
- /* Adjust filename */
- cd->filename.begin = nparam->value.begin;
- cd->filename.len = nparam->value.len;
- }
- }
-
- struct rspamd_content_disposition *
- rspamd_content_disposition_parse (const gchar *in,
- gsize len, rspamd_mempool_t *pool)
- {
- struct rspamd_content_disposition *res = NULL, val;
-
- val.lc_data = rspamd_mempool_alloc (pool, len);
- memcpy (val.lc_data, in, len);
- rspamd_str_lc (val.lc_data, len);
-
- if (rspamd_content_disposition_parser (in, len, &val, pool)) {
- res = rspamd_mempool_alloc (pool, sizeof (val));
- memcpy (res, &val, sizeof (val));
-
- if (res->attrs) {
- rspamd_mempool_add_destructor (pool,
- (rspamd_mempool_destruct_t)g_hash_table_unref, res->attrs);
- }
- }
- else {
- msg_warn_pool ("cannot parse content disposition: %*s",
- (gint)len, val.lc_data);
- }
-
- return res;
- }
|