From: Vsevolod Stakhov
Date: Mon, 12 May 2008 16:28:12 +0000 (+0400)
Subject: * Add initial release of mime parser. Now can only decode base64 and quoted-printable.
X-Git-Tag: 0.2.7~404
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=8dbb9984edd945090c2c8ae27f4cfede944ba912;p=rspamd.git

* Add initial release of mime parser. Now can only decode base64 and quoted-printable.
---

diff --git a/configure b/configure
index ca1271608..672307866 100755
--- a/configure
+++ b/configure
@@ -17,7 +17,7 @@ LEX_SRC="cfg_file.l"
 YACC_OUTPUT="cfg_yacc.c"
 LEX_OUTPUT="cfg_lex.c"
 
-SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
+SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c mime.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
 
 CFLAGS="$CFLAGS -W -Wall -Wpointer-arith -Wno-unused-parameter"
 CFLAGS="$CFLAGS -Wno-unused-function -Wunused-variable -Wno-sign-compare"
@@ -25,7 +25,7 @@ CFLAGS="$CFLAGS -Wunused-value -ggdb -I${LOCALBASE}/include"
 CFLAGS="$CFLAGS -DRVERSION=\\\"${VERSION}\\\" -DHASH_COMPAT"
 LDFLAGS="$LDFLAGS -L/usr/lib -L${LOCALBASE}/lib"
 OPT_FLAGS="-O -pipe -fno-omit-frame-pointer"
-DEPS="cfg_file.h memcached.h util.h main.h upstream.h fstring.h ${LEX_OUTPUT} ${YACC_OUTPUT}"
+DEPS="cfg_file.h memcached.h util.h main.h upstream.h fstring.h mime.h ${LEX_OUTPUT} ${YACC_OUTPUT}"
 EXEC=rspamd
 USER=postfix
 GROUP=postfix
diff --git a/fstring.h b/fstring.h
index 30895ccc5..4c831f052 100644
--- a/fstring.h
+++ b/fstring.h
@@ -74,4 +74,9 @@ f_str_t* fstrgrow (f_str_t *orig, size_t newlen);
  */
 #define fstrfree(x) free((x)->begin); free((x))
 
+/*
+ * Return specified character
+ */
+#define fstridx(str, pos) (*((str)->begin + (pos)))
+
 #endif
diff --git a/mime.c b/mime.c
new file mode 100644
index 000000000..14b23247f
--- /dev/null
+++ b/mime.c
@@ -0,0 +1,134 @@
+/* NOTE(review): the two system header names were lost in extraction; restored from usage - confirm */
+#include <sys/types.h>
+#include <ctype.h>
+
+#include "mime.h"
+#include "fstring.h"
+
+/*
+ * Quoted printable and base64 decoders for mime parser
+ */
+
+/*
+ * Decode base64 text from src into a newly allocated string.
+ * Characters outside the base64 alphabet (line breaks, spaces) are skipped.
+ * Returns NULL if the result string cannot be allocated.
+ */
+static f_str_t *
+base64decode (f_str_t *src)
+{
+	unsigned int buf = 0;
+	int bits = 0, v;
+	size_t pos;
+	char c;
+	f_str_t *res;
+
+	res = fstralloc (src->len);
+	if (res == NULL) {
+		return NULL;
+	}
+
+	for (pos = 0; pos < src->len; pos ++) {
+		c = fstridx (src, pos);
+		if (c >= 'A' && c <= 'Z') {
+			v = c - 'A';
+		}
+		else if (c >= 'a' && c <= 'z') {
+			v = c - 'a' + 26;
+		}
+		else if (c >= '0' && c <= '9') {
+			v = c - '0' + 52;
+		}
+		else if ('+' == c) {
+			v = 62;
+		}
+		else if ('/' == c) {
+			v = 63;
+		}
+		else if ('=' == c) {
+			/* Padding ends a quantum: drop leftover bits so data after it starts clean */
+			buf = 0;
+			bits = 0;
+			continue;
+		}
+		else {
+			continue;
+		}
+		/* At most 12 bits are ever pending; masking keeps buf from overflowing */
+		buf = ((buf << 6) | (unsigned int)v) & 0xfff;
+		bits += 6;
+		if (bits >= 8) {
+			/* Consume the completed byte and keep the remaining low bits pending */
+			bits -= 8;
+			c = (char)(255 & (buf >> bits));
+			fstrpush (res, c);
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Return the numeric value of a hexadecimal digit.
+ * The caller must have checked the character with isxdigit ().
+ */
+static int
+qp_hexval (unsigned char c)
+{
+	if (isdigit (c)) {
+		return c - '0';
+	}
+	return toupper (c) - 'A' + 10;
+}
+
+/*
+ * Decode quoted-printable text from src into a newly allocated string.
+ * If header is non-zero, '_' is decoded as a space.
+ * "=XX" hex escapes are decoded and soft line breaks ('=' followed by a
+ * line terminator) are removed; everything else is copied as is.
+ * Returns NULL if the result string cannot be allocated.
+ */
+static f_str_t *
+qpdecode (f_str_t *src, short header)
+{
+	f_str_t *res;
+	size_t pos;
+	char c;
+
+	res = fstralloc (src->len);
+	if (res == NULL) {
+		return NULL;
+	}
+
+	for (pos = 0; pos < src->len; pos++) {
+		c = fstridx (src, pos);
+		if (header && '_' == c) {
+			c = 0x20;
+		}
+		else if ('=' == c && pos + 3 <= src->len &&
+				isxdigit ((unsigned char)fstridx (src, pos + 1)) &&
+				isxdigit ((unsigned char)fstridx (src, pos + 2))) {
+			/* ctype functions need unsigned char values: plain char may be negative */
+			c = (char)(16 * qp_hexval (fstridx (src, pos + 1)) + qp_hexval (fstridx (src, pos + 2)));
+			pos += 2;
+		}
+		else if ('=' == c && pos + 2 <= src->len && ('\r' == fstridx (src, pos + 1) || '\n' == fstridx (src, pos + 1))) {
+			/* Soft line break: skip exactly one terminator (CR, LF, CRLF or LFCR) */
+			if ('\r' == fstridx (src, pos + 1)) {
+				if (pos + 3 <= src->len && '\n' == fstridx (src, pos + 2)) {
+					pos ++;
+				}
+			}
+			else if (pos + 3 <= src->len && '\r' == fstridx (src, pos + 2)) {
+				pos ++;
+			}
+			pos ++;
+			continue;
+		}
+		fstrpush (res, c);
+	}
+
+	return res;
+}
+
diff --git a/mime.h b/mime.h
new file mode 100644
index 000000000..cf56b780d
--- /dev/null
+++ b/mime.h
@@ -0,0 +1,95 @@
+#ifndef MIME_H
+#define MIME_H
+
+#include "fstring.h"
+#ifndef OWN_QUEUE_H
+#include <sys/queue.h>
+#else
+#include "queue.h"
+#endif
+
+/*
+ * Header types. If we reach 31, we must group the headers we need to
+ * remember at the beginning, or we should use fd_set bit sets.
+ */
+#define HDR_APPARENTLY_TO 1
+#define HDR_BCC 2
+#define HDR_CC 3
+#define HDR_CONTENT_LENGTH 4
+#define HDR_CONTENT_TRANSFER_ENCODING 5
+#define HDR_CONTENT_TYPE 6
+#define HDR_DATE 7
+#define HDR_DELIVERED_TO 8
+#define HDR_ERRORS_TO 9
+#define HDR_FROM 10
+#define HDR_MESSAGE_ID 11
+#define HDR_RECEIVED 12
+#define HDR_REPLY_TO 13
+#define HDR_RESENT_BCC 14
+#define HDR_RESENT_CC 15
+#define HDR_RESENT_DATE 16
+#define HDR_RESENT_FROM 17
+#define HDR_RESENT_MESSAGE_ID 18
+#define HDR_RESENT_REPLY_TO 19
+#define HDR_RESENT_SENDER 20
+#define HDR_RESENT_TO 21
+#define HDR_RETURN_PATH 22
+#define HDR_RETURN_RECEIPT_TO 23
+#define HDR_SENDER 24
+#define HDR_TO 25
+#define HDR_MAIL_FOLLOWUP_TO 26
+#define HDR_CONTENT_DESCRIPTION 27
+#define HDR_CONTENT_DISPOSITION 28
+#define HDR_CONTENT_ID 29
+#define HDR_MIME_VERSION 30
+#define HDR_DISP_NOTIFICATION 31
+
+#define URL_A 1
+#define URL_IMG 2
+
+/*
+ * Headers:
+ * name - header name
+ * value - decoded, translated to utf8 and normalized version
+ * type - type of header in case of known headers
+ */
+typedef struct mime_header_s {
+	f_str_t *name;
+	f_str_t *value;
+	int type;
+	LIST_ENTRY (mime_header_s) next;
+} mime_header_t;
+
+/*
+ * Body part:
+ * data - content of this part, translated to utf, decoded, normalized and deHTMLed
+ * type - content-type of this part
+ * encoding - original encoding of body part
+ */
+typedef struct mime_body_part_s {
+	f_str_t *data;
+	f_str_t *type;
+	f_str_t *encoding;
+	LIST_ENTRY (mime_body_part_s) next;
+} mime_body_part_t;
+
+/*
+ * Image and A urls:
+ * url - normalized and decoded url
+ * caption - decoded caption for this url (if any)
+ * type - image or a references
+ */
+typedef struct mime_url_s {
+	f_str_t *url;
+	f_str_t *caption;
+	int type;
+} mime_url_t;
+
+typedef struct mime_ctx_s {
+	LIST_HEAD (headersl, mime_header_s) headers;
+	LIST_HEAD (bodypartsl, mime_body_part_s) parts;
+	f_str_t *cur_content_type;
+	f_str_t *cur_encoding;
+} mime_ctx_t;
+
+#endif