]> source.dussan.org Git - rspamd.git/commitdiff
* Add initial release of the MIME parser. For now it can only decode base64 and quoted-printable.
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 12 May 2008 16:28:12 +0000 (20:28 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 12 May 2008 16:28:12 +0000 (20:28 +0400)
configure
fstring.h
mime.c [new file with mode: 0644]
mime.h [new file with mode: 0644]

index ca1271608aad758953db1103ea70113cd8377c09..67230786667733fb49b03980eadeefc2ba4b248e 100755 (executable)
--- a/configure
+++ b/configure
@@ -17,7 +17,7 @@ LEX_SRC="cfg_file.l"
 YACC_OUTPUT="cfg_yacc.c"
 LEX_OUTPUT="cfg_lex.c"
 
-SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
+SOURCES="upstream.c cfg_utils.c memcached.c main.c util.c worker.c fstring.c mime.c ${LEX_OUTPUT} ${YACC_OUTPUT}"
 
 CFLAGS="$CFLAGS -W -Wall -Wpointer-arith -Wno-unused-parameter"
 CFLAGS="$CFLAGS -Wno-unused-function -Wunused-variable -Wno-sign-compare"
@@ -25,7 +25,7 @@ CFLAGS="$CFLAGS -Wunused-value -ggdb -I${LOCALBASE}/include"
 CFLAGS="$CFLAGS -DRVERSION=\\\"${VERSION}\\\" -DHASH_COMPAT"
 LDFLAGS="$LDFLAGS -L/usr/lib -L${LOCALBASE}/lib"
 OPT_FLAGS="-O -pipe -fno-omit-frame-pointer"
-DEPS="cfg_file.h memcached.h util.h main.h upstream.h fstring.h ${LEX_OUTPUT} ${YACC_OUTPUT}"
+DEPS="cfg_file.h memcached.h util.h main.h upstream.h fstring.h mime.h ${LEX_OUTPUT} ${YACC_OUTPUT}"
 EXEC=rspamd
 USER=postfix
 GROUP=postfix
index 30895ccc55a902b105253ac83a8474f3971b747f..4c831f0527e90526b317e63a291e694d1a042713 100644 (file)
--- a/fstring.h
+++ b/fstring.h
@@ -74,4 +74,9 @@ f_str_t* fstrgrow (f_str_t *orig, size_t newlen);
  */
 #define fstrfree(x) free((x)->begin); free((x))
 
+/*
+ * Character at offset pos of str (usable as an lvalue); pos is not bounds-checked
+ */
+#define fstridx(str, pos) (*((str)->begin + (pos)))
+
 #endif
diff --git a/mime.c b/mime.c
new file mode 100644 (file)
index 0000000..14b2324
--- /dev/null
+++ b/mime.c
@@ -0,0 +1,116 @@
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "mime.h"
+#include "fstring.h"
+
+/* 
+ * Quoted printable and base64 decoders for mime parser
+ */
+
+/* Decode base64 data from src into a new string; NULL if allocation fails. */
+static f_str_t *
+base64decode (f_str_t *src)
+{
+       /* buf accumulates 6-bit groups, bits counts the not yet emitted bits */
+       unsigned int buf = 0;
+       int bits = 0, v;
+       size_t pos;
+       char c;
+       f_str_t *res;
+
+       res = fstralloc (src->len);
+       if (res == NULL) {
+               return NULL;
+       }
+
+       for (pos = 0; pos < src->len; pos ++) {
+               c = *(src->begin + pos);
+               if (c >= 'A' && c <= 'Z') {
+                       v = c - 'A';
+               }
+               else if (c >= 'a' && c <= 'z') {
+                       v = c - 'a' + 26;
+               }
+               else if (c >= '0' && c <= '9') {
+                       v = c - '0' + 52;
+               }
+               else if ('+' == c) {
+                       v = 62;
+               }
+               else if ('/' == c) {
+                       v = 63;
+               }
+               else {
+                       /* '=' padding and non-alphabet characters carry no data */
+                       continue;
+               }
+               /* At most 12 bits are ever pending; masking keeps buf from overflowing */
+               buf = (buf << 6 | v) & 0xFFF;
+               bits += 6;
+               if (bits >= 8) {
+                       /* Consume the emitted byte so the shift stays in range */
+                       bits -= 8;
+                       c = 255 & (buf >> bits);
+                       fstrpush (res, c);
+               }
+       }
+
+       return res;
+}
+
+/*
+ * Decode quoted-printable data from src into a newly allocated string.
+ * "=XX" (two hex digits) becomes one byte and '=' followed by a line break
+ * is a soft line break that is removed; everything else, including a
+ * malformed '=' sequence, is copied unchanged.
+ * If header is non-zero, '_' is decoded as a space.
+ * Returns NULL if the result string cannot be allocated.
+ */
+static f_str_t *
+qpdecode (f_str_t *src, short header)
+{
+       f_str_t *res;
+       size_t pos;
+       char c;
+       /* unsigned char: <ctype.h> functions must not get negative values */
+       unsigned char hi, lo;
+
+       res = fstralloc (src->len);
+       if (res == NULL) {
+               return NULL;
+       }
+
+       for (pos = 0; pos < src->len; pos++) {
+               c = fstridx (src, pos);
+               /* The two following characters, 0 when they are past the end */
+               hi = (pos + 1 < src->len) ? (unsigned char) fstridx (src, pos + 1) : 0;
+               lo = (pos + 2 < src->len) ? (unsigned char) fstridx (src, pos + 2) : 0;
+               if (header && '_' == c) {
+                       c = 0x20;
+               }
+               else if ('=' == c && isxdigit (hi) && isxdigit (lo)) {
+                       /* Combine the two hex digit values into one byte */
+                       hi = isdigit (hi) ? hi - '0' : toupper (hi) - 'A' + 10;
+                       lo = isdigit (lo) ? lo - '0' : toupper (lo) - 'A' + 10;
+                       c = (char) (hi << 4 | lo);
+                       pos += 2;
+               }
+               else if ('=' == c && ('\r' == hi || '\n' == hi)) {
+                       /*
+                        * Soft line break: drop '=' and exactly one line
+                        * terminator (CRLF, LFCR, CR or LF), so that a real
+                        * line break following it is preserved.
+                        */
+                       if (('\r' == lo || '\n' == lo) && lo != hi) {
+                               pos ++;
+                       }
+                       pos ++;
+                       continue;
+               }
+               fstrpush (res, c);
+       }
+
+       return res;
+}
+
diff --git a/mime.h b/mime.h
new file mode 100644 (file)
index 0000000..cf56b78
--- /dev/null
+++ b/mime.h
@@ -0,0 +1,95 @@
+#ifndef MIME_H
+#define MIME_H
+
+#include "fstring.h"
+#ifndef OWN_QUEUE_H
+#include <sys/queue.h>
+#else
+#include "queue.h"
+#endif
+
+/*
+ * Numeric identifiers of the known header types. If we reach 31, we must group
+ * the headers we need to remember at the beginning, or use fd_set bit sets.
+ */
+#define HDR_APPARENTLY_TO              1
+#define HDR_BCC                                2
+#define HDR_CC                         3
+#define HDR_CONTENT_LENGTH             4
+#define HDR_CONTENT_TRANSFER_ENCODING  5
+#define HDR_CONTENT_TYPE               6
+#define HDR_DATE                       7
+#define HDR_DELIVERED_TO               8
+#define HDR_ERRORS_TO                  9
+#define HDR_FROM                       10
+#define HDR_MESSAGE_ID                 11
+#define HDR_RECEIVED                   12
+#define HDR_REPLY_TO                   13
+#define HDR_RESENT_BCC                 14
+#define HDR_RESENT_CC                  15
+#define HDR_RESENT_DATE                        16
+#define HDR_RESENT_FROM                        17
+#define HDR_RESENT_MESSAGE_ID          18
+#define HDR_RESENT_REPLY_TO            19
+#define HDR_RESENT_SENDER              20
+#define HDR_RESENT_TO                  21
+#define HDR_RETURN_PATH                        22
+#define HDR_RETURN_RECEIPT_TO          23
+#define HDR_SENDER                     24
+#define HDR_TO                         25
+#define HDR_MAIL_FOLLOWUP_TO           26
+#define HDR_CONTENT_DESCRIPTION                27
+#define HDR_CONTENT_DISPOSITION                28
+#define HDR_CONTENT_ID                 29
+#define HDR_MIME_VERSION               30
+#define HDR_DISP_NOTIFICATION          31 /* NOTE(review): this is the 31 mentioned above -- confirm the limit still holds */
+
+#define URL_A                          1 /* presumably: url taken from an A reference -- TODO confirm */
+#define URL_IMG                                2 /* presumably: url taken from an image -- TODO confirm */
+
+/*
+ * A single message header:
+ * name - header name
+ * value - header value: decoded, translated to utf8 and normalized
+ * type - type of header in case of known headers
+ */
+typedef struct mime_header_s {
+       f_str_t *name;
+       f_str_t *value;
+       int type;                               /* presumably one of the HDR_* values -- TODO confirm */
+       LIST_ENTRY (mime_header_s) next;        /* list linkage; mime_ctx_t.headers is a list head of this type */
+} mime_header_t;
+
+/*
+ * A single body part of a message:
+ * data - content of this part, translated to utf, decoded, normalized and deHTMLed
+ * type - content-type of this part
+ * encoding - original encoding of body part
+ */
+typedef struct mime_body_part_s {
+       f_str_t *data;
+       f_str_t *type;
+       f_str_t *encoding;
+       LIST_ENTRY (mime_body_part_s) next;     /* list linkage; mime_ctx_t.parts is a list head of this type */
+} mime_body_part_t;
+
+/*
+ * Url of an image or an A reference (this struct carries no list linkage):
+ * url - normalized and decoded url
+ * caption - decoded caption for this url (if any)
+ * type - kind of reference, presumably URL_A or URL_IMG -- TODO confirm
+ */
+typedef struct mime_url_s {
+       f_str_t *url;
+       f_str_t *caption;
+       int type;
+} mime_url_t;
+
+typedef struct mime_ctx_s {            /* holds the header and body part lists plus two "current" strings */
+       LIST_HEAD (headersl, mime_header_s) headers;    /* head of the list of mime_header_s entries */
+       LIST_HEAD (bodypartsl, mime_body_part_s) parts; /* head of the list of mime_body_part_s entries */
+       f_str_t *cur_content_type;      /* presumably the content type currently being processed -- TODO confirm */
+       f_str_t *cur_encoding;          /* presumably the encoding currently being processed -- TODO confirm */
+} mime_ctx_t;
+
+#endif