123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215 |
- /**
- * @file message.h
- * Message processing functions and structures
- */
-
- #ifndef RSPAMD_MESSAGE_H
- #define RSPAMD_MESSAGE_H
-
- #include "config.h"
-
- #include "libmime/email_addr.h"
- #include "libutil/addr.h"
- #include "libcryptobox/cryptobox.h"
- #include "libmime/mime_headers.h"
- #include "libmime/content_type.h"
- #include "libutil/ref.h"
- #include "libutil/str_util.h"
-
- #include <unicode/uchar.h>
- #include <unicode/utext.h>
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
/*
 * Opaque forward declarations. Within this header these types are only
 * referred to through pointers (or not at all), so their full definitions
 * are not required here.
 */
struct rspamd_task;
struct controller_session;
struct html_content;
struct rspamd_image;
struct rspamd_archive;
-
/**
 * Bit flags for a MIME part. Every constant occupies a distinct bit, so the
 * values can be combined with bitwise OR.
 * NOTE(review): presumably stored in `struct rspamd_mime_part::flags` -
 * confirm against the parser.
 */
enum rspamd_mime_part_flags {
	RSPAMD_MIME_PART_TEXT = (1 << 0),
	/* The spelling "ATTACHEMENT" is part of the public name; keep it as is */
	RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1),
	RSPAMD_MIME_PART_IMAGE = (1 << 2),
	RSPAMD_MIME_PART_ARCHIVE = (1 << 3),
	RSPAMD_MIME_PART_BAD_CTE = (1 << 4),
	RSPAMD_MIME_PART_MISSING_CTE = (1 << 5)
};
-
/**
 * Content transfer encoding of a MIME part.
 * The numeric values are explicit and sequential; RSPAMD_CTE_UNKNOWN is 0,
 * so a zero-initialised field reads as "unknown".
 * NOTE(review): QP/B64/UUE presumably stand for quoted-printable, base64 and
 * uuencode - confirm against rspamd_cte_from_string().
 */
enum rspamd_cte {
	RSPAMD_CTE_UNKNOWN = 0,
	RSPAMD_CTE_7BIT = 1,
	RSPAMD_CTE_8BIT = 2,
	RSPAMD_CTE_QP = 3,
	RSPAMD_CTE_B64 = 4,
	RSPAMD_CTE_UUE = 5,
};
-
- struct rspamd_mime_text_part;
-
- struct rspamd_mime_multipart {
- GPtrArray *children;
- rspamd_ftok_t boundary;
- };
-
- struct rspamd_mime_part {
- struct rspamd_content_type *ct;
- struct rspamd_content_type *detected_ct;
- gchar *detected_type;
- gchar *detected_ext;
- struct rspamd_content_disposition *cd;
- rspamd_ftok_t raw_data;
- rspamd_ftok_t parsed_data;
- struct rspamd_mime_part *parent_part;
-
- struct rspamd_mime_header *headers_order;
- struct rspamd_mime_headers_table *raw_headers;
-
- gchar *raw_headers_str;
- gsize raw_headers_len;
-
- enum rspamd_cte cte;
- guint flags;
- guint id;
-
- union {
- struct rspamd_mime_multipart *mp;
- struct rspamd_mime_text_part *txt;
- struct rspamd_image *img;
- struct rspamd_archive *arch;
- } specific;
-
- guchar digest[rspamd_cryptobox_HASHBYTES];
- };
-
/*
 * Bit flags for text parts; each one occupies a distinct bit.
 * Note that the last two names lack the `_FLAG_` infix used by the others;
 * they are public names and are kept unchanged.
 */
#define RSPAMD_MIME_TEXT_PART_FLAG_UTF (1 << 0)
#define RSPAMD_MIME_TEXT_PART_FLAG_BALANCED (1 << 1)
#define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY (1 << 2)
#define RSPAMD_MIME_TEXT_PART_FLAG_HTML (1 << 3)
#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT (1 << 4)
#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5)
#define RSPAMD_MIME_TEXT_PART_HAS_SUBNORMAL (1 << 6)
#define RSPAMD_MIME_TEXT_PART_NORMALISED (1 << 7)

/*
 * Flag tests for any pointer `part` that has a `flags` member.
 * IS_PART_EMPTY/UTF/HTML yield the masked bit (zero or the flag's value, not
 * necessarily 1); IS_PART_RAW is the logical negation of the UTF test and
 * therefore yields exactly 0 or 1.
 */
#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
#define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF))
#define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML)
-
-
- struct rspamd_mime_text_part {
- const gchar *language;
- GPtrArray *languages;
- const gchar *real_charset;
-
- /* Raw data in native encoding */
- rspamd_ftok_t raw;
- rspamd_ftok_t parsed; /* decoded from mime encodings */
-
- /* UTF8 content */
- GByteArray *utf_content; /* utf8 encoded processed content */
- GByteArray *utf_raw_content; /* utf raw content */
- GByteArray *utf_stripped_content; /* utf content with no newlines */
- GArray *normalized_hashes;
- GArray *utf_words;
- UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
-
- GPtrArray *newlines; /**< positions of newlines in text, relative to content*/
- struct html_content *html;
- GList *exceptions; /**< list of offsets of urls */
- struct rspamd_mime_part *mime_part;
-
- guint flags;
- guint nlines;
- guint spaces;
- guint nwords;
- guint non_ascii_chars;
- guint ascii_chars;
- guint double_spaces;
- guint non_spaces;
- guint empty_lines;
- guint capital_letters;
- guint numeric_characters;
- guint unicode_scripts;
- };
-
- struct rspamd_message_raw_headers_content {
- const gchar *begin;
- gsize len;
- const gchar *body_start;
- };
-
- struct rspamd_message {
- const gchar *message_id;
- gchar *subject;
-
- GPtrArray *parts; /**< list of parsed parts */
- GPtrArray *text_parts; /**< list of text parts */
- struct rspamd_message_raw_headers_content raw_headers_content;
- struct rspamd_received_header *received; /**< list of received headers */
- GHashTable *urls; /**< list of parsed urls */
- GHashTable *emails; /**< list of parsed emails */
- struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */
- struct rspamd_mime_header *headers_order; /**< order of raw headers */
- GPtrArray *rcpt_mime;
- GPtrArray *from_mime;
- guchar digest[16];
- enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */
- ref_entry_t ref;
- };
-
/*
 * MESSAGE_FIELD: direct access to `task->message->field`. It dereferences
 * `task->message` unconditionally, so the caller must know it is non-NULL.
 */
#define MESSAGE_FIELD(task, field) ((task)->message->field)

/*
 * MESSAGE_FIELD_CHECK: like MESSAGE_FIELD, but yields NULL converted to the
 * field's type when `task->message` is NULL.
 *  - `task` is expanded more than once; avoid arguments with side effects.
 *  - Relies on the `__typeof__` compiler extension (GCC/Clang).
 *  - Only usable for fields whose type NULL can be cast to (pointers and
 *    scalars), not for struct-valued fields.
 */
#define MESSAGE_FIELD_CHECK(task, field) ((task)->message ? \
	(task)->message->field : \
	(__typeof__((task)->message->field))NULL)
-
- /**
- * Parse and pre-process mime message
- * @param task worker_task object
- * @return
- */
- gboolean rspamd_message_parse (struct rspamd_task *task);
-
/**
 * Process content in task (e.g. HTML parsing)
 * @param task task whose content is processed
 * NOTE(review): presumably expected to run after rspamd_message_parse() -
 * confirm the required call order against the callers.
 */
void rspamd_message_process(struct rspamd_task *task);
-
-
- /**
- * Converts string to cte
- * @param str
- * @return
- */
- enum rspamd_cte rspamd_cte_from_string (const gchar *str);
-
- /**
- * Converts cte to string
- * @param ct
- * @return
- */
- const gchar *rspamd_cte_to_string (enum rspamd_cte ct);
-
/**
 * Creates a message object for a task
 * @param task task the message is created for
 * @return pointer to the message; NOTE(review): ownership, initial reference
 * count and whether NULL can be returned are not visible here - confirm
 * against the implementation
 */
struct rspamd_message *rspamd_message_new(struct rspamd_task *task);
-
/**
 * Takes a reference on a message
 * @param msg message to reference
 * @return pointer to a message; NOTE(review): presumably `msg` itself after
 * its `ref` counter has been incremented - confirm against the implementation
 */
struct rspamd_message *rspamd_message_ref(struct rspamd_message *msg);
-
/**
 * Drops a reference on a message
 * @param msg message to release
 * NOTE(review): presumably destroys the message once the last reference is
 * dropped; NULL handling is not visible here - confirm against the
 * implementation
 */
void rspamd_message_unref(struct rspamd_message *msg);
-
- /**
- * Updates digest of the message if modified
- * @param msg
- * @param input
- * @param len
- */
- void rspamd_message_update_digest (struct rspamd_message *msg,
- const void *input, gsize len);
-
- #ifdef __cplusplus
- }
- #endif
-
- #endif
|