/** * @file message.h * Message processing functions and structures */ #ifndef RSPAMD_MESSAGE_H #define RSPAMD_MESSAGE_H #include "config.h" #include "email_addr.h" #include "addr.h" #include "cryptobox.h" #include "mime_headers.h" #include "content_type.h" #include #include struct rspamd_task; struct controller_session; struct html_content; struct rspamd_image; struct rspamd_archive; enum rspamd_mime_part_flags { RSPAMD_MIME_PART_TEXT = (1 << 0), RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1), RSPAMD_MIME_PART_IMAGE = (1 << 2), RSPAMD_MIME_PART_ARCHIVE = (1 << 3), RSPAMD_MIME_PART_BAD_CTE = (1 << 4), RSPAMD_MIME_PART_MISSING_CTE = (1 << 5) }; enum rspamd_cte { RSPAMD_CTE_UNKNOWN = 0, RSPAMD_CTE_7BIT = 1, RSPAMD_CTE_8BIT = 2, RSPAMD_CTE_QP = 3, RSPAMD_CTE_B64 = 4, }; struct rspamd_mime_text_part; struct rspamd_mime_multipart { GPtrArray *children; rspamd_ftok_t boundary; }; struct rspamd_mime_part { struct rspamd_content_type *ct; struct rspamd_content_type *detected_ct; struct rspamd_content_disposition *cd; rspamd_ftok_t raw_data; rspamd_ftok_t parsed_data; struct rspamd_mime_part *parent_part; GQueue *headers_order; GHashTable *raw_headers; gchar *raw_headers_str; gsize raw_headers_len; enum rspamd_cte cte; enum rspamd_mime_part_flags flags; guint id; union { struct rspamd_mime_multipart *mp; struct rspamd_mime_text_part *txt; struct rspamd_image *img; struct rspamd_archive *arch; } specific; guchar digest[rspamd_cryptobox_HASHBYTES]; }; #define RSPAMD_MIME_TEXT_PART_FLAG_UTF (1 << 0) #define RSPAMD_MIME_TEXT_PART_FLAG_BALANCED (1 << 1) #define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY (1 << 2) #define RSPAMD_MIME_TEXT_PART_FLAG_HTML (1 << 3) #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT (1 << 4) #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5) #define RSPAMD_MIME_TEXT_PART_HAS_SUBNORMAL (1 << 6) #define RSPAMD_MIME_TEXT_PART_NORMALISED (1 << 7) #define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY) #define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF) #define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)) #define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML) struct rspamd_mime_text_part { const gchar *language; GPtrArray *languages; const gchar *real_charset; /* Raw data in native encoding */ rspamd_ftok_t raw; rspamd_ftok_t parsed; /* decoded from mime encodings */ /* UTF8 content */ GByteArray *utf_content; /* utf8 encoded processed content */ GByteArray *utf_raw_content; /* utf raw content */ GByteArray *utf_stripped_content; /* utf content with no newlines */ GArray *normalized_hashes; GArray *utf_words; UText utf_stripped_text; /* Used by libicu to represent the utf8 content */ GPtrArray *newlines; /**< positions of newlines in text, relative to content*/ struct html_content *html; GList *exceptions; /**< list of offsets of urls */ struct rspamd_mime_part *mime_part; guint flags; guint nlines; guint spaces; guint nwords; guint non_ascii_chars; guint ascii_chars; guint double_spaces; guint non_spaces; guint empty_lines; guint capital_letters; guint numeric_characters; guint unicode_scripts; }; /** * Parse and pre-process mime message * @param task worker_task object * @return */ gboolean rspamd_message_parse (struct rspamd_task *task); /** * Process content in task (e.g. HTML parsing) * @param task */ void rspamd_message_process (struct rspamd_task *task); /** * Get an array of header's values with specified header's name using raw headers * @param task worker task structure * @param field header's name * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not * @return An array of header's values or NULL. It is NOT permitted to free array or values. */ GPtrArray *rspamd_message_get_header_array (struct rspamd_task *task, const gchar *field, gboolean strong); /** * Get an array of mime parts header's values with specified header's name using raw headers * @param task worker task structure * @param field header's name * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not * @return An array of header's values or NULL. It is NOT permitted to free array or values. */ GPtrArray *rspamd_message_get_mime_header_array (struct rspamd_task *task, const gchar *field, gboolean strong); /** * Get an array of header's values with specified header's name using raw headers * @param htb hash table indexed by header name (caseless) with ptr arrays as elements * @param field header's name * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not * @return An array of header's values or NULL. It is NOT permitted to free array or values. */ GPtrArray *rspamd_message_get_header_from_hash (GHashTable *htb, rspamd_mempool_t *pool, const gchar *field, gboolean strong); /** * Converts string to cte * @param str * @return */ enum rspamd_cte rspamd_cte_from_string (const gchar *str); /** * Converts cte to string * @param ct * @return */ const gchar* rspamd_cte_to_string (enum rspamd_cte ct); #endif