You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

message.h 5.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /**
  2. * @file message.h
  3. * Message processing functions and structures
  4. */
  5. #ifndef RSPAMD_MESSAGE_H
  6. #define RSPAMD_MESSAGE_H
  7. #include "config.h"
  8. #include "libmime/email_addr.h"
  9. #include "libutil/addr.h"
  10. #include "libcryptobox/cryptobox.h"
  11. #include "libmime/mime_headers.h"
  12. #include "libmime/content_type.h"
  13. #include "libutil/ref.h"
  14. #include "libutil/str_util.h"
  15. #include <unicode/uchar.h>
  16. #include <unicode/utext.h>
  17. #ifdef __cplusplus
  18. extern "C" {
  19. #endif
  /*
   * Forward declarations: only the struct tags are introduced here, so the
   * rest of this header can refer to these types through pointers.
   */
  struct rspamd_task;        /* argument of rspamd_message_parse()/_process()/_new() */
  struct controller_session; /* not referenced in the visible part of this header */
  struct html_content;       /* pointed to by rspamd_mime_text_part::html */
  struct rspamd_image;       /* pointed to by rspamd_mime_part::specific.img */
  struct rspamd_archive;     /* pointed to by rspamd_mime_part::specific.arch */
  /**
   * Bit flags describing a MIME part. Each enumerator occupies its own bit,
   * so several of them can be combined with bitwise OR.
   * NOTE(review): presumably stored in rspamd_mime_part::flags - confirm.
   */
  enum rspamd_mime_part_flags {
      RSPAMD_MIME_PART_TEXT        = 1 << 0, /* bit 0 */
      RSPAMD_MIME_PART_ATTACHEMENT = 1 << 1, /* bit 1; (sic) spelling is part of the public name */
      RSPAMD_MIME_PART_IMAGE       = 1 << 2, /* bit 2 */
      RSPAMD_MIME_PART_ARCHIVE     = 1 << 3, /* bit 3 */
      RSPAMD_MIME_PART_BAD_CTE     = 1 << 4, /* bit 4 */
      RSPAMD_MIME_PART_MISSING_CTE = 1 << 5  /* bit 5 */
  };
  /**
   * Transfer-encoding identifiers. These are the values held in
   * rspamd_mime_part::cte and converted by rspamd_cte_from_string() and
   * rspamd_cte_to_string().
   * NOTE(review): "CTE" presumably stands for Content-Transfer-Encoding - confirm.
   */
  enum rspamd_cte {
      RSPAMD_CTE_UNKNOWN = 0, /* zero value */
      RSPAMD_CTE_7BIT    = 1,
      RSPAMD_CTE_8BIT    = 2,
      RSPAMD_CTE_QP      = 3, /* presumably quoted-printable */
      RSPAMD_CTE_B64     = 4, /* presumably base64 */
      RSPAMD_CTE_UUE     = 5, /* presumably uuencode */
  };
  41. struct rspamd_mime_text_part;
  42. struct rspamd_mime_multipart {
  43. GPtrArray *children;
  44. rspamd_ftok_t boundary;
  45. };
  46. struct rspamd_mime_part {
  47. struct rspamd_content_type *ct;
  48. struct rspamd_content_type *detected_ct;
  49. gchar *detected_type;
  50. gchar *detected_ext;
  51. struct rspamd_content_disposition *cd;
  52. rspamd_ftok_t raw_data;
  53. rspamd_ftok_t parsed_data;
  54. struct rspamd_mime_part *parent_part;
  55. struct rspamd_mime_header *headers_order;
  56. struct rspamd_mime_headers_table *raw_headers;
  57. gchar *raw_headers_str;
  58. gsize raw_headers_len;
  59. enum rspamd_cte cte;
  60. guint flags;
  61. guint id;
  62. union {
  63. struct rspamd_mime_multipart *mp;
  64. struct rspamd_mime_text_part *txt;
  65. struct rspamd_image *img;
  66. struct rspamd_archive *arch;
  67. } specific;
  68. guchar digest[rspamd_cryptobox_HASHBYTES];
  69. };
  /*
   * Bit flags for text parts. The IS_PART_* helpers below test them against
   * the `flags` member of whatever `part` points to.
   */
  #define RSPAMD_MIME_TEXT_PART_FLAG_UTF          (1 << 0)
  #define RSPAMD_MIME_TEXT_PART_FLAG_BALANCED     (1 << 1)
  #define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY        (1 << 2)
  #define RSPAMD_MIME_TEXT_PART_FLAG_HTML         (1 << 3)
  #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT         (1 << 4)
  #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5)
  #define RSPAMD_MIME_TEXT_PART_HAS_SUBNORMAL     (1 << 6)
  #define RSPAMD_MIME_TEXT_PART_NORMALISED        (1 << 7)

  /*
   * Flag predicates. EMPTY, UTF and HTML yield the masked bit (non-zero when
   * set, not necessarily 1); RAW yields 1 when the UTF bit is clear, else 0.
   * `part` is evaluated once per use.
   */
  #define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
  #define IS_PART_UTF(part)   ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
  #define IS_PART_RAW(part)   (!IS_PART_UTF(part))
  #define IS_PART_HTML(part)  ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML)
  82. struct rspamd_mime_text_part {
  83. const gchar *language;
  84. GPtrArray *languages;
  85. const gchar *real_charset;
  86. /* Raw data in native encoding */
  87. rspamd_ftok_t raw;
  88. rspamd_ftok_t parsed; /* decoded from mime encodings */
  89. /* UTF8 content */
  90. GByteArray *utf_content; /* utf8 encoded processed content */
  91. GByteArray *utf_raw_content; /* utf raw content */
  92. GByteArray *utf_stripped_content; /* utf content with no newlines */
  93. GArray *normalized_hashes;
  94. GArray *utf_words;
  95. UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
  96. GPtrArray *newlines; /**< positions of newlines in text, relative to content*/
  97. struct html_content *html;
  98. GList *exceptions; /**< list of offsets of urls */
  99. struct rspamd_mime_part *mime_part;
  100. guint flags;
  101. guint nlines;
  102. guint spaces;
  103. guint nwords;
  104. guint non_ascii_chars;
  105. guint ascii_chars;
  106. guint double_spaces;
  107. guint non_spaces;
  108. guint empty_lines;
  109. guint capital_letters;
  110. guint numeric_characters;
  111. guint unicode_scripts;
  112. };
  113. struct rspamd_message_raw_headers_content {
  114. const gchar *begin;
  115. gsize len;
  116. const gchar *body_start;
  117. };
  118. struct rspamd_message {
  119. const gchar *message_id;
  120. gchar *subject;
  121. GPtrArray *parts; /**< list of parsed parts */
  122. GPtrArray *text_parts; /**< list of text parts */
  123. struct rspamd_message_raw_headers_content raw_headers_content;
  124. struct rspamd_received_header *received; /**< list of received headers */
  125. GHashTable *urls; /**< list of parsed urls */
  126. GHashTable *emails; /**< list of parsed emails */
  127. struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */
  128. struct rspamd_mime_header *headers_order; /**< order of raw headers */
  129. GPtrArray *rcpt_mime;
  130. GPtrArray *from_mime;
  131. guchar digest[16];
  132. enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */
  133. ref_entry_t ref;
  134. };
  /*
   * MESSAGE_FIELD(task, field)
   *   Reads `field` from (task)->message without checking the message pointer;
   *   the caller must make sure it is not NULL.
   *
   * MESSAGE_FIELD_CHECK(task, field)
   *   Same, but yields NULL cast to the type of the field when
   *   (task)->message is NULL. The field's type must therefore be one that
   *   NULL can be cast to (in practice a pointer). `task` is evaluated twice,
   *   so do not pass an expression with side effects. Relies on the
   *   __typeof__ compiler extension.
   */
  #define MESSAGE_FIELD(task, field) \
      ((task)->message->field)
  #define MESSAGE_FIELD_CHECK(task, field) \
      ((task)->message \
          ? (task)->message->field \
          : (__typeof__((task)->message->field))NULL)
  139. /**
  140. * Parse and pre-process mime message
  141. * @param task worker_task object
  142. * @return
  143. */
  144. gboolean rspamd_message_parse (struct rspamd_task *task);
  /**
   * Processes the content held by a task (for example, HTML parsing).
   * @param task task object whose content is processed
   */
  void
  rspamd_message_process (struct rspamd_task *task);
  150. /**
  151. * Converts string to cte
  152. * @param str
  153. * @return
  154. */
  155. enum rspamd_cte rspamd_cte_from_string (const gchar *str);
  156. /**
  157. * Converts cte to string
  158. * @param ct
  159. * @return
  160. */
  161. const gchar *rspamd_cte_to_string (enum rspamd_cte ct);
  /*
   * Message lifecycle helpers.
   * NOTE(review): struct rspamd_message carries a `ref_entry_t ref` member, so
   * these presumably create a message for `task`, take an extra reference and
   * drop a reference respectively - confirm against the implementation.
   */
  struct rspamd_message *
  rspamd_message_new (struct rspamd_task *task);

  struct rspamd_message *
  rspamd_message_ref (struct rspamd_message *msg);

  void
  rspamd_message_unref (struct rspamd_message *msg);
  165. /**
  166. * Updates digest of the message if modified
  167. * @param msg
  168. * @param input
  169. * @param len
  170. */
  171. void rspamd_message_update_digest (struct rspamd_message *msg,
  172. const void *input, gsize len);
  173. #ifdef __cplusplus
  174. }
  175. #endif
  176. #endif