You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

message.h 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. /**
  2. * @file message.h
  3. * Message processing functions and structures
  4. */
  5. #ifndef RSPAMD_MESSAGE_H
  6. #define RSPAMD_MESSAGE_H
  7. #include "config.h"
  8. #include "email_addr.h"
  9. #include "addr.h"
  10. #include "cryptobox.h"
  11. #include "mime_headers.h"
  12. #include "content_type.h"
  13. #include <unicode/uchar.h>
  14. #include <unicode/utext.h>
  15. struct rspamd_task;
  16. struct controller_session;
  17. struct html_content;
  18. struct rspamd_image;
  19. struct rspamd_archive;
  20. enum rspamd_mime_part_flags {
  21. RSPAMD_MIME_PART_TEXT = (1 << 0),
  22. RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1),
  23. RSPAMD_MIME_PART_IMAGE = (1 << 2),
  24. RSPAMD_MIME_PART_ARCHIVE = (1 << 3),
  25. RSPAMD_MIME_PART_BAD_CTE = (1 << 4),
  26. RSPAMD_MIME_PART_MISSING_CTE = (1 << 5)
  27. };
  28. enum rspamd_cte {
  29. RSPAMD_CTE_UNKNOWN = 0,
  30. RSPAMD_CTE_7BIT = 1,
  31. RSPAMD_CTE_8BIT = 2,
  32. RSPAMD_CTE_QP = 3,
  33. RSPAMD_CTE_B64 = 4,
  34. };
  35. struct rspamd_mime_text_part;
  36. struct rspamd_mime_multipart {
  37. GPtrArray *children;
  38. rspamd_ftok_t boundary;
  39. };
  40. struct rspamd_mime_part {
  41. struct rspamd_content_type *ct;
  42. struct rspamd_content_type *detected_ct;
  43. struct rspamd_content_disposition *cd;
  44. rspamd_ftok_t raw_data;
  45. rspamd_ftok_t parsed_data;
  46. struct rspamd_mime_part *parent_part;
  47. GQueue *headers_order;
  48. GHashTable *raw_headers;
  49. gchar *raw_headers_str;
  50. gsize raw_headers_len;
  51. enum rspamd_cte cte;
  52. enum rspamd_mime_part_flags flags;
  53. guint id;
  54. union {
  55. struct rspamd_mime_multipart *mp;
  56. struct rspamd_mime_text_part *txt;
  57. struct rspamd_image *img;
  58. struct rspamd_archive *arch;
  59. } specific;
  60. guchar digest[rspamd_cryptobox_HASHBYTES];
  61. };
  62. #define RSPAMD_MIME_TEXT_PART_FLAG_UTF (1 << 0)
  63. #define RSPAMD_MIME_TEXT_PART_FLAG_BALANCED (1 << 1)
  64. #define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY (1 << 2)
  65. #define RSPAMD_MIME_TEXT_PART_FLAG_HTML (1 << 3)
  66. #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT (1 << 4)
  67. #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5)
  68. #define RSPAMD_MIME_TEXT_PART_HAS_SUBNORMAL (1 << 6)
  69. #define RSPAMD_MIME_TEXT_PART_NORMALISED (1 << 7)
  70. #define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
  71. #define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
  72. #define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF))
  73. #define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML)
  74. struct rspamd_mime_text_part {
  75. const gchar *language;
  76. GPtrArray *languages;
  77. const gchar *real_charset;
  78. /* Raw data in native encoding */
  79. rspamd_ftok_t raw;
  80. rspamd_ftok_t parsed; /* decoded from mime encodings */
  81. /* UTF8 content */
  82. GByteArray *utf_content; /* utf8 encoded processed content */
  83. GByteArray *utf_raw_content; /* utf raw content */
  84. GByteArray *utf_stripped_content; /* utf content with no newlines */
  85. GArray *normalized_hashes;
  86. GArray *utf_words;
  87. UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
  88. GPtrArray *newlines; /**< positions of newlines in text, relative to content*/
  89. struct html_content *html;
  90. GList *exceptions; /**< list of offsets of urls */
  91. struct rspamd_mime_part *mime_part;
  92. guint flags;
  93. guint nlines;
  94. guint spaces;
  95. guint nwords;
  96. guint non_ascii_chars;
  97. guint ascii_chars;
  98. guint double_spaces;
  99. guint non_spaces;
  100. guint empty_lines;
  101. guint capital_letters;
  102. guint numeric_characters;
  103. guint unicode_scripts;
  104. };
  105. enum rspamd_received_type {
  106. RSPAMD_RECEIVED_SMTP = 0,
  107. RSPAMD_RECEIVED_ESMTP,
  108. RSPAMD_RECEIVED_ESMTPA,
  109. RSPAMD_RECEIVED_ESMTPS,
  110. RSPAMD_RECEIVED_ESMTPSA,
  111. RSPAMD_RECEIVED_LMTP,
  112. RSPAMD_RECEIVED_IMAP,
  113. RSPAMD_RECEIVED_UNKNOWN
  114. };
  115. #define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0)
  116. #define RSPAMD_RECEIVED_FLAG_SSL (1 << 1)
  117. #define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
  118. struct received_header {
  119. gchar *from_hostname;
  120. gchar *from_ip;
  121. gchar *real_hostname;
  122. gchar *real_ip;
  123. gchar *by_hostname;
  124. gchar *for_mbox;
  125. gchar *comment_ip;
  126. rspamd_inet_addr_t *addr;
  127. struct rspamd_mime_header *hdr;
  128. time_t timestamp;
  129. enum rspamd_received_type type;
  130. gint flags;
  131. };
  132. /**
  133. * Parse and pre-process mime message
  134. * @param task worker_task object
  135. * @return
  136. */
  137. gboolean rspamd_message_parse (struct rspamd_task *task);
  138. /**
  139. * Process content in task (e.g. HTML parsing)
  140. * @param task
  141. */
  142. void rspamd_message_process (struct rspamd_task *task);
  143. /**
  144. * Get an array of header's values with specified header's name using raw headers
  145. * @param task worker task structure
  146. * @param field header's name
  147. * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
  148. * @return An array of header's values or NULL. It is NOT permitted to free array or values.
  149. */
  150. GPtrArray *rspamd_message_get_header_array (struct rspamd_task *task,
  151. const gchar *field,
  152. gboolean strong);
  153. /**
  154. * Get an array of mime parts header's values with specified header's name using raw headers
  155. * @param task worker task structure
  156. * @param field header's name
  157. * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
  158. * @return An array of header's values or NULL. It is NOT permitted to free array or values.
  159. */
  160. GPtrArray *rspamd_message_get_mime_header_array (struct rspamd_task *task,
  161. const gchar *field,
  162. gboolean strong);
  163. /**
  164. * Get an array of header's values with specified header's name using raw headers
  165. * @param htb hash table indexed by header name (caseless) with ptr arrays as elements
  166. * @param field header's name
  167. * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
  168. * @return An array of header's values or NULL. It is NOT permitted to free array or values.
  169. */
  170. GPtrArray *rspamd_message_get_header_from_hash (GHashTable *htb,
  171. rspamd_mempool_t *pool,
  172. const gchar *field,
  173. gboolean strong);
  174. /**
  175. * Converts string to cte
  176. * @param str
  177. * @return
  178. */
  179. enum rspamd_cte rspamd_cte_from_string (const gchar *str);
  180. /**
  181. * Converts cte to string
  182. * @param ct
  183. * @return
  184. */
  185. const gchar* rspamd_cte_to_string (enum rspamd_cte ct);
  186. #endif