You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

task.h 9.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef TASK_H_
  17. #define TASK_H_
  18. #include "config.h"
  19. #include "http.h"
  20. #include "events.h"
  21. #include "util.h"
  22. #include "mem_pool.h"
  23. #include "dns.h"
  24. #include "re_cache.h"
  25. #include <gmime/gmime.h>
  26. enum rspamd_command {
  27. CMD_CHECK,
  28. CMD_SYMBOLS,
  29. CMD_REPORT,
  30. CMD_REPORT_IFSPAM,
  31. CMD_SKIP,
  32. CMD_PING,
  33. CMD_PROCESS,
  34. CMD_OTHER
  35. };
  36. enum rspamd_metric_action {
  37. METRIC_ACTION_REJECT = 0,
  38. METRIC_ACTION_SOFT_REJECT,
  39. METRIC_ACTION_REWRITE_SUBJECT,
  40. METRIC_ACTION_ADD_HEADER,
  41. METRIC_ACTION_GREYLIST,
  42. METRIC_ACTION_NOACTION,
  43. METRIC_ACTION_MAX
  44. };
  45. enum rspamd_task_stage {
  46. RSPAMD_TASK_STAGE_CONNECT = (1 << 0),
  47. RSPAMD_TASK_STAGE_ENVELOPE = (1 << 1),
  48. RSPAMD_TASK_STAGE_READ_MESSAGE = (1 << 2),
  49. RSPAMD_TASK_STAGE_PRE_FILTERS = (1 << 3),
  50. RSPAMD_TASK_STAGE_FILTERS = (1 << 4),
  51. RSPAMD_TASK_STAGE_CLASSIFIERS_PRE = (1 << 5),
  52. RSPAMD_TASK_STAGE_CLASSIFIERS = (1 << 6),
  53. RSPAMD_TASK_STAGE_CLASSIFIERS_POST = (1 << 7),
  54. RSPAMD_TASK_STAGE_COMPOSITES = (1 << 8),
  55. RSPAMD_TASK_STAGE_POST_FILTERS = (1 << 9),
  56. RSPAMD_TASK_STAGE_LEARN_PRE = (1 << 10),
  57. RSPAMD_TASK_STAGE_LEARN = (1 << 11),
  58. RSPAMD_TASK_STAGE_LEARN_POST = (1 << 12),
  59. RSPAMD_TASK_STAGE_DONE = (1 << 13),
  60. RSPAMD_TASK_STAGE_REPLIED = (1 << 14)
  61. };
  62. #define RSPAMD_TASK_PROCESS_ALL (RSPAMD_TASK_STAGE_CONNECT | \
  63. RSPAMD_TASK_STAGE_ENVELOPE | \
  64. RSPAMD_TASK_STAGE_READ_MESSAGE | \
  65. RSPAMD_TASK_STAGE_PRE_FILTERS | \
  66. RSPAMD_TASK_STAGE_FILTERS | \
  67. RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
  68. RSPAMD_TASK_STAGE_CLASSIFIERS | \
  69. RSPAMD_TASK_STAGE_CLASSIFIERS_POST | \
  70. RSPAMD_TASK_STAGE_COMPOSITES | \
  71. RSPAMD_TASK_STAGE_POST_FILTERS | \
  72. RSPAMD_TASK_STAGE_LEARN_PRE | \
  73. RSPAMD_TASK_STAGE_LEARN | \
  74. RSPAMD_TASK_STAGE_LEARN_POST | \
  75. RSPAMD_TASK_STAGE_DONE)
  76. #define RSPAMD_TASK_PROCESS_LEARN (RSPAMD_TASK_STAGE_CONNECT | \
  77. RSPAMD_TASK_STAGE_ENVELOPE | \
  78. RSPAMD_TASK_STAGE_READ_MESSAGE | \
  79. RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
  80. RSPAMD_TASK_STAGE_CLASSIFIERS | \
  81. RSPAMD_TASK_STAGE_CLASSIFIERS_POST | \
  82. RSPAMD_TASK_STAGE_LEARN_PRE | \
  83. RSPAMD_TASK_STAGE_LEARN | \
  84. RSPAMD_TASK_STAGE_LEARN_POST | \
  85. RSPAMD_TASK_STAGE_DONE)
  86. #define RSPAMD_TASK_FLAG_MIME (1 << 0)
  87. #define RSPAMD_TASK_FLAG_JSON (1 << 1)
  88. #define RSPAMD_TASK_FLAG_SKIP_EXTRA (1 << 2)
  89. #define RSPAMD_TASK_FLAG_SKIP (1 << 3)
  90. #define RSPAMD_TASK_FLAG_EXT_URLS (1 << 4)
  91. #define RSPAMD_TASK_FLAG_SPAMC (1 << 5)
  92. #define RSPAMD_TASK_FLAG_PASS_ALL (1 << 6)
  93. #define RSPAMD_TASK_FLAG_NO_LOG (1 << 7)
  94. #define RSPAMD_TASK_FLAG_NO_IP (1 << 8)
  95. #define RSPAMD_TASK_FLAG_HAS_CONTROL (1 << 9)
  96. #define RSPAMD_TASK_FLAG_PROCESSING (1 << 10)
  97. #define RSPAMD_TASK_FLAG_GTUBE (1 << 11)
  98. #define RSPAMD_TASK_FLAG_FILE (1 << 12)
  99. #define RSPAMD_TASK_FLAG_NO_STAT (1 << 13)
  100. #define RSPAMD_TASK_FLAG_UNLEARN (1 << 14)
  101. #define RSPAMD_TASK_FLAG_ALREADY_LEARNED (1 << 15)
  102. #define RSPAMD_TASK_FLAG_LEARN_SPAM (1 << 16)
  103. #define RSPAMD_TASK_FLAG_LEARN_HAM (1 << 17)
  104. #define RSPAMD_TASK_FLAG_LEARN_AUTO (1 << 18)
  105. #define RSPAMD_TASK_FLAG_BROKEN_HEADERS (1 << 19)
  106. #define RSPAMD_TASK_IS_SKIPPED(task) (((task)->flags & RSPAMD_TASK_FLAG_SKIP))
  107. #define RSPAMD_TASK_IS_JSON(task) (((task)->flags & RSPAMD_TASK_FLAG_JSON))
  108. #define RSPAMD_TASK_IS_SPAMC(task) (((task)->flags & RSPAMD_TASK_FLAG_SPAMC))
  109. #define RSPAMD_TASK_IS_PROCESSED(task) (((task)->processed_stages & RSPAMD_TASK_STAGE_DONE))
  110. #define RSPAMD_TASK_IS_CLASSIFIED(task) (((task)->processed_stages & RSPAMD_TASK_STAGE_CLASSIFIERS))
  111. typedef gint (*protocol_reply_func)(struct rspamd_task *task);
  112. struct custom_command {
  113. const gchar *name;
  114. protocol_reply_func func;
  115. };
  116. /**
  117. * Worker task structure
  118. */
  119. struct rspamd_task {
  120. struct rspamd_worker *worker; /**< pointer to worker object */
  121. struct custom_command *custom_cmd; /**< custom command if any */
  122. guint processed_stages; /**< bits of stages that are processed */
  123. enum rspamd_command cmd; /**< command */
  124. gint sock; /**< socket descriptor */
  125. guint flags; /**< Bit flags */
  126. guint32 dns_requests; /**< number of DNS requests per this task */
  127. gulong message_len; /**< Message length */
  128. gchar *helo; /**< helo header value */
  129. gchar *queue_id; /**< queue id if specified */
  130. const gchar *message_id; /**< message id */
  131. rspamd_inet_addr_t *from_addr; /**< from addr for a task */
  132. rspamd_inet_addr_t *client_addr; /**< address of connected socket */
  133. gchar *deliver_to; /**< address to deliver */
  134. gchar *user; /**< user to deliver */
  135. gchar *subject; /**< subject (for non-mime) */
  136. gchar *hostname; /**< hostname reported by MTA */
  137. GHashTable *request_headers; /**< HTTP headers in a request */
  138. GHashTable *reply_headers; /**< Custom reply headers */
  139. rspamd_ftok_t msg; /**< message buffer */
  140. struct rspamd_http_connection *http_conn; /**< HTTP server connection */
  141. struct rspamd_async_session * s; /**< async session object */
  142. GMimeMessage *message; /**< message, parsed with GMime */
  143. GPtrArray *parts; /**< list of parsed parts */
  144. GPtrArray *text_parts; /**< list of text parts */
  145. rspamd_ftok_t raw_headers_content; /**< list of raw headers */
  146. GPtrArray *received; /**< list of received headers */
  147. GHashTable *urls; /**< list of parsed urls */
  148. GHashTable *emails; /**< list of parsed emails */
  149. GList *images; /**< list of images */
  150. GHashTable *raw_headers; /**< list of raw headers */
  151. GHashTable *results; /**< hash table of metric_result indexed by
  152. * metric's name */
  153. GPtrArray *tokens; /**< statistics tokens */
  154. InternetAddressList *rcpt_mime; /**< list of all recipients */
  155. InternetAddressList *rcpt_envelope; /**< list of all recipients */
  156. InternetAddressList *from_mime;
  157. InternetAddressList *from_envelope;
  158. GList *messages; /**< list of messages that would be reported */
  159. struct rspamd_re_runtime *re_rt; /**< regexp runtime */
  160. GPtrArray *stat_runtimes; /**< backend runtime */
  161. struct rspamd_config *cfg; /**< pointer to config object */
  162. GError *err;
  163. rspamd_mempool_t *task_pool; /**< memory pool for task */
  164. double time_real;
  165. double time_virtual;
  166. struct timeval tv;
  167. gboolean (*fin_callback)(struct rspamd_task *task, void *arg);
  168. /**< calback for filters finalizing */
  169. void *fin_arg; /**< argument for fin callback */
  170. struct rspamd_dns_resolver *resolver; /**< DNS resolver */
  171. struct event_base *ev_base; /**< Event base */
  172. struct event timeout_ev; /**< Global task timeout */
  173. gpointer checkpoint; /**< Opaque checkpoint data */
  174. struct {
  175. enum rspamd_metric_action action; /**< Action of pre filters */
  176. gchar *str; /**< String describing action */
  177. } pre_result; /**< Result of pre-filters */
  178. ucl_object_t *settings; /**< Settings applied to task */
  179. const gchar *classifier; /**< Classifier to learn (if needed) */
  180. };
  181. /**
  182. * Construct new task for worker
  183. */
  184. struct rspamd_task * rspamd_task_new (struct rspamd_worker *worker,
  185. struct rspamd_config *cfg);
  186. /**
  187. * Destroy task object and remove its IO dispatcher if it exists
  188. */
  189. void rspamd_task_free (struct rspamd_task *task);
  190. /**
  191. * Called if session was restored inside fin callback
  192. */
  193. void rspamd_task_restore (void *arg);
  194. /**
  195. * Called if all filters are processed
  196. * @return TRUE if session should be terminated
  197. */
  198. gboolean rspamd_task_fin (void *arg);
  199. /**
  200. * Load HTTP message with body in `msg` to an rspamd_task
  201. * @param task
  202. * @param msg
  203. * @param start
  204. * @param len
  205. * @return
  206. */
  207. gboolean rspamd_task_load_message (struct rspamd_task *task,
  208. struct rspamd_http_message *msg, const gchar *start, gsize len);
  209. /**
  210. * Process task
  211. * @param task task to process
  212. * @return task has been successfully parsed and processed
  213. */
  214. gboolean rspamd_task_process (struct rspamd_task *task, guint stages);
  215. /**
  216. * Return address of sender or NULL
  217. * @param task
  218. * @return
  219. */
  220. const gchar *rspamd_task_get_sender (struct rspamd_task *task);
  221. /**
  222. * Return addresses in the following precendence:
  223. * - deliver to
  224. * - the first smtp recipient
  225. * - the first mime recipient
  226. * @param task
  227. * @return
  228. */
  229. const gchar *rspamd_task_get_principal_recipient (struct rspamd_task *task);
  230. /**
  231. * Add a recipient for a task
  232. * @param task task object
  233. * @param rcpt string representation of recipient address
  234. * @return TRUE if an address has been parsed and added
  235. */
  236. gboolean rspamd_task_add_recipient (struct rspamd_task *task, const gchar *rcpt);
  237. /**
  238. * Add a sender for a task
  239. * @param task task object
  240. * @param sender string representation of sender's address
  241. * @return TRUE if an address has been parsed and added
  242. */
  243. gboolean rspamd_task_add_sender (struct rspamd_task *task, const gchar *sender);
  244. /**
  245. * Learn specified statfile with message in a task
  246. * @param task worker's task object
  247. * @param classifier classifier to learn (or NULL to learn all)
  248. * @param err pointer to GError
  249. * @return true if learn succeed
  250. */
  251. gboolean rspamd_learn_task_spam (struct rspamd_task *task,
  252. gboolean is_spam,
  253. const gchar *classifier,
  254. GError **err);
  255. /**
  256. * Write log line about the specified task if needed
  257. */
  258. void rspamd_task_write_log (struct rspamd_task *task);
  259. #endif /* TASK_H_ */