You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

task.h 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef TASK_H_
  17. #define TASK_H_
  18. #include "config.h"
  19. #include "libserver/http/http_connection.h"
  20. #include "async_session.h"
  21. #include "util.h"
  22. #include "mem_pool.h"
  23. #include "dns.h"
  24. #include "re_cache.h"
  25. #include "khash.h"
  26. #ifdef __cplusplus
  27. extern "C" {
  28. #endif
  /**
   * Command carried by a task (stored in rspamd_task::cmd).
   * Values are consecutive, starting from zero.
   */
  enum rspamd_command {
      CMD_SKIP = 0,
      CMD_PING = 1,
      CMD_CHECK_SPAMC = 2,  /* Legacy spamassassin format */
      CMD_CHECK_RSPAMC = 3, /* Legacy rspamc format (like SA one) */
      CMD_CHECK = 4,        /* Legacy check - metric json reply */
      CMD_CHECK_V2 = 5,     /* Modern check - symbols in json reply */
  };
  /**
   * Task processing stages.
   * Every stage occupies its own bit, so sets of stages can be OR-ed together
   * (see RSPAMD_TASK_PROCESS_ALL) and recorded in rspamd_task::processed_stages.
   */
  enum rspamd_task_stage {
      RSPAMD_TASK_STAGE_CONNECT          = (1u << 0),
      RSPAMD_TASK_STAGE_CONNFILTERS      = (1u << 1),
      RSPAMD_TASK_STAGE_READ_MESSAGE     = (1u << 2),
      RSPAMD_TASK_STAGE_PROCESS_MESSAGE  = (1u << 3),
      RSPAMD_TASK_STAGE_PRE_FILTERS      = (1u << 4),
      RSPAMD_TASK_STAGE_FILTERS          = (1u << 5),
      RSPAMD_TASK_STAGE_CLASSIFIERS_PRE  = (1u << 6),
      RSPAMD_TASK_STAGE_CLASSIFIERS      = (1u << 7),
      RSPAMD_TASK_STAGE_CLASSIFIERS_POST = (1u << 8),
      RSPAMD_TASK_STAGE_COMPOSITES       = (1u << 9),
      RSPAMD_TASK_STAGE_POST_FILTERS     = (1u << 10),
      RSPAMD_TASK_STAGE_LEARN_PRE        = (1u << 11),
      RSPAMD_TASK_STAGE_LEARN            = (1u << 12),
      RSPAMD_TASK_STAGE_LEARN_POST       = (1u << 13),
      RSPAMD_TASK_STAGE_COMPOSITES_POST  = (1u << 14),
      RSPAMD_TASK_STAGE_IDEMPOTENT       = (1u << 15),
      RSPAMD_TASK_STAGE_DONE             = (1u << 16),
      RSPAMD_TASK_STAGE_REPLIED          = (1u << 17)
  };
  /*
   * Mask of every stage from RSPAMD_TASK_STAGE_CONNECT up to and including
   * RSPAMD_TASK_STAGE_DONE, listed in bit order.
   * RSPAMD_TASK_STAGE_REPLIED is not part of this mask.
   */
  #define RSPAMD_TASK_PROCESS_ALL (            \
      RSPAMD_TASK_STAGE_CONNECT |              \
      RSPAMD_TASK_STAGE_CONNFILTERS |          \
      RSPAMD_TASK_STAGE_READ_MESSAGE |         \
      RSPAMD_TASK_STAGE_PROCESS_MESSAGE |      \
      RSPAMD_TASK_STAGE_PRE_FILTERS |          \
      RSPAMD_TASK_STAGE_FILTERS |              \
      RSPAMD_TASK_STAGE_CLASSIFIERS_PRE |      \
      RSPAMD_TASK_STAGE_CLASSIFIERS |          \
      RSPAMD_TASK_STAGE_CLASSIFIERS_POST |     \
      RSPAMD_TASK_STAGE_COMPOSITES |           \
      RSPAMD_TASK_STAGE_POST_FILTERS |         \
      RSPAMD_TASK_STAGE_LEARN_PRE |            \
      RSPAMD_TASK_STAGE_LEARN |                \
      RSPAMD_TASK_STAGE_LEARN_POST |           \
      RSPAMD_TASK_STAGE_COMPOSITES_POST |      \
      RSPAMD_TASK_STAGE_IDEMPOTENT |           \
      RSPAMD_TASK_STAGE_DONE)
  /*
   * Mask of the stages selected for learning: connect, message reading and
   * processing, the three classifier stages, the three learn stages and DONE.
   * The filter, composite and idempotent stages are not included.
   */
  #define RSPAMD_TASK_PROCESS_LEARN (          \
      RSPAMD_TASK_STAGE_CONNECT |              \
      RSPAMD_TASK_STAGE_READ_MESSAGE |         \
      RSPAMD_TASK_STAGE_PROCESS_MESSAGE |      \
      RSPAMD_TASK_STAGE_CLASSIFIERS_PRE |      \
      RSPAMD_TASK_STAGE_CLASSIFIERS |          \
      RSPAMD_TASK_STAGE_CLASSIFIERS_POST |     \
      RSPAMD_TASK_STAGE_LEARN_PRE |            \
      RSPAMD_TASK_STAGE_LEARN |                \
      RSPAMD_TASK_STAGE_LEARN_POST |           \
      RSPAMD_TASK_STAGE_DONE)
  /*
   * Task bit flags, tested against rspamd_task::flags (see the
   * RSPAMD_TASK_IS_* predicates). One bit per flag, bits 0..24.
   */
  #define RSPAMD_TASK_FLAG_MIME            (1u << 0)
  #define RSPAMD_TASK_FLAG_SKIP_PROCESS    (1u << 1)
  #define RSPAMD_TASK_FLAG_SKIP            (1u << 2)
  #define RSPAMD_TASK_FLAG_PASS_ALL        (1u << 3)
  #define RSPAMD_TASK_FLAG_NO_LOG          (1u << 4)
  #define RSPAMD_TASK_FLAG_NO_IP           (1u << 5)
  #define RSPAMD_TASK_FLAG_PROCESSING      (1u << 6)
  #define RSPAMD_TASK_FLAG_GTUBE           (1u << 7)
  #define RSPAMD_TASK_FLAG_FILE            (1u << 8)
  #define RSPAMD_TASK_FLAG_NO_STAT         (1u << 9)
  #define RSPAMD_TASK_FLAG_UNLEARN         (1u << 10)
  #define RSPAMD_TASK_FLAG_ALREADY_LEARNED (1u << 11)
  #define RSPAMD_TASK_FLAG_LEARN_SPAM      (1u << 12)
  #define RSPAMD_TASK_FLAG_LEARN_HAM       (1u << 13)
  #define RSPAMD_TASK_FLAG_LEARN_AUTO      (1u << 14)
  #define RSPAMD_TASK_FLAG_BROKEN_HEADERS  (1u << 15)
  #define RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS (1u << 16)
  #define RSPAMD_TASK_FLAG_HAS_HAM_TOKENS  (1u << 17)
  #define RSPAMD_TASK_FLAG_EMPTY           (1u << 18)
  #define RSPAMD_TASK_FLAG_PROFILE         (1u << 19)
  #define RSPAMD_TASK_FLAG_GREYLISTED      (1u << 20)
  #define RSPAMD_TASK_FLAG_OWN_POOL        (1u << 21)
  #define RSPAMD_TASK_FLAG_SSL             (1u << 22)
  #define RSPAMD_TASK_FLAG_BAD_UNICODE     (1u << 23)
  #define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 24)
  /* Shift of the highest flag above; keep in sync when a flag is added */
  #define RSPAMD_TASK_FLAG_MAX_SHIFT       (24u)
  /*
   * Protocol-level bit flags, one bit per flag (bits 0..6).
   * NOTE(review): presumably stored in rspamd_task::protocol_flags - confirm.
   */
  /* Request has a JSON control block */
  #define RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL  (1u << 0)
  /* Request has been done by a local client */
  #define RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT (1u << 1)
  /* Request has been sent via milter */
  #define RSPAMD_TASK_PROTOCOL_FLAG_MILTER       (1u << 2)
  /* Compress protocol reply */
  #define RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED   (1u << 3)
  /* Include all URLs */
  #define RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS     (1u << 4)
  /* Client allows body block (including headers if FLAG_MILTER is not set) */
  #define RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK   (1u << 5)
  /* Emit groups information */
  #define RSPAMD_TASK_PROTOCOL_FLAG_GROUPS       (1u << 6)
  /* Shift of the highest protocol flag above */
  #define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT    (6u)
  /*
   * Task state predicates. `task` must be an expression that can be
   * dereferenced with `->` to reach the `flags`, `cmd` and `processed_stages`
   * members (i.e. a pointer to struct rspamd_task).
   * Most predicates carry a branch hint (G_UNLIKELY / G_LIKELY);
   * RSPAMD_TASK_IS_CLASSIFIED carries none and yields the raw masked bit.
   */
  #define RSPAMD_TASK_IS_SKIPPED(task) \
      (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_SKIP))
  #define RSPAMD_TASK_IS_SPAMC(task) \
      (G_UNLIKELY((task)->cmd == CMD_CHECK_SPAMC))
  #define RSPAMD_TASK_IS_PROCESSED(task) \
      (G_UNLIKELY((task)->processed_stages & RSPAMD_TASK_STAGE_DONE))
  #define RSPAMD_TASK_IS_CLASSIFIED(task) \
      (((task)->processed_stages & RSPAMD_TASK_STAGE_CLASSIFIERS))
  #define RSPAMD_TASK_IS_EMPTY(task) \
      (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_EMPTY))
  #define RSPAMD_TASK_IS_PROFILING(task) \
      (G_UNLIKELY((task)->flags & RSPAMD_TASK_FLAG_PROFILE))
  #define RSPAMD_TASK_IS_MIME(task) \
      (G_LIKELY((task)->flags & RSPAMD_TASK_FLAG_MIME))
  /* Types defined elsewhere; only referenced by name in this header */
  struct rspamd_email_address;
  struct rspamd_lang_detector;
  /*
   * NOTE(review): a forward declaration of an enum without an enumerator list
   * is a compiler extension rather than ISO C - confirm that every supported
   * compiler accepts it, or include the defining header instead.
   */
  enum rspamd_newlines_type;
  struct rspamd_message;
  136. struct rspamd_task_data_storage {
  137. const gchar *begin;
  138. gsize len;
  139. gchar *fpath;
  140. };
  141. struct rspamd_request_header_chain {
  142. rspamd_ftok_t *hdr;
  143. struct rspamd_request_header_chain *next;
  144. };
  145. __KHASH_TYPE(rspamd_req_headers_hash, rspamd_ftok_t *, struct rspamd_request_header_chain *);
  146. struct rspamd_lua_cached_entry {
  147. gint ref;
  148. guint id;
  149. };
  150. KHASH_INIT(rspamd_task_lua_cache, char *, struct rspamd_lua_cached_entry, 1, kh_str_hash_func, kh_str_hash_equal);
  151. /**
  152. * Worker task structure
  153. */
  154. struct rspamd_task {
  155. struct rspamd_worker *worker; /**< pointer to worker object */
  156. enum rspamd_command cmd; /**< command */
  157. gint sock; /**< socket descriptor */
  158. uint32_t dns_requests; /**< number of DNS requests per this task */
  159. uint32_t flags; /**< Bit flags */
  160. uint32_t protocol_flags;
  161. uint32_t processed_stages; /**< bits of stages that are processed */
  162. gchar *helo; /**< helo header value */
  163. gchar *queue_id; /**< queue id if specified */
  164. rspamd_inet_addr_t *from_addr; /**< from addr for a task */
  165. rspamd_inet_addr_t *client_addr; /**< address of connected socket */
  166. gchar *deliver_to; /**< address to deliver */
  167. gchar *auth_user; /**< SMTP authenticated user */
  168. const gchar *hostname; /**< hostname reported by MTA */
  169. khash_t(rspamd_req_headers_hash) * request_headers; /**< HTTP headers in a request */
  170. struct rspamd_task_data_storage msg; /**< message buffer */
  171. struct rspamd_http_connection *http_conn; /**< HTTP server connection */
  172. struct rspamd_async_session *s; /**< async session object */
  173. struct rspamd_scan_result *result; /**< Metric result */
  174. khash_t(rspamd_task_lua_cache) lua_cache; /**< cache of lua objects */
  175. GPtrArray *tokens; /**< statistics tokens */
  176. GArray *meta_words; /**< rspamd_stat_token_t produced from meta headers
  177. (e.g. Subject) */
  178. GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */
  179. struct rspamd_email_address *from_envelope;
  180. struct rspamd_email_address *from_envelope_orig;
  181. ucl_object_t *messages; /**< list of messages that would be reported */
  182. struct rspamd_re_runtime *re_rt; /**< regexp runtime */
  183. GPtrArray *stat_runtimes; /**< backend runtime */
  184. struct rspamd_config *cfg; /**< pointer to config object */
  185. GError *err;
  186. rspamd_mempool_t *task_pool; /**< memory pool for task */
  187. double time_real_finish;
  188. ev_tstamp task_timestamp;
  189. gboolean (*fin_callback)(struct rspamd_task *task, void *arg);
  190. /**< callback for filters finalizing */
  191. void *fin_arg; /**< argument for fin callback */
  192. struct rspamd_dns_resolver *resolver; /**< DNS resolver */
  193. struct ev_loop *event_loop; /**< Event base */
  194. struct ev_timer timeout_ev; /**< Global task timeout */
  195. struct ev_io guard_ev; /**< Event for input sanity guard */
  196. gpointer symcache_runtime; /**< Opaque checkpoint data */
  197. ucl_object_t *settings; /**< Settings applied to task */
  198. struct rspamd_config_settings_elt *settings_elt; /**< preprocessed settings id elt */
  199. const gchar *classifier; /**< Classifier to learn (if needed) */
  200. struct rspamd_lang_detector *lang_det; /**< Languages detector */
  201. struct rspamd_message *message;
  202. };
  203. /**
  204. * Construct new task for worker
  205. */
  206. struct rspamd_task *rspamd_task_new(struct rspamd_worker *worker,
  207. struct rspamd_config *cfg,
  208. rspamd_mempool_t *pool,
  209. struct rspamd_lang_detector *lang_det,
  210. struct ev_loop *event_loop,
  211. gboolean debug_mem);
  /**
   * Destroy task object and remove its IO dispatcher if it exists
   * @param task task to destroy
   */
  void rspamd_task_free(struct rspamd_task *task);
  216. /**
  217. * Called if all filters are processed
  218. * @return TRUE if session should be terminated
  219. */
  220. gboolean rspamd_task_fin(void *arg);
  221. /**
  222. * Load HTTP message with body in `msg` to an rspamd_task
  223. * @param task
  224. * @param msg
  225. * @param start
  226. * @param len
  227. * @return
  228. */
  229. gboolean rspamd_task_load_message(struct rspamd_task *task,
  230. struct rspamd_http_message *msg,
  231. const gchar *start, gsize len);
  232. /**
  233. * Process task
  234. * @param task task to process
  235. * @return task has been successfully parsed and processed
  236. */
  237. gboolean rspamd_task_process(struct rspamd_task *task, guint stages);
  /**
   * Return address of sender or NULL
   * @param task task to query
   * @return sender address, or NULL
   */
  struct rspamd_email_address *rspamd_task_get_sender(struct rspamd_task *task);
  244. /**
  245. * Return addresses in the following precedence:
  246. * - deliver to
  247. * - the first smtp recipient
  248. * - the first mime recipient
  249. * @param task
  250. * @return
  251. */
  252. const gchar *rspamd_task_get_principal_recipient(struct rspamd_task *task);
  253. /**
  254. * Add a recipient for a task
  255. * @param task task object
  256. * @param rcpt string representation of recipient address
  257. * @return TRUE if an address has been parsed and added
  258. */
  259. gboolean rspamd_task_add_recipient(struct rspamd_task *task, const gchar *rcpt);
  260. /**
  261. * Learn specified statfile with message in a task
  262. * @param task worker's task object
  263. * @param classifier classifier to learn (or NULL to learn all)
  264. * @param err pointer to GError
  265. * @return true if learn succeed
  266. */
  267. gboolean rspamd_learn_task_spam(struct rspamd_task *task,
  268. gboolean is_spam,
  269. const gchar *classifier,
  270. GError **err);
  271. /**
  272. * Returns required score for a message (usually reject score)
  273. * @param task
  274. * @param m
  275. * @return
  276. */
  277. struct rspamd_scan_result;
  278. gdouble rspamd_task_get_required_score(struct rspamd_task *task,
  279. struct rspamd_scan_result *m);
  280. /**
  281. * Returns the first header as value for a header
  282. * @param task
  283. * @param name
  284. * @return
  285. */
  286. rspamd_ftok_t *rspamd_task_get_request_header(struct rspamd_task *task,
  287. const gchar *name);
  288. /**
  289. * Returns all headers with the specific name
  290. * @param task
  291. * @param name
  292. * @return
  293. */
  294. struct rspamd_request_header_chain *rspamd_task_get_request_header_multiple(
  295. struct rspamd_task *task,
  296. const gchar *name);
  297. /**
  298. * Adds a new request header to task (name and value should be mapped to fstring)
  299. * @param task
  300. * @param name
  301. * @param value
  302. */
  303. void rspamd_task_add_request_header(struct rspamd_task *task,
  304. rspamd_ftok_t *name, rspamd_ftok_t *value);
  /**
   * Write log line about the specified task if needed
   * @param task task to log
   */
  void rspamd_task_write_log(struct rspamd_task *task);
  309. /**
  310. * Set profiling value for a specific key
  311. * @param task
  312. * @param key
  313. * @param value
  314. */
  315. void rspamd_task_profile_set(struct rspamd_task *task, const gchar *key,
  316. gdouble value);
  317. /**
  318. * Get value for a specific profiling key
  319. * @param task
  320. * @param key
  321. * @return
  322. */
  323. gdouble *rspamd_task_profile_get(struct rspamd_task *task, const gchar *key);
  324. /**
  325. * Sets finishing time for a task if not yet set
  326. * @param task
  327. * @return
  328. */
  329. gboolean rspamd_task_set_finish_time(struct rspamd_task *task);
  330. /**
  331. * Returns task processing stage name
  332. * @param stg
  333. * @return
  334. */
  335. const gchar *rspamd_task_stage_name(enum rspamd_task_stage stg);
  336. /*
  337. * Called on forced timeout
  338. */
  339. void rspamd_task_timeout(EV_P_ ev_timer *w, int revents);
  340. /*
  341. * Called on unexpected IO error (e.g. ECONNRESET)
  342. */
  343. void rspamd_worker_guard_handler(EV_P_ ev_io *w, int revents);
  344. #ifdef __cplusplus
  345. }
  346. #endif
  347. #endif /* TASK_H_ */