You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

task.h 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef TASK_H_
  17. #define TASK_H_
  18. #include "config.h"
  19. #include "libserver/http/http_connection.h"
  20. #include "async_session.h"
  21. #include "util.h"
  22. #include "mem_pool.h"
  23. #include "dns.h"
  24. #include "re_cache.h"
  25. #include "khash.h"
  26. #ifdef __cplusplus
  27. extern "C" {
  28. #endif
  /**
   * Command requested for a task; this is the type of the `cmd` field of
   * struct rspamd_task.
   */
  29. enum rspamd_command {
  30. CMD_SKIP = 0,
  31. CMD_PING,
  32. CMD_CHECK_SPAMC, /* Legacy SpamAssassin format */
  33. CMD_CHECK_RSPAMC, /* Legacy rspamc format (like SA one) */
  34. CMD_CHECK, /* Legacy check - metric json reply */
  35. CMD_CHECK_V2, /* Modern check - symbols in json reply */
  36. };
  /**
   * Task processing stages. Every stage is a distinct bit, so stages can be
   * OR-ed into a mask (see RSPAMD_TASK_PROCESS_ALL) and tested against the
   * `processed_stages` field of struct rspamd_task.
   */
  37. enum rspamd_task_stage {
  38. RSPAMD_TASK_STAGE_CONNECT = (1u << 0u),
  39. RSPAMD_TASK_STAGE_CONNFILTERS = (1u << 1u),
  40. RSPAMD_TASK_STAGE_READ_MESSAGE = (1u << 2u),
  41. RSPAMD_TASK_STAGE_PROCESS_MESSAGE = (1u << 3u),
  42. RSPAMD_TASK_STAGE_PRE_FILTERS = (1u << 4u),
  43. RSPAMD_TASK_STAGE_FILTERS = (1u << 5u),
  44. RSPAMD_TASK_STAGE_CLASSIFIERS_PRE = (1u << 6u),
  45. RSPAMD_TASK_STAGE_CLASSIFIERS = (1u << 7u),
  46. RSPAMD_TASK_STAGE_CLASSIFIERS_POST = (1u << 8u),
  47. RSPAMD_TASK_STAGE_COMPOSITES = (1u << 9u),
  48. RSPAMD_TASK_STAGE_POST_FILTERS = (1u << 10u),
  49. RSPAMD_TASK_STAGE_LEARN_PRE = (1u << 11u),
  50. RSPAMD_TASK_STAGE_LEARN = (1u << 12u),
  51. RSPAMD_TASK_STAGE_LEARN_POST = (1u << 13u),
  52. RSPAMD_TASK_STAGE_COMPOSITES_POST = (1u << 14u),
  53. RSPAMD_TASK_STAGE_IDEMPOTENT = (1u << 15u),
  54. RSPAMD_TASK_STAGE_DONE = (1u << 16u),
  55. RSPAMD_TASK_STAGE_REPLIED = (1u << 17u) /* not included in RSPAMD_TASK_PROCESS_ALL */
  56. };
  /**
   * Stage mask that combines every stage from RSPAMD_TASK_STAGE_CONNECT
   * through RSPAMD_TASK_STAGE_DONE. RSPAMD_TASK_STAGE_REPLIED is the only
   * stage that is not part of this mask. The order of the operands has no
   * effect on the resulting value.
   */
  57. #define RSPAMD_TASK_PROCESS_ALL (RSPAMD_TASK_STAGE_CONNECT | \
  58. RSPAMD_TASK_STAGE_CONNFILTERS | \
  59. RSPAMD_TASK_STAGE_READ_MESSAGE | \
  60. RSPAMD_TASK_STAGE_PRE_FILTERS | \
  61. RSPAMD_TASK_STAGE_PROCESS_MESSAGE | \
  62. RSPAMD_TASK_STAGE_FILTERS | \
  63. RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
  64. RSPAMD_TASK_STAGE_CLASSIFIERS | \
  65. RSPAMD_TASK_STAGE_CLASSIFIERS_POST | \
  66. RSPAMD_TASK_STAGE_COMPOSITES | \
  67. RSPAMD_TASK_STAGE_POST_FILTERS | \
  68. RSPAMD_TASK_STAGE_LEARN_PRE | \
  69. RSPAMD_TASK_STAGE_LEARN | \
  70. RSPAMD_TASK_STAGE_LEARN_POST | \
  71. RSPAMD_TASK_STAGE_COMPOSITES_POST | \
  72. RSPAMD_TASK_STAGE_IDEMPOTENT | \
  73. RSPAMD_TASK_STAGE_DONE)
  /**
   * Reduced stage mask: keeps the connect, message read/process, classifier
   * and learn stages plus DONE, and leaves out CONNFILTERS, PRE_FILTERS,
   * FILTERS, COMPOSITES, POST_FILTERS, COMPOSITES_POST and IDEMPOTENT.
   * NOTE(review): presumably the `stages` value passed to
   * rspamd_task_process() for learn requests - confirm against callers.
   */
  74. #define RSPAMD_TASK_PROCESS_LEARN (RSPAMD_TASK_STAGE_CONNECT | \
  75. RSPAMD_TASK_STAGE_READ_MESSAGE | \
  76. RSPAMD_TASK_STAGE_PROCESS_MESSAGE | \
  77. RSPAMD_TASK_STAGE_CLASSIFIERS_PRE | \
  78. RSPAMD_TASK_STAGE_CLASSIFIERS | \
  79. RSPAMD_TASK_STAGE_CLASSIFIERS_POST | \
  80. RSPAMD_TASK_STAGE_LEARN_PRE | \
  81. RSPAMD_TASK_STAGE_LEARN | \
  82. RSPAMD_TASK_STAGE_LEARN_POST | \
  83. RSPAMD_TASK_STAGE_DONE)
  /**
   * Task flags. Each value is a single bit that is tested against the
   * `flags` field of struct rspamd_task (see RSPAMD_TASK_IS_SKIPPED and the
   * other RSPAMD_TASK_IS_* helpers in this header).
   */
  84. #define RSPAMD_TASK_FLAG_MIME (1u << 0u)
  85. #define RSPAMD_TASK_FLAG_SKIP_PROCESS (1u << 1u)
  86. #define RSPAMD_TASK_FLAG_SKIP (1u << 2u)
  87. #define RSPAMD_TASK_FLAG_PASS_ALL (1u << 3u)
  88. #define RSPAMD_TASK_FLAG_NO_LOG (1u << 4u)
  89. #define RSPAMD_TASK_FLAG_NO_IP (1u << 5u)
  90. #define RSPAMD_TASK_FLAG_PROCESSING (1u << 6u)
  91. #define RSPAMD_TASK_FLAG_GTUBE (1u << 7u)
  92. #define RSPAMD_TASK_FLAG_FILE (1u << 8u)
  93. #define RSPAMD_TASK_FLAG_NO_STAT (1u << 9u)
  94. #define RSPAMD_TASK_FLAG_UNLEARN (1u << 10u)
  95. #define RSPAMD_TASK_FLAG_ALREADY_LEARNED (1u << 11u)
  96. #define RSPAMD_TASK_FLAG_LEARN_SPAM (1u << 12u)
  97. #define RSPAMD_TASK_FLAG_LEARN_HAM (1u << 13u)
  98. #define RSPAMD_TASK_FLAG_LEARN_AUTO (1u << 14u)
  99. #define RSPAMD_TASK_FLAG_BROKEN_HEADERS (1u << 15u)
  100. #define RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS (1u << 16u)
  101. #define RSPAMD_TASK_FLAG_HAS_HAM_TOKENS (1u << 17u)
  102. #define RSPAMD_TASK_FLAG_EMPTY (1u << 18u)
  103. #define RSPAMD_TASK_FLAG_PROFILE (1u << 19u)
  104. #define RSPAMD_TASK_FLAG_GREYLISTED (1u << 20u)
  105. #define RSPAMD_TASK_FLAG_OWN_POOL (1u << 21u)
  106. #define RSPAMD_TASK_FLAG_SSL (1u << 22u)
  107. #define RSPAMD_TASK_FLAG_BAD_UNICODE (1u << 23u)
  108. #define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 24u)
  109. #define RSPAMD_TASK_FLAG_MAX_SHIFT (24u) /* highest shift used by the flags above; update when adding a flag */
  /*
   * Protocol flags, one bit each.
   * NOTE(review): presumably stored in the `protocol_flags` field of
   * struct rspamd_task - confirm against the protocol code.
   */
  110. /* Request has a JSON control block */
  111. #define RSPAMD_TASK_PROTOCOL_FLAG_HAS_CONTROL (1u << 0u)
  112. /* Request has been done by a local client */
  113. #define RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT (1u << 1u)
  114. /* Request has been sent via milter */
  115. #define RSPAMD_TASK_PROTOCOL_FLAG_MILTER (1u << 2u)
  116. /* Compress protocol reply */
  117. #define RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED (1u << 3u)
  118. /* Include all URLs */
  119. #define RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS (1u << 4u)
  120. /* Client allows body block (including headers in no FLAG_MILTER) */
  121. #define RSPAMD_TASK_PROTOCOL_FLAG_BODY_BLOCK (1u << 5u)
  122. /* Emit groups information */
  123. #define RSPAMD_TASK_PROTOCOL_FLAG_GROUPS (1u << 6u)
  124. #define RSPAMD_TASK_PROTOCOL_FLAG_MAX_SHIFT (6u) /* highest shift used by the protocol flags above */
  /*
   * Convenience predicates taking a pointer to struct rspamd_task.
   * Each one expands to a single bit test on `flags` or `processed_stages`
   * (RSPAMD_TASK_IS_SPAMC compares `cmd` instead). The bit tests yield the
   * masked value, i.e. non-zero rather than exactly 1 when the bit is set.
   * The `task` argument is evaluated once per expansion.
   */
  125. #define RSPAMD_TASK_IS_SKIPPED(task) (((task)->flags & RSPAMD_TASK_FLAG_SKIP))
  126. #define RSPAMD_TASK_IS_SPAMC(task) (((task)->cmd == CMD_CHECK_SPAMC))
  127. #define RSPAMD_TASK_IS_PROCESSED(task) (((task)->processed_stages & RSPAMD_TASK_STAGE_DONE))
  128. #define RSPAMD_TASK_IS_CLASSIFIED(task) (((task)->processed_stages & RSPAMD_TASK_STAGE_CLASSIFIERS))
  129. #define RSPAMD_TASK_IS_EMPTY(task) (((task)->flags & RSPAMD_TASK_FLAG_EMPTY))
  130. #define RSPAMD_TASK_IS_PROFILING(task) (((task)->flags & RSPAMD_TASK_FLAG_PROFILE))
  131. #define RSPAMD_TASK_IS_MIME(task) (((task)->flags & RSPAMD_TASK_FLAG_MIME))
  /*
   * Forward declarations. The struct types below are only used through
   * pointers in this header, so their full definitions are not needed here.
   */
  132. struct rspamd_email_address;
  133. struct rspamd_lang_detector;
  134. enum rspamd_newlines_type; /* NOTE(review): not referenced elsewhere in this header; ISO C does not allow naming an enum before it is complete (compiler extension) unless an included header already defines it - confirm */
  135. struct rspamd_message;
  /**
   * Message buffer descriptor; this is the type of the `msg` field of
   * struct rspamd_task.
   */
  136. struct rspamd_task_data_storage {
  137. const gchar *begin; /* NOTE(review): presumably the start of the message bytes - confirm */
  138. gsize len; /* NOTE(review): presumably the number of bytes available at `begin` - confirm */
  139. gchar *fpath; /* NOTE(review): presumably the path of a backing file, if any - confirm */
  140. };
  /**
   * Node of a singly linked chain of request header tokens. This is the
   * value type of the request headers hash and the type returned by
   * rspamd_task_get_request_header_multiple().
   */
  141. struct rspamd_request_header_chain {
  142. rspamd_ftok_t *hdr; /* header token held by this node */
  143. struct rspamd_request_header_chain *next; /* next node in the chain */
  144. };
  /*
   * Declares the khash table type `rspamd_req_headers_hash`, keyed by
   * rspamd_ftok_t pointers with struct rspamd_request_header_chain pointers
   * as values; it is the type behind the `request_headers` field of
   * struct rspamd_task.
   */
  145. __KHASH_TYPE (rspamd_req_headers_hash, rspamd_ftok_t *, struct rspamd_request_header_chain *)
  146. /**
  147. * Worker task structure
       *
       * Holds the per-task state declared below: protocol command and flags,
       * addresses, request headers, the message buffer, scan results and the
       * event loop watchers used while the task is being processed.
  148. */
  149. struct rspamd_task {
  150. struct rspamd_worker *worker; /**< pointer to worker object */
  151. enum rspamd_command cmd; /**< command */
  152. gint sock; /**< socket descriptor */
  153. guint32 dns_requests; /**< number of DNS requests per this task */
  154. guint32 flags; /**< Bit flags */
  155. guint32 protocol_flags; /**< NOTE(review): presumably RSPAMD_TASK_PROTOCOL_FLAG_* bits - confirm */
  156. guint32 processed_stages; /**< bits of stages that are processed */
  157. gchar *helo; /**< helo header value */
  158. gchar *queue_id; /**< queue id if specified */
  159. rspamd_inet_addr_t *from_addr; /**< from addr for a task */
  160. rspamd_inet_addr_t *client_addr; /**< address of connected socket */
  161. gchar *deliver_to; /**< address to deliver */
  162. gchar *user; /**< user to deliver */
  163. const gchar *hostname; /**< hostname reported by MTA */
  164. khash_t(rspamd_req_headers_hash) *request_headers; /**< HTTP headers in a request */
  165. struct rspamd_task_data_storage msg; /**< message buffer */
  166. struct rspamd_http_connection *http_conn; /**< HTTP server connection */
  167. struct rspamd_async_session *s; /**< async session object */
  168. struct rspamd_scan_result *result; /**< Metric result */
  169. GHashTable *lua_cache; /**< cache of lua objects */
  170. GPtrArray *tokens; /**< statistics tokens */
  171. GArray *meta_words; /**< rspamd_stat_token_t produced from meta headers
  172. (e.g. Subject) */
  173. GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */
  174. struct rspamd_email_address *from_envelope; /**< NOTE(review): presumably the envelope sender - confirm */
  175. struct rspamd_email_address *from_envelope_orig; /**< NOTE(review): presumably the envelope sender before any rewrite - confirm */
  176. ucl_object_t *messages; /**< list of messages that would be reported */
  177. struct rspamd_re_runtime *re_rt; /**< regexp runtime */
  178. GPtrArray *stat_runtimes; /**< backend runtime */
  179. struct rspamd_config *cfg; /**< pointer to config object */
  180. GError *err; /**< NOTE(review): presumably the error recorded for this task, if any - confirm */
  181. rspamd_mempool_t *task_pool; /**< memory pool for task */
  182. double time_real_finish; /**< NOTE(review): presumably the time recorded by rspamd_task_set_finish_time() - confirm */
  183. ev_tstamp task_timestamp; /**< NOTE(review): presumably the task start timestamp - confirm */
  184. gboolean (*fin_callback) (struct rspamd_task *task, void *arg);
  185. /**< callback for filters finalizing */
  186. void *fin_arg; /**< argument for fin callback */
  187. struct rspamd_dns_resolver *resolver; /**< DNS resolver */
  188. struct ev_loop *event_loop; /**< Event base */
  189. struct ev_timer timeout_ev; /**< Global task timeout */
  190. struct ev_io guard_ev; /**< Event for input sanity guard */
  191. gpointer checkpoint; /**< Opaque checkpoint data */
  192. ucl_object_t *settings; /**< Settings applied to task */
  193. struct rspamd_config_settings_elt *settings_elt; /**< preprocessed settings id elt */
  194. const gchar *classifier; /**< Classifier to learn (if needed) */
  195. struct rspamd_lang_detector *lang_det; /**< Languages detector */
  196. struct rspamd_message *message; /**< NOTE(review): presumably the parsed message - confirm */
  197. };
  198. /**
  199. * Construct new task for worker
       * @param worker worker object the task is constructed for
       * @param cfg config object
       * @param pool memory pool; NOTE(review): presumably an existing pool to
       *        use for the task (see RSPAMD_TASK_FLAG_OWN_POOL) - confirm
       * @param lang_det languages detector
       * @param event_loop event loop
       * @param debug_mem NOTE(review): presumably enables memory pool
       *        debugging for the task - confirm
       * @return pointer to the new task
  200. */
  201. struct rspamd_task *rspamd_task_new (struct rspamd_worker *worker,
  202. struct rspamd_config *cfg,
  203. rspamd_mempool_t *pool,
  204. struct rspamd_lang_detector *lang_det,
  205. struct ev_loop *event_loop,
  206. gboolean debug_mem);
  207. /**
  208. * Destroy task object and remove its IO dispatcher if it exists
       * @param task task to destroy
  209. */
  210. void rspamd_task_free (struct rspamd_task *task);
  211. /**
  212. * Called if session was restored inside fin callback
       * @param arg opaque pointer; NOTE(review): presumably the
       *        struct rspamd_task being processed - confirm
  213. */
  214. void rspamd_task_restore (void *arg);
  215. /**
  216. * Called if all filters are processed
       * @param arg opaque pointer; NOTE(review): presumably the
       *        struct rspamd_task being processed - confirm
  217. * @return TRUE if session should be terminated
  218. */
  219. gboolean rspamd_task_fin (void *arg);
  220. /**
  221. * Load HTTP message with body in `msg` to an rspamd_task
  222. * @param task task that receives the message
  223. * @param msg HTTP message that carries the body
  224. * @param start NOTE(review): presumably the first byte of the body data - confirm
  225. * @param len NOTE(review): presumably the number of bytes at `start` - confirm
  226. * @return NOTE(review): presumably TRUE if the message has been loaded - confirm
  227. */
  228. gboolean rspamd_task_load_message (struct rspamd_task *task,
  229. struct rspamd_http_message *msg,
  230. const gchar *start, gsize len);
  231. /**
  232. * Process task
  233. * @param task task to process
       * @param stages NOTE(review): presumably a mask of enum rspamd_task_stage
       *        bits, e.g. RSPAMD_TASK_PROCESS_ALL - confirm
  234. * @return task has been successfully parsed and processed
  235. */
  236. gboolean rspamd_task_process (struct rspamd_task *task, guint stages);
  237. /**
  238. * Return address of sender or NULL
  239. * @param task task to query
  240. * @return sender address, or NULL
  241. */
  242. struct rspamd_email_address *rspamd_task_get_sender (struct rspamd_task *task);
  243. /**
  244. * Return addresses in the following precedence:
  245. * - deliver to
  246. * - the first smtp recipient
  247. * - the first mime recipient
  248. * @param task task to query
  249. * @return the selected recipient as a string; NOTE(review): presumably
       *         NULL when none of the sources above is available - confirm
  250. */
  251. const gchar *rspamd_task_get_principal_recipient (struct rspamd_task *task);
  252. /**
  253. * Parse `rcpt` and add it as a recipient for a task
  254. * @param task task object
  255. * @param rcpt string representation of recipient address
  256. * @return TRUE if an address has been parsed and added
  257. */
  258. gboolean rspamd_task_add_recipient (struct rspamd_task *task, const gchar *rcpt);
  259. /**
  260. * Learn specified statfile with message in a task
  261. * @param task worker's task object
       * @param is_spam NOTE(review): presumably TRUE to learn the message as
       *        spam and FALSE to learn it as ham - confirm
  262. * @param classifier classifier to learn (or NULL to learn all)
  263. * @param err pointer to GError
  264. * @return TRUE if learning succeeded
  265. */
  266. gboolean rspamd_learn_task_spam (struct rspamd_task *task,
  267. gboolean is_spam,
  268. const gchar *classifier,
  269. GError **err);
  270. /**
  271. * Returns required score for a message (usually reject score)
  272. * @param task task to query
  273. * @param m scan result; NOTE(review): behaviour for a NULL value is not
       *        visible from this header - confirm
  274. * @return the required score
  275. */
  276. struct rspamd_scan_result; /* forward declaration; only used through a pointer in the prototype */
  277. gdouble rspamd_task_get_required_score (struct rspamd_task *task,
  278. struct rspamd_scan_result *m);
  279. /**
  280. * Returns the first header as value for a header
  281. * @param task task whose request headers are searched
  282. * @param name header name
  283. * @return the first value for `name`; NOTE(review): presumably NULL when
       *         no such header exists - confirm
  284. */
  285. rspamd_ftok_t *rspamd_task_get_request_header (struct rspamd_task *task,
  286. const gchar *name);
  287. /**
  288. * Returns all headers with the specific name
  289. * @param task task whose request headers are searched
  290. * @param name header name
  291. * @return chain of headers linked through `next`; NOTE(review):
       *         presumably NULL when no such header exists - confirm
  292. */
  293. struct rspamd_request_header_chain *rspamd_task_get_request_header_multiple (
  294. struct rspamd_task *task,
  295. const gchar *name);
  296. /**
  297. * Adds a new request header to task (name and value should be mapped to fstring)
  298. * @param task task that receives the header
  299. * @param name header name token
  300. * @param value header value token
  301. */
  302. void rspamd_task_add_request_header (struct rspamd_task *task,
  303. rspamd_ftok_t *name, rspamd_ftok_t *value);
  304. /**
  305. * Write log line about the specified task if needed
       * @param task task to log
  306. */
  307. void rspamd_task_write_log (struct rspamd_task *task);
  308. /**
  309. * Set profiling value for a specific key
  310. * @param task task that owns the profiling data
  311. * @param key profiling key
  312. * @param value value to store for `key`
  313. */
  314. void rspamd_task_profile_set (struct rspamd_task *task, const gchar *key,
  315. gdouble value);
  316. /**
  317. * Get value for a specific profiling key
  318. * @param task task that owns the profiling data
  319. * @param key profiling key
  320. * @return pointer to the stored value; NOTE(review): presumably NULL when
       *         the key has not been set - confirm
  321. */
  322. gdouble *rspamd_task_profile_get (struct rspamd_task *task, const gchar *key);
  323. /**
  324. * Sets finishing time for a task if not yet set
  325. * @param task task to update
  326. * @return NOTE(review): presumably TRUE when this call set the time and
       *         FALSE when it was already set - confirm
  327. */
  328. gboolean rspamd_task_set_finish_time (struct rspamd_task *task);
  329. /**
  330. * Returns task processing stage name
  331. * @param stg stage to name
  332. * @return name of the stage as a string
  333. */
  334. const gchar *rspamd_task_stage_name (enum rspamd_task_stage stg);
  335. /*
  336. * Called on forced timeout
       * The signature is that of a libev timer watcher callback.
       * NOTE(review): presumably installed on the `timeout_ev` watcher of
       * struct rspamd_task - confirm.
  337. */
  338. void rspamd_task_timeout (EV_P_ ev_timer *w, int revents);
  339. /*
  340. * Called on unexpected IO error (e.g. ECONNRESET)
       * The signature is that of a libev io watcher callback.
       * NOTE(review): presumably installed on the `guard_ev` watcher of
       * struct rspamd_task - confirm.
  341. */
  342. void rspamd_worker_guard_handler (EV_P_ ev_io *w, int revents);
  343. #ifdef __cplusplus
  344. }
  345. #endif
  346. #endif /* TASK_H_ */