You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

surbl.c 30KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880
  1. /*
  2. * Copyright (c) 2009, Rambler media
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
  17. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  18. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  19. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  20. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  21. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. */
  24. /***MODULE:surbl
  25. * rspamd module that implements SURBL url checking
  26. *
  27. * Allowed options:
  28. * - weight (integer): weight of symbol
  29. * Redirector options:
  30. * - redirector (string): address of http redirector utility in format "host:port"
  31. * - redirector_connect_timeout (seconds): redirector connect timeout (default: 1s)
  32. * - redirector_read_timeout (seconds): timeout for reading data (default: 5s)
  33. * - redirector_hosts_map (map string): map that contains domains to check with redirector
  34. * Surbl options:
  35. * - exceptions (map string): map of domains that should be checked via surbl using 3 (e.g. somehost.domain.com)
  36. * components of domain name instead of normal 2 (e.g. domain.com)
  37. * - whitelist (map string): map of domains that should be whitelisted for surbl checks
  38. * - max_urls (integer): maximum allowed number of urls in message to be checked
  39. * - suffix (string): surbl address (for example insecure-bl.rambler.ru), may contain %b if bits are used (read documentation about it)
  40. * - bit (string): describes a prefix for a single bit
  41. */
  42. #include "../config.h"
  43. #include "../util.h"
  44. #include "../message.h"
  45. #include "../view.h"
  46. #include "../map.h"
  47. #include "../evdns/evdns.h"
  48. #include "surbl.h"
  49. static struct surbl_ctx *surbl_module_ctx = NULL;
  50. static int surbl_filter (struct worker_task *task);
  51. static void surbl_test_url (struct worker_task *task, void *user_data);
  52. static void dns_callback (int result, char type, int count, int ttl, void *addresses, void *data);
  53. static void process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr);
  54. static int urls_command_handler (struct worker_task *task);
  55. #define SURBL_ERROR surbl_error_quark ()
  56. #define WHITELIST_ERROR 0
  57. #define CONVERSION_ERROR 1
  58. GQuark
  59. surbl_error_quark (void)
  60. {
  61. return g_quark_from_static_string ("surbl-error-quark");
  62. }
  63. static void
  64. exception_insert (gpointer st, gconstpointer key, gpointer value)
  65. {
  66. GHashTable **t = st;
  67. int level = 0;
  68. const char *p = key;
  69. f_str_t *val;
  70. while (*p) {
  71. if (*p == '.') {
  72. level ++;
  73. }
  74. p ++;
  75. }
  76. if (level >= MAX_LEVELS) {
  77. msg_err ("invalid domain in exceptions list: %s, levels: %d", (char *)key, level);
  78. return;
  79. }
  80. val = g_malloc (sizeof (f_str_t));
  81. val->begin = (char *)key;
  82. val->len = strlen (key);
  83. if (t[level] == NULL) {
  84. t[level] = g_hash_table_new_full (fstr_strcase_hash, fstr_strcase_equal, g_free, NULL);
  85. }
  86. g_hash_table_insert (t[level], val, value);
  87. }
  88. static u_char *
  89. read_exceptions_list (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data)
  90. {
  91. if (data->cur_data == NULL) {
  92. data->cur_data = memory_pool_alloc (pool, sizeof (GHashTable *) * MAX_LEVELS);
  93. }
  94. return abstract_parse_list (pool, chunk, len, data, (insert_func) exception_insert);
  95. }
  96. static void
  97. fin_exceptions_list (memory_pool_t * pool, struct map_cb_data *data)
  98. {
  99. GHashTable **t;
  100. int i;
  101. if (data->prev_data) {
  102. t = data->prev_data;
  103. for (i = 0; i < MAX_LEVELS; i ++) {
  104. if (t[i] != NULL) {
  105. g_hash_table_destroy (t[i]);
  106. }
  107. }
  108. }
  109. }
  110. int
  111. surbl_module_init (struct config_file *cfg, struct module_ctx **ctx)
  112. {
  113. surbl_module_ctx = g_malloc (sizeof (struct surbl_ctx));
  114. surbl_module_ctx->filter = surbl_filter;
  115. surbl_module_ctx->use_redirector = 0;
  116. surbl_module_ctx->suffixes = NULL;
  117. surbl_module_ctx->bits = NULL;
  118. surbl_module_ctx->surbl_pool = memory_pool_new (memory_pool_get_size ());
  119. surbl_module_ctx->tld2_file = NULL;
  120. surbl_module_ctx->whitelist_file = NULL;
  121. surbl_module_ctx->redirector_hosts = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
  122. surbl_module_ctx->whitelist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
  123. /* Zero exceptions hashes */
  124. surbl_module_ctx->exceptions = memory_pool_alloc0 (surbl_module_ctx->surbl_pool, MAX_LEVELS * sizeof (GHashTable *));
  125. /* Register destructors */
  126. memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_hash_table_destroy, surbl_module_ctx->whitelist);
  127. memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_hash_table_destroy, surbl_module_ctx->redirector_hosts);
  128. memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->suffixes);
  129. memory_pool_add_destructor (surbl_module_ctx->surbl_pool, (pool_destruct_func) g_list_free, surbl_module_ctx->bits);
  130. *ctx = (struct module_ctx *)surbl_module_ctx;
  131. register_protocol_command ("urls", urls_command_handler);
  132. return 0;
  133. }
  134. int
  135. surbl_module_config (struct config_file *cfg)
  136. {
  137. struct hostent *hent;
  138. GList *cur_opt;
  139. struct module_opt *cur;
  140. struct suffix_item *new_suffix;
  141. struct surbl_bit_item *new_bit;
  142. char *value, *cur_tok, *str;
  143. uint32_t bit;
  144. if ((value = get_module_opt (cfg, "surbl", "redirector")) != NULL) {
  145. str = memory_pool_strdup (surbl_module_ctx->surbl_pool, value);
  146. cur_tok = strsep (&str, ":");
  147. if (!inet_aton (cur_tok, &surbl_module_ctx->redirector_addr)) {
  148. /* Try to call gethostbyname */
  149. hent = gethostbyname (cur_tok);
  150. if (hent != NULL) {
  151. memcpy ((char *)&surbl_module_ctx->redirector_addr, hent->h_addr, sizeof (struct in_addr));
  152. if (str != NULL) {
  153. surbl_module_ctx->redirector_port = (uint16_t) strtoul (str, NULL, 10);
  154. }
  155. else {
  156. surbl_module_ctx->redirector_port = DEFAULT_REDIRECTOR_PORT;
  157. }
  158. surbl_module_ctx->use_redirector = 1;
  159. }
  160. }
  161. }
  162. if ((value = get_module_opt (cfg, "surbl", "weight")) != NULL) {
  163. surbl_module_ctx->weight = atoi (value);
  164. }
  165. else {
  166. surbl_module_ctx->weight = DEFAULT_SURBL_WEIGHT;
  167. }
  168. if ((value = get_module_opt (cfg, "surbl", "url_expire")) != NULL) {
  169. surbl_module_ctx->url_expire = atoi (value);
  170. }
  171. else {
  172. surbl_module_ctx->url_expire = DEFAULT_SURBL_URL_EXPIRE;
  173. }
  174. if ((value = get_module_opt (cfg, "surbl", "redirector_connect_timeout")) != NULL) {
  175. surbl_module_ctx->connect_timeout = parse_seconds (value);
  176. }
  177. else {
  178. surbl_module_ctx->connect_timeout = DEFAULT_REDIRECTOR_CONNECT_TIMEOUT;
  179. }
  180. if ((value = get_module_opt (cfg, "surbl", "redirector_read_timeout")) != NULL) {
  181. surbl_module_ctx->read_timeout = parse_seconds (value);
  182. }
  183. else {
  184. surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT;
  185. }
  186. if ((value = get_module_opt (cfg, "surbl", "redirector_hosts_map")) != NULL) {
  187. add_map (value, read_host_list, fin_host_list, (void **)&surbl_module_ctx->redirector_hosts);
  188. }
  189. else {
  190. surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT;
  191. }
  192. if ((value = get_module_opt (cfg, "surbl", "max_urls")) != NULL) {
  193. surbl_module_ctx->max_urls = atoi (value);
  194. }
  195. else {
  196. surbl_module_ctx->max_urls = DEFAULT_SURBL_MAX_URLS;
  197. }
  198. if ((value = get_module_opt (cfg, "surbl", "exceptions")) != NULL) {
  199. if (add_map (value, read_exceptions_list, fin_exceptions_list, (void **)&surbl_module_ctx->exceptions)) {
  200. surbl_module_ctx->tld2_file = memory_pool_strdup (surbl_module_ctx->surbl_pool, value + sizeof ("file://") - 1);
  201. }
  202. }
  203. if ((value = get_module_opt (cfg, "surbl", "whitelist")) != NULL) {
  204. if (add_map (value, read_host_list, fin_host_list, (void **)&surbl_module_ctx->whitelist)) {
  205. surbl_module_ctx->whitelist_file = memory_pool_strdup (surbl_module_ctx->surbl_pool, value + sizeof ("file://") - 1);
  206. }
  207. }
  208. cur_opt = g_hash_table_lookup (cfg->modules_opts, "surbl");
  209. while (cur_opt) {
  210. cur = cur_opt->data;
  211. if (!g_strncasecmp (cur->param, "suffix", sizeof ("suffix") - 1)) {
  212. if ((str = strchr (cur->param, '_')) != NULL) {
  213. new_suffix = memory_pool_alloc (surbl_module_ctx->surbl_pool, sizeof (struct suffix_item));
  214. *str = '\0';
  215. new_suffix->symbol = memory_pool_strdup (surbl_module_ctx->surbl_pool, str + 1);
  216. new_suffix->suffix = memory_pool_strdup (surbl_module_ctx->surbl_pool, cur->value);
  217. msg_debug ("add new surbl suffix: %s with symbol: %s", new_suffix->suffix, new_suffix->symbol);
  218. *str = '_';
  219. surbl_module_ctx->suffixes = g_list_prepend (surbl_module_ctx->suffixes, new_suffix);
  220. register_symbol (&cfg->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix);
  221. }
  222. }
  223. if (!g_strncasecmp (cur->param, "bit", sizeof ("bit") - 1)) {
  224. if ((str = strchr (cur->param, '_')) != NULL) {
  225. bit = strtoul (str + 1, NULL, 10);
  226. if (bit != 0) {
  227. new_bit = memory_pool_alloc (surbl_module_ctx->surbl_pool, sizeof (struct surbl_bit_item));
  228. new_bit->bit = bit;
  229. new_bit->symbol = memory_pool_strdup (surbl_module_ctx->surbl_pool, cur->value);
  230. msg_debug ("add new bit suffix: %d with symbol: %s", (int)new_bit->bit, new_bit->symbol);
  231. surbl_module_ctx->bits = g_list_prepend (surbl_module_ctx->bits, new_bit);
  232. }
  233. }
  234. }
  235. cur_opt = g_list_next (cur_opt);
  236. }
  237. /* Add default suffix */
  238. if (surbl_module_ctx->suffixes == NULL) {
  239. new_suffix = memory_pool_alloc (surbl_module_ctx->surbl_pool, sizeof (struct suffix_item));
  240. new_suffix->suffix = memory_pool_strdup (surbl_module_ctx->surbl_pool, DEFAULT_SURBL_SUFFIX);
  241. new_suffix->symbol = memory_pool_strdup (surbl_module_ctx->surbl_pool, DEFAULT_SURBL_SYMBOL);
  242. msg_debug ("add default surbl suffix: %s with symbol: %s", new_suffix->suffix, new_suffix->symbol);
  243. surbl_module_ctx->suffixes = g_list_prepend (surbl_module_ctx->suffixes, new_suffix);
  244. register_symbol (&cfg->cache, new_suffix->symbol, 1, surbl_test_url, new_suffix);
  245. }
  246. return TRUE;
  247. }
  248. int
  249. surbl_module_reconfig (struct config_file *cfg)
  250. {
  251. memory_pool_delete (surbl_module_ctx->surbl_pool);
  252. surbl_module_ctx->surbl_pool = memory_pool_new (1024);
  253. return surbl_module_config (cfg);
  254. }
  255. static char *
  256. format_surbl_request (memory_pool_t * pool, f_str_t * hostname, struct suffix_item *suffix, gboolean append_suffix, GError ** err)
  257. {
  258. GHashTable *t;
  259. char *result = NULL, *dots[MAX_LEVELS], num_buf[sizeof("18446744073709551616")], *p;
  260. int len, slen, r, i, dots_num = 0, level = MAX_LEVELS;
  261. gboolean is_numeric = TRUE;
  262. guint64 ip_num;
  263. f_str_t f;
  264. if (G_LIKELY (suffix != NULL)) {
  265. slen = strlen (suffix->suffix);
  266. }
  267. else if (!append_suffix) {
  268. slen = 0;
  269. }
  270. else {
  271. g_assert_not_reached ();
  272. }
  273. len = hostname->len + slen + 2;
  274. p = hostname->begin;
  275. while (p - hostname->begin < hostname->len && dots_num < MAX_LEVELS) {
  276. if (*p == '.') {
  277. dots[dots_num] = p;
  278. dots_num ++;
  279. }
  280. else if (! g_ascii_isdigit (*p)) {
  281. is_numeric = FALSE;
  282. }
  283. p ++;
  284. }
  285. /* Check for numeric expressions */
  286. if (is_numeric && dots_num == 3) {
  287. /* This is ip address */
  288. result = memory_pool_alloc (pool, len);
  289. r = snprintf (result, len, "%*s.%*s.%*s.%*s",
  290. (int)(hostname->len - (dots[2] - hostname->begin + 1)),
  291. dots[2] + 1,
  292. (int)(dots[2] - dots[1] - 1),
  293. dots[1],
  294. (int)(dots[1] - dots[0] - 1),
  295. dots[0],
  296. (int)(dots[0] - hostname->begin),
  297. hostname->begin);
  298. }
  299. else if (is_numeric && dots_num == 0) {
  300. /* This is number */
  301. g_strlcpy (num_buf, hostname->begin, MIN (hostname->len + 1, sizeof (num_buf)));
  302. errno = 0;
  303. ip_num = strtoull (num_buf, NULL, 10);
  304. if (errno != 0) {
  305. msg_info ("cannot convert ip to number '%s': %s", num_buf, strerror (errno));
  306. g_set_error (err, SURBL_ERROR, /* error domain */
  307. CONVERSION_ERROR, /* error code */
  308. "URL cannot be decoded");
  309. return NULL;
  310. }
  311. len = sizeof ("255.255.255.255") + slen;
  312. result = memory_pool_alloc (pool, len);
  313. /* Hack for bugged windows resolver */
  314. ip_num &= 0xFFFFFFFF;
  315. /* Get octets */
  316. r = snprintf (result, len, "%u.%u.%u.%u",
  317. (uint32_t) ip_num & 0x000000FF, (uint32_t) (ip_num & 0x0000FF00) >> 8, (uint32_t) (ip_num & 0x00FF0000) >> 16, (uint32_t) (ip_num & 0xFF000000) >> 24);
  318. }
  319. else {
  320. /* Not a numeric url */
  321. result = memory_pool_alloc (pool, len);
  322. /* Now we should try to check for exceptions */
  323. for (i = MAX_LEVELS - 1; i >= 0; i --) {
  324. t = surbl_module_ctx->exceptions[i];
  325. if (t != NULL && dots_num >= i + 1) {
  326. f.begin = dots[dots_num - i - 1] + 1;
  327. f.len = hostname->len - (dots[dots_num - i - 1] - hostname->begin + 1);
  328. if (g_hash_table_lookup (t, &f) != NULL) {
  329. level = dots_num - i - 1;
  330. break;
  331. }
  332. }
  333. }
  334. if (level != MAX_LEVELS) {
  335. if (level == 0) {
  336. r = snprintf (result, len, "%*s", (int)hostname->len, hostname->begin);
  337. }
  338. else {
  339. r = snprintf (result, len, "%*s",
  340. (int)(hostname->len - (dots[level - 1] - hostname->begin + 1)),
  341. dots[level - 1] + 1);
  342. }
  343. }
  344. else if (dots_num >= 2) {
  345. r = snprintf (result, len, "%*s",
  346. (int)(hostname->len - (dots[dots_num - 2] - hostname->begin + 1)),
  347. dots[dots_num - 2] + 1);
  348. }
  349. else {
  350. r = snprintf (result, len, "%*s", (int)hostname->len, hostname->begin);
  351. }
  352. }
  353. if (g_hash_table_lookup (surbl_module_ctx->whitelist, result) != NULL) {
  354. msg_debug ("url %s is whitelisted", result);
  355. g_set_error (err, SURBL_ERROR, /* error domain */
  356. WHITELIST_ERROR, /* error code */
  357. "URL is whitelisted: %s", /* error message format string */
  358. result);
  359. return NULL;
  360. }
  361. if (append_suffix) {
  362. r += snprintf (result + r, len - r, ".%s", suffix->suffix);
  363. }
  364. msg_debug ("request: %s, dots: %d, level: %d, orig: %*s", result, dots_num, level, (int)hostname->len, hostname->begin);
  365. return result;
  366. }
  367. static void
  368. make_surbl_requests (struct uri *url, struct worker_task *task, GTree * tree, struct suffix_item *suffix)
  369. {
  370. char *surbl_req;
  371. f_str_t f;
  372. GError *err = NULL;
  373. struct dns_param *param;
  374. f.begin = url->host;
  375. f.len = url->hostlen;
  376. if (check_view (task->cfg->views, suffix->symbol, task)) {
  377. if ((surbl_req = format_surbl_request (task->task_pool, &f, suffix, TRUE, &err)) != NULL) {
  378. if (g_tree_lookup (tree, surbl_req) == NULL) {
  379. g_tree_insert (tree, surbl_req, surbl_req);
  380. param = memory_pool_alloc (task->task_pool, sizeof (struct dns_param));
  381. param->url = url;
  382. param->task = task;
  383. param->suffix = suffix;
  384. param->host_resolve = memory_pool_strdup (task->task_pool, surbl_req);
  385. debug_task ("send surbl dns request %s", surbl_req);
  386. if (evdns_resolve_ipv4 (surbl_req, DNS_QUERY_NO_SEARCH, dns_callback, (void *)param) == 0) {
  387. param->task->save.saved++;
  388. register_async_event (task->s, (event_finalizer_t) dns_callback, NULL, TRUE);
  389. }
  390. }
  391. else {
  392. debug_task ("request %s is already sent", surbl_req);
  393. }
  394. }
  395. else if (err != NULL && err->code != WHITELIST_ERROR) {
  396. msg_info ("cannot format url string for surbl %s, %s", struri (url), err->message);
  397. g_error_free (err);
  398. return;
  399. }
  400. else if (err != NULL) {
  401. g_error_free (err);
  402. }
  403. }
  404. else {
  405. debug_task ("skipping symbol that is not in view: %s", suffix->symbol);
  406. }
  407. }
  408. static void
  409. process_dns_results (struct worker_task *task, struct suffix_item *suffix, char *url, uint32_t addr)
  410. {
  411. char *c, *symbol;
  412. GList *cur;
  413. struct surbl_bit_item *bit;
  414. int len, found = 0;
  415. if ((c = strchr (suffix->symbol, '%')) != NULL && *(c + 1) == 'b') {
  416. cur = g_list_first (surbl_module_ctx->bits);
  417. while (cur) {
  418. bit = (struct surbl_bit_item *)cur->data;
  419. debug_task ("got result(%d) AND bit(%d): %d", (int)addr, (int)ntohl (bit->bit), (int)bit->bit & (int)ntohl (addr));
  420. if (((int)bit->bit & (int)ntohl (addr)) != 0) {
  421. len = strlen (suffix->symbol) - 2 + strlen (bit->symbol) + 1;
  422. *c = '\0';
  423. symbol = memory_pool_alloc (task->task_pool, len);
  424. snprintf (symbol, len, "%s%s%s", suffix->symbol, bit->symbol, c + 2);
  425. *c = '%';
  426. insert_result (task, symbol, 1, g_list_prepend (NULL, memory_pool_strdup (task->task_pool, url)));
  427. found = 1;
  428. }
  429. cur = g_list_next (cur);
  430. }
  431. if (!found) {
  432. insert_result (task, suffix->symbol, 1, g_list_prepend (NULL, memory_pool_strdup (task->task_pool, url)));
  433. }
  434. }
  435. else {
  436. insert_result (task, suffix->symbol, 1, g_list_prepend (NULL, memory_pool_strdup (task->task_pool, url)));
  437. }
  438. }
  439. static void
  440. dns_callback (int result, char type, int count, int ttl, void *addresses, void *data)
  441. {
  442. struct dns_param *param = (struct dns_param *)data;
  443. struct worker_task *task = param->task;
  444. debug_task ("in surbl request callback");
  445. /* If we have result from DNS server, this url exists in SURBL, so increase score */
  446. if (result == DNS_ERR_NONE && type == DNS_IPv4_A) {
  447. msg_info ("<%s> domain [%s] is in surbl %s", param->task->message_id, param->host_resolve, param->suffix->suffix);
  448. process_dns_results (param->task, param->suffix, param->host_resolve, (uint32_t) (((in_addr_t *) addresses)[0]));
  449. }
  450. else {
  451. debug_task ("<%s> domain [%s] is not in surbl %s", param->task->message_id, param->host_resolve, param->suffix->suffix);
  452. }
  453. param->task->save.saved--;
  454. if (param->task->save.saved == 0) {
  455. /* Call other filters */
  456. param->task->save.saved = 1;
  457. process_filters (param->task);
  458. }
  459. remove_forced_event (param->task->s, (event_finalizer_t) dns_callback);
  460. }
  461. static void
  462. memcached_callback (memcached_ctx_t * ctx, memc_error_t error, void *data)
  463. {
  464. struct memcached_param *param = (struct memcached_param *)data;
  465. int *url_count;
  466. switch (ctx->op) {
  467. case CMD_CONNECT:
  468. if (error != OK) {
  469. msg_info ("memcached returned error %s on CONNECT stage", memc_strerror (error));
  470. memc_close_ctx (param->ctx);
  471. param->task->save.saved--;
  472. if (param->task->save.saved == 0) {
  473. /* Call other filters */
  474. param->task->save.saved = 1;
  475. process_filters (param->task);
  476. }
  477. }
  478. else {
  479. memc_get (param->ctx, param->ctx->param);
  480. }
  481. break;
  482. case CMD_READ:
  483. if (error != OK) {
  484. msg_info ("memcached returned error %s on READ stage", memc_strerror (error));
  485. memc_close_ctx (param->ctx);
  486. param->task->save.saved--;
  487. if (param->task->save.saved == 0) {
  488. /* Call other filters */
  489. param->task->save.saved = 1;
  490. process_filters (param->task);
  491. }
  492. }
  493. else {
  494. url_count = (int *)param->ctx->param->buf;
  495. /* Do not check DNS for urls that have count more than max_urls */
  496. if (*url_count > surbl_module_ctx->max_urls) {
  497. msg_info ("url '%s' has count %d, max: %d", struri (param->url), *url_count, surbl_module_ctx->max_urls);
  498. /*
  499. * XXX: try to understand why we should use memcached here
  500. * insert_result (param->task, surbl_module_ctx->metric, surbl_module_ctx->symbol, 1);
  501. */
  502. }
  503. (*url_count)++;
  504. memc_set (param->ctx, param->ctx->param, surbl_module_ctx->url_expire);
  505. }
  506. break;
  507. case CMD_WRITE:
  508. if (error != OK) {
  509. msg_info ("memcached returned error %s on WRITE stage", memc_strerror (error));
  510. }
  511. memc_close_ctx (param->ctx);
  512. param->task->save.saved--;
  513. if (param->task->save.saved == 0) {
  514. /* Call other filters */
  515. param->task->save.saved = 1;
  516. process_filters (param->task);
  517. }
  518. make_surbl_requests (param->url, param->task, param->tree, param->suffix);
  519. break;
  520. default:
  521. return;
  522. }
  523. }
  524. static void
  525. register_memcached_call (struct uri *url, struct worker_task *task, GTree * url_tree, struct suffix_item *suffix)
  526. {
  527. struct memcached_param *param;
  528. struct memcached_server *selected;
  529. memcached_param_t *cur_param;
  530. gchar *sum_str;
  531. int *url_count;
  532. param = memory_pool_alloc (task->task_pool, sizeof (struct memcached_param));
  533. cur_param = memory_pool_alloc0 (task->task_pool, sizeof (memcached_param_t));
  534. url_count = memory_pool_alloc (task->task_pool, sizeof (int));
  535. param->url = url;
  536. param->task = task;
  537. param->tree = url_tree;
  538. param->suffix = suffix;
  539. param->ctx = memory_pool_alloc0 (task->task_pool, sizeof (memcached_ctx_t));
  540. cur_param->buf = (u_char *) url_count;
  541. cur_param->bufsize = sizeof (int);
  542. sum_str = g_compute_checksum_for_string (G_CHECKSUM_MD5, struri (url), -1);
  543. g_strlcpy (cur_param->key, sum_str, sizeof (cur_param->key));
  544. g_free (sum_str);
  545. selected = (struct memcached_server *)get_upstream_by_hash ((void *)task->cfg->memcached_servers,
  546. task->cfg->memcached_servers_num, sizeof (struct memcached_server),
  547. time (NULL), task->cfg->memcached_error_time, task->cfg->memcached_dead_time, task->cfg->memcached_maxerrors, cur_param->key, strlen (cur_param->key));
  548. if (selected == NULL) {
  549. msg_err ("no memcached servers can be selected");
  550. return;
  551. }
  552. param->ctx->callback = memcached_callback;
  553. param->ctx->callback_data = (void *)param;
  554. param->ctx->protocol = task->cfg->memcached_protocol;
  555. memcpy (&param->ctx->addr, &selected->addr, sizeof (struct in_addr));
  556. param->ctx->port = selected->port;
  557. param->ctx->timeout.tv_sec = task->cfg->memcached_connect_timeout / 1000;
  558. param->ctx->timeout.tv_sec = task->cfg->memcached_connect_timeout - param->ctx->timeout.tv_sec * 1000;
  559. param->ctx->sock = -1;
  560. #ifdef WITH_DEBUG
  561. param->ctx->options = MEMC_OPT_DEBUG;
  562. #else
  563. param->ctx->options = 0;
  564. #endif
  565. param->ctx->param = cur_param;
  566. memc_init_ctx (param->ctx);
  567. }
  568. static void
  569. free_redirector_session (void *ud)
  570. {
  571. struct redirector_param *param = (struct redirector_param *)ud;
  572. event_del (&param->ev);
  573. close (param->sock);
  574. param->task->save.saved--;
  575. make_surbl_requests (param->url, param->task, param->tree, param->suffix);
  576. if (param->task->save.saved == 0) {
  577. /* Call other filters */
  578. param->task->save.saved = 1;
  579. process_filters (param->task);
  580. }
  581. }
  582. static void
  583. redirector_callback (int fd, short what, void *arg)
  584. {
  585. struct redirector_param *param = (struct redirector_param *)arg;
  586. struct worker_task *task = param->task;
  587. char url_buf[1024];
  588. int r;
  589. struct timeval *timeout;
  590. char *p, *c;
  591. switch (param->state) {
  592. case STATE_CONNECT:
  593. /* We have write readiness after connect call, so reinit event */
  594. if (what == EV_WRITE) {
  595. timeout = memory_pool_alloc (param->task->task_pool, sizeof (struct timeval));
  596. timeout->tv_sec = surbl_module_ctx->read_timeout / 1000;
  597. timeout->tv_usec = surbl_module_ctx->read_timeout - timeout->tv_sec * 1000;
  598. event_del (&param->ev);
  599. event_set (&param->ev, param->sock, EV_READ | EV_PERSIST, redirector_callback, (void *)param);
  600. event_add (&param->ev, timeout);
  601. r = snprintf (url_buf, sizeof (url_buf), "GET %s HTTP/1.0\r\n\r\n", struri (param->url));
  602. if (write (param->sock, url_buf, r) == -1) {
  603. msg_err ("write failed %s", strerror (errno));
  604. remove_normal_event (param->task->s, free_redirector_session, param);
  605. return;
  606. }
  607. param->state = STATE_READ;
  608. }
  609. else {
  610. msg_info ("<%s> connection to redirector timed out while waiting for write", param->task->message_id);
  611. remove_normal_event (param->task->s, free_redirector_session, param);
  612. return;
  613. }
  614. break;
  615. case STATE_READ:
  616. if (what == EV_READ) {
  617. r = read (param->sock, url_buf, sizeof (url_buf));
  618. if ((p = strstr (url_buf, "Uri: ")) != NULL) {
  619. p += sizeof ("Uri: ") - 1;
  620. c = p;
  621. while (p++ < url_buf + sizeof (url_buf) - 1) {
  622. if (*p == '\r' || *p == '\n') {
  623. *p = '\0';
  624. break;
  625. }
  626. }
  627. if (*p == '\0') {
  628. debug_task ("<%s> got reply from redirector: '%s' -> '%s'", param->task->message_id, struri (param->url), c);
  629. parse_uri (param->url, memory_pool_strdup (param->task->task_pool, c), param->task->task_pool);
  630. }
  631. }
  632. remove_normal_event (param->task->s, free_redirector_session, param);
  633. }
  634. else {
  635. msg_info ("<%s> reading redirector timed out, while waiting for read", param->task->message_id);
  636. remove_normal_event (param->task->s, free_redirector_session, param);
  637. }
  638. break;
  639. }
  640. }
  641. static void
  642. register_redirector_call (struct uri *url, struct worker_task *task, GTree * url_tree, struct suffix_item *suffix)
  643. {
  644. int s;
  645. struct redirector_param *param;
  646. struct timeval *timeout;
  647. s = make_tcp_socket (&surbl_module_ctx->redirector_addr, surbl_module_ctx->redirector_port, FALSE, TRUE);
  648. if (s == -1) {
  649. msg_info ("<%s> cannot create tcp socket failed: %s", task->message_id, strerror (errno));
  650. task->save.saved--;
  651. make_surbl_requests (url, task, url_tree, suffix);
  652. return;
  653. }
  654. param = memory_pool_alloc (task->task_pool, sizeof (struct redirector_param));
  655. param->url = url;
  656. param->task = task;
  657. param->state = STATE_CONNECT;
  658. param->sock = s;
  659. param->tree = url_tree;
  660. param->suffix = suffix;
  661. timeout = memory_pool_alloc (task->task_pool, sizeof (struct timeval));
  662. timeout->tv_sec = surbl_module_ctx->connect_timeout / 1000;
  663. timeout->tv_usec = surbl_module_ctx->connect_timeout - timeout->tv_sec * 1000;
  664. event_set (&param->ev, s, EV_WRITE, redirector_callback, (void *)param);
  665. event_add (&param->ev, timeout);
  666. register_async_event (task->s, free_redirector_session, param, FALSE);
  667. }
  668. static gboolean
  669. tree_url_callback (gpointer key, gpointer value, void *data)
  670. {
  671. struct redirector_param *param = data;
  672. struct worker_task *task = param->task;
  673. struct uri *url = value;
  674. f_str_t f;
  675. char *urlstr;
  676. GError *err = NULL;
  677. debug_task ("check url %s", struri (url));
  678. if (surbl_module_ctx->use_redirector) {
  679. f.begin = url->host;
  680. f.len = url->hostlen;
  681. if ((urlstr = format_surbl_request (param->task->task_pool, &f, NULL, FALSE, &err)) != NULL) {
  682. if (g_hash_table_lookup (surbl_module_ctx->redirector_hosts, urlstr) != NULL) {
  683. register_redirector_call (url, param->task, param->tree, param->suffix);
  684. param->task->save.saved++;
  685. return FALSE;
  686. }
  687. }
  688. make_surbl_requests (url, param->task, param->tree, param->suffix);
  689. }
  690. else {
  691. if (param->task->worker->srv->cfg->memcached_servers_num > 0) {
  692. register_memcached_call (url, param->task, param->tree, param->suffix);
  693. param->task->save.saved++;
  694. }
  695. else {
  696. make_surbl_requests (url, param->task, param->tree, param->suffix);
  697. }
  698. }
  699. return FALSE;
  700. }
  701. static void
  702. surbl_test_url (struct worker_task *task, void *user_data)
  703. {
  704. GTree *url_tree;
  705. GList *cur;
  706. struct mime_text_part *part;
  707. struct redirector_param param;
  708. struct suffix_item *suffix = user_data;
  709. url_tree = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
  710. param.tree = url_tree;
  711. param.task = task;
  712. param.suffix = suffix;
  713. cur = task->text_parts;
  714. while (cur) {
  715. part = cur->data;
  716. if (part->urls) {
  717. g_tree_foreach (part->urls, tree_url_callback, &param);
  718. }
  719. if (part->html_urls) {
  720. g_tree_foreach (part->html_urls, tree_url_callback, &param);
  721. }
  722. cur = g_list_next (cur);
  723. }
  724. memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_tree_destroy, url_tree);
  725. }
  726. static int
  727. surbl_filter (struct worker_task *task)
  728. {
  729. /* XXX: remove this shit */
  730. return 0;
  731. }
  732. static int
  733. urls_command_handler (struct worker_task *task)
  734. {
  735. GList *cur;
  736. char *outbuf, *urlstr;
  737. int r, num = 0, buflen;
  738. struct uri *url;
  739. GError *err = NULL;
  740. GTree *url_tree;
  741. f_str_t f;
  742. url_tree = g_tree_new ((GCompareFunc) g_ascii_strcasecmp);
  743. /* First calculate buffer length */
  744. cur = g_list_first (task->urls);
  745. buflen = 0;
  746. while (cur) {
  747. url = cur->data;
  748. buflen += strlen (struri (url)) + url->hostlen + sizeof (" <\"\">, ") - 1;
  749. cur = g_list_next (cur);
  750. }
  751. buflen += sizeof (RSPAMD_REPLY_BANNER " 0 OK" CRLF CRLF "URLs: ");
  752. outbuf = memory_pool_alloc (task->task_pool, buflen * sizeof (char));
  753. r = snprintf (outbuf, buflen, "%s 0 %s" CRLF, (task->proto == SPAMC_PROTO) ? SPAMD_REPLY_BANNER : RSPAMD_REPLY_BANNER, "OK");
  754. r += snprintf (outbuf + r, buflen - r - 2, "URLs: ");
  755. cur = g_list_first (task->urls);
  756. while (cur) {
  757. num++;
  758. url = cur->data;
  759. if (g_tree_lookup (url_tree, struri (url)) == NULL) {
  760. g_tree_insert (url_tree, struri (url), url);
  761. f.begin = url->host;
  762. f.len = url->hostlen;
  763. if ((urlstr = format_surbl_request (task->task_pool, &f, NULL, FALSE, &err)) != NULL) {
  764. if (g_list_next (cur) != NULL) {
  765. r += snprintf (outbuf + r, buflen - r - 2, "%s <\"%s\">, ", (char *)urlstr, struri (url));
  766. }
  767. else {
  768. r += snprintf (outbuf + r, buflen - r - 2, "%s <\"%s\">", (char *)urlstr, struri (url));
  769. }
  770. }
  771. }
  772. cur = g_list_next (cur);
  773. }
  774. outbuf[r++] = '\r';
  775. outbuf[r++] = '\n';
  776. rspamd_dispatcher_write (task->dispatcher, outbuf, r, FALSE, TRUE);
  777. msg_info ("msg ok, id: <%s>, %d urls extracted", task->message_id, num);
  778. g_tree_destroy (url_tree);
  779. return 0;
  780. }
  781. /*
  782. * vi:ts=4
  783. */