You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

surbl.c 56KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /***MODULE:surbl
  17. * rspamd module that implements SURBL url checking
  18. *
  19. * Allowed options:
  20. * - weight (integer): weight of symbol
  21. * Redirector options:
  22. * - redirector (string): address of http redirector utility in format "host:port"
  23. * - redirector_connect_timeout (seconds): redirector connect timeout (default: 1s)
  24. * - redirector_read_timeout (seconds): timeout for reading data (default: 5s)
  25. * - redirector_hosts_map (map string): map that contains domains to check with redirector
  26. * Surbl options:
  27. * - exceptions (map string): map of domains that should be checked via surbl using 3 (e.g. somehost.domain.com)
  28. * components of domain name instead of normal 2 (e.g. domain.com)
  29. * - whitelist (map string): map of domains that should be whitelisted for surbl checks
  30. * - max_urls (integer): maximum allowed number of urls in message to be checked
  31. * - suffix (string): surbl address (for example insecure-bl.rambler.ru), may contain %b if bits are used (read documentation about it)
  32. * - bit (string): describes a prefix for a single bit
  33. */
  34. #include "config.h"
  35. #include "libmime/message.h"
  36. #include "libutil/map.h"
  37. #include "libutil/map_helpers.h"
  38. #include "rspamd.h"
  39. #include "utlist.h"
  40. #include "multipattern.h"
  41. #include "monitored.h"
  42. #include "libserver/html.h"
  43. #include "libutil/http_private.h"
  44. #include "unix-std.h"
  45. #include "lua/lua_common.h"
  46. #define msg_err_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
  47. "surbl", task->task_pool->tag.uid, \
  48. G_STRFUNC, \
  49. __VA_ARGS__)
  50. #define msg_warn_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
  51. "surbl", task->task_pool->tag.uid, \
  52. G_STRFUNC, \
  53. __VA_ARGS__)
  54. #define msg_info_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
  55. "surbl", task->task_pool->tag.uid, \
  56. G_STRFUNC, \
  57. __VA_ARGS__)
  58. #define msg_debug_surbl(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
  59. rspamd_surbl_log_id, "surbl", task->task_pool->tag.uid, \
  60. G_STRFUNC, \
  61. __VA_ARGS__)
  62. INIT_LOG_MODULE(surbl)
  63. static const gchar *M = "surbl";
  64. #define DEFAULT_SURBL_WEIGHT 10
  65. #define DEFAULT_REDIRECTOR_READ_TIMEOUT 5.0
  66. #define DEFAULT_SURBL_SYMBOL "SURBL_DNS"
  67. #define SURBL_OPTION_NOIP (1u << 0u)
  68. #define SURBL_OPTION_RESOLVEIP (1u << 1u)
  69. #define SURBL_OPTION_CHECKIMAGES (1u << 2u)
  70. #define SURBL_OPTION_CHECKDKIM (1u << 3u)
  71. #define SURBL_OPTION_FULLDOMAIN (1u << 4u)
  72. #define SURBL_OPTION_CHECKEMAILS (1u << 5u)
  73. #define MAX_LEVELS 10
  74. struct surbl_ctx {
  75. struct module_ctx ctx;
  76. guint16 weight;
  77. gdouble read_timeout;
  78. gboolean use_tags;
  79. GList *suffixes;
  80. const gchar *redirector_symbol;
  81. GHashTable **exceptions;
  82. struct rspamd_hash_map_helper *whitelist;
  83. GHashTable *redirector_tlds;
  84. guint use_redirector;
  85. guint max_redirected_urls;
  86. gint redirector_cbid;
  87. struct upstream_list *redirectors;
  88. };
  89. struct suffix_item {
  90. guint64 magic;
  91. const gchar *monitored_domain;
  92. const gchar *suffix;
  93. const gchar *symbol;
  94. GArray *bits;
  95. GHashTable *ips;
  96. struct rspamd_monitored *m;
  97. guint32 options;
  98. gboolean reported_offline;
  99. gint callback_id;
  100. gint url_process_cbref;
  101. };
  102. struct dns_param {
  103. struct rspamd_url *url;
  104. struct rspamd_task *task;
  105. gchar *host_resolve;
  106. gchar *host_orig; /* Name with no uribl suffix */
  107. struct suffix_item *suffix;
  108. struct rspamd_symcache_item *item;
  109. struct surbl_module_ctx *ctx;
  110. };
  111. struct redirector_param {
  112. struct rspamd_url *url;
  113. struct rspamd_task *task;
  114. struct upstream *redirector;
  115. struct surbl_ctx *ctx;
  116. struct rspamd_http_connection *conn;
  117. GHashTable *tree;
  118. struct suffix_item *suffix;
  119. struct rspamd_symcache_item *item;
  120. guint redirector_requests;
  121. };
  122. struct surbl_bit_item {
  123. guint32 bit;
  124. gchar *symbol;
  125. };
  126. #define SURBL_REDIRECTOR_CALLBACK "SURBL_REDIRECTOR_CALLBACK"
  127. static const guint64 rspamd_surbl_cb_magic = 0xe09b8536f80de0d1ULL;
  128. static const gchar *rspamd_surbl_default_monitored = "facebook.com";
  129. static const guint default_max_redirected_urls = 10;
  130. static void surbl_test_url (struct rspamd_task *task,
  131. struct rspamd_symcache_item *item,
  132. void *user_data);
  133. static void surbl_test_redirector (struct rspamd_task *task,
  134. struct rspamd_symcache_item *item,
  135. void *user_data);
  136. static void surbl_dns_callback (struct rdns_reply *reply, gpointer arg);
  137. static void surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg);
  138. static void process_dns_results (struct rspamd_task *task,
  139. struct suffix_item *suffix, gchar *resolved_name,
  140. guint32 addr, struct rspamd_url *url);
  141. static gint surbl_register_redirect_handler (lua_State *L);
  142. static gint surbl_continue_process_handler (lua_State *L);
  143. static gint surbl_is_redirector_handler (lua_State *L);
  144. #define NO_REGEXP (gpointer) - 1
  145. #define SURBL_ERROR surbl_error_quark ()
  146. #define WHITELIST_ERROR 0
  147. #define CONVERSION_ERROR 1
  148. #define DUPLICATE_ERROR 1
  149. GQuark
  150. surbl_error_quark (void)
  151. {
  152. return g_quark_from_static_string ("surbl-error-quark");
  153. }
  154. /* Initialization */
  155. gint surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
  156. gint surbl_module_config (struct rspamd_config *cfg);
  157. gint surbl_module_reconfig (struct rspamd_config *cfg);
  158. module_t surbl_module = {
  159. "surbl",
  160. surbl_module_init,
  161. surbl_module_config,
  162. surbl_module_reconfig,
  163. NULL,
  164. RSPAMD_MODULE_VER,
  165. (guint)-1,
  166. };
  167. static inline struct surbl_ctx *
  168. surbl_get_context (struct rspamd_config *cfg)
  169. {
  170. return (struct surbl_ctx *)g_ptr_array_index (cfg->c_modules,
  171. surbl_module.ctx_offset);
  172. }
  173. static void
  174. exceptions_free_value (gpointer v)
  175. {
  176. rspamd_ftok_t *val = v;
  177. g_free ((gpointer)val->begin);
  178. g_free (val);
  179. }
  180. static void
  181. exception_insert (gpointer st, gconstpointer key, gconstpointer value)
  182. {
  183. GHashTable **t = st;
  184. gint level = 0;
  185. const gchar *p = key;
  186. rspamd_ftok_t *val;
  187. while (*p) {
  188. if (*p == '.') {
  189. level++;
  190. }
  191. p++;
  192. }
  193. if (level >= MAX_LEVELS) {
  194. msg_err ("invalid domain in exceptions list: %s, levels: %d",
  195. (gchar *)key,
  196. level);
  197. return;
  198. }
  199. val = g_malloc (sizeof (rspamd_ftok_t));
  200. val->begin = g_strdup (key);
  201. val->len = strlen (key);
  202. if (t[level] == NULL) {
  203. t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash,
  204. rspamd_ftok_icase_equal,
  205. exceptions_free_value,
  206. g_free);
  207. }
  208. g_hash_table_replace (t[level], val, g_strdup (value));
  209. }
  210. static gchar *
  211. read_exceptions_list (gchar * chunk,
  212. gint len,
  213. struct map_cb_data *data,
  214. gboolean final)
  215. {
  216. GHashTable **t;
  217. guint i;
  218. if (data->cur_data == NULL) {
  219. t = data->prev_data;
  220. if (t) {
  221. for (i = 0; i < MAX_LEVELS; i++) {
  222. if (t[i] != NULL) {
  223. g_hash_table_destroy (t[i]);
  224. }
  225. t[i] = NULL;
  226. }
  227. g_free (t);
  228. }
  229. data->prev_data = NULL;
  230. data->cur_data = g_malloc0 (MAX_LEVELS * sizeof (GHashTable *));
  231. }
  232. return rspamd_parse_kv_list (
  233. chunk,
  234. len,
  235. data,
  236. exception_insert,
  237. "",
  238. final);
  239. }
  240. static void
  241. fin_exceptions_list (struct map_cb_data *data, void **target)
  242. {
  243. GHashTable **t;
  244. gint i;
  245. if (target) {
  246. *target = data->cur_data;
  247. }
  248. if (data->prev_data) {
  249. t = data->prev_data;
  250. for (i = 0; i < MAX_LEVELS; i++) {
  251. if (t[i] != NULL) {
  252. rspamd_default_log_function (G_LOG_LEVEL_DEBUG,
  253. "surbl", "",
  254. G_STRFUNC,
  255. "exceptions level %d: %d elements",
  256. i, g_hash_table_size (t[i]));
  257. }
  258. }
  259. }
  260. }
  261. static void
  262. dtor_exceptions_list (struct map_cb_data *data)
  263. {
  264. GHashTable **t;
  265. gint i;
  266. if (data->cur_data) {
  267. t = data->cur_data;
  268. for (i = 0; i < MAX_LEVELS; i++) {
  269. if (t[i] != NULL) {
  270. g_hash_table_destroy (t[i]);
  271. }
  272. t[i] = NULL;
  273. }
  274. g_free (t);
  275. }
  276. }
  277. static void
  278. redirector_insert (gpointer st, gconstpointer key, gconstpointer value)
  279. {
  280. GHashTable *tld_hash = st;
  281. const gchar *p = key, *begin = key;
  282. rspamd_fstring_t *pat;
  283. rspamd_ftok_t *tok;
  284. rspamd_regexp_t *re = NO_REGEXP;
  285. GError *err = NULL;
  286. while (*p && !g_ascii_isspace (*p)) {
  287. p++;
  288. }
  289. pat = rspamd_fstring_new_init (begin, p - begin);
  290. tok = g_malloc0 (sizeof (*tok));
  291. tok->begin = pat->str;
  292. tok->len = pat->len;
  293. if (g_ascii_isspace (*p)) {
  294. while (g_ascii_isspace (*p) && *p) {
  295. p++;
  296. }
  297. if (*p) {
  298. re = rspamd_regexp_new (p,
  299. "ir",
  300. &err);
  301. if (re == NULL) {
  302. msg_warn ("could not read regexp: %e while reading regexp %s",
  303. err,
  304. p);
  305. g_error_free (err);
  306. re = NO_REGEXP;
  307. }
  308. }
  309. }
  310. g_hash_table_replace (tld_hash, tok, re);
  311. }
  312. static void
  313. redirector_item_free (gpointer p)
  314. {
  315. rspamd_regexp_t *re;
  316. if (p != NULL && p != NO_REGEXP) {
  317. re = (rspamd_regexp_t *)p;
  318. rspamd_regexp_unref (re);
  319. }
  320. }
  321. static gchar *
  322. read_redirectors_list (gchar * chunk,
  323. gint len,
  324. struct map_cb_data *data,
  325. gboolean final)
  326. {
  327. GHashTable *tld_hash;
  328. if (data->cur_data == NULL) {
  329. tld_hash = g_hash_table_new_full (rspamd_ftok_icase_hash,
  330. rspamd_ftok_icase_equal,
  331. rspamd_fstring_mapped_ftok_free,
  332. redirector_item_free);
  333. data->cur_data = tld_hash;
  334. }
  335. return rspamd_parse_kv_list (
  336. chunk,
  337. len,
  338. data,
  339. redirector_insert,
  340. "",
  341. final);
  342. }
  343. static void
  344. fin_redirectors_list (struct map_cb_data *data, void **target)
  345. {
  346. GHashTable *tld_hash;
  347. if (target) {
  348. *target = data->cur_data;
  349. }
  350. if (data->prev_data) {
  351. tld_hash = data->prev_data;
  352. g_hash_table_unref (tld_hash);
  353. }
  354. }
  355. static void
  356. dtor_redirectors_list (struct map_cb_data *data)
  357. {
  358. GHashTable *tld_hash;
  359. if (data->cur_data) {
  360. tld_hash = data->cur_data;
  361. g_hash_table_unref (tld_hash);
  362. }
  363. }
  364. gint
  365. surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  366. {
  367. struct surbl_ctx *surbl_module_ctx;
  368. surbl_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
  369. sizeof (struct surbl_ctx));
  370. surbl_module_ctx->use_redirector = 0;
  371. surbl_module_ctx->suffixes = NULL;
  372. surbl_module_ctx->redirectors = NULL;
  373. surbl_module_ctx->whitelist = NULL;
  374. surbl_module_ctx->exceptions = NULL;
  375. surbl_module_ctx->redirector_cbid = -1;
  376. *ctx = (struct module_ctx *)surbl_module_ctx;
  377. rspamd_rcl_add_doc_by_path (cfg,
  378. NULL,
  379. "URL blacklist plugin",
  380. "surbl",
  381. UCL_OBJECT,
  382. NULL,
  383. 0,
  384. NULL,
  385. 0);
  386. rspamd_rcl_add_doc_by_path (cfg,
  387. "surbl",
  388. "List of redirector servers",
  389. "redirector",
  390. UCL_STRING,
  391. NULL,
  392. 0,
  393. NULL,
  394. 0);
  395. rspamd_rcl_add_doc_by_path (cfg,
  396. "surbl",
  397. "Map of domains that should be checked with redirector",
  398. "redirector_hosts_map",
  399. UCL_STRING,
  400. NULL,
  401. 0,
  402. NULL,
  403. 0);
  404. rspamd_rcl_add_doc_by_path (cfg,
  405. "surbl",
  406. "Connect timeout for redirector",
  407. "redirector_connect_timeout",
  408. UCL_TIME,
  409. NULL,
  410. 0,
  411. NULL,
  412. 0);
  413. rspamd_rcl_add_doc_by_path (cfg,
  414. "surbl",
  415. "Read timeout for redirector",
  416. "redirector_read_timeout",
  417. UCL_TIME,
  418. NULL,
  419. 0,
  420. NULL,
  421. 0);
  422. rspamd_rcl_add_doc_by_path (cfg,
  423. "surbl",
  424. "Maximum number of URLs to process per message",
  425. "max_urls",
  426. UCL_INT,
  427. NULL,
  428. 0,
  429. NULL,
  430. 0);
  431. rspamd_rcl_add_doc_by_path (cfg,
  432. "surbl",
  433. "Rules for TLD composition",
  434. "exceptions",
  435. UCL_STRING,
  436. NULL,
  437. 0,
  438. NULL,
  439. 0);
  440. rspamd_rcl_add_doc_by_path (cfg,
  441. "surbl",
  442. "Map of whitelisted domains",
  443. "whitelist",
  444. UCL_STRING,
  445. NULL,
  446. 0,
  447. NULL,
  448. 0);
  449. rspamd_rcl_add_doc_by_path (cfg,
  450. "surbl",
  451. "URL blacklist rule",
  452. "rule",
  453. UCL_OBJECT,
  454. NULL,
  455. 0,
  456. NULL,
  457. 0);
  458. /* Rules doc strings */
  459. rspamd_rcl_add_doc_by_path (cfg,
  460. "surbl.rule",
  461. "Name of DNS black list (e.g. `multi.surbl.com`)",
  462. "suffix",
  463. UCL_STRING,
  464. NULL,
  465. 0,
  466. NULL,
  467. 0);
  468. rspamd_rcl_add_doc_by_path (cfg,
  469. "surbl.rule",
  470. "Symbol to insert (if no bits or suffixes are defined)",
  471. "symbol",
  472. UCL_STRING,
  473. NULL,
  474. 0,
  475. NULL,
  476. 0);
  477. rspamd_rcl_add_doc_by_path (cfg,
  478. "surbl.rule",
  479. "Whether the defined rule should be used",
  480. "enabled",
  481. UCL_BOOLEAN,
  482. NULL,
  483. 0,
  484. NULL,
  485. 0);
  486. rspamd_rcl_add_doc_by_path (cfg,
  487. "surbl.rule",
  488. "Do not try to check URLs with IP address instead of hostname",
  489. "no_ip",
  490. UCL_BOOLEAN,
  491. NULL,
  492. 0,
  493. NULL,
  494. 0);
  495. rspamd_rcl_add_doc_by_path (cfg,
  496. "surbl.rule",
  497. "Resolve URL host and then check against the specified suffix with reversed IP octets",
  498. "resolve_ip",
  499. UCL_BOOLEAN,
  500. NULL,
  501. 0,
  502. NULL,
  503. 0);
  504. rspamd_rcl_add_doc_by_path (cfg,
  505. "surbl.rule",
  506. "Check images URLs with this URL list",
  507. "images",
  508. UCL_BOOLEAN,
  509. NULL,
  510. 0,
  511. NULL,
  512. 0);
  513. rspamd_rcl_add_doc_by_path (cfg,
  514. "surbl.rule",
  515. "Parse IP bits in DNS reply, the content is 'symbol = <bit>'",
  516. "bits",
  517. UCL_OBJECT,
  518. NULL,
  519. 0,
  520. NULL,
  521. 0);
  522. rspamd_rcl_add_doc_by_path (cfg,
  523. "surbl.rule",
  524. "Parse IP addresses in DNS reply, the content is 'symbol = address'",
  525. "ips",
  526. UCL_OBJECT,
  527. NULL,
  528. 0,
  529. NULL,
  530. 0);
  531. rspamd_rcl_add_doc_by_path (cfg,
  532. "surbl.rule",
  533. "Check domains in valid DKIM signatures",
  534. "check_dkim",
  535. UCL_BOOLEAN,
  536. NULL,
  537. 0,
  538. NULL,
  539. 0);
  540. rspamd_rcl_add_doc_by_path (cfg,
  541. "surbl.rule",
  542. "Check full domain name instead of eSLD",
  543. "full_domain",
  544. UCL_BOOLEAN,
  545. NULL,
  546. 0,
  547. NULL,
  548. 0);
  549. return 0;
  550. }
  551. /*
  552. * Register virtual symbols for suffixes with bit wildcard
  553. */
  554. static void
  555. register_bit_symbols (struct rspamd_config *cfg, struct suffix_item *suffix,
  556. gint parent_id)
  557. {
  558. guint i;
  559. GHashTableIter it;
  560. struct surbl_bit_item *bit;
  561. gpointer k, v;
  562. if (suffix->ips != NULL) {
  563. g_hash_table_iter_init (&it, suffix->ips);
  564. while (g_hash_table_iter_next (&it, &k, &v)) {
  565. bit = v;
  566. /*
  567. * We can have multiple IPs mapped to a single symbol,
  568. * so skip symbol's registration to avoid duplicates
  569. */
  570. if (rspamd_symcache_find_symbol (cfg->cache, bit->symbol) == -1) {
  571. rspamd_symcache_add_symbol (cfg->cache, bit->symbol,
  572. 0, NULL, NULL,
  573. SYMBOL_TYPE_VIRTUAL, parent_id);
  574. }
  575. msg_debug_config ("bit: %d", bit->bit);
  576. }
  577. }
  578. else if (suffix->bits != NULL) {
  579. for (i = 0; i < suffix->bits->len; i++) {
  580. bit = &g_array_index (suffix->bits, struct surbl_bit_item, i);
  581. rspamd_symcache_add_symbol (cfg->cache, bit->symbol,
  582. 0, NULL, NULL,
  583. SYMBOL_TYPE_VIRTUAL, parent_id);
  584. }
  585. }
  586. else {
  587. rspamd_symcache_add_symbol (cfg->cache, suffix->symbol,
  588. 0, NULL, NULL,
  589. SYMBOL_TYPE_VIRTUAL, parent_id);
  590. }
  591. }
  592. static void
  593. surbl_module_add_ip (const ucl_object_t *ip, const gchar *symbol,
  594. struct suffix_item* suffix,
  595. struct rspamd_config* cfg)
  596. {
  597. gchar* p;
  598. guint32 bit;
  599. const gchar* ip_val;
  600. struct surbl_bit_item* new_bit;
  601. ip_val = ucl_obj_tostring (ip);
  602. new_bit = rspamd_mempool_alloc (
  603. cfg->cfg_pool,
  604. sizeof(struct surbl_bit_item));
  605. if (inet_pton (AF_INET, ip_val, &bit) != 1) {
  606. msg_err_config ("cannot parse ip %s: %s", ip_val,
  607. strerror (errno));
  608. return;
  609. }
  610. new_bit->bit = bit;
  611. new_bit->symbol = rspamd_mempool_strdup (
  612. cfg->cfg_pool,
  613. symbol);
  614. /* Convert to uppercase */
  615. p = new_bit->symbol;
  616. while (*p) {
  617. *p = g_ascii_toupper (*p);
  618. p++;
  619. }
  620. msg_debug_config ("add new IP suffix: %d with symbol: %s",
  621. (gint)new_bit->bit, new_bit->symbol);
  622. g_hash_table_insert (suffix->ips, &new_bit->bit,
  623. new_bit);
  624. }
  625. static gint
  626. surbl_module_parse_rule (const ucl_object_t* value, struct rspamd_config* cfg)
  627. {
  628. const ucl_object_t* cur_rule;
  629. const ucl_object_t* cur;
  630. gint cb_id;
  631. gint nrules = 0;
  632. struct suffix_item* new_suffix;
  633. const gchar *monitored_domain = NULL;
  634. struct surbl_bit_item* new_bit;
  635. ucl_object_t *ropts;
  636. struct surbl_ctx *surbl_module_ctx = surbl_get_context (cfg);
  637. LL_FOREACH(value, cur_rule) {
  638. monitored_domain = NULL;
  639. cur = ucl_object_lookup (cur_rule, "enabled");
  640. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  641. if (!ucl_object_toboolean (cur)) {
  642. continue;
  643. }
  644. }
  645. cur = ucl_object_lookup (cur_rule, "suffix");
  646. if (cur == NULL) {
  647. msg_err_config("surbl rule must have explicit symbol "
  648. "definition");
  649. continue;
  650. }
  651. new_suffix = rspamd_mempool_alloc0 (cfg->cfg_pool,
  652. sizeof (struct suffix_item));
  653. new_suffix->magic = rspamd_surbl_cb_magic;
  654. new_suffix->suffix = rspamd_mempool_strdup (
  655. cfg->cfg_pool, ucl_obj_tostring (cur));
  656. new_suffix->options = 0;
  657. new_suffix->bits = g_array_new (FALSE, FALSE,
  658. sizeof (struct surbl_bit_item));
  659. rspamd_mempool_add_destructor (cfg->cfg_pool,
  660. (rspamd_mempool_destruct_t )rspamd_array_free_hard,
  661. new_suffix->bits);
  662. cur = ucl_object_lookup (cur_rule, "symbol");
  663. if (cur == NULL) {
  664. if (ucl_object_key (value)) {
  665. new_suffix->symbol = rspamd_mempool_strdup (
  666. cfg->cfg_pool,
  667. ucl_object_key (value));
  668. }
  669. else {
  670. msg_warn_config(
  671. "surbl rule for suffix %s lacks symbol, using %s as symbol",
  672. new_suffix->suffix, DEFAULT_SURBL_SYMBOL);
  673. new_suffix->symbol = rspamd_mempool_strdup (
  674. cfg->cfg_pool, DEFAULT_SURBL_SYMBOL);
  675. }
  676. }
  677. else {
  678. new_suffix->symbol = rspamd_mempool_strdup (
  679. cfg->cfg_pool, ucl_obj_tostring (cur));
  680. }
  681. cur = ucl_object_lookup (cur_rule, "options");
  682. if (cur != NULL && cur->type == UCL_STRING) {
  683. if (strstr(ucl_obj_tostring (cur), "noip") != NULL) {
  684. new_suffix->options |= SURBL_OPTION_NOIP;
  685. }
  686. }
  687. cur = ucl_object_lookup (cur_rule, "no_ip");
  688. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  689. if (ucl_object_toboolean (cur)) {
  690. new_suffix->options |= SURBL_OPTION_NOIP;
  691. }
  692. }
  693. cur = ucl_object_lookup (cur_rule, "monitored_domain");
  694. if (cur != NULL && cur->type == UCL_STRING) {
  695. monitored_domain = ucl_object_tostring (cur);
  696. }
  697. cur = ucl_object_lookup (cur_rule, "resolve_ip");
  698. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  699. if (ucl_object_toboolean (cur)) {
  700. new_suffix->options |= SURBL_OPTION_RESOLVEIP;
  701. if (!monitored_domain) {
  702. monitored_domain = "1.0.0.127";
  703. }
  704. }
  705. }
  706. if (!monitored_domain) {
  707. monitored_domain = rspamd_surbl_default_monitored;
  708. }
  709. ropts = ucl_object_typed_new (UCL_OBJECT);
  710. ucl_object_insert_key (ropts,
  711. ucl_object_fromstring (monitored_domain),
  712. "prefix", 0, false);
  713. ucl_object_insert_key (ropts,
  714. ucl_object_fromstring ("nxdomain"),
  715. "rcode", 0, false);
  716. rspamd_mempool_add_destructor (cfg->cfg_pool,
  717. (rspamd_mempool_destruct_t )ucl_object_unref,
  718. ropts);
  719. cur = ucl_object_lookup_any (cur_rule, "images", "check_images", NULL);
  720. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  721. if (ucl_object_toboolean (cur)) {
  722. new_suffix->options |= SURBL_OPTION_CHECKIMAGES;
  723. }
  724. }
  725. cur = ucl_object_lookup_any (cur_rule, "emails", "check_emails", NULL);
  726. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  727. if (ucl_object_toboolean (cur)) {
  728. new_suffix->options |= SURBL_OPTION_CHECKEMAILS;
  729. }
  730. }
  731. cur = ucl_object_lookup_any (cur_rule, "dkim", "check_dkim", NULL);
  732. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  733. if (ucl_object_toboolean (cur)) {
  734. new_suffix->options |= SURBL_OPTION_CHECKDKIM;
  735. }
  736. }
  737. cur = ucl_object_lookup (cur_rule, "full_domain");
  738. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  739. if (ucl_object_toboolean (cur)) {
  740. new_suffix->options |= SURBL_OPTION_FULLDOMAIN;
  741. }
  742. }
  743. if ((new_suffix->options & (SURBL_OPTION_RESOLVEIP | SURBL_OPTION_NOIP))
  744. == (SURBL_OPTION_NOIP | SURBL_OPTION_RESOLVEIP)) {
  745. /* Mutually exclusive options */
  746. msg_err_config ("options noip and resolve_ip are "
  747. "mutually exclusive for suffix %s", new_suffix->suffix);
  748. continue;
  749. }
  750. GString *sym = g_string_sized_new (127);
  751. gchar *p;
  752. rspamd_printf_gstring (sym, "SURBL_%s",
  753. new_suffix->suffix);
  754. p = sym->str;
  755. while (*p) {
  756. if (*p == '.') {
  757. *p = '_';
  758. }
  759. else {
  760. *p = g_ascii_toupper (*p);
  761. }
  762. p ++;
  763. }
  764. cb_id = rspamd_symcache_add_symbol (cfg->cache, sym->str,
  765. 0, surbl_test_url, new_suffix, SYMBOL_TYPE_CALLBACK, -1);
  766. rspamd_config_add_symbol (cfg,
  767. sym->str,
  768. 0.0,
  769. "SURBL rule check callback",
  770. "surbl",
  771. RSPAMD_SYMBOL_FLAG_IGNORE,
  772. 1,
  773. 1);
  774. rspamd_symcache_add_dependency (cfg->cache, cb_id,
  775. SURBL_REDIRECTOR_CALLBACK, -1);
  776. /* Failure symbol */
  777. g_string_append (sym, "_FAIL");
  778. rspamd_symcache_add_symbol (cfg->cache, sym->str,
  779. 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_NOSTAT, cb_id);
  780. rspamd_config_add_symbol (cfg, sym->str, 0.0, "SURBL failure symbol",
  781. "surbl", 0, 0, 0);
  782. g_string_free (sym, TRUE);
  783. nrules++;
  784. new_suffix->callback_id = cb_id;
  785. cur = ucl_object_lookup (cur_rule, "bits");
  786. if (cur != NULL && cur->type == UCL_OBJECT) {
  787. ucl_object_iter_t it = NULL;
  788. const ucl_object_t* cur_bit;
  789. guint32 bit;
  790. while ((cur_bit = ucl_object_iterate (cur, &it, true)) != NULL) {
  791. if (ucl_object_key (cur_bit) != NULL
  792. && cur_bit->type == UCL_INT) {
  793. gchar* p;
  794. bit = ucl_obj_toint (cur_bit);
  795. new_bit = rspamd_mempool_alloc (
  796. cfg->cfg_pool,
  797. sizeof(struct surbl_bit_item));
  798. new_bit->bit = bit;
  799. new_bit->symbol = rspamd_mempool_strdup (
  800. cfg->cfg_pool,
  801. ucl_object_key (cur_bit));
  802. /* Convert to uppercase */
  803. p = new_bit->symbol;
  804. while (*p) {
  805. *p = g_ascii_toupper (*p);
  806. p++;
  807. }
  808. msg_debug_config("add new bit suffix: %d with symbol: %s",
  809. (gint)new_bit->bit, new_bit->symbol);
  810. g_array_append_val(new_suffix->bits, *new_bit);
  811. }
  812. }
  813. }
  814. cur = ucl_object_lookup(cur_rule, "ips");
  815. if (cur != NULL && cur->type == UCL_OBJECT) {
  816. ucl_object_iter_t it = NULL;
  817. const ucl_object_t* cur_bit;
  818. new_suffix->ips = g_hash_table_new (g_int_hash, g_int_equal);
  819. rspamd_mempool_add_destructor (cfg->cfg_pool,
  820. (rspamd_mempool_destruct_t )g_hash_table_unref,
  821. new_suffix->ips);
  822. while ((cur_bit = ucl_object_iterate (cur, &it, true)) != NULL) {
  823. if (ucl_object_key (cur_bit) != NULL) {
  824. if (ucl_object_type (cur_bit) == UCL_STRING) {
  825. /* Single IP */
  826. surbl_module_add_ip (cur_bit, ucl_object_key (cur_bit),
  827. new_suffix, cfg);
  828. }
  829. else if (ucl_object_type (cur_bit) == UCL_ARRAY) {
  830. ucl_object_iter_t ar_it = NULL;
  831. const ucl_object_t* cur_ar;
  832. /* Array of IPs */
  833. while ((cur_ar = ucl_object_iterate (cur_bit, &ar_it,
  834. true)) != NULL) {
  835. if (ucl_object_type (cur_ar) == UCL_STRING) {
  836. surbl_module_add_ip (cur_ar,
  837. ucl_object_key (cur_bit),
  838. new_suffix, cfg);
  839. }
  840. else {
  841. msg_err_config ("garbadge in ips element");
  842. }
  843. }
  844. }
  845. }
  846. }
  847. }
  848. cur = ucl_object_lookup (cur_rule, "process_script");
  849. if (cur != NULL && cur->type == UCL_STRING) {
  850. lua_State *L = cfg->lua_state;
  851. GString *tb;
  852. gint err_idx;
  853. const gchar *input = ucl_object_tostring (cur);
  854. gboolean loaded = FALSE;
  855. lua_pushcfunction (L, &rspamd_lua_traceback);
  856. err_idx = lua_gettop (L);
  857. /* First try return + input */
  858. tb = g_string_sized_new (strlen (input) + sizeof ("return "));
  859. rspamd_printf_gstring (tb, "return %s", input);
  860. if (luaL_loadstring (L, tb->str) != 0) {
  861. /* Reset stack */
  862. lua_settop (L, err_idx - 1);
  863. lua_pushcfunction (L, &rspamd_lua_traceback);
  864. err_idx = lua_gettop (L);
  865. /* Try with no return */
  866. if (luaL_loadstring (L, input) != 0) {
  867. msg_err_config ("cannot load string %s\n",
  868. input);
  869. }
  870. else {
  871. loaded = TRUE;
  872. }
  873. }
  874. else {
  875. loaded = TRUE;
  876. }
  877. g_string_free (tb, TRUE);
  878. if (loaded) {
  879. if (lua_pcall (L, 0, 1, err_idx) != 0) {
  880. msg_err_config ("call failed: %s\n", lua_tostring (L, -1));
  881. }
  882. else if (lua_isfunction (L, -1)) {
  883. new_suffix->url_process_cbref = luaL_ref (L,
  884. LUA_REGISTRYINDEX);
  885. }
  886. }
  887. lua_settop (L, err_idx - 1);
  888. }
  889. if (new_suffix->symbol) {
  890. /* Register just a symbol itself */
  891. rspamd_symcache_add_symbol (cfg->cache,
  892. new_suffix->symbol, 0,
  893. NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id);
  894. nrules++;
  895. }
  896. new_suffix->m = rspamd_monitored_create (cfg->monitored_ctx,
  897. new_suffix->suffix, RSPAMD_MONITORED_DNS,
  898. RSPAMD_MONITORED_DEFAULT, ropts);
  899. surbl_module_ctx->suffixes = g_list_prepend (surbl_module_ctx->suffixes,
  900. new_suffix);
  901. }
  902. return nrules;
  903. }
  904. gint
  905. surbl_module_config (struct rspamd_config *cfg)
  906. {
  907. GList *cur_opt;
  908. struct suffix_item *cur_suffix = NULL;
  909. const ucl_object_t *value, *cur;
  910. const gchar *redir_val;
  911. gint nrules = 0;
  912. lua_State *L;
  913. struct surbl_ctx *surbl_module_ctx = surbl_get_context (cfg);
  914. if (!rspamd_config_is_module_enabled (cfg, "surbl")) {
  915. return TRUE;
  916. }
  917. /* Register global methods */
  918. L = cfg->lua_state;
  919. lua_getglobal (L, "rspamd_plugins");
  920. if (lua_type (L, -1) == LUA_TTABLE) {
  921. lua_pushstring (L, "surbl");
  922. lua_createtable (L, 0, 3);
  923. /* Set methods */
  924. lua_pushstring (L, "register_redirect");
  925. lua_pushcfunction (L, surbl_register_redirect_handler);
  926. lua_settable (L, -3);
  927. lua_pushstring (L, "continue_process");
  928. lua_pushcfunction (L, surbl_continue_process_handler);
  929. lua_settable (L, -3);
  930. lua_pushstring (L, "is_redirector");
  931. lua_pushcfunction (L, surbl_is_redirector_handler);
  932. lua_settable (L, -3);
  933. /* Finish surbl key */
  934. lua_settable (L, -3);
  935. }
  936. lua_pop (L, 1); /* Remove global function */
  937. (void) rspamd_symcache_add_symbol (cfg->cache, SURBL_REDIRECTOR_CALLBACK,
  938. 0, surbl_test_redirector, NULL,
  939. SYMBOL_TYPE_CALLBACK, -1);
  940. rspamd_config_add_symbol (cfg,
  941. SURBL_REDIRECTOR_CALLBACK,
  942. 0.0,
  943. "SURBL redirector check callback",
  944. "surbl",
  945. RSPAMD_SYMBOL_FLAG_IGNORE,
  946. 1,
  947. 1);
  948. if ((value =
  949. rspamd_config_get_module_opt (cfg, "surbl", "redirector")) != NULL) {
  950. surbl_module_ctx->redirectors = rspamd_upstreams_create (cfg->ups_ctx);
  951. rspamd_mempool_add_destructor (cfg->cfg_pool,
  952. (rspamd_mempool_destruct_t)rspamd_upstreams_destroy,
  953. surbl_module_ctx->redirectors);
  954. LL_FOREACH (value, cur)
  955. {
  956. redir_val = ucl_obj_tostring (cur);
  957. if (rspamd_upstreams_add_upstream (surbl_module_ctx->redirectors,
  958. redir_val, 80, RSPAMD_UPSTREAM_PARSE_DEFAULT,
  959. NULL)) {
  960. surbl_module_ctx->use_redirector = TRUE;
  961. }
  962. }
  963. }
  964. if ((value =
  965. rspamd_config_get_module_opt (cfg, "surbl",
  966. "redirector_symbol")) != NULL) {
  967. surbl_module_ctx->redirector_symbol = ucl_obj_tostring (value);
  968. rspamd_symcache_add_symbol (cfg->cache,
  969. surbl_module_ctx->redirector_symbol,
  970. 0, NULL, NULL, SYMBOL_TYPE_COMPOSITE, -1);
  971. }
  972. else {
  973. surbl_module_ctx->redirector_symbol = NULL;
  974. }
  975. if ((value =
  976. rspamd_config_get_module_opt (cfg, "surbl", "weight")) != NULL) {
  977. surbl_module_ctx->weight = ucl_obj_toint (value);
  978. }
  979. else {
  980. surbl_module_ctx->weight = DEFAULT_SURBL_WEIGHT;
  981. }
  982. if ((value =
  983. rspamd_config_get_module_opt (cfg, "surbl", "use_tags")) != NULL) {
  984. surbl_module_ctx->use_tags = ucl_obj_toboolean (value);
  985. }
  986. else {
  987. surbl_module_ctx->use_tags = FALSE;
  988. }
  989. if ((value =
  990. rspamd_config_get_module_opt (cfg, "surbl",
  991. "redirector_read_timeout")) != NULL) {
  992. surbl_module_ctx->read_timeout = ucl_obj_todouble (value);
  993. }
  994. else {
  995. surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT;
  996. }
  997. if ((value =
  998. rspamd_config_get_module_opt (cfg, "surbl",
  999. "redirector_hosts_map")) != NULL) {
  1000. if (!rspamd_map_add_from_ucl (cfg, value,
  1001. "SURBL redirectors list",
  1002. read_redirectors_list,
  1003. fin_redirectors_list,
  1004. dtor_redirectors_list,
  1005. (void **)&surbl_module_ctx->redirector_tlds)) {
  1006. msg_warn_config ("bad redirectors map definition: %s",
  1007. ucl_obj_tostring (value));
  1008. }
  1009. }
  1010. if ((value =
  1011. rspamd_config_get_module_opt (cfg, "surbl", "exceptions")) != NULL) {
  1012. rspamd_map_add_from_ucl (cfg, value,
  1013. "SURBL exceptions list",
  1014. read_exceptions_list,
  1015. fin_exceptions_list,
  1016. dtor_exceptions_list,
  1017. (void **)&surbl_module_ctx->exceptions);
  1018. }
  1019. if ((value =
  1020. rspamd_config_get_module_opt (cfg, "surbl", "whitelist")) != NULL) {
  1021. rspamd_map_add_from_ucl (cfg, value,
  1022. "SURBL whitelist",
  1023. rspamd_kv_list_read,
  1024. rspamd_kv_list_fin,
  1025. rspamd_kv_list_dtor,
  1026. (void **)&surbl_module_ctx->whitelist);
  1027. }
  1028. value = rspamd_config_get_module_opt (cfg, "surbl", "rule");
  1029. if (value != NULL && value->type == UCL_OBJECT) {
  1030. ucl_object_iter_t it = NULL;
  1031. const ucl_object_t *cur_value;
  1032. if (ucl_object_lookup (value, "symbol") != NULL) {
  1033. /* Old style */
  1034. nrules += surbl_module_parse_rule (value, cfg);
  1035. }
  1036. else {
  1037. /* New style */
  1038. while ((cur_value = ucl_object_iterate (value, &it, true)) != NULL) {
  1039. nrules += surbl_module_parse_rule (cur_value, cfg);
  1040. }
  1041. }
  1042. }
  1043. value = rspamd_config_get_module_opt (cfg, "surbl", "rules");
  1044. if (value != NULL && value->type == UCL_OBJECT) {
  1045. ucl_object_iter_t it = NULL;
  1046. const ucl_object_t *cur_value;
  1047. /* New style only */
  1048. while ((cur_value = ucl_object_iterate (value, &it, true)) != NULL) {
  1049. nrules += surbl_module_parse_rule (cur_value, cfg);
  1050. }
  1051. }
  1052. /* Add default suffix */
  1053. if (surbl_module_ctx->suffixes == NULL) {
  1054. msg_err_config ("surbl module loaded but no suffixes defined, skip "
  1055. "checks");
  1056. return TRUE;
  1057. }
  1058. if (surbl_module_ctx->suffixes != NULL) {
  1059. rspamd_mempool_add_destructor (cfg->cfg_pool,
  1060. (rspamd_mempool_destruct_t) g_list_free,
  1061. surbl_module_ctx->suffixes);
  1062. }
  1063. cur_opt = surbl_module_ctx->suffixes;
  1064. while (cur_opt) {
  1065. cur_suffix = cur_opt->data;
  1066. if (cur_suffix->bits != NULL || cur_suffix->ips != NULL) {
  1067. register_bit_symbols (cfg, cur_suffix, cur_suffix->callback_id);
  1068. }
  1069. if (cur_suffix->options & SURBL_OPTION_CHECKDKIM) {
  1070. rspamd_symcache_add_dependency (cfg->cache,
  1071. cur_suffix->callback_id, "DKIM_TRACE", -1);
  1072. }
  1073. cur_opt = g_list_next (cur_opt);
  1074. }
  1075. surbl_module_ctx->max_redirected_urls = default_max_redirected_urls;
  1076. if ((value =
  1077. rspamd_config_get_module_opt (cfg, "surbl", "max_redirected_urls")) != NULL) {
  1078. surbl_module_ctx->max_redirected_urls = ucl_obj_toint (value);
  1079. }
  1080. msg_info_config ("init internal surbls module, %d uribl rules loaded",
  1081. nrules);
  1082. return TRUE;
  1083. }
  1084. gint
  1085. surbl_module_reconfig (struct rspamd_config *cfg)
  1086. {
  1087. struct surbl_ctx *surbl_module_ctx = surbl_get_context (cfg);
  1088. /* Reinit module */
  1089. surbl_module_ctx->use_redirector = 0;
  1090. surbl_module_ctx->suffixes = NULL;
  1091. surbl_module_ctx->redirectors = NULL;
  1092. surbl_module_ctx->whitelist = NULL;
  1093. /* Zero exceptions hashes */
  1094. surbl_module_ctx->exceptions = NULL;
  1095. rspamd_mempool_add_destructor (cfg->cfg_pool,
  1096. (rspamd_mempool_destruct_t) g_list_free,
  1097. surbl_module_ctx->suffixes);
  1098. /* Perform configure */
  1099. return surbl_module_config (cfg);
  1100. }
  1101. static gchar *
  1102. format_surbl_request (rspamd_mempool_t * pool,
  1103. rspamd_ftok_t * hostname,
  1104. struct suffix_item *suffix,
  1105. gboolean append_suffix,
  1106. GError ** err,
  1107. gboolean forced,
  1108. GHashTable *tree,
  1109. struct rspamd_url *url,
  1110. lua_State *L,
  1111. struct surbl_ctx *surbl_module_ctx)
  1112. {
  1113. GHashTable *t;
  1114. gchar *result = NULL;
  1115. const gchar *p, *dots[MAX_LEVELS];
  1116. gint r, i, dots_num = 0, level = MAX_LEVELS;
  1117. gsize slen, len;
  1118. gboolean found_exception = FALSE;
  1119. rspamd_ftok_t f;
  1120. if (G_LIKELY (suffix != NULL)) {
  1121. slen = strlen (suffix->suffix);
  1122. }
  1123. else if (!append_suffix) {
  1124. slen = 0;
  1125. }
  1126. else {
  1127. g_assert_not_reached ();
  1128. }
  1129. len = hostname->len + slen + 2;
  1130. p = hostname->begin;
  1131. while (p - hostname->begin < (gint)hostname->len && dots_num < MAX_LEVELS) {
  1132. if (*p == '.') {
  1133. dots[dots_num] = p;
  1134. dots_num++;
  1135. }
  1136. p++;
  1137. }
  1138. /* Check for numeric expressions */
  1139. if (url->flags & RSPAMD_URL_FLAG_NUMERIC) {
  1140. /* This is ip address */
  1141. if (suffix != NULL && (suffix->options & SURBL_OPTION_NOIP) != 0) {
  1142. /* Ignore such requests */
  1143. msg_info_pool ("ignore request of ip url for list %s",
  1144. suffix->symbol);
  1145. return NULL;
  1146. }
  1147. if (dots_num == 3) {
  1148. /* IPv4 address */
  1149. result = rspamd_mempool_alloc (pool, len);
  1150. r = rspamd_snprintf (result, len, "%*s.%*s.%*s.%*s",
  1151. (gint) (hostname->len - (dots[2] - hostname->begin + 1)),
  1152. dots[2] + 1,
  1153. (gint) (dots[2] - dots[1] - 1),
  1154. dots[1] + 1,
  1155. (gint) (dots[1] - dots[0] - 1),
  1156. dots[0] + 1,
  1157. (gint) (dots[0] - hostname->begin),
  1158. hostname->begin);
  1159. }
  1160. else {
  1161. /* Just pring ip as is */
  1162. result = rspamd_mempool_alloc (pool, len);
  1163. r = rspamd_snprintf (result, len, "%*s",
  1164. (gint)hostname->len, hostname->begin);
  1165. }
  1166. }
  1167. else {
  1168. /* Not a numeric url */
  1169. result = rspamd_mempool_alloc (pool, len);
  1170. if (suffix->options & SURBL_OPTION_FULLDOMAIN) {
  1171. /* Full domain case */
  1172. r = rspamd_snprintf (result,
  1173. len,
  1174. "%*s",
  1175. url->hostlen,
  1176. url->host);
  1177. }
  1178. else {
  1179. /* Now we should try to check for exceptions */
  1180. if (!forced && surbl_module_ctx->exceptions) {
  1181. for (i = MAX_LEVELS - 1; i >= 0; i--) {
  1182. t = surbl_module_ctx->exceptions[i];
  1183. if (t != NULL && dots_num >= i + 1) {
  1184. f.begin = dots[dots_num - i - 1] + 1;
  1185. f.len = hostname->len -
  1186. (dots[dots_num - i - 1] - hostname->begin + 1);
  1187. if (g_hash_table_lookup (t, &f) != NULL) {
  1188. level = dots_num - i - 1;
  1189. found_exception = TRUE;
  1190. break;
  1191. }
  1192. }
  1193. }
  1194. }
  1195. if (found_exception || url->tldlen == 0) {
  1196. if (level != MAX_LEVELS) {
  1197. if (level == 0) {
  1198. r = rspamd_snprintf (result,
  1199. len,
  1200. "%T",
  1201. hostname);
  1202. }
  1203. else {
  1204. r = rspamd_snprintf (result, len, "%*s",
  1205. (gint) (hostname->len -
  1206. (dots[level - 1] - hostname->begin + 1)),
  1207. dots[level - 1] + 1);
  1208. }
  1209. }
  1210. else if (dots_num >= 2) {
  1211. r = rspamd_snprintf (result, len, "%*s",
  1212. (gint) (hostname->len -
  1213. (dots[dots_num - 2] - hostname->begin + 1)),
  1214. dots[dots_num - 2] + 1);
  1215. }
  1216. else {
  1217. r = rspamd_snprintf (result,
  1218. len,
  1219. "%T",
  1220. hostname);
  1221. }
  1222. }
  1223. else {
  1224. /* No exception */
  1225. r = rspamd_snprintf (result,
  1226. len,
  1227. "%*s",
  1228. url->tldlen,
  1229. url->tld);
  1230. }
  1231. }
  1232. }
  1233. url->surbl = result;
  1234. url->surbllen = r;
  1235. if (!forced &&
  1236. rspamd_match_hash_map (surbl_module_ctx->whitelist, result) != NULL) {
  1237. msg_debug_pool ("url %s is whitelisted", result);
  1238. g_set_error (err, SURBL_ERROR,
  1239. WHITELIST_ERROR,
  1240. "URL is whitelisted: %s",
  1241. result);
  1242. return NULL;
  1243. }
  1244. if (append_suffix) {
  1245. if (suffix->url_process_cbref > 0) {
  1246. lua_rawgeti (L, LUA_REGISTRYINDEX, suffix->url_process_cbref);
  1247. lua_pushstring (L, result);
  1248. lua_pushstring (L, suffix->suffix);
  1249. if (lua_pcall (L, 2, 1, 0) != 0) {
  1250. msg_err_pool ("cannot call url process script: %s",
  1251. lua_tostring (L, -1));
  1252. lua_pop (L, 1);
  1253. rspamd_snprintf (result + r, len - r, ".%s", suffix->suffix);
  1254. }
  1255. else {
  1256. result = rspamd_mempool_strdup (pool, lua_tostring (L, -1));
  1257. lua_pop (L, 1);
  1258. }
  1259. }
  1260. else {
  1261. rspamd_snprintf (result + r, len - r, ".%s", suffix->suffix);
  1262. }
  1263. }
  1264. if (tree != NULL) {
  1265. if (g_hash_table_lookup (tree, result) != NULL) {
  1266. msg_debug_pool ("url %s is already registered", result);
  1267. g_set_error (err, SURBL_ERROR,
  1268. DUPLICATE_ERROR,
  1269. "URL is duplicated: %s",
  1270. result);
  1271. return NULL;
  1272. }
  1273. else {
  1274. g_hash_table_insert (tree, result, url);
  1275. }
  1276. }
  1277. msg_debug_pool ("request: %s, dots: %d, level: %d, orig: %*s",
  1278. result,
  1279. dots_num,
  1280. level,
  1281. (gint)hostname->len,
  1282. hostname->begin);
  1283. return result;
  1284. }
  1285. static void
  1286. make_surbl_requests (struct rspamd_url *url, struct rspamd_task *task,
  1287. struct rspamd_symcache_item *item,
  1288. struct suffix_item *suffix,
  1289. gboolean forced, GHashTable *tree,
  1290. struct surbl_ctx *surbl_module_ctx)
  1291. {
  1292. gchar *surbl_req;
  1293. rspamd_ftok_t f;
  1294. GError *err = NULL;
  1295. struct dns_param *param;
  1296. f.begin = url->host;
  1297. f.len = url->hostlen;
  1298. if (suffix->options & SURBL_OPTION_RESOLVEIP) {
  1299. /*
  1300. * We need to get url real TLD, resolve it with no suffix and then
  1301. * check against surbl using reverse octets printing
  1302. */
  1303. surbl_req = format_surbl_request (task->task_pool,
  1304. &f,
  1305. suffix,
  1306. FALSE,
  1307. &err,
  1308. forced,
  1309. tree,
  1310. url,
  1311. task->cfg->lua_state,
  1312. surbl_module_ctx);
  1313. if (surbl_req == NULL) {
  1314. if (err != NULL) {
  1315. if (err->code != WHITELIST_ERROR && err->code != DUPLICATE_ERROR) {
  1316. msg_info_surbl ("cannot format url string for surbl %*s, %e",
  1317. url->urllen, url->string,
  1318. err);
  1319. }
  1320. g_error_free (err);
  1321. return;
  1322. }
  1323. }
  1324. else {
  1325. /* XXX: We make merely A request here */
  1326. param =
  1327. rspamd_mempool_alloc (task->task_pool,
  1328. sizeof (struct dns_param));
  1329. param->url = url;
  1330. param->task = task;
  1331. param->suffix = suffix;
  1332. param->host_resolve =
  1333. rspamd_mempool_strdup (task->task_pool, surbl_req);
  1334. rspamd_ftok_t ftmp;
  1335. ftmp.begin = url->surbl;
  1336. ftmp.len = url->surbllen;
  1337. param->host_orig = rspamd_mempool_ftokdup (task->task_pool, &ftmp);
  1338. msg_debug_surbl ("send surbl dns ip request %s to %s", surbl_req,
  1339. suffix->suffix);
  1340. if (rspamd_dns_resolver_request_task (task,
  1341. surbl_dns_ip_callback,
  1342. (void *) param, RDNS_REQUEST_A, surbl_req)) {
  1343. param->item = item;
  1344. rspamd_symcache_item_async_inc (task, item, M);
  1345. }
  1346. }
  1347. }
  1348. else if ((surbl_req = format_surbl_request (task->task_pool,
  1349. &f,
  1350. suffix,
  1351. TRUE,
  1352. &err,
  1353. forced,
  1354. tree,
  1355. url,
  1356. task->cfg->lua_state,
  1357. surbl_module_ctx)) != NULL) {
  1358. param =
  1359. rspamd_mempool_alloc (task->task_pool, sizeof (struct dns_param));
  1360. param->url = url;
  1361. param->task = task;
  1362. param->suffix = suffix;
  1363. param->host_resolve =
  1364. rspamd_mempool_strdup (task->task_pool, url->surbl);
  1365. rspamd_ftok_t ftmp;
  1366. ftmp.begin = url->surbl;
  1367. ftmp.len = url->surbllen;
  1368. param->host_orig = rspamd_mempool_ftokdup (task->task_pool, &ftmp);
  1369. msg_debug_surbl ("send surbl dns request %s", surbl_req);
  1370. if (rspamd_dns_resolver_request_task (task,
  1371. surbl_dns_callback,
  1372. (void *) param, RDNS_REQUEST_A, surbl_req)) {
  1373. param->item = item;
  1374. rspamd_symcache_item_async_inc (task, item, M);
  1375. }
  1376. }
  1377. else if (err != NULL) {
  1378. if (err->code != WHITELIST_ERROR && err->code != DUPLICATE_ERROR) {
  1379. msg_info_surbl ("cannot format url string for surbl %*s, %e",
  1380. url->urllen,
  1381. url->string, err);
  1382. }
  1383. g_error_free (err);
  1384. return;
  1385. }
  1386. }
  1387. static void
  1388. process_dns_results (struct rspamd_task *task,
  1389. struct suffix_item *suffix,
  1390. gchar *resolved_name,
  1391. guint32 addr,
  1392. struct rspamd_url *uri)
  1393. {
  1394. guint i;
  1395. gboolean got_result = FALSE;
  1396. struct surbl_bit_item *bit;
  1397. struct in_addr ina;
  1398. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1399. if (suffix->ips && g_hash_table_size (suffix->ips) > 0) {
  1400. bit = g_hash_table_lookup (suffix->ips, &addr);
  1401. if (bit != NULL) {
  1402. msg_info_surbl ("domain [%s] is in surbl %s(%xd)",
  1403. resolved_name, suffix->suffix,
  1404. bit->bit);
  1405. rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);
  1406. got_result = TRUE;
  1407. }
  1408. }
  1409. else if (suffix->bits != NULL && suffix->bits->len > 0) {
  1410. for (i = 0; i < suffix->bits->len; i ++) {
  1411. bit = &g_array_index (suffix->bits, struct surbl_bit_item, i);
  1412. msg_debug_surbl ("got result(%d) AND bit(%d): %d",
  1413. (gint)addr,
  1414. (gint)ntohl (bit->bit),
  1415. (gint)bit->bit & (gint)ntohl (addr));
  1416. if (((gint)bit->bit & (gint)ntohl (addr)) != 0) {
  1417. got_result = TRUE;
  1418. msg_info_surbl ("domain [%s] is in surbl %s(%xd)",
  1419. resolved_name, suffix->suffix,
  1420. bit->bit);
  1421. rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);
  1422. }
  1423. }
  1424. }
  1425. if (!got_result) {
  1426. if ((suffix->bits == NULL || suffix->bits->len == 0) &&
  1427. suffix->ips == NULL) {
  1428. msg_info_surbl ("domain [%s] is in surbl %s",
  1429. resolved_name, suffix->suffix);
  1430. rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name);
  1431. }
  1432. else {
  1433. ina.s_addr = addr;
  1434. msg_info_surbl ("domain [%s] is in surbl %s but at unknown result: %s",
  1435. resolved_name, suffix->suffix,
  1436. inet_ntoa (ina));
  1437. }
  1438. }
  1439. }
  1440. static void
  1441. surbl_dns_callback (struct rdns_reply *reply, gpointer arg)
  1442. {
  1443. struct dns_param *param = (struct dns_param *)arg;
  1444. struct rspamd_task *task;
  1445. struct rdns_reply_entry *elt;
  1446. task = param->task;
  1447. if (reply->code == RDNS_RC_NOERROR && reply->entries) {
  1448. msg_debug_surbl ("domain [%s] is in surbl %s",
  1449. param->host_orig, param->suffix->suffix);
  1450. DL_FOREACH (reply->entries, elt) {
  1451. if (elt->type == RDNS_REQUEST_A) {
  1452. process_dns_results (param->task, param->suffix,
  1453. param->host_orig, (guint32) elt->content.a.addr.s_addr,
  1454. param->url);
  1455. }
  1456. }
  1457. }
  1458. else {
  1459. if (reply->code == RDNS_RC_NXDOMAIN || reply->code == RDNS_RC_NOREC) {
  1460. msg_debug_surbl ("domain [%s] is not in surbl %s",
  1461. param->host_orig,
  1462. param->suffix->suffix);
  1463. }
  1464. else {
  1465. /* Insert failure symbol */
  1466. GString *sym = g_string_new (param->suffix->symbol);
  1467. g_string_append (sym, "_FAIL");
  1468. rspamd_task_insert_result (task, sym->str, 1.0,
  1469. rdns_strerror (reply->code));
  1470. g_string_free (sym, TRUE);
  1471. }
  1472. }
  1473. rspamd_symcache_item_async_dec_check (param->task, param->item, M);
  1474. }
  1475. static void
  1476. surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg)
  1477. {
  1478. struct dns_param *param = (struct dns_param *) arg;
  1479. struct rspamd_task *task;
  1480. struct rdns_reply_entry *elt;
  1481. GString *to_resolve;
  1482. guint32 ip_addr;
  1483. task = param->task;
  1484. /* If we have result from DNS server, this url exists in SURBL, so increase score */
  1485. if (reply->code == RDNS_RC_NOERROR && reply->entries) {
  1486. LL_FOREACH (reply->entries, elt) {
  1487. if (elt->type == RDNS_REQUEST_A) {
  1488. to_resolve = g_string_sized_new (
  1489. strlen (param->suffix->suffix) +
  1490. sizeof ("255.255.255.255."));
  1491. ip_addr = elt->content.a.addr.s_addr;
  1492. /* Big endian <4>.<3>.<2>.<1> */
  1493. rspamd_printf_gstring (to_resolve, "%d.%d.%d.%d.%s",
  1494. ip_addr >> 24 & 0xff,
  1495. ip_addr >> 16 & 0xff,
  1496. ip_addr >> 8 & 0xff,
  1497. ip_addr & 0xff, param->suffix->suffix);
  1498. msg_debug_surbl (
  1499. "domain [%s] send %v request to surbl",
  1500. param->host_orig,
  1501. to_resolve);
  1502. if (rspamd_dns_resolver_request_task (task,
  1503. surbl_dns_callback,
  1504. param, RDNS_REQUEST_A, to_resolve->str)) {
  1505. rspamd_symcache_item_async_inc (param->task, param->item, M);
  1506. }
  1507. g_string_free (to_resolve, TRUE);
  1508. }
  1509. }
  1510. }
  1511. else {
  1512. msg_debug_surbl ("domain [%s] cannot be resolved for SURBL check %s",
  1513. param->host_resolve,
  1514. param->suffix->suffix);
  1515. }
  1516. rspamd_symcache_item_async_dec_check (param->task, param->item, M);
  1517. }
  1518. static void
  1519. free_redirector_session (void *ud)
  1520. {
  1521. struct redirector_param *param = (struct redirector_param *)ud;
  1522. if (param->item) {
  1523. rspamd_symcache_item_async_dec_check (param->task, param->item, M);
  1524. }
  1525. rspamd_http_connection_unref (param->conn);
  1526. }
  1527. static void
  1528. surbl_redirector_error (struct rspamd_http_connection *conn,
  1529. GError *err)
  1530. {
  1531. struct redirector_param *param = (struct redirector_param *)conn->ud;
  1532. struct rspamd_task *task;
  1533. task = param->task;
  1534. msg_err_surbl ("connection with http server %s terminated incorrectly: %e",
  1535. rspamd_inet_address_to_string (
  1536. rspamd_upstream_addr_cur (param->redirector)),
  1537. err);
  1538. rspamd_upstream_fail (param->redirector, FALSE);
  1539. rspamd_session_remove_event (param->task->s, free_redirector_session,
  1540. param);
  1541. }
  1542. static int
  1543. surbl_redirector_finish (struct rspamd_http_connection *conn,
  1544. struct rspamd_http_message *msg)
  1545. {
  1546. struct redirector_param *param = (struct redirector_param *)conn->ud;
  1547. struct rspamd_task *task;
  1548. struct surbl_ctx *surbl_module_ctx;
  1549. gint r, urllen;
  1550. struct rspamd_url *redirected_url, *existing;
  1551. const rspamd_ftok_t *hdr;
  1552. gchar *urlstr;
  1553. task = param->task;
  1554. surbl_module_ctx = surbl_get_context (task->cfg);
  1555. if (msg->code == 200) {
  1556. hdr = rspamd_http_message_find_header (msg, "Uri");
  1557. if (hdr != NULL) {
  1558. msg_info_surbl ("got reply from redirector: '%*s' -> '%T'",
  1559. param->url->urllen, param->url->string,
  1560. hdr);
  1561. urllen = hdr->len;
  1562. urlstr = rspamd_mempool_alloc (task->task_pool,
  1563. urllen + 1);
  1564. redirected_url = rspamd_mempool_alloc0 (task->task_pool,
  1565. sizeof (*redirected_url));
  1566. rspamd_strlcpy (urlstr, hdr->begin, urllen + 1);
  1567. r = rspamd_url_parse (redirected_url, urlstr, urllen,
  1568. task->task_pool, RSPAMD_URL_PARSE_TEXT);
  1569. if (r == URI_ERRNO_OK) {
  1570. if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls),
  1571. redirected_url)) == NULL) {
  1572. g_hash_table_insert (MESSAGE_FIELD (task, urls), redirected_url,
  1573. redirected_url);
  1574. redirected_url->phished_url = param->url;
  1575. redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  1576. }
  1577. else {
  1578. existing->count ++;
  1579. }
  1580. }
  1581. else {
  1582. msg_info_surbl ("cannot parse redirector reply: %s", urlstr);
  1583. }
  1584. }
  1585. }
  1586. else {
  1587. msg_info_surbl ("could not resolve '%*s' on redirector",
  1588. param->url->urllen, param->url->string);
  1589. }
  1590. rspamd_upstream_ok (param->redirector);
  1591. rspamd_session_remove_event (param->task->s, free_redirector_session,
  1592. param);
  1593. return 0;
  1594. }
  1595. static void
  1596. register_redirector_call (struct rspamd_url *url, struct rspamd_task *task,
  1597. const gchar *rule)
  1598. {
  1599. struct redirector_param *param;
  1600. struct upstream *selected;
  1601. struct rspamd_http_message *msg;
  1602. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1603. if (!rspamd_session_blocked (task->s)) {
  1604. selected = rspamd_upstream_get (surbl_module_ctx->redirectors,
  1605. RSPAMD_UPSTREAM_ROUND_ROBIN, url->host, url->hostlen);
  1606. param = rspamd_mempool_alloc0 (task->task_pool,
  1607. sizeof (struct redirector_param));
  1608. if (selected) {
  1609. param->conn = rspamd_http_connection_new_client (NULL,
  1610. NULL,
  1611. surbl_redirector_error,
  1612. surbl_redirector_finish,
  1613. RSPAMD_HTTP_CLIENT_SIMPLE,
  1614. rspamd_upstream_addr_next (selected));
  1615. }
  1616. if (param->conn == NULL) {
  1617. msg_info_surbl ("cannot create tcp socket failed: %s",
  1618. strerror (errno));
  1619. return;
  1620. }
  1621. param->url = url;
  1622. param->task = task;
  1623. param->ctx = surbl_module_ctx;
  1624. msg = rspamd_http_new_message (HTTP_REQUEST);
  1625. msg->url = rspamd_fstring_assign (msg->url, url->string, url->urllen);
  1626. param->redirector = selected;
  1627. rspamd_session_add_event (task->s,
  1628. free_redirector_session, param,
  1629. M);
  1630. param->item = rspamd_symcache_get_cur_item (task);
  1631. if (param->item) {
  1632. rspamd_symcache_item_async_inc (param->task, param->item, M);
  1633. }
  1634. rspamd_http_connection_write_message (param->conn, msg, NULL,
  1635. NULL, param, surbl_module_ctx->read_timeout);
  1636. msg_info_surbl (
  1637. "registered redirector call for %*s to %s, according to rule: %s",
  1638. url->urllen, url->string,
  1639. rspamd_upstream_name (param->redirector),
  1640. rule);
  1641. }
  1642. }
  1643. static void
  1644. surbl_tree_redirector_callback (gpointer key, gpointer value, void *data)
  1645. {
  1646. struct redirector_param *param = data, *nparam;
  1647. struct rspamd_task *task, **ptask;
  1648. struct rspamd_url *url = value, **purl;
  1649. lua_State *L;
  1650. rspamd_regexp_t *re;
  1651. rspamd_ftok_t srch;
  1652. gboolean found = FALSE;
  1653. gchar *found_tld;
  1654. struct surbl_ctx *surbl_module_ctx;
  1655. task = param->task;
  1656. surbl_module_ctx = param->ctx;
  1657. msg_debug_surbl ("check url redirection %*s", url->urllen, url->string);
  1658. if (url->hostlen <= 0) {
  1659. return;
  1660. }
  1661. /* Search in trie */
  1662. srch.begin = url->tld;
  1663. srch.len = url->tldlen;
  1664. re = g_hash_table_lookup (surbl_module_ctx->redirector_tlds, &srch);
  1665. if (re) {
  1666. if (re == NO_REGEXP) {
  1667. found = TRUE;
  1668. }
  1669. else if (rspamd_regexp_search (re, url->string, 0,
  1670. NULL, NULL, TRUE, NULL)) {
  1671. found = TRUE;
  1672. }
  1673. if (found) {
  1674. found_tld = rspamd_mempool_ftokdup (task->task_pool, &srch);
  1675. if (surbl_module_ctx->redirector_symbol != NULL) {
  1676. rspamd_task_insert_result (param->task,
  1677. surbl_module_ctx->redirector_symbol,
  1678. 1,
  1679. found_tld);
  1680. }
  1681. if (param->redirector_requests >= surbl_module_ctx->max_redirected_urls) {
  1682. msg_info_surbl ("cannot register redirector request for url domain: "
  1683. "%s, max_redirected_urls is reached: %d",
  1684. found_tld, surbl_module_ctx->max_redirected_urls);
  1685. return;
  1686. }
  1687. param->redirector_requests ++;
  1688. if (surbl_module_ctx->redirector_cbid != -1) {
  1689. nparam = rspamd_mempool_alloc (task->task_pool,
  1690. sizeof (*nparam));
  1691. /* Copy to detach from the shared param */
  1692. memcpy (nparam, param, sizeof (*param));
  1693. nparam->url = url;
  1694. L = task->cfg->lua_state;
  1695. lua_rawgeti (L, LUA_REGISTRYINDEX,
  1696. surbl_module_ctx->redirector_cbid);
  1697. ptask = lua_newuserdata (L, sizeof (*ptask));
  1698. *ptask = task;
  1699. rspamd_lua_setclass (L, "rspamd{task}", -1);
  1700. purl = lua_newuserdata (L, sizeof (*purl));
  1701. *purl = url;
  1702. rspamd_lua_setclass (L, "rspamd{url}", -1);
  1703. lua_pushlightuserdata (L, nparam);
  1704. rspamd_symcache_set_cur_item (task, param->item);
  1705. if (lua_pcall (L, 3, 0, 0) != 0) {
  1706. msg_err_task ("cannot call for redirector script: %s",
  1707. lua_tostring (L, -1));
  1708. lua_pop (L, 1);
  1709. }
  1710. else {
  1711. nparam->item = param->item;
  1712. }
  1713. }
  1714. else {
  1715. register_redirector_call (url,
  1716. param->task,
  1717. found_tld);
  1718. }
  1719. }
  1720. }
  1721. }
  1722. static void
  1723. surbl_tree_url_callback (gpointer key, gpointer value, void *data)
  1724. {
  1725. struct redirector_param *param = data;
  1726. struct rspamd_url *url = value;
  1727. struct rspamd_task *task;
  1728. struct surbl_ctx *surbl_module_ctx;
  1729. if (url->hostlen <= 0) {
  1730. return;
  1731. }
  1732. if (url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED) {
  1733. /* Skip urls that are displayed only */
  1734. return;
  1735. }
  1736. task = param->task;
  1737. surbl_module_ctx = param->ctx;
  1738. msg_debug_surbl ("check url %*s in %s", url->urllen, url->string,
  1739. param->suffix->suffix);
  1740. make_surbl_requests (url, param->task, param->item, param->suffix, FALSE,
  1741. param->tree, surbl_module_ctx);
  1742. }
  1743. static void
  1744. surbl_test_url (struct rspamd_task *task,
  1745. struct rspamd_symcache_item *item,
  1746. void *user_data)
  1747. {
  1748. struct redirector_param *param;
  1749. struct suffix_item *suffix = user_data;
  1750. guint i, j;
  1751. struct rspamd_mime_text_part *part;
  1752. struct html_image *img;
  1753. struct rspamd_url *url;
  1754. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1755. if (!rspamd_monitored_alive (suffix->m)) {
  1756. if (!suffix->reported_offline) {
  1757. msg_info_surbl ("disable surbl %s as it is reported to be offline",
  1758. suffix->suffix);
  1759. suffix->reported_offline = TRUE;
  1760. }
  1761. rspamd_symcache_finalize_item (task, item);
  1762. return;
  1763. }
  1764. suffix->reported_offline = FALSE;
  1765. param = rspamd_mempool_alloc0 (task->task_pool, sizeof (*param));
  1766. param->task = task;
  1767. param->suffix = suffix;
  1768. param->tree = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
  1769. param->ctx = surbl_module_ctx;
  1770. param->item = item;
  1771. rspamd_mempool_add_destructor (task->task_pool,
  1772. (rspamd_mempool_destruct_t)g_hash_table_unref,
  1773. param->tree);
  1774. g_hash_table_foreach (MESSAGE_FIELD (task, urls),
  1775. surbl_tree_url_callback, param);
  1776. rspamd_symcache_item_async_inc (task, item, M);
  1777. if (suffix->options & SURBL_OPTION_CHECKEMAILS) {
  1778. g_hash_table_foreach (MESSAGE_FIELD (task, emails),
  1779. surbl_tree_url_callback, param);
  1780. }
  1781. /* We also need to check and process img URLs */
  1782. if (suffix->options & SURBL_OPTION_CHECKIMAGES) {
  1783. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
  1784. if (part->html && part->html->images) {
  1785. for (j = 0; j < part->html->images->len; j ++) {
  1786. img = g_ptr_array_index (part->html->images, j);
  1787. if ((img->flags & RSPAMD_HTML_FLAG_IMAGE_EXTERNAL)
  1788. && img->url) {
  1789. surbl_tree_url_callback (img->url, img->url, param);
  1790. msg_debug_surbl ("checked image url %s over %s",
  1791. img->src, suffix->suffix);
  1792. }
  1793. }
  1794. }
  1795. }
  1796. }
  1797. if (suffix->options & SURBL_OPTION_CHECKDKIM) {
  1798. struct rspamd_symbol_result *s;
  1799. struct rspamd_symbol_option *opt;
  1800. s = rspamd_task_find_symbol_result (task, "DKIM_TRACE");
  1801. if (s && s->opts_head) {
  1802. DL_FOREACH (s->opts_head, opt) {
  1803. gsize len = strlen (opt->option);
  1804. gchar *p = opt->option + len - 1;
  1805. if (*p == '+') {
  1806. url = rspamd_html_process_url (task->task_pool,
  1807. opt->option, len - 2, NULL);
  1808. if (url) {
  1809. surbl_tree_url_callback (url, url, param);
  1810. msg_debug_surbl ("checked dkim url %s over %s",
  1811. url->string, suffix->suffix);
  1812. }
  1813. }
  1814. }
  1815. }
  1816. }
  1817. rspamd_symcache_item_async_dec_check (task, item, M);
  1818. }
  1819. static void
  1820. surbl_test_redirector (struct rspamd_task *task,
  1821. struct rspamd_symcache_item *item,
  1822. void *user_data)
  1823. {
  1824. struct redirector_param *param;
  1825. guint i, j;
  1826. struct rspamd_mime_text_part *part;
  1827. struct html_image *img;
  1828. struct rspamd_url *url;
  1829. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1830. if (!surbl_module_ctx->use_redirector || !surbl_module_ctx->redirector_tlds) {
  1831. rspamd_symcache_finalize_item (task, item);
  1832. return;
  1833. }
  1834. rspamd_symcache_item_async_inc (task, item, M);
  1835. param = rspamd_mempool_alloc0 (task->task_pool, sizeof (*param));
  1836. param->task = task;
  1837. param->suffix = NULL;
  1838. param->redirector_requests = 0;
  1839. param->ctx = surbl_module_ctx;
  1840. param->item = item;
  1841. g_hash_table_foreach (MESSAGE_FIELD (task, urls),
  1842. surbl_tree_redirector_callback, param);
  1843. /* We also need to check and process img URLs */
  1844. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
  1845. if (part->html && part->html->images) {
  1846. for (j = 0; j < part->html->images->len; j ++) {
  1847. img = g_ptr_array_index (part->html->images, j);
  1848. if ((img->flags & RSPAMD_HTML_FLAG_IMAGE_EXTERNAL)
  1849. && img->src) {
  1850. url = rspamd_html_process_url (task->task_pool,
  1851. img->src, strlen (img->src), NULL);
  1852. if (url) {
  1853. surbl_tree_redirector_callback (url, url, param);
  1854. msg_debug_surbl ("checked image url %s for redirectors",
  1855. img->src);
  1856. }
  1857. }
  1858. }
  1859. }
  1860. }
  1861. rspamd_symcache_item_async_dec_check (task, item, M);
  1862. }
  1863. static gint
  1864. surbl_register_redirect_handler (lua_State *L)
  1865. {
  1866. struct surbl_ctx *surbl_module_ctx;
  1867. struct rspamd_config *cfg = lua_check_config (L, 1);
  1868. if (!cfg) {
  1869. return luaL_error (L, "config is now required as the first parameter");
  1870. }
  1871. surbl_module_ctx = surbl_get_context (cfg);
  1872. if (surbl_module_ctx->redirector_cbid != -1) {
  1873. luaL_unref (L, LUA_REGISTRYINDEX, surbl_module_ctx->redirector_cbid);
  1874. }
  1875. lua_pushvalue (L, 2);
  1876. if (lua_type (L, -1) == LUA_TFUNCTION) {
  1877. surbl_module_ctx->redirector_cbid = luaL_ref (L, LUA_REGISTRYINDEX);
  1878. surbl_module_ctx->use_redirector = TRUE;
  1879. }
  1880. else {
  1881. lua_pop (L, 1);
  1882. return luaL_error (L, "argument must be a function");
  1883. }
  1884. return 0;
  1885. }
  1886. static gint
  1887. surbl_is_redirector_handler (lua_State *L)
  1888. {
  1889. const gchar *url;
  1890. struct rspamd_task *task;
  1891. struct rspamd_url uri;
  1892. gsize len;
  1893. rspamd_regexp_t *re;
  1894. rspamd_ftok_t srch;
  1895. gboolean found = FALSE;
  1896. gchar *found_tld, *url_cpy;
  1897. struct surbl_ctx *surbl_module_ctx;
  1898. task = lua_check_task (L, 1);
  1899. url = luaL_checklstring (L, 2, &len);
  1900. if (task && url) {
  1901. surbl_module_ctx = surbl_get_context (task->cfg);
  1902. url_cpy = rspamd_mempool_alloc (task->task_pool, len);
  1903. memcpy (url_cpy, url, len);
  1904. if (rspamd_url_parse (&uri, url_cpy, len, task->task_pool, RSPAMD_URL_PARSE_TEXT)) {
  1905. msg_debug_surbl ("check url redirection %*s", uri.urllen,
  1906. uri.string);
  1907. if (uri.hostlen <= 0) {
  1908. lua_pushboolean (L, false);
  1909. return 1;
  1910. }
  1911. /* Search in trie */
  1912. srch.begin = uri.tld;
  1913. srch.len = uri.tldlen;
  1914. re = g_hash_table_lookup (surbl_module_ctx->redirector_tlds, &srch);
  1915. if (re) {
  1916. if (re == NO_REGEXP) {
  1917. found = TRUE;
  1918. }
  1919. else if (rspamd_regexp_search (re, uri.string, 0,
  1920. NULL, NULL, TRUE, NULL)) {
  1921. found = TRUE;
  1922. }
  1923. if (found) {
  1924. found_tld = rspamd_mempool_ftokdup (task->task_pool, &srch);
  1925. lua_pushboolean (L, true);
  1926. lua_pushstring (L, found_tld);
  1927. return 2;
  1928. }
  1929. }
  1930. }
  1931. }
  1932. else {
  1933. return luaL_error (L, "arguments must be: task, url");
  1934. }
  1935. lua_pushboolean (L, false);
  1936. return 1;
  1937. }
  1938. /*
  1939. * Accepts two arguments:
  1940. * url: string with a redirected URL, if url is nil, then it couldn't be resolved
  1941. * userdata: opaque pointer of `struct redirector_param *`
  1942. */
  1943. static gint
  1944. surbl_continue_process_handler (lua_State *L)
  1945. {
  1946. struct redirector_param *param;
  1947. struct rspamd_task *task;
  1948. const gchar *nurl;
  1949. gint r;
  1950. gsize urllen;
  1951. struct rspamd_url *redirected_url;
  1952. gchar *urlstr;
  1953. struct surbl_ctx *surbl_module_ctx;
  1954. nurl = lua_tolstring (L, 1, &urllen);
  1955. param = (struct redirector_param *)lua_topointer (L, 2);
  1956. if (param != NULL) {
  1957. task = param->task;
  1958. surbl_module_ctx = surbl_get_context (task->cfg);
  1959. if (nurl != NULL) {
  1960. msg_info_surbl ("got reply from redirector: '%*s' -> '%*s'",
  1961. param->url->urllen, param->url->string,
  1962. (gint)urllen, nurl);
  1963. urlstr = rspamd_mempool_alloc (task->task_pool,
  1964. urllen + 1);
  1965. redirected_url = rspamd_mempool_alloc0 (task->task_pool,
  1966. sizeof (*redirected_url));
  1967. rspamd_strlcpy (urlstr, nurl, urllen + 1);
  1968. r = rspamd_url_parse (redirected_url, urlstr, urllen,
  1969. task->task_pool, RSPAMD_URL_PARSE_TEXT);
  1970. if (r == URI_ERRNO_OK) {
  1971. if (!g_hash_table_lookup (MESSAGE_FIELD (task, urls),
  1972. redirected_url)) {
  1973. g_hash_table_insert (MESSAGE_FIELD (task, urls),
  1974. redirected_url,
  1975. redirected_url);
  1976. redirected_url->phished_url = param->url;
  1977. redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  1978. }
  1979. }
  1980. else {
  1981. msg_info_surbl ("could not resolve '%*s' on redirector",
  1982. param->url->urllen, param->url->string);
  1983. }
  1984. }
  1985. else {
  1986. msg_info_surbl ("could not resolve '%*s' on redirector",
  1987. param->url->urllen, param->url->string);
  1988. }
  1989. }
  1990. else {
  1991. return luaL_error (L, "invalid arguments");
  1992. }
  1993. return 0;
  1994. }