You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

surbl.c 57KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /***MODULE:surbl
  17. * rspamd module that implements SURBL url checking
  18. *
  19. * Allowed options:
  20. * - weight (integer): weight of symbol
  21. * Redirector options:
  22. * - redirector (string): address of http redirector utility in format "host:port"
  23. * - redirector_connect_timeout (seconds): redirector connect timeout (default: 1s)
  24. * - redirector_read_timeout (seconds): timeout for reading data (default: 5s)
  25. * - redirector_hosts_map (map string): map that contains domains to check with redirector
  26. * Surbl options:
  27. * - exceptions (map string): map of domains that should be checked via surbl using 3 (e.g. somehost.domain.com)
  28. * components of domain name instead of normal 2 (e.g. domain.com)
  29. * - whitelist (map string): map of domains that should be whitelisted for surbl checks
  30. * - max_urls (integer): maximum allowed number of urls in message to be checked
  31. * - suffix (string): surbl address (for example insecure-bl.rambler.ru), may contain %b if bits are used (read documentation about it)
  32. * - bit (string): describes a prefix for a single bit
  33. */
  34. #include "config.h"
  35. #include "libmime/message.h"
  36. #include "libutil/map.h"
  37. #include "libutil/map_helpers.h"
  38. #include "rspamd.h"
  39. #include "utlist.h"
  40. #include "multipattern.h"
  41. #include "monitored.h"
  42. #include "libserver/html.h"
  43. #include "libutil/http_private.h"
  44. #include "unix-std.h"
  45. #include "lua/lua_common.h"
  46. #define msg_err_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
  47. "surbl", task->task_pool->tag.uid, \
  48. G_STRFUNC, \
  49. __VA_ARGS__)
  50. #define msg_warn_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
  51. "surbl", task->task_pool->tag.uid, \
  52. G_STRFUNC, \
  53. __VA_ARGS__)
  54. #define msg_info_surbl(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
  55. "surbl", task->task_pool->tag.uid, \
  56. G_STRFUNC, \
  57. __VA_ARGS__)
  58. #define msg_debug_surbl(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
  59. rspamd_surbl_log_id, "surbl", task->task_pool->tag.uid, \
  60. G_STRFUNC, \
  61. __VA_ARGS__)
  62. INIT_LOG_MODULE(surbl)
  63. static const gchar *M = "surbl";
  /* Default values */
  #define DEFAULT_SURBL_WEIGHT            10
  #define DEFAULT_REDIRECTOR_READ_TIMEOUT 5.0
  #define DEFAULT_SURBL_SYMBOL            "SURBL_DNS"

  /* Per-rule option flags, OR-ed together in suffix_item.options */
  #define SURBL_OPTION_NOIP        (1 << 0)
  #define SURBL_OPTION_RESOLVEIP   (1 << 1)
  #define SURBL_OPTION_CHECKIMAGES (1 << 2)
  #define SURBL_OPTION_CHECKDKIM   (1 << 3)
  #define SURBL_OPTION_FULLDOMAIN  (1 << 4)

  /*
   * Number of slots in the exceptions table array; a domain is filed under
   * the slot equal to the number of dots it contains.
   */
  #define MAX_LEVELS 10
  73. struct surbl_ctx {
  74. struct module_ctx ctx;
  75. guint16 weight;
  76. gdouble read_timeout;
  77. gboolean use_tags;
  78. GList *suffixes;
  79. gchar *metric;
  80. const gchar *redirector_symbol;
  81. GHashTable **exceptions;
  82. struct rspamd_hash_map_helper *whitelist;
  83. GHashTable *redirector_tlds;
  84. guint use_redirector;
  85. guint max_redirected_urls;
  86. gint redirector_cbid;
  87. struct upstream_list *redirectors;
  88. };
  89. struct suffix_item {
  90. guint64 magic;
  91. const gchar *monitored_domain;
  92. const gchar *suffix;
  93. const gchar *symbol;
  94. GArray *bits;
  95. GHashTable *ips;
  96. struct rspamd_monitored *m;
  97. guint32 options;
  98. gboolean reported_offline;
  99. gint callback_id;
  100. gint url_process_cbref;
  101. };
  102. struct dns_param {
  103. struct rspamd_url *url;
  104. struct rspamd_task *task;
  105. gchar *host_resolve;
  106. struct suffix_item *suffix;
  107. struct rspamd_symcache_item *item;
  108. struct surbl_module_ctx *ctx;
  109. };
  110. struct redirector_param {
  111. struct rspamd_url *url;
  112. struct rspamd_task *task;
  113. struct upstream *redirector;
  114. struct surbl_ctx *ctx;
  115. struct rspamd_http_connection *conn;
  116. GHashTable *tree;
  117. struct suffix_item *suffix;
  118. struct rspamd_symcache_item *item;
  119. gint sock;
  120. guint redirector_requests;
  121. };
  122. struct surbl_bit_item {
  123. guint32 bit;
  124. gchar *symbol;
  125. };
  126. #define SURBL_REDIRECTOR_CALLBACK "SURBL_REDIRECTOR_CALLBACK"
  127. static const guint64 rspamd_surbl_cb_magic = 0xe09b8536f80de0d1ULL;
  128. static const gchar *rspamd_surbl_default_monitored = "facebook.com";
  129. static const guint default_max_redirected_urls = 10;
  130. static void surbl_test_url (struct rspamd_task *task,
  131. struct rspamd_symcache_item *item,
  132. void *user_data);
  133. static void surbl_test_redirector (struct rspamd_task *task,
  134. struct rspamd_symcache_item *item,
  135. void *user_data);
  136. static void surbl_dns_callback (struct rdns_reply *reply, gpointer arg);
  137. static void surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg);
  138. static void process_dns_results (struct rspamd_task *task,
  139. struct suffix_item *suffix, gchar *resolved_name,
  140. guint32 addr, struct rspamd_url *url);
  141. static gint surbl_register_redirect_handler (lua_State *L);
  142. static gint surbl_continue_process_handler (lua_State *L);
  143. static gint surbl_is_redirector_handler (lua_State *L);
  /*
   * Sentinel stored in the redirectors table for entries without a regexp.
   * Fully parenthesized so that it always expands as a single operand
   * (the bare `(gpointer) - 1` form mis-parses after `sizeof`, for example).
   */
  #define NO_REGEXP ((gpointer) -1)

  /* GError domain of this module */
  #define SURBL_ERROR surbl_error_quark ()

  /*
   * Error codes within SURBL_ERROR. DUPLICATE_ERROR used to share the value
   * 1 with CONVERSION_ERROR, which made the two conditions impossible to
   * tell apart; every code is now distinct.
   */
  #define WHITELIST_ERROR 0
  #define CONVERSION_ERROR 1
  #define DUPLICATE_ERROR 2
  149. GQuark
  150. surbl_error_quark (void)
  151. {
  152. return g_quark_from_static_string ("surbl-error-quark");
  153. }
  154. /* Initialization */
  155. gint surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
  156. gint surbl_module_config (struct rspamd_config *cfg);
  157. gint surbl_module_reconfig (struct rspamd_config *cfg);
  158. module_t surbl_module = {
  159. "surbl",
  160. surbl_module_init,
  161. surbl_module_config,
  162. surbl_module_reconfig,
  163. NULL,
  164. RSPAMD_MODULE_VER,
  165. (guint)-1,
  166. };
  167. static inline struct surbl_ctx *
  168. surbl_get_context (struct rspamd_config *cfg)
  169. {
  170. return (struct surbl_ctx *)g_ptr_array_index (cfg->c_modules,
  171. surbl_module.ctx_offset);
  172. }
  173. static void
  174. exceptions_free_value (gpointer v)
  175. {
  176. rspamd_ftok_t *val = v;
  177. g_free ((gpointer)val->begin);
  178. g_free (val);
  179. }
  180. static void
  181. exception_insert (gpointer st, gconstpointer key, gconstpointer value)
  182. {
  183. GHashTable **t = st;
  184. gint level = 0;
  185. const gchar *p = key;
  186. rspamd_ftok_t *val;
  187. while (*p) {
  188. if (*p == '.') {
  189. level++;
  190. }
  191. p++;
  192. }
  193. if (level >= MAX_LEVELS) {
  194. msg_err ("invalid domain in exceptions list: %s, levels: %d",
  195. (gchar *)key,
  196. level);
  197. return;
  198. }
  199. val = g_malloc (sizeof (rspamd_ftok_t));
  200. val->begin = g_strdup (key);
  201. val->len = strlen (key);
  202. if (t[level] == NULL) {
  203. t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash,
  204. rspamd_ftok_icase_equal,
  205. exceptions_free_value,
  206. g_free);
  207. }
  208. g_hash_table_replace (t[level], val, g_strdup (value));
  209. }
  210. static gchar *
  211. read_exceptions_list (gchar * chunk,
  212. gint len,
  213. struct map_cb_data *data,
  214. gboolean final)
  215. {
  216. GHashTable **t;
  217. guint i;
  218. if (data->cur_data == NULL) {
  219. t = data->prev_data;
  220. if (t) {
  221. for (i = 0; i < MAX_LEVELS; i++) {
  222. if (t[i] != NULL) {
  223. g_hash_table_destroy (t[i]);
  224. }
  225. t[i] = NULL;
  226. }
  227. g_free (t);
  228. }
  229. data->prev_data = NULL;
  230. data->cur_data = g_malloc0 (MAX_LEVELS * sizeof (GHashTable *));
  231. }
  232. return rspamd_parse_kv_list (
  233. chunk,
  234. len,
  235. data,
  236. exception_insert,
  237. "",
  238. final);
  239. }
  240. static void
  241. fin_exceptions_list (struct map_cb_data *data, void **target)
  242. {
  243. GHashTable **t;
  244. gint i;
  245. if (target) {
  246. *target = data->cur_data;
  247. }
  248. if (data->prev_data) {
  249. t = data->prev_data;
  250. for (i = 0; i < MAX_LEVELS; i++) {
  251. if (t[i] != NULL) {
  252. rspamd_default_log_function (G_LOG_LEVEL_DEBUG,
  253. "surbl", "",
  254. G_STRFUNC,
  255. "exceptions level %d: %d elements",
  256. i, g_hash_table_size (t[i]));
  257. }
  258. }
  259. }
  260. }
  261. static void
  262. dtor_exceptions_list (struct map_cb_data *data)
  263. {
  264. GHashTable **t;
  265. gint i;
  266. if (data->cur_data) {
  267. t = data->cur_data;
  268. for (i = 0; i < MAX_LEVELS; i++) {
  269. if (t[i] != NULL) {
  270. g_hash_table_destroy (t[i]);
  271. }
  272. t[i] = NULL;
  273. }
  274. g_free (t);
  275. }
  276. }
  277. static void
  278. redirector_insert (gpointer st, gconstpointer key, gconstpointer value)
  279. {
  280. GHashTable *tld_hash = st;
  281. const gchar *p = key, *begin = key;
  282. rspamd_fstring_t *pat;
  283. rspamd_ftok_t *tok;
  284. rspamd_regexp_t *re = NO_REGEXP;
  285. GError *err = NULL;
  286. while (*p && !g_ascii_isspace (*p)) {
  287. p++;
  288. }
  289. pat = rspamd_fstring_new_init (begin, p - begin);
  290. tok = g_malloc0 (sizeof (*tok));
  291. tok->begin = pat->str;
  292. tok->len = pat->len;
  293. if (g_ascii_isspace (*p)) {
  294. while (g_ascii_isspace (*p) && *p) {
  295. p++;
  296. }
  297. if (*p) {
  298. re = rspamd_regexp_new (p,
  299. "ir",
  300. &err);
  301. if (re == NULL) {
  302. msg_warn ("could not read regexp: %e while reading regexp %s",
  303. err,
  304. p);
  305. g_error_free (err);
  306. re = NO_REGEXP;
  307. }
  308. }
  309. }
  310. g_hash_table_replace (tld_hash, tok, re);
  311. }
  312. static void
  313. redirector_item_free (gpointer p)
  314. {
  315. rspamd_regexp_t *re;
  316. if (p != NULL && p != NO_REGEXP) {
  317. re = (rspamd_regexp_t *)p;
  318. rspamd_regexp_unref (re);
  319. }
  320. }
  321. static gchar *
  322. read_redirectors_list (gchar * chunk,
  323. gint len,
  324. struct map_cb_data *data,
  325. gboolean final)
  326. {
  327. GHashTable *tld_hash;
  328. if (data->cur_data == NULL) {
  329. tld_hash = g_hash_table_new_full (rspamd_ftok_icase_hash,
  330. rspamd_ftok_icase_equal,
  331. rspamd_fstring_mapped_ftok_free,
  332. redirector_item_free);
  333. data->cur_data = tld_hash;
  334. }
  335. return rspamd_parse_kv_list (
  336. chunk,
  337. len,
  338. data,
  339. redirector_insert,
  340. "",
  341. final);
  342. }
  343. static void
  344. fin_redirectors_list (struct map_cb_data *data, void **target)
  345. {
  346. GHashTable *tld_hash;
  347. if (target) {
  348. *target = data->cur_data;
  349. }
  350. if (data->prev_data) {
  351. tld_hash = data->prev_data;
  352. g_hash_table_unref (tld_hash);
  353. }
  354. }
  355. static void
  356. dtor_redirectors_list (struct map_cb_data *data)
  357. {
  358. GHashTable *tld_hash;
  359. if (data->cur_data) {
  360. tld_hash = data->cur_data;
  361. g_hash_table_unref (tld_hash);
  362. }
  363. }
  364. gint
  365. surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  366. {
  367. struct surbl_ctx *surbl_module_ctx;
  368. surbl_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
  369. sizeof (struct surbl_ctx));
  370. surbl_module_ctx->use_redirector = 0;
  371. surbl_module_ctx->suffixes = NULL;
  372. surbl_module_ctx->redirectors = NULL;
  373. surbl_module_ctx->whitelist = NULL;
  374. surbl_module_ctx->exceptions = NULL;
  375. surbl_module_ctx->redirector_cbid = -1;
  376. *ctx = (struct module_ctx *)surbl_module_ctx;
  377. rspamd_rcl_add_doc_by_path (cfg,
  378. NULL,
  379. "URL blacklist plugin",
  380. "surbl",
  381. UCL_OBJECT,
  382. NULL,
  383. 0,
  384. NULL,
  385. 0);
  386. rspamd_rcl_add_doc_by_path (cfg,
  387. "surbl",
  388. "List of redirector servers",
  389. "redirector",
  390. UCL_STRING,
  391. NULL,
  392. 0,
  393. NULL,
  394. 0);
  395. rspamd_rcl_add_doc_by_path (cfg,
  396. "surbl",
  397. "Map of domains that should be checked with redirector",
  398. "redirector_hosts_map",
  399. UCL_STRING,
  400. NULL,
  401. 0,
  402. NULL,
  403. 0);
  404. rspamd_rcl_add_doc_by_path (cfg,
  405. "surbl",
  406. "Connect timeout for redirector",
  407. "redirector_connect_timeout",
  408. UCL_TIME,
  409. NULL,
  410. 0,
  411. NULL,
  412. 0);
  413. rspamd_rcl_add_doc_by_path (cfg,
  414. "surbl",
  415. "Read timeout for redirector",
  416. "redirector_read_timeout",
  417. UCL_TIME,
  418. NULL,
  419. 0,
  420. NULL,
  421. 0);
  422. rspamd_rcl_add_doc_by_path (cfg,
  423. "surbl",
  424. "Maximum number of URLs to process per message",
  425. "max_urls",
  426. UCL_INT,
  427. NULL,
  428. 0,
  429. NULL,
  430. 0);
  431. rspamd_rcl_add_doc_by_path (cfg,
  432. "surbl",
  433. "Rules for TLD composition",
  434. "exceptions",
  435. UCL_STRING,
  436. NULL,
  437. 0,
  438. NULL,
  439. 0);
  440. rspamd_rcl_add_doc_by_path (cfg,
  441. "surbl",
  442. "Map of whitelisted domains",
  443. "whitelist",
  444. UCL_STRING,
  445. NULL,
  446. 0,
  447. NULL,
  448. 0);
  449. rspamd_rcl_add_doc_by_path (cfg,
  450. "surbl",
  451. "URL blacklist rule",
  452. "rule",
  453. UCL_OBJECT,
  454. NULL,
  455. 0,
  456. NULL,
  457. 0);
  458. /* Rules doc strings */
  459. rspamd_rcl_add_doc_by_path (cfg,
  460. "surbl.rule",
  461. "Name of DNS black list (e.g. `multi.surbl.com`)",
  462. "suffix",
  463. UCL_STRING,
  464. NULL,
  465. 0,
  466. NULL,
  467. 0);
  468. rspamd_rcl_add_doc_by_path (cfg,
  469. "surbl.rule",
  470. "Symbol to insert (if no bits or suffixes are defined)",
  471. "symbol",
  472. UCL_STRING,
  473. NULL,
  474. 0,
  475. NULL,
  476. 0);
  477. rspamd_rcl_add_doc_by_path (cfg,
  478. "surbl.rule",
  479. "Whether the defined rule should be used",
  480. "enabled",
  481. UCL_BOOLEAN,
  482. NULL,
  483. 0,
  484. NULL,
  485. 0);
  486. rspamd_rcl_add_doc_by_path (cfg,
  487. "surbl.rule",
  488. "Do not try to check URLs with IP address instead of hostname",
  489. "no_ip",
  490. UCL_BOOLEAN,
  491. NULL,
  492. 0,
  493. NULL,
  494. 0);
  495. rspamd_rcl_add_doc_by_path (cfg,
  496. "surbl.rule",
  497. "Resolve URL host and then check against the specified suffix with reversed IP octets",
  498. "resolve_ip",
  499. UCL_BOOLEAN,
  500. NULL,
  501. 0,
  502. NULL,
  503. 0);
  504. rspamd_rcl_add_doc_by_path (cfg,
  505. "surbl.rule",
  506. "Check images URLs with this URL list",
  507. "images",
  508. UCL_BOOLEAN,
  509. NULL,
  510. 0,
  511. NULL,
  512. 0);
  513. rspamd_rcl_add_doc_by_path (cfg,
  514. "surbl.rule",
  515. "Parse IP bits in DNS reply, the content is 'symbol = <bit>'",
  516. "bits",
  517. UCL_OBJECT,
  518. NULL,
  519. 0,
  520. NULL,
  521. 0);
  522. rspamd_rcl_add_doc_by_path (cfg,
  523. "surbl.rule",
  524. "Parse IP addresses in DNS reply, the content is 'symbol = address'",
  525. "ips",
  526. UCL_OBJECT,
  527. NULL,
  528. 0,
  529. NULL,
  530. 0);
  531. rspamd_rcl_add_doc_by_path (cfg,
  532. "surbl.rule",
  533. "Check domains in valid DKIM signatures",
  534. "check_dkim",
  535. UCL_BOOLEAN,
  536. NULL,
  537. 0,
  538. NULL,
  539. 0);
  540. rspamd_rcl_add_doc_by_path (cfg,
  541. "surbl.rule",
  542. "Check full domain name instead of eSLD",
  543. "full_domain",
  544. UCL_BOOLEAN,
  545. NULL,
  546. 0,
  547. NULL,
  548. 0);
  549. return 0;
  550. }
  551. /*
  552. * Register virtual symbols for suffixes with bit wildcard
  553. */
  554. static void
  555. register_bit_symbols (struct rspamd_config *cfg, struct suffix_item *suffix,
  556. gint parent_id)
  557. {
  558. guint i;
  559. GHashTableIter it;
  560. struct surbl_bit_item *bit;
  561. gpointer k, v;
  562. if (suffix->ips != NULL) {
  563. g_hash_table_iter_init (&it, suffix->ips);
  564. while (g_hash_table_iter_next (&it, &k, &v)) {
  565. bit = v;
  566. /*
  567. * We can have multiple IPs mapped to a single symbol,
  568. * so skip symbol's registration to avoid duplicates
  569. */
  570. if (rspamd_symcache_find_symbol (cfg->cache, bit->symbol) == -1) {
  571. rspamd_symcache_add_symbol (cfg->cache, bit->symbol,
  572. 0, NULL, NULL,
  573. SYMBOL_TYPE_VIRTUAL, parent_id);
  574. }
  575. msg_debug_config ("bit: %d", bit->bit);
  576. }
  577. }
  578. else if (suffix->bits != NULL) {
  579. for (i = 0; i < suffix->bits->len; i++) {
  580. bit = &g_array_index (suffix->bits, struct surbl_bit_item, i);
  581. rspamd_symcache_add_symbol (cfg->cache, bit->symbol,
  582. 0, NULL, NULL,
  583. SYMBOL_TYPE_VIRTUAL, parent_id);
  584. }
  585. }
  586. else {
  587. rspamd_symcache_add_symbol (cfg->cache, suffix->symbol,
  588. 0, NULL, NULL,
  589. SYMBOL_TYPE_VIRTUAL, parent_id);
  590. }
  591. }
  592. static void
  593. surbl_module_add_ip (const ucl_object_t *ip, const gchar *symbol,
  594. struct suffix_item* suffix,
  595. struct rspamd_config* cfg)
  596. {
  597. gchar* p;
  598. guint32 bit;
  599. const gchar* ip_val;
  600. struct surbl_bit_item* new_bit;
  601. ip_val = ucl_obj_tostring (ip);
  602. new_bit = rspamd_mempool_alloc (
  603. cfg->cfg_pool,
  604. sizeof(struct surbl_bit_item));
  605. if (inet_pton (AF_INET, ip_val, &bit) != 1) {
  606. msg_err_config ("cannot parse ip %s: %s", ip_val,
  607. strerror (errno));
  608. return;
  609. }
  610. new_bit->bit = bit;
  611. new_bit->symbol = rspamd_mempool_strdup (
  612. cfg->cfg_pool,
  613. symbol);
  614. /* Convert to uppercase */
  615. p = new_bit->symbol;
  616. while (*p) {
  617. *p = g_ascii_toupper (*p);
  618. p++;
  619. }
  620. msg_debug_config ("add new IP suffix: %d with symbol: %s",
  621. (gint)new_bit->bit, new_bit->symbol);
  622. g_hash_table_insert (suffix->ips, &new_bit->bit,
  623. new_bit);
  624. }
  625. static gint
  626. surbl_module_parse_rule (const ucl_object_t* value, struct rspamd_config* cfg)
  627. {
  628. const ucl_object_t* cur_rule;
  629. const ucl_object_t* cur;
  630. gint cb_id;
  631. gint nrules = 0;
  632. struct suffix_item* new_suffix;
  633. const gchar *monitored_domain = NULL;
  634. struct surbl_bit_item* new_bit;
  635. ucl_object_t *ropts;
  636. struct surbl_ctx *surbl_module_ctx = surbl_get_context (cfg);
  637. LL_FOREACH(value, cur_rule) {
  638. monitored_domain = NULL;
  639. cur = ucl_object_lookup (cur_rule, "enabled");
  640. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  641. if (!ucl_object_toboolean (cur)) {
  642. continue;
  643. }
  644. }
  645. cur = ucl_object_lookup (cur_rule, "suffix");
  646. if (cur == NULL) {
  647. msg_err_config("surbl rule must have explicit symbol "
  648. "definition");
  649. continue;
  650. }
  651. new_suffix = rspamd_mempool_alloc0 (cfg->cfg_pool,
  652. sizeof (struct suffix_item));
  653. new_suffix->magic = rspamd_surbl_cb_magic;
  654. new_suffix->suffix = rspamd_mempool_strdup (
  655. cfg->cfg_pool, ucl_obj_tostring (cur));
  656. new_suffix->options = 0;
  657. new_suffix->bits = g_array_new (FALSE, FALSE,
  658. sizeof (struct surbl_bit_item));
  659. rspamd_mempool_add_destructor (cfg->cfg_pool,
  660. (rspamd_mempool_destruct_t )rspamd_array_free_hard,
  661. new_suffix->bits);
  662. cur = ucl_object_lookup (cur_rule, "symbol");
  663. if (cur == NULL) {
  664. if (ucl_object_key (value)) {
  665. new_suffix->symbol = rspamd_mempool_strdup (
  666. cfg->cfg_pool,
  667. ucl_object_key (value));
  668. }
  669. else {
  670. msg_warn_config(
  671. "surbl rule for suffix %s lacks symbol, using %s as symbol",
  672. new_suffix->suffix, DEFAULT_SURBL_SYMBOL);
  673. new_suffix->symbol = rspamd_mempool_strdup (
  674. cfg->cfg_pool, DEFAULT_SURBL_SYMBOL);
  675. }
  676. }
  677. else {
  678. new_suffix->symbol = rspamd_mempool_strdup (
  679. cfg->cfg_pool, ucl_obj_tostring (cur));
  680. }
  681. cur = ucl_object_lookup (cur_rule, "options");
  682. if (cur != NULL && cur->type == UCL_STRING) {
  683. if (strstr(ucl_obj_tostring (cur), "noip") != NULL) {
  684. new_suffix->options |= SURBL_OPTION_NOIP;
  685. }
  686. }
  687. cur = ucl_object_lookup (cur_rule, "no_ip");
  688. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  689. if (ucl_object_toboolean (cur)) {
  690. new_suffix->options |= SURBL_OPTION_NOIP;
  691. }
  692. }
  693. cur = ucl_object_lookup (cur_rule, "monitored_domain");
  694. if (cur != NULL && cur->type == UCL_STRING) {
  695. monitored_domain = ucl_object_tostring (cur);
  696. }
  697. cur = ucl_object_lookup (cur_rule, "resolve_ip");
  698. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  699. if (ucl_object_toboolean (cur)) {
  700. new_suffix->options |= SURBL_OPTION_RESOLVEIP;
  701. if (!monitored_domain) {
  702. monitored_domain = "1.0.0.127";
  703. }
  704. }
  705. }
  706. if (!monitored_domain) {
  707. monitored_domain = rspamd_surbl_default_monitored;
  708. }
  709. ropts = ucl_object_typed_new (UCL_OBJECT);
  710. ucl_object_insert_key (ropts,
  711. ucl_object_fromstring (monitored_domain),
  712. "prefix", 0, false);
  713. ucl_object_insert_key (ropts,
  714. ucl_object_fromstring ("nxdomain"),
  715. "rcode", 0, false);
  716. rspamd_mempool_add_destructor (cfg->cfg_pool,
  717. (rspamd_mempool_destruct_t )ucl_object_unref,
  718. ropts);
  719. cur = ucl_object_lookup (cur_rule, "images");
  720. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  721. if (ucl_object_toboolean (cur)) {
  722. new_suffix->options |= SURBL_OPTION_CHECKIMAGES;
  723. }
  724. }
  725. cur = ucl_object_lookup (cur_rule, "check_dkim");
  726. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  727. if (ucl_object_toboolean (cur)) {
  728. new_suffix->options |= SURBL_OPTION_CHECKDKIM;
  729. }
  730. }
  731. cur = ucl_object_lookup (cur_rule, "full_domain");
  732. if (cur != NULL && cur->type == UCL_BOOLEAN) {
  733. if (ucl_object_toboolean (cur)) {
  734. new_suffix->options |= SURBL_OPTION_FULLDOMAIN;
  735. }
  736. }
  737. if ((new_suffix->options & (SURBL_OPTION_RESOLVEIP | SURBL_OPTION_NOIP))
  738. == (SURBL_OPTION_NOIP | SURBL_OPTION_RESOLVEIP)) {
  739. /* Mutually exclusive options */
  740. msg_err_config ("options noip and resolve_ip are "
  741. "mutually exclusive for suffix %s", new_suffix->suffix);
  742. continue;
  743. }
  744. GString *sym = g_string_sized_new (127);
  745. gchar *p;
  746. rspamd_printf_gstring (sym, "SURBL_%s",
  747. new_suffix->suffix);
  748. p = sym->str;
  749. while (*p) {
  750. if (*p == '.') {
  751. *p = '_';
  752. }
  753. else {
  754. *p = g_ascii_toupper (*p);
  755. }
  756. p ++;
  757. }
  758. cb_id = rspamd_symcache_add_symbol (cfg->cache, sym->str,
  759. 0, surbl_test_url, new_suffix, SYMBOL_TYPE_CALLBACK, -1);
  760. rspamd_symcache_add_dependency (cfg->cache, cb_id,
  761. SURBL_REDIRECTOR_CALLBACK);
  762. g_string_free (sym, TRUE);
  763. nrules++;
  764. new_suffix->callback_id = cb_id;
  765. cur = ucl_object_lookup (cur_rule, "bits");
  766. if (cur != NULL && cur->type == UCL_OBJECT) {
  767. ucl_object_iter_t it = NULL;
  768. const ucl_object_t* cur_bit;
  769. guint32 bit;
  770. while ((cur_bit = ucl_object_iterate (cur, &it, true)) != NULL) {
  771. if (ucl_object_key (cur_bit) != NULL
  772. && cur_bit->type == UCL_INT) {
  773. gchar* p;
  774. bit = ucl_obj_toint (cur_bit);
  775. new_bit = rspamd_mempool_alloc (
  776. cfg->cfg_pool,
  777. sizeof(struct surbl_bit_item));
  778. new_bit->bit = bit;
  779. new_bit->symbol = rspamd_mempool_strdup (
  780. cfg->cfg_pool,
  781. ucl_object_key (cur_bit));
  782. /* Convert to uppercase */
  783. p = new_bit->symbol;
  784. while (*p) {
  785. *p = g_ascii_toupper (*p);
  786. p++;
  787. }
  788. msg_debug_config("add new bit suffix: %d with symbol: %s",
  789. (gint)new_bit->bit, new_bit->symbol);
  790. g_array_append_val(new_suffix->bits, *new_bit);
  791. }
  792. }
  793. }
  794. cur = ucl_object_lookup(cur_rule, "ips");
  795. if (cur != NULL && cur->type == UCL_OBJECT) {
  796. ucl_object_iter_t it = NULL;
  797. const ucl_object_t* cur_bit;
  798. new_suffix->ips = g_hash_table_new (g_int_hash, g_int_equal);
  799. rspamd_mempool_add_destructor (cfg->cfg_pool,
  800. (rspamd_mempool_destruct_t )g_hash_table_unref,
  801. new_suffix->ips);
  802. while ((cur_bit = ucl_object_iterate (cur, &it, true)) != NULL) {
  803. if (ucl_object_key (cur_bit) != NULL) {
  804. if (ucl_object_type (cur_bit) == UCL_STRING) {
  805. /* Single IP */
  806. surbl_module_add_ip (cur_bit, ucl_object_key (cur_bit),
  807. new_suffix, cfg);
  808. }
  809. else if (ucl_object_type (cur_bit) == UCL_ARRAY) {
  810. ucl_object_iter_t ar_it = NULL;
  811. const ucl_object_t* cur_ar;
  812. /* Array of IPs */
  813. while ((cur_ar = ucl_object_iterate (cur_bit, &ar_it,
  814. true)) != NULL) {
  815. if (ucl_object_type (cur_ar) == UCL_STRING) {
  816. surbl_module_add_ip (cur_ar,
  817. ucl_object_key (cur_bit),
  818. new_suffix, cfg);
  819. }
  820. else {
  821. msg_err_config ("garbadge in ips element");
  822. }
  823. }
  824. }
  825. }
  826. }
  827. }
  828. cur = ucl_object_lookup (cur_rule, "process_script");
  829. if (cur != NULL && cur->type == UCL_STRING) {
  830. lua_State *L = cfg->lua_state;
  831. GString *tb;
  832. gint err_idx;
  833. const gchar *input = ucl_object_tostring (cur);
  834. gboolean loaded = FALSE;
  835. lua_pushcfunction (L, &rspamd_lua_traceback);
  836. err_idx = lua_gettop (L);
  837. /* First try return + input */
  838. tb = g_string_sized_new (strlen (input) + sizeof ("return "));
  839. rspamd_printf_gstring (tb, "return %s", input);
  840. if (luaL_loadstring (L, tb->str) != 0) {
  841. /* Reset stack */
  842. lua_settop (L, err_idx - 1);
  843. lua_pushcfunction (L, &rspamd_lua_traceback);
  844. err_idx = lua_gettop (L);
  845. /* Try with no return */
  846. if (luaL_loadstring (L, input) != 0) {
  847. msg_err_config ("cannot load string %s\n",
  848. input);
  849. }
  850. else {
  851. loaded = TRUE;
  852. }
  853. }
  854. else {
  855. loaded = TRUE;
  856. }
  857. g_string_free (tb, TRUE);
  858. if (loaded) {
  859. if (lua_pcall (L, 0, 1, err_idx) != 0) {
  860. tb = lua_touserdata (L, -1);
  861. msg_err_config ("call failed: %v\n", tb);
  862. g_string_free (tb, TRUE);
  863. }
  864. else if (lua_isfunction (L, -1)) {
  865. new_suffix->url_process_cbref = luaL_ref (L,
  866. LUA_REGISTRYINDEX);
  867. }
  868. }
  869. lua_settop (L, err_idx - 1);
  870. }
  871. if (new_suffix->symbol) {
  872. /* Register just a symbol itself */
  873. rspamd_symcache_add_symbol (cfg->cache,
  874. new_suffix->symbol, 0,
  875. NULL, NULL, SYMBOL_TYPE_VIRTUAL, cb_id);
  876. nrules++;
  877. }
  878. new_suffix->m = rspamd_monitored_create (cfg->monitored_ctx,
  879. new_suffix->suffix, RSPAMD_MONITORED_DNS,
  880. RSPAMD_MONITORED_DEFAULT, ropts);
  881. surbl_module_ctx->suffixes = g_list_prepend (surbl_module_ctx->suffixes,
  882. new_suffix);
  883. }
  884. return nrules;
  885. }
  886. gint
  887. surbl_module_config (struct rspamd_config *cfg)
  888. {
  889. GList *cur_opt;
  890. struct suffix_item *cur_suffix = NULL;
  891. const ucl_object_t *value, *cur;
  892. const gchar *redir_val;
  893. gint nrules = 0;
  894. lua_State *L;
  895. struct surbl_ctx *surbl_module_ctx = surbl_get_context (cfg);
  896. if (!rspamd_config_is_module_enabled (cfg, "surbl")) {
  897. return TRUE;
  898. }
  899. /* Register global methods */
  900. L = cfg->lua_state;
  901. lua_getglobal (L, "rspamd_plugins");
  902. if (lua_type (L, -1) == LUA_TTABLE) {
  903. lua_pushstring (L, "surbl");
  904. lua_createtable (L, 0, 3);
  905. /* Set methods */
  906. lua_pushstring (L, "register_redirect");
  907. lua_pushcfunction (L, surbl_register_redirect_handler);
  908. lua_settable (L, -3);
  909. lua_pushstring (L, "continue_process");
  910. lua_pushcfunction (L, surbl_continue_process_handler);
  911. lua_settable (L, -3);
  912. lua_pushstring (L, "is_redirector");
  913. lua_pushcfunction (L, surbl_is_redirector_handler);
  914. lua_settable (L, -3);
  915. /* Finish surbl key */
  916. lua_settable (L, -3);
  917. }
  918. lua_pop (L, 1); /* Remove global function */
  919. (void) rspamd_symcache_add_symbol (cfg->cache, SURBL_REDIRECTOR_CALLBACK,
  920. 0, surbl_test_redirector, NULL,
  921. SYMBOL_TYPE_CALLBACK, -1);
  922. if ((value =
  923. rspamd_config_get_module_opt (cfg, "surbl", "redirector")) != NULL) {
  924. surbl_module_ctx->redirectors = rspamd_upstreams_create (cfg->ups_ctx);
  925. rspamd_mempool_add_destructor (cfg->cfg_pool,
  926. (rspamd_mempool_destruct_t)rspamd_upstreams_destroy,
  927. surbl_module_ctx->redirectors);
  928. LL_FOREACH (value, cur)
  929. {
  930. redir_val = ucl_obj_tostring (cur);
  931. if (rspamd_upstreams_add_upstream (surbl_module_ctx->redirectors,
  932. redir_val, 80, RSPAMD_UPSTREAM_PARSE_DEFAULT,
  933. NULL)) {
  934. surbl_module_ctx->use_redirector = TRUE;
  935. }
  936. }
  937. }
  938. if ((value =
  939. rspamd_config_get_module_opt (cfg, "surbl",
  940. "redirector_symbol")) != NULL) {
  941. surbl_module_ctx->redirector_symbol = ucl_obj_tostring (value);
  942. rspamd_symcache_add_symbol (cfg->cache,
  943. surbl_module_ctx->redirector_symbol,
  944. 0, NULL, NULL, SYMBOL_TYPE_COMPOSITE, -1);
  945. }
  946. else {
  947. surbl_module_ctx->redirector_symbol = NULL;
  948. }
  949. if ((value =
  950. rspamd_config_get_module_opt (cfg, "surbl", "weight")) != NULL) {
  951. surbl_module_ctx->weight = ucl_obj_toint (value);
  952. }
  953. else {
  954. surbl_module_ctx->weight = DEFAULT_SURBL_WEIGHT;
  955. }
  956. if ((value =
  957. rspamd_config_get_module_opt (cfg, "surbl", "use_tags")) != NULL) {
  958. surbl_module_ctx->use_tags = ucl_obj_toboolean (value);
  959. }
  960. else {
  961. surbl_module_ctx->use_tags = FALSE;
  962. }
  963. if ((value =
  964. rspamd_config_get_module_opt (cfg, "surbl",
  965. "redirector_read_timeout")) != NULL) {
  966. surbl_module_ctx->read_timeout = ucl_obj_todouble (value);
  967. }
  968. else {
  969. surbl_module_ctx->read_timeout = DEFAULT_REDIRECTOR_READ_TIMEOUT;
  970. }
  971. if ((value =
  972. rspamd_config_get_module_opt (cfg, "surbl",
  973. "redirector_hosts_map")) != NULL) {
  974. if (!rspamd_map_add_from_ucl (cfg, value,
  975. "SURBL redirectors list",
  976. read_redirectors_list,
  977. fin_redirectors_list,
  978. dtor_redirectors_list,
  979. (void **)&surbl_module_ctx->redirector_tlds)) {
  980. msg_warn_config ("bad redirectors map definition: %s",
  981. ucl_obj_tostring (value));
  982. }
  983. }
  984. if ((value =
  985. rspamd_config_get_module_opt (cfg, "surbl", "exceptions")) != NULL) {
  986. rspamd_map_add_from_ucl (cfg, value,
  987. "SURBL exceptions list",
  988. read_exceptions_list,
  989. fin_exceptions_list,
  990. dtor_exceptions_list,
  991. (void **)&surbl_module_ctx->exceptions);
  992. }
  993. if ((value =
  994. rspamd_config_get_module_opt (cfg, "surbl", "whitelist")) != NULL) {
  995. rspamd_map_add_from_ucl (cfg, value,
  996. "SURBL whitelist",
  997. rspamd_kv_list_read,
  998. rspamd_kv_list_fin,
  999. rspamd_kv_list_dtor,
  1000. (void **)&surbl_module_ctx->whitelist);
  1001. }
  1002. value = rspamd_config_get_module_opt (cfg, "surbl", "rule");
  1003. if (value != NULL && value->type == UCL_OBJECT) {
  1004. ucl_object_iter_t it = NULL;
  1005. const ucl_object_t *cur_value;
  1006. if (ucl_object_lookup (value, "symbol") != NULL) {
  1007. /* Old style */
  1008. nrules += surbl_module_parse_rule (value, cfg);
  1009. }
  1010. else {
  1011. /* New style */
  1012. while ((cur_value = ucl_object_iterate (value, &it, true)) != NULL) {
  1013. nrules += surbl_module_parse_rule (cur_value, cfg);
  1014. }
  1015. }
  1016. }
  1017. value = rspamd_config_get_module_opt (cfg, "surbl", "rules");
  1018. if (value != NULL && value->type == UCL_OBJECT) {
  1019. ucl_object_iter_t it = NULL;
  1020. const ucl_object_t *cur_value;
  1021. /* New style only */
  1022. while ((cur_value = ucl_object_iterate (value, &it, true)) != NULL) {
  1023. nrules += surbl_module_parse_rule (cur_value, cfg);
  1024. }
  1025. }
  1026. /* Add default suffix */
  1027. if (surbl_module_ctx->suffixes == NULL) {
  1028. msg_err_config ("surbl module loaded but no suffixes defined, skip "
  1029. "checks");
  1030. return TRUE;
  1031. }
  1032. if (surbl_module_ctx->suffixes != NULL) {
  1033. rspamd_mempool_add_destructor (cfg->cfg_pool,
  1034. (rspamd_mempool_destruct_t) g_list_free,
  1035. surbl_module_ctx->suffixes);
  1036. }
  1037. cur_opt = surbl_module_ctx->suffixes;
  1038. while (cur_opt) {
  1039. cur_suffix = cur_opt->data;
  1040. if (cur_suffix->bits != NULL || cur_suffix->ips != NULL) {
  1041. register_bit_symbols (cfg, cur_suffix, cur_suffix->callback_id);
  1042. }
  1043. if (cur_suffix->options & SURBL_OPTION_CHECKDKIM) {
  1044. rspamd_symcache_add_dependency (cfg->cache,
  1045. cur_suffix->callback_id, "DKIM_TRACE");
  1046. }
  1047. cur_opt = g_list_next (cur_opt);
  1048. }
  1049. surbl_module_ctx->max_redirected_urls = default_max_redirected_urls;
  1050. if ((value =
  1051. rspamd_config_get_module_opt (cfg, "surbl", "max_redirected_urls")) != NULL) {
  1052. surbl_module_ctx->max_redirected_urls = ucl_obj_toint (value);
  1053. }
  1054. msg_info_config ("init internal surbls module, %d uribl rules loaded",
  1055. nrules);
  1056. return TRUE;
  1057. }
  1058. gint
  1059. surbl_module_reconfig (struct rspamd_config *cfg)
  1060. {
  1061. struct surbl_ctx *surbl_module_ctx = surbl_get_context (cfg);
  1062. /* Reinit module */
  1063. surbl_module_ctx->use_redirector = 0;
  1064. surbl_module_ctx->suffixes = NULL;
  1065. surbl_module_ctx->redirectors = NULL;
  1066. surbl_module_ctx->whitelist = NULL;
  1067. /* Zero exceptions hashes */
  1068. surbl_module_ctx->exceptions = NULL;
  1069. rspamd_mempool_add_destructor (cfg->cfg_pool,
  1070. (rspamd_mempool_destruct_t) g_list_free,
  1071. surbl_module_ctx->suffixes);
  1072. /* Perform configure */
  1073. return surbl_module_config (cfg);
  1074. }
  1075. static gchar *
  1076. format_surbl_request (rspamd_mempool_t * pool,
  1077. rspamd_ftok_t * hostname,
  1078. struct suffix_item *suffix,
  1079. gboolean append_suffix,
  1080. GError ** err,
  1081. gboolean forced,
  1082. GHashTable *tree,
  1083. struct rspamd_url *url,
  1084. lua_State *L,
  1085. struct surbl_ctx *surbl_module_ctx)
  1086. {
  1087. GHashTable *t;
  1088. gchar *result = NULL;
  1089. const gchar *p, *dots[MAX_LEVELS];
  1090. gint r, i, dots_num = 0, level = MAX_LEVELS;
  1091. gsize slen, len;
  1092. gboolean found_exception = FALSE;
  1093. rspamd_ftok_t f;
  1094. if (G_LIKELY (suffix != NULL)) {
  1095. slen = strlen (suffix->suffix);
  1096. }
  1097. else if (!append_suffix) {
  1098. slen = 0;
  1099. }
  1100. else {
  1101. g_assert_not_reached ();
  1102. }
  1103. len = hostname->len + slen + 2;
  1104. p = hostname->begin;
  1105. while (p - hostname->begin < (gint)hostname->len && dots_num < MAX_LEVELS) {
  1106. if (*p == '.') {
  1107. dots[dots_num] = p;
  1108. dots_num++;
  1109. }
  1110. p++;
  1111. }
  1112. /* Check for numeric expressions */
  1113. if (url->flags & RSPAMD_URL_FLAG_NUMERIC) {
  1114. /* This is ip address */
  1115. if (suffix != NULL && (suffix->options & SURBL_OPTION_NOIP) != 0) {
  1116. /* Ignore such requests */
  1117. msg_info_pool ("ignore request of ip url for list %s",
  1118. suffix->symbol);
  1119. return NULL;
  1120. }
  1121. if (dots_num == 3) {
  1122. /* IPv4 address */
  1123. result = rspamd_mempool_alloc (pool, len);
  1124. r = rspamd_snprintf (result, len, "%*s.%*s.%*s.%*s",
  1125. (gint) (hostname->len - (dots[2] - hostname->begin + 1)),
  1126. dots[2] + 1,
  1127. (gint) (dots[2] - dots[1] - 1),
  1128. dots[1] + 1,
  1129. (gint) (dots[1] - dots[0] - 1),
  1130. dots[0] + 1,
  1131. (gint) (dots[0] - hostname->begin),
  1132. hostname->begin);
  1133. }
  1134. else {
  1135. /* Just pring ip as is */
  1136. result = rspamd_mempool_alloc (pool, len);
  1137. r = rspamd_snprintf (result, len, "%*s",
  1138. (gint)hostname->len, hostname->begin);
  1139. }
  1140. }
  1141. else {
  1142. /* Not a numeric url */
  1143. result = rspamd_mempool_alloc (pool, len);
  1144. if (suffix->options & SURBL_OPTION_FULLDOMAIN) {
  1145. /* Full domain case */
  1146. r = rspamd_snprintf (result,
  1147. len,
  1148. "%*s",
  1149. url->hostlen,
  1150. url->host);
  1151. }
  1152. else {
  1153. /* Now we should try to check for exceptions */
  1154. if (!forced && surbl_module_ctx->exceptions) {
  1155. for (i = MAX_LEVELS - 1; i >= 0; i--) {
  1156. t = surbl_module_ctx->exceptions[i];
  1157. if (t != NULL && dots_num >= i + 1) {
  1158. f.begin = dots[dots_num - i - 1] + 1;
  1159. f.len = hostname->len -
  1160. (dots[dots_num - i - 1] - hostname->begin + 1);
  1161. if (g_hash_table_lookup (t, &f) != NULL) {
  1162. level = dots_num - i - 1;
  1163. found_exception = TRUE;
  1164. break;
  1165. }
  1166. }
  1167. }
  1168. }
  1169. if (found_exception || url->tldlen == 0) {
  1170. if (level != MAX_LEVELS) {
  1171. if (level == 0) {
  1172. r = rspamd_snprintf (result,
  1173. len,
  1174. "%T",
  1175. hostname);
  1176. }
  1177. else {
  1178. r = rspamd_snprintf (result, len, "%*s",
  1179. (gint) (hostname->len -
  1180. (dots[level - 1] - hostname->begin + 1)),
  1181. dots[level - 1] + 1);
  1182. }
  1183. }
  1184. else if (dots_num >= 2) {
  1185. r = rspamd_snprintf (result, len, "%*s",
  1186. (gint) (hostname->len -
  1187. (dots[dots_num - 2] - hostname->begin + 1)),
  1188. dots[dots_num - 2] + 1);
  1189. }
  1190. else {
  1191. r = rspamd_snprintf (result,
  1192. len,
  1193. "%T",
  1194. hostname);
  1195. }
  1196. }
  1197. else {
  1198. /* No exception */
  1199. r = rspamd_snprintf (result,
  1200. len,
  1201. "%*s",
  1202. url->tldlen,
  1203. url->tld);
  1204. }
  1205. }
  1206. }
  1207. url->surbl = result;
  1208. url->surbllen = r;
  1209. if (!forced &&
  1210. rspamd_match_hash_map (surbl_module_ctx->whitelist, result) != NULL) {
  1211. msg_debug_pool ("url %s is whitelisted", result);
  1212. g_set_error (err, SURBL_ERROR,
  1213. WHITELIST_ERROR,
  1214. "URL is whitelisted: %s",
  1215. result);
  1216. return NULL;
  1217. }
  1218. if (append_suffix) {
  1219. if (suffix->url_process_cbref > 0) {
  1220. lua_rawgeti (L, LUA_REGISTRYINDEX, suffix->url_process_cbref);
  1221. lua_pushstring (L, result);
  1222. lua_pushstring (L, suffix->suffix);
  1223. if (lua_pcall (L, 2, 1, 0) != 0) {
  1224. msg_err_pool ("cannot call url process script: %s",
  1225. lua_tostring (L, -1));
  1226. lua_pop (L, 1);
  1227. rspamd_snprintf (result + r, len - r, ".%s", suffix->suffix);
  1228. }
  1229. else {
  1230. result = rspamd_mempool_strdup (pool, lua_tostring (L, -1));
  1231. lua_pop (L, 1);
  1232. }
  1233. }
  1234. else {
  1235. rspamd_snprintf (result + r, len - r, ".%s", suffix->suffix);
  1236. }
  1237. }
  1238. if (tree != NULL) {
  1239. if (g_hash_table_lookup (tree, result) != NULL) {
  1240. msg_debug_pool ("url %s is already registered", result);
  1241. g_set_error (err, SURBL_ERROR,
  1242. DUPLICATE_ERROR,
  1243. "URL is duplicated: %s",
  1244. result);
  1245. return NULL;
  1246. }
  1247. else {
  1248. g_hash_table_insert (tree, result, url);
  1249. }
  1250. }
  1251. msg_debug_pool ("request: %s, dots: %d, level: %d, orig: %*s",
  1252. result,
  1253. dots_num,
  1254. level,
  1255. (gint)hostname->len,
  1256. hostname->begin);
  1257. return result;
  1258. }
  1259. static void
  1260. make_surbl_requests (struct rspamd_url *url, struct rspamd_task *task,
  1261. struct rspamd_symcache_item *item,
  1262. struct suffix_item *suffix,
  1263. gboolean forced, GHashTable *tree,
  1264. struct surbl_ctx *surbl_module_ctx)
  1265. {
  1266. gchar *surbl_req;
  1267. rspamd_ftok_t f;
  1268. GError *err = NULL;
  1269. struct dns_param *param;
  1270. f.begin = url->host;
  1271. f.len = url->hostlen;
  1272. if (suffix->options & SURBL_OPTION_RESOLVEIP) {
  1273. /*
  1274. * We need to get url real TLD, resolve it with no suffix and then
  1275. * check against surbl using reverse octets printing
  1276. */
  1277. surbl_req = format_surbl_request (task->task_pool,
  1278. &f,
  1279. suffix,
  1280. FALSE,
  1281. &err,
  1282. forced,
  1283. tree,
  1284. url,
  1285. task->cfg->lua_state,
  1286. surbl_module_ctx);
  1287. if (surbl_req == NULL) {
  1288. if (err != NULL) {
  1289. if (err->code != WHITELIST_ERROR && err->code != DUPLICATE_ERROR) {
  1290. msg_info_surbl ("cannot format url string for surbl %*s, %e",
  1291. url->urllen, url->string,
  1292. err);
  1293. }
  1294. g_error_free (err);
  1295. return;
  1296. }
  1297. }
  1298. else {
  1299. /* XXX: We make merely A request here */
  1300. param =
  1301. rspamd_mempool_alloc (task->task_pool,
  1302. sizeof (struct dns_param));
  1303. param->url = url;
  1304. param->task = task;
  1305. param->suffix = suffix;
  1306. param->host_resolve =
  1307. rspamd_mempool_strdup (task->task_pool, surbl_req);
  1308. msg_debug_surbl ("send surbl dns ip request %s to %s", surbl_req,
  1309. suffix->suffix);
  1310. if (make_dns_request_task (task,
  1311. surbl_dns_ip_callback,
  1312. (void *) param, RDNS_REQUEST_A, surbl_req)) {
  1313. param->item = item;
  1314. rspamd_symcache_item_async_inc (task, item, M);
  1315. }
  1316. }
  1317. }
  1318. else if ((surbl_req = format_surbl_request (task->task_pool,
  1319. &f,
  1320. suffix,
  1321. TRUE,
  1322. &err,
  1323. forced,
  1324. tree,
  1325. url,
  1326. task->cfg->lua_state,
  1327. surbl_module_ctx)) != NULL) {
  1328. param =
  1329. rspamd_mempool_alloc (task->task_pool, sizeof (struct dns_param));
  1330. param->url = url;
  1331. param->task = task;
  1332. param->suffix = suffix;
  1333. param->host_resolve =
  1334. rspamd_mempool_strdup (task->task_pool, url->surbl);
  1335. msg_debug_surbl ("send surbl dns request %s", surbl_req);
  1336. if (make_dns_request_task (task,
  1337. surbl_dns_callback,
  1338. (void *) param, RDNS_REQUEST_A, surbl_req)) {
  1339. param->item = item;
  1340. rspamd_symcache_item_async_inc (task, item, M);
  1341. }
  1342. }
  1343. else if (err != NULL) {
  1344. if (err->code != WHITELIST_ERROR && err->code != DUPLICATE_ERROR) {
  1345. msg_info_surbl ("cannot format url string for surbl %*s, %e",
  1346. url->urllen,
  1347. url->string, err);
  1348. }
  1349. g_error_free (err);
  1350. return;
  1351. }
  1352. }
  1353. static void
  1354. process_dns_results (struct rspamd_task *task,
  1355. struct suffix_item *suffix,
  1356. gchar *resolved_name,
  1357. guint32 addr,
  1358. struct rspamd_url *uri)
  1359. {
  1360. guint i;
  1361. gboolean got_result = FALSE;
  1362. struct surbl_bit_item *bit;
  1363. struct in_addr ina;
  1364. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1365. if (suffix->ips && g_hash_table_size (suffix->ips) > 0) {
  1366. bit = g_hash_table_lookup (suffix->ips, &addr);
  1367. if (bit != NULL) {
  1368. msg_info_surbl ("<%s> domain [%s] is in surbl %s(%xd)",
  1369. task->message_id,
  1370. resolved_name, suffix->suffix,
  1371. bit->bit);
  1372. rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);
  1373. if (surbl_module_ctx->use_tags) {
  1374. rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool);
  1375. }
  1376. got_result = TRUE;
  1377. }
  1378. }
  1379. else if (suffix->bits != NULL && suffix->bits->len > 0) {
  1380. for (i = 0; i < suffix->bits->len; i ++) {
  1381. bit = &g_array_index (suffix->bits, struct surbl_bit_item, i);
  1382. msg_debug_surbl ("got result(%d) AND bit(%d): %d",
  1383. (gint)addr,
  1384. (gint)ntohl (bit->bit),
  1385. (gint)bit->bit & (gint)ntohl (addr));
  1386. if (((gint)bit->bit & (gint)ntohl (addr)) != 0) {
  1387. got_result = TRUE;
  1388. msg_info_surbl ("<%s> domain [%s] is in surbl %s(%xd)",
  1389. task->message_id,
  1390. resolved_name, suffix->suffix,
  1391. bit->bit);
  1392. rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);
  1393. if (surbl_module_ctx->use_tags) {
  1394. rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool);
  1395. }
  1396. }
  1397. }
  1398. }
  1399. if (!got_result) {
  1400. if ((suffix->bits == NULL || suffix->bits->len == 0) &&
  1401. suffix->ips == NULL) {
  1402. msg_info_surbl ("<%s> domain [%s] is in surbl %s",
  1403. task->message_id,
  1404. resolved_name, suffix->suffix);
  1405. rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name);
  1406. if (surbl_module_ctx->use_tags) {
  1407. rspamd_url_add_tag (uri, "surbl", suffix->symbol, task->task_pool);
  1408. }
  1409. }
  1410. else {
  1411. ina.s_addr = addr;
  1412. msg_info_surbl ("<%s> domain [%s] is in surbl %s but at unknown result: %s",
  1413. task->message_id,
  1414. resolved_name, suffix->suffix,
  1415. inet_ntoa (ina));
  1416. }
  1417. }
  1418. }
  1419. static void
  1420. surbl_dns_callback (struct rdns_reply *reply, gpointer arg)
  1421. {
  1422. struct dns_param *param = (struct dns_param *)arg;
  1423. struct rspamd_task *task;
  1424. struct rdns_reply_entry *elt;
  1425. task = param->task;
  1426. if (reply->code == RDNS_RC_NOERROR && reply->entries) {
  1427. msg_debug_surbl ("<%s> domain [%s] is in surbl %s",
  1428. param->task->message_id,
  1429. param->host_resolve, param->suffix->suffix);
  1430. DL_FOREACH (reply->entries, elt) {
  1431. if (elt->type == RDNS_REQUEST_A) {
  1432. process_dns_results (param->task, param->suffix,
  1433. param->host_resolve, (guint32) elt->content.a.addr.s_addr,
  1434. param->url);
  1435. }
  1436. }
  1437. }
  1438. else {
  1439. msg_debug_surbl ("<%s> domain [%s] is not in surbl %s",
  1440. param->task->message_id, param->host_resolve,
  1441. param->suffix->suffix);
  1442. }
  1443. rspamd_symcache_item_async_dec_check (param->task, param->item, M);
  1444. }
  1445. static void
  1446. surbl_dns_ip_callback (struct rdns_reply *reply, gpointer arg)
  1447. {
  1448. struct dns_param *param = (struct dns_param *) arg;
  1449. struct rspamd_task *task;
  1450. struct rdns_reply_entry *elt;
  1451. GString *to_resolve;
  1452. guint32 ip_addr;
  1453. task = param->task;
  1454. /* If we have result from DNS server, this url exists in SURBL, so increase score */
  1455. if (reply->code == RDNS_RC_NOERROR && reply->entries) {
  1456. LL_FOREACH (reply->entries, elt) {
  1457. if (elt->type == RDNS_REQUEST_A) {
  1458. to_resolve = g_string_sized_new (
  1459. strlen (param->suffix->suffix) +
  1460. sizeof ("255.255.255.255."));
  1461. ip_addr = elt->content.a.addr.s_addr;
  1462. /* Big endian <4>.<3>.<2>.<1> */
  1463. rspamd_printf_gstring (to_resolve, "%d.%d.%d.%d.%s",
  1464. ip_addr >> 24 & 0xff,
  1465. ip_addr >> 16 & 0xff,
  1466. ip_addr >> 8 & 0xff,
  1467. ip_addr & 0xff, param->suffix->suffix);
  1468. msg_debug_surbl (
  1469. "<%s> domain [%s] send %v request to surbl",
  1470. param->task->message_id,
  1471. param->host_resolve,
  1472. to_resolve);
  1473. if (make_dns_request_task (task,
  1474. surbl_dns_callback,
  1475. param, RDNS_REQUEST_A, to_resolve->str)) {
  1476. rspamd_symcache_item_async_inc (param->task, param->item, M);
  1477. }
  1478. g_string_free (to_resolve, TRUE);
  1479. }
  1480. }
  1481. }
  1482. else {
  1483. msg_debug_surbl ("<%s> domain [%s] cannot be resolved for SURBL check %s",
  1484. param->task->message_id, param->host_resolve,
  1485. param->suffix->suffix);
  1486. }
  1487. rspamd_symcache_item_async_dec_check (param->task, param->item, M);
  1488. }
  1489. static void
  1490. free_redirector_session (void *ud)
  1491. {
  1492. struct redirector_param *param = (struct redirector_param *)ud;
  1493. if (param->item) {
  1494. rspamd_symcache_item_async_dec_check (param->task, param->item, M);
  1495. }
  1496. rspamd_http_connection_unref (param->conn);
  1497. close (param->sock);
  1498. }
  1499. static void
  1500. surbl_redirector_error (struct rspamd_http_connection *conn,
  1501. GError *err)
  1502. {
  1503. struct redirector_param *param = (struct redirector_param *)conn->ud;
  1504. struct rspamd_task *task;
  1505. task = param->task;
  1506. msg_err_surbl ("connection with http server %s terminated incorrectly: %e",
  1507. rspamd_inet_address_to_string (
  1508. rspamd_upstream_addr_cur (param->redirector)),
  1509. err);
  1510. rspamd_upstream_fail (param->redirector, FALSE);
  1511. rspamd_session_remove_event (param->task->s, free_redirector_session,
  1512. param);
  1513. }
  1514. static int
  1515. surbl_redirector_finish (struct rspamd_http_connection *conn,
  1516. struct rspamd_http_message *msg)
  1517. {
  1518. struct redirector_param *param = (struct redirector_param *)conn->ud;
  1519. struct rspamd_task *task;
  1520. struct surbl_ctx *surbl_module_ctx;
  1521. gint r, urllen;
  1522. struct rspamd_url *redirected_url, *existing;
  1523. const rspamd_ftok_t *hdr;
  1524. gchar *urlstr;
  1525. task = param->task;
  1526. surbl_module_ctx = surbl_get_context (task->cfg);
  1527. if (msg->code == 200) {
  1528. hdr = rspamd_http_message_find_header (msg, "Uri");
  1529. if (hdr != NULL) {
  1530. msg_info_surbl ("<%s> got reply from redirector: '%*s' -> '%T'",
  1531. param->task->message_id,
  1532. param->url->urllen, param->url->string,
  1533. hdr);
  1534. urllen = hdr->len;
  1535. urlstr = rspamd_mempool_alloc (task->task_pool,
  1536. urllen + 1);
  1537. redirected_url = rspamd_mempool_alloc0 (task->task_pool,
  1538. sizeof (*redirected_url));
  1539. rspamd_strlcpy (urlstr, hdr->begin, urllen + 1);
  1540. r = rspamd_url_parse (redirected_url, urlstr, urllen,
  1541. task->task_pool, RSPAMD_URL_PARSE_TEXT);
  1542. if (r == URI_ERRNO_OK) {
  1543. if ((existing = g_hash_table_lookup (task->urls, redirected_url)) == NULL) {
  1544. g_hash_table_insert (task->urls, redirected_url,
  1545. redirected_url);
  1546. redirected_url->phished_url = param->url;
  1547. redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  1548. }
  1549. else {
  1550. existing->count ++;
  1551. }
  1552. if (surbl_module_ctx->use_tags) {
  1553. rspamd_url_add_tag (param->url, "redirector", urlstr,
  1554. task->task_pool);
  1555. }
  1556. }
  1557. else {
  1558. msg_info_surbl ("cannot parse redirector reply: %s", urlstr);
  1559. }
  1560. }
  1561. }
  1562. else {
  1563. msg_info_surbl ("<%s> could not resolve '%*s' on redirector",
  1564. param->task->message_id,
  1565. param->url->urllen, param->url->string);
  1566. }
  1567. rspamd_upstream_ok (param->redirector);
  1568. rspamd_session_remove_event (param->task->s, free_redirector_session,
  1569. param);
  1570. return 0;
  1571. }
  1572. static void
  1573. register_redirector_call (struct rspamd_url *url, struct rspamd_task *task,
  1574. const gchar *rule)
  1575. {
  1576. gint s = -1;
  1577. struct redirector_param *param;
  1578. struct timeval *timeout;
  1579. struct upstream *selected;
  1580. struct rspamd_http_message *msg;
  1581. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1582. if (!rspamd_session_blocked (task->s)) {
  1583. selected = rspamd_upstream_get (surbl_module_ctx->redirectors,
  1584. RSPAMD_UPSTREAM_ROUND_ROBIN, url->host, url->hostlen);
  1585. if (selected) {
  1586. s = rspamd_inet_address_connect (rspamd_upstream_addr_next (selected),
  1587. SOCK_STREAM, TRUE);
  1588. }
  1589. if (s == -1) {
  1590. msg_info_surbl ("<%s> cannot create tcp socket failed: %s",
  1591. task->message_id,
  1592. strerror (errno));
  1593. return;
  1594. }
  1595. param =
  1596. rspamd_mempool_alloc (task->task_pool,
  1597. sizeof (struct redirector_param));
  1598. param->url = url;
  1599. param->task = task;
  1600. param->conn = rspamd_http_connection_new (NULL,
  1601. s,
  1602. NULL,
  1603. surbl_redirector_error,
  1604. surbl_redirector_finish,
  1605. RSPAMD_HTTP_CLIENT_SIMPLE,
  1606. RSPAMD_HTTP_CLIENT);
  1607. param->ctx = surbl_module_ctx;
  1608. msg = rspamd_http_new_message (HTTP_REQUEST);
  1609. msg->url = rspamd_fstring_assign (msg->url, url->string, url->urllen);
  1610. param->sock = s;
  1611. param->redirector = selected;
  1612. timeout = rspamd_mempool_alloc (task->task_pool, sizeof (struct timeval));
  1613. double_to_tv (surbl_module_ctx->read_timeout, timeout);
  1614. rspamd_session_add_event (task->s,
  1615. free_redirector_session, param,
  1616. M);
  1617. param->item = rspamd_symcache_get_cur_item (task);
  1618. if (param->item) {
  1619. rspamd_symcache_item_async_inc (param->task, param->item, M);
  1620. }
  1621. rspamd_http_connection_write_message (param->conn, msg, NULL,
  1622. NULL, param, timeout);
  1623. msg_info_surbl (
  1624. "<%s> registered redirector call for %*s to %s, according to rule: %s",
  1625. task->message_id,
  1626. url->urllen, url->string,
  1627. rspamd_upstream_name (param->redirector),
  1628. rule);
  1629. }
  1630. }
  1631. static gboolean
  1632. surbl_test_tags (struct rspamd_task *task, struct redirector_param *param,
  1633. struct rspamd_url *url)
  1634. {
  1635. struct rspamd_url_tag *tag = NULL, *cur;
  1636. gchar *ftld = NULL;
  1637. rspamd_ftok_t tld;
  1638. gboolean processed = FALSE;
  1639. if (url->tags) {
  1640. tag = g_hash_table_lookup (url->tags, "surbl");
  1641. }
  1642. if (tag) {
  1643. tld.begin = url->tld;
  1644. tld.len = url->tldlen;
  1645. ftld = rspamd_mempool_ftokdup (task->task_pool, &tld);
  1646. /* We know results for this URL */
  1647. DL_FOREACH (tag, cur) {
  1648. msg_info_surbl ("<%s> domain [%s] is in surbl %s (tags)",
  1649. task->message_id,
  1650. ftld, cur->data);
  1651. rspamd_task_insert_result (task, cur->data, 1, ftld);
  1652. }
  1653. processed = TRUE;
  1654. }
  1655. return processed;
  1656. }
  1657. static void
  1658. surbl_tree_redirector_callback (gpointer key, gpointer value, void *data)
  1659. {
  1660. struct redirector_param *param = data, *nparam;
  1661. struct rspamd_task *task, **ptask;
  1662. struct rspamd_url *url = value, **purl;
  1663. lua_State *L;
  1664. rspamd_regexp_t *re;
  1665. rspamd_ftok_t srch;
  1666. gboolean found = FALSE;
  1667. gchar *found_tld;
  1668. struct surbl_ctx *surbl_module_ctx;
  1669. task = param->task;
  1670. surbl_module_ctx = param->ctx;
  1671. msg_debug_surbl ("check url redirection %*s", url->urllen, url->string);
  1672. if (url->hostlen <= 0) {
  1673. return;
  1674. }
  1675. /* Search in trie */
  1676. srch.begin = url->tld;
  1677. srch.len = url->tldlen;
  1678. re = g_hash_table_lookup (surbl_module_ctx->redirector_tlds, &srch);
  1679. if (re) {
  1680. if (re == NO_REGEXP) {
  1681. found = TRUE;
  1682. }
  1683. else if (rspamd_regexp_search (re, url->string, 0,
  1684. NULL, NULL, TRUE, NULL)) {
  1685. found = TRUE;
  1686. }
  1687. if (found) {
  1688. found_tld = rspamd_mempool_ftokdup (task->task_pool, &srch);
  1689. if (surbl_module_ctx->redirector_symbol != NULL) {
  1690. rspamd_task_insert_result (param->task,
  1691. surbl_module_ctx->redirector_symbol,
  1692. 1,
  1693. found_tld);
  1694. }
  1695. if (param->redirector_requests >= surbl_module_ctx->max_redirected_urls) {
  1696. msg_info_surbl ("cannot register redirector request for url domain: "
  1697. "%s, max_redirected_urls is reached: %d",
  1698. found_tld, surbl_module_ctx->max_redirected_urls);
  1699. return;
  1700. }
  1701. param->redirector_requests ++;
  1702. if (surbl_module_ctx->redirector_cbid != -1) {
  1703. nparam = rspamd_mempool_alloc (task->task_pool,
  1704. sizeof (*nparam));
  1705. /* Copy to detach from the shared param */
  1706. memcpy (nparam, param, sizeof (*param));
  1707. nparam->url = url;
  1708. L = task->cfg->lua_state;
  1709. lua_rawgeti (L, LUA_REGISTRYINDEX,
  1710. surbl_module_ctx->redirector_cbid);
  1711. ptask = lua_newuserdata (L, sizeof (*ptask));
  1712. *ptask = task;
  1713. rspamd_lua_setclass (L, "rspamd{task}", -1);
  1714. purl = lua_newuserdata (L, sizeof (*purl));
  1715. *purl = url;
  1716. rspamd_lua_setclass (L, "rspamd{url}", -1);
  1717. lua_pushlightuserdata (L, nparam);
  1718. rspamd_symcache_set_cur_item (task, param->item);
  1719. if (lua_pcall (L, 3, 0, 0) != 0) {
  1720. msg_err_task ("cannot call for redirector script: %s",
  1721. lua_tostring (L, -1));
  1722. lua_pop (L, 1);
  1723. }
  1724. else {
  1725. nparam->item = param->item;
  1726. }
  1727. }
  1728. else {
  1729. register_redirector_call (url,
  1730. param->task,
  1731. found_tld);
  1732. }
  1733. }
  1734. }
  1735. }
  1736. static void
  1737. surbl_tree_url_callback (gpointer key, gpointer value, void *data)
  1738. {
  1739. struct redirector_param *param = data;
  1740. struct rspamd_url *url = value;
  1741. struct rspamd_task *task;
  1742. struct surbl_ctx *surbl_module_ctx;
  1743. if (url->hostlen <= 0) {
  1744. return;
  1745. }
  1746. if (url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED) {
  1747. /* Skip urls that are displayed only */
  1748. return;
  1749. }
  1750. task = param->task;
  1751. surbl_module_ctx = param->ctx;
  1752. msg_debug_surbl ("check url %*s in %s", url->urllen, url->string,
  1753. param->suffix->suffix);
  1754. if (surbl_module_ctx->use_tags && surbl_test_tags (param->task, param, url)) {
  1755. return;
  1756. }
  1757. if (url->tags && g_hash_table_lookup (url->tags, "redirector")) {
  1758. /* URL is redirected, skip from checks */
  1759. return;
  1760. }
  1761. make_surbl_requests (url, param->task, param->item, param->suffix, FALSE,
  1762. param->tree, surbl_module_ctx);
  1763. }
  1764. static void
  1765. surbl_test_url (struct rspamd_task *task,
  1766. struct rspamd_symcache_item *item,
  1767. void *user_data)
  1768. {
  1769. struct redirector_param *param;
  1770. struct suffix_item *suffix = user_data;
  1771. guint i, j;
  1772. struct rspamd_mime_text_part *part;
  1773. struct html_image *img;
  1774. struct rspamd_url *url;
  1775. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1776. if (!rspamd_monitored_alive (suffix->m)) {
  1777. if (!suffix->reported_offline) {
  1778. msg_info_surbl ("disable surbl %s as it is reported to be offline",
  1779. suffix->suffix);
  1780. suffix->reported_offline = TRUE;
  1781. }
  1782. rspamd_symcache_finalize_item (task, item);
  1783. return;
  1784. }
  1785. suffix->reported_offline = FALSE;
  1786. param = rspamd_mempool_alloc0 (task->task_pool, sizeof (*param));
  1787. param->task = task;
  1788. param->suffix = suffix;
  1789. param->tree = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
  1790. param->ctx = surbl_module_ctx;
  1791. param->item = item;
  1792. rspamd_mempool_add_destructor (task->task_pool,
  1793. (rspamd_mempool_destruct_t)g_hash_table_unref,
  1794. param->tree);
  1795. g_hash_table_foreach (task->urls, surbl_tree_url_callback, param);
  1796. rspamd_symcache_item_async_inc (task, item, M);
  1797. /* We also need to check and process img URLs */
  1798. if (suffix->options & SURBL_OPTION_CHECKIMAGES) {
  1799. for (i = 0; i < task->text_parts->len; i ++) {
  1800. part = g_ptr_array_index (task->text_parts, i);
  1801. if (part->html && part->html->images) {
  1802. for (j = 0; j < part->html->images->len; j ++) {
  1803. img = g_ptr_array_index (part->html->images, j);
  1804. if ((img->flags & RSPAMD_HTML_FLAG_IMAGE_EXTERNAL)
  1805. && img->url) {
  1806. surbl_tree_url_callback (img->url, img->url, param);
  1807. msg_debug_surbl ("checked image url %s over %s",
  1808. img->src, suffix->suffix);
  1809. }
  1810. }
  1811. }
  1812. }
  1813. }
  1814. if (suffix->options & SURBL_OPTION_CHECKDKIM) {
  1815. struct rspamd_symbol_result *s;
  1816. struct rspamd_symbol_option *opt;
  1817. s = rspamd_task_find_symbol_result (task, "DKIM_TRACE");
  1818. if (s && s->opts_head) {
  1819. DL_FOREACH (s->opts_head, opt) {
  1820. gsize len = strlen (opt->option);
  1821. gchar *p = opt->option + len - 1;
  1822. if (*p == '+') {
  1823. url = rspamd_html_process_url (task->task_pool,
  1824. opt->option, len - 2, NULL);
  1825. if (url) {
  1826. surbl_tree_url_callback (url, url, param);
  1827. msg_debug_surbl ("checked dkim url %s over %s",
  1828. url->string, suffix->suffix);
  1829. }
  1830. }
  1831. }
  1832. }
  1833. }
  1834. rspamd_symcache_item_async_dec_check (task, item, M);
  1835. }
  1836. static void
  1837. surbl_test_redirector (struct rspamd_task *task,
  1838. struct rspamd_symcache_item *item,
  1839. void *user_data)
  1840. {
  1841. struct redirector_param *param;
  1842. guint i, j;
  1843. struct rspamd_mime_text_part *part;
  1844. struct html_image *img;
  1845. struct rspamd_url *url;
  1846. struct surbl_ctx *surbl_module_ctx = surbl_get_context (task->cfg);
  1847. if (!surbl_module_ctx->use_redirector || !surbl_module_ctx->redirector_tlds) {
  1848. rspamd_symcache_finalize_item (task, item);
  1849. return;
  1850. }
  1851. rspamd_symcache_item_async_inc (task, item, M);
  1852. param = rspamd_mempool_alloc0 (task->task_pool, sizeof (*param));
  1853. param->task = task;
  1854. param->suffix = NULL;
  1855. param->redirector_requests = 0;
  1856. param->ctx = surbl_module_ctx;
  1857. param->item = item;
  1858. g_hash_table_foreach (task->urls, surbl_tree_redirector_callback, param);
  1859. /* We also need to check and process img URLs */
  1860. for (i = 0; i < task->text_parts->len; i ++) {
  1861. part = g_ptr_array_index (task->text_parts, i);
  1862. if (part->html && part->html->images) {
  1863. for (j = 0; j < part->html->images->len; j ++) {
  1864. img = g_ptr_array_index (part->html->images, j);
  1865. if ((img->flags & RSPAMD_HTML_FLAG_IMAGE_EXTERNAL)
  1866. && img->src) {
  1867. url = rspamd_html_process_url (task->task_pool,
  1868. img->src, strlen (img->src), NULL);
  1869. if (url) {
  1870. surbl_tree_redirector_callback (url, url, param);
  1871. msg_debug_surbl ("checked image url %s for redirectors",
  1872. img->src);
  1873. }
  1874. }
  1875. }
  1876. }
  1877. }
  1878. rspamd_symcache_item_async_dec_check (task, item, M);
  1879. }
  1880. static gint
  1881. surbl_register_redirect_handler (lua_State *L)
  1882. {
  1883. struct surbl_ctx *surbl_module_ctx;
  1884. struct rspamd_config *cfg = lua_check_config (L, 1);
  1885. if (!cfg) {
  1886. return luaL_error (L, "config is now required as the first parameter");
  1887. }
  1888. surbl_module_ctx = surbl_get_context (cfg);
  1889. if (surbl_module_ctx->redirector_cbid != -1) {
  1890. luaL_unref (L, LUA_REGISTRYINDEX, surbl_module_ctx->redirector_cbid);
  1891. }
  1892. lua_pushvalue (L, 2);
  1893. if (lua_type (L, -1) == LUA_TFUNCTION) {
  1894. surbl_module_ctx->redirector_cbid = luaL_ref (L, LUA_REGISTRYINDEX);
  1895. surbl_module_ctx->use_redirector = TRUE;
  1896. }
  1897. else {
  1898. lua_pop (L, 1);
  1899. return luaL_error (L, "argument must be a function");
  1900. }
  1901. return 0;
  1902. }
  1903. static gint
  1904. surbl_is_redirector_handler (lua_State *L)
  1905. {
  1906. const gchar *url;
  1907. struct rspamd_task *task;
  1908. struct rspamd_url uri;
  1909. gsize len;
  1910. rspamd_regexp_t *re;
  1911. rspamd_ftok_t srch;
  1912. gboolean found = FALSE;
  1913. gchar *found_tld, *url_cpy;
  1914. struct surbl_ctx *surbl_module_ctx;
  1915. task = lua_check_task (L, 1);
  1916. url = luaL_checklstring (L, 2, &len);
  1917. surbl_module_ctx = surbl_get_context (task->cfg);
  1918. if (task && url) {
  1919. url_cpy = rspamd_mempool_alloc (task->task_pool, len);
  1920. memcpy (url_cpy, url, len);
  1921. if (rspamd_url_parse (&uri, url_cpy, len, task->task_pool, RSPAMD_URL_PARSE_TEXT)) {
  1922. msg_debug_surbl ("check url redirection %*s", uri.urllen,
  1923. uri.string);
  1924. if (uri.hostlen <= 0) {
  1925. lua_pushboolean (L, false);
  1926. return 1;
  1927. }
  1928. /* Search in trie */
  1929. srch.begin = uri.tld;
  1930. srch.len = uri.tldlen;
  1931. re = g_hash_table_lookup (surbl_module_ctx->redirector_tlds, &srch);
  1932. if (re) {
  1933. if (re == NO_REGEXP) {
  1934. found = TRUE;
  1935. }
  1936. else if (rspamd_regexp_search (re, uri.string, 0,
  1937. NULL, NULL, TRUE, NULL)) {
  1938. found = TRUE;
  1939. }
  1940. if (found) {
  1941. found_tld = rspamd_mempool_ftokdup (task->task_pool, &srch);
  1942. lua_pushboolean (L, true);
  1943. lua_pushstring (L, found_tld);
  1944. return 2;
  1945. }
  1946. }
  1947. }
  1948. }
  1949. else {
  1950. return luaL_error (L, "arguments must be: task, url");
  1951. }
  1952. lua_pushboolean (L, false);
  1953. return 1;
  1954. }
  1955. /*
  1956. * Accepts two arguments:
  1957. * url: string with a redirected URL, if url is nil, then it couldn't be resolved
  1958. * userdata: opaque pointer of `struct redirector_param *`
  1959. */
  1960. static gint
  1961. surbl_continue_process_handler (lua_State *L)
  1962. {
  1963. struct redirector_param *param;
  1964. struct rspamd_task *task;
  1965. const gchar *nurl;
  1966. gint r;
  1967. gsize urllen;
  1968. struct rspamd_url *redirected_url;
  1969. gchar *urlstr;
  1970. struct surbl_ctx *surbl_module_ctx;
  1971. nurl = lua_tolstring (L, 1, &urllen);
  1972. param = (struct redirector_param *)lua_topointer (L, 2);
  1973. if (param != NULL) {
  1974. task = param->task;
  1975. surbl_module_ctx = surbl_get_context (task->cfg);
  1976. if (nurl != NULL) {
  1977. msg_info_surbl ("<%s> got reply from redirector: '%*s' -> '%*s'",
  1978. param->task->message_id,
  1979. param->url->urllen, param->url->string,
  1980. (gint)urllen, nurl);
  1981. urlstr = rspamd_mempool_alloc (task->task_pool,
  1982. urllen + 1);
  1983. redirected_url = rspamd_mempool_alloc0 (task->task_pool,
  1984. sizeof (*redirected_url));
  1985. rspamd_strlcpy (urlstr, nurl, urllen + 1);
  1986. r = rspamd_url_parse (redirected_url, urlstr, urllen,
  1987. task->task_pool, RSPAMD_URL_PARSE_TEXT);
  1988. if (r == URI_ERRNO_OK) {
  1989. if (!g_hash_table_lookup (task->urls, redirected_url)) {
  1990. g_hash_table_insert (task->urls, redirected_url,
  1991. redirected_url);
  1992. redirected_url->phished_url = param->url;
  1993. redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  1994. }
  1995. if (surbl_module_ctx->use_tags) {
  1996. rspamd_url_add_tag (param->url, "redirector", urlstr,
  1997. task->task_pool);
  1998. }
  1999. }
  2000. else {
  2001. msg_info_surbl ("<%s> could not resolve '%*s' on redirector",
  2002. param->task->message_id,
  2003. param->url->urllen, param->url->string);
  2004. }
  2005. }
  2006. else {
  2007. msg_info_surbl ("<%s> could not resolve '%*s' on redirector",
  2008. param->task->message_id,
  2009. param->url->urllen, param->url->string);
  2010. }
  2011. }
  2012. else {
  2013. return luaL_error (L, "invalid arguments");
  2014. }
  2015. return 0;
  2016. }