You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_url.c 34KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "lua_url.h"
  18. /***
  19. * @module rspamd_url
  20. * This module provides routines to handle URL's and extract URL's from the text.
  21. * Objects of this class are returned, for example, by `task:get_urls()` or `task:get_emails()`.
  22. * You can also create `rspamd_url` from any text.
  23. * @example
  24. local url = require "rspamd_url"
  25. local mpool = require "rspamd_mempool"
  26. url.init("/usr/share/rspamd/effective_tld_names.dat")
  27. local pool = mpool.create()
  28. local res = url.create(pool, 'Look at: http://user@test.example.com/test?query')
  29. local t = res:to_table()
  30. -- Content of t:
  31. -- url = ['http://test.example.com/test?query']
  32. -- host = ['test.example.com']
  33. -- user = ['user']
  34. -- path = ['test']
  35. -- tld = ['example.com']
  36. pool:destroy() -- res is destroyed here, so you should not use it afterwards
  37. local mistake = res:to_table() -- INVALID! as pool is destroyed
  38. */
/*
 * URL methods
 *
 * Declarations of the handlers that are referenced by the registration
 * tables below (urllib_m / urllib_f) before their definitions appear.
 * NOTE(review): LUA_FUNCTION_DEF(url, x) presumably declares lua_url_x — confirm
 * against lua_common.h.
 */
LUA_FUNCTION_DEF(url, get_length);
LUA_FUNCTION_DEF(url, get_host);
LUA_FUNCTION_DEF(url, get_port);
LUA_FUNCTION_DEF(url, get_user);
LUA_FUNCTION_DEF(url, get_path);
LUA_FUNCTION_DEF(url, get_query);
LUA_FUNCTION_DEF(url, get_fragment);
LUA_FUNCTION_DEF(url, get_text);
LUA_FUNCTION_DEF(url, tostring);
LUA_FUNCTION_DEF(url, get_raw);
LUA_FUNCTION_DEF(url, get_tld);
LUA_FUNCTION_DEF(url, get_flags);
LUA_FUNCTION_DEF(url, get_flags_num);
LUA_FUNCTION_DEF(url, get_protocol);
LUA_FUNCTION_DEF(url, to_table);
LUA_FUNCTION_DEF(url, is_phished);
LUA_FUNCTION_DEF(url, is_redirected);
LUA_FUNCTION_DEF(url, is_obscured);
LUA_FUNCTION_DEF(url, is_html_displayed);
LUA_FUNCTION_DEF(url, is_subject);
LUA_FUNCTION_DEF(url, get_phished);
LUA_FUNCTION_DEF(url, set_redirected);
LUA_FUNCTION_DEF(url, get_count);
LUA_FUNCTION_DEF(url, get_visible);
LUA_FUNCTION_DEF(url, create);
LUA_FUNCTION_DEF(url, init);
LUA_FUNCTION_DEF(url, all);
LUA_FUNCTION_DEF(url, lt);
LUA_FUNCTION_DEF(url, eq);
LUA_FUNCTION_DEF(url, get_order);
LUA_FUNCTION_DEF(url, get_part_order);
LUA_FUNCTION_DEF(url, to_http);
/*
 * Method table for url objects: Lua-visible name -> C handler.
 * The list is terminated by the {NULL, NULL} sentinel.
 */
static const struct luaL_reg urllib_m[] = {
	LUA_INTERFACE_DEF(url, get_length),
	LUA_INTERFACE_DEF(url, get_host),
	LUA_INTERFACE_DEF(url, get_port),
	LUA_INTERFACE_DEF(url, get_user),
	LUA_INTERFACE_DEF(url, get_path),
	LUA_INTERFACE_DEF(url, get_query),
	LUA_INTERFACE_DEF(url, get_fragment),
	LUA_INTERFACE_DEF(url, get_text),
	LUA_INTERFACE_DEF(url, get_tld),
	LUA_INTERFACE_DEF(url, get_raw),
	LUA_INTERFACE_DEF(url, get_protocol),
	LUA_INTERFACE_DEF(url, to_table),
	LUA_INTERFACE_DEF(url, is_phished),
	LUA_INTERFACE_DEF(url, is_redirected),
	LUA_INTERFACE_DEF(url, is_obscured),
	LUA_INTERFACE_DEF(url, is_html_displayed),
	LUA_INTERFACE_DEF(url, is_subject),
	LUA_INTERFACE_DEF(url, get_phished),
	LUA_INTERFACE_DEF(url, get_visible),
	LUA_INTERFACE_DEF(url, get_count),
	LUA_INTERFACE_DEF(url, get_flags),
	LUA_INTERFACE_DEF(url, get_flags_num),
	LUA_INTERFACE_DEF(url, get_order),
	LUA_INTERFACE_DEF(url, get_part_order),
	LUA_INTERFACE_DEF(url, to_http),
	/* `get_redirected` is served by the same handler as `get_phished` */
	{"get_redirected", lua_url_get_phished},
	LUA_INTERFACE_DEF(url, set_redirected),
	/* Metamethods: string conversion and comparisons */
	{"__tostring", lua_url_tostring},
	{"__eq", lua_url_eq},
	{"__lt", lua_url_lt},
	{NULL, NULL}};
/*
 * Function table for the module itself (init, create, all),
 * terminated by the {NULL, NULL} sentinel.
 */
static const struct luaL_reg urllib_f[] = {
	LUA_INTERFACE_DEF(url, init),
	LUA_INTERFACE_DEF(url, create),
	LUA_INTERFACE_DEF(url, all),
	{NULL, NULL}};
  109. struct rspamd_lua_url *
  110. lua_check_url(lua_State *L, int pos)
  111. {
  112. void *ud = rspamd_lua_check_udata(L, pos, rspamd_url_classname);
  113. luaL_argcheck(L, ud != NULL, pos, "'url' expected");
  114. return ud ? ((struct rspamd_lua_url *) ud) : NULL;
  115. }
  116. static gboolean
  117. lua_url_single_inserter(struct rspamd_url *url, gsize start_offset,
  118. gsize end_offset, gpointer ud)
  119. {
  120. lua_State *L = ud;
  121. struct rspamd_lua_url *lua_url;
  122. lua_url = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
  123. rspamd_lua_setclass(L, rspamd_url_classname, -1);
  124. lua_url->url = url;
  125. return TRUE;
  126. }
  127. /***
  128. * @method url:get_length()
  129. * Get length of the url
  130. * @return {number} length of url in bytes
  131. */
  132. static int
  133. lua_url_get_length(lua_State *L)
  134. {
  135. LUA_TRACE_POINT;
  136. struct rspamd_lua_url *url = lua_check_url(L, 1);
  137. if (url != NULL) {
  138. lua_pushinteger(L, url->url->urllen);
  139. }
  140. else {
  141. lua_pushnil(L);
  142. }
  143. return 1;
  144. }
  145. /***
  146. * @method url:get_host()
  147. * Get domain part of the url
  148. * @return {string} domain part of URL
  149. */
  150. static int
  151. lua_url_get_host(lua_State *L)
  152. {
  153. LUA_TRACE_POINT;
  154. struct rspamd_lua_url *url = lua_check_url(L, 1);
  155. if (url != NULL && url->url && url->url->hostlen > 0) {
  156. lua_pushlstring(L, rspamd_url_host(url->url), url->url->hostlen);
  157. }
  158. else {
  159. lua_pushnil(L);
  160. }
  161. return 1;
  162. }
  163. /***
  164. * @method url:get_port()
  165. * Get port of the url
  166. * @return {number} url port
  167. */
  168. static int
  169. lua_url_get_port(lua_State *L)
  170. {
  171. LUA_TRACE_POINT;
  172. struct rspamd_lua_url *url = lua_check_url(L, 1);
  173. if (url != NULL) {
  174. if (rspamd_url_get_port_if_special(url->url) == 0) {
  175. lua_pushnil(L);
  176. }
  177. else {
  178. lua_pushinteger(L, rspamd_url_get_port_if_special(url->url));
  179. }
  180. }
  181. else {
  182. lua_pushnil(L);
  183. }
  184. return 1;
  185. }
  186. /***
  187. * @method url:get_user()
  188. * Get user part of the url (e.g. username in email)
  189. * @return {string} user part of URL
  190. */
  191. static int
  192. lua_url_get_user(lua_State *L)
  193. {
  194. LUA_TRACE_POINT;
  195. struct rspamd_lua_url *url = lua_check_url(L, 1);
  196. if (url != NULL && rspamd_url_user(url->url) != NULL) {
  197. lua_pushlstring(L, rspamd_url_user(url->url), url->url->userlen);
  198. }
  199. else {
  200. lua_pushnil(L);
  201. }
  202. return 1;
  203. }
  204. /***
  205. * @method url:get_path()
  206. * Get path of the url
  207. * @return {string} path part of URL
  208. */
  209. static int
  210. lua_url_get_path(lua_State *L)
  211. {
  212. LUA_TRACE_POINT;
  213. struct rspamd_lua_url *url = lua_check_url(L, 1);
  214. if (url != NULL && url->url->datalen > 0) {
  215. lua_pushlstring(L, rspamd_url_data_unsafe(url->url), url->url->datalen);
  216. }
  217. else {
  218. lua_pushnil(L);
  219. }
  220. return 1;
  221. }
  222. /***
  223. * @method url:get_query()
  224. * Get query of the url
  225. * @return {string} query part of URL
  226. */
  227. static int
  228. lua_url_get_query(lua_State *L)
  229. {
  230. LUA_TRACE_POINT;
  231. struct rspamd_lua_url *url = lua_check_url(L, 1);
  232. if (url != NULL && url->url->querylen > 0) {
  233. lua_pushlstring(L, rspamd_url_query_unsafe(url->url), url->url->querylen);
  234. }
  235. else {
  236. lua_pushnil(L);
  237. }
  238. return 1;
  239. }
  240. /***
  241. * @method url:get_fragment()
  242. * Get fragment of the url
  243. * @return {string} fragment part of URL
  244. */
  245. static int
  246. lua_url_get_fragment(lua_State *L)
  247. {
  248. LUA_TRACE_POINT;
  249. struct rspamd_lua_url *url = lua_check_url(L, 1);
  250. if (url != NULL && url->url->fragmentlen > 0) {
  251. lua_pushlstring(L, rspamd_url_fragment_unsafe(url->url), url->url->fragmentlen);
  252. }
  253. else {
  254. lua_pushnil(L);
  255. }
  256. return 1;
  257. }
  258. /***
  259. * @method url:get_text()
  260. * Get full content of the url
  261. * @return {string} url string
  262. */
  263. static int
  264. lua_url_get_text(lua_State *L)
  265. {
  266. LUA_TRACE_POINT;
  267. struct rspamd_lua_url *url = lua_check_url(L, 1);
  268. if (url != NULL) {
  269. lua_pushlstring(L, url->url->string, url->url->urllen);
  270. }
  271. else {
  272. lua_pushnil(L);
  273. }
  274. return 1;
  275. }
  276. /***
  277. * @method url:tostring()
  278. * Get full content of the url or user@domain in case of email
  279. * @return {string} url as a string
  280. */
  281. static int
  282. lua_url_tostring(lua_State *L)
  283. {
  284. LUA_TRACE_POINT;
  285. struct rspamd_lua_url *url = lua_check_url(L, 1);
  286. if (url != NULL && url->url != NULL) {
  287. if (url->url->protocol == PROTOCOL_MAILTO) {
  288. char *tmp = g_malloc(url->url->userlen + 1 +
  289. url->url->hostlen);
  290. if (url->url->userlen) {
  291. memcpy(tmp, url->url->string + url->url->usershift, url->url->userlen);
  292. }
  293. tmp[url->url->userlen] = '@';
  294. memcpy(tmp + url->url->userlen + 1, rspamd_url_host_unsafe(url->url),
  295. url->url->hostlen);
  296. lua_pushlstring(L, tmp, url->url->userlen + 1 + url->url->hostlen);
  297. g_free(tmp);
  298. }
  299. else {
  300. lua_pushlstring(L, url->url->string, url->url->urllen);
  301. }
  302. }
  303. else {
  304. lua_pushnil(L);
  305. }
  306. return 1;
  307. }
  308. /***
  309. * @method url:to_http()
  310. * Get URL suitable for HTTP request (e.g. by trimming fragment and user parts)
  311. * @return {string} url as a string
  312. */
  313. static int
  314. lua_url_to_http(lua_State *L)
  315. {
  316. LUA_TRACE_POINT;
  317. struct rspamd_lua_url *url = lua_check_url(L, 1);
  318. if (url != NULL && url->url != NULL) {
  319. if (url->url->protocol == PROTOCOL_MAILTO) {
  320. /* Nothing to do here */
  321. lua_pushnil(L);
  322. }
  323. else {
  324. if (url->url->userlen > 0) {
  325. /* We need to reconstruct url :( */
  326. gsize len = url->url->urllen - url->url->fragmentlen + 1;
  327. /* Strip the # character */
  328. if (url->url->fragmentlen > 0 && len > 0) {
  329. while (url->url->string[len - 1] == '#' && len > 0) {
  330. len--;
  331. }
  332. }
  333. char *nstr = g_malloc(len);
  334. char *d = nstr, *end = nstr + len;
  335. memcpy(nstr, url->url->string, url->url->protocollen);
  336. d += url->url->protocollen;
  337. *d++ = ':';
  338. *d++ = '/';
  339. *d++ = '/';
  340. /* Host part */
  341. memcpy(d, rspamd_url_host(url->url), url->url->hostlen);
  342. d += url->url->hostlen;
  343. int port = rspamd_url_get_port_if_special(url->url);
  344. if (port > 0) {
  345. d += rspamd_snprintf(d, end - d, ":%d/", port);
  346. }
  347. else {
  348. *d++ = '/';
  349. }
  350. if (url->url->datalen > 0) {
  351. memcpy(d, rspamd_url_data_unsafe(url->url), url->url->datalen);
  352. d += url->url->datalen;
  353. }
  354. if (url->url->querylen > 0) {
  355. *d++ = '?';
  356. memcpy(d, rspamd_url_query_unsafe(url->url), url->url->querylen);
  357. d += url->url->querylen;
  358. }
  359. g_assert(d < end);
  360. lua_pushlstring(L, nstr, d - nstr);
  361. }
  362. else {
  363. gsize len = url->url->urllen - url->url->fragmentlen;
  364. /* Strip the # character */
  365. if (url->url->fragmentlen > 0 && len > 0) {
  366. while (url->url->string[len - 1] == '#' && len > 0) {
  367. len--;
  368. }
  369. }
  370. lua_pushlstring(L, url->url->string, len);
  371. }
  372. }
  373. }
  374. else {
  375. lua_pushnil(L);
  376. }
  377. return 1;
  378. }
  379. /***
  380. * @method url:get_raw()
  381. * Get full content of the url as it was parsed (e.g. with urldecode)
  382. * @return {string} url string
  383. */
  384. static int
  385. lua_url_get_raw(lua_State *L)
  386. {
  387. LUA_TRACE_POINT;
  388. struct rspamd_lua_url *url = lua_check_url(L, 1);
  389. if (url != NULL) {
  390. lua_pushlstring(L, url->url->raw, url->url->rawlen);
  391. }
  392. else {
  393. lua_pushnil(L);
  394. }
  395. return 1;
  396. }
  397. /***
  398. * @method url:is_phished()
  399. * Check whether URL is treated as phished
  400. * @return {boolean} `true` if URL is phished
  401. */
  402. static int
  403. lua_url_is_phished(lua_State *L)
  404. {
  405. LUA_TRACE_POINT;
  406. struct rspamd_lua_url *url = lua_check_url(L, 1);
  407. if (url != NULL) {
  408. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_PHISHED);
  409. }
  410. else {
  411. lua_pushnil(L);
  412. }
  413. return 1;
  414. }
  415. /***
  416. * @method url:is_redirected()
  417. * Check whether URL was redirected
  418. * @return {boolean} `true` if URL is redirected
  419. */
  420. static int
  421. lua_url_is_redirected(lua_State *L)
  422. {
  423. LUA_TRACE_POINT;
  424. struct rspamd_lua_url *url = lua_check_url(L, 1);
  425. if (url != NULL) {
  426. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_REDIRECTED);
  427. }
  428. else {
  429. lua_pushnil(L);
  430. }
  431. return 1;
  432. }
  433. /***
  434. * @method url:is_obscured()
  435. * Check whether URL is treated as obscured or obfuscated (e.g. numbers in IP address or other hacks)
  436. * @return {boolean} `true` if URL is obscured
  437. */
  438. static int
  439. lua_url_is_obscured(lua_State *L)
  440. {
  441. LUA_TRACE_POINT;
  442. struct rspamd_lua_url *url = lua_check_url(L, 1);
  443. if (url != NULL) {
  444. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_OBSCURED);
  445. }
  446. else {
  447. lua_pushnil(L);
  448. }
  449. return 1;
  450. }
  451. /***
  452. * @method url:is_html_displayed()
  453. * Check whether URL is just displayed in HTML (e.g. NOT a real href)
  454. * @return {boolean} `true` if URL is displayed only
  455. */
  456. static int
  457. lua_url_is_html_displayed(lua_State *L)
  458. {
  459. LUA_TRACE_POINT;
  460. struct rspamd_lua_url *url = lua_check_url(L, 1);
  461. if (url != NULL) {
  462. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED);
  463. }
  464. else {
  465. lua_pushnil(L);
  466. }
  467. return 1;
  468. }
  469. /***
  470. * @method url:is_subject()
  471. * Check whether URL is found in subject
  472. * @return {boolean} `true` if URL is found in subject
  473. */
  474. static int
  475. lua_url_is_subject(lua_State *L)
  476. {
  477. LUA_TRACE_POINT;
  478. struct rspamd_lua_url *url = lua_check_url(L, 1);
  479. if (url != NULL) {
  480. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_SUBJECT);
  481. }
  482. else {
  483. lua_pushnil(L);
  484. }
  485. return 1;
  486. }
  487. /***
  488. * @method url:get_phished()
  489. * Get another URL that pretends to be this URL (e.g. used in phishing)
  490. * @return {url} phished URL
  491. */
  492. static int
  493. lua_url_get_phished(lua_State *L)
  494. {
  495. LUA_TRACE_POINT;
  496. struct rspamd_lua_url *purl, *url = lua_check_url(L, 1);
  497. if (url) {
  498. if (url->url->ext && url->url->ext->linked_url != NULL) {
  499. /* XXX: in fact, this is the only possible combination of flags, so this check is redundant */
  500. if (url->url->flags &
  501. (RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_REDIRECTED)) {
  502. purl = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
  503. rspamd_lua_setclass(L, rspamd_url_classname, -1);
  504. purl->url = url->url->ext->linked_url;
  505. return 1;
  506. }
  507. }
  508. }
  509. lua_pushnil(L);
  510. return 1;
  511. }
  512. /***
  513. * @method url:set_redirected(url, pool)
  514. * Set url as redirected to another url
  515. * @param {string|url} url new url that is redirecting an old one
  516. * @param {pool} pool memory pool to allocate memory if needed
  517. * @return {url} parsed redirected url (if needed)
  518. */
  519. static int
  520. lua_url_set_redirected(lua_State *L)
  521. {
  522. LUA_TRACE_POINT;
  523. struct rspamd_lua_url *url = lua_check_url(L, 1), *redir;
  524. rspamd_mempool_t *pool = NULL;
  525. if (url == NULL) {
  526. return luaL_error(L, "url is required as the first argument");
  527. }
  528. if (lua_type(L, 2) == LUA_TSTRING) {
  529. /* Parse url */
  530. if (lua_type(L, 3) != LUA_TUSERDATA) {
  531. return luaL_error(L, "mempool is required as the third argument");
  532. }
  533. pool = rspamd_lua_check_mempool(L, 3);
  534. if (pool == NULL) {
  535. return luaL_error(L, "mempool is required as the third argument");
  536. }
  537. gsize len;
  538. const char *urlstr = lua_tolstring(L, 2, &len);
  539. rspamd_url_find_single(pool, urlstr, len, RSPAMD_URL_FIND_ALL,
  540. lua_url_single_inserter, L);
  541. if (lua_type(L, -1) != LUA_TUSERDATA) {
  542. /* URL is actually not found */
  543. lua_pushnil(L);
  544. }
  545. else {
  546. redir = lua_check_url(L, -1);
  547. url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  548. if (url->url->ext == NULL) {
  549. url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
  550. }
  551. url->url->ext->linked_url = redir->url;
  552. }
  553. }
  554. else {
  555. redir = lua_check_url(L, 2);
  556. if (redir == NULL) {
  557. return luaL_error(L, "url is required as the second argument");
  558. }
  559. pool = rspamd_lua_check_mempool(L, 3);
  560. if (pool == NULL) {
  561. return luaL_error(L, "mempool is required as the third argument");
  562. }
  563. url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  564. if (url->url->ext == NULL) {
  565. url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
  566. }
  567. url->url->ext->linked_url = redir->url;
  568. /* Push back on stack */
  569. lua_pushvalue(L, 2);
  570. }
  571. return 1;
  572. }
  573. /***
  574. * @method url:get_tld()
  575. * Get effective second level domain part (eSLD) of the url host
  576. * @return {string} effective second level domain part (eSLD) of the url host
  577. */
  578. static int
  579. lua_url_get_tld(lua_State *L)
  580. {
  581. LUA_TRACE_POINT;
  582. struct rspamd_lua_url *url = lua_check_url(L, 1);
  583. if (url != NULL && url->url->tldlen > 0) {
  584. lua_pushlstring(L, rspamd_url_tld_unsafe(url->url), url->url->tldlen);
  585. }
  586. else {
  587. lua_pushnil(L);
  588. }
  589. return 1;
  590. }
  591. /***
  592. * @method url:get_protocol()
  593. * Get protocol name
  594. * @return {string} protocol as a string
  595. */
  596. static int
  597. lua_url_get_protocol(lua_State *L)
  598. {
  599. LUA_TRACE_POINT;
  600. struct rspamd_lua_url *url = lua_check_url(L, 1);
  601. if (url != NULL && url->url->protocol != PROTOCOL_UNKNOWN) {
  602. lua_pushstring(L, rspamd_url_protocol_name(url->url->protocol));
  603. }
  604. else {
  605. lua_pushnil(L);
  606. }
  607. return 1;
  608. }
  609. /***
  610. * @method url:get_count()
  611. * Return number of occurrences for this particular URL
  612. * @return {number} number of occurrences
  613. */
  614. static int
  615. lua_url_get_count(lua_State *L)
  616. {
  617. LUA_TRACE_POINT;
  618. struct rspamd_lua_url *url = lua_check_url(L, 1);
  619. if (url != NULL && url->url != NULL) {
  620. lua_pushinteger(L, url->url->count);
  621. }
  622. else {
  623. lua_pushnil(L);
  624. }
  625. return 1;
  626. }
  627. /***
  628. * @method url:get_visible()
  629. * Get visible part of the url with html tags stripped
  630. * @return {string} url string
  631. */
  632. static int
  633. lua_url_get_visible(lua_State *L)
  634. {
  635. LUA_TRACE_POINT;
  636. struct rspamd_lua_url *url = lua_check_url(L, 1);
  637. if (url != NULL && url->url->ext && url->url->ext->visible_part) {
  638. lua_pushstring(L, url->url->ext->visible_part);
  639. }
  640. else {
  641. lua_pushnil(L);
  642. }
  643. return 1;
  644. }
  645. /***
  646. * @method url:to_table()
  647. * Return url as a table with the following fields:
  648. *
  649. * - `url`: full content
  650. * - `host`: hostname part
  651. * - `user`: user part
  652. * - `path`: path part
  653. * - `tld`: top level domain
  654. * - `protocol`: url protocol
  655. * @return {table} URL as a table
  656. */
  657. static int
  658. lua_url_to_table(lua_State *L)
  659. {
  660. LUA_TRACE_POINT;
  661. struct rspamd_lua_url *url = lua_check_url(L, 1);
  662. struct rspamd_url *u;
  663. if (url != NULL) {
  664. u = url->url;
  665. lua_createtable(L, 0, 12);
  666. lua_pushstring(L, "url");
  667. lua_pushlstring(L, u->string, u->urllen);
  668. lua_settable(L, -3);
  669. if (u->hostlen > 0) {
  670. lua_pushstring(L, "host");
  671. lua_pushlstring(L, rspamd_url_host_unsafe(u), u->hostlen);
  672. lua_settable(L, -3);
  673. }
  674. if (rspamd_url_get_port_if_special(u) != 0) {
  675. lua_pushstring(L, "port");
  676. lua_pushinteger(L, rspamd_url_get_port_if_special(u));
  677. lua_settable(L, -3);
  678. }
  679. if (u->tldlen > 0) {
  680. lua_pushstring(L, "tld");
  681. lua_pushlstring(L, rspamd_url_tld_unsafe(u), u->tldlen);
  682. lua_settable(L, -3);
  683. }
  684. if (u->userlen > 0) {
  685. lua_pushstring(L, "user");
  686. lua_pushlstring(L, rspamd_url_user(u), u->userlen);
  687. lua_settable(L, -3);
  688. }
  689. if (u->datalen > 0) {
  690. lua_pushstring(L, "path");
  691. lua_pushlstring(L, rspamd_url_data_unsafe(u), u->datalen);
  692. lua_settable(L, -3);
  693. }
  694. if (u->querylen > 0) {
  695. lua_pushstring(L, "query");
  696. lua_pushlstring(L, rspamd_url_query_unsafe(u), u->querylen);
  697. lua_settable(L, -3);
  698. }
  699. if (u->fragmentlen > 0) {
  700. lua_pushstring(L, "fragment");
  701. lua_pushlstring(L, rspamd_url_fragment_unsafe(u), u->fragmentlen);
  702. lua_settable(L, -3);
  703. }
  704. lua_pushstring(L, "protocol");
  705. lua_pushstring(L, rspamd_url_protocol_name(u->protocol));
  706. lua_settable(L, -3);
  707. }
  708. else {
  709. lua_pushnil(L);
  710. }
  711. return 1;
  712. }
/* Pool used by url.create() when the caller does not pass a mempool */
static rspamd_mempool_t *static_lua_url_pool;

/*
 * Creates the shared pool.
 * NOTE(review): RSPAMD_CONSTRUCTOR presumably runs this before main() — confirm.
 */
RSPAMD_CONSTRUCTOR(rspamd_urls_static_pool_ctor)
{
	static_lua_url_pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
											 "static_lua_url", 0);
}

/*
 * Deletes the shared pool.
 * NOTE(review): RSPAMD_DESTRUCTOR presumably runs this at exit/unload — confirm.
 */
RSPAMD_DESTRUCTOR(rspamd_urls_static_pool_dtor)
{
	rspamd_mempool_delete(static_lua_url_pool);
}
  723. /***
  724. * @function url.create([mempool,] str, [{flags_table}])
  725. * @param {rspamd_mempool} memory pool for URL, e.g. `task:get_mempool()`
  726. * @param {string} text that contains URL (can also contain other stuff)
  727. * @return {url} new url object that exists as long as the corresponding mempool exists
  728. */
  729. static int
  730. lua_url_create(lua_State *L)
  731. {
  732. LUA_TRACE_POINT;
  733. rspamd_mempool_t *pool;
  734. struct rspamd_lua_text *t;
  735. struct rspamd_lua_url *u;
  736. if (lua_type(L, 1) == LUA_TUSERDATA) {
  737. pool = rspamd_lua_check_mempool(L, 1);
  738. t = lua_check_text_or_string(L, 2);
  739. }
  740. else {
  741. pool = static_lua_url_pool;
  742. t = lua_check_text_or_string(L, 1);
  743. }
  744. if (pool == NULL) {
  745. return luaL_error(L, "invalid arguments: mempool is expected as the second argument");
  746. }
  747. if (t == NULL) {
  748. return luaL_error(L, "invalid arguments: string/text is expected as the first argument");
  749. }
  750. rspamd_url_find_single(pool, t->start, t->len, RSPAMD_URL_FIND_ALL,
  751. lua_url_single_inserter, L);
  752. if (lua_type(L, -1) != LUA_TUSERDATA) {
  753. /* URL is actually not found */
  754. lua_pushnil(L);
  755. return 1;
  756. }
  757. u = (struct rspamd_lua_url *) lua_touserdata(L, -1);
  758. if (lua_type(L, 3) == LUA_TTABLE) {
  759. /* Add flags */
  760. for (lua_pushnil(L); lua_next(L, 3); lua_pop(L, 1)) {
  761. int nmask = 0;
  762. const char *fname = lua_tostring(L, -1);
  763. if (rspamd_url_flag_from_string(fname, &nmask)) {
  764. u->url->flags |= nmask;
  765. }
  766. else {
  767. lua_pop(L, 1);
  768. return luaL_error(L, "invalid flag: %s", fname);
  769. }
  770. }
  771. }
  772. return 1;
  773. }
  774. /***
  775. * @function url.init(tld_file)
  776. * Initialize url library if not initialized yet by Rspamd
  777. * @param {string} tld_file path to effective_tld_names.dat file (public suffix list)
  778. * @return nothing
  779. */
  780. static int
  781. lua_url_init(lua_State *L)
  782. {
  783. const char *tld_path;
  784. tld_path = luaL_checkstring(L, 1);
  785. rspamd_url_init(tld_path);
  786. return 0;
  787. }
  788. static gboolean
  789. lua_url_table_inserter(struct rspamd_url *url, gsize start_offset,
  790. gsize end_offset, gpointer ud)
  791. {
  792. lua_State *L = ud;
  793. struct rspamd_lua_url *lua_url;
  794. int n;
  795. n = rspamd_lua_table_size(L, -1);
  796. lua_url = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
  797. rspamd_lua_setclass(L, rspamd_url_classname, -1);
  798. lua_url->url = url;
  799. lua_rawseti(L, -2, n + 1);
  800. return TRUE;
  801. }
  802. static int
  803. lua_url_all(lua_State *L)
  804. {
  805. LUA_TRACE_POINT;
  806. rspamd_mempool_t *pool = rspamd_lua_check_mempool(L, 1);
  807. const char *text;
  808. size_t length;
  809. if (pool == NULL) {
  810. lua_pushnil(L);
  811. }
  812. else {
  813. text = luaL_checklstring(L, 2, &length);
  814. if (text != NULL) {
  815. lua_newtable(L);
  816. rspamd_url_find_multiple(pool, text, length,
  817. RSPAMD_URL_FIND_ALL, NULL,
  818. lua_url_table_inserter, L);
  819. }
  820. else {
  821. lua_pushnil(L);
  822. }
  823. }
  824. return 1;
  825. }
  826. /***
  827. * @method url:get_flags()
  828. * Return flags for a specified URL as map 'flag'->true for all flags set,
  829. * possible flags are:
  830. *
  831. * - `phished`: URL is likely phished
  832. * - `numeric`: URL is numeric (e.g. IP address)
  833. * - `obscured`: URL was obscured
  834. * - `redirected`: URL comes from redirector
  835. * - `html_displayed`: URL is used just for displaying purposes
  836. * - `text`: URL comes from the text
  837. * - `subject`: URL comes from the subject
  838. * - `host_encoded`: URL host part is encoded
  839. * - `schema_encoded`: URL schema part is encoded
  840. * - `query_encoded`: URL query part is encoded
  841. * - `missing_slashes`: URL has some slashes missing
  842. * - `idn`: URL has international characters
  843. * - `has_port`: URL has port
  844. * - `has_user`: URL has user part
  845. * - `schemaless`: URL has no schema
  846. * - `unnormalised`: URL has some unicode unnormalities
  847. * - `zw_spaces`: URL has some zero width spaces
  848. * - `url_displayed`: URL has some other url-like string in visible part
  849. * - `image`: URL is from src attribute of img HTML tag
  850. * @return {table} URL flags
  851. */
  852. #define PUSH_FLAG(fl) \
  853. do { \
  854. if (flags & (fl)) { \
  855. lua_pushstring(L, rspamd_url_flag_to_string(fl)); \
  856. lua_pushboolean(L, true); \
  857. lua_settable(L, -3); \
  858. } \
  859. } while (0)
  860. static int
  861. lua_url_get_flags(lua_State *L)
  862. {
  863. LUA_TRACE_POINT;
  864. struct rspamd_lua_url *url = lua_check_url(L, 1);
  865. enum rspamd_url_flags flags;
  866. if (url != NULL) {
  867. flags = url->url->flags;
  868. lua_createtable(L, 0, 4);
  869. for (int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
  870. PUSH_FLAG(1u << i);
  871. }
  872. }
  873. else {
  874. return luaL_error(L, "invalid arguments");
  875. }
  876. return 1;
  877. }
  878. #undef PUSH_FLAG
  879. static int
  880. lua_url_get_flags_num(lua_State *L)
  881. {
  882. LUA_TRACE_POINT;
  883. struct rspamd_lua_url *url = lua_check_url(L, 1);
  884. if (url) {
  885. lua_pushinteger(L, url->url->flags);
  886. }
  887. else {
  888. return luaL_error(L, "invalid arguments");
  889. }
  890. return 1;
  891. }
  892. static int
  893. lua_url_get_order(lua_State *L)
  894. {
  895. LUA_TRACE_POINT;
  896. struct rspamd_lua_url *url = lua_check_url(L, 1);
  897. if (url) {
  898. if (url->url->order != (uint16_t) -1) {
  899. lua_pushinteger(L, url->url->order);
  900. }
  901. else {
  902. lua_pushnil(L);
  903. }
  904. }
  905. else {
  906. return luaL_error(L, "invalid arguments");
  907. }
  908. return 1;
  909. }
  910. static int
  911. lua_url_get_part_order(lua_State *L)
  912. {
  913. LUA_TRACE_POINT;
  914. struct rspamd_lua_url *url = lua_check_url(L, 1);
  915. if (url) {
  916. if (url->url->part_order != (uint16_t) -1) {
  917. lua_pushinteger(L, url->url->part_order);
  918. }
  919. else {
  920. lua_pushnil(L);
  921. }
  922. }
  923. else {
  924. return luaL_error(L, "invalid arguments");
  925. }
  926. return 1;
  927. }
  928. void lua_tree_url_callback(gpointer key, gpointer value, gpointer ud)
  929. {
  930. struct rspamd_lua_url *lua_url;
  931. struct rspamd_url *url = (struct rspamd_url *) value;
  932. struct lua_tree_cb_data *cb = ud;
  933. if ((url->protocol & cb->protocols_mask) == url->protocol) {
  934. /* Handle different flags application logic */
  935. switch (cb->flags_mode) {
  936. case url_flags_mode_include_any:
  937. if (url->flags != (url->flags & cb->flags_mask)) {
  938. return;
  939. }
  940. break;
  941. case url_flags_mode_include_explicit:
  942. if ((url->flags & cb->flags_mask) != cb->flags_mask) {
  943. return;
  944. }
  945. break;
  946. case url_flags_mode_exclude_include:
  947. if ((url->flags & cb->flags_exclude_mask) != 0) {
  948. return;
  949. }
  950. if ((url->flags & cb->flags_mask) == 0) {
  951. return;
  952. }
  953. break;
  954. }
  955. if (cb->skip_prob > 0) {
  956. double coin = rspamd_random_double_fast_seed(&cb->random_seed);
  957. if (coin < cb->skip_prob) {
  958. return;
  959. }
  960. }
  961. lua_url = lua_newuserdata(cb->L, sizeof(struct rspamd_lua_url));
  962. lua_pushvalue(cb->L, cb->metatable_pos);
  963. lua_setmetatable(cb->L, -2);
  964. lua_url->url = url;
  965. lua_rawseti(cb->L, -2, cb->i++);
  966. }
  967. }
  968. gboolean
  969. lua_url_cbdata_fill(lua_State *L,
  970. int pos,
  971. struct lua_tree_cb_data *cbd,
  972. unsigned int default_protocols,
  973. unsigned int default_flags,
  974. gsize max_urls)
  975. {
  976. int protocols_mask = 0;
  977. int pos_arg_type = lua_type(L, pos);
  978. unsigned int flags_mask = default_flags;
  979. gboolean seen_flags = FALSE, seen_protocols = FALSE;
  980. memset(cbd, 0, sizeof(*cbd));
  981. cbd->flags_mode = url_flags_mode_include_any;
  982. if (pos_arg_type == LUA_TBOOLEAN) {
  983. protocols_mask = default_protocols;
  984. if (lua_toboolean(L, 2)) {
  985. protocols_mask |= PROTOCOL_MAILTO;
  986. }
  987. }
  988. else if (pos_arg_type == LUA_TTABLE) {
  989. if (rspamd_lua_geti(L, 1, pos) == LUA_TNIL) {
  990. /* New method: indexed table */
  991. lua_getfield(L, pos, "flags");
  992. if (lua_istable(L, -1)) {
  993. int top = lua_gettop(L);
  994. lua_getfield(L, pos, "flags_mode");
  995. if (lua_isstring(L, -1)) {
  996. const char *mode_str = lua_tostring(L, -1);
  997. if (strcmp(mode_str, "explicit") == 0) {
  998. cbd->flags_mode = url_flags_mode_include_explicit;
  999. /*
  1000. * Ignore default flags in this mode and include
  1001. * merely flags specified by a caller
  1002. */
  1003. flags_mask = 0;
  1004. }
  1005. }
  1006. lua_pop(L, 1);
  1007. for (lua_pushnil(L); lua_next(L, top); lua_pop(L, 1)) {
  1008. int nmask = 0;
  1009. if (lua_type(L, -1) == LUA_TSTRING) {
  1010. const char *fname = lua_tostring(L, -1);
  1011. if (rspamd_url_flag_from_string(fname, &nmask)) {
  1012. flags_mask |= nmask;
  1013. }
  1014. else {
  1015. msg_info("bad url flag: %s", fname);
  1016. return FALSE;
  1017. }
  1018. }
  1019. else {
  1020. flags_mask |= lua_tointeger(L, -1);
  1021. }
  1022. }
  1023. seen_flags = TRUE;
  1024. }
  1025. else {
  1026. flags_mask |= default_flags;
  1027. }
  1028. lua_pop(L, 1);
  1029. lua_getfield(L, pos, "protocols");
  1030. if (lua_istable(L, -1)) {
  1031. int top = lua_gettop(L);
  1032. for (lua_pushnil(L); lua_next(L, top); lua_pop(L, 1)) {
  1033. int nmask;
  1034. const char *pname = lua_tostring(L, -1);
  1035. nmask = rspamd_url_protocol_from_string(pname);
  1036. if (nmask != PROTOCOL_UNKNOWN) {
  1037. protocols_mask |= nmask;
  1038. }
  1039. else {
  1040. msg_info("bad url protocol: %s", pname);
  1041. return FALSE;
  1042. }
  1043. }
  1044. seen_protocols = TRUE;
  1045. }
  1046. else {
  1047. protocols_mask = default_protocols;
  1048. }
  1049. lua_pop(L, 1);
  1050. if (!seen_protocols) {
  1051. lua_getfield(L, pos, "emails");
  1052. if (lua_isboolean(L, -1)) {
  1053. if (lua_toboolean(L, -1)) {
  1054. protocols_mask |= PROTOCOL_MAILTO;
  1055. }
  1056. }
  1057. lua_pop(L, 1);
  1058. }
  1059. if (!seen_flags) {
  1060. lua_getfield(L, pos, "images");
  1061. if (lua_isboolean(L, -1)) {
  1062. if (lua_toboolean(L, -1)) {
  1063. flags_mask |= RSPAMD_URL_FLAG_IMAGE;
  1064. }
  1065. else {
  1066. flags_mask &= ~RSPAMD_URL_FLAG_IMAGE;
  1067. }
  1068. }
  1069. else {
  1070. flags_mask &= ~RSPAMD_URL_FLAG_IMAGE;
  1071. }
  1072. lua_pop(L, 1);
  1073. }
  1074. if (!seen_flags) {
  1075. lua_getfield(L, pos, "content");
  1076. if (lua_isboolean(L, -1)) {
  1077. if (lua_toboolean(L, -1)) {
  1078. flags_mask |= RSPAMD_URL_FLAG_CONTENT;
  1079. }
  1080. else {
  1081. flags_mask &= ~RSPAMD_URL_FLAG_CONTENT;
  1082. }
  1083. }
  1084. else {
  1085. flags_mask &= ~RSPAMD_URL_FLAG_CONTENT;
  1086. }
  1087. lua_pop(L, 1);
  1088. }
  1089. lua_getfield(L, pos, "max_urls");
  1090. if (lua_isnumber(L, -1)) {
  1091. max_urls = lua_tonumber(L, -1);
  1092. }
  1093. lua_pop(L, 1);
  1094. lua_getfield(L, pos, "sort");
  1095. if (lua_isboolean(L, -1)) {
  1096. cbd->sort = TRUE;
  1097. }
  1098. lua_pop(L, 1);
  1099. }
  1100. else {
  1101. /* Plain table of the protocols */
  1102. for (lua_pushnil(L); lua_next(L, pos); lua_pop(L, 1)) {
  1103. int nmask;
  1104. const char *pname = lua_tostring(L, -1);
  1105. nmask = rspamd_url_protocol_from_string(pname);
  1106. if (nmask != PROTOCOL_UNKNOWN) {
  1107. protocols_mask |= nmask;
  1108. }
  1109. else {
  1110. msg_info("bad url protocol: %s", pname);
  1111. return FALSE;
  1112. }
  1113. }
  1114. }
  1115. lua_pop(L, 1); /* After rspamd_lua_geti */
  1116. }
  1117. else if (pos_arg_type == LUA_TSTRING) {
  1118. const char *plist = lua_tostring(L, pos);
  1119. char **strvec;
  1120. char *const *cvec;
  1121. strvec = g_strsplit_set(plist, ",;", -1);
  1122. cvec = strvec;
  1123. while (*cvec) {
  1124. int nmask;
  1125. nmask = rspamd_url_protocol_from_string(*cvec);
  1126. if (nmask != PROTOCOL_UNKNOWN) {
  1127. protocols_mask |= nmask;
  1128. }
  1129. else {
  1130. msg_info("bad url protocol: %s", *cvec);
  1131. g_strfreev(strvec);
  1132. return FALSE;
  1133. }
  1134. cvec++;
  1135. }
  1136. g_strfreev(strvec);
  1137. }
  1138. else if (pos_arg_type == LUA_TNONE || pos_arg_type == LUA_TNIL) {
  1139. protocols_mask = default_protocols;
  1140. flags_mask = default_flags;
  1141. }
  1142. else {
  1143. return FALSE;
  1144. }
  1145. if (lua_type(L, pos + 1) == LUA_TBOOLEAN) {
  1146. if (lua_toboolean(L, pos + 1)) {
  1147. flags_mask |= RSPAMD_URL_FLAG_IMAGE;
  1148. }
  1149. else {
  1150. flags_mask &= ~RSPAMD_URL_FLAG_IMAGE;
  1151. }
  1152. }
  1153. cbd->i = 1;
  1154. cbd->L = L;
  1155. cbd->max_urls = max_urls;
  1156. cbd->protocols_mask = protocols_mask;
  1157. cbd->flags_mask = flags_mask;
  1158. /* This needs to be removed from the stack */
  1159. rspamd_lua_class_metatable(L, rspamd_url_classname);
  1160. cbd->metatable_pos = lua_gettop(L);
  1161. (void) lua_checkstack(L, cbd->metatable_pos + 4);
  1162. return TRUE;
  1163. }
  1164. gboolean
  1165. lua_url_cbdata_fill_exclude_include(lua_State *L,
  1166. int pos,
  1167. struct lua_tree_cb_data *cbd,
  1168. unsigned int default_protocols,
  1169. gsize max_urls)
  1170. {
  1171. unsigned int protocols_mask = default_protocols;
  1172. unsigned int include_flags_mask, exclude_flags_mask;
  1173. int pos_arg_type = lua_type(L, pos);
  1174. memset(cbd, 0, sizeof(*cbd));
  1175. cbd->flags_mode = url_flags_mode_exclude_include;
  1176. /* Include flags */
  1177. if (pos_arg_type == LUA_TTABLE) {
  1178. include_flags_mask = 0; /* Reset to no flags */
  1179. for (lua_pushnil(L); lua_next(L, pos); lua_pop(L, 1)) {
  1180. int nmask = 0;
  1181. if (lua_type(L, -1) == LUA_TSTRING) {
  1182. const char *fname = lua_tostring(L, -1);
  1183. if (rspamd_url_flag_from_string(fname, &nmask)) {
  1184. include_flags_mask |= nmask;
  1185. }
  1186. else {
  1187. msg_info("bad url include flag: %s", fname);
  1188. return FALSE;
  1189. }
  1190. }
  1191. else {
  1192. include_flags_mask |= lua_tointeger(L, -1);
  1193. }
  1194. }
  1195. }
  1196. else if (pos_arg_type == LUA_TNIL || pos_arg_type == LUA_TNONE) {
  1197. /* Include all flags */
  1198. include_flags_mask = ~0U;
  1199. }
  1200. else {
  1201. msg_info("bad arguments: wrong include mask");
  1202. return FALSE;
  1203. }
  1204. /* Exclude flags */
  1205. pos_arg_type = lua_type(L, pos + 1);
  1206. if (pos_arg_type == LUA_TTABLE) {
  1207. exclude_flags_mask = 0; /* Reset to no flags */
  1208. for (lua_pushnil(L); lua_next(L, pos + 1); lua_pop(L, 1)) {
  1209. int nmask = 0;
  1210. if (lua_type(L, -1) == LUA_TSTRING) {
  1211. const char *fname = lua_tostring(L, -1);
  1212. if (rspamd_url_flag_from_string(fname, &nmask)) {
  1213. exclude_flags_mask |= nmask;
  1214. }
  1215. else {
  1216. msg_info("bad url exclude flag: %s", fname);
  1217. return FALSE;
  1218. }
  1219. }
  1220. else {
  1221. exclude_flags_mask |= lua_tointeger(L, -1);
  1222. }
  1223. }
  1224. }
  1225. else if (pos_arg_type == LUA_TNIL || pos_arg_type == LUA_TNONE) {
  1226. /* Empty all exclude flags */
  1227. exclude_flags_mask = 0U;
  1228. }
  1229. else {
  1230. msg_info("bad arguments: wrong exclude mask");
  1231. return FALSE;
  1232. }
  1233. if (lua_type(L, pos + 2) == LUA_TTABLE) {
  1234. protocols_mask = 0U; /* Reset all protocols */
  1235. for (lua_pushnil(L); lua_next(L, pos + 2); lua_pop(L, 1)) {
  1236. int nmask;
  1237. const char *pname = lua_tostring(L, -1);
  1238. nmask = rspamd_url_protocol_from_string(pname);
  1239. if (nmask != PROTOCOL_UNKNOWN) {
  1240. protocols_mask |= nmask;
  1241. }
  1242. else {
  1243. msg_info("bad url protocol: %s", pname);
  1244. return FALSE;
  1245. }
  1246. }
  1247. }
  1248. else {
  1249. protocols_mask = default_protocols;
  1250. }
  1251. cbd->i = 1;
  1252. cbd->L = L;
  1253. cbd->max_urls = max_urls;
  1254. cbd->protocols_mask = protocols_mask;
  1255. cbd->flags_mask = include_flags_mask;
  1256. cbd->flags_exclude_mask = exclude_flags_mask;
  1257. /* This needs to be removed from the stack */
  1258. rspamd_lua_class_metatable(L, rspamd_url_classname);
  1259. cbd->metatable_pos = lua_gettop(L);
  1260. (void) lua_checkstack(L, cbd->metatable_pos + 4);
  1261. return TRUE;
  1262. }
  1263. void lua_url_cbdata_dtor(struct lua_tree_cb_data *cbd)
  1264. {
  1265. if (cbd->metatable_pos != -1) {
  1266. lua_remove(cbd->L, cbd->metatable_pos);
  1267. }
  1268. }
  1269. gsize lua_url_adjust_skip_prob(float timestamp,
  1270. unsigned char digest[16],
  1271. struct lua_tree_cb_data *cb,
  1272. gsize sz)
  1273. {
  1274. if (cb->max_urls > 0 && sz > cb->max_urls) {
  1275. cb->skip_prob = 1.0 - ((double) cb->max_urls) / (double) sz;
  1276. /*
  1277. * Use task dependent probabilistic seed to ensure that
  1278. * consequent task:get_urls return the same list of urls
  1279. * We use both digest and timestamp here to avoid attack surface
  1280. * based just on digest.
  1281. */
  1282. memcpy(&cb->random_seed, digest, 4);
  1283. memcpy(((unsigned char *) &cb->random_seed) + 4, &timestamp, 4);
  1284. sz = cb->max_urls;
  1285. }
  1286. return sz;
  1287. }
  1288. static int
  1289. lua_url_eq(lua_State *L)
  1290. {
  1291. LUA_TRACE_POINT;
  1292. struct rspamd_lua_url *u1 = lua_check_url(L, 1),
  1293. *u2 = lua_check_url(L, 2);
  1294. if (u1 && u2) {
  1295. lua_pushboolean(L, (rspamd_url_cmp(u1->url, u2->url) == 0));
  1296. }
  1297. else {
  1298. lua_pushboolean(L, false);
  1299. }
  1300. return 1;
  1301. }
  1302. static int
  1303. lua_url_lt(lua_State *L)
  1304. {
  1305. LUA_TRACE_POINT;
  1306. struct rspamd_lua_url *u1 = lua_check_url(L, 1),
  1307. *u2 = lua_check_url(L, 2);
  1308. if (u1 && u2) {
  1309. lua_pushinteger(L, rspamd_url_cmp(u1->url, u2->url));
  1310. }
  1311. else {
  1312. return luaL_error(L, "invalid arguments");
  1313. }
  1314. return 1;
  1315. }
  1316. static int
  1317. lua_load_url(lua_State *L)
  1318. {
  1319. lua_newtable(L);
  1320. luaL_register(L, NULL, urllib_f);
  1321. /* Push flags */
  1322. lua_createtable(L, 0, RSPAMD_URL_MAX_FLAG_SHIFT);
  1323. for (int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
  1324. unsigned int flag = 1u << i;
  1325. lua_pushinteger(L, flag);
  1326. lua_setfield(L, -2, rspamd_url_flag_to_string(flag));
  1327. }
  1328. lua_setfield(L, -2, "flags");
  1329. return 1;
  1330. }
  1331. void luaopen_url(lua_State *L)
  1332. {
  1333. rspamd_lua_new_class(L, rspamd_url_classname, urllib_m);
  1334. lua_pop(L, 1);
  1335. rspamd_lua_add_preload(L, "rspamd_url", lua_load_url);
  1336. }