You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_url.c 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "lua_url.h"
/***
 * @module rspamd_url
 * This module provides routines to handle URLs and to extract URLs from text.
 * Objects of this class are returned, for example, by `task:get_urls()` or `task:get_emails()`.
 * You can also create an `rspamd_url` object from any text.
 * @example
local url = require "rspamd_url"
local mpool = require "rspamd_mempool"
url.init("/usr/share/rspamd/effective_tld_names.dat")
local pool = mpool.create()
local res = url.create(pool, 'Look at: http://user@test.example.com/test?query')
local t = res:to_table()
-- Content of t:
-- url = ['http://test.example.com/test?query']
-- host = ['test.example.com']
-- user = ['user']
-- path = ['test']
-- tld = ['example.com']
pool:destroy() -- res is destroyed here, so you should not use it afterwards
local mistake = res:to_table() -- INVALID! as pool is destroyed
 */
/*
 * URL methods: forward declarations for the Lua-callable handlers implemented
 * in this file. Each entry is presumably expanded by LUA_FUNCTION_DEF into a
 * prototype named `lua_url_<name>` (the macro is defined elsewhere; confirm
 * in lua_common.h).
 */
LUA_FUNCTION_DEF(url, get_length);
LUA_FUNCTION_DEF(url, get_host);
LUA_FUNCTION_DEF(url, get_port);
LUA_FUNCTION_DEF(url, get_user);
LUA_FUNCTION_DEF(url, get_path);
LUA_FUNCTION_DEF(url, get_query);
LUA_FUNCTION_DEF(url, get_fragment);
LUA_FUNCTION_DEF(url, get_text);
LUA_FUNCTION_DEF(url, tostring);
LUA_FUNCTION_DEF(url, get_raw);
LUA_FUNCTION_DEF(url, get_tld);
LUA_FUNCTION_DEF(url, get_flags);
LUA_FUNCTION_DEF(url, get_flags_num);
LUA_FUNCTION_DEF(url, get_protocol);
LUA_FUNCTION_DEF(url, to_table);
LUA_FUNCTION_DEF(url, is_phished);
LUA_FUNCTION_DEF(url, is_redirected);
LUA_FUNCTION_DEF(url, is_obscured);
LUA_FUNCTION_DEF(url, is_html_displayed);
LUA_FUNCTION_DEF(url, is_subject);
LUA_FUNCTION_DEF(url, get_phished);
LUA_FUNCTION_DEF(url, set_redirected);
LUA_FUNCTION_DEF(url, get_count);
LUA_FUNCTION_DEF(url, get_visible);
LUA_FUNCTION_DEF(url, create);
LUA_FUNCTION_DEF(url, init);
LUA_FUNCTION_DEF(url, all);
LUA_FUNCTION_DEF(url, lt);
LUA_FUNCTION_DEF(url, eq);
LUA_FUNCTION_DEF(url, get_order);
LUA_FUNCTION_DEF(url, get_part_order);
/*
 * Method table for url objects: maps Lua method names to their C handlers.
 * The table is terminated by a {NULL, NULL} sentinel.
 */
static const struct luaL_reg urllib_m[] = {
	LUA_INTERFACE_DEF(url, get_length),
	LUA_INTERFACE_DEF(url, get_host),
	LUA_INTERFACE_DEF(url, get_port),
	LUA_INTERFACE_DEF(url, get_user),
	LUA_INTERFACE_DEF(url, get_path),
	LUA_INTERFACE_DEF(url, get_query),
	LUA_INTERFACE_DEF(url, get_fragment),
	LUA_INTERFACE_DEF(url, get_text),
	LUA_INTERFACE_DEF(url, get_tld),
	LUA_INTERFACE_DEF(url, get_raw),
	LUA_INTERFACE_DEF(url, get_protocol),
	LUA_INTERFACE_DEF(url, to_table),
	LUA_INTERFACE_DEF(url, is_phished),
	LUA_INTERFACE_DEF(url, is_redirected),
	LUA_INTERFACE_DEF(url, is_obscured),
	LUA_INTERFACE_DEF(url, is_html_displayed),
	LUA_INTERFACE_DEF(url, is_subject),
	LUA_INTERFACE_DEF(url, get_phished),
	LUA_INTERFACE_DEF(url, get_visible),
	LUA_INTERFACE_DEF(url, get_count),
	LUA_INTERFACE_DEF(url, get_flags),
	LUA_INTERFACE_DEF(url, get_flags_num),
	LUA_INTERFACE_DEF(url, get_order),
	LUA_INTERFACE_DEF(url, get_part_order),
	/* `get_redirected` is an alias: it shares the handler of `get_phished` */
	{"get_redirected", lua_url_get_phished},
	LUA_INTERFACE_DEF(url, set_redirected),
	/* Metamethods: string conversion and comparison operators */
	{"__tostring", lua_url_tostring},
	{"__eq", lua_url_eq},
	{"__lt", lua_url_lt},
	{NULL, NULL}};
/*
 * Function table of the url library (entries that are not methods of an url
 * object): `init`, `create` and `all`. Terminated by a {NULL, NULL} sentinel.
 */
static const struct luaL_reg urllib_f[] = {
	LUA_INTERFACE_DEF(url, init),
	LUA_INTERFACE_DEF(url, create),
	LUA_INTERFACE_DEF(url, all),
	{NULL, NULL}};
  107. struct rspamd_lua_url *
  108. lua_check_url(lua_State *L, gint pos)
  109. {
  110. void *ud = rspamd_lua_check_udata(L, pos, "rspamd{url}");
  111. luaL_argcheck(L, ud != NULL, pos, "'url' expected");
  112. return ud ? ((struct rspamd_lua_url *) ud) : NULL;
  113. }
  114. static gboolean
  115. lua_url_single_inserter(struct rspamd_url *url, gsize start_offset,
  116. gsize end_offset, gpointer ud)
  117. {
  118. lua_State *L = ud;
  119. struct rspamd_lua_url *lua_url;
  120. lua_url = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
  121. rspamd_lua_setclass(L, "rspamd{url}", -1);
  122. lua_url->url = url;
  123. return TRUE;
  124. }
  125. /***
  126. * @method url:get_length()
  127. * Get length of the url
  128. * @return {number} length of url in bytes
  129. */
  130. static gint
  131. lua_url_get_length(lua_State *L)
  132. {
  133. LUA_TRACE_POINT;
  134. struct rspamd_lua_url *url = lua_check_url(L, 1);
  135. if (url != NULL) {
  136. lua_pushinteger(L, url->url->urllen);
  137. }
  138. else {
  139. lua_pushnil(L);
  140. }
  141. return 1;
  142. }
  143. /***
  144. * @method url:get_host()
  145. * Get domain part of the url
  146. * @return {string} domain part of URL
  147. */
  148. static gint
  149. lua_url_get_host(lua_State *L)
  150. {
  151. LUA_TRACE_POINT;
  152. struct rspamd_lua_url *url = lua_check_url(L, 1);
  153. if (url != NULL && url->url && url->url->hostlen > 0) {
  154. lua_pushlstring(L, rspamd_url_host(url->url), url->url->hostlen);
  155. }
  156. else {
  157. lua_pushnil(L);
  158. }
  159. return 1;
  160. }
  161. /***
  162. * @method url:get_port()
  163. * Get port of the url
  164. * @return {number} url port
  165. */
  166. static gint
  167. lua_url_get_port(lua_State *L)
  168. {
  169. LUA_TRACE_POINT;
  170. struct rspamd_lua_url *url = lua_check_url(L, 1);
  171. if (url != NULL) {
  172. if (rspamd_url_get_port_if_special(url->url) == 0) {
  173. lua_pushnil(L);
  174. }
  175. else {
  176. lua_pushinteger(L, rspamd_url_get_port_if_special(url->url));
  177. }
  178. }
  179. else {
  180. lua_pushnil(L);
  181. }
  182. return 1;
  183. }
  184. /***
  185. * @method url:get_user()
  186. * Get user part of the url (e.g. username in email)
  187. * @return {string} user part of URL
  188. */
  189. static gint
  190. lua_url_get_user(lua_State *L)
  191. {
  192. LUA_TRACE_POINT;
  193. struct rspamd_lua_url *url = lua_check_url(L, 1);
  194. if (url != NULL && rspamd_url_user(url->url) != NULL) {
  195. lua_pushlstring(L, rspamd_url_user(url->url), url->url->userlen);
  196. }
  197. else {
  198. lua_pushnil(L);
  199. }
  200. return 1;
  201. }
  202. /***
  203. * @method url:get_path()
  204. * Get path of the url
  205. * @return {string} path part of URL
  206. */
  207. static gint
  208. lua_url_get_path(lua_State *L)
  209. {
  210. LUA_TRACE_POINT;
  211. struct rspamd_lua_url *url = lua_check_url(L, 1);
  212. if (url != NULL && url->url->datalen > 0) {
  213. lua_pushlstring(L, rspamd_url_data_unsafe(url->url), url->url->datalen);
  214. }
  215. else {
  216. lua_pushnil(L);
  217. }
  218. return 1;
  219. }
  220. /***
  221. * @method url:get_query()
  222. * Get query of the url
  223. * @return {string} query part of URL
  224. */
  225. static gint
  226. lua_url_get_query(lua_State *L)
  227. {
  228. LUA_TRACE_POINT;
  229. struct rspamd_lua_url *url = lua_check_url(L, 1);
  230. if (url != NULL && url->url->querylen > 0) {
  231. lua_pushlstring(L, rspamd_url_query_unsafe(url->url), url->url->querylen);
  232. }
  233. else {
  234. lua_pushnil(L);
  235. }
  236. return 1;
  237. }
  238. /***
  239. * @method url:get_fragment()
  240. * Get fragment of the url
  241. * @return {string} fragment part of URL
  242. */
  243. static gint
  244. lua_url_get_fragment(lua_State *L)
  245. {
  246. LUA_TRACE_POINT;
  247. struct rspamd_lua_url *url = lua_check_url(L, 1);
  248. if (url != NULL && url->url->fragmentlen > 0) {
  249. lua_pushlstring(L, rspamd_url_fragment_unsafe(url->url), url->url->fragmentlen);
  250. }
  251. else {
  252. lua_pushnil(L);
  253. }
  254. return 1;
  255. }
  256. /***
  257. * @method url:get_text()
  258. * Get full content of the url
  259. * @return {string} url string
  260. */
  261. static gint
  262. lua_url_get_text(lua_State *L)
  263. {
  264. LUA_TRACE_POINT;
  265. struct rspamd_lua_url *url = lua_check_url(L, 1);
  266. if (url != NULL) {
  267. lua_pushlstring(L, url->url->string, url->url->urllen);
  268. }
  269. else {
  270. lua_pushnil(L);
  271. }
  272. return 1;
  273. }
  274. /***
  275. * @method url:tostring()
  276. * Get full content of the url or user@domain in case of email
  277. * @return {string} url as a string
  278. */
  279. static gint
  280. lua_url_tostring(lua_State *L)
  281. {
  282. LUA_TRACE_POINT;
  283. struct rspamd_lua_url *url = lua_check_url(L, 1);
  284. if (url != NULL && url->url != NULL) {
  285. if (url->url->protocol == PROTOCOL_MAILTO) {
  286. gchar *tmp = g_malloc(url->url->userlen + 1 +
  287. url->url->hostlen);
  288. if (url->url->userlen) {
  289. memcpy(tmp, url->url->string + url->url->usershift, url->url->userlen);
  290. }
  291. tmp[url->url->userlen] = '@';
  292. memcpy(tmp + url->url->userlen + 1, rspamd_url_host_unsafe(url->url),
  293. url->url->hostlen);
  294. lua_pushlstring(L, tmp, url->url->userlen + 1 + url->url->hostlen);
  295. g_free(tmp);
  296. }
  297. else {
  298. lua_pushlstring(L, url->url->string, url->url->urllen);
  299. }
  300. }
  301. else {
  302. lua_pushnil(L);
  303. }
  304. return 1;
  305. }
  306. /***
  307. * @method url:get_raw()
  308. * Get full content of the url as it was parsed (e.g. with urldecode)
  309. * @return {string} url string
  310. */
  311. static gint
  312. lua_url_get_raw(lua_State *L)
  313. {
  314. LUA_TRACE_POINT;
  315. struct rspamd_lua_url *url = lua_check_url(L, 1);
  316. if (url != NULL) {
  317. lua_pushlstring(L, url->url->raw, url->url->rawlen);
  318. }
  319. else {
  320. lua_pushnil(L);
  321. }
  322. return 1;
  323. }
  324. /***
  325. * @method url:is_phished()
  326. * Check whether URL is treated as phished
  327. * @return {boolean} `true` if URL is phished
  328. */
  329. static gint
  330. lua_url_is_phished(lua_State *L)
  331. {
  332. LUA_TRACE_POINT;
  333. struct rspamd_lua_url *url = lua_check_url(L, 1);
  334. if (url != NULL) {
  335. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_PHISHED);
  336. }
  337. else {
  338. lua_pushnil(L);
  339. }
  340. return 1;
  341. }
  342. /***
  343. * @method url:is_redirected()
  344. * Check whether URL was redirected
  345. * @return {boolean} `true` if URL is redirected
  346. */
  347. static gint
  348. lua_url_is_redirected(lua_State *L)
  349. {
  350. LUA_TRACE_POINT;
  351. struct rspamd_lua_url *url = lua_check_url(L, 1);
  352. if (url != NULL) {
  353. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_REDIRECTED);
  354. }
  355. else {
  356. lua_pushnil(L);
  357. }
  358. return 1;
  359. }
  360. /***
  361. * @method url:is_obscured()
  362. * Check whether URL is treated as obscured or obfuscated (e.g. numbers in IP address or other hacks)
  363. * @return {boolean} `true` if URL is obscured
  364. */
  365. static gint
  366. lua_url_is_obscured(lua_State *L)
  367. {
  368. LUA_TRACE_POINT;
  369. struct rspamd_lua_url *url = lua_check_url(L, 1);
  370. if (url != NULL) {
  371. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_OBSCURED);
  372. }
  373. else {
  374. lua_pushnil(L);
  375. }
  376. return 1;
  377. }
  378. /***
  379. * @method url:is_html_displayed()
  380. * Check whether URL is just displayed in HTML (e.g. NOT a real href)
  381. * @return {boolean} `true` if URL is displayed only
  382. */
  383. static gint
  384. lua_url_is_html_displayed(lua_State *L)
  385. {
  386. LUA_TRACE_POINT;
  387. struct rspamd_lua_url *url = lua_check_url(L, 1);
  388. if (url != NULL) {
  389. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED);
  390. }
  391. else {
  392. lua_pushnil(L);
  393. }
  394. return 1;
  395. }
  396. /***
  397. * @method url:is_subject()
  398. * Check whether URL is found in subject
  399. * @return {boolean} `true` if URL is found in subject
  400. */
  401. static gint
  402. lua_url_is_subject(lua_State *L)
  403. {
  404. LUA_TRACE_POINT;
  405. struct rspamd_lua_url *url = lua_check_url(L, 1);
  406. if (url != NULL) {
  407. lua_pushboolean(L, url->url->flags & RSPAMD_URL_FLAG_SUBJECT);
  408. }
  409. else {
  410. lua_pushnil(L);
  411. }
  412. return 1;
  413. }
  414. /***
  415. * @method url:get_phished()
  416. * Get another URL that pretends to be this URL (e.g. used in phishing)
  417. * @return {url} phished URL
  418. */
  419. static gint
  420. lua_url_get_phished(lua_State *L)
  421. {
  422. LUA_TRACE_POINT;
  423. struct rspamd_lua_url *purl, *url = lua_check_url(L, 1);
  424. if (url) {
  425. if (url->url->ext && url->url->ext->linked_url != NULL) {
  426. /* XXX: in fact, this is the only possible combination of flags, so this check is redundant */
  427. if (url->url->flags &
  428. (RSPAMD_URL_FLAG_PHISHED | RSPAMD_URL_FLAG_REDIRECTED)) {
  429. purl = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
  430. rspamd_lua_setclass(L, "rspamd{url}", -1);
  431. purl->url = url->url->ext->linked_url;
  432. return 1;
  433. }
  434. }
  435. }
  436. lua_pushnil(L);
  437. return 1;
  438. }
  439. /***
  440. * @method url:set_redirected(url,[ pool])
  441. * Set url as redirected to another url
  442. * @param {string|url} url new url that is redirecting an old one
  443. * @param {pool} pool if url is a string this is required for parsing
  444. * @return {url} parsed redirected url (if needed)
  445. */
  446. static gint
  447. lua_url_set_redirected(lua_State *L)
  448. {
  449. LUA_TRACE_POINT;
  450. struct rspamd_lua_url *url = lua_check_url(L, 1), *redir;
  451. rspamd_mempool_t *pool = NULL;
  452. if (url == NULL) {
  453. return luaL_error(L, "url is required as the first argument");
  454. }
  455. if (lua_type(L, 2) == LUA_TSTRING) {
  456. /* Parse url */
  457. if (lua_type(L, 3) != LUA_TUSERDATA) {
  458. return luaL_error(L, "mempool is required as the third argument");
  459. }
  460. pool = rspamd_lua_check_mempool(L, 3);
  461. if (pool == NULL) {
  462. return luaL_error(L, "mempool is required as the third argument");
  463. }
  464. gsize len;
  465. const gchar *urlstr = lua_tolstring(L, 2, &len);
  466. rspamd_url_find_single(pool, urlstr, len, RSPAMD_URL_FIND_ALL,
  467. lua_url_single_inserter, L);
  468. if (lua_type(L, -1) != LUA_TUSERDATA) {
  469. /* URL is actually not found */
  470. lua_pushnil(L);
  471. }
  472. else {
  473. redir = lua_check_url(L, -1);
  474. url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  475. if (url->url->ext == NULL) {
  476. url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
  477. }
  478. url->url->ext->linked_url = redir->url;
  479. }
  480. }
  481. else {
  482. redir = lua_check_url(L, 2);
  483. if (redir == NULL) {
  484. return luaL_error(L, "url is required as the second argument");
  485. }
  486. url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  487. if (url->url->ext == NULL) {
  488. url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
  489. }
  490. url->url->ext->linked_url = redir->url;
  491. /* Push back on stack */
  492. lua_pushvalue(L, 2);
  493. }
  494. return 1;
  495. }
  496. /***
  497. * @method url:get_tld()
  498. * Get effective second level domain part (eSLD) of the url host
  499. * @return {string} effective second level domain part (eSLD) of the url host
  500. */
  501. static gint
  502. lua_url_get_tld(lua_State *L)
  503. {
  504. LUA_TRACE_POINT;
  505. struct rspamd_lua_url *url = lua_check_url(L, 1);
  506. if (url != NULL && url->url->tldlen > 0) {
  507. lua_pushlstring(L, rspamd_url_tld_unsafe(url->url), url->url->tldlen);
  508. }
  509. else {
  510. lua_pushnil(L);
  511. }
  512. return 1;
  513. }
  514. /***
  515. * @method url:get_protocol()
  516. * Get protocol name
  517. * @return {string} protocol as a string
  518. */
  519. static gint
  520. lua_url_get_protocol(lua_State *L)
  521. {
  522. LUA_TRACE_POINT;
  523. struct rspamd_lua_url *url = lua_check_url(L, 1);
  524. if (url != NULL && url->url->protocol != PROTOCOL_UNKNOWN) {
  525. lua_pushstring(L, rspamd_url_protocol_name(url->url->protocol));
  526. }
  527. else {
  528. lua_pushnil(L);
  529. }
  530. return 1;
  531. }
  532. /***
  533. * @method url:get_count()
  534. * Return number of occurrences for this particular URL
  535. * @return {number} number of occurrences
  536. */
  537. static gint
  538. lua_url_get_count(lua_State *L)
  539. {
  540. LUA_TRACE_POINT;
  541. struct rspamd_lua_url *url = lua_check_url(L, 1);
  542. if (url != NULL && url->url != NULL) {
  543. lua_pushinteger(L, url->url->count);
  544. }
  545. else {
  546. lua_pushnil(L);
  547. }
  548. return 1;
  549. }
  550. /***
  551. * @method url:get_visible()
  552. * Get visible part of the url with html tags stripped
  553. * @return {string} url string
  554. */
  555. static gint
  556. lua_url_get_visible(lua_State *L)
  557. {
  558. LUA_TRACE_POINT;
  559. struct rspamd_lua_url *url = lua_check_url(L, 1);
  560. if (url != NULL && url->url->ext && url->url->ext->visible_part) {
  561. lua_pushstring(L, url->url->ext->visible_part);
  562. }
  563. else {
  564. lua_pushnil(L);
  565. }
  566. return 1;
  567. }
  568. /***
  569. * @method url:to_table()
  570. * Return url as a table with the following fields:
  571. *
  572. * - `url`: full content
  573. * - `host`: hostname part
  574. * - `user`: user part
  575. * - `path`: path part
  576. * - `tld`: top level domain
  577. * - `protocol`: url protocol
  578. * @return {table} URL as a table
  579. */
  580. static gint
  581. lua_url_to_table(lua_State *L)
  582. {
  583. LUA_TRACE_POINT;
  584. struct rspamd_lua_url *url = lua_check_url(L, 1);
  585. struct rspamd_url *u;
  586. if (url != NULL) {
  587. u = url->url;
  588. lua_createtable(L, 0, 12);
  589. lua_pushstring(L, "url");
  590. lua_pushlstring(L, u->string, u->urllen);
  591. lua_settable(L, -3);
  592. if (u->hostlen > 0) {
  593. lua_pushstring(L, "host");
  594. lua_pushlstring(L, rspamd_url_host_unsafe(u), u->hostlen);
  595. lua_settable(L, -3);
  596. }
  597. if (rspamd_url_get_port_if_special(u) != 0) {
  598. lua_pushstring(L, "port");
  599. lua_pushinteger(L, rspamd_url_get_port_if_special(u));
  600. lua_settable(L, -3);
  601. }
  602. if (u->tldlen > 0) {
  603. lua_pushstring(L, "tld");
  604. lua_pushlstring(L, rspamd_url_tld_unsafe(u), u->tldlen);
  605. lua_settable(L, -3);
  606. }
  607. if (u->userlen > 0) {
  608. lua_pushstring(L, "user");
  609. lua_pushlstring(L, rspamd_url_user(u), u->userlen);
  610. lua_settable(L, -3);
  611. }
  612. if (u->datalen > 0) {
  613. lua_pushstring(L, "path");
  614. lua_pushlstring(L, rspamd_url_data_unsafe(u), u->datalen);
  615. lua_settable(L, -3);
  616. }
  617. if (u->querylen > 0) {
  618. lua_pushstring(L, "query");
  619. lua_pushlstring(L, rspamd_url_query_unsafe(u), u->querylen);
  620. lua_settable(L, -3);
  621. }
  622. if (u->fragmentlen > 0) {
  623. lua_pushstring(L, "fragment");
  624. lua_pushlstring(L, rspamd_url_fragment_unsafe(u), u->fragmentlen);
  625. lua_settable(L, -3);
  626. }
  627. lua_pushstring(L, "protocol");
  628. lua_pushstring(L, rspamd_url_protocol_name(u->protocol));
  629. lua_settable(L, -3);
  630. }
  631. else {
  632. lua_pushnil(L);
  633. }
  634. return 1;
  635. }
/*
 * File-local memory pool; lua_url_create uses it when the caller does not
 * supply a mempool of its own.
 */
static rspamd_mempool_t *static_lua_url_pool;

/* Creates the static pool (tagged "static_lua_url"); presumably runs at load time via RSPAMD_CONSTRUCTOR */
RSPAMD_CONSTRUCTOR(rspamd_urls_static_pool_ctor)
{
	static_lua_url_pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
											 "static_lua_url", 0);
}

/* Releases the static pool; presumably runs at unload/exit via RSPAMD_DESTRUCTOR */
RSPAMD_DESTRUCTOR(rspamd_urls_static_pool_dtor)
{
	rspamd_mempool_delete(static_lua_url_pool);
}
  646. /***
  647. * @function url.create([mempool,] str, [{flags_table}])
  648. * @param {rspamd_mempool} memory pool for URL, e.g. `task:get_mempool()`
  649. * @param {string} text that contains URL (can also contain other stuff)
  650. * @return {url} new url object that exists as long as the corresponding mempool exists
  651. */
  652. static gint
  653. lua_url_create(lua_State *L)
  654. {
  655. LUA_TRACE_POINT;
  656. rspamd_mempool_t *pool;
  657. struct rspamd_lua_text *t;
  658. struct rspamd_lua_url *u;
  659. if (lua_type(L, 1) == LUA_TUSERDATA) {
  660. pool = rspamd_lua_check_mempool(L, 1);
  661. t = lua_check_text_or_string(L, 2);
  662. }
  663. else {
  664. pool = static_lua_url_pool;
  665. t = lua_check_text_or_string(L, 2);
  666. }
  667. if (pool == NULL || t == NULL) {
  668. return luaL_error(L, "invalid arguments");
  669. }
  670. else {
  671. rspamd_url_find_single(pool, t->start, t->len, RSPAMD_URL_FIND_ALL,
  672. lua_url_single_inserter, L);
  673. if (lua_type(L, -1) != LUA_TUSERDATA) {
  674. /* URL is actually not found */
  675. lua_pushnil(L);
  676. return 1;
  677. }
  678. u = (struct rspamd_lua_url *) lua_touserdata(L, -1);
  679. if (lua_type(L, 3) == LUA_TTABLE) {
  680. /* Add flags */
  681. for (lua_pushnil(L); lua_next(L, 3); lua_pop(L, 1)) {
  682. int nmask = 0;
  683. const gchar *fname = lua_tostring(L, -1);
  684. if (rspamd_url_flag_from_string(fname, &nmask)) {
  685. u->url->flags |= nmask;
  686. }
  687. else {
  688. lua_pop(L, 1);
  689. return luaL_error(L, "invalid flag: %s", fname);
  690. }
  691. }
  692. }
  693. }
  694. return 1;
  695. }
  696. /***
  697. * @function url.init(tld_file)
  698. * Initialize url library if not initialized yet by Rspamd
  699. * @param {string} tld_file path to effective_tld_names.dat file (public suffix list)
  700. * @return nothing
  701. */
  702. static gint
  703. lua_url_init(lua_State *L)
  704. {
  705. const gchar *tld_path;
  706. tld_path = luaL_checkstring(L, 1);
  707. rspamd_url_init(tld_path);
  708. return 0;
  709. }
  710. static gboolean
  711. lua_url_table_inserter(struct rspamd_url *url, gsize start_offset,
  712. gsize end_offset, gpointer ud)
  713. {
  714. lua_State *L = ud;
  715. struct rspamd_lua_url *lua_url;
  716. gint n;
  717. n = rspamd_lua_table_size(L, -1);
  718. lua_url = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
  719. rspamd_lua_setclass(L, "rspamd{url}", -1);
  720. lua_url->url = url;
  721. lua_rawseti(L, -2, n + 1);
  722. return TRUE;
  723. }
  724. static gint
  725. lua_url_all(lua_State *L)
  726. {
  727. LUA_TRACE_POINT;
  728. rspamd_mempool_t *pool = rspamd_lua_check_mempool(L, 1);
  729. const gchar *text;
  730. size_t length;
  731. if (pool == NULL) {
  732. lua_pushnil(L);
  733. }
  734. else {
  735. text = luaL_checklstring(L, 2, &length);
  736. if (text != NULL) {
  737. lua_newtable(L);
  738. rspamd_url_find_multiple(pool, text, length,
  739. RSPAMD_URL_FIND_ALL, NULL,
  740. lua_url_table_inserter, L);
  741. }
  742. else {
  743. lua_pushnil(L);
  744. }
  745. }
  746. return 1;
  747. }
  748. /***
  749. * @method url:get_flags()
  750. * Return flags for a specified URL as map 'flag'->true for all flags set,
  751. * possible flags are:
  752. *
  753. * - `phished`: URL is likely phished
  754. * - `numeric`: URL is numeric (e.g. IP address)
  755. * - `obscured`: URL was obscured
  756. * - `redirected`: URL comes from redirector
  757. * - `html_displayed`: URL is used just for displaying purposes
  758. * - `text`: URL comes from the text
  759. * - `subject`: URL comes from the subject
  760. * - `host_encoded`: URL host part is encoded
  761. * - `schema_encoded`: URL schema part is encoded
  762. * - `query_encoded`: URL query part is encoded
  763. * - `missing_slashes`: URL has some slashes missing
  764. * - `idn`: URL has international characters
  765. * - `has_port`: URL has port
  766. * - `has_user`: URL has user part
  767. * - `schemaless`: URL has no schema
  768. * - `unnormalised`: URL has some unicode unnormalities
  769. * - `zw_spaces`: URL has some zero width spaces
  770. * - `url_displayed`: URL has some other url-like string in visible part
  771. * - `image`: URL is from src attribute of img HTML tag
  772. * @return {table} URL flags
  773. */
  774. #define PUSH_FLAG(fl) \
  775. do { \
  776. if (flags & (fl)) { \
  777. lua_pushstring(L, rspamd_url_flag_to_string(fl)); \
  778. lua_pushboolean(L, true); \
  779. lua_settable(L, -3); \
  780. } \
  781. } while (0)
  782. static gint
  783. lua_url_get_flags(lua_State *L)
  784. {
  785. LUA_TRACE_POINT;
  786. struct rspamd_lua_url *url = lua_check_url(L, 1);
  787. enum rspamd_url_flags flags;
  788. if (url != NULL) {
  789. flags = url->url->flags;
  790. lua_createtable(L, 0, 4);
  791. for (gint i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
  792. PUSH_FLAG(1u << i);
  793. }
  794. }
  795. else {
  796. return luaL_error(L, "invalid arguments");
  797. }
  798. return 1;
  799. }
  800. #undef PUSH_FLAG
  801. static gint
  802. lua_url_get_flags_num(lua_State *L)
  803. {
  804. LUA_TRACE_POINT;
  805. struct rspamd_lua_url *url = lua_check_url(L, 1);
  806. if (url) {
  807. lua_pushinteger(L, url->url->flags);
  808. }
  809. else {
  810. return luaL_error(L, "invalid arguments");
  811. }
  812. return 1;
  813. }
  814. static gint
  815. lua_url_get_order(lua_State *L)
  816. {
  817. LUA_TRACE_POINT;
  818. struct rspamd_lua_url *url = lua_check_url(L, 1);
  819. if (url) {
  820. if (url->url->order != (uint16_t) -1) {
  821. lua_pushinteger(L, url->url->order);
  822. }
  823. else {
  824. lua_pushnil(L);
  825. }
  826. }
  827. else {
  828. return luaL_error(L, "invalid arguments");
  829. }
  830. return 1;
  831. }
  832. static gint
  833. lua_url_get_part_order(lua_State *L)
  834. {
  835. LUA_TRACE_POINT;
  836. struct rspamd_lua_url *url = lua_check_url(L, 1);
  837. if (url) {
  838. if (url->url->part_order != (uint16_t) -1) {
  839. lua_pushinteger(L, url->url->part_order);
  840. }
  841. else {
  842. lua_pushnil(L);
  843. }
  844. }
  845. else {
  846. return luaL_error(L, "invalid arguments");
  847. }
  848. return 1;
  849. }
  850. void lua_tree_url_callback(gpointer key, gpointer value, gpointer ud)
  851. {
  852. struct rspamd_lua_url *lua_url;
  853. struct rspamd_url *url = (struct rspamd_url *) value;
  854. struct lua_tree_cb_data *cb = ud;
  855. if ((url->protocol & cb->protocols_mask) == url->protocol) {
  856. /* Handle different flags application logic */
  857. switch (cb->flags_mode) {
  858. case url_flags_mode_include_any:
  859. if (url->flags != (url->flags & cb->flags_mask)) {
  860. return;
  861. }
  862. break;
  863. case url_flags_mode_include_explicit:
  864. if ((url->flags & cb->flags_mask) != cb->flags_mask) {
  865. return;
  866. }
  867. break;
  868. case url_flags_mode_exclude_include:
  869. if ((url->flags & cb->flags_exclude_mask) != 0) {
  870. return;
  871. }
  872. if ((url->flags & cb->flags_mask) == 0) {
  873. return;
  874. }
  875. break;
  876. }
  877. if (cb->skip_prob > 0) {
  878. gdouble coin = rspamd_random_double_fast_seed(&cb->random_seed);
  879. if (coin < cb->skip_prob) {
  880. return;
  881. }
  882. }
  883. lua_url = lua_newuserdata(cb->L, sizeof(struct rspamd_lua_url));
  884. lua_pushvalue(cb->L, cb->metatable_pos);
  885. lua_setmetatable(cb->L, -2);
  886. lua_url->url = url;
  887. lua_rawseti(cb->L, -2, cb->i++);
  888. }
  889. }
  890. gboolean
  891. lua_url_cbdata_fill(lua_State *L,
  892. gint pos,
  893. struct lua_tree_cb_data *cbd,
  894. guint default_protocols,
  895. guint default_flags,
  896. gsize max_urls)
  897. {
  898. gint protocols_mask = 0;
  899. gint pos_arg_type = lua_type(L, pos);
  900. guint flags_mask = default_flags;
  901. gboolean seen_flags = FALSE, seen_protocols = FALSE;
  902. memset(cbd, 0, sizeof(*cbd));
  903. cbd->flags_mode = url_flags_mode_include_any;
  904. if (pos_arg_type == LUA_TBOOLEAN) {
  905. protocols_mask = default_protocols;
  906. if (lua_toboolean(L, 2)) {
  907. protocols_mask |= PROTOCOL_MAILTO;
  908. }
  909. }
  910. else if (pos_arg_type == LUA_TTABLE) {
  911. if (rspamd_lua_geti(L, 1, pos) == LUA_TNIL) {
  912. /* New method: indexed table */
  913. lua_getfield(L, pos, "flags");
  914. if (lua_istable(L, -1)) {
  915. gint top = lua_gettop(L);
  916. lua_getfield(L, pos, "flags_mode");
  917. if (lua_isstring(L, -1)) {
  918. const gchar *mode_str = lua_tostring(L, -1);
  919. if (strcmp(mode_str, "explicit") == 0) {
  920. cbd->flags_mode = url_flags_mode_include_explicit;
  921. /*
  922. * Ignore default flags in this mode and include
  923. * merely flags specified by a caller
  924. */
  925. flags_mask = 0;
  926. }
  927. }
  928. lua_pop(L, 1);
  929. for (lua_pushnil(L); lua_next(L, top); lua_pop(L, 1)) {
  930. int nmask = 0;
  931. if (lua_type(L, -1) == LUA_TSTRING) {
  932. const gchar *fname = lua_tostring(L, -1);
  933. if (rspamd_url_flag_from_string(fname, &nmask)) {
  934. flags_mask |= nmask;
  935. }
  936. else {
  937. msg_info("bad url flag: %s", fname);
  938. return FALSE;
  939. }
  940. }
  941. else {
  942. flags_mask |= lua_tointeger(L, -1);
  943. }
  944. }
  945. seen_flags = TRUE;
  946. }
  947. else {
  948. flags_mask |= default_flags;
  949. }
  950. lua_pop(L, 1);
  951. lua_getfield(L, pos, "protocols");
  952. if (lua_istable(L, -1)) {
  953. gint top = lua_gettop(L);
  954. for (lua_pushnil(L); lua_next(L, top); lua_pop(L, 1)) {
  955. int nmask;
  956. const gchar *pname = lua_tostring(L, -1);
  957. nmask = rspamd_url_protocol_from_string(pname);
  958. if (nmask != PROTOCOL_UNKNOWN) {
  959. protocols_mask |= nmask;
  960. }
  961. else {
  962. msg_info("bad url protocol: %s", pname);
  963. return FALSE;
  964. }
  965. }
  966. seen_protocols = TRUE;
  967. }
  968. else {
  969. protocols_mask = default_protocols;
  970. }
  971. lua_pop(L, 1);
  972. if (!seen_protocols) {
  973. lua_getfield(L, pos, "emails");
  974. if (lua_isboolean(L, -1)) {
  975. if (lua_toboolean(L, -1)) {
  976. protocols_mask |= PROTOCOL_MAILTO;
  977. }
  978. }
  979. lua_pop(L, 1);
  980. }
  981. if (!seen_flags) {
  982. lua_getfield(L, pos, "images");
  983. if (lua_isboolean(L, -1)) {
  984. if (lua_toboolean(L, -1)) {
  985. flags_mask |= RSPAMD_URL_FLAG_IMAGE;
  986. }
  987. else {
  988. flags_mask &= ~RSPAMD_URL_FLAG_IMAGE;
  989. }
  990. }
  991. else {
  992. flags_mask &= ~RSPAMD_URL_FLAG_IMAGE;
  993. }
  994. lua_pop(L, 1);
  995. }
  996. if (!seen_flags) {
  997. lua_getfield(L, pos, "content");
  998. if (lua_isboolean(L, -1)) {
  999. if (lua_toboolean(L, -1)) {
  1000. flags_mask |= RSPAMD_URL_FLAG_CONTENT;
  1001. }
  1002. else {
  1003. flags_mask &= ~RSPAMD_URL_FLAG_CONTENT;
  1004. }
  1005. }
  1006. else {
  1007. flags_mask &= ~RSPAMD_URL_FLAG_CONTENT;
  1008. }
  1009. lua_pop(L, 1);
  1010. }
  1011. lua_getfield(L, pos, "max_urls");
  1012. if (lua_isnumber(L, -1)) {
  1013. max_urls = lua_tonumber(L, -1);
  1014. }
  1015. lua_pop(L, 1);
  1016. lua_getfield(L, pos, "sort");
  1017. if (lua_isboolean(L, -1)) {
  1018. cbd->sort = TRUE;
  1019. }
  1020. lua_pop(L, 1);
  1021. }
  1022. else {
  1023. /* Plain table of the protocols */
  1024. for (lua_pushnil(L); lua_next(L, pos); lua_pop(L, 1)) {
  1025. int nmask;
  1026. const gchar *pname = lua_tostring(L, -1);
  1027. nmask = rspamd_url_protocol_from_string(pname);
  1028. if (nmask != PROTOCOL_UNKNOWN) {
  1029. protocols_mask |= nmask;
  1030. }
  1031. else {
  1032. msg_info("bad url protocol: %s", pname);
  1033. return FALSE;
  1034. }
  1035. }
  1036. }
  1037. lua_pop(L, 1); /* After rspamd_lua_geti */
  1038. }
  1039. else if (pos_arg_type == LUA_TSTRING) {
  1040. const gchar *plist = lua_tostring(L, pos);
  1041. gchar **strvec;
  1042. gchar *const *cvec;
  1043. strvec = g_strsplit_set(plist, ",;", -1);
  1044. cvec = strvec;
  1045. while (*cvec) {
  1046. int nmask;
  1047. nmask = rspamd_url_protocol_from_string(*cvec);
  1048. if (nmask != PROTOCOL_UNKNOWN) {
  1049. protocols_mask |= nmask;
  1050. }
  1051. else {
  1052. msg_info("bad url protocol: %s", *cvec);
  1053. g_strfreev(strvec);
  1054. return FALSE;
  1055. }
  1056. cvec++;
  1057. }
  1058. g_strfreev(strvec);
  1059. }
  1060. else if (pos_arg_type == LUA_TNONE || pos_arg_type == LUA_TNIL) {
  1061. protocols_mask = default_protocols;
  1062. flags_mask = default_flags;
  1063. }
  1064. else {
  1065. return FALSE;
  1066. }
  1067. if (lua_type(L, pos + 1) == LUA_TBOOLEAN) {
  1068. if (lua_toboolean(L, pos + 1)) {
  1069. flags_mask |= RSPAMD_URL_FLAG_IMAGE;
  1070. }
  1071. else {
  1072. flags_mask &= ~RSPAMD_URL_FLAG_IMAGE;
  1073. }
  1074. }
  1075. cbd->i = 1;
  1076. cbd->L = L;
  1077. cbd->max_urls = max_urls;
  1078. cbd->protocols_mask = protocols_mask;
  1079. cbd->flags_mask = flags_mask;
  1080. /* This needs to be removed from the stack */
  1081. rspamd_lua_class_metatable(L, "rspamd{url}");
  1082. cbd->metatable_pos = lua_gettop(L);
  1083. (void) lua_checkstack(L, cbd->metatable_pos + 4);
  1084. return TRUE;
  1085. }
  1086. gboolean
  1087. lua_url_cbdata_fill_exclude_include(lua_State *L,
  1088. gint pos,
  1089. struct lua_tree_cb_data *cbd,
  1090. guint default_protocols,
  1091. gsize max_urls)
  1092. {
  1093. guint protocols_mask = default_protocols;
  1094. guint include_flags_mask, exclude_flags_mask;
  1095. gint pos_arg_type = lua_type(L, pos);
  1096. memset(cbd, 0, sizeof(*cbd));
  1097. cbd->flags_mode = url_flags_mode_exclude_include;
  1098. /* Include flags */
  1099. if (pos_arg_type == LUA_TTABLE) {
  1100. include_flags_mask = 0; /* Reset to no flags */
  1101. for (lua_pushnil(L); lua_next(L, pos); lua_pop(L, 1)) {
  1102. int nmask = 0;
  1103. if (lua_type(L, -1) == LUA_TSTRING) {
  1104. const gchar *fname = lua_tostring(L, -1);
  1105. if (rspamd_url_flag_from_string(fname, &nmask)) {
  1106. include_flags_mask |= nmask;
  1107. }
  1108. else {
  1109. msg_info("bad url include flag: %s", fname);
  1110. return FALSE;
  1111. }
  1112. }
  1113. else {
  1114. include_flags_mask |= lua_tointeger(L, -1);
  1115. }
  1116. }
  1117. }
  1118. else if (pos_arg_type == LUA_TNIL || pos_arg_type == LUA_TNONE) {
  1119. /* Include all flags */
  1120. include_flags_mask = ~0U;
  1121. }
  1122. else {
  1123. msg_info("bad arguments: wrong include mask");
  1124. return FALSE;
  1125. }
  1126. /* Exclude flags */
  1127. pos_arg_type = lua_type(L, pos + 1);
  1128. if (pos_arg_type == LUA_TTABLE) {
  1129. exclude_flags_mask = 0; /* Reset to no flags */
  1130. for (lua_pushnil(L); lua_next(L, pos + 1); lua_pop(L, 1)) {
  1131. int nmask = 0;
  1132. if (lua_type(L, -1) == LUA_TSTRING) {
  1133. const gchar *fname = lua_tostring(L, -1);
  1134. if (rspamd_url_flag_from_string(fname, &nmask)) {
  1135. exclude_flags_mask |= nmask;
  1136. }
  1137. else {
  1138. msg_info("bad url exclude flag: %s", fname);
  1139. return FALSE;
  1140. }
  1141. }
  1142. else {
  1143. exclude_flags_mask |= lua_tointeger(L, -1);
  1144. }
  1145. }
  1146. }
  1147. else if (pos_arg_type == LUA_TNIL || pos_arg_type == LUA_TNONE) {
  1148. /* Empty all exclude flags */
  1149. exclude_flags_mask = 0U;
  1150. }
  1151. else {
  1152. msg_info("bad arguments: wrong exclude mask");
  1153. return FALSE;
  1154. }
  1155. if (lua_type(L, pos + 2) == LUA_TTABLE) {
  1156. protocols_mask = 0U; /* Reset all protocols */
  1157. for (lua_pushnil(L); lua_next(L, pos + 2); lua_pop(L, 1)) {
  1158. int nmask;
  1159. const gchar *pname = lua_tostring(L, -1);
  1160. nmask = rspamd_url_protocol_from_string(pname);
  1161. if (nmask != PROTOCOL_UNKNOWN) {
  1162. protocols_mask |= nmask;
  1163. }
  1164. else {
  1165. msg_info("bad url protocol: %s", pname);
  1166. return FALSE;
  1167. }
  1168. }
  1169. }
  1170. else {
  1171. protocols_mask = default_protocols;
  1172. }
  1173. cbd->i = 1;
  1174. cbd->L = L;
  1175. cbd->max_urls = max_urls;
  1176. cbd->protocols_mask = protocols_mask;
  1177. cbd->flags_mask = include_flags_mask;
  1178. cbd->flags_exclude_mask = exclude_flags_mask;
  1179. /* This needs to be removed from the stack */
  1180. rspamd_lua_class_metatable(L, "rspamd{url}");
  1181. cbd->metatable_pos = lua_gettop(L);
  1182. (void) lua_checkstack(L, cbd->metatable_pos + 4);
  1183. return TRUE;
  1184. }
  1185. void lua_url_cbdata_dtor(struct lua_tree_cb_data *cbd)
  1186. {
  1187. if (cbd->metatable_pos != -1) {
  1188. lua_remove(cbd->L, cbd->metatable_pos);
  1189. }
  1190. }
  1191. gsize lua_url_adjust_skip_prob(float timestamp,
  1192. guchar digest[16],
  1193. struct lua_tree_cb_data *cb,
  1194. gsize sz)
  1195. {
  1196. if (cb->max_urls > 0 && sz > cb->max_urls) {
  1197. cb->skip_prob = 1.0 - ((gdouble) cb->max_urls) / (gdouble) sz;
  1198. /*
  1199. * Use task dependent probabilistic seed to ensure that
  1200. * consequent task:get_urls return the same list of urls
  1201. * We use both digest and timestamp here to avoid attack surface
  1202. * based just on digest.
  1203. */
  1204. memcpy(&cb->random_seed, digest, 4);
  1205. memcpy(((unsigned char *) &cb->random_seed) + 4, &timestamp, 4);
  1206. sz = cb->max_urls;
  1207. }
  1208. return sz;
  1209. }
  1210. static gint
  1211. lua_url_eq(lua_State *L)
  1212. {
  1213. LUA_TRACE_POINT;
  1214. struct rspamd_lua_url *u1 = lua_check_url(L, 1),
  1215. *u2 = lua_check_url(L, 2);
  1216. if (u1 && u2) {
  1217. lua_pushboolean(L, (rspamd_url_cmp(u1->url, u2->url) == 0));
  1218. }
  1219. else {
  1220. lua_pushboolean(L, false);
  1221. }
  1222. return 1;
  1223. }
  1224. static gint
  1225. lua_url_lt(lua_State *L)
  1226. {
  1227. LUA_TRACE_POINT;
  1228. struct rspamd_lua_url *u1 = lua_check_url(L, 1),
  1229. *u2 = lua_check_url(L, 2);
  1230. if (u1 && u2) {
  1231. lua_pushinteger(L, rspamd_url_cmp(u1->url, u2->url));
  1232. }
  1233. else {
  1234. return luaL_error(L, "invalid arguments");
  1235. }
  1236. return 1;
  1237. }
  1238. static gint
  1239. lua_load_url(lua_State *L)
  1240. {
  1241. lua_newtable(L);
  1242. luaL_register(L, NULL, urllib_f);
  1243. /* Push flags */
  1244. lua_createtable(L, 0, RSPAMD_URL_MAX_FLAG_SHIFT);
  1245. for (int i = 0; i < RSPAMD_URL_MAX_FLAG_SHIFT; i++) {
  1246. guint flag = 1u << i;
  1247. lua_pushinteger(L, flag);
  1248. lua_setfield(L, -2, rspamd_url_flag_to_string(flag));
  1249. }
  1250. lua_setfield(L, -2, "flags");
  1251. return 1;
  1252. }
  1253. void luaopen_url(lua_State *L)
  1254. {
  1255. rspamd_lua_new_class(L, "rspamd{url}", urllib_m);
  1256. lua_pop(L, 1);
  1257. rspamd_lua_add_preload(L, "rspamd_url", lua_load_url);
  1258. }