You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_url.c 20KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "contrib/uthash/utlist.h"
  18. /***
  19. * @module rspamd_url
  20. * This module provides routines to handle URLs and to extract URLs from text.
  21. * Objects of this class are returned, for example, by `task:get_urls()` or `task:get_emails()`.
  22. * You can also create `rspamd_url` from any text.
  23. * @example
  24. local url = require "rspamd_url"
  25. local mpool = require "rspamd_mempool"
  26. url.init("/usr/share/rspamd/effective_tld_names.dat")
  27. local pool = mpool.create()
  28. local res = url.create(pool, 'Look at: http://user@test.example.com/test?query')
  29. local t = res:to_table()
  30. -- Content of t:
  31. -- url = ['http://test.example.com/test?query']
  32. -- host = ['test.example.com']
  33. -- user = ['user']
  34. -- path = ['test']
  35. -- tld = ['example.com']
  36. pool:destroy() -- res is destroyed here, so you should not use it afterwards
  37. local mistake = res:to_table() -- INVALID! as pool is destroyed
  38. */
  /*
   * Forward declarations of every Lua-callable handler in this file.
   */

  /* Accessors for individual URL components */
  LUA_FUNCTION_DEF (url, get_length);
  LUA_FUNCTION_DEF (url, get_host);
  LUA_FUNCTION_DEF (url, get_port);
  LUA_FUNCTION_DEF (url, get_user);
  LUA_FUNCTION_DEF (url, get_path);
  LUA_FUNCTION_DEF (url, get_query);
  LUA_FUNCTION_DEF (url, get_fragment);
  LUA_FUNCTION_DEF (url, get_tld);
  LUA_FUNCTION_DEF (url, get_protocol);

  /* Whole-URL representations */
  LUA_FUNCTION_DEF (url, get_text);
  LUA_FUNCTION_DEF (url, tostring);
  LUA_FUNCTION_DEF (url, get_raw);
  LUA_FUNCTION_DEF (url, get_visible);
  LUA_FUNCTION_DEF (url, to_table);

  /* Flags and predicates */
  LUA_FUNCTION_DEF (url, get_flags);
  LUA_FUNCTION_DEF (url, is_phished);
  LUA_FUNCTION_DEF (url, is_redirected);
  LUA_FUNCTION_DEF (url, is_obscured);
  LUA_FUNCTION_DEF (url, is_html_displayed);
  LUA_FUNCTION_DEF (url, is_subject);

  /* Relations to other URLs and counters */
  LUA_FUNCTION_DEF (url, get_phished);
  LUA_FUNCTION_DEF (url, set_redirected);
  LUA_FUNCTION_DEF (url, get_count);

  /* Module-level functions */
  LUA_FUNCTION_DEF (url, create);
  LUA_FUNCTION_DEF (url, init);
  LUA_FUNCTION_DEF (url, all);
  66. static const struct luaL_reg urllib_m[] = {
  67. LUA_INTERFACE_DEF (url, get_length),
  68. LUA_INTERFACE_DEF (url, get_host),
  69. LUA_INTERFACE_DEF (url, get_port),
  70. LUA_INTERFACE_DEF (url, get_user),
  71. LUA_INTERFACE_DEF (url, get_path),
  72. LUA_INTERFACE_DEF (url, get_query),
  73. LUA_INTERFACE_DEF (url, get_fragment),
  74. LUA_INTERFACE_DEF (url, get_text),
  75. LUA_INTERFACE_DEF (url, get_tld),
  76. LUA_INTERFACE_DEF (url, get_raw),
  77. LUA_INTERFACE_DEF (url, get_protocol),
  78. LUA_INTERFACE_DEF (url, to_table),
  79. LUA_INTERFACE_DEF (url, is_phished),
  80. LUA_INTERFACE_DEF (url, is_redirected),
  81. LUA_INTERFACE_DEF (url, is_obscured),
  82. LUA_INTERFACE_DEF (url, is_html_displayed),
  83. LUA_INTERFACE_DEF (url, is_subject),
  84. LUA_INTERFACE_DEF (url, get_phished),
  85. LUA_INTERFACE_DEF (url, get_visible),
  86. LUA_INTERFACE_DEF (url, get_count),
  87. LUA_INTERFACE_DEF (url, get_flags),
  88. {"get_redirected", lua_url_get_phished},
  89. LUA_INTERFACE_DEF (url, set_redirected),
  90. {"__tostring", lua_url_tostring},
  91. {NULL, NULL}
  92. };
  93. static const struct luaL_reg urllib_f[] = {
  94. LUA_INTERFACE_DEF (url, init),
  95. LUA_INTERFACE_DEF (url, create),
  96. LUA_INTERFACE_DEF (url, all),
  97. {NULL, NULL}
  98. };
  99. struct rspamd_lua_url *
  100. lua_check_url (lua_State * L, gint pos)
  101. {
  102. void *ud = rspamd_lua_check_udata (L, pos, "rspamd{url}");
  103. luaL_argcheck (L, ud != NULL, pos, "'url' expected");
  104. return ud ? ((struct rspamd_lua_url *)ud) : NULL;
  105. }
  106. static gboolean
  107. lua_url_single_inserter (struct rspamd_url *url, gsize start_offset,
  108. gsize end_offset, gpointer ud)
  109. {
  110. lua_State *L = ud;
  111. struct rspamd_lua_url *lua_url;
  112. lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  113. rspamd_lua_setclass (L, "rspamd{url}", -1);
  114. lua_url->url = url;
  115. return TRUE;
  116. }
  117. /***
  118. * @method url:get_length()
  119. * Get length of the url
  120. * @return {number} length of url in bytes
  121. */
  122. static gint
  123. lua_url_get_length (lua_State *L)
  124. {
  125. LUA_TRACE_POINT;
  126. struct rspamd_lua_url *url = lua_check_url (L, 1);
  127. if (url != NULL) {
  128. lua_pushinteger (L, url->url->urllen);
  129. }
  130. else {
  131. lua_pushnil (L);
  132. }
  133. return 1;
  134. }
  135. /***
  136. * @method url:get_host()
  137. * Get domain part of the url
  138. * @return {string} domain part of URL
  139. */
  140. static gint
  141. lua_url_get_host (lua_State *L)
  142. {
  143. LUA_TRACE_POINT;
  144. struct rspamd_lua_url *url = lua_check_url (L, 1);
  145. if (url != NULL) {
  146. lua_pushlstring (L, url->url->host, url->url->hostlen);
  147. }
  148. else {
  149. lua_pushnil (L);
  150. }
  151. return 1;
  152. }
  153. /***
  154. * @method url:get_port()
  155. * Get port of the url
  156. * @return {number} url port
  157. */
  158. static gint
  159. lua_url_get_port (lua_State *L)
  160. {
  161. LUA_TRACE_POINT;
  162. struct rspamd_lua_url *url = lua_check_url (L, 1);
  163. if (url != NULL) {
  164. lua_pushinteger (L, url->url->port);
  165. }
  166. else {
  167. lua_pushnil (L);
  168. }
  169. return 1;
  170. }
  171. /***
  172. * @method url:get_user()
  173. * Get user part of the url (e.g. username in email)
  174. * @return {string} user part of URL
  175. */
  176. static gint
  177. lua_url_get_user (lua_State *L)
  178. {
  179. LUA_TRACE_POINT;
  180. struct rspamd_lua_url *url = lua_check_url (L, 1);
  181. if (url != NULL && url->url->user != NULL) {
  182. lua_pushlstring (L, url->url->user, url->url->userlen);
  183. }
  184. else {
  185. lua_pushnil (L);
  186. }
  187. return 1;
  188. }
  189. /***
  190. * @method url:get_path()
  191. * Get path of the url
  192. * @return {string} path part of URL
  193. */
  194. static gint
  195. lua_url_get_path (lua_State *L)
  196. {
  197. LUA_TRACE_POINT;
  198. struct rspamd_lua_url *url = lua_check_url (L, 1);
  199. if (url != NULL && url->url->datalen > 0) {
  200. lua_pushlstring (L, url->url->data, url->url->datalen);
  201. }
  202. else {
  203. lua_pushnil (L);
  204. }
  205. return 1;
  206. }
  207. /***
  208. * @method url:get_query()
  209. * Get query of the url
  210. * @return {string} query part of URL
  211. */
  212. static gint
  213. lua_url_get_query (lua_State *L)
  214. {
  215. LUA_TRACE_POINT;
  216. struct rspamd_lua_url *url = lua_check_url (L, 1);
  217. if (url != NULL && url->url->querylen > 0) {
  218. lua_pushlstring (L, url->url->query, url->url->querylen);
  219. }
  220. else {
  221. lua_pushnil (L);
  222. }
  223. return 1;
  224. }
  225. /***
  226. * @method url:get_fragment()
  227. * Get fragment of the url
  228. * @return {string} fragment part of URL
  229. */
  230. static gint
  231. lua_url_get_fragment (lua_State *L)
  232. {
  233. LUA_TRACE_POINT;
  234. struct rspamd_lua_url *url = lua_check_url (L, 1);
  235. if (url != NULL && url->url->fragmentlen > 0) {
  236. lua_pushlstring (L, url->url->fragment, url->url->fragmentlen);
  237. }
  238. else {
  239. lua_pushnil (L);
  240. }
  241. return 1;
  242. }
  243. /***
  244. * @method url:get_text()
  245. * Get full content of the url
  246. * @return {string} url string
  247. */
  248. static gint
  249. lua_url_get_text (lua_State *L)
  250. {
  251. LUA_TRACE_POINT;
  252. struct rspamd_lua_url *url = lua_check_url (L, 1);
  253. if (url != NULL) {
  254. lua_pushlstring (L, url->url->string, url->url->urllen);
  255. }
  256. else {
  257. lua_pushnil (L);
  258. }
  259. return 1;
  260. }
  261. /***
  262. * @method url:tostring()
  263. * Get full content of the url or user@domain in case of email
  264. * @return {string} url as a string
  265. */
  266. static gint
  267. lua_url_tostring (lua_State *L)
  268. {
  269. LUA_TRACE_POINT;
  270. struct rspamd_lua_url *url = lua_check_url (L, 1);
  271. if (url != NULL && url->url != NULL) {
  272. if (url->url->protocol == PROTOCOL_MAILTO) {
  273. gchar *tmp = g_malloc (url->url->userlen + 1 +
  274. url->url->hostlen);
  275. memcpy (tmp, url->url->user, url->url->userlen);
  276. tmp[url->url->userlen] = '@';
  277. memcpy (tmp + url->url->userlen + 1, url->url->host,
  278. url->url->hostlen);
  279. lua_pushlstring (L, tmp, url->url->userlen + 1 + url->url->hostlen);
  280. g_free (tmp);
  281. }
  282. else {
  283. lua_pushlstring (L, url->url->string, url->url->urllen);
  284. }
  285. }
  286. else {
  287. lua_pushnil (L);
  288. }
  289. return 1;
  290. }
  291. /***
  292. * @method url:get_raw()
  293. * Get full content of the url as it was parsed (e.g. with urldecode)
  294. * @return {string} url string
  295. */
  296. static gint
  297. lua_url_get_raw (lua_State *L)
  298. {
  299. LUA_TRACE_POINT;
  300. struct rspamd_lua_url *url = lua_check_url (L, 1);
  301. if (url != NULL) {
  302. lua_pushlstring (L, url->url->raw, url->url->rawlen);
  303. }
  304. else {
  305. lua_pushnil (L);
  306. }
  307. return 1;
  308. }
  309. /***
  310. * @method url:is_phished()
  311. * Check whether URL is treated as phished
  312. * @return {boolean} `true` if URL is phished
  313. */
  314. static gint
  315. lua_url_is_phished (lua_State *L)
  316. {
  317. LUA_TRACE_POINT;
  318. struct rspamd_lua_url *url = lua_check_url (L, 1);
  319. if (url != NULL) {
  320. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_PHISHED);
  321. }
  322. else {
  323. lua_pushnil (L);
  324. }
  325. return 1;
  326. }
  327. /***
  328. * @method url:is_redirected()
  329. * Check whether URL was redirected
  330. * @return {boolean} `true` if URL is redirected
  331. */
  332. static gint
  333. lua_url_is_redirected (lua_State *L)
  334. {
  335. LUA_TRACE_POINT;
  336. struct rspamd_lua_url *url = lua_check_url (L, 1);
  337. if (url != NULL) {
  338. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_REDIRECTED);
  339. }
  340. else {
  341. lua_pushnil (L);
  342. }
  343. return 1;
  344. }
  345. /***
  346. * @method url:is_obscured()
  347. * Check whether URL is treated as obscured or obfusicated (e.g. numbers in IP address or other hacks)
  348. * @return {boolean} `true` if URL is obscured
  349. */
  350. static gint
  351. lua_url_is_obscured (lua_State *L)
  352. {
  353. LUA_TRACE_POINT;
  354. struct rspamd_lua_url *url = lua_check_url (L, 1);
  355. if (url != NULL) {
  356. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_OBSCURED);
  357. }
  358. else {
  359. lua_pushnil (L);
  360. }
  361. return 1;
  362. }
  363. /***
  364. * @method url:is_html_displayed()
  365. * Check whether URL is just displayed in HTML (e.g. NOT a real href)
  366. * @return {boolean} `true` if URL is displayed only
  367. */
  368. static gint
  369. lua_url_is_html_displayed (lua_State *L)
  370. {
  371. LUA_TRACE_POINT;
  372. struct rspamd_lua_url *url = lua_check_url (L, 1);
  373. if (url != NULL) {
  374. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED);
  375. }
  376. else {
  377. lua_pushnil (L);
  378. }
  379. return 1;
  380. }
  381. /***
  382. * @method url:is_subject()
  383. * Check whether URL is found in subject
  384. * @return {boolean} `true` if URL is found in subject
  385. */
  386. static gint
  387. lua_url_is_subject (lua_State *L)
  388. {
  389. LUA_TRACE_POINT;
  390. struct rspamd_lua_url *url = lua_check_url (L, 1);
  391. if (url != NULL) {
  392. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_SUBJECT);
  393. }
  394. else {
  395. lua_pushnil (L);
  396. }
  397. return 1;
  398. }
  399. /***
  400. * @method url:get_phished()
  401. * Get another URL that pretends to be this URL (e.g. used in phishing)
  402. * @return {url} phished URL
  403. */
  404. static gint
  405. lua_url_get_phished (lua_State *L)
  406. {
  407. LUA_TRACE_POINT;
  408. struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
  409. if (url) {
  410. if (url->url->phished_url != NULL) {
  411. if (url->url->flags &
  412. (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
  413. purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  414. rspamd_lua_setclass (L, "rspamd{url}", -1);
  415. purl->url = url->url->phished_url;
  416. return 1;
  417. }
  418. }
  419. }
  420. lua_pushnil (L);
  421. return 1;
  422. }
  423. /***
  424. * @method url:set_redirected(url,[ pool])
  425. * Set url as redirected to another url
  426. * @param {string|url} url new url that is redirecting an old one
  427. * @param {pool} pool if url is a string this is required for parsing
  428. * @return {url} parsed redirected url (if needed)
  429. */
  430. static gint
  431. lua_url_set_redirected (lua_State *L)
  432. {
  433. LUA_TRACE_POINT;
  434. struct rspamd_lua_url *url = lua_check_url (L, 1), *redir;
  435. rspamd_mempool_t *pool = NULL;
  436. if (url == NULL) {
  437. return luaL_error (L, "url is required as the first argument");
  438. }
  439. if (lua_type (L, 2) == LUA_TSTRING) {
  440. /* Parse url */
  441. if (lua_type (L, 3) != LUA_TUSERDATA) {
  442. return luaL_error (L, "mempool is required as the third argument");
  443. }
  444. pool = rspamd_lua_check_mempool (L, 3);
  445. if (pool == NULL) {
  446. return luaL_error (L, "mempool is required as the third argument");
  447. }
  448. gsize len;
  449. const gchar *urlstr = lua_tolstring (L, 2, &len);
  450. rspamd_url_find_single (pool, urlstr, len, RSPAMD_URL_FIND_ALL,
  451. lua_url_single_inserter, L);
  452. if (lua_type (L, -1) != LUA_TUSERDATA) {
  453. /* URL is actually not found */
  454. lua_pushnil (L);
  455. }
  456. else {
  457. redir = lua_check_url (L, -1);
  458. url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  459. url->url->phished_url = redir->url;
  460. }
  461. }
  462. else {
  463. redir = lua_check_url (L, 2);
  464. if (redir == NULL) {
  465. return luaL_error (L, "url is required as the second argument");
  466. }
  467. url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
  468. url->url->phished_url = redir->url;
  469. /* Push back on stack */
  470. lua_pushvalue (L, 2);
  471. }
  472. return 1;
  473. }
  474. /***
  475. * @method url:get_tld()
  476. * Get effective second level domain part (eSLD) of the url host
  477. * @return {string} effective second level domain part (eSLD) of the url host
  478. */
  479. static gint
  480. lua_url_get_tld (lua_State *L)
  481. {
  482. LUA_TRACE_POINT;
  483. struct rspamd_lua_url *url = lua_check_url (L, 1);
  484. if (url != NULL && url->url->tldlen > 0) {
  485. lua_pushlstring (L, url->url->tld, url->url->tldlen);
  486. }
  487. else {
  488. lua_pushnil (L);
  489. }
  490. return 1;
  491. }
  492. /***
  493. * @method url:get_protocol()
  494. * Get protocol name
  495. * @return {string} protocol as a string
  496. */
  497. static gint
  498. lua_url_get_protocol (lua_State *L)
  499. {
  500. LUA_TRACE_POINT;
  501. struct rspamd_lua_url *url = lua_check_url (L, 1);
  502. if (url != NULL && url->url->protocol != PROTOCOL_UNKNOWN) {
  503. lua_pushstring (L, rspamd_url_protocol_name (url->url->protocol));
  504. }
  505. else {
  506. lua_pushnil (L);
  507. }
  508. return 1;
  509. }
  510. /***
  511. * @method url:get_count()
  512. * Return number of occurrencies for this particular URL
  513. * @return {number} number of occurrencies
  514. */
  515. static gint
  516. lua_url_get_count (lua_State *L)
  517. {
  518. LUA_TRACE_POINT;
  519. struct rspamd_lua_url *url = lua_check_url (L, 1);
  520. if (url != NULL && url->url != NULL) {
  521. lua_pushinteger (L, url->url->count);
  522. }
  523. else {
  524. lua_pushnil (L);
  525. }
  526. return 1;
  527. }
  528. /***
  529. * @method url:get_visible()
  530. * Get visible part of the url with html tags stripped
  531. * @return {string} url string
  532. */
  533. static gint
  534. lua_url_get_visible (lua_State *L)
  535. {
  536. LUA_TRACE_POINT;
  537. struct rspamd_lua_url *url = lua_check_url (L, 1);
  538. if (url != NULL && url->url->visible_part) {
  539. lua_pushstring (L, url->url->visible_part);
  540. }
  541. else {
  542. lua_pushnil (L);
  543. }
  544. return 1;
  545. }
  546. /***
  547. * @method url:to_table()
  548. * Return url as a table with the following fields:
  549. *
  550. * - `url`: full content
  551. * - `host`: hostname part
  552. * - `user`: user part
  553. * - `path`: path part
  554. * - `tld`: top level domain
  555. * - `protocol`: url protocol
  556. * @return {table} URL as a table
  557. */
  558. static gint
  559. lua_url_to_table (lua_State *L)
  560. {
  561. LUA_TRACE_POINT;
  562. struct rspamd_lua_url *url = lua_check_url (L, 1);
  563. struct rspamd_url *u;
  564. if (url != NULL) {
  565. u = url->url;
  566. lua_createtable (L, 0, 12);
  567. lua_pushstring (L, "url");
  568. lua_pushlstring (L, u->string, u->urllen);
  569. lua_settable (L, -3);
  570. if (u->hostlen > 0) {
  571. lua_pushstring (L, "host");
  572. lua_pushlstring (L, u->host, u->hostlen);
  573. lua_settable (L, -3);
  574. }
  575. if (u->port != 0) {
  576. lua_pushstring (L, "port");
  577. lua_pushinteger (L, u->port);
  578. lua_settable (L, -3);
  579. }
  580. if (u->tldlen > 0) {
  581. lua_pushstring (L, "tld");
  582. lua_pushlstring (L, u->tld, u->tldlen);
  583. lua_settable (L, -3);
  584. }
  585. if (u->userlen > 0) {
  586. lua_pushstring (L, "user");
  587. lua_pushlstring (L, u->user, u->userlen);
  588. lua_settable (L, -3);
  589. }
  590. if (u->datalen > 0) {
  591. lua_pushstring (L, "path");
  592. lua_pushlstring (L, u->data, u->datalen);
  593. lua_settable (L, -3);
  594. }
  595. if (u->querylen > 0) {
  596. lua_pushstring (L, "query");
  597. lua_pushlstring (L, u->query, u->querylen);
  598. lua_settable (L, -3);
  599. }
  600. if (u->fragmentlen > 0) {
  601. lua_pushstring (L, "fragment");
  602. lua_pushlstring (L, u->fragment, u->fragmentlen);
  603. lua_settable (L, -3);
  604. }
  605. lua_pushstring (L, "protocol");
  606. lua_pushstring (L, rspamd_url_protocol_name (u->protocol));
  607. lua_settable (L, -3);
  608. }
  609. else {
  610. lua_pushnil (L);
  611. }
  612. return 1;
  613. }
  614. /***
  615. * @function url.create([mempool,] str)
  616. * @param {rspamd_mempool} memory pool for URL, e.g. `task:get_mempool()`
  617. * @param {string} text that contains URL (can also contain other stuff)
  618. * @return {url} new url object that exists as long as the corresponding mempool exists
  619. */
  620. static gint
  621. lua_url_create (lua_State *L)
  622. {
  623. LUA_TRACE_POINT;
  624. rspamd_mempool_t *pool;
  625. const gchar *text;
  626. size_t length;
  627. gboolean own_pool = FALSE;
  628. if (lua_type (L, 1) == LUA_TUSERDATA) {
  629. pool = rspamd_lua_check_mempool (L, 1);
  630. text = luaL_checklstring (L, 2, &length);
  631. }
  632. else {
  633. own_pool = TRUE;
  634. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "url");
  635. text = luaL_checklstring (L, 1, &length);
  636. }
  637. if (pool == NULL || text == NULL) {
  638. if (own_pool && pool) {
  639. rspamd_mempool_delete (pool);
  640. }
  641. return luaL_error (L, "invalid arguments");
  642. }
  643. else {
  644. rspamd_url_find_single (pool, text, length, RSPAMD_URL_FIND_ALL,
  645. lua_url_single_inserter, L);
  646. if (lua_type (L, -1) != LUA_TUSERDATA) {
  647. /* URL is actually not found */
  648. lua_pushnil (L);
  649. }
  650. }
  651. if (own_pool && pool) {
  652. rspamd_mempool_delete (pool);
  653. }
  654. return 1;
  655. }
  656. /***
  657. * @function url.init(tld_file)
  658. * Initialize url library if not initialized yet by Rspamd
  659. * @param {string} tld_file path to effective_tld_names.dat file (public suffix list)
  660. * @return nothing
  661. */
  662. static gint
  663. lua_url_init (lua_State *L)
  664. {
  665. const gchar *tld_path;
  666. tld_path = luaL_checkstring (L, 1);
  667. rspamd_url_init (tld_path);
  668. return 0;
  669. }
  670. static gboolean
  671. lua_url_table_inserter (struct rspamd_url *url, gsize start_offset,
  672. gsize end_offset, gpointer ud)
  673. {
  674. lua_State *L = ud;
  675. struct rspamd_lua_url *lua_url;
  676. gint n;
  677. n = rspamd_lua_table_size (L, -1);
  678. lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  679. rspamd_lua_setclass (L, "rspamd{url}", -1);
  680. lua_url->url = url;
  681. lua_pushinteger (L, n + 1);
  682. lua_pushlstring (L, url->string, url->urllen);
  683. lua_settable (L, -3);
  684. return TRUE;
  685. }
  686. static gint
  687. lua_url_all (lua_State *L)
  688. {
  689. LUA_TRACE_POINT;
  690. rspamd_mempool_t *pool = rspamd_lua_check_mempool (L, 1);
  691. const gchar *text;
  692. size_t length;
  693. if (pool == NULL) {
  694. lua_pushnil (L);
  695. }
  696. else {
  697. text = luaL_checklstring (L, 2, &length);
  698. if (text != NULL) {
  699. lua_newtable (L);
  700. rspamd_url_find_multiple (pool, text, length,
  701. RSPAMD_URL_FIND_ALL, NULL,
  702. lua_url_table_inserter, L);
  703. }
  704. else {
  705. lua_pushnil (L);
  706. }
  707. }
  708. return 1;
  709. }
  710. /***
  711. * @method url:get_flags()
  712. * Return flags for a specified URL as map 'flag'->true for all flags set,
  713. * possible flags are:
  714. *
  715. * - `phished`: URL is likely phished
  716. * - `numeric`: URL is numeric (e.g. IP address)
  717. * - `obscured`: URL was obscured
  718. * - `redirected`: URL comes from redirector
  719. * - `html_displayed`: URL is used just for displaying purposes
  720. * - `text`: URL comes from the text
  721. * - `subject`: URL comes from the subject
  722. * - `host_encoded`: URL host part is encoded
  723. * - `schema_encoded`: URL schema part is encoded
  724. * - `query_encoded`: URL query part is encoded
  725. * - `missing_slahes`: URL has some slashes missing
  726. * - `idn`: URL has international characters
  727. * - `has_port`: URL has port
  728. * - `has_user`: URL has user part
  729. * - `schemaless`: URL has no schema
  730. * - `unnormalised`: URL has some unicode unnormalities
  731. * - `zw_spaces`: URL has some zero width spaces
  732. * - `url_displayed`: URL has some other url-like string in visible part
  733. * - `image`: URL is from src attribute of img HTML tag
  734. * @return {table} URL flags
  735. */
  736. #define PUSH_FLAG(fl, name) do { \
  737. if (flags & (fl)) { \
  738. lua_pushstring (L, (name)); \
  739. lua_pushboolean (L, true); \
  740. lua_settable (L, -3); \
  741. } \
  742. } while (0)
  743. static gint
  744. lua_url_get_flags (lua_State *L)
  745. {
  746. LUA_TRACE_POINT;
  747. struct rspamd_lua_url *url = lua_check_url (L, 1);
  748. enum rspamd_url_flags flags;
  749. if (url != NULL) {
  750. flags = url->url->flags;
  751. lua_createtable (L, 0, 4);
  752. PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
  753. PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
  754. PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
  755. PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
  756. PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
  757. PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
  758. PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
  759. PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
  760. PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
  761. PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
  762. PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
  763. PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
  764. PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
  765. PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
  766. PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
  767. PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
  768. PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
  769. PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
  770. PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
  771. PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
  772. }
  773. else {
  774. return luaL_error (L, "invalid arguments");
  775. }
  776. return 1;
  777. }
  778. #undef PUSH_FLAG
  779. static gint
  780. lua_load_url (lua_State * L)
  781. {
  782. lua_newtable (L);
  783. luaL_register (L, NULL, urllib_f);
  784. return 1;
  785. }
  786. void
  787. luaopen_url (lua_State * L)
  788. {
  789. rspamd_lua_new_class (L, "rspamd{url}", urllib_m);
  790. lua_pop (L, 1);
  791. rspamd_lua_add_preload (L, "rspamd_url", lua_load_url);
  792. }