You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_url.c 19KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "contrib/uthash/utlist.h"
  18. /***
  19. * @module rspamd_url
  20. * This module provides routines to handle URLs and extract URLs from text.
  21. * Objects of this class are returned, for example, by `task:get_urls()` or `task:get_emails()`.
  22. * You can also create `rspamd_url` from any text.
  23. * @example
  24. local url = require "rspamd_url"
  25. local pool = mpool.create()
  26. local res = url.create(pool, 'Look at: http://user@test.example.com/test?query')
  27. local t = res:to_table()
  28. -- Content of t:
  29. -- url = ['http://test.example.com/test?query']
  30. -- host = ['test.example.com']
  31. -- user = ['user']
  32. -- path = ['test']
  33. -- tld = ['example.com']
  34. pool:destroy() -- res is destroyed here, so you should not use it afterwards
  35. local mistake = res:to_table() -- INVALID! as pool is destroyed
  36. */
  37. /* URL methods */
  38. LUA_FUNCTION_DEF (url, get_length);
  39. LUA_FUNCTION_DEF (url, get_host);
  40. LUA_FUNCTION_DEF (url, get_port);
  41. LUA_FUNCTION_DEF (url, get_user);
  42. LUA_FUNCTION_DEF (url, get_path);
  43. LUA_FUNCTION_DEF (url, get_query);
  44. LUA_FUNCTION_DEF (url, get_fragment);
  45. LUA_FUNCTION_DEF (url, get_text);
  46. LUA_FUNCTION_DEF (url, tostring);
  47. LUA_FUNCTION_DEF (url, get_raw);
  48. LUA_FUNCTION_DEF (url, get_tld);
  49. LUA_FUNCTION_DEF (url, get_flags);
  50. LUA_FUNCTION_DEF (url, get_protocol);
  51. LUA_FUNCTION_DEF (url, to_table);
  52. LUA_FUNCTION_DEF (url, is_phished);
  53. LUA_FUNCTION_DEF (url, is_redirected);
  54. LUA_FUNCTION_DEF (url, is_obscured);
  55. LUA_FUNCTION_DEF (url, is_html_displayed);
  56. LUA_FUNCTION_DEF (url, is_subject);
  57. LUA_FUNCTION_DEF (url, get_phished);
  58. LUA_FUNCTION_DEF (url, get_count);
  59. LUA_FUNCTION_DEF (url, get_visible);
  60. LUA_FUNCTION_DEF (url, create);
  61. LUA_FUNCTION_DEF (url, init);
  62. LUA_FUNCTION_DEF (url, all);
  63. static const struct luaL_reg urllib_m[] = {
  64. LUA_INTERFACE_DEF (url, get_length),
  65. LUA_INTERFACE_DEF (url, get_host),
  66. LUA_INTERFACE_DEF (url, get_port),
  67. LUA_INTERFACE_DEF (url, get_user),
  68. LUA_INTERFACE_DEF (url, get_path),
  69. LUA_INTERFACE_DEF (url, get_query),
  70. LUA_INTERFACE_DEF (url, get_fragment),
  71. LUA_INTERFACE_DEF (url, get_text),
  72. LUA_INTERFACE_DEF (url, get_tld),
  73. LUA_INTERFACE_DEF (url, get_raw),
  74. LUA_INTERFACE_DEF (url, get_protocol),
  75. LUA_INTERFACE_DEF (url, to_table),
  76. LUA_INTERFACE_DEF (url, is_phished),
  77. LUA_INTERFACE_DEF (url, is_redirected),
  78. LUA_INTERFACE_DEF (url, is_obscured),
  79. LUA_INTERFACE_DEF (url, is_html_displayed),
  80. LUA_INTERFACE_DEF (url, is_subject),
  81. LUA_INTERFACE_DEF (url, get_phished),
  82. LUA_INTERFACE_DEF (url, get_visible),
  83. LUA_INTERFACE_DEF (url, get_count),
  84. LUA_INTERFACE_DEF (url, get_flags),
  85. {"get_redirected", lua_url_get_phished},
  86. {"__tostring", lua_url_tostring},
  87. {NULL, NULL}
  88. };
  89. static const struct luaL_reg urllib_f[] = {
  90. LUA_INTERFACE_DEF (url, init),
  91. LUA_INTERFACE_DEF (url, create),
  92. LUA_INTERFACE_DEF (url, all),
  93. {NULL, NULL}
  94. };
  95. static struct rspamd_lua_url *
  96. lua_check_url (lua_State * L, gint pos)
  97. {
  98. void *ud = rspamd_lua_check_udata (L, pos, "rspamd{url}");
  99. luaL_argcheck (L, ud != NULL, pos, "'url' expected");
  100. return ud ? ((struct rspamd_lua_url *)ud) : NULL;
  101. }
  102. /***
  103. * @method url:get_length()
  104. * Get length of the url
  105. * @return {number} length of url in bytes
  106. */
  107. static gint
  108. lua_url_get_length (lua_State *L)
  109. {
  110. LUA_TRACE_POINT;
  111. struct rspamd_lua_url *url = lua_check_url (L, 1);
  112. if (url != NULL) {
  113. lua_pushinteger (L, url->url->urllen);
  114. }
  115. else {
  116. lua_pushnil (L);
  117. }
  118. return 1;
  119. }
  120. /***
  121. * @method url:get_host()
  122. * Get domain part of the url
  123. * @return {string} domain part of URL
  124. */
  125. static gint
  126. lua_url_get_host (lua_State *L)
  127. {
  128. LUA_TRACE_POINT;
  129. struct rspamd_lua_url *url = lua_check_url (L, 1);
  130. if (url != NULL) {
  131. lua_pushlstring (L, url->url->host, url->url->hostlen);
  132. }
  133. else {
  134. lua_pushnil (L);
  135. }
  136. return 1;
  137. }
  138. /***
  139. * @method url:get_port()
  140. * Get port of the url
  141. * @return {number} url port
  142. */
  143. static gint
  144. lua_url_get_port (lua_State *L)
  145. {
  146. LUA_TRACE_POINT;
  147. struct rspamd_lua_url *url = lua_check_url (L, 1);
  148. if (url != NULL) {
  149. lua_pushinteger (L, url->url->port);
  150. }
  151. else {
  152. lua_pushnil (L);
  153. }
  154. return 1;
  155. }
  156. /***
  157. * @method url:get_user()
  158. * Get user part of the url (e.g. username in email)
  159. * @return {string} user part of URL
  160. */
  161. static gint
  162. lua_url_get_user (lua_State *L)
  163. {
  164. LUA_TRACE_POINT;
  165. struct rspamd_lua_url *url = lua_check_url (L, 1);
  166. if (url != NULL && url->url->user != NULL) {
  167. lua_pushlstring (L, url->url->user, url->url->userlen);
  168. }
  169. else {
  170. lua_pushnil (L);
  171. }
  172. return 1;
  173. }
  174. /***
  175. * @method url:get_path()
  176. * Get path of the url
  177. * @return {string} path part of URL
  178. */
  179. static gint
  180. lua_url_get_path (lua_State *L)
  181. {
  182. LUA_TRACE_POINT;
  183. struct rspamd_lua_url *url = lua_check_url (L, 1);
  184. if (url != NULL && url->url->datalen > 0) {
  185. lua_pushlstring (L, url->url->data, url->url->datalen);
  186. }
  187. else {
  188. lua_pushnil (L);
  189. }
  190. return 1;
  191. }
  192. /***
  193. * @method url:get_query()
  194. * Get query of the url
  195. * @return {string} query part of URL
  196. */
  197. static gint
  198. lua_url_get_query (lua_State *L)
  199. {
  200. LUA_TRACE_POINT;
  201. struct rspamd_lua_url *url = lua_check_url (L, 1);
  202. if (url != NULL && url->url->querylen > 0) {
  203. lua_pushlstring (L, url->url->query, url->url->querylen);
  204. }
  205. else {
  206. lua_pushnil (L);
  207. }
  208. return 1;
  209. }
  210. /***
  211. * @method url:get_fragment()
  212. * Get fragment of the url
  213. * @return {string} fragment part of URL
  214. */
  215. static gint
  216. lua_url_get_fragment (lua_State *L)
  217. {
  218. LUA_TRACE_POINT;
  219. struct rspamd_lua_url *url = lua_check_url (L, 1);
  220. if (url != NULL && url->url->fragmentlen > 0) {
  221. lua_pushlstring (L, url->url->fragment, url->url->fragmentlen);
  222. }
  223. else {
  224. lua_pushnil (L);
  225. }
  226. return 1;
  227. }
  228. /***
  229. * @method url:get_text()
  230. * Get full content of the url
  231. * @return {string} url string
  232. */
  233. static gint
  234. lua_url_get_text (lua_State *L)
  235. {
  236. LUA_TRACE_POINT;
  237. struct rspamd_lua_url *url = lua_check_url (L, 1);
  238. if (url != NULL) {
  239. lua_pushlstring (L, url->url->string, url->url->urllen);
  240. }
  241. else {
  242. lua_pushnil (L);
  243. }
  244. return 1;
  245. }
  246. /***
  247. * @method url:tostring()
  248. * Get full content of the url or user@domain in case of email
  249. * @return {string} url as a string
  250. */
  251. static gint
  252. lua_url_tostring (lua_State *L)
  253. {
  254. LUA_TRACE_POINT;
  255. struct rspamd_lua_url *url = lua_check_url (L, 1);
  256. if (url != NULL && url->url != NULL) {
  257. if (url->url->protocol == PROTOCOL_MAILTO) {
  258. if (url->url->userlen + 1 + url->url->hostlen >= url->url->urllen) {
  259. lua_pushlstring (L, url->url->user,
  260. url->url->userlen + 1 + url->url->hostlen);
  261. }
  262. else {
  263. lua_pushlstring (L, url->url->string, url->url->urllen);
  264. }
  265. }
  266. else {
  267. lua_pushlstring (L, url->url->string, url->url->urllen);
  268. }
  269. }
  270. else {
  271. lua_pushnil (L);
  272. }
  273. return 1;
  274. }
  275. /***
  276. * @method url:get_raw()
  277. * Get full content of the url as it was parsed (e.g. with urldecode)
  278. * @return {string} url string
  279. */
  280. static gint
  281. lua_url_get_raw (lua_State *L)
  282. {
  283. LUA_TRACE_POINT;
  284. struct rspamd_lua_url *url = lua_check_url (L, 1);
  285. if (url != NULL) {
  286. lua_pushlstring (L, url->url->raw, url->url->rawlen);
  287. }
  288. else {
  289. lua_pushnil (L);
  290. }
  291. return 1;
  292. }
  293. /***
  294. * @method url:is_phished()
  295. * Check whether URL is treated as phished
  296. * @return {boolean} `true` if URL is phished
  297. */
  298. static gint
  299. lua_url_is_phished (lua_State *L)
  300. {
  301. LUA_TRACE_POINT;
  302. struct rspamd_lua_url *url = lua_check_url (L, 1);
  303. if (url != NULL) {
  304. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_PHISHED);
  305. }
  306. else {
  307. lua_pushnil (L);
  308. }
  309. return 1;
  310. }
  311. /***
  312. * @method url:is_redirected()
  313. * Check whether URL was redirected
  314. * @return {boolean} `true` if URL is redirected
  315. */
  316. static gint
  317. lua_url_is_redirected (lua_State *L)
  318. {
  319. LUA_TRACE_POINT;
  320. struct rspamd_lua_url *url = lua_check_url (L, 1);
  321. if (url != NULL) {
  322. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_REDIRECTED);
  323. }
  324. else {
  325. lua_pushnil (L);
  326. }
  327. return 1;
  328. }
  329. /***
  330. * @method url:is_obscured()
  331. * Check whether URL is treated as obscured or obfusicated (e.g. numbers in IP address or other hacks)
  332. * @return {boolean} `true` if URL is obscured
  333. */
  334. static gint
  335. lua_url_is_obscured (lua_State *L)
  336. {
  337. LUA_TRACE_POINT;
  338. struct rspamd_lua_url *url = lua_check_url (L, 1);
  339. if (url != NULL) {
  340. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_OBSCURED);
  341. }
  342. else {
  343. lua_pushnil (L);
  344. }
  345. return 1;
  346. }
  347. /***
  348. * @method url:is_html_displayed()
  349. * Check whether URL is just displayed in HTML (e.g. NOT a real href)
  350. * @return {boolean} `true` if URL is displayed only
  351. */
  352. static gint
  353. lua_url_is_html_displayed (lua_State *L)
  354. {
  355. LUA_TRACE_POINT;
  356. struct rspamd_lua_url *url = lua_check_url (L, 1);
  357. if (url != NULL) {
  358. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED);
  359. }
  360. else {
  361. lua_pushnil (L);
  362. }
  363. return 1;
  364. }
  365. /***
  366. * @method url:is_subject()
  367. * Check whether URL is found in subject
  368. * @return {boolean} `true` if URL is found in subject
  369. */
  370. static gint
  371. lua_url_is_subject (lua_State *L)
  372. {
  373. LUA_TRACE_POINT;
  374. struct rspamd_lua_url *url = lua_check_url (L, 1);
  375. if (url != NULL) {
  376. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_SUBJECT);
  377. }
  378. else {
  379. lua_pushnil (L);
  380. }
  381. return 1;
  382. }
  383. /***
  384. * @method url:get_phished()
  385. * Get another URL that pretends to be this URL (e.g. used in phishing)
  386. * @return {url} phished URL
  387. */
  388. static gint
  389. lua_url_get_phished (lua_State *L)
  390. {
  391. LUA_TRACE_POINT;
  392. struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
  393. if (url) {
  394. if (url->url->phished_url != NULL) {
  395. if (url->url->flags &
  396. (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
  397. purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  398. rspamd_lua_setclass (L, "rspamd{url}", -1);
  399. purl->url = url->url->phished_url;
  400. return 1;
  401. }
  402. }
  403. }
  404. lua_pushnil (L);
  405. return 1;
  406. }
  407. /***
  408. * @method url:get_tld()
  409. * Get effective second level domain part (eSLD) of the url host
  410. * @return {string} effective second level domain part (eSLD) of the url host
  411. */
  412. static gint
  413. lua_url_get_tld (lua_State *L)
  414. {
  415. LUA_TRACE_POINT;
  416. struct rspamd_lua_url *url = lua_check_url (L, 1);
  417. if (url != NULL && url->url->tldlen > 0) {
  418. lua_pushlstring (L, url->url->tld, url->url->tldlen);
  419. }
  420. else {
  421. lua_pushnil (L);
  422. }
  423. return 1;
  424. }
  425. /***
  426. * @method url:get_protocol()
  427. * Get protocol name
  428. * @return {string} protocol as a string
  429. */
  430. static gint
  431. lua_url_get_protocol (lua_State *L)
  432. {
  433. LUA_TRACE_POINT;
  434. struct rspamd_lua_url *url = lua_check_url (L, 1);
  435. if (url != NULL && url->url->protocol != PROTOCOL_UNKNOWN) {
  436. lua_pushstring (L, rspamd_url_protocol_name (url->url->protocol));
  437. }
  438. else {
  439. lua_pushnil (L);
  440. }
  441. return 1;
  442. }
  443. /***
  444. * @method url:get_count()
  445. * Return number of occurrencies for this particular URL
  446. * @return {number} number of occurrencies
  447. */
  448. static gint
  449. lua_url_get_count (lua_State *L)
  450. {
  451. LUA_TRACE_POINT;
  452. struct rspamd_lua_url *url = lua_check_url (L, 1);
  453. if (url != NULL && url->url != NULL) {
  454. lua_pushinteger (L, url->url->count);
  455. }
  456. else {
  457. lua_pushnil (L);
  458. }
  459. return 1;
  460. }
  461. /***
  462. * @method url:get_visible()
  463. * Get visible part of the url with html tags stripped
  464. * @return {string} url string
  465. */
  466. static gint
  467. lua_url_get_visible (lua_State *L)
  468. {
  469. LUA_TRACE_POINT;
  470. struct rspamd_lua_url *url = lua_check_url (L, 1);
  471. if (url != NULL && url->url->visible_part) {
  472. lua_pushstring (L, url->url->visible_part);
  473. }
  474. else {
  475. lua_pushnil (L);
  476. }
  477. return 1;
  478. }
  479. /***
  480. * @method url:to_table()
  481. * Return url as a table with the following fields:
  482. *
  483. * - `url`: full content
  484. * - `host`: hostname part
  485. * - `user`: user part
  486. * - `path`: path part
  487. * - `tld`: top level domain
  488. * - `protocol`: url protocol
  489. * @return {table} URL as a table
  490. */
  491. static gint
  492. lua_url_to_table (lua_State *L)
  493. {
  494. LUA_TRACE_POINT;
  495. struct rspamd_lua_url *url = lua_check_url (L, 1);
  496. struct rspamd_url *u;
  497. if (url != NULL) {
  498. u = url->url;
  499. lua_createtable (L, 0, 12);
  500. lua_pushstring (L, "url");
  501. lua_pushlstring (L, u->string, u->urllen);
  502. lua_settable (L, -3);
  503. if (u->hostlen > 0) {
  504. lua_pushstring (L, "host");
  505. lua_pushlstring (L, u->host, u->hostlen);
  506. lua_settable (L, -3);
  507. }
  508. if (u->port != 0) {
  509. lua_pushstring (L, "port");
  510. lua_pushinteger (L, u->port);
  511. lua_settable (L, -3);
  512. }
  513. if (u->tldlen > 0) {
  514. lua_pushstring (L, "tld");
  515. lua_pushlstring (L, u->tld, u->tldlen);
  516. lua_settable (L, -3);
  517. }
  518. if (u->userlen > 0) {
  519. lua_pushstring (L, "user");
  520. lua_pushlstring (L, u->user, u->userlen);
  521. lua_settable (L, -3);
  522. }
  523. if (u->datalen > 0) {
  524. lua_pushstring (L, "path");
  525. lua_pushlstring (L, u->data, u->datalen);
  526. lua_settable (L, -3);
  527. }
  528. if (u->querylen > 0) {
  529. lua_pushstring (L, "query");
  530. lua_pushlstring (L, u->query, u->querylen);
  531. lua_settable (L, -3);
  532. }
  533. if (u->fragmentlen > 0) {
  534. lua_pushstring (L, "fragment");
  535. lua_pushlstring (L, u->fragment, u->fragmentlen);
  536. lua_settable (L, -3);
  537. }
  538. lua_pushstring (L, "protocol");
  539. lua_pushstring (L, rspamd_url_protocol_name (u->protocol));
  540. lua_settable (L, -3);
  541. }
  542. else {
  543. lua_pushnil (L);
  544. }
  545. return 1;
  546. }
  547. static void
  548. lua_url_single_inserter (struct rspamd_url *url, gsize start_offset,
  549. gsize end_offset, gpointer ud)
  550. {
  551. lua_State *L = ud;
  552. struct rspamd_lua_url *lua_url;
  553. lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  554. rspamd_lua_setclass (L, "rspamd{url}", -1);
  555. lua_url->url = url;
  556. }
  557. /***
  558. * @function url.create([mempool,] str)
  559. * @param {rspamd_mempool} memory pool for URL, e.g. `task:get_mempool()`
  560. * @param {string} text that contains URL (can also contain other stuff)
  561. * @return {url} new url object that exists as long as the corresponding mempool exists
  562. */
  563. static gint
  564. lua_url_create (lua_State *L)
  565. {
  566. LUA_TRACE_POINT;
  567. rspamd_mempool_t *pool;
  568. const gchar *text;
  569. size_t length;
  570. gboolean own_pool = FALSE;
  571. if (lua_type (L, 1) == LUA_TUSERDATA) {
  572. pool = rspamd_lua_check_mempool (L, 1);
  573. text = luaL_checklstring (L, 2, &length);
  574. }
  575. else {
  576. own_pool = TRUE;
  577. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "url");
  578. text = luaL_checklstring (L, 1, &length);
  579. }
  580. if (pool == NULL || text == NULL) {
  581. if (own_pool && pool) {
  582. rspamd_mempool_delete (pool);
  583. }
  584. return luaL_error (L, "invalid arguments");
  585. }
  586. else {
  587. rspamd_url_find_single (pool, text, length, RSPAMD_URL_FIND_ALL,
  588. lua_url_single_inserter, L);
  589. if (lua_type (L, -1) != LUA_TUSERDATA) {
  590. /* URL is actually not found */
  591. lua_pushnil (L);
  592. }
  593. }
  594. if (own_pool && pool) {
  595. rspamd_mempool_delete (pool);
  596. }
  597. return 1;
  598. }
  599. /***
  600. * @function url.create(tld_file)
  601. * Initialize url library if not initialized yet by Rspamd
  602. * @param {string} tld_file for url library
  603. * @return nothing
  604. */
  605. static gint
  606. lua_url_init (lua_State *L)
  607. {
  608. const gchar *tld_path;
  609. tld_path = luaL_checkstring (L, 1);
  610. rspamd_url_init (tld_path);
  611. return 0;
  612. }
  613. static void
  614. lua_url_table_inserter (struct rspamd_url *url, gsize start_offset,
  615. gsize end_offset, gpointer ud)
  616. {
  617. lua_State *L = ud;
  618. struct rspamd_lua_url *lua_url;
  619. gint n;
  620. n = rspamd_lua_table_size (L, -1);
  621. lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  622. rspamd_lua_setclass (L, "rspamd{url}", -1);
  623. lua_url->url = url;
  624. lua_pushinteger (L, n + 1);
  625. lua_pushlstring (L, url->string, url->urllen);
  626. lua_settable (L, -3);
  627. }
  628. static gint
  629. lua_url_all (lua_State *L)
  630. {
  631. LUA_TRACE_POINT;
  632. rspamd_mempool_t *pool = rspamd_lua_check_mempool (L, 1);
  633. const gchar *text;
  634. size_t length;
  635. if (pool == NULL) {
  636. lua_pushnil (L);
  637. }
  638. else {
  639. text = luaL_checklstring (L, 2, &length);
  640. if (text != NULL) {
  641. lua_newtable (L);
  642. rspamd_url_find_multiple (pool, text, length,
  643. RSPAMD_URL_FIND_ALL, NULL,
  644. lua_url_table_inserter, L);
  645. }
  646. else {
  647. lua_pushnil (L);
  648. }
  649. }
  650. return 1;
  651. }
  652. /***
  653. * @method url:get_flags()
  654. * Return flags for a specified URL as map 'flag'->true for all flags set,
  655. * possible flags are:
  656. *
  657. * - `phished`: URL is likely phished
  658. * - `numeric`: URL is numeric (e.g. IP address)
  659. * - `obscured`: URL was obscured
  660. * - `redirected`: URL comes from redirector
  661. * - `html_displayed`: URL is used just for displaying purposes
  662. * - `text`: URL comes from the text
  663. * - `subject`: URL comes from the subject
  664. * - `host_encoded`: URL host part is encoded
  665. * - `schema_encoded`: URL schema part is encoded
  666. * - `query_encoded`: URL query part is encoded
  667. * - `missing_slahes`: URL has some slashes missing
  668. * - `idn`: URL has international characters
  669. * - `has_port`: URL has port
  670. * - `has_user`: URL has user part
  671. * - `schemaless`: URL has no schema
  672. * - `unnormalised`: URL has some unicode unnormalities
  673. * - `zw_spaces`: URL has some zero width spaces
  674. * - `url_displayed`: URL has some other url-like string in visible part
  675. * @return {table} URL flags
  676. */
  677. #define PUSH_FLAG(fl, name) do { \
  678. if (flags & (fl)) { \
  679. lua_pushstring (L, (name)); \
  680. lua_pushboolean (L, true); \
  681. lua_settable (L, -3); \
  682. } \
  683. } while (0)
  684. static gint
  685. lua_url_get_flags (lua_State *L)
  686. {
  687. LUA_TRACE_POINT;
  688. struct rspamd_lua_url *url = lua_check_url (L, 1);
  689. enum rspamd_url_flags flags;
  690. if (url != NULL) {
  691. flags = url->url->flags;
  692. lua_createtable (L, 0, 4);
  693. PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
  694. PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
  695. PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
  696. PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
  697. PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
  698. PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
  699. PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
  700. PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
  701. PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
  702. PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
  703. PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
  704. PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
  705. PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
  706. PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
  707. PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
  708. PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
  709. PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
  710. PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
  711. PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
  712. }
  713. else {
  714. return luaL_error (L, "invalid arguments");
  715. }
  716. return 1;
  717. }
  718. #undef PUSH_FLAG
  719. static gint
  720. lua_load_url (lua_State * L)
  721. {
  722. lua_newtable (L);
  723. luaL_register (L, NULL, urllib_f);
  724. return 1;
  725. }
  726. void
  727. luaopen_url (lua_State * L)
  728. {
  729. rspamd_lua_new_class (L, "rspamd{url}", urllib_m);
  730. lua_pop (L, 1);
  731. rspamd_lua_add_preload (L, "rspamd_url", lua_load_url);
  732. }