You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_url.c 20KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "lua_common.h"
  17. #include "contrib/uthash/utlist.h"
  18. /***
  19. * @module rspamd_url
  20. * This module provides routines to handle URLs and to extract URLs from text.
  21. * Objects of this class are returned, for example, by `task:get_urls()` or `task:get_emails()`.
  22. * You can also create `rspamd_url` from any text.
  23. * @example
  24. local url = require "rspamd_url"
  25. local pool = mpool.create()
  26. local res = url.create(pool, 'Look at: http://user@test.example.com/test?query')
  27. local t = res:to_table()
  28. -- Content of t:
  29. -- url = ['http://test.example.com/test?query']
  30. -- host = ['test.example.com']
  31. -- user = ['user']
  32. -- path = ['test']
  33. -- tld = ['example.com']
  34. pool:destroy() -- res is destroyed here, so you should not use it afterwards
  35. local mistake = res:to_table() -- INVALID! as pool is destroyed
  36. */
  37. /* URL methods */
  38. LUA_FUNCTION_DEF (url, get_length);
  39. LUA_FUNCTION_DEF (url, get_host);
  40. LUA_FUNCTION_DEF (url, get_port);
  41. LUA_FUNCTION_DEF (url, get_user);
  42. LUA_FUNCTION_DEF (url, get_path);
  43. LUA_FUNCTION_DEF (url, get_query);
  44. LUA_FUNCTION_DEF (url, get_fragment);
  45. LUA_FUNCTION_DEF (url, get_text);
  46. LUA_FUNCTION_DEF (url, tostring);
  47. LUA_FUNCTION_DEF (url, get_raw);
  48. LUA_FUNCTION_DEF (url, get_tld);
  49. LUA_FUNCTION_DEF (url, get_flags);
  50. LUA_FUNCTION_DEF (url, get_protocol);
  51. LUA_FUNCTION_DEF (url, to_table);
  52. LUA_FUNCTION_DEF (url, is_phished);
  53. LUA_FUNCTION_DEF (url, is_redirected);
  54. LUA_FUNCTION_DEF (url, is_obscured);
  55. LUA_FUNCTION_DEF (url, is_html_displayed);
  56. LUA_FUNCTION_DEF (url, is_subject);
  57. LUA_FUNCTION_DEF (url, get_phished);
  58. LUA_FUNCTION_DEF (url, get_tag);
  59. LUA_FUNCTION_DEF (url, get_count);
  60. LUA_FUNCTION_DEF (url, get_tags);
  61. LUA_FUNCTION_DEF (url, add_tag);
  62. LUA_FUNCTION_DEF (url, create);
  63. LUA_FUNCTION_DEF (url, init);
  64. LUA_FUNCTION_DEF (url, all);
  65. static const struct luaL_reg urllib_m[] = {
  66. LUA_INTERFACE_DEF (url, get_length),
  67. LUA_INTERFACE_DEF (url, get_host),
  68. LUA_INTERFACE_DEF (url, get_port),
  69. LUA_INTERFACE_DEF (url, get_user),
  70. LUA_INTERFACE_DEF (url, get_path),
  71. LUA_INTERFACE_DEF (url, get_query),
  72. LUA_INTERFACE_DEF (url, get_fragment),
  73. LUA_INTERFACE_DEF (url, get_text),
  74. LUA_INTERFACE_DEF (url, get_tld),
  75. LUA_INTERFACE_DEF (url, get_raw),
  76. LUA_INTERFACE_DEF (url, get_protocol),
  77. LUA_INTERFACE_DEF (url, to_table),
  78. LUA_INTERFACE_DEF (url, is_phished),
  79. LUA_INTERFACE_DEF (url, is_redirected),
  80. LUA_INTERFACE_DEF (url, is_obscured),
  81. LUA_INTERFACE_DEF (url, is_html_displayed),
  82. LUA_INTERFACE_DEF (url, is_subject),
  83. LUA_INTERFACE_DEF (url, get_phished),
  84. LUA_INTERFACE_DEF (url, get_tag),
  85. LUA_INTERFACE_DEF (url, get_tags),
  86. LUA_INTERFACE_DEF (url, add_tag),
  87. LUA_INTERFACE_DEF (url, get_count),
  88. LUA_INTERFACE_DEF (url, get_flags),
  89. {"get_redirected", lua_url_get_phished},
  90. {"__tostring", lua_url_tostring},
  91. {NULL, NULL}
  92. };
  93. static const struct luaL_reg urllib_f[] = {
  94. LUA_INTERFACE_DEF (url, init),
  95. LUA_INTERFACE_DEF (url, create),
  96. LUA_INTERFACE_DEF (url, all),
  97. {NULL, NULL}
  98. };
  99. static struct rspamd_lua_url *
  100. lua_check_url (lua_State * L, gint pos)
  101. {
  102. void *ud = rspamd_lua_check_udata (L, pos, "rspamd{url}");
  103. luaL_argcheck (L, ud != NULL, pos, "'url' expected");
  104. return ud ? ((struct rspamd_lua_url *)ud) : NULL;
  105. }
  106. /***
  107. * @method url:get_length()
  108. * Get length of the url
  109. * @return {number} length of url in bytes
  110. */
  111. static gint
  112. lua_url_get_length (lua_State *L)
  113. {
  114. LUA_TRACE_POINT;
  115. struct rspamd_lua_url *url = lua_check_url (L, 1);
  116. if (url != NULL) {
  117. lua_pushinteger (L, url->url->urllen);
  118. }
  119. else {
  120. lua_pushnil (L);
  121. }
  122. return 1;
  123. }
  124. /***
  125. * @method url:get_host()
  126. * Get domain part of the url
  127. * @return {string} domain part of URL
  128. */
  129. static gint
  130. lua_url_get_host (lua_State *L)
  131. {
  132. LUA_TRACE_POINT;
  133. struct rspamd_lua_url *url = lua_check_url (L, 1);
  134. if (url != NULL) {
  135. lua_pushlstring (L, url->url->host, url->url->hostlen);
  136. }
  137. else {
  138. lua_pushnil (L);
  139. }
  140. return 1;
  141. }
  142. /***
  143. * @method url:get_port()
  144. * Get port of the url
  145. * @return {number} url port
  146. */
  147. static gint
  148. lua_url_get_port (lua_State *L)
  149. {
  150. LUA_TRACE_POINT;
  151. struct rspamd_lua_url *url = lua_check_url (L, 1);
  152. if (url != NULL) {
  153. lua_pushinteger (L, url->url->port);
  154. }
  155. else {
  156. lua_pushnil (L);
  157. }
  158. return 1;
  159. }
  160. /***
  161. * @method url:get_user()
  162. * Get user part of the url (e.g. username in email)
  163. * @return {string} user part of URL
  164. */
  165. static gint
  166. lua_url_get_user (lua_State *L)
  167. {
  168. LUA_TRACE_POINT;
  169. struct rspamd_lua_url *url = lua_check_url (L, 1);
  170. if (url != NULL && url->url->user != NULL) {
  171. lua_pushlstring (L, url->url->user, url->url->userlen);
  172. }
  173. else {
  174. lua_pushnil (L);
  175. }
  176. return 1;
  177. }
  178. /***
  179. * @method url:get_path()
  180. * Get path of the url
  181. * @return {string} path part of URL
  182. */
  183. static gint
  184. lua_url_get_path (lua_State *L)
  185. {
  186. LUA_TRACE_POINT;
  187. struct rspamd_lua_url *url = lua_check_url (L, 1);
  188. if (url != NULL && url->url->datalen > 0) {
  189. lua_pushlstring (L, url->url->data, url->url->datalen);
  190. }
  191. else {
  192. lua_pushnil (L);
  193. }
  194. return 1;
  195. }
  196. /***
  197. * @method url:get_query()
  198. * Get query of the url
  199. * @return {string} query part of URL
  200. */
  201. static gint
  202. lua_url_get_query (lua_State *L)
  203. {
  204. LUA_TRACE_POINT;
  205. struct rspamd_lua_url *url = lua_check_url (L, 1);
  206. if (url != NULL && url->url->querylen > 0) {
  207. lua_pushlstring (L, url->url->query, url->url->querylen);
  208. }
  209. else {
  210. lua_pushnil (L);
  211. }
  212. return 1;
  213. }
  214. /***
  215. * @method url:get_fragment()
  216. * Get fragment of the url
  217. * @return {string} fragment part of URL
  218. */
  219. static gint
  220. lua_url_get_fragment (lua_State *L)
  221. {
  222. LUA_TRACE_POINT;
  223. struct rspamd_lua_url *url = lua_check_url (L, 1);
  224. if (url != NULL && url->url->fragmentlen > 0) {
  225. lua_pushlstring (L, url->url->fragment, url->url->fragmentlen);
  226. }
  227. else {
  228. lua_pushnil (L);
  229. }
  230. return 1;
  231. }
  232. /***
  233. * @method url:get_text()
  234. * Get full content of the url
  235. * @return {string} url string
  236. */
  237. static gint
  238. lua_url_get_text (lua_State *L)
  239. {
  240. LUA_TRACE_POINT;
  241. struct rspamd_lua_url *url = lua_check_url (L, 1);
  242. if (url != NULL) {
  243. lua_pushlstring (L, url->url->string, url->url->urllen);
  244. }
  245. else {
  246. lua_pushnil (L);
  247. }
  248. return 1;
  249. }
  250. /***
  251. * @method url:tostring()
  252. * Get full content of the url or user@domain in case of email
  253. * @return {string} url as a string
  254. */
  255. static gint
  256. lua_url_tostring (lua_State *L)
  257. {
  258. LUA_TRACE_POINT;
  259. struct rspamd_lua_url *url = lua_check_url (L, 1);
  260. if (url != NULL && url->url != NULL) {
  261. if (url->url->protocol == PROTOCOL_MAILTO) {
  262. if (url->url->userlen + 1 + url->url->hostlen >= url->url->urllen) {
  263. lua_pushlstring (L, url->url->user,
  264. url->url->userlen + 1 + url->url->hostlen);
  265. }
  266. else {
  267. lua_pushlstring (L, url->url->string, url->url->urllen);
  268. }
  269. }
  270. else {
  271. lua_pushlstring (L, url->url->string, url->url->urllen);
  272. }
  273. }
  274. else {
  275. lua_pushnil (L);
  276. }
  277. return 1;
  278. }
  279. /***
  280. * @method url:get_raw()
  281. * Get full content of the url as it was parsed (e.g. with urldecode)
  282. * @return {string} url string
  283. */
  284. static gint
  285. lua_url_get_raw (lua_State *L)
  286. {
  287. LUA_TRACE_POINT;
  288. struct rspamd_lua_url *url = lua_check_url (L, 1);
  289. if (url != NULL) {
  290. lua_pushlstring (L, url->url->raw, url->url->rawlen);
  291. }
  292. else {
  293. lua_pushnil (L);
  294. }
  295. return 1;
  296. }
  297. /***
  298. * @method url:is_phished()
  299. * Check whether URL is treated as phished
  300. * @return {boolean} `true` if URL is phished
  301. */
  302. static gint
  303. lua_url_is_phished (lua_State *L)
  304. {
  305. LUA_TRACE_POINT;
  306. struct rspamd_lua_url *url = lua_check_url (L, 1);
  307. if (url != NULL) {
  308. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_PHISHED);
  309. }
  310. else {
  311. lua_pushnil (L);
  312. }
  313. return 1;
  314. }
  315. /***
  316. * @method url:is_redirected()
  317. * Check whether URL was redirected
  318. * @return {boolean} `true` if URL is redirected
  319. */
  320. static gint
  321. lua_url_is_redirected (lua_State *L)
  322. {
  323. LUA_TRACE_POINT;
  324. struct rspamd_lua_url *url = lua_check_url (L, 1);
  325. if (url != NULL) {
  326. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_REDIRECTED);
  327. }
  328. else {
  329. lua_pushnil (L);
  330. }
  331. return 1;
  332. }
  333. /***
  334. * @method url:is_obscured()
  335. * Check whether URL is treated as obscured or obfusicated (e.g. numbers in IP address or other hacks)
  336. * @return {boolean} `true` if URL is obscured
  337. */
  338. static gint
  339. lua_url_is_obscured (lua_State *L)
  340. {
  341. LUA_TRACE_POINT;
  342. struct rspamd_lua_url *url = lua_check_url (L, 1);
  343. if (url != NULL) {
  344. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_OBSCURED);
  345. }
  346. else {
  347. lua_pushnil (L);
  348. }
  349. return 1;
  350. }
  351. /***
  352. * @method url:is_html_displayed()
  353. * Check whether URL is just displayed in HTML (e.g. NOT a real href)
  354. * @return {boolean} `true` if URL is displayed only
  355. */
  356. static gint
  357. lua_url_is_html_displayed (lua_State *L)
  358. {
  359. LUA_TRACE_POINT;
  360. struct rspamd_lua_url *url = lua_check_url (L, 1);
  361. if (url != NULL) {
  362. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED);
  363. }
  364. else {
  365. lua_pushnil (L);
  366. }
  367. return 1;
  368. }
  369. /***
  370. * @method url:is_subject()
  371. * Check whether URL is found in subject
  372. * @return {boolean} `true` if URL is found in subject
  373. */
  374. static gint
  375. lua_url_is_subject (lua_State *L)
  376. {
  377. LUA_TRACE_POINT;
  378. struct rspamd_lua_url *url = lua_check_url (L, 1);
  379. if (url != NULL) {
  380. lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_SUBJECT);
  381. }
  382. else {
  383. lua_pushnil (L);
  384. }
  385. return 1;
  386. }
  387. /***
  388. * @method url:get_tag(tag)
  389. * Returns list of string for a specific tagname for an url
  390. * @return {table/strings} list of tags for an url
  391. */
  392. static gint
  393. lua_url_get_tag (lua_State *L)
  394. {
  395. LUA_TRACE_POINT;
  396. struct rspamd_lua_url *url = lua_check_url (L, 1);
  397. guint i;
  398. const gchar *tag = luaL_checkstring (L, 2);
  399. struct rspamd_url_tag *tval, *cur;
  400. if (url != NULL && tag != NULL) {
  401. if (url->url->tags == NULL) {
  402. lua_createtable (L, 0, 0);
  403. }
  404. else {
  405. tval = g_hash_table_lookup (url->url->tags, tag);
  406. if (tval) {
  407. lua_newtable (L);
  408. i = 1;
  409. DL_FOREACH (tval, cur) {
  410. lua_pushstring (L, cur->data);
  411. lua_rawseti (L, -2, i ++);
  412. }
  413. lua_settable (L, -3);
  414. }
  415. else {
  416. lua_createtable (L, 0, 0);
  417. }
  418. }
  419. }
  420. else {
  421. lua_pushnil (L);
  422. }
  423. return 1;
  424. }
  425. /***
  426. * @method url:get_tags()
  427. * Returns list of string tags for an url
  428. * @return {table/strings} list of tags for an url
  429. */
  430. static gint
  431. lua_url_get_tags (lua_State *L)
  432. {
  433. LUA_TRACE_POINT;
  434. struct rspamd_lua_url *url = lua_check_url (L, 1);
  435. guint i;
  436. GHashTableIter it;
  437. struct rspamd_url_tag *tval, *cur;
  438. gpointer k, v;
  439. if (url != NULL) {
  440. if (url->url->tags == NULL) {
  441. lua_createtable (L, 0, 0);
  442. }
  443. else {
  444. lua_createtable (L, 0, g_hash_table_size (url->url->tags));
  445. g_hash_table_iter_init (&it, url->url->tags);
  446. while (g_hash_table_iter_next (&it, &k, &v)) {
  447. tval = v;
  448. lua_pushstring (L, (const gchar *)k);
  449. lua_newtable (L);
  450. i = 1;
  451. DL_FOREACH (tval, cur) {
  452. lua_pushstring (L, cur->data);
  453. lua_rawseti (L, -2, i ++);
  454. }
  455. lua_settable (L, -3);
  456. }
  457. }
  458. }
  459. else {
  460. lua_pushnil (L);
  461. }
  462. return 1;
  463. }
  464. /***
  465. * @method url:add_tag(tag, mempool)
  466. * Adds a new tag for url
  467. * @param {string} tag new tag to add
  468. * @param {mempool} mempool memory pool (e.g. `task:get_pool()`)
  469. */
  470. static gint
  471. lua_url_add_tag (lua_State *L)
  472. {
  473. LUA_TRACE_POINT;
  474. struct rspamd_lua_url *url = lua_check_url (L, 1);
  475. rspamd_mempool_t *mempool = rspamd_lua_check_mempool (L, 4);
  476. const gchar *tag = luaL_checkstring (L, 2);
  477. const gchar *value;
  478. if (lua_type (L, 3) == LUA_TSTRING) {
  479. value = lua_tostring (L, 3);
  480. }
  481. else {
  482. value = "1"; /* Some stupid placeholder */
  483. }
  484. if (url != NULL && mempool != NULL && tag != NULL) {
  485. rspamd_url_add_tag (url->url, tag, value, mempool);
  486. }
  487. else {
  488. return luaL_error (L, "invalid arguments");
  489. }
  490. return 0;
  491. }
  492. /***
  493. * @method url:get_phished()
  494. * Get another URL that pretends to be this URL (e.g. used in phishing)
  495. * @return {url} phished URL
  496. */
  497. static gint
  498. lua_url_get_phished (lua_State *L)
  499. {
  500. LUA_TRACE_POINT;
  501. struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
  502. if (url) {
  503. if (url->url->phished_url != NULL) {
  504. if (url->url->flags &
  505. (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
  506. purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  507. rspamd_lua_setclass (L, "rspamd{url}", -1);
  508. purl->url = url->url->phished_url;
  509. return 1;
  510. }
  511. }
  512. }
  513. lua_pushnil (L);
  514. return 1;
  515. }
  516. /***
  517. * @method url:get_tld()
  518. * Get effective second level domain part (eSLD) of the url host
  519. * @return {string} effective second level domain part (eSLD) of the url host
  520. */
  521. static gint
  522. lua_url_get_tld (lua_State *L)
  523. {
  524. LUA_TRACE_POINT;
  525. struct rspamd_lua_url *url = lua_check_url (L, 1);
  526. if (url != NULL && url->url->tldlen > 0) {
  527. lua_pushlstring (L, url->url->tld, url->url->tldlen);
  528. }
  529. else {
  530. lua_pushnil (L);
  531. }
  532. return 1;
  533. }
  534. /***
  535. * @method url:get_protocol()
  536. * Get protocol name
  537. * @return {string} protocol as a string
  538. */
  539. static gint
  540. lua_url_get_protocol (lua_State *L)
  541. {
  542. LUA_TRACE_POINT;
  543. struct rspamd_lua_url *url = lua_check_url (L, 1);
  544. if (url != NULL && url->url->protocol != PROTOCOL_UNKNOWN) {
  545. lua_pushstring (L, rspamd_url_protocol_name (url->url->protocol));
  546. }
  547. else {
  548. lua_pushnil (L);
  549. }
  550. return 1;
  551. }
  552. /***
  553. * @method url:get_count()
  554. * Return number of occurrencies for this particular URL
  555. * @return {number} number of occurrencies
  556. */
  557. static gint
  558. lua_url_get_count (lua_State *L)
  559. {
  560. LUA_TRACE_POINT;
  561. struct rspamd_lua_url *url = lua_check_url (L, 1);
  562. if (url != NULL && url->url != NULL) {
  563. lua_pushinteger (L, url->url->count);
  564. }
  565. else {
  566. lua_pushnil (L);
  567. }
  568. return 1;
  569. }
  570. /***
  571. * @method url:to_table()
  572. * Return url as a table with the following fields:
  573. *
  574. * - `url`: full content
  575. * - `host`: hostname part
  576. * - `user`: user part
  577. * - `path`: path part
  578. * - `tld`: top level domain
  579. * - `protocol`: url protocol
  580. * @return {table} URL as a table
  581. */
  582. static gint
  583. lua_url_to_table (lua_State *L)
  584. {
  585. LUA_TRACE_POINT;
  586. struct rspamd_lua_url *url = lua_check_url (L, 1);
  587. struct rspamd_url *u;
  588. if (url != NULL) {
  589. u = url->url;
  590. lua_createtable (L, 0, 12);
  591. lua_pushstring (L, "url");
  592. lua_pushlstring (L, u->string, u->urllen);
  593. lua_settable (L, -3);
  594. if (u->hostlen > 0) {
  595. lua_pushstring (L, "host");
  596. lua_pushlstring (L, u->host, u->hostlen);
  597. lua_settable (L, -3);
  598. }
  599. if (u->port != 0) {
  600. lua_pushstring (L, "port");
  601. lua_pushinteger (L, u->port);
  602. lua_settable (L, -3);
  603. }
  604. if (u->tldlen > 0) {
  605. lua_pushstring (L, "tld");
  606. lua_pushlstring (L, u->tld, u->tldlen);
  607. lua_settable (L, -3);
  608. }
  609. if (u->userlen > 0) {
  610. lua_pushstring (L, "user");
  611. lua_pushlstring (L, u->user, u->userlen);
  612. lua_settable (L, -3);
  613. }
  614. if (u->datalen > 0) {
  615. lua_pushstring (L, "path");
  616. lua_pushlstring (L, u->data, u->datalen);
  617. lua_settable (L, -3);
  618. }
  619. if (u->querylen > 0) {
  620. lua_pushstring (L, "query");
  621. lua_pushlstring (L, u->query, u->querylen);
  622. lua_settable (L, -3);
  623. }
  624. if (u->fragmentlen > 0) {
  625. lua_pushstring (L, "fragment");
  626. lua_pushlstring (L, u->fragment, u->fragmentlen);
  627. lua_settable (L, -3);
  628. }
  629. lua_pushstring (L, "protocol");
  630. lua_pushstring (L, rspamd_url_protocol_name (u->protocol));
  631. lua_settable (L, -3);
  632. }
  633. else {
  634. lua_pushnil (L);
  635. }
  636. return 1;
  637. }
  638. static void
  639. lua_url_single_inserter (struct rspamd_url *url, gsize start_offset,
  640. gsize end_offset, gpointer ud)
  641. {
  642. lua_State *L = ud;
  643. struct rspamd_lua_url *lua_url;
  644. lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  645. rspamd_lua_setclass (L, "rspamd{url}", -1);
  646. lua_url->url = url;
  647. }
  648. /***
  649. * @function url.create([mempool,] str)
  650. * @param {rspamd_mempool} memory pool for URL, e.g. `task:get_mempool()`
  651. * @param {string} text that contains URL (can also contain other stuff)
  652. * @return {url} new url object that exists as long as the corresponding mempool exists
  653. */
  654. static gint
  655. lua_url_create (lua_State *L)
  656. {
  657. LUA_TRACE_POINT;
  658. rspamd_mempool_t *pool;
  659. const gchar *text;
  660. size_t length;
  661. gboolean own_pool = FALSE;
  662. if (lua_type (L, 1) == LUA_TUSERDATA) {
  663. pool = rspamd_lua_check_mempool (L, 1);
  664. text = luaL_checklstring (L, 2, &length);
  665. }
  666. else {
  667. own_pool = TRUE;
  668. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "url");
  669. text = luaL_checklstring (L, 1, &length);
  670. }
  671. if (pool == NULL || text == NULL) {
  672. if (own_pool && pool) {
  673. rspamd_mempool_delete (pool);
  674. }
  675. return luaL_error (L, "invalid arguments");
  676. }
  677. else {
  678. rspamd_url_find_single (pool, text, length, FALSE,
  679. lua_url_single_inserter, L);
  680. if (lua_type (L, -1) != LUA_TUSERDATA) {
  681. /* URL is actually not found */
  682. lua_pushnil (L);
  683. }
  684. }
  685. if (own_pool && pool) {
  686. rspamd_mempool_delete (pool);
  687. }
  688. return 1;
  689. }
  690. /***
  691. * @function url.create(tld_file)
  692. * Initialize url library if not initialized yet by Rspamd
  693. * @param {string} tld_file for url library
  694. * @return nothing
  695. */
  696. static gint
  697. lua_url_init (lua_State *L)
  698. {
  699. const gchar *tld_path;
  700. tld_path = luaL_checkstring (L, 1);
  701. rspamd_url_init (tld_path);
  702. return 0;
  703. }
  704. static void
  705. lua_url_table_inserter (struct rspamd_url *url, gsize start_offset,
  706. gsize end_offset, gpointer ud)
  707. {
  708. lua_State *L = ud;
  709. struct rspamd_lua_url *lua_url;
  710. gint n;
  711. n = rspamd_lua_table_size (L, -1);
  712. lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
  713. rspamd_lua_setclass (L, "rspamd{url}", -1);
  714. lua_url->url = url;
  715. lua_pushinteger (L, n + 1);
  716. lua_pushlstring (L, url->string, url->urllen);
  717. lua_settable (L, -3);
  718. }
  719. static gint
  720. lua_url_all (lua_State *L)
  721. {
  722. LUA_TRACE_POINT;
  723. rspamd_mempool_t *pool = rspamd_lua_check_mempool (L, 1);
  724. const gchar *text;
  725. size_t length;
  726. if (pool == NULL) {
  727. lua_pushnil (L);
  728. }
  729. else {
  730. text = luaL_checklstring (L, 2, &length);
  731. if (text != NULL) {
  732. lua_newtable (L);
  733. rspamd_url_find_multiple (pool, text, length, FALSE, NULL,
  734. lua_url_table_inserter, L);
  735. }
  736. else {
  737. lua_pushnil (L);
  738. }
  739. }
  740. return 1;
  741. }
  742. /***
  743. * @method url:get_flags()
  744. * Return flags for a specified URL as map 'flag'->true for all flags set,
  745. * possible flags are:
  746. *
  747. * - `phished`: URL is likely phished
  748. * - `numeric`: URL is numeric (e.g. IP address)
  749. * - `obscured`: URL was obscured
  750. * - `redirected`: URL comes from redirector
  751. * - `html_displayed`: URL is used just for displaying purposes
  752. * - `text`: URL comes from the text
  753. * - `subject`: URL comes from the subject
  754. * - `host_encoded`: URL host part is encoded
  755. * - `schema_encoded`: URL schema part is encoded
  756. * - `query_encoded`: URL query part is encoded
  757. * - `missing_slahes`: URL has some slashes missing
  758. * - `idn`: URL has international characters
  759. * - `has_port`: URL has port
  760. * - `has_user`: URL has user part
  761. * - `schemaless`: URL has no schema
  762. * - `unnormalised`: URL has some unicode unnormalities
  763. * - `zw_spaces`: URL has some zero width spaces
  764. * @return {table} URL flags
  765. */
  766. #define PUSH_FLAG(fl, name) do { \
  767. if (flags & (fl)) { \
  768. lua_pushstring (L, (name)); \
  769. lua_pushboolean (L, true); \
  770. lua_settable (L, -3); \
  771. } \
  772. } while (0)
  773. static gint
  774. lua_url_get_flags (lua_State *L)
  775. {
  776. LUA_TRACE_POINT;
  777. struct rspamd_lua_url *url = lua_check_url (L, 1);
  778. enum rspamd_url_flags flags;
  779. if (url != NULL) {
  780. flags = url->url->flags;
  781. lua_createtable (L, 0, 4);
  782. PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
  783. PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
  784. PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
  785. PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
  786. PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
  787. PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
  788. PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
  789. PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
  790. PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
  791. PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
  792. PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
  793. PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
  794. PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
  795. PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
  796. PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
  797. PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
  798. PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
  799. PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
  800. }
  801. else {
  802. return luaL_error (L, "invalid arguments");
  803. }
  804. return 1;
  805. }
  806. #undef PUSH_FLAG
  807. static gint
  808. lua_load_url (lua_State * L)
  809. {
  810. lua_newtable (L);
  811. luaL_register (L, NULL, urllib_f);
  812. return 1;
  813. }
  814. void
  815. luaopen_url (lua_State * L)
  816. {
  817. rspamd_lua_new_class (L, "rspamd{url}", urllib_m);
  818. lua_pop (L, 1);
  819. rspamd_lua_add_preload (L, "rspamd_url", lua_load_url);
  820. }