|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921 |
- /*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "lua_common.h"
- #include "contrib/uthash/utlist.h"
-
/***
 * @module rspamd_url
 * This module provides routines to handle URLs and to extract URLs from text.
 * Objects of this class are returned, for example, by `task:get_urls()` or `task:get_emails()`.
 * You can also create an `rspamd_url` object from any text.
 * @example
local url = require "rspamd_url"
local mpool = require "rspamd_mempool"

url.init("/usr/share/rspamd/effective_tld_names.dat")
local pool = mpool.create()
local res = url.create(pool, 'Look at: http://user@test.example.com/test?query')
local t = res:to_table()
-- Content of t:
-- url = ['http://test.example.com/test?query']
-- host = ['test.example.com']
-- user = ['user']
-- path = ['test']
-- tld = ['example.com']

pool:destroy() -- res is destroyed here, so you should not use it afterwards

local mistake = res:to_table() -- INVALID! as pool is destroyed
 */
-
/* URL object methods: plain accessors */
LUA_FUNCTION_DEF (url, get_length);
LUA_FUNCTION_DEF (url, get_host);
LUA_FUNCTION_DEF (url, get_port);
LUA_FUNCTION_DEF (url, get_user);
LUA_FUNCTION_DEF (url, get_path);
LUA_FUNCTION_DEF (url, get_query);
LUA_FUNCTION_DEF (url, get_fragment);
LUA_FUNCTION_DEF (url, get_text);
LUA_FUNCTION_DEF (url, get_raw);
LUA_FUNCTION_DEF (url, get_tld);
LUA_FUNCTION_DEF (url, get_protocol);
LUA_FUNCTION_DEF (url, get_count);
LUA_FUNCTION_DEF (url, get_visible);
LUA_FUNCTION_DEF (url, get_flags);
LUA_FUNCTION_DEF (url, get_phished);

/* URL object methods: conversions */
LUA_FUNCTION_DEF (url, tostring);
LUA_FUNCTION_DEF (url, to_table);

/* URL object methods: flag predicates and modifiers */
LUA_FUNCTION_DEF (url, is_phished);
LUA_FUNCTION_DEF (url, is_redirected);
LUA_FUNCTION_DEF (url, is_obscured);
LUA_FUNCTION_DEF (url, is_html_displayed);
LUA_FUNCTION_DEF (url, is_subject);
LUA_FUNCTION_DEF (url, set_redirected);

/* Library level functions (see urllib_f) */
LUA_FUNCTION_DEF (url, init);
LUA_FUNCTION_DEF (url, create);
LUA_FUNCTION_DEF (url, all);
-
- static const struct luaL_reg urllib_m[] = {
- LUA_INTERFACE_DEF (url, get_length),
- LUA_INTERFACE_DEF (url, get_host),
- LUA_INTERFACE_DEF (url, get_port),
- LUA_INTERFACE_DEF (url, get_user),
- LUA_INTERFACE_DEF (url, get_path),
- LUA_INTERFACE_DEF (url, get_query),
- LUA_INTERFACE_DEF (url, get_fragment),
- LUA_INTERFACE_DEF (url, get_text),
- LUA_INTERFACE_DEF (url, get_tld),
- LUA_INTERFACE_DEF (url, get_raw),
- LUA_INTERFACE_DEF (url, get_protocol),
- LUA_INTERFACE_DEF (url, to_table),
- LUA_INTERFACE_DEF (url, is_phished),
- LUA_INTERFACE_DEF (url, is_redirected),
- LUA_INTERFACE_DEF (url, is_obscured),
- LUA_INTERFACE_DEF (url, is_html_displayed),
- LUA_INTERFACE_DEF (url, is_subject),
- LUA_INTERFACE_DEF (url, get_phished),
-
- LUA_INTERFACE_DEF (url, get_visible),
- LUA_INTERFACE_DEF (url, get_count),
- LUA_INTERFACE_DEF (url, get_flags),
- {"get_redirected", lua_url_get_phished},
- LUA_INTERFACE_DEF (url, set_redirected),
- {"__tostring", lua_url_tostring},
- {NULL, NULL}
- };
-
- static const struct luaL_reg urllib_f[] = {
- LUA_INTERFACE_DEF (url, init),
- LUA_INTERFACE_DEF (url, create),
- LUA_INTERFACE_DEF (url, all),
- {NULL, NULL}
- };
-
- struct rspamd_lua_url *
- lua_check_url (lua_State * L, gint pos)
- {
- void *ud = rspamd_lua_check_udata (L, pos, "rspamd{url}");
- luaL_argcheck (L, ud != NULL, pos, "'url' expected");
- return ud ? ((struct rspamd_lua_url *)ud) : NULL;
- }
-
- static gboolean
- lua_url_single_inserter (struct rspamd_url *url, gsize start_offset,
- gsize end_offset, gpointer ud)
- {
- lua_State *L = ud;
- struct rspamd_lua_url *lua_url;
-
- lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
- rspamd_lua_setclass (L, "rspamd{url}", -1);
- lua_url->url = url;
-
- return TRUE;
- }
-
- /***
- * @method url:get_length()
- * Get length of the url
- * @return {number} length of url in bytes
- */
- static gint
- lua_url_get_length (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushinteger (L, url->url->urllen);
- }
- else {
- lua_pushnil (L);
- }
- return 1;
- }
-
- /***
- * @method url:get_host()
- * Get domain part of the url
- * @return {string} domain part of URL
- */
- static gint
- lua_url_get_host (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushlstring (L, url->url->host, url->url->hostlen);
- }
- else {
- lua_pushnil (L);
- }
- return 1;
- }
-
- /***
- * @method url:get_port()
- * Get port of the url
- * @return {number} url port
- */
- static gint
- lua_url_get_port (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushinteger (L, url->url->port);
- }
- else {
- lua_pushnil (L);
- }
- return 1;
- }
-
- /***
- * @method url:get_user()
- * Get user part of the url (e.g. username in email)
- * @return {string} user part of URL
- */
- static gint
- lua_url_get_user (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url->user != NULL) {
- lua_pushlstring (L, url->url->user, url->url->userlen);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_path()
- * Get path of the url
- * @return {string} path part of URL
- */
- static gint
- lua_url_get_path (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url->datalen > 0) {
- lua_pushlstring (L, url->url->data, url->url->datalen);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_query()
- * Get query of the url
- * @return {string} query part of URL
- */
- static gint
- lua_url_get_query (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url->querylen > 0) {
- lua_pushlstring (L, url->url->query, url->url->querylen);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_fragment()
- * Get fragment of the url
- * @return {string} fragment part of URL
- */
- static gint
- lua_url_get_fragment (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url->fragmentlen > 0) {
- lua_pushlstring (L, url->url->fragment, url->url->fragmentlen);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_text()
- * Get full content of the url
- * @return {string} url string
- */
- static gint
- lua_url_get_text (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushlstring (L, url->url->string, url->url->urllen);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:tostring()
- * Get full content of the url or user@domain in case of email
- * @return {string} url as a string
- */
- static gint
- lua_url_tostring (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url != NULL) {
- if (url->url->protocol == PROTOCOL_MAILTO) {
- gchar *tmp = g_malloc (url->url->userlen + 1 +
- url->url->hostlen);
- memcpy (tmp, url->url->user, url->url->userlen);
- tmp[url->url->userlen] = '@';
- memcpy (tmp + url->url->userlen + 1, url->url->host,
- url->url->hostlen);
-
- lua_pushlstring (L, tmp, url->url->userlen + 1 + url->url->hostlen);
- g_free (tmp);
- }
- else {
- lua_pushlstring (L, url->url->string, url->url->urllen);
- }
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_raw()
- * Get full content of the url as it was parsed (e.g. with urldecode)
- * @return {string} url string
- */
- static gint
- lua_url_get_raw (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushlstring (L, url->url->raw, url->url->rawlen);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:is_phished()
- * Check whether URL is treated as phished
- * @return {boolean} `true` if URL is phished
- */
- static gint
- lua_url_is_phished (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_PHISHED);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:is_redirected()
- * Check whether URL was redirected
- * @return {boolean} `true` if URL is redirected
- */
- static gint
- lua_url_is_redirected (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_REDIRECTED);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:is_obscured()
- * Check whether URL is treated as obscured or obfusicated (e.g. numbers in IP address or other hacks)
- * @return {boolean} `true` if URL is obscured
- */
- static gint
- lua_url_is_obscured (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_OBSCURED);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
-
- /***
- * @method url:is_html_displayed()
- * Check whether URL is just displayed in HTML (e.g. NOT a real href)
- * @return {boolean} `true` if URL is displayed only
- */
- static gint
- lua_url_is_html_displayed (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_HTML_DISPLAYED);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:is_subject()
- * Check whether URL is found in subject
- * @return {boolean} `true` if URL is found in subject
- */
- static gint
- lua_url_is_subject (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL) {
- lua_pushboolean (L, url->url->flags & RSPAMD_URL_FLAG_SUBJECT);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_phished()
- * Get another URL that pretends to be this URL (e.g. used in phishing)
- * @return {url} phished URL
- */
- static gint
- lua_url_get_phished (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
-
- if (url) {
- if (url->url->phished_url != NULL) {
- if (url->url->flags &
- (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
- purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
- rspamd_lua_setclass (L, "rspamd{url}", -1);
- purl->url = url->url->phished_url;
-
- return 1;
- }
- }
- }
-
- lua_pushnil (L);
- return 1;
- }
-
- /***
- * @method url:set_redirected(url,[ pool])
- * Set url as redirected to another url
- * @param {string|url} url new url that is redirecting an old one
- * @param {pool} pool if url is a string this is required for parsing
- * @return {url} parsed redirected url (if needed)
- */
- static gint
- lua_url_set_redirected (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1), *redir;
- rspamd_mempool_t *pool = NULL;
-
- if (url == NULL) {
- return luaL_error (L, "url is required as the first argument");
- }
-
- if (lua_type (L, 2) == LUA_TSTRING) {
- /* Parse url */
- if (lua_type (L, 3) != LUA_TUSERDATA) {
- return luaL_error (L, "mempool is required as the third argument");
- }
-
- pool = rspamd_lua_check_mempool (L, 3);
-
- if (pool == NULL) {
- return luaL_error (L, "mempool is required as the third argument");
- }
-
- gsize len;
- const gchar *urlstr = lua_tolstring (L, 2, &len);
-
- rspamd_url_find_single (pool, urlstr, len, RSPAMD_URL_FIND_ALL,
- lua_url_single_inserter, L);
-
- if (lua_type (L, -1) != LUA_TUSERDATA) {
- /* URL is actually not found */
- lua_pushnil (L);
- }
- else {
- redir = lua_check_url (L, -1);
-
- url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
- url->url->phished_url = redir->url;
- }
- }
- else {
- redir = lua_check_url (L, 2);
-
- if (redir == NULL) {
- return luaL_error (L, "url is required as the second argument");
- }
-
- url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
- url->url->phished_url = redir->url;
-
- /* Push back on stack */
- lua_pushvalue (L, 2);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_tld()
- * Get effective second level domain part (eSLD) of the url host
- * @return {string} effective second level domain part (eSLD) of the url host
- */
- static gint
- lua_url_get_tld (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url->tldlen > 0) {
- lua_pushlstring (L, url->url->tld, url->url->tldlen);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_protocol()
- * Get protocol name
- * @return {string} protocol as a string
- */
- static gint
- lua_url_get_protocol (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url->protocol != PROTOCOL_UNKNOWN) {
- lua_pushstring (L, rspamd_url_protocol_name (url->url->protocol));
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_count()
- * Return number of occurrencies for this particular URL
- * @return {number} number of occurrencies
- */
- static gint
- lua_url_get_count (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url != NULL) {
- lua_pushinteger (L, url->url->count);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:get_visible()
- * Get visible part of the url with html tags stripped
- * @return {string} url string
- */
- static gint
- lua_url_get_visible (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
-
- if (url != NULL && url->url->visible_part) {
- lua_pushstring (L, url->url->visible_part);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
- /***
- * @method url:to_table()
- * Return url as a table with the following fields:
- *
- * - `url`: full content
- * - `host`: hostname part
- * - `user`: user part
- * - `path`: path part
- * - `tld`: top level domain
- * - `protocol`: url protocol
- * @return {table} URL as a table
- */
- static gint
- lua_url_to_table (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
- struct rspamd_url *u;
-
- if (url != NULL) {
- u = url->url;
- lua_createtable (L, 0, 12);
- lua_pushstring (L, "url");
- lua_pushlstring (L, u->string, u->urllen);
- lua_settable (L, -3);
-
- if (u->hostlen > 0) {
- lua_pushstring (L, "host");
- lua_pushlstring (L, u->host, u->hostlen);
- lua_settable (L, -3);
- }
-
- if (u->port != 0) {
- lua_pushstring (L, "port");
- lua_pushinteger (L, u->port);
- lua_settable (L, -3);
- }
-
- if (u->tldlen > 0) {
- lua_pushstring (L, "tld");
- lua_pushlstring (L, u->tld, u->tldlen);
- lua_settable (L, -3);
- }
-
- if (u->userlen > 0) {
- lua_pushstring (L, "user");
- lua_pushlstring (L, u->user, u->userlen);
- lua_settable (L, -3);
- }
-
- if (u->datalen > 0) {
- lua_pushstring (L, "path");
- lua_pushlstring (L, u->data, u->datalen);
- lua_settable (L, -3);
- }
-
- if (u->querylen > 0) {
- lua_pushstring (L, "query");
- lua_pushlstring (L, u->query, u->querylen);
- lua_settable (L, -3);
- }
-
- if (u->fragmentlen > 0) {
- lua_pushstring (L, "fragment");
- lua_pushlstring (L, u->fragment, u->fragmentlen);
- lua_settable (L, -3);
- }
-
-
- lua_pushstring (L, "protocol");
- lua_pushstring (L, rspamd_url_protocol_name (u->protocol));
- lua_settable (L, -3);
- }
- else {
- lua_pushnil (L);
- }
-
- return 1;
- }
-
-
- /***
- * @function url.create([mempool,] str)
- * @param {rspamd_mempool} memory pool for URL, e.g. `task:get_mempool()`
- * @param {string} text that contains URL (can also contain other stuff)
- * @return {url} new url object that exists as long as the corresponding mempool exists
- */
- static gint
- lua_url_create (lua_State *L)
- {
- LUA_TRACE_POINT;
- rspamd_mempool_t *pool;
- const gchar *text;
- size_t length;
- gboolean own_pool = FALSE;
-
- if (lua_type (L, 1) == LUA_TUSERDATA) {
- pool = rspamd_lua_check_mempool (L, 1);
- text = luaL_checklstring (L, 2, &length);
- }
- else {
- own_pool = TRUE;
- pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "url");
- text = luaL_checklstring (L, 1, &length);
- }
-
- if (pool == NULL || text == NULL) {
- if (own_pool && pool) {
- rspamd_mempool_delete (pool);
- }
-
- return luaL_error (L, "invalid arguments");
- }
- else {
- rspamd_url_find_single (pool, text, length, RSPAMD_URL_FIND_ALL,
- lua_url_single_inserter, L);
-
- if (lua_type (L, -1) != LUA_TUSERDATA) {
- /* URL is actually not found */
- lua_pushnil (L);
- }
- }
-
- if (own_pool && pool) {
- rspamd_mempool_delete (pool);
- }
-
- return 1;
- }
-
- /***
- * @function url.init(tld_file)
- * Initialize url library if not initialized yet by Rspamd
- * @param {string} tld_file path to effective_tld_names.dat file (public suffix list)
- * @return nothing
- */
- static gint
- lua_url_init (lua_State *L)
- {
- const gchar *tld_path;
-
- tld_path = luaL_checkstring (L, 1);
-
- rspamd_url_init (tld_path);
-
- return 0;
- }
-
- static gboolean
- lua_url_table_inserter (struct rspamd_url *url, gsize start_offset,
- gsize end_offset, gpointer ud)
- {
- lua_State *L = ud;
- struct rspamd_lua_url *lua_url;
- gint n;
-
- n = rspamd_lua_table_size (L, -1);
- lua_url = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
- rspamd_lua_setclass (L, "rspamd{url}", -1);
- lua_url->url = url;
- lua_pushinteger (L, n + 1);
- lua_pushlstring (L, url->string, url->urllen);
- lua_settable (L, -3);
-
- return TRUE;
- }
-
-
- static gint
- lua_url_all (lua_State *L)
- {
- LUA_TRACE_POINT;
- rspamd_mempool_t *pool = rspamd_lua_check_mempool (L, 1);
- const gchar *text;
- size_t length;
-
- if (pool == NULL) {
- lua_pushnil (L);
- }
- else {
- text = luaL_checklstring (L, 2, &length);
-
- if (text != NULL) {
- lua_newtable (L);
- rspamd_url_find_multiple (pool, text, length,
- RSPAMD_URL_FIND_ALL, NULL,
- lua_url_table_inserter, L);
-
- }
- else {
- lua_pushnil (L);
- }
- }
-
- return 1;
- }
-
- /***
- * @method url:get_flags()
- * Return flags for a specified URL as map 'flag'->true for all flags set,
- * possible flags are:
- *
- * - `phished`: URL is likely phished
- * - `numeric`: URL is numeric (e.g. IP address)
- * - `obscured`: URL was obscured
- * - `redirected`: URL comes from redirector
- * - `html_displayed`: URL is used just for displaying purposes
- * - `text`: URL comes from the text
- * - `subject`: URL comes from the subject
- * - `host_encoded`: URL host part is encoded
- * - `schema_encoded`: URL schema part is encoded
- * - `query_encoded`: URL query part is encoded
- * - `missing_slahes`: URL has some slashes missing
- * - `idn`: URL has international characters
- * - `has_port`: URL has port
- * - `has_user`: URL has user part
- * - `schemaless`: URL has no schema
- * - `unnormalised`: URL has some unicode unnormalities
- * - `zw_spaces`: URL has some zero width spaces
- * - `url_displayed`: URL has some other url-like string in visible part
- * - `image`: URL is from src attribute of img HTML tag
- * @return {table} URL flags
- */
- #define PUSH_FLAG(fl, name) do { \
- if (flags & (fl)) { \
- lua_pushstring (L, (name)); \
- lua_pushboolean (L, true); \
- lua_settable (L, -3); \
- } \
- } while (0)
-
- static gint
- lua_url_get_flags (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_url *url = lua_check_url (L, 1);
- enum rspamd_url_flags flags;
-
- if (url != NULL) {
- flags = url->url->flags;
-
- lua_createtable (L, 0, 4);
-
- PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
- PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
- PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
- PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
- PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
- PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
- PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
- PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
- PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
- PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
- PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
- PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
- PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
- PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
- PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
- PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- #undef PUSH_FLAG
-
- static gint
- lua_load_url (lua_State * L)
- {
- lua_newtable (L);
- luaL_register (L, NULL, urllib_f);
-
- return 1;
- }
-
- void
- luaopen_url (lua_State * L)
- {
- rspamd_lua_new_class (L, "rspamd{url}", urllib_m);
- lua_pop (L, 1);
-
- rspamd_lua_add_preload (L, "rspamd_url", lua_load_url);
- }
|