/*
 * Copyright 2024 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "lua_common.h"
#include "libcryptobox/cryptobox.h"
#include "contrib/fastutf8/fastutf8.h"
#include "unix-std.h"

/***
 * @module rspamd_text
 * This module provides access to opaque text structures used widely to prevent
 * copying between Lua and C for various concerns: performance, security etc...
 *
 * You can convert rspamd_text into string but it will copy data.
 */

/***
 * @function rspamd_text.fromstring(str[, transparent])
 * Creates rspamd_text from Lua string (copied to the text unless `transparent` is true)
 * @param {string} str string to use
 * @param {boolean} transparent if true, the text references the string data instead of copying it
 * @return {rspamd_text} resulting text
 */
LUA_FUNCTION_DEF(text, fromstring);

/***
 * @function rspamd_text.null()
 * Creates rspamd_text with NULL pointer for testing purposes
 * @return {rspamd_text} resulting text
 */
LUA_FUNCTION_DEF(text, null);
/***
 * @function rspamd_text.randombytes(nbytes)
 * Creates rspamd_text with random bytes inside (raw bytes)
 * @param {number} nbytes number of random bytes generated
 * @return {rspamd_text} random bytes text
 */
LUA_FUNCTION_DEF(text, randombytes);

/***
 * @function rspamd_text.fromtable(tbl[, delim])
 * Same as `table.concat` but generates rspamd_text instead of the Lua string
 * @param {table} tbl table to use
 * @param {string} delim optional delimiter
 * @return {rspamd_text} resulting text
 */
LUA_FUNCTION_DEF(text, fromtable);
/***
 * @method rspamd_text:byte(pos[, pos2])
 * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified
 * @param {integer} pos index
 * @param {integer} pos2 index
 * @return {integer} byte at the position `pos` or varargs of bytes
 */
LUA_FUNCTION_DEF(text, byte);
/***
 * @method rspamd_text:len()
 * Returns length of a string
 * @return {number} length of string in **bytes**
 */
LUA_FUNCTION_DEF(text, len);
/***
 * @method rspamd_text:str()
 * Converts text to string by copying its content
 * @return {string} copy of text as Lua string
 */
LUA_FUNCTION_DEF(text, str);
/***
 * @method rspamd_text:ptr()
 * Converts text to lightuserdata
 * @return {lightuserdata} pointer value of rspamd_text
 */
LUA_FUNCTION_DEF(text, ptr);
/***
 * @method rspamd_text:save_in_file(fname[, mode])
 * Saves text in file
 * @return {boolean} true if save has been completed
 */
LUA_FUNCTION_DEF(text, save_in_file);
/***
 * @method rspamd_text:span(start[, len])
 * Returns a span for lua_text starting at pos [start] (1 indexed) and with
 * length `len` (or to the end of the text)
 * @param {integer} start start index
 * @param {integer} len length of span
 * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
 */
LUA_FUNCTION_DEF(text, span);
/***
 * @method rspamd_text:sub(start[, stop])
 * Returns a substring for lua_text similar to string.sub from Lua
 * @param {integer} start start index (may be negative, as in string.sub)
 * @param {integer} stop end index, inclusive (default -1, i.e. the end of the text)
 * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
 */
LUA_FUNCTION_DEF(text, sub);
/***
 * @method rspamd_text:lines([stringify])
 * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
 * @param {boolean} stringify stringify lines
 * @return {iterator} iterator triplet
 */
LUA_FUNCTION_DEF(text, lines);
/***
 * @method rspamd_text:split(regexp, [stringify])
 * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
 * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
 * @param {boolean} stringify stringify lines
 * @return {iterator} iterator triplet
 */
LUA_FUNCTION_DEF(text, split);
/***
 * @method rspamd_text:at(pos)
 * Returns a byte at the position `pos`
 * @param {integer} pos index
 * @return {integer} byte at the position `pos` or nil if pos out of bound
 */
LUA_FUNCTION_DEF(text, at);
/***
 * @method rspamd_text:memchr(chr, [reverse])
 * Returns the first or the last position of the character `chr` in the text or
 * -1 in case if a character has not been found. Indexes start from `1`
 * @param {string/number} chr character or a character code to find
 * @param {boolean} reverse last character if `true`
 * @return {integer} position of the character or `-1`
 */
LUA_FUNCTION_DEF(text, memchr);
/***
 * @method rspamd_text:bytes()
 * Converts text to an array of bytes
 * @return {table|integer} bytes in the array (as unsigned char)
 */
LUA_FUNCTION_DEF(text, bytes);
/***
 * @method rspamd_text:lower([is_utf, [inplace]])
 * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
 * @param {boolean} is_utf apply utf8 lowercase
 * @param {boolean} inplace lowercase the original text
 * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
 */
LUA_FUNCTION_DEF(text, lower);
/***
 * @method rspamd_text:take_ownership()
 * Makes the text own its data: if the text is not owned yet, its content is
 * copied to a freshly allocated buffer that is released when the text is collected
 * @return {boolean} always true
 */
LUA_FUNCTION_DEF(text, take_ownership);
/***
 * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
 * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
 * where all chars from `set_to_exclude` are removed
 * Patterns supported:
 *
 * - %s - all space characters
 * - %n - all newline characters
 * - %c - all control characters (codes 0x00-0x1f and DEL)
 * - %8 - all 8 bit characters
 * - %% - just a percent character
 *
 * @param {string} set_to_exclude characters to exclude
 * @param {boolean} always_copy always copy the source text
 * @return {rspamd_text} modified or copied text
 */
LUA_FUNCTION_DEF(text, exclude_chars);
/***
 * @method rspamd_text:oneline([always_copy])
 * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
 * where the following transformations are made:
 * - All spaces sequences are replaced with a single space
 * - All newlines sequences are replaced with a single space
 * - Trailing and leading spaces are removed
 * - Control characters are excluded
 * - UTF8 sequences are normalised
 *
 * @param {boolean} always_copy always copy the source text
 * @return {rspamd_text} modified or copied text
 */
LUA_FUNCTION_DEF(text, oneline);
/***
 * @method rspamd_text:base32([b32type])
 * Returns a text encoded in base32 (new rspamd_text is allocated)
 *
 * @param {string} b32type base32 type (default, bleach, rfc)
 * @return {rspamd_text} new text encoded in base32
 */
LUA_FUNCTION_DEF(text, base32);
/***
 * @method rspamd_text:base64([line_length, [nline, [fold]]])
 * Returns a text encoded in base64 (new rspamd_text is allocated)
 *
 * @param {number} line_length return text split with newlines up to this attribute
 * @param {string} nline newline type: `cr`, `lf`, `crlf`
 * @param {boolean} fold use folding when splitting into lines (false by default)
 * @return {rspamd_text} new text encoded in base64
 */
LUA_FUNCTION_DEF(text, base64);
/***
 * @method rspamd_text:hex()
 * Returns a text encoded in hex (new rspamd_text is allocated)
 *
 * @return {rspamd_text} new text encoded in hex
 */
LUA_FUNCTION_DEF(text, hex);
/***
 * @method rspamd_text:find(pattern [, init])
 * Looks for the first match of pattern in the string s.
 * If it finds a match, then find returns the indices of s where this occurrence
 * starts and ends; otherwise, it returns nil. A third,
 * optional numerical argument init specifies where to start the search;
 * its default value is 1 and can be negative.
 * This method currently supports merely a plain search, no patterns.
 *
 * @param {string} pattern pattern to find
 * @param {number} init specifies where to start the search (1 default)
 * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil
 */
LUA_FUNCTION_DEF(text, find);
LUA_FUNCTION_DEF(text, gc);
LUA_FUNCTION_DEF(text, eq);
LUA_FUNCTION_DEF(text, lt);
LUA_FUNCTION_DEF(text, concat);
LUA_FUNCTION_DEF(text, strtoul);

/* Module-level functions (`rspamd_text.*`) */
static const struct luaL_reg textlib_f[] = {
	LUA_INTERFACE_DEF(text, fromstring),
	{"from_string", lua_text_fromstring},
	LUA_INTERFACE_DEF(text, fromtable),
	{"from_table", lua_text_fromtable},
	LUA_INTERFACE_DEF(text, null),
	LUA_INTERFACE_DEF(text, randombytes),
	{NULL, NULL}};

/* Methods and metamethods of a text object */
static const struct luaL_reg textlib_m[] = {
	LUA_INTERFACE_DEF(text, len),
	LUA_INTERFACE_DEF(text, str),
	LUA_INTERFACE_DEF(text, ptr),
	LUA_INTERFACE_DEF(text, take_ownership),
	LUA_INTERFACE_DEF(text, save_in_file),
	LUA_INTERFACE_DEF(text, span),
	LUA_INTERFACE_DEF(text, sub),
	LUA_INTERFACE_DEF(text, lines),
	LUA_INTERFACE_DEF(text, split),
	LUA_INTERFACE_DEF(text, at),
	LUA_INTERFACE_DEF(text, memchr),
	LUA_INTERFACE_DEF(text, byte),
	LUA_INTERFACE_DEF(text, bytes),
	LUA_INTERFACE_DEF(text, lower),
	LUA_INTERFACE_DEF(text, exclude_chars),
	LUA_INTERFACE_DEF(text, oneline),
	LUA_INTERFACE_DEF(text, base32),
	LUA_INTERFACE_DEF(text, base64),
	LUA_INTERFACE_DEF(text, hex),
	LUA_INTERFACE_DEF(text, find),
	LUA_INTERFACE_DEF(text, strtoul),
	{"write", lua_text_save_in_file},
	{"__len", lua_text_len},
	{"__tostring", lua_text_str},
	{"__gc", lua_text_gc},
	{"__eq", lua_text_eq},
	{"__lt", lua_text_lt},
	{"__concat", lua_text_concat},
	{NULL, NULL}};

/**
 * Returns the text userdata at stack index `pos`; raises a Lua argument
 * error (via luaL_argcheck) if the class check yields NULL.
 */
struct rspamd_lua_text *
lua_check_text(lua_State *L, int pos)
{
	void *ud = rspamd_lua_check_udata(L, pos, rspamd_text_classname);
	luaL_argcheck(L, ud != NULL, pos, "'text' expected");
	return ud ? (struct rspamd_lua_text *) ud : NULL;
}

/**
 * Accepts either a text userdata or a Lua string at stack index `pos`.
 * For a string, a pointer to one of a few static "fake" text structures is
 * returned: it references the Lua string data and is only valid until this
 * function has been called enough times to reuse the slot.
 * @return text pointer, or NULL for other types or for oversized strings
 */
struct rspamd_lua_text *
lua_check_text_or_string(lua_State *L, int pos)
{
	int pos_type = lua_type(L, pos);

	if (pos_type == LUA_TUSERDATA) {
		void *ud = rspamd_lua_check_udata(L, pos, rspamd_text_classname);
		luaL_argcheck(L, ud != NULL, pos, "'text' expected");
		return ud ? (struct rspamd_lua_text *) ud : NULL;
	}
	else if (pos_type == LUA_TSTRING) {
		/*
		 * Fake static lua_text, we allow to use this function multiple times
		 * by having a small array of static structures.
		 */
		static unsigned cur_txt_idx = 0;
		static struct rspamd_lua_text fake_text[4];
		gsize len;
		int sel_idx;

		sel_idx = cur_txt_idx++ % G_N_ELEMENTS(fake_text);
		fake_text[sel_idx].start = lua_tolstring(L, pos, &len);

		if (len >= G_MAXUINT) {
			return NULL;
		}

		fake_text[sel_idx].len = len;
		fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE;

		return &fake_text[sel_idx];
	}

	return NULL;
}

/**
 * Pushes a new text userdata on the Lua stack.
 * @param start source data (may be NULL when `own` is set: the buffer is then left uninitialised)
 * @param len length in bytes
 * @param own if TRUE and len > 0, a g_malloc'ed copy is made and owned by the text;
 *            otherwise the text merely references `start`
 * @return the new text (also left on top of the stack)
 */
struct rspamd_lua_text *
lua_new_text(lua_State *L, const char *start, gsize len, gboolean own)
{
	struct rspamd_lua_text *t;

	t = lua_newuserdata(L, sizeof(*t));
	t->flags = 0;

	if (own) {
		char *storage;

		if (len > 0) {
			storage = g_malloc(len);

			if (start != NULL) {
				memcpy(storage, start, len);
			}

			t->start = storage;
			t->flags = RSPAMD_TEXT_FLAG_OWN;
		}
		else {
			t->start = "";
		}
	}
	else {
		t->start = start;
	}

	t->len = len;
	rspamd_lua_setclass(L, rspamd_text_classname, -1);

	return t;
}

/**
 * Same as lua_new_text, but an "owned" copy is allocated from the task memory
 * pool; the OWN flag is not set, so the text GC handler does not free it.
 */
struct rspamd_lua_text *
lua_new_text_task(lua_State *L, struct rspamd_task *task,
				  const char *start, gsize len, gboolean own)
{
	struct rspamd_lua_text *t;

	t = lua_newuserdata(L, sizeof(*t));
	t->flags = 0;

	if (own) {
		char *storage;

		if (len > 0) {
			storage = rspamd_mempool_alloc(task->task_pool, len);

			if (start != NULL) {
				memcpy(storage, start, len);
			}

			t->start = storage;
		}
		else {
			t->start = "";
		}
	}
	else {
		t->start = start;
	}

	t->len = len;
	rspamd_lua_setclass(L, rspamd_text_classname, -1);

	return t;
}

/**
 * Returns true if the text has 8-bit characters and rspamd_fast_utf8_validate
 * returns non-zero for it; false for NULL, empty or other texts.
 */
bool lua_is_text_binary(struct rspamd_lua_text *t)
{
	if (t == NULL || t->len == 0) {
		return false;
	}

	if (rspamd_str_has_8bit(t->start, t->len)) {
		if (rspamd_fast_utf8_validate(t->start, t->len) == 0) {
			return false;
		}
		return true;
	}

	return false;
}

/* rspamd_text.fromstring(str[, transparent]) */
static int
lua_text_fromstring(lua_State *L)
{
	LUA_TRACE_POINT;
	const char *str;
	gsize l = 0;
	gboolean transparent = FALSE;

	str = luaL_checklstring(L, 1, &l);

	if (str == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_isboolean(L, 2)) {
		transparent = lua_toboolean(L, 2);
	}

	/* A transparent text references the Lua string instead of copying it */
	lua_new_text(L, str, l, !transparent);

	return 1;
}

/* rspamd_text.null() */
static int
lua_text_null(lua_State *L)
{
	LUA_TRACE_POINT;

	lua_new_text(L, NULL, 0, false);

	return 1;
}

/* rspamd_text.randombytes(nbytes) */
static int
lua_text_randombytes(lua_State *L)
{
	LUA_TRACE_POINT;
	lua_Integer nbytes = luaL_checkinteger(L, 1);
	struct rspamd_lua_text *out;

	/* A negative count used to wrap around to a huge unsigned allocation */
	if (nbytes < 0 || (guint64) nbytes >= G_MAXUINT) {
		return luaL_error(L, "invalid number of random bytes");
	}

	out = lua_new_text(L, NULL, (gsize) nbytes, TRUE);

	if (nbytes > 0) {
		/* For zero length `start` is a string literal, never write to it */
		randombytes_buf((char *) out->start, (gsize) nbytes);
	}

	out->len = nbytes;

	return 1;
}

#define MAX_REC 10

/*
 * Adds to `*dest` the number of bytes required to concatenate the table on the
 * top of the stack: strings, userdata and nested tables (up to MAX_REC levels),
 * plus `dlen` bytes of delimiter between elements.
 * NOTE(review): any userdata is treated as a text here without a class check.
 */
static void
lua_text_tbl_length(lua_State *L, gsize dlen, gsize *dest, unsigned int rec)
{
	gsize tblen, stlen;
	struct rspamd_lua_text *elt;

	if (rec > MAX_REC) {
		luaL_error(L, "lua_text_tbl_length: recursion limit exceeded");

		return;
	}

	tblen = rspamd_lua_table_size(L, -1);

	for (gsize i = 0; i < tblen; i++) {
		lua_rawgeti(L, -1, i + 1);

		if (lua_type(L, -1) == LUA_TSTRING) {
#if LUA_VERSION_NUM >= 502
			stlen = lua_rawlen(L, -1);
#else
			stlen = lua_objlen(L, -1);
#endif
			(*dest) += stlen;
		}
		else if (lua_type(L, -1) == LUA_TUSERDATA) {
			elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);

			if (elt) {
				(*dest) += elt->len;
			}
		}
		else if (lua_type(L, -1) == LUA_TTABLE) {
			lua_text_tbl_length(L, dlen, dest, rec + 1);
		}

		if (i != tblen - 1) {
			(*dest) += dlen;
		}

		lua_pop(L, 1);
	}
}

/*
 * Appends the elements of the table on the top of the stack to `*dest`
 * (advancing it); must visit elements exactly as lua_text_tbl_length does,
 * as the destination buffer is sized by that function.
 */
static void
lua_text_tbl_append(lua_State *L,
					const char *delim,
					gsize dlen,
					char **dest,
					unsigned int rec)
{
	const char *st;
	gsize tblen, stlen;
	struct rspamd_lua_text *elt;

	if (rec > MAX_REC) {
		luaL_error(L, "lua_text_tbl_append: recursion limit exceeded");

		return;
	}

	tblen = rspamd_lua_table_size(L, -1);

	for (gsize i = 0; i < tblen; i++) {
		lua_rawgeti(L, -1, i + 1);

		if (lua_type(L, -1) == LUA_TSTRING) {
			st = lua_tolstring(L, -1, &stlen);
			memcpy((*dest), st, stlen);
			(*dest) += stlen;
		}
		else if (lua_type(L, -1) == LUA_TUSERDATA) {
			elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);

			/* Skip empty texts: their `start` may be NULL */
			if (elt && elt->len > 0) {
				memcpy((*dest), elt->start, elt->len);
				(*dest) += elt->len;
			}
		}
		else if (lua_type(L, -1) == LUA_TTABLE) {
			lua_text_tbl_append(L, delim, dlen, dest, rec + 1);
		}

		if (dlen && i != tblen - 1) {
			memcpy((*dest), delim, dlen);
			(*dest) += dlen;
		}

		lua_pop(L, 1);
	}
}

/* rspamd_text.fromtable(tbl[, delim]) */
static int
lua_text_fromtable(lua_State *L)
{
	LUA_TRACE_POINT;
	const char *delim = "";
	struct rspamd_lua_text *t;
	gsize textlen = 0, dlen = 0;
	int oldtop = lua_gettop(L);
	char *dest;

	if (!lua_istable(L, 1)) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_type(L, 2) == LUA_TSTRING) {
		delim = lua_tolstring(L, 2, &dlen);
	}

	/* Calculate length needed */
	lua_pushvalue(L, 1);
	lua_text_tbl_length(L, dlen, &textlen, 0);
	lua_pop(L, 1);

	/* The text length field is an unsigned int: refuse to truncate */
	if (textlen >= G_MAXUINT) {
		return luaL_error(L, "resulting text is too large");
	}

	/* Allocate new text */
	t = lua_newuserdata(L, sizeof(*t));
	dest = g_malloc(textlen);
	t->start = dest;
	t->len = textlen;
	t->flags = RSPAMD_TEXT_FLAG_OWN;
	rspamd_lua_setclass(L, rspamd_text_classname, -1);

	lua_pushvalue(L, 1);
	lua_text_tbl_append(L, delim, dlen, &dest, 0);
	lua_pop(L, 1); /* Table arg */

	g_assert(lua_gettop(L) == oldtop + 1);

	return 1;
}

/* rspamd_text:len() and the `__len` metamethod */
static int
lua_text_len(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	lua_pushinteger(L, t->len);

	return 1;
}

/* rspamd_text:str() and the `__tostring` metamethod */
static int
lua_text_str(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	lua_pushlstring(L, t->start, t->len);

	return 1;
}

/* rspamd_text:ptr() */
static int
lua_text_ptr(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	lua_pushlightuserdata(L, (gpointer) t->start);

	return 1;
}

/* rspamd_text:take_ownership() */
static int
lua_text_take_ownership(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	char *dest;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (!(t->flags & RSPAMD_TEXT_FLAG_OWN)) {
		dest = g_malloc(t->len);

		if (t->len > 0) {
			/* Both pointers may be NULL for an empty text */
			memcpy(dest, t->start, t->len);
		}

		t->start = dest;
		t->flags |= RSPAMD_TEXT_FLAG_OWN;
	}
	/* else: we already own it */

	lua_pushboolean(L, true);

	return 1;
}

/* rspamd_text:span(start[, len]) */
static int
lua_text_span(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	int64_t start = lua_tointeger(L, 2), len = -1;

	if (t && start >= 1 && start <= t->len) {
		if (lua_isnumber(L, 3)) {
			len = lua_tonumber(L, 3);
		}

		if (len == -1) {
			/* No length (or an explicit -1): up to the end of the text */
			len = t->len - (start - 1);
		}

		if (len < 0 || (len > (t->len - (start - 1)))) {
			return luaL_error(L, "invalid length");
		}

		/* Not owned: the span references the data of the original text */
		lua_new_text(L, t->start + (start - 1), len, FALSE);
	}
	else {
		if (!t) {
			return luaL_error(L, "invalid arguments, text required");
		}
		else {
			return luaL_error(L, "invalid arguments: start offset %d "
								 "is larger than text len %d",
							  (int) start, (int) t->len);
		}
	}

	return 1;
}

/* Helpers to behave exactly as Lua does */

/* Converts a (possibly negative or zero) start index to a 1-based position */
static inline gsize
relative_pos_start(int64_t pos, gsize len)
{
	if (pos > 0) {
		return pos;
	}
	else if (pos == 0) {
		return 1;
	}
	else if (pos < -((int64_t) len)) {
		return 1;
	}

	/* Negative pos inside str */
	return len + ((gsize) pos) + 1;
}

/* Converts a (possibly negative) end index to a 1-based inclusive position, clamped to len */
static inline gsize
relative_pos_end(int64_t pos, gsize len)
{
	if (pos > (int64_t) len) {
		return len;
	}
	else if (pos >= 0) {
		return (size_t) pos;
	}
	else if (pos < -((int64_t) len)) {
		return 0;
	}

	return len + ((gsize) pos) + 1;
}

/* rspamd_text:sub(start[, stop]) */
static int
lua_text_sub(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	size_t start = relative_pos_start(luaL_checkinteger(L, 2), t->len);
	size_t end = relative_pos_end(luaL_optinteger(L, 3, -1), t->len);

	if (start <= end) {
		lua_new_text(L, t->start + (start - 1), (end - start) + 1, FALSE);
	}
	else {
		lua_new_text(L, "", 0, TRUE);
	}

	return 1;
}

/*
 * Pushes the line that starts at `start_offset` and ends at `sep_pos` (or at
 * the end of the text if `sep_pos` is NULL) with trailing CR/LF trimmed.
 * @return offset of the end of the untrimmed line
 */
static int64_t
rspamd_lua_text_push_line(lua_State *L,
						  struct rspamd_lua_text *t,
						  int64_t start_offset,
						  const char *sep_pos,
						  gboolean stringify)
{
	const char *start;
	gsize len;
	int64_t ret;

	start = t->start + start_offset;
	len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
	ret = start_offset + len;

	/* Trim line */
	while (len > 0) {
		if (start[len - 1] == '\r' || start[len - 1] == '\n') {
			len--;
		}
		else {
			break;
		}
	}

	if (stringify) {
		lua_pushlstring(L, start, len);
	}
	else {
		struct rspamd_lua_text *ntext;

		ntext = lua_newuserdata(L, sizeof(*ntext));
		rspamd_lua_setclass(L, rspamd_text_classname, -1);
		ntext->start = start;
		ntext->len = len;
		ntext->flags = 0; /* Not own as it must be owned by a top object */
	}

	return ret;
}

/*
 * Iterator closure for rspamd_text:lines().
 * Upvalues: 1 - text, 2 - stringify flag, 3 - current offset.
 */
static int
rspamd_lua_text_readline(lua_State *L)
{
	struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1));
	gboolean stringify = lua_toboolean(L, lua_upvalueindex(2));
	int64_t pos = lua_tointeger(L, lua_upvalueindex(3));

	if (pos < 0) {
		return luaL_error(L, "invalid pos: %d", (int) pos);
	}

	if (pos >= t->len) {
		/* We are done */
		return 0;
	}

	const char *sep_pos;

	/* We look just for `\n` ignoring `\r` as it is very rare nowadays */
	sep_pos = memchr(t->start + pos, '\n', t->len - pos);

	if (sep_pos == NULL) {
		/* Either last `\n` or `\r` separated text */
		sep_pos = memchr(t->start + pos, '\r', t->len - pos);
	}

	pos = rspamd_lua_text_push_line(L, t, pos, sep_pos, stringify);

	/* Skip separators */
	while (pos < t->len) {
		if (t->start[pos] == '\n' || t->start[pos] == '\r') {
			pos++;
		}
		else {
			break;
		}
	}

	/* Update pos */
	lua_pushinteger(L, pos);
	lua_replace(L, lua_upvalueindex(3));

	return 1;
}

/* rspamd_text:lines([stringify]) */
static int
lua_text_lines(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	gboolean stringify = FALSE;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_isboolean(L, 2)) {
		stringify = lua_toboolean(L, 2);
	}

	lua_pushvalue(L, 1);
	lua_pushboolean(L, stringify);
	lua_pushinteger(L, 0); /* Current pos */
	lua_pushcclosure(L, rspamd_lua_text_readline, 3);

	return 1;
}

/*
 * Iterator closure for rspamd_text:split().
 * Upvalues: 1 - text, 2 - regexp userdata, 3 - stringify flag, 4 - current offset.
 */
static int
rspamd_lua_text_regexp_split(lua_State *L)
{
	struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1)),
						   *new_t;
	struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
		lua_touserdata(L, lua_upvalueindex(2));
	gboolean stringify = lua_toboolean(L, lua_upvalueindex(3));
	int64_t pos = lua_tointeger(L, lua_upvalueindex(4));
	gboolean matched, pushed = FALSE;

	if (pos < 0) {
		return luaL_error(L, "invalid pos: %d", (int) pos);
	}

	if (pos >= t->len) {
		/* We are done */
		return 0;
	}

	const char *start, *end, *old_start;

	end = t->start + pos;

	for (;;) {
		old_start = end;

		matched = rspamd_regexp_search(re->re, t->start, t->len,
									   &start, &end, FALSE, NULL);

		if (matched) {
			if (start - old_start > 0) {
				if (stringify) {
					lua_pushlstring(L, old_start, start - old_start);
				}
				else {
					new_t = lua_newuserdata(L, sizeof(*new_t));
					rspamd_lua_setclass(L, rspamd_text_classname, -1);
					new_t->start = old_start;
					new_t->len = start - old_start;
					new_t->flags = 0;
				}

				pushed = TRUE;
				break;
			}
			else {
				if (start == end) {
					/* Empty match: treat as no match to avoid looping forever */
					matched = FALSE;
					break;
				}
				/*
				 * All match separators (e.g. starting separator,
				 * we need to skip it). Continue iterations.
				 */
			}
		}
		else {
			/* No match, stop */
			break;
		}
	}

	if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
		/* No more matches, but we might need to push the last element */
		if (end == NULL) {
			end = t->start;
		}

		/* No separators, need to push the whole remaining part */
		if (stringify) {
			lua_pushlstring(L, end, (t->start + t->len) - end);
		}
		else {
			new_t = lua_newuserdata(L, sizeof(*new_t));
			rspamd_lua_setclass(L, rspamd_text_classname, -1);
			new_t->start = end;
			new_t->len = (t->start + t->len) - end;
			new_t->flags = 0;
		}

		pushed = TRUE;
		pos = t->len;
	}
	else {
		pos = end - t->start;
	}

	/* Update pos */
	lua_pushinteger(L, pos);
	lua_replace(L, lua_upvalueindex(4));

	/*
	 * Only report a result if one has really been pushed: otherwise the top of
	 * the stack is whatever the caller has passed (the previous element in
	 * case of a generic `for`), and it must not be returned again.
	 */
	return pushed ? 1 : 0;
}

/* rspamd_text:split(regexp, [stringify]) */
static int
lua_text_split(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	struct rspamd_lua_regexp *re;
	gboolean stringify = FALSE, own_re = FALSE;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_type(L, 2) == LUA_TUSERDATA) {
		re = lua_check_regexp(L, 2);
	}
	else {
		rspamd_regexp_t *c_re;
		GError *err = NULL;
		const char *pattern = lua_tostring(L, 2);

		if (pattern == NULL) {
			return luaL_error(L, "invalid arguments");
		}

		c_re = rspamd_regexp_new(pattern, NULL, &err);

		if (c_re == NULL) {
			/*
			 * Raising an error never returns, so the message is built on the
			 * Lua stack and `err` is released before the error is thrown.
			 */
			luaL_where(L, 1);
			lua_pushfstring(L, "cannot parse regexp: %s, error: %s",
							pattern,
							err == NULL ? "undefined" : err->message);
			lua_concat(L, 2);

			if (err) {
				g_error_free(err);
			}

			return lua_error(L);
		}

		re = g_malloc0(sizeof(struct rspamd_lua_regexp));
		re->re = c_re;
		re->re_pattern = g_strdup(pattern);
		re->module = rspamd_lua_get_module_name(L);
		own_re = TRUE;
	}

	if (re) {
		if (lua_isboolean(L, 3)) {
			stringify = lua_toboolean(L, 3);
		}

		/* Upvalues */
		lua_pushvalue(L, 1); /* text */

		if (own_re) {
			/* Wrap the freshly created regexp into a regexp userdata */
			struct rspamd_lua_regexp **pre;

			pre = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
			rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
			*pre = re;
		}
		else {
			lua_pushvalue(L, 2); /* regexp */
		}

		lua_pushboolean(L, stringify);
		lua_pushinteger(L, 0); /* Current pos */
		lua_pushcclosure(L, rspamd_lua_text_regexp_split, 4);
	}
	else {
		return luaL_error(L, "invalid arguments");
	}

	return 1;
}

/* rspamd_text:at(pos): an alias for rspamd_text:byte */
static int
lua_text_at(lua_State *L)
{
	return lua_text_byte(L);
}

/* rspamd_text:byte(pos[, pos2]) */
static int
lua_text_byte(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (!t) {
		return luaL_error(L, "invalid arguments");
	}

	gsize start = relative_pos_start(luaL_optinteger(L, 2, 1), t->len);
	gsize end = relative_pos_end(luaL_optinteger(L, 3, start), t->len);
	start--;

	if (start >= end) {
		return 0;
	}

	gsize nbytes = end - start;

	if (nbytes >= G_MAXINT) {
		return luaL_error(L, "string slice too long");
	}

	/* Each byte is a separate return value: ensure there is room for them */
	luaL_checkstack(L, (int) nbytes, "string slice too long");

	for (gsize i = start; i < end; i++) {
		/* Push as unsigned, same as rspamd_text:bytes() does */
		lua_pushinteger(L, (unsigned char) t->start[i]);
	}

	return (int) nbytes;
}

/* rspamd_text:memchr(chr, [reverse]) */
static int
lua_text_memchr(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	int c;
	bool reverse = false;

	if (lua_isnumber(L, 2)) {
		c = lua_tonumber(L, 2);
	}
	else {
		gsize l;
		const char *str = lua_tolstring(L, 2, &l);

		if (str == NULL) {
			return luaL_error(L, "invalid arguments");
		}

		if (l != 1) {
			return luaL_error(L, "need exactly one character to search");
		}

		c = str[0];
	}

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	void *f;

	if (lua_isboolean(L, 3)) {
		reverse = lua_toboolean(L, 3);
	}

	if (reverse) {
		f = rspamd_memrchr(t->start, c, t->len);
	}
	else {
		f = memchr(t->start, c, t->len);
	}

	if (f) {
		lua_pushinteger(L, ((const char *) f) - t->start + 1);
	}
	else {
		lua_pushinteger(L, -1);
	}

	return 1;
}

/* rspamd_text:bytes() */
static int
lua_text_bytes(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	lua_createtable(L, t->len, 0);

	for (gsize i = 0; i < t->len; i++) {
		lua_pushinteger(L, (unsigned char) t->start[i]);
		lua_rawseti(L, -2, i + 1);
	}

	return 1;
}

/*
 * rspamd_text:save_in_file(fname[, mode]), also registered as `write`.
 * The second argument may be a file name (the file is created with
 * O_CREAT | O_WRONLY | O_EXCL), a numeric file descriptor, or absent (stdout).
 * Returns true, or false and an error string on failure.
 */
static int
lua_text_save_in_file(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	const char *fname = NULL;
	unsigned int mode = 00644;
	int fd = -1;
	gboolean need_close = FALSE;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_type(L, 2) == LUA_TSTRING) {
		fname = luaL_checkstring(L, 2);

		if (lua_type(L, 3) == LUA_TNUMBER) {
			mode = lua_tointeger(L, 3);
		}
	}
	else if (lua_type(L, 2) == LUA_TNUMBER) {
		/* Created fd */
		fd = lua_tointeger(L, 2);
	}

	if (fd == -1) {
		if (fname) {
			fd = rspamd_file_xopen(fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);

			if (fd == -1) {
				lua_pushboolean(L, false);
				lua_pushstring(L, strerror(errno));

				return 2;
			}
			need_close = TRUE;
		}
		else {
			fd = STDOUT_FILENO;
		}
	}

	const char *p = t->start;
	gsize remain = t->len;

	/* write(2) may be interrupted or write less than requested */
	for (;;) {
		ssize_t r = write(fd, p, remain);
		int saved_errno;

		if (r == -1) {
			if (errno == EINTR) {
				continue;
			}

			saved_errno = errno;
		}
		else if (r == 0 && remain > 0) {
			/* No progress is possible, do not spin forever */
			saved_errno = EIO;
		}
		else {
			p += r;
			remain -= (gsize) r;

			if (remain == 0) {
				break;
			}

			continue;
		}

		/* Failure: errno has been saved as close() may clobber it */
		if (fd != STDOUT_FILENO) {
			close(fd);
		}

		lua_pushboolean(L, false);
		lua_pushstring(L, strerror(saved_errno));

		return 2;
	}

	if (need_close) {
		close(fd);
	}

	lua_pushboolean(L, true);

	return 1;
}

/*
 * `__gc` metamethod: releases owned data according to the text flags
 * (optionally zeroing it first if the WIPE flag is set).
 */
static int
lua_text_gc(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t != NULL) {
		g_assert(!(t->flags & RSPAMD_TEXT_FLAG_FAKE));

		if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
			if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
				rspamd_explicit_memzero((unsigned char *) t->start, t->len);
			}

			if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
				munmap((gpointer) t->start, t->len);
			}
			else {
				if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
					free((gpointer) t->start);
				}
				else {
					g_free((gpointer) t->start);
				}
			}
		}
	}

	return 0;
}

/* `__eq` metamethod: bytewise equality of two texts/strings */
static int
lua_text_eq(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
						   *t2 = lua_check_text_or_string(L, 2);

	if (t1 == NULL || t2 == NULL) {
		/* Something that is neither a text nor a string is never equal */
		lua_pushboolean(L, false);
	}
	else if (t1->len == t2->len) {
		/* Empty texts may have a NULL start, do not pass it to memcmp */
		lua_pushboolean(L, t1->len == 0 ||
							   memcmp(t1->start, t2->start, t1->len) == 0);
	}
	else {
		lua_pushboolean(L, false);
	}

	return 1;
}

/*
 * `__lt` metamethod. Note that the order is not lexicographic: texts of
 * different lengths are ordered by length, and only texts of the same length
 * are compared bytewise.
 */
static int
lua_text_lt(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
						   *t2 = lua_check_text_or_string(L, 2);

	if (t1 == NULL || t2 == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (t1->len == t2->len) {
		lua_pushboolean(L, t1->len > 0 &&
							   memcmp(t1->start, t2->start, t1->len) < 0);
	}
	else {
		lua_pushboolean(L, t1->len < t2->len);
	}

	return 1;
}

/* `__concat` metamethod: returns a new owned text with both operands joined */
static int
lua_text_concat(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
						   *t2 = lua_check_text_or_string(L, 2);

	if (t1 == NULL || t2 == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	/* Sum in gsize: adding two unsigned lengths could wrap around */
	gsize total = (gsize) t1->len + (gsize) t2->len;

	if (total >= G_MAXUINT) {
		return luaL_error(L, "resulting text is too large");
	}

	struct rspamd_lua_text *final;

	final = lua_new_text(L, NULL, total, TRUE);

	if (t1->len > 0) {
		memcpy((void *) final->start, t1->start, t1->len);
	}

	if (t2->len > 0) {
		memcpy((void *) (final->start + t1->len), t2->start, t2->len);
	}

	return 1;
}

/*
 * Zeroes the data of an owned text.
 * NOTE(review): this function is not referenced by any of the tables visible
 * in this part of the file - confirm whether it should be exposed as a method.
 */
static int
lua_text_wipe(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t != NULL) {
		if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
			rspamd_explicit_memzero((unsigned char *) t->start, t->len);
		}
		else {
			return luaL_error(L, "cannot wipe not owned text");
		}
	}
	else {
		return luaL_error(L, "invalid arguments");
	}

	return 0;
}

/* rspamd_text:base32([b32type]) */
static int
lua_text_base32(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
	enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_type(L, 2) == LUA_TSTRING) {
		btype = rspamd_base32_decode_type_from_str(lua_tostring(L, 2));

		if (btype == RSPAMD_BASE32_INVALID) {
			return luaL_error(L, "invalid b32 type: %s", lua_tostring(L, 2));
		}
	}

	/* Size is computed in gsize to avoid a 32 bit overflow of `len * 8` */
	gsize outlen = (gsize) t->len * 8 / 5 + 2;

	out = lua_new_text(L, NULL, outlen, TRUE);

	/*
	 * NOTE(review): the encoder is assumed to signal a failure with a negative
	 * result - confirm against its declaration. Storing such a value into the
	 * unsigned length would produce a text with a bogus huge length.
	 */
	gssize enclen = rspamd_encode_base32_buf(t->start, t->len, (char *) out->start,
											 outlen, btype);

	if (enclen < 0) {
		return luaL_error(L, "cannot encode text in base32");
	}

	out->len = enclen;

	return 1;
}

/* rspamd_text:base64([line_length, [nline, [fold]]]) */
static int
lua_text_base64(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
	gsize line_len = 0;
	gboolean fold = FALSE;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_type(L, 2) == LUA_TNUMBER) {
		lua_Integer requested_len = lua_tointeger(L, 2);

		/* Checked while still signed, so negative values are rejected as well */
		if (requested_len <= 8) {
			return luaL_error(L, "too small line length (more than 8 is required)");
		}

		line_len = requested_len;
	}

	enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;

	if (lua_type(L, 3) == LUA_TSTRING) {
		const char *how_str = lua_tostring(L, 3);

		if (g_ascii_strcasecmp(how_str, "cr") == 0) {
			how = RSPAMD_TASK_NEWLINES_CR;
		}
		else if (g_ascii_strcasecmp(how_str, "lf") == 0) {
			how = RSPAMD_TASK_NEWLINES_LF;
		}
		else if (g_ascii_strcasecmp(how_str, "crlf") != 0) {
			return luaL_error(L, "invalid newline style: %s", how_str);
		}
	}

	if (lua_type(L, 4) == LUA_TBOOLEAN) {
		fold = lua_toboolean(L, 4);
	}

	gsize sz_len;

	out = lua_newuserdata(L, sizeof(*out));
	out->flags = RSPAMD_TEXT_FLAG_OWN;
	out->start = rspamd_encode_base64_common(t->start, t->len,
											 line_len, &sz_len, fold, how);
	out->len = sz_len;
	rspamd_lua_setclass(L, rspamd_text_classname, -1);

	return 1;
}

/* rspamd_text:hex() */
static int
lua_text_hex(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1), *out;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	/* Two output characters per input byte, computed without 32 bit overflow */
	gsize outlen = (gsize) t->len * 2;

	out = lua_new_text(L, NULL, outlen, TRUE);
	out->len = rspamd_encode_hex_buf(t->start, t->len, (char *) out->start,
									 outlen);

	return 1;
}

/* rspamd_text:find(pattern [, init]): plain substring search */
static int
lua_text_find(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	gsize patlen, init = 1;
	const char *pat = luaL_checklstring(L, 2, &patlen);

	if (t == NULL || pat == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_isnumber(L, 3)) {
		init = relative_pos_start(lua_tointeger(L, 3), t->len);
	}

	/* Make it a zero based offset */
	init--;

	if (init > t->len) {
		return luaL_error(L, "invalid arguments to find: init too large");
	}

	goffset pos = rspamd_substring_search(t->start + init,
										  t->len - init,
										  pat, patlen);

	if (pos == -1) {
		lua_pushnil(L);

		return 1;
	}

	/*
	 * The search has been started at `init`, so the offset found is relative to
	 * it, whilst the indices returned must be relative to the text start.
	 */
	lua_pushinteger(L, init + pos + 1);
	lua_pushinteger(L, init + pos + patlen);

	return 2;
}

/* Applies `op` to the bit that corresponds to the byte `b` in the bitset `a` */
#define BITOP(a, b, op) \
	((a)[(uint64_t) (b) / (8u * sizeof *(a))] op(uint64_t) 1 << ((uint64_t) (b) % (8u * sizeof *(a))))

/* rspamd_text:exclude_chars(set_to_exclude, [always_copy]) */
static int
lua_text_exclude_chars(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	gsize patlen = 0;
	const char *pat = lua_tolstring(L, 2, &patlen), *pat_end, *p, *end;
	char *dest, *d;
	uint64_t byteset[32 / sizeof(uint64_t)]; /* Bitset for all 256 byte values */
	gboolean copy = TRUE;
	unsigned int *plen;

	if (t == NULL || pat == NULL || patlen == 0) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_isboolean(L, 3)) {
		copy = lua_toboolean(L, 3);
	}
	else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
		copy = FALSE;
	}

	if (!copy) {
		/* NOTE(review): an explicit `false` modifies even a not owned text in place */
		dest = (char *) t->start;
		plen = &t->len;
		lua_pushvalue(L, 1); /* Push text as a result */
	}
	else {
		/* We need to copy read only text; the buffer is filled by the loop below */
		struct rspamd_lua_text *nt;

		dest = g_malloc(t->len);
		nt = lua_newuserdata(L, sizeof(*nt));
		rspamd_lua_setclass(L, rspamd_text_classname, -1);
		nt->len = t->len;
		nt->flags = RSPAMD_TEXT_FLAG_OWN;
		nt->start = dest;
		plen = &nt->len;
	}

	/*
	 * Fill pattern bitset. The masks below are plain integers used with plain
	 * integer shifts (see BITOP), so they are the same on any host byte order
	 * and must not be byte swapped.
	 */
	memset(byteset, 0, sizeof byteset);
	pat_end = pat + patlen;

	while (pat < pat_end) {
		if (*pat == '%') {
			pat++;

			if (pat == pat_end) {
				/* Last '%' */
				BITOP(byteset, (unsigned char) '%', |=);
				break;
			}

			switch (*pat) {
			case '%':
				BITOP(byteset, *(unsigned char *) pat, |=);
				break;
			case 's':
				/* "\r\n\t\f " */
				byteset[0] |= 0x100003600LLU;
				break;
			case 'n':
				/* newlines: "\r\n" */
				byteset[0] |= 0x2400LLU;
				break;
			case '8':
				/* 8 bit characters */
				byteset[2] |= 0xffffffffffffffffLLU;
				byteset[3] |= 0xffffffffffffffffLLU;
				break;
			case 'c':
				/* Non printable (control) characters */
				byteset[0] |= 0xffffffffLLU;
				/* Del character */
				byteset[1] |= 0x8000000000000000LLU;
				break;
			default:
				/* Unknown class: ignored */
				break;
			}
		}
		else {
			BITOP(byteset, *(unsigned char *) pat, |=);
		}

		pat++;
	}

	p = t->start;
	end = t->start + t->len;
	d = dest;

	while (p < end) {
		if (!BITOP(byteset, *(unsigned char *) p, &)) {
			*d++ = *p;
		}

		p++;
	}

	*(plen) = d - dest;

	return 1;
}

/* rspamd_text:oneline([always_copy]) */
static int
lua_text_oneline(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);
	const char *p, *end;
	char *dest, *d;
	uint64_t byteset[32 / sizeof(uint64_t)]; /* Bitset for all 256 byte values */
	gboolean copy = TRUE, seen_8bit = FALSE;
	unsigned int *plen;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_isboolean(L, 2)) {
		copy = lua_toboolean(L, 2);
	}
	else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
		copy = FALSE;
	}

	if (!copy) {
		/* NOTE(review): an explicit `false` modifies even a not owned text in place */
		dest = (char *) t->start;
		plen = &t->len;
		lua_pushvalue(L, 1); /* Push text as a result */
	}
	else {
		/* We need to copy read only text; the buffer is filled by the loop below */
		struct rspamd_lua_text *nt;

		dest = g_malloc(t->len);
		nt = lua_newuserdata(L, sizeof(*nt));
		rspamd_lua_setclass(L, rspamd_text_classname, -1);
		nt->len = t->len;
		nt->flags = RSPAMD_TEXT_FLAG_OWN;
		nt->start = dest;
		plen = &nt->len;
	}

	/*
	 * Fill pattern bitset with the bytes that need special handling. The masks
	 * are plain integers used with plain integer shifts (see BITOP), so they
	 * must not be byte swapped on big endian hosts.
	 */
	memset(byteset, 0, sizeof byteset);
	/* All spaces */
	byteset[0] |= 0x100003600LLU;
	/* Control characters */
	byteset[0] |= 0xffffffffLLU;
	/* Del character */
	byteset[1] |= 0x8000000000000000LLU;
	/* 8 bit characters */
	byteset[2] |= 0xffffffffffffffffLLU;
	byteset[3] |= 0xffffffffffffffffLLU;

	p = t->start;
	end = t->start + t->len;
	d = dest;

	while (p < end) {
		if (!BITOP(byteset, *(unsigned char *) p, &)) {
			*d++ = *p;
		}
		else {
			if ((*(unsigned char *) p) & 0x80) {
				/* Copied as is, validated after the loop */
				seen_8bit = TRUE;
				*d++ = *p;
			}
			else {
				if (*p == ' ') {
					if (d != dest) {
						*d++ = *p++;
					}

					while (p < end && g_ascii_isspace(*p)) {
						p++;
					}

					continue; /* To avoid p++ */
				}
				else if (*p == '\r' || *p == '\n') {
					if (d != dest) {
						*d++ = ' ';
						p++;
					}

					while (p < end && g_ascii_isspace(*p)) {
						p++;
					}

					continue; /* To avoid p++ */
				}
				/* Any other control character is just dropped */
			}
		}

		p++;
	}

	/* Remove trailing spaces */
	while (d > dest && g_ascii_isspace(*(d - 1))) {
		d--;
	}

	if (seen_8bit) {
		if (rspamd_fast_utf8_validate(dest, d - dest) != 0) {
			/* Need to make it valid :( */
			UChar32 uc;
			goffset err_offset;
			gsize remain = d - dest;
			char *nd = dest;

			while (remain > 0 &&
				   (err_offset = rspamd_fast_utf8_validate(nd, remain)) > 0) {
				int i = 0;

				err_offset--; /* As it returns it 1 indexed */
				nd += err_offset;
				remain -= err_offset;

				/* Replace invalid bytes with `?` in place until a valid character is met */
				while (i < remain) {
					int old_pos = i;

					U8_NEXT(nd, i, remain, uc);

					if (uc < 0) {
						nd[old_pos] = '?';
					}
					else {
						break;
					}
				}

				nd += i;
				remain -= i;
			}
		}
	}

	*(plen) = d - dest;

	return 1;
}

/* rspamd_text:lower([is_utf, [inplace]]) */
static int
lua_text_lower(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1), *nt;
	gboolean is_utf8 = FALSE, is_inplace = FALSE;

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	if (lua_isboolean(L, 2)) {
		is_utf8 = lua_toboolean(L, 2);
	}

	if (lua_isboolean(L, 3)) {
		is_inplace = lua_toboolean(L, 3);
	}

	if (is_inplace) {
		nt = t;
		lua_pushvalue(L, 1);
	}
	else {
		nt = lua_new_text(L, t->start, t->len, TRUE);
	}

	if (!is_utf8) {
		rspamd_str_lc((char *) nt->start, nt->len);
	}
	else {
		rspamd_str_lc_utf8((char *) nt->start, nt->len);
	}

	return 1;
}

/* rspamd_text:strtoul(): returns the number parsed by rspamd_strtoul or nil */
static int
lua_text_strtoul(lua_State *L)
{
	LUA_TRACE_POINT;
	struct rspamd_lua_text *t = lua_check_text(L, 1);

	if (t == NULL) {
		return luaL_error(L, "invalid arguments");
	}

	unsigned long ll;

	if (rspamd_strtoul(t->start, t->len, &ll)) {
		lua_pushinteger(L, ll);
	}
	else {
		lua_pushnil(L);
	}

	return 1;
}

/* Used to distinguish lua text metatable */
static const unsigned int rspamd_lua_text_cookie = 0x2b21ef6fU;

/* Builds the module table: the `cookie` field plus the functions from textlib_f */
static int
lua_load_text(lua_State *L)
{
	lua_newtable(L);
	lua_pushstring(L, "cookie");
	lua_pushnumber(L, rspamd_lua_text_cookie);
	lua_settable(L, -3);
	luaL_register(L, NULL, textlib_f);

	return 1;
}

void luaopen_text(lua_State *L)
{
	rspamd_lua_new_class(L, rspamd_text_classname, textlib_m);
lua_pushstring(L, "cookie"); lua_pushnumber(L, rspamd_lua_text_cookie); lua_settable(L, -3); lua_pop(L, 1); rspamd_lua_add_preload(L, "rspamd_text", lua_load_text); }