12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789 |
- /*
- * Copyright 2024 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "lua_common.h"
- #include "libcryptobox/cryptobox.h"
- #include "contrib/fastutf8/fastutf8.h"
- #include "unix-std.h"
-
- /***
- * @module rspamd_text
- * This module provides access to opaque text structures used widely to prevent
- * copying between Lua and C for various concerns: performance, security etc...
- *
- * You can convert rspamd_text into string but it will copy data.
- */
-
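/***
 * @function rspamd_text.fromstring(str[, transparent])
 * Creates rspamd_text from Lua string (copied to the text unless `transparent` is true)
 * @param {string} str string to use
 * @param {boolean} transparent if true, the text references the string data instead of copying it
 * @return {rspamd_text} resulting text
 */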
- LUA_FUNCTION_DEF(text, fromstring);
-
/***
 * @function rspamd_text.null()
 * Creates rspamd_text with NULL pointer for testing purposes
 * @return {rspamd_text} resulting text
 */
- LUA_FUNCTION_DEF(text, null);
- /***
- * @function rspamd_text.randombytes(nbytes)
- * Creates rspamd_text with random bytes inside (raw bytes)
- * @param {number} nbytes number of random bytes generated
- * @return {rspamd_text} random bytes text
- */
- LUA_FUNCTION_DEF(text, randombytes);
-
- /***
- * @function rspamd_text.fromtable(tbl[, delim])
- * Same as `table.concat` but generates rspamd_text instead of the Lua string
- * @param {table} tbl table to use
- * @param {string} delim optional delimiter
- * @return {rspamd_text} resulting text
- */
- LUA_FUNCTION_DEF(text, fromtable);
- /***
- * @method rspamd_text:byte(pos[, pos2])
- * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified
- * @param {integer} pos index
- * @param {integer} pos2 index
- * @return {integer} byte at the position `pos` or varargs of bytes
- */
- LUA_FUNCTION_DEF(text, byte);
- /***
- * @method rspamd_text:len()
- * Returns length of a string
- * @return {number} length of string in **bytes**
- */
- LUA_FUNCTION_DEF(text, len);
- /***
- * @method rspamd_text:str()
- * Converts text to string by copying its content
- * @return {string} copy of text as Lua string
- */
- LUA_FUNCTION_DEF(text, str);
- /***
- * @method rspamd_text:ptr()
- * Converts text to lightuserdata
- * @return {lightuserdata} pointer value of rspamd_text
- */
- LUA_FUNCTION_DEF(text, ptr);
- /***
- * @method rspamd_text:save_in_file(fname[, mode])
- * Saves text in file
- * @return {boolean} true if save has been completed
- */
- LUA_FUNCTION_DEF(text, save_in_file);
- /***
- * @method rspamd_text:span(start[, len])
- * Returns a span for lua_text starting at pos [start] (1 indexed) and with
- * length `len` (or to the end of the text)
- * @param {integer} start start index
- * @param {integer} len length of span
- * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
- */
- LUA_FUNCTION_DEF(text, span);
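/***
 * @method rspamd_text:sub(start[, end])
 * Returns a substring for lua_text similar to string.sub from Lua
 * @param {integer} start start index (1 indexed, negative values count from the end)
 * @param {integer} end end index, inclusive (defaults to -1, i.e. the end of the text)
 * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
 */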
- LUA_FUNCTION_DEF(text, sub);
- /***
- * @method rspamd_text:lines([stringify])
- * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
- * @param {boolean} stringify stringify lines
- * @return {iterator} iterator triplet
- */
- LUA_FUNCTION_DEF(text, lines);
- /***
- * @method rspamd_text:split(regexp, [stringify])
- * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
- * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
- * @param {boolean} stringify stringify lines
- * @return {iterator} iterator triplet
- */
- LUA_FUNCTION_DEF(text, split);
- /***
- * @method rspamd_text:at(pos)
- * Returns a byte at the position `pos`
- * @param {integer} pos index
- * @return {integer} byte at the position `pos` or nil if pos out of bound
- */
- LUA_FUNCTION_DEF(text, at);
- /***
- * @method rspamd_text:memchr(chr, [reverse])
- * Returns the first or the last position of the character `chr` in the text or
- * -1 in case if a character has not been found. Indexes start from `1`
- * @param {string/number} chr character or a character code to find
- * @param {boolean} reverse last character if `true`
- * @return {integer} position of the character or `-1`
- */
- LUA_FUNCTION_DEF(text, memchr);
- /***
- * @method rspamd_text:bytes()
- * Converts text to an array of bytes
- * @return {table|integer} bytes in the array (as unsigned char)
- */
- LUA_FUNCTION_DEF(text, bytes);
- /***
- * @method rspamd_text:lower([is_utf, [inplace]])
- * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
- * @param {boolean} is_utf apply utf8 lowercase
- * @param {boolean} inplace lowercase the original text
- * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
- */
- LUA_FUNCTION_DEF(text, lower);
- LUA_FUNCTION_DEF(text, take_ownership);
- /***
- * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
- * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
- * where all chars from `set_to_exclude` are removed
- * Patterns supported:
- *
- * - %s - all space characters
- * - %n - all newline characters
- * - %c - all control characters (it includes 8bit characters and spaces)
- * - %8 - all 8 bit characters
- * - %% - just a percent character
- *
- * @param {string} set_to_exclude characters to exclude
- * @param {boolean} always_copy always copy the source text
- * @return {rspamd_text} modified or copied text
- */
- LUA_FUNCTION_DEF(text, exclude_chars);
- /***
- * @method rspamd_text:oneline([always_copy])
- * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
- * where the following transformations are made:
- * - All spaces sequences are replaced with a single space
- * - All newlines sequences are replaced with a single space
- * - Trailing and leading spaces are removed
- * - Control characters are excluded
- * - UTF8 sequences are normalised
- *
- * @param {boolean} always_copy always copy the source text
- * @return {rspamd_text} modified or copied text
- */
- LUA_FUNCTION_DEF(text, oneline);
- /***
- * @method rspamd_text:base32([b32type])
- * Returns a text encoded in base32 (new rspamd_text is allocated)
- *
- * @param {string} b32type base32 type (default, bleach, rfc)
- * @return {rspamd_text} new text encoded in base32
- */
- LUA_FUNCTION_DEF(text, base32);
- /***
- * @method rspamd_text:base64([line_length, [nline, [fold]]])
- * Returns a text encoded in base64 (new rspamd_text is allocated)
- *
- * @param {number} line_length return text split with newlines up to this attribute
- * @param {string} nline newline type: `cr`, `lf`, `crlf`
- * @param {boolean} fold use folding when splitting into lines (false by default)
- * @return {rspamd_text} new text encoded in base64
- */
- LUA_FUNCTION_DEF(text, base64);
- /***
- * @method rspamd_text:hex()
- * Returns a text encoded in hex (new rspamd_text is allocated)
- *
- * @return {rspamd_text} new text encoded in hex
- */
- LUA_FUNCTION_DEF(text, hex);
- /***
- * @method rspamd_text:find(pattern [, init])
- * Looks for the first match of pattern in the string s.
- * If it finds a match, then find returns the indices of s where this occurrence
- * starts and ends; otherwise, it returns nil. A third,
- * optional numerical argument init specifies where to start the search;
- * its default value is 1 and can be negative.
- * This method currently supports merely a plain search, no patterns.
- *
- * @param {string} pattern pattern to find
- * @param {number} init specifies where to start the search (1 default)
- * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil
- */
- LUA_FUNCTION_DEF(text, find);
- LUA_FUNCTION_DEF(text, gc);
- LUA_FUNCTION_DEF(text, eq);
- LUA_FUNCTION_DEF(text, lt);
- LUA_FUNCTION_DEF(text, concat);
- LUA_FUNCTION_DEF(text, strtoul);
-
- static const struct luaL_reg textlib_f[] = {
- LUA_INTERFACE_DEF(text, fromstring),
- {"from_string", lua_text_fromstring},
- LUA_INTERFACE_DEF(text, fromtable),
- {"from_table", lua_text_fromtable},
- LUA_INTERFACE_DEF(text, null),
- LUA_INTERFACE_DEF(text, randombytes),
- {NULL, NULL}};
-
- static const struct luaL_reg textlib_m[] = {
- LUA_INTERFACE_DEF(text, len),
- LUA_INTERFACE_DEF(text, str),
- LUA_INTERFACE_DEF(text, ptr),
- LUA_INTERFACE_DEF(text, take_ownership),
- LUA_INTERFACE_DEF(text, save_in_file),
- LUA_INTERFACE_DEF(text, span),
- LUA_INTERFACE_DEF(text, sub),
- LUA_INTERFACE_DEF(text, lines),
- LUA_INTERFACE_DEF(text, split),
- LUA_INTERFACE_DEF(text, at),
- LUA_INTERFACE_DEF(text, memchr),
- LUA_INTERFACE_DEF(text, byte),
- LUA_INTERFACE_DEF(text, bytes),
- LUA_INTERFACE_DEF(text, lower),
- LUA_INTERFACE_DEF(text, exclude_chars),
- LUA_INTERFACE_DEF(text, oneline),
- LUA_INTERFACE_DEF(text, base32),
- LUA_INTERFACE_DEF(text, base64),
- LUA_INTERFACE_DEF(text, hex),
- LUA_INTERFACE_DEF(text, find),
- LUA_INTERFACE_DEF(text, strtoul),
- {"write", lua_text_save_in_file},
- {"__len", lua_text_len},
- {"__tostring", lua_text_str},
- {"__gc", lua_text_gc},
- {"__eq", lua_text_eq},
- {"__lt", lua_text_lt},
- {"__concat", lua_text_concat},
- {NULL, NULL}};
-
- struct rspamd_lua_text *
- lua_check_text(lua_State *L, int pos)
- {
- void *ud = rspamd_lua_check_udata(L, pos, rspamd_text_classname);
- luaL_argcheck(L, ud != NULL, pos, "'text' expected");
- return ud ? (struct rspamd_lua_text *) ud : NULL;
- }
-
- struct rspamd_lua_text *
- lua_check_text_or_string(lua_State *L, int pos)
- {
- int pos_type = lua_type(L, pos);
-
- if (pos_type == LUA_TUSERDATA) {
- void *ud = rspamd_lua_check_udata(L, pos, rspamd_text_classname);
- luaL_argcheck(L, ud != NULL, pos, "'text' expected");
- return ud ? (struct rspamd_lua_text *) ud : NULL;
- }
- else if (pos_type == LUA_TSTRING) {
- /*
- * Fake static lua_text, we allow to use this function multiple times
- * by having a small array of static structures.
- */
- static unsigned cur_txt_idx = 0;
- static struct rspamd_lua_text fake_text[4];
- gsize len;
- int sel_idx;
-
- sel_idx = cur_txt_idx++ % G_N_ELEMENTS(fake_text);
- fake_text[sel_idx].start = lua_tolstring(L, pos, &len);
-
- if (len >= G_MAXUINT) {
- return NULL;
- }
-
- fake_text[sel_idx].len = len;
- fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE;
-
- return &fake_text[sel_idx];
- }
-
- return NULL;
- }
-
- struct rspamd_lua_text *
- lua_new_text(lua_State *L, const char *start, gsize len, gboolean own)
- {
- struct rspamd_lua_text *t;
-
- t = lua_newuserdata(L, sizeof(*t));
- t->flags = 0;
-
- if (own) {
- char *storage;
-
- if (len > 0) {
- storage = g_malloc(len);
-
- if (start != NULL) {
- memcpy(storage, start, len);
- }
-
- t->start = storage;
- t->flags = RSPAMD_TEXT_FLAG_OWN;
- }
- else {
- t->start = "";
- }
- }
- else {
- t->start = start;
- }
-
- t->len = len;
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
-
- return t;
- }
-
- struct rspamd_lua_text *
- lua_new_text_task(lua_State *L, struct rspamd_task *task,
- const char *start, gsize len, gboolean own)
- {
- struct rspamd_lua_text *t;
-
- t = lua_newuserdata(L, sizeof(*t));
- t->flags = 0;
-
- if (own) {
- char *storage;
-
- if (len > 0) {
- storage = rspamd_mempool_alloc(task->task_pool, len);
-
- if (start != NULL) {
- memcpy(storage, start, len);
- }
-
- t->start = storage;
- }
- else {
- t->start = "";
- }
- }
- else {
- t->start = start;
- }
-
- t->len = len;
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
-
- return t;
- }
-
- bool lua_is_text_binary(struct rspamd_lua_text *t)
- {
- if (t == NULL || t->len == 0) {
- return false;
- }
-
- if (rspamd_str_has_8bit(t->start, t->len)) {
- if (rspamd_fast_utf8_validate(t->start, t->len) == 0) {
- return false;
- }
- return true;
- }
-
- return false;
- }
-
-
- static int
- lua_text_fromstring(lua_State *L)
- {
- LUA_TRACE_POINT;
- const char *str;
- gsize l = 0;
- gboolean transparent = FALSE;
-
- str = luaL_checklstring(L, 1, &l);
-
- if (str) {
- if (lua_isboolean(L, 2)) {
- transparent = lua_toboolean(L, 2);
- }
-
- lua_new_text(L, str, l, !transparent);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
-
- return 1;
- }
-
- static int
- lua_text_null(lua_State *L)
- {
- LUA_TRACE_POINT;
-
- lua_new_text(L, NULL, 0, false);
-
- return 1;
- }
-
- static int
- lua_text_randombytes(lua_State *L)
- {
- LUA_TRACE_POINT;
- unsigned int nbytes = luaL_checkinteger(L, 1);
- struct rspamd_lua_text *out;
-
- out = lua_new_text(L, NULL, nbytes, TRUE);
- randombytes_buf((char *) out->start, nbytes);
- out->len = nbytes;
-
- return 1;
- }
-
- #define MAX_REC 10
-
- static void
- lua_text_tbl_length(lua_State *L, gsize dlen, gsize *dest, unsigned int rec)
- {
- gsize tblen, stlen;
- struct rspamd_lua_text *elt;
-
- if (rec > MAX_REC) {
- luaL_error(L, "lua_text_tbl_length: recursion limit exceeded");
-
- return;
- }
-
- tblen = rspamd_lua_table_size(L, -1);
-
- for (gsize i = 0; i < tblen; i++) {
- lua_rawgeti(L, -1, i + 1);
-
- if (lua_type(L, -1) == LUA_TSTRING) {
- #if LUA_VERSION_NUM >= 502
- stlen = lua_rawlen(L, -1);
- #else
- stlen = lua_objlen(L, -1);
- #endif
- (*dest) += stlen;
- }
- else if (lua_type(L, -1) == LUA_TUSERDATA) {
- elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);
-
- if (elt) {
- (*dest) += elt->len;
- }
- }
- else if (lua_type(L, -1) == LUA_TTABLE) {
- lua_text_tbl_length(L, dlen, dest, rec + 1);
- }
-
- if (i != tblen - 1) {
- (*dest) += dlen;
- }
-
- lua_pop(L, 1);
- }
- }
-
- static void
- lua_text_tbl_append(lua_State *L,
- const char *delim,
- gsize dlen,
- char **dest,
- unsigned int rec)
- {
- const char *st;
- gsize tblen, stlen;
- struct rspamd_lua_text *elt;
-
- if (rec > MAX_REC) {
- luaL_error(L, "lua_text_tbl_length: recursion limit exceeded");
-
- return;
- }
-
- tblen = rspamd_lua_table_size(L, -1);
-
- for (unsigned int i = 0; i < tblen; i++) {
- lua_rawgeti(L, -1, i + 1);
-
- if (lua_type(L, -1) == LUA_TSTRING) {
- st = lua_tolstring(L, -1, &stlen);
- memcpy((*dest), st, stlen);
- (*dest) += stlen;
- }
- else if (lua_type(L, -1) == LUA_TUSERDATA) {
- elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);
-
- if (elt) {
- memcpy((*dest), elt->start, elt->len);
- (*dest) += elt->len;
- }
- }
- else if (lua_type(L, -1) == LUA_TTABLE) {
- lua_text_tbl_append(L, delim, dlen, dest, rec + 1);
- }
-
- if (dlen && i != tblen - 1) {
- memcpy((*dest), delim, dlen);
- (*dest) += dlen;
- }
-
- lua_pop(L, 1);
- }
- }
-
- static int
- lua_text_fromtable(lua_State *L)
- {
- LUA_TRACE_POINT;
- const char *delim = "";
- struct rspamd_lua_text *t;
- gsize textlen = 0, dlen, oldtop = lua_gettop(L);
- char *dest;
-
- if (!lua_istable(L, 1)) {
- return luaL_error(L, "invalid arguments");
- }
-
- if (lua_type(L, 2) == LUA_TSTRING) {
- delim = lua_tolstring(L, 2, &dlen);
- }
- else {
- dlen = 0;
- }
-
- /* Calculate length needed */
- lua_pushvalue(L, 1);
- lua_text_tbl_length(L, dlen, &textlen, 0);
- lua_pop(L, 1);
-
- /* Allocate new text */
- t = lua_newuserdata(L, sizeof(*t));
- dest = g_malloc(textlen);
- t->start = dest;
- t->len = textlen;
- t->flags = RSPAMD_TEXT_FLAG_OWN;
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
-
- lua_pushvalue(L, 1);
- lua_text_tbl_append(L, delim, dlen, &dest, 0);
- lua_pop(L, 1); /* Table arg */
-
- int newtop = lua_gettop(L);
- g_assert(newtop == oldtop + 1);
-
- return 1;
- }
-
- static int
- lua_text_len(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- gsize l = 0;
-
- if (t != NULL) {
- l = t->len;
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- lua_pushinteger(L, l);
-
- return 1;
- }
-
- static int
- lua_text_str(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
-
- if (t != NULL) {
- lua_pushlstring(L, t->start, t->len);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_ptr(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
-
- if (t != NULL) {
- lua_pushlightuserdata(L, (gpointer) t->start);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_take_ownership(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- char *dest;
-
- if (t != NULL) {
- if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- /* We already own it */
- lua_pushboolean(L, true);
- }
- else {
- dest = g_malloc(t->len);
- memcpy(dest, t->start, t->len);
- t->start = dest;
- t->flags |= RSPAMD_TEXT_FLAG_OWN;
- lua_pushboolean(L, true);
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_span(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- int64_t start = lua_tointeger(L, 2), len = -1;
-
- if (t && start >= 1 && start <= t->len) {
- if (lua_isnumber(L, 3)) {
- len = lua_tonumber(L, 3);
- }
-
- if (len == -1) {
- len = t->len - (start - 1);
- }
-
- if (len < 0 || (len > (t->len - (start - 1)))) {
- return luaL_error(L, "invalid length");
- }
-
- lua_new_text(L, t->start + (start - 1), len, FALSE);
- }
- else {
- if (!t) {
- return luaL_error(L, "invalid arguments, text required");
- }
- else {
- return luaL_error(L, "invalid arguments: start offset %d "
- "is larger than text len %d",
- (int) start, (int) t->len);
- }
- }
-
- return 1;
- }
-
- /* Helpers to behave exactly as Lua does */
- static inline gsize
- relative_pos_start(int pos, gsize len)
- {
- if (pos > 0) {
- return pos;
- }
- else if (pos == 0) {
- return 1;
- }
- else if (pos < -((int) len)) {
- return 1;
- }
-
- /* Negative pos inside str */
- return len + ((gsize) pos) + 1;
- }
-
- static inline gsize
- relative_pos_end(int pos, gsize len)
- {
- if (pos > (int) len) {
- return len;
- }
- else if (pos >= 0) {
- return (size_t) pos;
- }
- else if (pos < -((int) len)) {
- return 0;
- }
-
- return len + ((gsize) pos) + 1;
- }
-
- static int
- lua_text_sub(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
-
- if (t) {
- size_t start = relative_pos_start(luaL_checkinteger(L, 2),
- t->len);
- size_t end = relative_pos_end(luaL_optinteger(L, 3, -1),
- t->len);
-
-
- if (start <= end) {
- lua_new_text(L, t->start + (start - 1),
- (end - start) + 1, FALSE);
- }
- else {
- lua_new_text(L, "", 0, TRUE);
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int64_t
- rspamd_lua_text_push_line(lua_State *L,
- struct rspamd_lua_text *t,
- int64_t start_offset,
- const char *sep_pos,
- gboolean stringify)
- {
- const char *start;
- gsize len;
- int64_t ret;
-
- start = t->start + start_offset;
- len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
- ret = start_offset + len;
-
- /* Trim line */
- while (len > 0) {
- if (start[len - 1] == '\r' || start[len - 1] == '\n') {
- len--;
- }
- else {
- break;
- }
- }
-
- if (stringify) {
- lua_pushlstring(L, start, len);
- }
- else {
- struct rspamd_lua_text *ntext;
-
- ntext = lua_newuserdata(L, sizeof(*ntext));
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
- ntext->start = start;
- ntext->len = len;
- ntext->flags = 0; /* Not own as it must be owned by a top object */
- }
-
- return ret;
- }
-
- static int
- rspamd_lua_text_readline(lua_State *L)
- {
- struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1));
- gboolean stringify = lua_toboolean(L, lua_upvalueindex(2));
- int64_t pos = lua_tointeger(L, lua_upvalueindex(3));
-
- if (pos < 0) {
- return luaL_error(L, "invalid pos: %d", (int) pos);
- }
-
- if (pos >= t->len) {
- /* We are done */
- return 0;
- }
-
- const char *sep_pos;
-
- /* We look just for `\n` ignoring `\r` as it is very rare nowadays */
- sep_pos = memchr(t->start + pos, '\n', t->len - pos);
-
- if (sep_pos == NULL) {
- /* Either last `\n` or `\r` separated text */
- sep_pos = memchr(t->start + pos, '\r', t->len - pos);
- }
-
- pos = rspamd_lua_text_push_line(L, t, pos, sep_pos, stringify);
-
- /* Skip separators */
- while (pos < t->len) {
- if (t->start[pos] == '\n' || t->start[pos] == '\r') {
- pos++;
- }
- else {
- break;
- }
- }
-
- /* Update pos */
- lua_pushinteger(L, pos);
- lua_replace(L, lua_upvalueindex(3));
-
- return 1;
- }
-
- static int
- lua_text_lines(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- gboolean stringify = FALSE;
-
- if (t) {
- if (lua_isboolean(L, 2)) {
- stringify = lua_toboolean(L, 2);
- }
-
- lua_pushvalue(L, 1);
- lua_pushboolean(L, stringify);
- lua_pushinteger(L, 0); /* Current pos */
- lua_pushcclosure(L, rspamd_lua_text_readline, 3);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- rspamd_lua_text_regexp_split(lua_State *L)
- {
- struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1)),
- *new_t;
- struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
- lua_touserdata(L, lua_upvalueindex(2));
- gboolean stringify = lua_toboolean(L, lua_upvalueindex(3));
- int64_t pos = lua_tointeger(L, lua_upvalueindex(4));
- gboolean matched;
-
- if (pos < 0) {
- return luaL_error(L, "invalid pos: %d", (int) pos);
- }
-
- if (pos >= t->len) {
- /* We are done */
- return 0;
- }
-
- const char *start, *end, *old_start;
-
- end = t->start + pos;
-
- for (;;) {
- old_start = end;
-
- matched = rspamd_regexp_search(re->re, t->start, t->len, &start, &end, FALSE,
- NULL);
-
- if (matched) {
- if (start - old_start > 0) {
- if (stringify) {
- lua_pushlstring(L, old_start, start - old_start);
- }
- else {
- new_t = lua_newuserdata(L, sizeof(*t));
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
- new_t->start = old_start;
- new_t->len = start - old_start;
- new_t->flags = 0;
- }
-
- break;
- }
- else {
- if (start == end) {
- matched = FALSE;
- break;
- }
- /*
- * All match separators (e.g. starting separator,
- * we need to skip it). Continue iterations.
- */
- }
- }
- else {
- /* No match, stop */
- break;
- }
- }
-
- if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
- /* No more matches, but we might need to push the last element */
- if (end == NULL) {
- end = t->start;
- }
- /* No separators, need to push the whole remaining part */
- if (stringify) {
- lua_pushlstring(L, end, (t->start + t->len) - end);
- }
- else {
- new_t = lua_newuserdata(L, sizeof(*t));
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
- new_t->start = end;
- new_t->len = (t->start + t->len) - end;
- new_t->flags = 0;
- }
-
- pos = t->len;
- }
- else {
-
- pos = end - t->start;
- }
-
- /* Update pos */
- lua_pushinteger(L, pos);
- lua_replace(L, lua_upvalueindex(4));
-
- return 1;
- }
-
- static int
- lua_text_split(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- struct rspamd_lua_regexp *re;
- gboolean stringify = FALSE, own_re = FALSE;
-
- if (t == NULL) {
- return luaL_error(L, "invalid arguments");
- }
-
- if (lua_type(L, 2) == LUA_TUSERDATA) {
- re = lua_check_regexp(L, 2);
- }
- else {
- rspamd_regexp_t *c_re;
- GError *err = NULL;
-
- c_re = rspamd_regexp_new(lua_tostring(L, 2), NULL, &err);
- if (c_re == NULL) {
-
- int ret = luaL_error(L, "cannot parse regexp: %s, error: %s",
- lua_tostring(L, 2),
- err == NULL ? "undefined" : err->message);
- if (err) {
- g_error_free(err);
- }
-
- return ret;
- }
-
- re = g_malloc0(sizeof(struct rspamd_lua_regexp));
- re->re = c_re;
- re->re_pattern = g_strdup(lua_tostring(L, 2));
- re->module = rspamd_lua_get_module_name(L);
- own_re = TRUE;
- }
-
- if (re) {
- if (lua_isboolean(L, 3)) {
- stringify = lua_toboolean(L, 3);
- }
-
- /* Upvalues */
- lua_pushvalue(L, 1); /* text */
-
- if (own_re) {
- struct rspamd_lua_regexp **pre;
- pre = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
- rspamd_lua_setclass(L, rspamd_regexp_classname, -1);
- *pre = re;
- }
- else {
- lua_pushvalue(L, 2); /* regexp */
- }
-
- lua_pushboolean(L, stringify);
- lua_pushinteger(L, 0); /* Current pos */
- lua_pushcclosure(L, rspamd_lua_text_regexp_split, 4);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
-
- static int
- lua_text_at(lua_State *L)
- {
- return lua_text_byte(L);
- }
-
- static int
- lua_text_byte(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- if (!t) {
- return luaL_error(L, "invalid arguments");
- }
-
- gsize start = relative_pos_start(luaL_optinteger(L, 2, 1), t->len);
- gsize end = relative_pos_end(luaL_optinteger(L, 3, start), t->len);
- start--;
-
- if (start >= end) {
- return 0;
- }
-
- for (gsize i = start; i < end; i++) {
- lua_pushinteger(L, t->start[i]);
- }
- return end - start;
- }
-
- static int
- lua_text_memchr(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- int c;
- bool reverse = false;
-
- if (lua_isnumber(L, 2)) {
- c = lua_tonumber(L, 2);
- }
- else {
- gsize l;
- const char *str = lua_tolstring(L, 2, &l);
-
- if (str) {
- c = str[0];
-
- if (l != 1) {
- return luaL_error(L, "need exactly one character to search");
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
- }
-
- if (t) {
- void *f;
-
- if (lua_isboolean(L, 3)) {
- reverse = lua_toboolean(L, 3);
- }
-
- if (reverse) {
- f = rspamd_memrchr(t->start, c, t->len);
- }
- else {
- f = memchr(t->start, c, t->len);
- }
-
- if (f) {
- lua_pushinteger(L, ((const char *) f) - t->start + 1);
- }
- else {
- lua_pushinteger(L, -1);
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_bytes(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
-
- if (t) {
- lua_createtable(L, t->len, 0);
-
- for (gsize i = 0; i < t->len; i++) {
- lua_pushinteger(L, (unsigned char) t->start[i]);
- lua_rawseti(L, -2, i + 1);
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_save_in_file(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- const char *fname = NULL;
- unsigned int mode = 00644;
- int fd = -1;
- gboolean need_close = FALSE;
-
- if (t != NULL) {
- if (lua_type(L, 2) == LUA_TSTRING) {
- fname = luaL_checkstring(L, 2);
-
- if (lua_type(L, 3) == LUA_TNUMBER) {
- mode = lua_tointeger(L, 3);
- }
- }
- else if (lua_type(L, 2) == LUA_TNUMBER) {
- /* Created fd */
- fd = lua_tointeger(L, 2);
- }
-
- if (fd == -1) {
- if (fname) {
- fd = rspamd_file_xopen(fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);
-
- if (fd == -1) {
- lua_pushboolean(L, false);
- lua_pushstring(L, strerror(errno));
-
- return 2;
- }
- need_close = TRUE;
- }
- else {
- fd = STDOUT_FILENO;
- }
- }
-
- if (write(fd, t->start, t->len) == -1) {
- if (fd != STDOUT_FILENO) {
- close(fd);
- }
-
- lua_pushboolean(L, false);
- lua_pushstring(L, strerror(errno));
-
- return 2;
- }
-
- if (need_close) {
- close(fd);
- }
-
- lua_pushboolean(L, true);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_gc(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
-
- if (t != NULL) {
- g_assert(!(t->flags & RSPAMD_TEXT_FLAG_FAKE));
-
- if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
- rspamd_explicit_memzero((unsigned char *) t->start, t->len);
- }
-
- if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
- munmap((gpointer) t->start, t->len);
- }
- else {
- if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
- free((gpointer) t->start);
- }
- else {
- g_free((gpointer) t->start);
- }
- }
- }
- }
-
- return 0;
- }
-
- static int
- lua_text_eq(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
- *t2 = lua_check_text_or_string(L, 2);
-
- if (t1->len == t2->len) {
- lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) == 0);
- }
- else {
- lua_pushboolean(L, false);
- }
-
- return 1;
- }
-
- static int
- lua_text_lt(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
- *t2 = lua_check_text_or_string(L, 2);
-
- if (t1 && t2) {
- if (t1->len == t2->len) {
- lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) < 0);
- }
- else {
- lua_pushboolean(L, t1->len < t2->len);
- }
- }
-
- return 1;
- }
-
- static int
- lua_text_concat(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
- *t2 = lua_check_text_or_string(L, 2);
-
- if (t1 && t2) {
- struct rspamd_lua_text *final;
-
- final = lua_new_text(L, NULL, t1->len + t2->len, TRUE);
- memcpy((void *) final->start, t1->start, t1->len);
- memcpy((void *) (final->start + t1->len), t2->start, t2->len);
- }
-
- return 1;
- }
-
- static int
- lua_text_wipe(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
-
- if (t != NULL) {
- if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- rspamd_explicit_memzero((unsigned char *) t->start, t->len);
- }
- else {
- return luaL_error(L, "cannot wipe not owned text");
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 0;
- }
-
- static int
- lua_text_base32(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
- enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;
-
- if (t != NULL) {
- if (lua_type(L, 2) == LUA_TSTRING) {
- btype = rspamd_base32_decode_type_from_str(lua_tostring(L, 2));
-
- if (btype == RSPAMD_BASE32_INVALID) {
- return luaL_error(L, "invalid b32 type: %s", lua_tostring(L, 2));
- }
- }
-
- out = lua_new_text(L, NULL, t->len * 8 / 5 + 2, TRUE);
- out->len = rspamd_encode_base32_buf(t->start, t->len, (char *) out->start,
- out->len, btype);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_base64(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
- gsize line_len = 0;
- gboolean fold = FALSE;
-
- if (t != NULL) {
- if (lua_type(L, 2) == LUA_TNUMBER) {
- line_len = lua_tointeger(L, 2);
-
- if (line_len <= 8) {
- return luaL_error(L, "too small line length (at least 8 is required)");
- }
- }
-
- enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;
-
- if (lua_type(L, 3) == LUA_TSTRING) {
- const char *how_str = lua_tostring(L, 3);
-
- if (g_ascii_strcasecmp(how_str, "cr") == 0) {
- how = RSPAMD_TASK_NEWLINES_CR;
- }
- else if (g_ascii_strcasecmp(how_str, "lf") == 0) {
- how = RSPAMD_TASK_NEWLINES_LF;
- }
- else if (g_ascii_strcasecmp(how_str, "crlf") != 0) {
- return luaL_error(L, "invalid newline style: %s", how_str);
- }
- }
-
- if (lua_type(L, 4) == LUA_TBOOLEAN) {
- fold = lua_toboolean(L, 4);
- }
-
- gsize sz_len;
-
- out = lua_newuserdata(L, sizeof(*t));
- out->flags = RSPAMD_TEXT_FLAG_OWN;
- out->start = rspamd_encode_base64_common(t->start, t->len,
- line_len, &sz_len, fold, how);
- out->len = sz_len;
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_hex(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
-
- if (t != NULL) {
-
- out = lua_new_text(L, NULL, t->len * 2, TRUE);
- out->len = rspamd_encode_hex_buf(t->start, t->len, (char *) out->start,
- out->len);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_find(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- gsize patlen, init = 1;
- const char *pat = luaL_checklstring(L, 2, &patlen);
-
- if (t != NULL && pat != NULL) {
-
- if (lua_isnumber(L, 3)) {
- init = relative_pos_start(lua_tointeger(L, 3), t->len);
- }
-
- init--;
-
- if (init > t->len) {
- return luaL_error(L, "invalid arguments to find: init too large");
- }
-
- goffset pos = rspamd_substring_search(t->start + init,
- t->len - init,
- pat, patlen);
-
- if (pos == -1) {
- lua_pushnil(L);
-
- return 1;
- }
-
- lua_pushinteger(L, pos + 1);
- lua_pushinteger(L, pos + patlen);
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 2;
- }
-
- #define BITOP(a, b, op) \
- ((a)[(uint64_t) (b) / (8u * sizeof *(a))] op(uint64_t) 1 << ((uint64_t) (b) % (8u * sizeof *(a))))
-
- static int
- lua_text_exclude_chars(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- gssize patlen;
- const char *pat = lua_tolstring(L, 2, &patlen), *p, *end;
- char *dest, *d;
- uint64_t byteset[32 / sizeof(uint64_t)]; /* Bitset for ascii */
- gboolean copy = TRUE;
- unsigned int *plen;
-
- if (t != NULL && pat && patlen > 0) {
- if (lua_isboolean(L, 3)) {
- copy = lua_toboolean(L, 3);
- }
- else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- copy = FALSE;
- }
-
- if (!copy) {
- dest = (char *) t->start;
- plen = &t->len;
- lua_pushvalue(L, 1); /* Push text as a result */
- }
- else {
- /* We need to copy read only text */
- struct rspamd_lua_text *nt;
-
- dest = g_malloc(t->len);
- nt = lua_newuserdata(L, sizeof(*nt));
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
- nt->len = t->len;
- nt->flags = RSPAMD_TEXT_FLAG_OWN;
- memcpy(dest, t->start, t->len);
- nt->start = dest;
- plen = &nt->len;
- }
-
- /* Fill pattern bitset */
- memset(byteset, 0, sizeof byteset);
-
- while (patlen > 0) {
- if (*pat == '%') {
- pat++;
- patlen--;
-
- if (patlen > 0) {
- /*
- * This stuff assumes little endian, but GUINT64_FROM_LE should
- * deal with proper conversion
- */
- switch (*pat) {
- case '%':
- BITOP(byteset, *(unsigned char *) pat, |=);
- break;
- case 's':
- /* "\r\n\t\f " */
- byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
- break;
- case 'n':
- /* newlines: "\r\n" */
- byteset[0] |= GUINT64_FROM_LE(0x2400LLU);
- break;
- case '8':
- /* 8 bit characters */
- byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
- byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
- break;
- case 'c':
- /* Non printable (control) characters */
- byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU);
- /* Del character */
- byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU);
- break;
- }
- }
- else {
- /* Last '%' */
- BITOP(byteset, (unsigned char) '%', |=);
- }
- }
- else {
- BITOP(byteset, *(unsigned char *) pat, |=);
- }
-
- pat++;
- patlen--;
- }
- for (; patlen > 0 && BITOP(byteset, *(unsigned char *) pat, |=); pat++, patlen--)
- ;
-
- p = t->start;
- end = t->start + t->len;
- d = dest;
-
- while (p < end) {
- if (!BITOP(byteset, *(unsigned char *) p, &)) {
- *d++ = *p;
- }
-
- p++;
- }
-
- *(plen) = d - dest;
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_oneline(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
- const char *p, *end;
- char *dest, *d;
- uint64_t byteset[32 / sizeof(uint64_t)]; /* Bitset for ascii */
- gboolean copy = TRUE, seen_8bit = FALSE;
- unsigned int *plen;
-
- if (t != NULL) {
- if (lua_isboolean(L, 2)) {
- copy = lua_toboolean(L, 2);
- }
- else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- copy = FALSE;
- }
-
- if (!copy) {
- dest = (char *) t->start;
- plen = &t->len;
- lua_pushvalue(L, 1); /* Push text as a result */
- }
- else {
- /* We need to copy read only text */
- struct rspamd_lua_text *nt;
-
- dest = g_malloc(t->len);
- nt = lua_newuserdata(L, sizeof(*nt));
- rspamd_lua_setclass(L, rspamd_text_classname, -1);
- nt->len = t->len;
- nt->flags = RSPAMD_TEXT_FLAG_OWN;
- memcpy(dest, t->start, t->len);
- nt->start = dest;
- plen = &nt->len;
- }
-
- /* Fill pattern bitset */
- memset(byteset, 0, sizeof byteset);
- /* All spaces */
- byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
- /* Control characters */
- byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU);
- /* Del character */
- byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU);
- /* 8 bit characters */
- byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
- byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
-
- p = t->start;
- end = t->start + t->len;
- d = dest;
-
- while (p < end) {
- if (!BITOP(byteset, *(unsigned char *) p, &)) {
- *d++ = *p;
- }
- else {
- if ((*(unsigned char *) p) & 0x80) {
- seen_8bit = TRUE;
- *d++ = *p;
- }
- else {
- if (*p == ' ') {
- if (d != dest) {
- *d++ = *p++;
- }
-
- while (p < end && g_ascii_isspace(*p)) {
- p++;
- }
-
- continue; /* To avoid p++ */
- }
- else if (*p == '\r' || *p == '\n') {
- if (d != dest) {
- *d++ = ' ';
- p++;
- }
-
- while (p < end && g_ascii_isspace(*p)) {
- p++;
- }
-
- continue; /* To avoid p++ */
- }
- }
- }
-
- p++;
- }
-
- while (d > dest && g_ascii_isspace(*(d - 1))) {
- d--;
- }
-
- if (seen_8bit) {
- if (rspamd_fast_utf8_validate(dest, d - dest) != 0) {
- /* Need to make it valid :( */
- UChar32 uc;
- goffset err_offset;
- gsize remain = d - dest;
- char *nd = dest;
-
- while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(nd, remain)) > 0) {
- int i = 0;
-
- err_offset--; /* As it returns it 1 indexed */
- nd += err_offset;
- remain -= err_offset;
-
- /* Each invalid character of input requires 3 bytes of output (+2 bytes) */
- while (i < remain) {
- int old_pos = i;
- U8_NEXT(nd, i, remain, uc);
-
- if (uc < 0) {
- nd[old_pos] = '?';
- }
- else {
- break;
- }
- }
-
- nd += i;
- remain -= i;
- }
- }
- }
-
- *(plen) = d - dest;
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_lower(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1), *nt;
- gboolean is_utf8 = FALSE, is_inplace = FALSE;
-
- if (t != NULL) {
- if (lua_isboolean(L, 2)) {
- is_utf8 = lua_toboolean(L, 2);
- }
- if (lua_isboolean(L, 3)) {
- is_inplace = lua_toboolean(L, 3);
- }
-
- if (is_inplace) {
- nt = t;
- lua_pushvalue(L, 1);
- }
- else {
- nt = lua_new_text(L, t->start, t->len, TRUE);
- }
-
- if (!is_utf8) {
- rspamd_str_lc((char *) nt->start, nt->len);
- }
- else {
- rspamd_str_lc_utf8((char *) nt->start, nt->len);
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- static int
- lua_text_strtoul(lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text(L, 1);
-
- if (t) {
- unsigned long ll;
-
- if (rspamd_strtoul(t->start, t->len, &ll)) {
- lua_pushinteger(L, ll);
- }
- else {
- lua_pushnil(L);
- }
- }
- else {
- return luaL_error(L, "invalid arguments");
- }
-
- return 1;
- }
-
- /* Used to distinguish lua text metatable */
- static const unsigned int rspamd_lua_text_cookie = 0x2b21ef6fU;
-
- static int
- lua_load_text(lua_State *L)
- {
- lua_newtable(L);
- lua_pushstring(L, "cookie");
- lua_pushnumber(L, rspamd_lua_text_cookie);
- lua_settable(L, -3);
- luaL_register(L, NULL, textlib_f);
-
- return 1;
- }
-
- void luaopen_text(lua_State *L)
- {
- rspamd_lua_new_class(L, rspamd_text_classname, textlib_m);
- lua_pushstring(L, "cookie");
- lua_pushnumber(L, rspamd_lua_text_cookie);
- lua_settable(L, -3);
- lua_pop(L, 1);
-
- rspamd_lua_add_preload(L, "rspamd_text", lua_load_text);
- }
|