12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740 |
- /*-
- * Copyright 2019 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "lua_common.h"
- #include "libcryptobox/cryptobox.h"
- #include "contrib/fastutf8/fastutf8.h"
- #include "unix-std.h"
-
- /***
- * @module rspamd_text
- * This module provides access to opaque text structures used widely to prevent
- * copying between Lua and C for various concerns: performance, security etc...
- *
- * You can convert rspamd_text into string but it will copy data.
- */
-
- /***
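- * @function rspamd_text.fromstring(str[, transparent])
- * Creates rspamd_text from Lua string (copied to the text unless `transparent` is true)
- * @param {string} str string to use
- * @param {boolean} transparent if true, the text refers to the Lua string data instead of copying it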
- * @return {rspamd_text} resulting text
- */
- LUA_FUNCTION_DEF (text, fromstring);
-
- /***
- * @function rspamd_text.null()
- * Creates an empty rspamd_text with a NULL data pointer (for testing purposes);
- * this function takes no arguments
- * @return {rspamd_text} resulting text
- */
- LUA_FUNCTION_DEF (text, null);
- /***
- * @function rspamd_text.randombytes(nbytes)
- * Creates rspamd_text with random bytes inside (raw bytes)
- * @param {number} nbytes number of random bytes generated
- * @return {rspamd_text} random bytes text
- */
- LUA_FUNCTION_DEF (text, randombytes);
-
- /***
- * @function rspamd_text.fromtable(tbl[, delim])
- * Same as `table.concat` but generates rspamd_text instead of the Lua string
- * @param {table} tbl table to use
- * @param {string} delim optional delimiter
- * @return {rspamd_text} resulting text
- */
- LUA_FUNCTION_DEF (text, fromtable);
- /***
- * @method rspamd_text:byte(pos[, pos2])
- * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified
- * @param {integer} pos index
- * @param {integer} pos2 index
- * @return {integer} byte at the position `pos` or varargs of bytes
- */
- LUA_FUNCTION_DEF (text, byte);
- /***
- * @method rspamd_text:len()
- * Returns length of a string
- * @return {number} length of string in **bytes**
- */
- LUA_FUNCTION_DEF (text, len);
- /***
- * @method rspamd_text:str()
- * Converts text to string by copying its content
- * @return {string} copy of text as Lua string
- */
- LUA_FUNCTION_DEF (text, str);
- /***
- * @method rspamd_text:ptr()
- * Converts text to lightuserdata
- * @return {lightuserdata} pointer value of rspamd_text
- */
- LUA_FUNCTION_DEF (text, ptr);
- /***
- * @method rspamd_text:save_in_file(fname[, mode])
- * Saves text in file
- * @return {boolean} true if save has been completed
- */
- LUA_FUNCTION_DEF (text, save_in_file);
- /***
- * @method rspamd_text:span(start[, len])
- * Returns a span for lua_text starting at pos [start] (1 indexed) and with
- * length `len` (or to the end of the text)
- * @param {integer} start start index
- * @param {integer} len length of span
- * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
- */
- LUA_FUNCTION_DEF (text, span);
- /***
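- * @method rspamd_text:sub(start[, end])
- * Returns a substring of lua_text similar to string.sub from Lua: `start` and
- * `end` are 1-based inclusive indices that may be negative to count from the
- * end of the text (`end` defaults to -1)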
- * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
- */
- LUA_FUNCTION_DEF (text, sub);
- /***
- * @method rspamd_text:lines([stringify])
- * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
- * @param {boolean} stringify stringify lines
- * @return {iterator} iterator triplet
- */
- LUA_FUNCTION_DEF (text, lines);
- /***
- * @method rspamd_text:split(regexp, [stringify])
- * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
- * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
- * @param {boolean} stringify stringify lines
- * @return {iterator} iterator triplet
- */
- LUA_FUNCTION_DEF (text, split);
- /***
- * @method rspamd_text:at(pos)
- * Returns a byte at the position `pos`
- * @param {integer} pos index
- * @return {integer} byte at the position `pos` or nil if pos out of bound
- */
- LUA_FUNCTION_DEF (text, at);
- /***
- * @method rspamd_text:memchr(chr, [reverse])
- * Returns the first or the last position of the character `chr` in the text or
- * -1 in case if a character has not been found. Indexes start from `1`
- * @param {string/number} chr character or a character code to find
- * @param {boolean} reverse last character if `true`
- * @return {integer} position of the character or `-1`
- */
- LUA_FUNCTION_DEF (text, memchr);
- /***
- * @method rspamd_text:bytes()
- * Converts text to an array of bytes
- * @return {table|integer} bytes in the array (as unsigned char)
- */
- LUA_FUNCTION_DEF (text, bytes);
- /***
- * @method rspamd_text:lower([is_utf, [inplace]])
- * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
- * @param {boolean} is_utf apply utf8 lowercase
- * @param {boolean} inplace lowercase the original text
- * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
- */
- LUA_FUNCTION_DEF (text, lower);
- LUA_FUNCTION_DEF (text, take_ownership);
- /***
- * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
- * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
- * where all chars from `set_to_exclude` are removed
- * Patterns supported:
- *
- * - %s - all space characters
- * - %n - all newline characters
- * - %c - all control characters (it includes 8bit characters and spaces)
- * - %8 - all 8 bit characters
- * - %% - just a percent character
- *
- * @param {string} set_to_exclude characters to exclude
- * @param {boolean} always_copy always copy the source text
- * @return {rspamd_text} modified or copied text
- */
- LUA_FUNCTION_DEF (text, exclude_chars);
- /***
- * @method rspamd_text:oneline([always_copy])
- * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
- * where the following transformations are made:
- * - All spaces sequences are replaced with a single space
- * - All newlines sequences are replaced with a single space
- * - Trailing and leading spaces are removed
- * - Control characters are excluded
- * - UTF8 sequences are normalised
- *
- * @param {boolean} always_copy always copy the source text
- * @return {rspamd_text} modified or copied text
- */
- LUA_FUNCTION_DEF (text, oneline);
- /***
- * @method rspamd_text:base32([b32type])
- * Returns a text encoded in base32 (new rspamd_text is allocated)
- *
- * @param {string} b32type base32 type (default, bleach, rfc)
- * @return {rspamd_text} new text encoded in base32
- */
- LUA_FUNCTION_DEF (text, base32);
- /***
- * @method rspamd_text:base64([line_length, [nline, [fold]]])
- * Returns a text encoded in base64 (new rspamd_text is allocated)
- *
- * @param {number} line_length return text split with newlines up to this attribute
- * @param {string} nline newline type: `cr`, `lf`, `crlf`
- * @param {boolean} fold use folding when splitting into lines (false by default)
- * @return {rspamd_text} new text encoded in base64
- */
- LUA_FUNCTION_DEF (text, base64);
- /***
- * @method rspamd_text:hex()
- * Returns a text encoded in hex (new rspamd_text is allocated)
- *
- * @return {rspamd_text} new text encoded in hex
- */
- LUA_FUNCTION_DEF (text, hex);
- /***
- * @method rspamd_text:find(pattern [, init])
- * Looks for the first match of pattern in the string s.
- * If it finds a match, then find returns the indices of s where this occurrence
- * starts and ends; otherwise, it returns nil. A third,
- * optional numerical argument init specifies where to start the search;
- * its default value is 1 and can be negative.
- * This method currently supports merely a plain search, no patterns.
- *
- * @param {string} pattern pattern to find
- * @param {number} init specifies where to start the search (1 default)
- * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil
- */
- LUA_FUNCTION_DEF (text, find);
- LUA_FUNCTION_DEF (text, gc);
- LUA_FUNCTION_DEF (text, eq);
- LUA_FUNCTION_DEF (text, lt);
- LUA_FUNCTION_DEF (text, concat);
- LUA_FUNCTION_DEF (text, strtoul);
-
- static const struct luaL_reg textlib_f[] = {
- LUA_INTERFACE_DEF (text, fromstring),
- {"from_string", lua_text_fromstring},
- LUA_INTERFACE_DEF (text, fromtable),
- {"from_table", lua_text_fromtable},
- LUA_INTERFACE_DEF (text, null),
- LUA_INTERFACE_DEF (text, randombytes),
- {NULL, NULL}
- };
-
- static const struct luaL_reg textlib_m[] = {
- LUA_INTERFACE_DEF (text, len),
- LUA_INTERFACE_DEF (text, str),
- LUA_INTERFACE_DEF (text, ptr),
- LUA_INTERFACE_DEF (text, take_ownership),
- LUA_INTERFACE_DEF (text, save_in_file),
- LUA_INTERFACE_DEF (text, span),
- LUA_INTERFACE_DEF (text, sub),
- LUA_INTERFACE_DEF (text, lines),
- LUA_INTERFACE_DEF (text, split),
- LUA_INTERFACE_DEF (text, at),
- LUA_INTERFACE_DEF (text, memchr),
- LUA_INTERFACE_DEF (text, byte),
- LUA_INTERFACE_DEF (text, bytes),
- LUA_INTERFACE_DEF (text, lower),
- LUA_INTERFACE_DEF (text, exclude_chars),
- LUA_INTERFACE_DEF (text, oneline),
- LUA_INTERFACE_DEF (text, base32),
- LUA_INTERFACE_DEF (text, base64),
- LUA_INTERFACE_DEF (text, hex),
- LUA_INTERFACE_DEF (text, find),
- LUA_INTERFACE_DEF (text, strtoul),
- {"write", lua_text_save_in_file},
- {"__len", lua_text_len},
- {"__tostring", lua_text_str},
- {"__gc", lua_text_gc},
- {"__eq", lua_text_eq},
- {"__lt", lua_text_lt},
- {"__concat", lua_text_concat},
- {NULL, NULL}
- };
-
- struct rspamd_lua_text *
- lua_check_text (lua_State * L, gint pos)
- {
- void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
- luaL_argcheck (L, ud != NULL, pos, "'text' expected");
- return ud ? (struct rspamd_lua_text *)ud : NULL;
- }
-
- struct rspamd_lua_text *
- lua_check_text_or_string (lua_State * L, gint pos)
- {
- gint pos_type = lua_type (L, pos);
-
- if (pos_type == LUA_TUSERDATA) {
- void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
- luaL_argcheck (L, ud != NULL, pos, "'text' expected");
- return ud ? (struct rspamd_lua_text *) ud : NULL;
- }
- else if (pos_type == LUA_TSTRING) {
- /*
- * Fake static lua_text, we allow to use this function multiple times
- * by having a small array of static structures.
- */
- static int cur_txt_idx = 0;
- static struct rspamd_lua_text fake_text[4];
- gsize len;
- int sel_idx;
-
- sel_idx = cur_txt_idx++ % G_N_ELEMENTS (fake_text);
- fake_text[sel_idx].start = lua_tolstring (L, pos, &len);
-
- if (len >= G_MAXUINT) {
- return NULL;
- }
-
- fake_text[sel_idx].len = len;
- fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE;
-
- return &fake_text[sel_idx];
- }
-
- return NULL;
- }
-
- struct rspamd_lua_text *
- lua_new_text (lua_State *L, const gchar *start, gsize len, gboolean own)
- {
- struct rspamd_lua_text *t;
-
- t = lua_newuserdata (L, sizeof (*t));
- t->flags = 0;
-
- if (own) {
- gchar *storage;
-
- if (len > 0) {
- storage = g_malloc (len);
-
- if (start != NULL) {
- memcpy (storage, start, len);
- }
-
- t->start = storage;
- t->flags = RSPAMD_TEXT_FLAG_OWN;
- }
- else {
- t->start = "";
- }
- }
- else {
- t->start = start;
- }
-
- t->len = len;
- rspamd_lua_setclass (L, "rspamd{text}", -1);
-
- return t;
- }
-
-
- static gint
- lua_text_fromstring (lua_State *L)
- {
- LUA_TRACE_POINT;
- const gchar *str;
- gsize l = 0;
- gboolean transparent = FALSE;
-
- str = luaL_checklstring (L, 1, &l);
-
- if (str) {
- if (lua_isboolean (L, 2)) {
- transparent = lua_toboolean (L, 2);
- }
-
- lua_new_text (L, str, l, !transparent);
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
-
- return 1;
- }
-
- static gint
- lua_text_null (lua_State *L)
- {
- LUA_TRACE_POINT;
-
- lua_new_text (L, NULL, 0, false);
-
- return 1;
- }
-
- static gint
- lua_text_randombytes (lua_State *L)
- {
- LUA_TRACE_POINT;
- guint nbytes = luaL_checkinteger (L, 1);
- struct rspamd_lua_text *out;
-
- out = lua_new_text (L, NULL, nbytes, TRUE);
- randombytes_buf ((char *)out->start, nbytes);
- out->len = nbytes;
-
- return 1;
- }
-
- #define MAX_REC 10
-
- static void
- lua_text_tbl_length (lua_State *L, gsize dlen, gsize *dest, guint rec)
- {
- gsize tblen, stlen;
- struct rspamd_lua_text *elt;
-
- if (rec > MAX_REC) {
- luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
-
- return;
- }
-
- tblen = rspamd_lua_table_size (L, -1);
-
- for (gsize i = 0; i < tblen; i ++) {
- lua_rawgeti (L, -1, i + 1);
-
- if (lua_type (L, -1) == LUA_TSTRING) {
- #if LUA_VERSION_NUM >= 502
- stlen = lua_rawlen (L, -1);
- #else
- stlen = lua_objlen (L, -1);
- #endif
- (*dest) += stlen;
- }
- else if (lua_type (L, -1) == LUA_TUSERDATA){
- elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
-
- if (elt) {
- (*dest) += elt->len;
- }
- }
- else if (lua_type (L, -1) == LUA_TTABLE) {
- lua_text_tbl_length (L, dlen, dest, rec + 1);
- }
-
- if (i != tblen - 1) {
- (*dest) += dlen;
- }
-
- lua_pop (L, 1);
- }
- }
-
- static void
- lua_text_tbl_append (lua_State *L,
- const gchar *delim,
- gsize dlen,
- gchar **dest,
- guint rec)
- {
- const gchar *st;
- gsize tblen, stlen;
- struct rspamd_lua_text *elt;
-
- if (rec > MAX_REC) {
- luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
-
- return;
- }
-
- tblen = rspamd_lua_table_size (L, -1);
-
- for (guint i = 0; i < tblen; i ++) {
- lua_rawgeti (L, -1, i + 1);
-
- if (lua_type (L, -1) == LUA_TSTRING) {
- st = lua_tolstring (L, -1, &stlen);
- memcpy ((*dest), st, stlen);
- (*dest) += stlen;
- }
- else if (lua_type (L, -1) == LUA_TUSERDATA){
- elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
-
- if (elt) {
- memcpy ((*dest), elt->start, elt->len);
- (*dest) += elt->len;
- }
- }
- else if (lua_type (L, -1) == LUA_TTABLE) {
- lua_text_tbl_append (L, delim, dlen, dest, rec + 1);
- }
-
- if (dlen && i != tblen - 1) {
- memcpy ((*dest), delim, dlen);
- (*dest) += dlen;
- }
-
- lua_pop (L, 1);
- }
- }
-
- static gint
- lua_text_fromtable (lua_State *L)
- {
- LUA_TRACE_POINT;
- const gchar *delim = "";
- struct rspamd_lua_text *t;
- gsize textlen = 0, dlen, oldtop = lua_gettop (L);
- gchar *dest;
-
- if (!lua_istable (L, 1)) {
- return luaL_error (L, "invalid arguments");
- }
-
- if (lua_type (L, 2) == LUA_TSTRING) {
- delim = lua_tolstring (L, 2, &dlen);
- }
- else {
- dlen = 0;
- }
-
- /* Calculate length needed */
- lua_pushvalue (L, 1);
- lua_text_tbl_length (L, dlen, &textlen, 0);
- lua_pop (L, 1);
-
- /* Allocate new text */
- t = lua_newuserdata (L, sizeof (*t));
- dest = g_malloc (textlen);
- t->start = dest;
- t->len = textlen;
- t->flags = RSPAMD_TEXT_FLAG_OWN;
- rspamd_lua_setclass (L, "rspamd{text}", -1);
-
- lua_pushvalue (L, 1);
- lua_text_tbl_append (L, delim, dlen, &dest, 0);
- lua_pop (L, 1); /* Table arg */
-
- gint newtop = lua_gettop (L);
- g_assert ( newtop== oldtop + 1);
-
- return 1;
- }
-
- static gint
- lua_text_len (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
- gsize l = 0;
-
- if (t != NULL) {
- l = t->len;
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- lua_pushinteger (L, l);
-
- return 1;
- }
-
- static gint
- lua_text_str (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
-
- if (t != NULL) {
- lua_pushlstring (L, t->start, t->len);
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- static gint
- lua_text_ptr (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
-
- if (t != NULL) {
- lua_pushlightuserdata (L, (gpointer)t->start);
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- static gint
- lua_text_take_ownership (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
- gchar *dest;
-
- if (t != NULL) {
- if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- /* We already own it */
- lua_pushboolean (L, true);
- }
- else {
- dest = g_malloc (t->len);
- memcpy (dest, t->start, t->len);
- t->start = dest;
- t->flags |= RSPAMD_TEXT_FLAG_OWN;
- lua_pushboolean (L, true);
- }
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- static gint
- lua_text_span (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
- gint64 start = lua_tointeger (L, 2), len = -1;
-
- if (t && start >= 1 && start <= t->len) {
- if (lua_isnumber (L, 3)) {
- len = lua_tonumber (L, 3);
- }
-
- if (len == -1) {
- len = t->len - (start - 1);
- }
-
- if (len < 0 || (len > (t->len - (start - 1)))) {
- return luaL_error (L, "invalid length");
- }
-
- lua_new_text (L, t->start + (start - 1), len, FALSE);
- }
- else {
- if (!t) {
- return luaL_error (L, "invalid arguments, text required");
- }
- else {
- return luaL_error (L, "invalid arguments: start offset %d "
- "is larger than text len %d", (int)start, (int)t->len);
- }
- }
-
- return 1;
- }
-
- /* Helpers to behave exactly as Lua does */
- static inline gsize
- relative_pos_start (gint pos, gsize len)
- {
- if (pos > 0) {
- return pos;
- }
- else if (pos == 0) {
- return 1;
- }
- else if (pos < -((gint) len)) {
- return 1;
- }
-
- /* Negative pos inside str */
- return len + ((gsize)pos) + 1;
- }
-
- static inline gsize
- relative_pos_end (gint pos, gsize len)
- {
- if (pos > (gint)len) {
- return len;
- }
- else if (pos >= 0) {
- return (size_t) pos;
- }
- else if (pos < -((gint)len)) {
- return 0;
- }
-
- return len + ((gsize)pos) + 1;
- }
-
- static gint
- lua_text_sub (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
-
- if (t) {
- size_t start = relative_pos_start (luaL_checkinteger (L, 2),
- t->len);
- size_t end = relative_pos_end (luaL_optinteger (L, 3, -1),
- t->len);
-
-
- if (start <= end) {
- lua_new_text (L, t->start + (start - 1),
- (end - start) + 1, FALSE);
- }
- else {
- lua_new_text (L, "", 0, TRUE);
- }
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- static gint64
- rspamd_lua_text_push_line (lua_State *L,
- struct rspamd_lua_text *t,
- gint64 start_offset,
- const gchar *sep_pos,
- gboolean stringify)
- {
- const gchar *start;
- gsize len;
- gint64 ret;
-
- start = t->start + start_offset;
- len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
- ret = start_offset + len;
-
- /* Trim line */
- while (len > 0) {
- if (start[len - 1] == '\r' || start[len - 1] == '\n') {
- len --;
- }
- else {
- break;
- }
- }
-
- if (stringify) {
- lua_pushlstring (L, start, len);
- }
- else {
- struct rspamd_lua_text *ntext;
-
- ntext = lua_newuserdata (L, sizeof (*ntext));
- rspamd_lua_setclass (L, "rspamd{text}", -1);
- ntext->start = start;
- ntext->len = len;
- ntext->flags = 0; /* Not own as it must be owned by a top object */
- }
-
- return ret;
- }
-
- static gint
- rspamd_lua_text_readline (lua_State *L)
- {
- struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1));
- gboolean stringify = lua_toboolean (L, lua_upvalueindex (2));
- gint64 pos = lua_tointeger (L, lua_upvalueindex (3));
-
- if (pos < 0) {
- return luaL_error (L, "invalid pos: %d", (gint)pos);
- }
-
- if (pos >= t->len) {
- /* We are done */
- return 0;
- }
-
- const gchar *sep_pos;
-
- /* We look just for `\n` ignoring `\r` as it is very rare nowadays */
- sep_pos = memchr (t->start + pos, '\n', t->len - pos);
-
- if (sep_pos == NULL) {
- /* Either last `\n` or `\r` separated text */
- sep_pos = memchr (t->start + pos, '\r', t->len - pos);
- }
-
- pos = rspamd_lua_text_push_line (L, t, pos, sep_pos, stringify);
-
- /* Skip separators */
- while (pos < t->len) {
- if (t->start[pos] == '\n' || t->start[pos] == '\r') {
- pos ++;
- }
- else {
- break;
- }
- }
-
- /* Update pos */
- lua_pushinteger (L, pos);
- lua_replace (L, lua_upvalueindex (3));
-
- return 1;
- }
-
- static gint
- lua_text_lines (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
- gboolean stringify = FALSE;
-
- if (t) {
- if (lua_isboolean (L, 2)) {
- stringify = lua_toboolean (L, 2);
- }
-
- lua_pushvalue (L, 1);
- lua_pushboolean (L, stringify);
- lua_pushinteger (L, 0); /* Current pos */
- lua_pushcclosure (L, rspamd_lua_text_readline, 3);
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- static gint
- rspamd_lua_text_regexp_split (lua_State *L) {
- struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1)),
- *new_t;
- struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
- lua_touserdata (L, lua_upvalueindex (2));
- gboolean stringify = lua_toboolean (L, lua_upvalueindex (3));
- gint64 pos = lua_tointeger (L, lua_upvalueindex (4));
- gboolean matched;
-
- if (pos < 0) {
- return luaL_error (L, "invalid pos: %d", (gint) pos);
- }
-
- if (pos >= t->len) {
- /* We are done */
- return 0;
- }
-
- const gchar *start, *end, *old_start;
-
- end = t->start + pos;
-
- for (;;) {
- old_start = end;
-
- matched = rspamd_regexp_search (re->re, t->start, t->len, &start, &end, FALSE,
- NULL);
-
- if (matched) {
- if (start - old_start > 0) {
- if (stringify) {
- lua_pushlstring (L, old_start, start - old_start);
- }
- else {
- new_t = lua_newuserdata (L, sizeof (*t));
- rspamd_lua_setclass (L, "rspamd{text}", -1);
- new_t->start = old_start;
- new_t->len = start - old_start;
- new_t->flags = 0;
- }
-
- break;
- }
- else {
- if (start == end) {
- matched = FALSE;
- break;
- }
- /*
- * All match separators (e.g. starting separator,
- * we need to skip it). Continue iterations.
- */
- }
- }
- else {
- /* No match, stop */
- break;
- }
- }
-
- if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
- /* No more matches, but we might need to push the last element */
- if (end == NULL) {
- end = t->start;
- }
- /* No separators, need to push the whole remaining part */
- if (stringify) {
- lua_pushlstring (L, end, (t->start + t->len) - end);
- }
- else {
- new_t = lua_newuserdata (L, sizeof (*t));
- rspamd_lua_setclass (L, "rspamd{text}", -1);
- new_t->start = end;
- new_t->len = (t->start + t->len) - end;
- new_t->flags = 0;
- }
-
- pos = t->len;
- }
- else {
-
- pos = end - t->start;
- }
-
- /* Update pos */
- lua_pushinteger (L, pos);
- lua_replace (L, lua_upvalueindex (4));
-
- return 1;
- }
-
- static gint
- lua_text_split (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
- struct rspamd_lua_regexp *re;
- gboolean stringify = FALSE, own_re = FALSE;
-
- if (t == NULL) {
- return luaL_error (L, "invalid arguments");
- }
-
- if (lua_type (L, 2) == LUA_TUSERDATA) {
- re = lua_check_regexp (L, 2);
- }
- else {
- rspamd_regexp_t *c_re;
- GError *err = NULL;
-
- c_re = rspamd_regexp_new (lua_tostring (L, 2), NULL, &err);
- if (c_re == NULL) {
-
- gint ret = luaL_error (L, "cannot parse regexp: %s, error: %s",
- lua_tostring (L, 2),
- err == NULL ? "undefined" : err->message);
- if (err) {
- g_error_free (err);
- }
-
- return ret;
- }
-
- re = g_malloc0 (sizeof (struct rspamd_lua_regexp));
- re->re = c_re;
- re->re_pattern = g_strdup (lua_tostring (L, 2));
- re->module = rspamd_lua_get_module_name (L);
- own_re = TRUE;
- }
-
- if (re) {
- if (lua_isboolean (L, 3)) {
- stringify = lua_toboolean (L, 3);
- }
-
- /* Upvalues */
- lua_pushvalue (L, 1); /* text */
-
- if (own_re) {
- struct rspamd_lua_regexp **pre;
- pre = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
- rspamd_lua_setclass (L, "rspamd{regexp}", -1);
- *pre = re;
- }
- else {
- lua_pushvalue (L, 2); /* regexp */
- }
-
- lua_pushboolean (L, stringify);
- lua_pushinteger (L, 0); /* Current pos */
- lua_pushcclosure (L, rspamd_lua_text_regexp_split, 4);
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
-
- static gint
- lua_text_at (lua_State *L)
- {
- return lua_text_byte(L);
- }
-
- static gint
- lua_text_byte (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
- if (!t) {
- return luaL_error (L, "invalid arguments");
- }
-
- gsize start = relative_pos_start (luaL_optinteger (L, 2, 1), t->len);
- gsize end = relative_pos_end (luaL_optinteger (L, 3, start), t->len);
- start--;
-
- if (start >= end) {
- return 0;
- }
-
- for (gsize i = start; i < end; i++) {
- lua_pushinteger (L, t->start[i]);
- }
- return end - start;
- }
-
- static gint
- lua_text_memchr (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
- int c;
- bool reverse = false;
-
- if (lua_isnumber (L, 2)) {
- c = lua_tonumber (L, 2);
- }
- else {
- gsize l;
- const gchar *str = lua_tolstring (L, 2, &l);
-
- if (str) {
- c = str[0];
-
- if (l != 1) {
- return luaL_error (L, "need exactly one character to search");
- }
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
- }
-
- if (t) {
- void *f;
-
- if (lua_isboolean (L, 3)) {
- reverse = lua_toboolean (L, 3);
- }
-
- if (reverse) {
- f = rspamd_memrchr (t->start, c, t->len);
- }
- else {
- f = memchr (t->start, c, t->len);
- }
-
- if (f) {
- lua_pushinteger (L, ((const char *)f) - t->start + 1);
- }
- else {
- lua_pushinteger (L, -1);
- }
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- static gint
- lua_text_bytes (lua_State *L)
- {
- LUA_TRACE_POINT;
- struct rspamd_lua_text *t = lua_check_text (L, 1);
-
- if (t) {
- lua_createtable (L, t->len, 0);
-
- for (gsize i = 0; i < t->len; i ++) {
- lua_pushinteger (L, (guchar)t->start[i]);
- lua_rawseti (L, -2, i + 1);
- }
- }
- else {
- return luaL_error (L, "invalid arguments");
- }
-
- return 1;
- }
-
- /*
-  * Writes the content of the text at stack index 1 to a file or descriptor.
-  *
-  * Lua arguments:
-  *   1 - text object
-  *   2 - optional destination: a file name (string) or an already opened
-  *       descriptor (number); standard output is used when it is absent
-  *   3 - optional numeric creation mode, honoured only with a file name
-  *       (defaults to 0644)
-  *
-  * A named file is opened with O_CREAT | O_WRONLY | O_EXCL, so an existing
-  * file is an error.
-  *
-  * Pushes true on success, or false plus the system error string on failure.
-  * Only a descriptor opened here is closed; a descriptor supplied by the
-  * caller stays open on both success and failure, so the caller keeps sole
-  * ownership of it.
-  */
- static gint
- lua_text_save_in_file (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1);
- 	const gchar *fname = NULL;
- 	guint mode = 00644;
- 	gint fd = -1;
- 	gboolean need_close = FALSE;
-
- 	if (t == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (lua_type (L, 2) == LUA_TSTRING) {
- 		fname = luaL_checkstring (L, 2);
-
- 		if (lua_type (L, 3) == LUA_TNUMBER) {
- 			mode = lua_tonumber (L, 3);
- 		}
- 	}
- 	else if (lua_type (L, 2) == LUA_TNUMBER) {
- 		/* Created fd */
- 		fd = lua_tonumber (L, 2);
- 	}
-
- 	if (fd == -1) {
- 		if (fname) {
- 			fd = rspamd_file_xopen (fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);
-
- 			if (fd == -1) {
- 				/* Capture errno before any other call can change it */
- 				gint open_errno = errno;
-
- 				lua_pushboolean (L, false);
- 				lua_pushstring (L, strerror (open_errno));
-
- 				return 2;
- 			}
-
- 			need_close = TRUE;
- 		}
- 		else {
- 			fd = STDOUT_FILENO;
- 		}
- 	}
-
- 	if (write (fd, t->start, t->len) == -1) {
- 		/* close() may overwrite errno, so remember the write error first */
- 		gint write_errno = errno;
-
- 		if (need_close) {
- 			close (fd);
- 		}
-
- 		lua_pushboolean (L, false);
- 		lua_pushstring (L, strerror (write_errno));
-
- 		return 2;
- 	}
-
- 	if (need_close) {
- 		close (fd);
- 	}
-
- 	lua_pushboolean (L, true);
-
- 	return 1;
- }
-
- /*
-  * Releases the storage behind the text at stack index 1.
-  *
-  * Nothing is freed unless RSPAMD_TEXT_FLAG_OWN is set. Owned storage is
-  * zeroed first when RSPAMD_TEXT_FLAG_WIPE is set and is then released with
-  * munmap, free or g_free according to the MMAPED / SYSMALLOC flags.
-  * Texts flagged RSPAMD_TEXT_FLAG_FAKE must never reach this function.
-  */
- static gint
- lua_text_gc (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1);
-
- 	if (t == NULL) {
- 		return 0;
- 	}
-
- 	g_assert (!(t->flags & RSPAMD_TEXT_FLAG_FAKE));
-
- 	if (!(t->flags & RSPAMD_TEXT_FLAG_OWN)) {
- 		/* Borrowed storage belongs to somebody else */
- 		return 0;
- 	}
-
- 	if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
- 		rspamd_explicit_memzero ((guchar *)t->start, t->len);
- 	}
-
- 	/* Release with the routine matching the way the storage was obtained */
- 	if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
- 		munmap ((gpointer)t->start, t->len);
- 	}
- 	else if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
- 		free ((gpointer) t->start);
- 	}
- 	else {
- 		g_free ((gpointer) t->start);
- 	}
-
- 	return 0;
- }
-
- /*
-  * Compares the two operands (texts or strings) at stack indexes 1 and 2
-  * byte by byte.
-  *
-  * Pushes true when both have the same length and identical content,
-  * false otherwise. Raises a Lua error when either operand cannot be
-  * obtained, instead of dereferencing a NULL pointer.
-  */
- static gint
- lua_text_eq (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
- 			*t2 = lua_check_text_or_string (L, 2);
-
- 	if (t1 == NULL || t2 == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (t1->len == t2->len) {
- 		lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) == 0);
- 	}
- 	else {
- 		lua_pushboolean (L, false);
- 	}
-
- 	return 1;
- }
-
- /*
-  * Orders the two operands (texts or strings) at stack indexes 1 and 2.
-  *
-  * Operands of different length are ordered by length alone; operands of
-  * equal length are ordered by memcmp. Pushes true when the first operand
-  * sorts before the second.
-  *
-  * Raises a Lua error when either operand cannot be obtained; previously
-  * that case returned one result without pushing any value.
-  */
- static gint
- lua_text_lt (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
- 			*t2 = lua_check_text_or_string (L, 2);
-
- 	if (t1 == NULL || t2 == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (t1->len == t2->len) {
- 		lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) < 0);
- 	}
- 	else {
- 		lua_pushboolean (L, t1->len < t2->len);
- 	}
-
- 	return 1;
- }
-
- /*
-  * Concatenates the two operands (texts or strings) at stack indexes 1 and 2
-  * into a newly created text, which is left on the stack as the result.
-  *
-  * Raises a Lua error when either operand cannot be obtained; previously
-  * that case returned one result without pushing any value.
-  */
- static gint
- lua_text_concat (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
- 			*t2 = lua_check_text_or_string (L, 2);
-
- 	if (t1 == NULL || t2 == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	struct rspamd_lua_text *final;
-
- 	/* Request room for both parts, then fill it in two copies */
- 	final = lua_new_text (L, NULL, t1->len + t2->len, TRUE);
- 	memcpy ((void *)final->start, t1->start, t1->len);
- 	memcpy ((void *)(final->start + t1->len), t2->start, t2->len);
-
- 	return 1;
- }
-
- /*
-  * Overwrites the content of the text at stack index 1 with zeroes.
-  *
-  * Only texts carrying RSPAMD_TEXT_FLAG_OWN may be wiped; any other text,
-  * or an invalid argument, raises a Lua error. Pushes nothing.
-  */
- static gint
- lua_text_wipe (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1);
-
- 	if (t == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (!(t->flags & RSPAMD_TEXT_FLAG_OWN)) {
- 		return luaL_error (L, "cannot wipe not owned text");
- 	}
-
- 	rspamd_explicit_memzero ((guchar *)t->start, t->len);
-
- 	return 0;
- }
-
- /*
-  * Encodes the text at stack index 1 as base32 and pushes the result as a
-  * new text.
-  *
-  * Lua arguments:
-  *   1 - text object
-  *   2 - optional string naming the base32 flavour; an unknown name raises
-  *       a Lua error. RSPAMD_BASE32_DEFAULT is used when it is absent.
-  */
- static gint
- lua_text_base32 (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
- 	enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;
-
- 	if (t == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (lua_type (L, 2) == LUA_TSTRING) {
- 		const gchar *type_name = lua_tostring (L, 2);
-
- 		btype = rspamd_base32_decode_type_from_str (type_name);
-
- 		if (btype == RSPAMD_BASE32_INVALID) {
- 			return luaL_error (L, "invalid b32 type: %s", type_name);
- 		}
- 	}
-
- 	/* 8 output characters per 5 input bytes, plus slack */
- 	out = lua_new_text (L, NULL, t->len * 8 / 5 + 2, TRUE);
-
- 	/* The encoder reports how much of the buffer it really used */
- 	out->len = rspamd_encode_base32_buf (t->start, t->len,
- 			(gchar *)out->start, out->len, btype);
-
- 	return 1;
- }
-
- /*
-  * Encodes the text at stack index 1 as base64 and pushes the result as a
-  * new owned text.
-  *
-  * Lua arguments:
-  *   1 - text object
-  *   2 - optional numeric line length; 0 (no limit) is used when absent
-  *   3 - optional newline style: "cr", "lf" or "crlf" (case insensitive,
-  *       CRLF by default)
-  *   4 - optional boolean passed to the encoder as its fold flag
-  *
-  * Raises a Lua error on an invalid text, a too small or negative line
-  * length, or an unknown newline style.
-  */
- static gint
- lua_text_base64 (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
- 	gsize line_len = 0;
- 	gboolean fold = FALSE;
-
- 	if (t == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (lua_type (L, 2) == LUA_TNUMBER) {
- 		/*
- 		 * Validate in the signed domain: a negative value converted to
- 		 * gsize would become huge and slip past the minimum check.
- 		 * NOTE(review): a length of exactly 8 is rejected although the
- 		 * message says "at least 8" - confirm which one is intended.
- 		 */
- 		lua_Integer requested = lua_tointeger (L, 2);
-
- 		if (requested <= 8) {
- 			return luaL_error (L, "too small line length (at least 8 is required)");
- 		}
-
- 		line_len = (gsize) requested;
- 	}
-
- 	enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;
-
- 	if (lua_type (L, 3) == LUA_TSTRING) {
- 		const gchar *how_str = lua_tostring (L, 3);
-
- 		if (g_ascii_strcasecmp (how_str, "cr") == 0) {
- 			how = RSPAMD_TASK_NEWLINES_CR;
- 		}
- 		else if (g_ascii_strcasecmp (how_str, "lf") == 0) {
- 			how = RSPAMD_TASK_NEWLINES_LF;
- 		}
- 		else if (g_ascii_strcasecmp (how_str, "crlf") != 0) {
- 			return luaL_error (L, "invalid newline style: %s", how_str);
- 		}
- 	}
-
- 	if (lua_type (L, 4) == LUA_TBOOLEAN) {
- 		fold = lua_toboolean (L, 4);
- 	}
-
- 	gsize sz_len;
-
- 	/* The encoder allocates the buffer; the new text takes ownership of it */
- 	out = lua_newuserdata (L, sizeof (*out));
- 	out->flags = RSPAMD_TEXT_FLAG_OWN;
- 	out->start = rspamd_encode_base64_common (t->start, t->len,
- 			line_len, &sz_len, fold, how);
- 	out->len = sz_len;
- 	rspamd_lua_setclass (L, "rspamd{text}", -1);
-
- 	return 1;
- }
-
- /*
-  * Encodes the text at stack index 1 as hexadecimal and pushes the result
-  * as a new text. Raises a Lua error when the argument is not a valid text.
-  */
- static gint
- lua_text_hex (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
-
- 	if (t == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	/* Two output characters for each input byte */
- 	out = lua_new_text (L, NULL, t->len * 2, TRUE);
-
- 	/* Store the length reported by the encoder */
- 	out->len = rspamd_encode_hex_buf (t->start, t->len,
- 			(gchar *)out->start, out->len);
-
- 	return 1;
- }
-
- /*
-  * Looks for the first plain (non-pattern) occurrence of a substring in the
-  * text at stack index 1.
-  *
-  * Lua arguments:
-  *   1 - text object
-  *   2 - substring to look for
-  *   3 - optional 1-based position to start from (resolved through
-  *       relative_pos_start, which is defined elsewhere in this file)
-  *
-  * Pushes the 1-based start and end positions of the match, counted from the
-  * beginning of the whole text, or a single nil when nothing is found.
-  * Raises a Lua error on invalid arguments or when the start position lies
-  * beyond the end of the text.
-  */
- static gint
- lua_text_find (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1);
- 	gsize patlen, init = 1;
- 	const gchar *pat = luaL_checklstring (L, 2, &patlen);
-
- 	if (t == NULL || pat == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (lua_isnumber (L, 3)) {
- 		init = relative_pos_start (lua_tointeger (L, 3), t->len);
- 	}
-
- 	/* Switch to a 0-based offset */
- 	init --;
-
- 	if (init > t->len) {
- 		return luaL_error (L, "invalid arguments to find: init too large");
- 	}
-
- 	goffset pos = rspamd_substring_search (t->start + init,
- 			t->len - init,
- 			pat, patlen);
-
- 	if (pos == -1) {
- 		lua_pushnil (L);
-
- 		return 1;
- 	}
-
- 	/*
- 	 * The search ran on the window starting at `init`, so the offset it
- 	 * returned has to be shifted back to be relative to the whole text.
- 	 */
- 	gsize match_start = init + (gsize) pos;
-
- 	lua_pushinteger (L, match_start + 1);
- 	lua_pushinteger (L, match_start + patlen);
-
- 	return 2;
- }
-
- /*
-  * Applies `op` to the bit number `b` of the bitset `a`, an array of
-  * integer words. With `|=` it sets the bit, with `&` it tests it.
-  * `b` is expanded twice, so it must be free of side effects.
-  */
- #define BITOP(a,b,op) \
- ((a)[(gsize)(b)/(8*sizeof *(a))] op (gsize)1<<((gsize)(b)%(8*sizeof *(a))))
-
- /*
-  * Removes every byte that belongs to a given set from the text at stack
-  * index 1.
-  *
-  * Lua arguments:
-  *   1 - text object
-  *   2 - non-empty set description: each character stands for itself, and
-  *       '%' introduces a class:
-  *         %s - "\t\n\f\r" and space
-  *         %n - "\r\n"
-  *         %8 - bytes 128..255
-  *         %c - control characters 0..31 and DEL (127)
-  *         %% - a literal '%'
-  *       A trailing '%' also stands for itself; any other character after
-  *       '%' is ignored.
-  *   3 - optional boolean: true forces a filtered copy, false filters in
-  *       place. When absent, texts with RSPAMD_TEXT_FLAG_OWN are filtered in
-  *       place and all other texts are copied.
-  *
-  * Pushes the filtered text: the original object when filtering in place,
-  * a new owned text otherwise. Raises a Lua error on invalid arguments.
-  */
- static gint
- lua_text_exclude_chars (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1);
- 	gsize patlen = 0;
- 	const gchar *pat = lua_tolstring (L, 2, &patlen), *pat_end, *p, *end;
- 	gchar *dest, *d;
- 	gsize byteset[32 / sizeof(gsize)]; /* One bit per possible byte value */
- 	gboolean copy = TRUE;
- 	guint *plen;
-
- 	if (t == NULL || pat == NULL || patlen == 0) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (lua_isboolean (L, 3)) {
- 		copy = lua_toboolean (L, 3);
- 	}
- 	else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- 		copy = FALSE;
- 	}
-
- 	if (!copy) {
- 		dest = (gchar *)t->start;
- 		plen = &t->len;
- 		lua_pushvalue (L, 1); /* Push text as a result */
- 	}
- 	else {
- 		/* We need to copy read only text */
- 		struct rspamd_lua_text *nt;
-
- 		dest = g_malloc (t->len);
- 		nt = lua_newuserdata (L, sizeof (*nt));
- 		rspamd_lua_setclass (L, "rspamd{text}", -1);
- 		nt->len = t->len;
- 		nt->flags = RSPAMD_TEXT_FLAG_OWN;
- 		memcpy (dest, t->start, t->len);
- 		nt->start = dest;
- 		plen = &nt->len;
- 	}
-
- 	/*
- 	 * Fill the set. Every bit is set through BITOP, so the result does not
- 	 * depend on the width or the byte order of gsize.
- 	 */
- 	memset (byteset, 0, sizeof byteset);
- 	pat_end = pat + patlen;
-
- 	while (pat < pat_end) {
- 		guchar c = *(const guchar *) pat++;
-
- 		if (c != '%') {
- 			BITOP (byteset, c, |=);
- 			continue;
- 		}
-
- 		if (pat == pat_end) {
- 			/* Last '%' */
- 			BITOP (byteset, (guchar)'%', |=);
- 			break;
- 		}
-
- 		switch (*pat++) {
- 		case '%':
- 			BITOP (byteset, (guchar)'%', |=);
- 			break;
- 		case 's':
- 			/* "\r\n\t\f " */
- 			for (const gchar *s = "\r\n\t\f "; *s != '\0'; s++) {
- 				BITOP (byteset, *(const guchar *) s, |=);
- 			}
- 			break;
- 		case 'n':
- 			/* newlines: "\r\n" */
- 			BITOP (byteset, (guchar)'\r', |=);
- 			BITOP (byteset, (guchar)'\n', |=);
- 			break;
- 		case '8':
- 			/* 8 bit characters */
- 			for (guint b = 0x80; b <= 0xff; b++) {
- 				BITOP (byteset, b, |=);
- 			}
- 			break;
- 		case 'c':
- 			/* Non printable (control) characters */
- 			for (guint b = 0; b < 0x20; b++) {
- 				BITOP (byteset, b, |=);
- 			}
- 			/* Del character */
- 			BITOP (byteset, 0x7f, |=);
- 			break;
- 		default:
- 			/* Unknown class: silently ignored */
- 			break;
- 		}
- 	}
-
- 	/* Compact the bytes that are not in the set */
- 	p = t->start;
- 	end = t->start + t->len;
- 	d = dest;
-
- 	while (p < end) {
- 		if (!BITOP (byteset, *(const guchar *)p, &)) {
- 			*d++ = *p;
- 		}
-
- 		p ++;
- 	}
-
- 	*(plen) = d - dest;
-
- 	return 1;
- }
-
- /*
-  * Turns the text at stack index 1 into a single line.
-  *
-  * Lua arguments:
-  *   1 - text object
-  *   2 - optional boolean: true forces a processed copy, false processes in
-  *       place. When absent, texts with RSPAMD_TEXT_FLAG_OWN are processed in
-  *       place and all other texts are copied.
-  *
-  * Processing rules, as implemented below:
-  *   - a space or a CR/LF, together with the whitespace that follows it,
-  *     becomes one space (nothing at all at the very beginning);
-  *   - other control characters and DEL are dropped;
-  *   - bytes with the high bit set are kept;
-  *   - trailing whitespace is trimmed;
-  *   - when high-bit bytes were seen and the result is not valid UTF-8,
-  *     offending bytes are replaced by '?'.
-  *
-  * Pushes the resulting text: the original object when working in place, a
-  * new owned text otherwise. Raises a Lua error on an invalid argument.
-  */
- static gint
- lua_text_oneline (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1);
- 	const gchar *p, *end;
- 	gchar *dest, *d;
- 	gsize byteset[32 / sizeof(gsize)]; /* One bit per possible byte value */
- 	gboolean copy = TRUE, seen_8bit = FALSE;
- 	guint *plen;
-
- 	if (t != NULL) {
- 		if (lua_isboolean (L, 2)) {
- 			copy = lua_toboolean (L, 2);
- 		}
- 		else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
- 			copy = FALSE;
- 		}
-
- 		if (!copy) {
- 			dest = (gchar *)t->start;
- 			plen = &t->len;
- 			lua_pushvalue (L, 1); /* Push text as a result */
- 		}
- 		else {
- 			/* We need to copy read only text */
- 			struct rspamd_lua_text *nt;
-
- 			dest = g_malloc (t->len);
- 			nt = lua_newuserdata (L, sizeof (*nt));
- 			rspamd_lua_setclass (L, "rspamd{text}", -1);
- 			nt->len = t->len;
- 			nt->flags = RSPAMD_TEXT_FLAG_OWN;
- 			memcpy (dest, t->start, t->len);
- 			nt->start = dest;
- 			plen = &nt->len;
- 		}
-
- 		/*
- 		 * Fill the set of bytes needing special treatment. Every bit is set
- 		 * through BITOP, so the result does not depend on the width or the
- 		 * byte order of gsize.
- 		 */
- 		memset (byteset, 0, sizeof byteset);
-
- 		/* Control characters (this covers "\t\n\f\r") and the space */
- 		for (guint b = 0; b <= 0x20; b++) {
- 			BITOP (byteset, b, |=);
- 		}
-
- 		/* Del character and all 8 bit characters */
- 		for (guint b = 0x7f; b <= 0xff; b++) {
- 			BITOP (byteset, b, |=);
- 		}
-
- 		p = t->start;
- 		end = t->start + t->len;
- 		d = dest;
-
- 		while (p < end) {
- 			if (!BITOP (byteset, *(guchar *)p, &)) {
- 				*d++ = *p;
- 			}
- 			else {
- 				if ((*(guchar *)p) & 0x80) {
- 					/* Kept as is; validated after the loop */
- 					seen_8bit = TRUE;
- 					*d++ = *p;
- 				}
- 				else {
- 					if (*p == ' ') {
- 						/* Leading spaces are dropped altogether */
- 						if (d != dest) {
- 							*d++ = *p++;
- 						}
-
- 						while (p < end && g_ascii_isspace (*p)) {
- 							p ++;
- 						}
-
- 						continue; /* To avoid p++ */
- 					}
- 					else if (*p == '\r' || *p == '\n') {
- 						/* A line break becomes one space, unless leading */
- 						if (d != dest) {
- 							*d++ = ' ';
- 							p ++;
- 						}
-
- 						while (p < end && g_ascii_isspace (*p)) {
- 							p ++;
- 						}
-
- 						continue; /* To avoid p++ */
- 					}
- 					/* Any other byte of the set is dropped */
- 				}
- 			}
-
- 			p ++;
- 		}
-
- 		/* Trim trailing whitespace */
- 		while (d > dest && g_ascii_isspace (*(d - 1))) {
- 			d --;
- 		}
-
- 		if (seen_8bit) {
- 			if (rspamd_fast_utf8_validate (dest, d - dest) != 0) {
- 				/* Need to make it valid :( */
- 				UChar32 uc;
- 				goffset err_offset;
- 				gsize remain = d - dest;
- 				gchar *nd = dest;
-
- 				while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (nd, remain)) > 0) {
- 					gint i = 0;
-
- 					err_offset --; /* As it returns it 1 indexed */
- 					nd += err_offset;
- 					remain -= err_offset;
-
- 					/*
- 					 * Replace in place: the first byte of each undecodable
- 					 * sequence becomes '?', until a code point decodes.
- 					 */
- 					while (i < remain) {
- 						gint old_pos = i;
- 						U8_NEXT (nd, i, remain, uc);
-
- 						if (uc < 0) {
- 							nd[old_pos] = '?';
- 						}
- 						else {
- 							break;
- 						}
- 					}
-
- 					nd += i;
- 					remain -= i;
- 				}
- 			}
- 		}
-
- 		*(plen) = d - dest;
- 	}
- 	else {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	return 1;
- }
-
- /*
-  * Lowercases the text at stack index 1.
-  *
-  * Lua arguments:
-  *   1 - text object
-  *   2 - optional boolean: use the UTF-8 lowercasing routine instead of the
-  *       plain one
-  *   3 - optional boolean: modify the text in place instead of creating a
-  *       new text from it
-  *
-  * Pushes the lowercased text. Raises a Lua error on an invalid argument.
-  */
- static gint
- lua_text_lower (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1), *nt;
- 	gboolean is_utf8 = FALSE, is_inplace = FALSE;
-
- 	if (t == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (lua_isboolean (L, 2)) {
- 		is_utf8 = lua_toboolean (L, 2);
- 	}
-
- 	if (lua_isboolean (L, 3)) {
- 		is_inplace = lua_toboolean (L, 3);
- 	}
-
- 	if (!is_inplace) {
- 		nt = lua_new_text (L, t->start, t->len, TRUE);
- 	}
- 	else {
- 		/* Reuse the argument itself as the result */
- 		nt = t;
- 		lua_pushvalue (L, 1);
- 	}
-
- 	if (is_utf8) {
- 		rspamd_str_lc_utf8 ((gchar *) nt->start, nt->len);
- 	}
- 	else {
- 		rspamd_str_lc ((gchar *) nt->start, nt->len);
- 	}
-
- 	return 1;
- }
-
- /*
-  * Parses the text at stack index 1 as an unsigned integer with
-  * rspamd_strtoul. Pushes the number when parsing succeeds and nil
-  * otherwise. Raises a Lua error when the argument is not a valid text.
-  */
- static gint
- lua_text_strtoul (lua_State *L)
- {
- 	LUA_TRACE_POINT;
- 	struct rspamd_lua_text *t = lua_check_text (L, 1);
- 	unsigned long parsed;
-
- 	if (t == NULL) {
- 		return luaL_error (L, "invalid arguments");
- 	}
-
- 	if (!rspamd_strtoul (t->start, t->len, &parsed)) {
- 		lua_pushnil (L);
-
- 		return 1;
- 	}
-
- 	lua_pushinteger (L, parsed);
-
- 	return 1;
- }
-
- /* Used to distinguish lua text metatable */
- static const guint rspamd_lua_text_cookie = 0x2b21ef6fU;
-
- /*
-  * Builds the module table: a new table whose "cookie" field holds
-  * rspamd_lua_text_cookie and which receives the functions listed in
-  * textlib_f. Leaves that table on the stack as the single result.
-  */
- static gint
- lua_load_text (lua_State * L)
- {
- 	lua_newtable (L);
-
- 	/* module.cookie = rspamd_lua_text_cookie */
- 	lua_pushnumber (L, rspamd_lua_text_cookie);
- 	lua_setfield (L, -2, "cookie");
-
- 	luaL_register (L, NULL, textlib_f);
-
- 	return 1;
- }
-
- /*
-  * Registers the "rspamd{text}" class with the methods from textlib_m,
-  * stores rspamd_lua_text_cookie in the "cookie" field of the table that
-  * the registration leaves on the stack, pops that table, and adds
-  * lua_load_text as the preload entry for "rspamd_text".
-  */
- void
- luaopen_text (lua_State *L)
- {
- 	rspamd_lua_new_class (L, "rspamd{text}", textlib_m);
-
- 	/* table.cookie = rspamd_lua_text_cookie */
- 	lua_pushnumber (L, rspamd_lua_text_cookie);
- 	lua_setfield (L, -2, "cookie");
-
- 	/* Drop the table left by the class registration */
- 	lua_pop (L, 1);
-
- 	rspamd_lua_add_preload (L, "rspamd_text", lua_load_text);
- }
|