You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rspamd_symcache.c 93KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "util.h"
  18. #include "rspamd.h"
  19. #include "message.h"
  20. #include "rspamd_symcache.h"
  21. #include "cfg_file.h"
  22. #include "lua/lua_common.h"
  23. #include "unix-std.h"
  24. #include "contrib/t1ha/t1ha.h"
  25. #include "libserver/worker_util.h"
  26. #include "khash.h"
  27. #include "utlist.h"
  28. #include <math.h>
  29. #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
  30. # include <stdalign.h>
  31. #endif
/*
 * Logging helpers for the symbols cache.
 *
 * The *_cache variants expand to expressions that read a variable named
 * `cache` at the expansion site (`cache->static_pool->tag.tagname` and/or
 * `cache->cfg->checksum`), so they can only be used where such a variable
 * is in scope.
 */
#define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
		cache->static_pool->tag.tagname, cache->cfg->checksum, \
		G_STRFUNC, \
		__VA_ARGS__)
#define msg_warn_cache(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
		cache->static_pool->tag.tagname, cache->cfg->checksum, \
		G_STRFUNC, \
		__VA_ARGS__)
#define msg_info_cache(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
		cache->static_pool->tag.tagname, cache->cfg->checksum, \
		G_STRFUNC, \
		__VA_ARGS__)
/* Debug output is routed through the "symcache" log module id */
#define msg_debug_cache(...) rspamd_conditional_debug_fast (NULL, NULL, \
		rspamd_symcache_log_id, "symcache", cache->cfg->checksum, \
		G_STRFUNC, \
		__VA_ARGS__)
/* Same as msg_debug_cache, but reads `task->task_pool->tag.uid` instead of `cache` */
#define msg_debug_cache_task(...) rspamd_conditional_debug_fast (NULL, NULL, \
		rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
		G_STRFUNC, \
		__VA_ARGS__)
/*
 * NOTE(review): presumably this defines `rspamd_symcache_log_id` used by the
 * debug macros above - confirm against the INIT_LOG_MODULE definition.
 */
INIT_LOG_MODULE(symcache)
/*
 * Accessors for the `started` / `finished` flags of a dynamic item.
 * The `checkpoint` argument is accepted but not used by any of the expansions.
 * The SET_ and CLR_ variants expand to a bare assignment, so use them as
 * standalone statements only.
 */
#define CHECK_START_BIT(checkpoint, dyn_item) \
	((dyn_item)->started)
#define SET_START_BIT(checkpoint, dyn_item) \
	(dyn_item)->started = 1
#define CLR_START_BIT(checkpoint, dyn_item) \
	(dyn_item)->started = 0
#define CHECK_FINISH_BIT(checkpoint, dyn_item) \
	((dyn_item)->finished)
#define SET_FINISH_BIT(checkpoint, dyn_item) \
	(dyn_item)->finished = 1
#define CLR_FINISH_BIT(checkpoint, dyn_item) \
	(dyn_item)->finished = 0
/* Magic bytes: 'r', 's', 'c', the value 2, then zero padding up to 8 bytes */
static const guchar rspamd_symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0 };
/*
 * Fixed-size header record.
 * NOTE(review): presumably written at the start of the saved cache file and
 * validated against rspamd_symcache_magic on load - confirm against the
 * load/save code, which is not in this part of the file.
 */
struct rspamd_symcache_header {
	guchar magic[8];      /* Same size as rspamd_symcache_magic */
	guint nitems;
	guchar checksum[64];
	guchar unused[128];
};
/*
 * Reference-counted ordered list of cache items.
 * Created by rspamd_symcache_order_new and destroyed by
 * rspamd_symcache_order_dtor once the last reference is released.
 */
struct symcache_order {
	GPtrArray *d;    /* Items (struct rspamd_symcache_item *) in execution order */
	guint id;        /* Copy of cache->id taken at creation time */
	ref_entry_t ref; /* Reference counter with destructor */
};
/*
 * This structure is optimised to store ids list:
 * - If the first element is -1 then use dynamic part, else use static part
 *
 * Both representations share the same storage (anonymous union), so `dyn.e`
 * overlays `st[0]`.
 */
struct rspamd_symcache_id_list {
	union {
		guint32 st[4]; /* Static part: ids stored inline */
		struct {
			guint32 e; /* First element */
			guint16 len;
			guint16 allocated;
			guint *n;
		} dyn;
	};
};
/*
 * Node of a doubly linked list of conditions attached to a normal symbol
 * (see `specific.normal.conditions` in struct rspamd_symcache_item).
 */
struct rspamd_symcache_condition {
	gint cb; /* NOTE(review): presumably a Lua callback reference - confirm */
	struct rspamd_symcache_condition *prev, *next;
};
/*
 * A single symbol registered in the cache.
 */
struct rspamd_symcache_item {
	/* This block is likely shared */
	struct rspamd_symcache_item_stat *st;
	guint64 last_count;
	struct rspamd_counter_data *cd;
	gchar *symbol;
	const gchar *type_descr;
	gint type; /* Bit mask of SYMBOL_TYPE_* flags */
	/* Callback data */
	union {
		struct {
			symbol_func_t func;
			gpointer user_data;
			struct rspamd_symcache_condition *conditions;
		} normal;
		struct {
			/* Index of the parent in cache->items_by_id */
			gint parent;
			/* Resolved parent; filled lazily by rspamd_symcache_get_parent */
			struct rspamd_symcache_item *parent_item;
		} virtual;
	} specific;
	/* Condition of execution */
	gboolean enabled;
	/* Used for async stuff checks */
	gboolean is_filter;
	gboolean is_virtual;
	/* Priority */
	gint priority;
	/* Topological order; the two top bits are used as marks by the TSORT_* macros */
	guint order;
	/* Index of the per-task dynamic item in cache_savepoint.dynamic_items */
	gint id;
	gint frequency_peaks;
	/* Settings ids */
	struct rspamd_symcache_id_list allowed_ids;
	/* Allows execution but not symbols insertion */
	struct rspamd_symcache_id_list exec_only_ids;
	struct rspamd_symcache_id_list forbidden_ids;
	/* Dependencies */
	GPtrArray *deps;  /* struct cache_dependency *: what this item depends on */
	GPtrArray *rdeps; /* struct cache_dependency *: items depending on this one */
	/* Container */
	GPtrArray *container;
};
/*
 * The symbols cache itself: all registered items plus several views of them.
 */
struct rspamd_symcache {
	/* Hash table for fast access */
	GHashTable *items_by_symbol;
	GPtrArray *items_by_id;
	/* Current execution order of filters; replaced by rspamd_symcache_resort */
	struct symcache_order *items_by_order;
	GPtrArray *connfilters;
	GPtrArray *prefilters;
	GPtrArray *filters;
	GPtrArray *postfilters;
	GPtrArray *composites;
	GPtrArray *idempotent;
	GPtrArray *virtual;
	GList *delayed_deps;
	GList *delayed_conditions;
	rspamd_mempool_t *static_pool;
	guint64 cksum;
	gdouble total_weight;
	guint used_items;
	guint stats_symbols_count;
	/* Sum of hits over all filters; recomputed by rspamd_symcache_resort */
	guint64 total_hits;
	/* Copied into every symcache_order created for this cache */
	guint id;
	struct rspamd_config *cfg;
	gdouble reload_time;
	gdouble last_profile;
	gint peak_cb;
};
/*
 * Per-checkpoint state of a single cache item; stored in
 * cache_savepoint.dynamic_items and indexed by the item id.
 */
struct rspamd_symcache_dynamic_item {
	guint16 start_msec; /* Relative to task time */
	unsigned started:1;  /* Accessed via the *_START_BIT macros */
	unsigned finished:1; /* Accessed via the *_FINISH_BIT macros */
	/* unsigned pad:14; */
	guint32 async_events;
};
/*
 * One edge of the dependency graph between cache items.
 * Negative `id` / `vid` values are treated as "not set" by
 * rspamd_symcache_process_dep.
 */
struct cache_dependency {
	struct rspamd_symcache_item *item; /* Real dependency */
	gchar *sym; /* Symbolic dep name */
	gint id; /* Real from */
	gint vid; /* Virtual from */
};
/*
 * A dependency described by symbol names only.
 * NOTE(review): presumably queued in cache->delayed_deps until both symbols
 * are registered - confirm against the code that consumes the list.
 */
struct delayed_cache_dependency {
	gchar *from;
	gchar *to;
};
/*
 * A condition described by symbol name and a callback reference.
 * NOTE(review): presumably queued in cache->delayed_conditions and `cbref`
 * is a reference in the registry of Lua state `L` - confirm against the
 * code that consumes the list.
 */
struct delayed_cache_condition {
	gchar *sym;
	gint cbref;
	lua_State *L;
};
/*
 * Per-task processing state (a "checkpoint").
 * The structure ends with a flexible array member, so it must be allocated
 * with room for the required number of dynamic items.
 */
struct cache_savepoint {
	guint version;
	guint items_inflight;
	gboolean profile;
	gboolean has_slow;
	gdouble profile_start;
	struct rspamd_scan_result *rs;
	gdouble lim;
	struct rspamd_symcache_item *cur_item;
	struct symcache_order *order;
	/* Indexed by rspamd_symcache_item.id, see rspamd_symcache_get_dynamic */
	struct rspamd_symcache_dynamic_item dynamic_items[];
};
/*
 * State bundle built around an ev_timer.
 * NOTE(review): presumably the callback data of the periodic resort timer -
 * confirm against the timer callback, which is not in this part of the file.
 */
struct rspamd_cache_refresh_cbdata {
	gdouble last_resort;
	ev_timer resort_ev;
	struct rspamd_symcache *cache;
	struct rspamd_worker *w;
	struct ev_loop *event_loop;
};
  205. /* At least once per minute */
  206. #define PROFILE_MAX_TIME (60.0)
  207. /* For messages larger than 2Mb enable profiling */
  208. #define PROFILE_MESSAGE_SIZE_THRESHOLD (1024 * 1024 * 2)
  209. /* Enable profile at least once per this amount of messages processed */
  210. #define PROFILE_PROBABILITY (0.01)
  211. /* weight, frequency, time */
  212. #define TIME_ALPHA (1.0)
  213. #define WEIGHT_ALPHA (0.1)
  214. #define FREQ_ALPHA (0.01)
  215. #define SCORE_FUN(w, f, t) (((w) > 0 ? (w) : WEIGHT_ALPHA) \
  216. * ((f) > 0 ? (f) : FREQ_ALPHA) \
  217. / (t > TIME_ALPHA ? t : TIME_ALPHA))
/*
 * Forward declarations of file-local helpers.
 * Their definitions are not in this part of the file.
 */
static gboolean rspamd_symcache_check_symbol (struct rspamd_task *task,
		struct rspamd_symcache *cache,
		struct rspamd_symcache_item *item,
		struct cache_savepoint *checkpoint);
static gboolean rspamd_symcache_check_deps (struct rspamd_task *task,
		struct rspamd_symcache *cache,
		struct rspamd_symcache_item *item,
		struct cache_savepoint *checkpoint,
		guint recursion,
		gboolean check_only);
static void rspamd_symcache_disable_symbol_checkpoint (struct rspamd_task *task,
		struct rspamd_symcache *cache, const gchar *symbol);
static void rspamd_symcache_enable_symbol_checkpoint (struct rspamd_task *task,
		struct rspamd_symcache *cache, const gchar *symbol);
  232. static void
  233. rspamd_symcache_order_dtor (gpointer p)
  234. {
  235. struct symcache_order *ord = p;
  236. g_ptr_array_free (ord->d, TRUE);
  237. g_free (ord);
  238. }
  239. static void
  240. rspamd_symcache_order_unref (gpointer p)
  241. {
  242. struct symcache_order *ord = p;
  243. REF_RELEASE (ord);
  244. }
  245. static gint
  246. rspamd_id_cmp (const void * a, const void * b)
  247. {
  248. return (*(guint32*)a - *(guint32*)b);
  249. }
  250. static struct symcache_order *
  251. rspamd_symcache_order_new (struct rspamd_symcache *cache,
  252. gsize nelts)
  253. {
  254. struct symcache_order *ord;
  255. ord = g_malloc0 (sizeof (*ord));
  256. ord->d = g_ptr_array_sized_new (nelts);
  257. ord->id = cache->id;
  258. REF_INIT_RETAIN (ord, rspamd_symcache_order_dtor);
  259. return ord;
  260. }
  261. static inline struct rspamd_symcache_dynamic_item*
  262. rspamd_symcache_get_dynamic (struct cache_savepoint *checkpoint,
  263. struct rspamd_symcache_item *item)
  264. {
  265. return &checkpoint->dynamic_items[item->id];
  266. }
  267. static inline struct rspamd_symcache_item *
  268. rspamd_symcache_find_filter (struct rspamd_symcache *cache,
  269. const gchar *name,
  270. bool resolve_parent)
  271. {
  272. struct rspamd_symcache_item *item;
  273. g_assert (cache != NULL);
  274. if (name == NULL) {
  275. return NULL;
  276. }
  277. item = g_hash_table_lookup (cache->items_by_symbol, name);
  278. if (item != NULL) {
  279. if (resolve_parent && item->is_virtual && !(item->type & SYMBOL_TYPE_GHOST)) {
  280. item =item->specific.virtual.parent_item;
  281. }
  282. return item;
  283. }
  284. return NULL;
  285. }
  286. const gchar *
  287. rspamd_symcache_get_parent (struct rspamd_symcache *cache,
  288. const gchar *symbol)
  289. {
  290. struct rspamd_symcache_item *item, *parent;
  291. g_assert (cache != NULL);
  292. if (symbol == NULL) {
  293. return NULL;
  294. }
  295. item = g_hash_table_lookup (cache->items_by_symbol, symbol);
  296. if (item != NULL) {
  297. if (item->is_virtual && !(item->type & SYMBOL_TYPE_GHOST)) {
  298. parent = item->specific.virtual.parent_item;
  299. if (!parent) {
  300. item->specific.virtual.parent_item = g_ptr_array_index (cache->items_by_id,
  301. item->specific.virtual.parent);
  302. parent = item->specific.virtual.parent_item;
  303. }
  304. item = parent;
  305. }
  306. return item->symbol;
  307. }
  308. return NULL;
  309. }
  310. static gint
  311. postfilters_cmp (const void *p1, const void *p2, gpointer ud)
  312. {
  313. const struct rspamd_symcache_item *i1 = *(struct rspamd_symcache_item **)p1,
  314. *i2 = *(struct rspamd_symcache_item **)p2;
  315. double w1, w2;
  316. w1 = i1->priority;
  317. w2 = i2->priority;
  318. if (w1 > w2) {
  319. return 1;
  320. }
  321. else if (w1 < w2) {
  322. return -1;
  323. }
  324. return 0;
  325. }
  326. static gint
  327. prefilters_cmp (const void *p1, const void *p2, gpointer ud)
  328. {
  329. const struct rspamd_symcache_item *i1 = *(struct rspamd_symcache_item **)p1,
  330. *i2 = *(struct rspamd_symcache_item **)p2;
  331. double w1, w2;
  332. w1 = i1->priority;
  333. w2 = i2->priority;
  334. if (w1 < w2) {
  335. return 1;
  336. }
  337. else if (w1 > w2) {
  338. return -1;
  339. }
  340. return 0;
  341. }
/*
 * Topological sort bookkeeping.
 * The two top bits of `order` hold the marks (bit 31: permanent, bit 30:
 * temporary); TSORT_UNMASK yields the order value with both marks cleared.
 * The MARK macros expand to a bare `|=` statement, so use them as
 * standalone statements only.
 */
#define TSORT_MARK_PERM(it) (it)->order |= (1u << 31)
#define TSORT_MARK_TEMP(it) (it)->order |= (1u << 30)
#define TSORT_IS_MARKED_PERM(it) ((it)->order & (1u << 31))
#define TSORT_IS_MARKED_TEMP(it) ((it)->order & (1u << 30))
#define TSORT_UNMASK(it) ((it)->order & ~((1u << 31) | (1u << 30)))
  347. static gint
  348. cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
  349. {
  350. const struct rspamd_symcache_item *i1 = *(struct rspamd_symcache_item **)p1,
  351. *i2 = *(struct rspamd_symcache_item **)p2;
  352. struct rspamd_symcache *cache = ud;
  353. double w1, w2;
  354. double weight1, weight2;
  355. double f1 = 0, f2 = 0, t1, t2, avg_freq, avg_weight;
  356. guint o1 = TSORT_UNMASK (i1), o2 = TSORT_UNMASK (i2);
  357. if (o1 == o2) {
  358. /* Heurstic */
  359. if (i1->priority == i2->priority) {
  360. avg_freq = ((gdouble) cache->total_hits / cache->used_items);
  361. avg_weight = (cache->total_weight / cache->used_items);
  362. f1 = (double) i1->st->total_hits / avg_freq;
  363. f2 = (double) i2->st->total_hits / avg_freq;
  364. weight1 = fabs (i1->st->weight) / avg_weight;
  365. weight2 = fabs (i2->st->weight) / avg_weight;
  366. t1 = i1->st->avg_time;
  367. t2 = i2->st->avg_time;
  368. w1 = SCORE_FUN (weight1, f1, t1);
  369. w2 = SCORE_FUN (weight2, f2, t2);
  370. } else {
  371. /* Strict sorting */
  372. w1 = abs (i1->priority);
  373. w2 = abs (i2->priority);
  374. }
  375. }
  376. else {
  377. w1 = o1;
  378. w2 = o2;
  379. }
  380. if (w2 > w1) {
  381. return 1;
  382. }
  383. else if (w2 < w1) {
  384. return -1;
  385. }
  386. return 0;
  387. }
  388. static void
  389. rspamd_symcache_tsort_visit (struct rspamd_symcache *cache,
  390. struct rspamd_symcache_item *it,
  391. guint cur_order)
  392. {
  393. struct cache_dependency *dep;
  394. guint i;
  395. if (TSORT_IS_MARKED_PERM (it)) {
  396. if (cur_order > TSORT_UNMASK (it)) {
  397. /* Need to recalculate the whole chain */
  398. it->order = cur_order; /* That also removes all masking */
  399. }
  400. else {
  401. /* We are fine, stop DFS */
  402. return;
  403. }
  404. }
  405. else if (TSORT_IS_MARKED_TEMP (it)) {
  406. msg_err_cache ("cyclic dependencies found when checking '%s'!",
  407. it->symbol);
  408. return;
  409. }
  410. TSORT_MARK_TEMP (it);
  411. msg_debug_cache ("visiting node: %s (%d)", it->symbol, cur_order);
  412. PTR_ARRAY_FOREACH (it->deps, i, dep) {
  413. msg_debug_cache ("visiting dep: %s (%d)", dep->item->symbol, cur_order + 1);
  414. rspamd_symcache_tsort_visit (cache, dep->item, cur_order + 1);
  415. }
  416. it->order = cur_order;
  417. TSORT_MARK_PERM (it);
  418. }
  419. static void
  420. rspamd_symcache_resort (struct rspamd_symcache *cache)
  421. {
  422. struct symcache_order *ord;
  423. guint i;
  424. guint64 total_hits = 0;
  425. struct rspamd_symcache_item *it;
  426. ord = rspamd_symcache_order_new (cache, cache->filters->len);
  427. for (i = 0; i < cache->filters->len; i ++) {
  428. it = g_ptr_array_index (cache->filters, i);
  429. total_hits += it->st->total_hits;
  430. it->order = 0;
  431. g_ptr_array_add (ord->d, it);
  432. }
  433. /* Topological sort, intended to be O(N) but my implementation
  434. * is not linear (semi-linear usually) as I want to make it as
  435. * simple as possible.
  436. * On each stage it does DFS for unseen nodes. In theory, that
  437. * can be more complicated than linear - O(N^2) for specially
  438. * crafted data. But I don't care.
  439. */
  440. PTR_ARRAY_FOREACH (ord->d, i, it) {
  441. if (it->order == 0) {
  442. rspamd_symcache_tsort_visit (cache, it, 1);
  443. }
  444. }
  445. /*
  446. * Now we have all sorted and can do some heuristical sort, keeping
  447. * topological order invariant
  448. */
  449. g_ptr_array_sort_with_data (ord->d, cache_logic_cmp, cache);
  450. cache->total_hits = total_hits;
  451. if (cache->items_by_order) {
  452. REF_RELEASE (cache->items_by_order);
  453. }
  454. cache->items_by_order = ord;
  455. }
  456. static void
  457. rspamd_symcache_propagate_dep (struct rspamd_symcache *cache,
  458. struct rspamd_symcache_item *it,
  459. struct rspamd_symcache_item *dit)
  460. {
  461. const guint *ids;
  462. guint nids = 0;
  463. msg_debug_cache ("check id propagation for dependency %s from %s",
  464. it->symbol, dit->symbol);
  465. ids = rspamd_symcache_get_allowed_settings_ids (cache, dit->symbol, &nids);
  466. /* TODO: merge? */
  467. if (nids > 0) {
  468. msg_info_cache ("propagate allowed ids from %s to %s",
  469. dit->symbol, it->symbol);
  470. rspamd_symcache_set_allowed_settings_ids (cache, it->symbol, ids,
  471. nids);
  472. }
  473. ids = rspamd_symcache_get_forbidden_settings_ids (cache, dit->symbol, &nids);
  474. if (nids > 0) {
  475. msg_info_cache ("propagate forbidden ids from %s to %s",
  476. dit->symbol, it->symbol);
  477. rspamd_symcache_set_forbidden_settings_ids (cache, it->symbol, ids,
  478. nids);
  479. }
  480. }
  481. static void
  482. rspamd_symcache_process_dep (struct rspamd_symcache *cache,
  483. struct rspamd_symcache_item *it,
  484. struct cache_dependency *dep)
  485. {
  486. struct rspamd_symcache_item *dit = NULL, *vdit = NULL;
  487. struct cache_dependency *rdep;
  488. if (dep->id >= 0) {
  489. msg_debug_cache ("process real dependency %s on %s", it->symbol, dep->sym);
  490. dit = rspamd_symcache_find_filter (cache, dep->sym, true);
  491. }
  492. if (dep->vid >= 0) {
  493. /* Case of the virtual symbol that depends on another (maybe virtual) symbol */
  494. vdit = rspamd_symcache_find_filter (cache, dep->sym, false);
  495. if (!vdit) {
  496. msg_err_cache ("cannot add dependency from %s on %s: no dependency symbol registered",
  497. dep->sym, dit->symbol);
  498. }
  499. else {
  500. msg_debug_cache ("process virtual dependency %s(%d) on %s(%d)", it->symbol,
  501. dep->vid, vdit->symbol, vdit->id);
  502. }
  503. }
  504. else {
  505. vdit = dit;
  506. }
  507. if (dit != NULL) {
  508. if (!dit->is_filter) {
  509. /*
  510. * Check sanity:
  511. * - filters -> prefilter dependency is OK and always satisfied
  512. * - postfilter -> (filter, prefilter) dep is ok
  513. * - idempotent -> (any) dep is OK
  514. *
  515. * Otherwise, emit error
  516. * However, even if everything is fine this dep is useless ¯\_(ツ)_/¯
  517. */
  518. gboolean ok_dep = FALSE;
  519. if (it->is_filter) {
  520. if (dit->is_filter) {
  521. ok_dep = TRUE;
  522. }
  523. else if (dit->type & SYMBOL_TYPE_PREFILTER) {
  524. ok_dep = TRUE;
  525. }
  526. }
  527. else if (it->type & SYMBOL_TYPE_POSTFILTER) {
  528. if (dit->type & SYMBOL_TYPE_PREFILTER) {
  529. ok_dep = TRUE;
  530. }
  531. }
  532. else if (it->type & SYMBOL_TYPE_IDEMPOTENT) {
  533. if (dit->type & (SYMBOL_TYPE_PREFILTER|SYMBOL_TYPE_POSTFILTER)) {
  534. ok_dep = TRUE;
  535. }
  536. }
  537. else if (it->type & SYMBOL_TYPE_PREFILTER) {
  538. if (it->priority < dit->priority) {
  539. /* Also OK */
  540. ok_dep = TRUE;
  541. }
  542. }
  543. if (!ok_dep) {
  544. msg_err_cache ("cannot add dependency from %s on %s: invalid symbol types",
  545. dep->sym, dit->symbol);
  546. return;
  547. }
  548. }
  549. else {
  550. if (dit->id == it->id) {
  551. msg_err_cache ("cannot add dependency on self: %s -> %s "
  552. "(resolved to %s)",
  553. it->symbol, dep->sym, dit->symbol);
  554. } else {
  555. rdep = rspamd_mempool_alloc (cache->static_pool,
  556. sizeof (*rdep));
  557. rdep->sym = dep->sym;
  558. rdep->item = it;
  559. rdep->id = it->id;
  560. g_assert (dit->rdeps != NULL);
  561. g_ptr_array_add (dit->rdeps, rdep);
  562. dep->item = dit;
  563. dep->id = dit->id;
  564. msg_debug_cache ("add dependency from %d on %d", it->id,
  565. dit->id);
  566. }
  567. }
  568. }
  569. else if (dep->id >= 0) {
  570. msg_err_cache ("cannot find dependency on symbol %s for symbol %s",
  571. dep->sym, it->symbol);
  572. return;
  573. }
  574. if (vdit) {
  575. /* Use virtual symbol to propagate deps */
  576. rspamd_symcache_propagate_dep (cache, it, vdit);
  577. }
  578. }
  579. /* Sort items in logical order */
  580. static void
  581. rspamd_symcache_post_init (struct rspamd_symcache *cache)
  582. {
  583. struct rspamd_symcache_item *it, *vit;
  584. struct cache_dependency *dep;
  585. struct delayed_cache_dependency *ddep;
  586. struct delayed_cache_condition *dcond;
  587. GList *cur;
  588. gint i, j;
  589. cur = cache->delayed_deps;
  590. while (cur) {
  591. ddep = cur->data;
  592. vit = rspamd_symcache_find_filter (cache, ddep->from, false);
  593. it = rspamd_symcache_find_filter (cache, ddep->from, true);
  594. if (it == NULL) {
  595. msg_err_cache ("cannot register delayed dependency between %s and %s: "
  596. "%s is missing", ddep->from, ddep->to, ddep->from);
  597. }
  598. else {
  599. msg_debug_cache ("delayed between %s(%d:%d) -> %s", ddep->from,
  600. it->id, vit->id, ddep->to);
  601. rspamd_symcache_add_dependency (cache, it->id, ddep->to, vit != it ?
  602. vit->id : -1);
  603. }
  604. cur = g_list_next (cur);
  605. }
  606. cur = cache->delayed_conditions;
  607. while (cur) {
  608. dcond = cur->data;
  609. it = rspamd_symcache_find_filter (cache, dcond->sym, true);
  610. if (it == NULL) {
  611. msg_err_cache (
  612. "cannot register delayed condition for %s",
  613. dcond->sym);
  614. luaL_unref (dcond->L, LUA_REGISTRYINDEX, dcond->cbref);
  615. }
  616. else {
  617. struct rspamd_symcache_condition *ncond = rspamd_mempool_alloc0 (cache->static_pool,
  618. sizeof (*ncond));
  619. ncond->cb = dcond->cbref;
  620. DL_APPEND (it->specific.normal.conditions, ncond);
  621. }
  622. cur = g_list_next (cur);
  623. }
  624. PTR_ARRAY_FOREACH (cache->items_by_id, i, it) {
  625. PTR_ARRAY_FOREACH (it->deps, j, dep) {
  626. rspamd_symcache_process_dep (cache, it, dep);
  627. }
  628. if (it->deps) {
  629. /* Reversed loop to make removal safe */
  630. for (j = it->deps->len - 1; j >= 0; j--) {
  631. dep = g_ptr_array_index (it->deps, j);
  632. if (dep->item == NULL) {
  633. /* Remove useless dep */
  634. g_ptr_array_remove_index (it->deps, j);
  635. }
  636. }
  637. }
  638. }
  639. /* Special case for virtual symbols */
  640. PTR_ARRAY_FOREACH (cache->virtual, i, it) {
  641. PTR_ARRAY_FOREACH (it->deps, j, dep) {
  642. rspamd_symcache_process_dep (cache, it, dep);
  643. }
  644. }
  645. g_ptr_array_sort_with_data (cache->connfilters, prefilters_cmp, cache);
  646. g_ptr_array_sort_with_data (cache->prefilters, prefilters_cmp, cache);
  647. g_ptr_array_sort_with_data (cache->postfilters, postfilters_cmp, cache);
  648. g_ptr_array_sort_with_data (cache->idempotent, postfilters_cmp, cache);
  649. rspamd_symcache_resort (cache);
  650. }
  651. static gboolean
  652. rspamd_symcache_load_items (struct rspamd_symcache *cache, const gchar *name)
  653. {
  654. struct rspamd_symcache_header *hdr;
  655. struct stat st;
  656. struct ucl_parser *parser;
  657. ucl_object_t *top;
  658. const ucl_object_t *cur, *elt;
  659. ucl_object_iter_t it;
  660. struct rspamd_symcache_item *item, *parent;
  661. const guchar *p;
  662. gint fd;
  663. gpointer map;
  664. fd = open (name, O_RDONLY);
  665. if (fd == -1) {
  666. msg_info_cache ("cannot open file %s, error %d, %s", name,
  667. errno, strerror (errno));
  668. return FALSE;
  669. }
  670. rspamd_file_lock (fd, FALSE);
  671. if (fstat (fd, &st) == -1) {
  672. rspamd_file_unlock (fd, FALSE);
  673. close (fd);
  674. msg_info_cache ("cannot stat file %s, error %d, %s", name,
  675. errno, strerror (errno));
  676. return FALSE;
  677. }
  678. if (st.st_size < (gint)sizeof (*hdr)) {
  679. rspamd_file_unlock (fd, FALSE);
  680. close (fd);
  681. errno = EINVAL;
  682. msg_info_cache ("cannot use file %s, error %d, %s", name,
  683. errno, strerror (errno));
  684. return FALSE;
  685. }
  686. map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  687. if (map == MAP_FAILED) {
  688. rspamd_file_unlock (fd, FALSE);
  689. close (fd);
  690. msg_info_cache ("cannot mmap file %s, error %d, %s", name,
  691. errno, strerror (errno));
  692. return FALSE;
  693. }
  694. hdr = map;
  695. if (memcmp (hdr->magic, rspamd_symcache_magic,
  696. sizeof (rspamd_symcache_magic)) != 0) {
  697. msg_info_cache ("cannot use file %s, bad magic", name);
  698. munmap (map, st.st_size);
  699. rspamd_file_unlock (fd, FALSE);
  700. close (fd);
  701. return FALSE;
  702. }
  703. parser = ucl_parser_new (0);
  704. p = (const guchar *)(hdr + 1);
  705. if (!ucl_parser_add_chunk (parser, p, st.st_size - sizeof (*hdr))) {
  706. msg_info_cache ("cannot use file %s, cannot parse: %s", name,
  707. ucl_parser_get_error (parser));
  708. munmap (map, st.st_size);
  709. ucl_parser_free (parser);
  710. rspamd_file_unlock (fd, FALSE);
  711. close (fd);
  712. return FALSE;
  713. }
  714. top = ucl_parser_get_object (parser);
  715. munmap (map, st.st_size);
  716. rspamd_file_unlock (fd, FALSE);
  717. close (fd);
  718. ucl_parser_free (parser);
  719. if (top == NULL || ucl_object_type (top) != UCL_OBJECT) {
  720. msg_info_cache ("cannot use file %s, bad object", name);
  721. ucl_object_unref (top);
  722. return FALSE;
  723. }
  724. it = ucl_object_iterate_new (top);
  725. while ((cur = ucl_object_iterate_safe (it, true))) {
  726. item = g_hash_table_lookup (cache->items_by_symbol, ucl_object_key (cur));
  727. if (item) {
  728. /* Copy saved info */
  729. /*
  730. * XXX: don't save or load weight, it should be obtained from the
  731. * metric
  732. */
  733. #if 0
  734. elt = ucl_object_lookup (cur, "weight");
  735. if (elt) {
  736. w = ucl_object_todouble (elt);
  737. if (w != 0) {
  738. item->weight = w;
  739. }
  740. }
  741. #endif
  742. elt = ucl_object_lookup (cur, "time");
  743. if (elt) {
  744. item->st->avg_time = ucl_object_todouble (elt);
  745. }
  746. elt = ucl_object_lookup (cur, "count");
  747. if (elt) {
  748. item->st->total_hits = ucl_object_toint (elt);
  749. item->last_count = item->st->total_hits;
  750. }
  751. elt = ucl_object_lookup (cur, "frequency");
  752. if (elt && ucl_object_type (elt) == UCL_OBJECT) {
  753. const ucl_object_t *freq_elt;
  754. freq_elt = ucl_object_lookup (elt, "avg");
  755. if (freq_elt) {
  756. item->st->avg_frequency = ucl_object_todouble (freq_elt);
  757. }
  758. freq_elt = ucl_object_lookup (elt, "stddev");
  759. if (freq_elt) {
  760. item->st->stddev_frequency = ucl_object_todouble (freq_elt);
  761. }
  762. }
  763. if (item->is_virtual && !(item->type & SYMBOL_TYPE_GHOST)) {
  764. g_assert (item->specific.virtual.parent < (gint)cache->items_by_id->len);
  765. parent = g_ptr_array_index (cache->items_by_id,
  766. item->specific.virtual.parent);
  767. item->specific.virtual.parent_item = parent;
  768. if (parent->st->weight < item->st->weight) {
  769. parent->st->weight = item->st->weight;
  770. }
  771. /*
  772. * We maintain avg_time for virtual symbols equal to the
  773. * parent item avg_time
  774. */
  775. item->st->avg_time = parent->st->avg_time;
  776. }
  777. cache->total_weight += fabs (item->st->weight);
  778. cache->total_hits += item->st->total_hits;
  779. }
  780. }
  781. ucl_object_iterate_free (it);
  782. ucl_object_unref (top);
  783. return TRUE;
  784. }
  785. #define ROUND_DOUBLE(x) (floor((x) * 100.0) / 100.0)
  786. static gboolean
  787. rspamd_symcache_save_items (struct rspamd_symcache *cache, const gchar *name)
  788. {
  789. struct rspamd_symcache_header hdr;
  790. ucl_object_t *top, *elt, *freq;
  791. GHashTableIter it;
  792. struct rspamd_symcache_item *item;
  793. struct ucl_emitter_functions *efunc;
  794. gpointer k, v;
  795. gint fd;
  796. FILE *fp;
  797. bool ret;
  798. gchar path[PATH_MAX];
  799. rspamd_snprintf (path, sizeof (path), "%s.new", name);
  800. for (;;) {
  801. fd = open (path, O_CREAT | O_WRONLY | O_EXCL, 00644);
  802. if (fd == -1) {
  803. if (errno == EEXIST) {
  804. /* Some other process is already writing data, give up silently */
  805. return TRUE;
  806. }
  807. msg_err_cache ("cannot open file %s, error %d, %s", path,
  808. errno, strerror (errno));
  809. return FALSE;
  810. }
  811. break;
  812. }
  813. rspamd_file_lock (fd, FALSE);
  814. fp = fdopen (fd, "w");
  815. memset (&hdr, 0, sizeof (hdr));
  816. memcpy (hdr.magic, rspamd_symcache_magic,
  817. sizeof (rspamd_symcache_magic));
  818. if (fwrite (&hdr, sizeof (hdr), 1, fp) == -1) {
  819. msg_err_cache ("cannot write to file %s, error %d, %s", path,
  820. errno, strerror (errno));
  821. rspamd_file_unlock (fd, FALSE);
  822. fclose (fp);
  823. return FALSE;
  824. }
  825. top = ucl_object_typed_new (UCL_OBJECT);
  826. g_hash_table_iter_init (&it, cache->items_by_symbol);
  827. while (g_hash_table_iter_next (&it, &k, &v)) {
  828. item = v;
  829. elt = ucl_object_typed_new (UCL_OBJECT);
  830. ucl_object_insert_key (elt,
  831. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  832. "weight", 0, false);
  833. ucl_object_insert_key (elt,
  834. ucl_object_fromdouble (ROUND_DOUBLE (item->st->time_counter.mean)),
  835. "time", 0, false);
  836. ucl_object_insert_key (elt, ucl_object_fromint (item->st->total_hits),
  837. "count", 0, false);
  838. freq = ucl_object_typed_new (UCL_OBJECT);
  839. ucl_object_insert_key (freq,
  840. ucl_object_fromdouble (ROUND_DOUBLE (item->st->frequency_counter.mean)),
  841. "avg", 0, false);
  842. ucl_object_insert_key (freq,
  843. ucl_object_fromdouble (ROUND_DOUBLE (item->st->frequency_counter.stddev)),
  844. "stddev", 0, false);
  845. ucl_object_insert_key (elt, freq, "frequency", 0, false);
  846. ucl_object_insert_key (top, elt, k, 0, false);
  847. }
  848. efunc = ucl_object_emit_file_funcs (fp);
  849. ret = ucl_object_emit_full (top, UCL_EMIT_JSON_COMPACT, efunc, NULL);
  850. ucl_object_emit_funcs_free (efunc);
  851. ucl_object_unref (top);
  852. rspamd_file_unlock (fd, FALSE);
  853. fclose (fp);
  854. if (rename (path, name) == -1) {
  855. msg_err_cache ("cannot rename %s -> %s, error %d, %s", path, name,
  856. errno, strerror (errno));
  857. (void)unlink (path);
  858. ret = FALSE;
  859. }
  860. return ret;
  861. }
  862. #undef ROUND_DOUBLE
  863. gint
  864. rspamd_symcache_add_symbol (struct rspamd_symcache *cache,
  865. const gchar *name,
  866. gint priority,
  867. symbol_func_t func,
  868. gpointer user_data,
  869. enum rspamd_symbol_type type,
  870. gint parent)
  871. {
  872. struct rspamd_symcache_item *item = NULL;
  873. const gchar *type_str = "normal";
  874. g_assert (cache != NULL);
  875. if (name == NULL && !(type & SYMBOL_TYPE_CALLBACK)) {
  876. msg_warn_cache ("no name for non-callback symbol!");
  877. }
  878. else if ((type & SYMBOL_TYPE_VIRTUAL & (~SYMBOL_TYPE_GHOST)) && parent == -1) {
  879. msg_warn_cache ("no parent symbol is associated with virtual symbol %s",
  880. name);
  881. }
  882. if (name != NULL && !(type & SYMBOL_TYPE_CALLBACK)) {
  883. struct rspamd_symcache_item *existing;
  884. if (strcspn (name, " \t\n\r") != strlen (name)) {
  885. msg_warn_cache ("bogus characters in symbol name: \"%s\"",
  886. name);
  887. }
  888. if ((existing = g_hash_table_lookup (cache->items_by_symbol, name)) != NULL) {
  889. if (existing->type & SYMBOL_TYPE_GHOST) {
  890. /*
  891. * Complicated part:
  892. * - we need to remove the existing ghost symbol
  893. * - we need to cleanup containers:
  894. * - symbols hash
  895. * - specific array
  896. * - items_by_it
  897. * - decrement used_items
  898. */
  899. msg_info_cache ("duplicate ghost symbol %s is removed", name);
  900. if (existing->container) {
  901. g_ptr_array_remove (existing->container, existing);
  902. }
  903. g_ptr_array_remove (cache->items_by_id, existing->container);
  904. cache->used_items --;
  905. g_hash_table_remove (cache->items_by_symbol, name);
  906. /*
  907. * Here can be memory leak, but we assume that ghost symbols
  908. * are also virtual
  909. */
  910. }
  911. else {
  912. msg_err_cache ("skip duplicate symbol registration for %s", name);
  913. return -1;
  914. }
  915. }
  916. }
  917. if (type & (SYMBOL_TYPE_CLASSIFIER|SYMBOL_TYPE_CALLBACK|
  918. SYMBOL_TYPE_PREFILTER|SYMBOL_TYPE_POSTFILTER|
  919. SYMBOL_TYPE_IDEMPOTENT|SYMBOL_TYPE_GHOST)) {
  920. type |= SYMBOL_TYPE_NOSTAT;
  921. }
  922. item = rspamd_mempool_alloc0 (cache->static_pool,
  923. sizeof (struct rspamd_symcache_item));
  924. item->st = rspamd_mempool_alloc0_shared (cache->static_pool,
  925. sizeof (*item->st));
  926. item->enabled = TRUE;
  927. /*
  928. * We do not share cd to skip locking, instead we'll just calculate it on
  929. * save or accumulate
  930. */
  931. item->cd = rspamd_mempool_alloc0 (cache->static_pool,
  932. sizeof (struct rspamd_counter_data));
  933. item->priority = priority;
  934. item->type = type;
  935. if ((type & SYMBOL_TYPE_FINE) && item->priority == 0) {
  936. /* Make priority for negative weighted symbols */
  937. item->priority = 1;
  938. }
  939. if (func) {
  940. /* Non-virtual symbol */
  941. g_assert (parent == -1);
  942. if (item->type & SYMBOL_TYPE_PREFILTER) {
  943. type_str = "prefilter";
  944. g_ptr_array_add (cache->prefilters, item);
  945. item->container = cache->prefilters;
  946. }
  947. else if (item->type & SYMBOL_TYPE_IDEMPOTENT) {
  948. type_str = "idempotent";
  949. g_ptr_array_add (cache->idempotent, item);
  950. item->container = cache->idempotent;
  951. }
  952. else if (item->type & SYMBOL_TYPE_POSTFILTER) {
  953. type_str = "postfilter";
  954. g_ptr_array_add (cache->postfilters, item);
  955. item->container = cache->postfilters;
  956. }
  957. else if (item->type & SYMBOL_TYPE_CONNFILTER) {
  958. type_str = "connfilter";
  959. g_ptr_array_add (cache->connfilters, item);
  960. item->container = cache->connfilters;
  961. }
  962. else {
  963. item->is_filter = TRUE;
  964. g_ptr_array_add (cache->filters, item);
  965. item->container = cache->filters;
  966. }
  967. item->id = cache->items_by_id->len;
  968. g_ptr_array_add (cache->items_by_id, item);
  969. item->specific.normal.func = func;
  970. item->specific.normal.user_data = user_data;
  971. item->specific.normal.conditions = NULL;
  972. }
  973. else {
  974. /*
  975. * Three possibilities here when no function is specified:
  976. * - virtual symbol (beware of ghosts!)
  977. * - classifier symbol
  978. * - composite symbol
  979. */
  980. if (item->type & SYMBOL_TYPE_COMPOSITE) {
  981. item->specific.normal.conditions = NULL;
  982. item->specific.normal.user_data = user_data;
  983. g_assert (user_data != NULL);
  984. g_ptr_array_add (cache->composites, item);
  985. item->id = cache->items_by_id->len;
  986. g_ptr_array_add (cache->items_by_id, item);
  987. item->container = cache->composites;
  988. type_str = "composite";
  989. }
  990. else if (item->type & SYMBOL_TYPE_CLASSIFIER) {
  991. /* Treat it as normal symbol to allow enable/disable */
  992. item->id = cache->items_by_id->len;
  993. g_ptr_array_add (cache->items_by_id, item);
  994. item->is_filter = TRUE;
  995. item->specific.normal.func = NULL;
  996. item->specific.normal.user_data = NULL;
  997. item->specific.normal.conditions = NULL;
  998. type_str = "classifier";
  999. }
  1000. else {
  1001. item->is_virtual = TRUE;
  1002. item->specific.virtual.parent = parent;
  1003. item->specific.virtual.parent_item =
  1004. g_ptr_array_index (cache->items_by_id, parent);
  1005. item->id = cache->virtual->len;
  1006. g_ptr_array_add (cache->virtual, item);
  1007. item->container = cache->virtual;
  1008. /* Not added to items_by_id, handled by parent */
  1009. type_str = "virtual";
  1010. }
  1011. }
  1012. cache->used_items ++;
  1013. cache->id ++;
  1014. if (!(item->type &
  1015. (SYMBOL_TYPE_IDEMPOTENT|SYMBOL_TYPE_NOSTAT|SYMBOL_TYPE_CLASSIFIER))) {
  1016. if (name != NULL) {
  1017. cache->cksum = t1ha (name, strlen (name),
  1018. cache->cksum);
  1019. } else {
  1020. cache->cksum = t1ha (&item->id, sizeof (item->id),
  1021. cache->cksum);
  1022. }
  1023. cache->stats_symbols_count ++;
  1024. }
  1025. if (name != NULL) {
  1026. item->symbol = rspamd_mempool_strdup (cache->static_pool, name);
  1027. msg_debug_cache ("used items: %d, added symbol: %s, %d; symbol type: %s",
  1028. cache->used_items, name, item->id, type_str);
  1029. } else {
  1030. g_assert (func != NULL);
  1031. msg_debug_cache ("used items: %d, added unnamed symbol: %d; symbol type: %s",
  1032. cache->used_items, item->id, type_str);
  1033. }
  1034. item->deps = g_ptr_array_new ();
  1035. item->rdeps = g_ptr_array_new ();
  1036. item->type_descr = type_str;
  1037. rspamd_mempool_add_destructor (cache->static_pool,
  1038. rspamd_ptr_array_free_hard, item->deps);
  1039. rspamd_mempool_add_destructor (cache->static_pool,
  1040. rspamd_ptr_array_free_hard, item->rdeps);
  1041. if (name != NULL) {
  1042. g_hash_table_insert (cache->items_by_symbol, item->symbol, item);
  1043. }
  1044. return item->id;
  1045. }
  1046. void
  1047. rspamd_symcache_set_peak_callback (struct rspamd_symcache *cache,
  1048. gint cbref)
  1049. {
  1050. g_assert (cache != NULL);
  1051. if (cache->peak_cb != -1) {
  1052. luaL_unref (cache->cfg->lua_state, LUA_REGISTRYINDEX,
  1053. cache->peak_cb);
  1054. }
  1055. cache->peak_cb = cbref;
  1056. msg_info_cache ("registered peak callback");
  1057. }
  1058. gboolean
  1059. rspamd_symcache_add_condition_delayed (struct rspamd_symcache *cache,
  1060. const gchar *sym, lua_State *L, gint cbref)
  1061. {
  1062. struct delayed_cache_condition *ncond;
  1063. g_assert (cache != NULL);
  1064. g_assert (sym != NULL);
  1065. ncond = g_malloc0 (sizeof (*ncond));
  1066. ncond->sym = g_strdup (sym);
  1067. ncond->cbref = cbref;
  1068. ncond->L = L;
  1069. cache->id ++;
  1070. cache->delayed_conditions = g_list_prepend (cache->delayed_conditions, ncond);
  1071. return TRUE;
  1072. }
  1073. void
  1074. rspamd_symcache_save (struct rspamd_symcache *cache)
  1075. {
  1076. if (cache != NULL) {
  1077. if (cache->cfg->cache_filename) {
  1078. /* Try to sync values to the disk */
  1079. if (!rspamd_symcache_save_items (cache,
  1080. cache->cfg->cache_filename)) {
  1081. msg_err_cache ("cannot save cache data to %s: %s",
  1082. cache->cfg->cache_filename, strerror (errno));
  1083. }
  1084. }
  1085. }
  1086. }
  1087. void
  1088. rspamd_symcache_destroy (struct rspamd_symcache *cache)
  1089. {
  1090. GList *cur;
  1091. struct delayed_cache_dependency *ddep;
  1092. struct delayed_cache_condition *dcond;
  1093. if (cache != NULL) {
  1094. if (cache->delayed_deps) {
  1095. cur = cache->delayed_deps;
  1096. while (cur) {
  1097. ddep = cur->data;
  1098. g_free (ddep->from);
  1099. g_free (ddep->to);
  1100. g_free (ddep);
  1101. cur = g_list_next (cur);
  1102. }
  1103. g_list_free (cache->delayed_deps);
  1104. }
  1105. if (cache->delayed_conditions) {
  1106. cur = cache->delayed_conditions;
  1107. while (cur) {
  1108. dcond = cur->data;
  1109. g_free (dcond->sym);
  1110. g_free (dcond);
  1111. cur = g_list_next (cur);
  1112. }
  1113. g_list_free (cache->delayed_conditions);
  1114. }
  1115. g_hash_table_destroy (cache->items_by_symbol);
  1116. g_ptr_array_free (cache->items_by_id, TRUE);
  1117. rspamd_mempool_delete (cache->static_pool);
  1118. g_ptr_array_free (cache->connfilters, TRUE);
  1119. g_ptr_array_free (cache->prefilters, TRUE);
  1120. g_ptr_array_free (cache->filters, TRUE);
  1121. g_ptr_array_free (cache->postfilters, TRUE);
  1122. g_ptr_array_free (cache->idempotent, TRUE);
  1123. g_ptr_array_free (cache->composites, TRUE);
  1124. g_ptr_array_free (cache->virtual, TRUE);
  1125. REF_RELEASE (cache->items_by_order);
  1126. if (cache->peak_cb != -1) {
  1127. luaL_unref (cache->cfg->lua_state, LUA_REGISTRYINDEX, cache->peak_cb);
  1128. }
  1129. g_free (cache);
  1130. }
  1131. }
  1132. struct rspamd_symcache*
  1133. rspamd_symcache_new (struct rspamd_config *cfg)
  1134. {
  1135. struct rspamd_symcache *cache;
  1136. cache = g_malloc0 (sizeof (struct rspamd_symcache));
  1137. cache->static_pool =
  1138. rspamd_mempool_new (rspamd_mempool_suggest_size (), "symcache", 0);
  1139. cache->items_by_symbol = g_hash_table_new (rspamd_str_hash,
  1140. rspamd_str_equal);
  1141. cache->items_by_id = g_ptr_array_new ();
  1142. cache->connfilters = g_ptr_array_new ();
  1143. cache->prefilters = g_ptr_array_new ();
  1144. cache->filters = g_ptr_array_new ();
  1145. cache->postfilters = g_ptr_array_new ();
  1146. cache->idempotent = g_ptr_array_new ();
  1147. cache->composites = g_ptr_array_new ();
  1148. cache->virtual = g_ptr_array_new ();
  1149. cache->reload_time = cfg->cache_reload_time;
  1150. cache->total_hits = 1;
  1151. cache->total_weight = 1.0;
  1152. cache->cfg = cfg;
  1153. cache->cksum = 0xdeadbabe;
  1154. cache->peak_cb = -1;
  1155. cache->id = (guint)rspamd_random_uint64_fast ();
  1156. return cache;
  1157. }
  1158. static void
  1159. rspamd_symcache_metric_connect_cb (gpointer k, gpointer v, gpointer ud)
  1160. {
  1161. struct rspamd_symcache *cache = (struct rspamd_symcache *)ud;
  1162. const gchar *sym = k;
  1163. struct rspamd_symbol *s = (struct rspamd_symbol *)v;
  1164. gdouble weight;
  1165. struct rspamd_symcache_item *item;
  1166. weight = *s->weight_ptr;
  1167. item = g_hash_table_lookup (cache->items_by_symbol, sym);
  1168. if (item) {
  1169. item->st->weight = weight;
  1170. s->cache_item = item;
  1171. }
  1172. }
  1173. gboolean
  1174. rspamd_symcache_init (struct rspamd_symcache *cache)
  1175. {
  1176. gboolean res = TRUE;
  1177. g_assert (cache != NULL);
  1178. cache->reload_time = cache->cfg->cache_reload_time;
  1179. if (cache->cfg->cache_filename != NULL) {
  1180. res = rspamd_symcache_load_items (cache, cache->cfg->cache_filename);
  1181. }
  1182. rspamd_symcache_post_init (cache);
  1183. /* Connect metric symbols with symcache symbols */
  1184. if (cache->cfg->symbols) {
  1185. g_hash_table_foreach (cache->cfg->symbols,
  1186. rspamd_symcache_metric_connect_cb,
  1187. cache);
  1188. }
  1189. return res;
  1190. }
  1191. static void
  1192. rspamd_symcache_validate_cb (gpointer k, gpointer v, gpointer ud)
  1193. {
  1194. struct rspamd_symcache_item *item = v, *parent;
  1195. struct rspamd_config *cfg;
  1196. struct rspamd_symcache *cache = (struct rspamd_symcache *)ud;
  1197. struct rspamd_symbol *s;
  1198. gboolean skipped, ghost;
  1199. gint p1, p2;
  1200. ghost = item->st->weight == 0 ? TRUE : FALSE;
  1201. cfg = cache->cfg;
  1202. /* Check whether this item is skipped */
  1203. skipped = !ghost;
  1204. g_assert (cfg != NULL);
  1205. if ((item->type &
  1206. (SYMBOL_TYPE_NORMAL|SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_COMPOSITE|SYMBOL_TYPE_CLASSIFIER))
  1207. && g_hash_table_lookup (cfg->symbols, item->symbol) == NULL) {
  1208. if (cfg->unknown_weight != 0) {
  1209. skipped = FALSE;
  1210. item->st->weight = cfg->unknown_weight;
  1211. s = rspamd_mempool_alloc0 (cache->static_pool,
  1212. sizeof (*s));
  1213. s->name = item->symbol;
  1214. s->weight_ptr = &item->st->weight;
  1215. g_hash_table_insert (cfg->symbols, item->symbol, s);
  1216. msg_info_cache ("adding unknown symbol %s", item->symbol);
  1217. ghost = FALSE;
  1218. }
  1219. else {
  1220. skipped = TRUE;
  1221. }
  1222. }
  1223. else {
  1224. skipped = FALSE;
  1225. }
  1226. if (!ghost && skipped) {
  1227. if (!(item->type & SYMBOL_TYPE_SKIPPED)) {
  1228. item->type |= SYMBOL_TYPE_SKIPPED;
  1229. msg_warn_cache ("symbol %s has no score registered, skip its check",
  1230. item->symbol);
  1231. }
  1232. }
  1233. if (ghost) {
  1234. msg_debug_cache ("symbol %s is registered as ghost symbol, it won't be inserted "
  1235. "to any metric", item->symbol);
  1236. }
  1237. if (item->st->weight < 0 && item->priority == 0) {
  1238. item->priority ++;
  1239. }
  1240. if (item->is_virtual) {
  1241. if (!(item->type & SYMBOL_TYPE_GHOST)) {
  1242. g_assert (item->specific.virtual.parent != -1);
  1243. g_assert (item->specific.virtual.parent < (gint) cache->items_by_id->len);
  1244. parent = g_ptr_array_index (cache->items_by_id,
  1245. item->specific.virtual.parent);
  1246. item->specific.virtual.parent_item = parent;
  1247. if (fabs (parent->st->weight) < fabs (item->st->weight)) {
  1248. parent->st->weight = item->st->weight;
  1249. }
  1250. p1 = abs (item->priority);
  1251. p2 = abs (parent->priority);
  1252. if (p1 != p2) {
  1253. parent->priority = MAX (p1, p2);
  1254. item->priority = parent->priority;
  1255. }
  1256. }
  1257. }
  1258. cache->total_weight += fabs (item->st->weight);
  1259. }
  1260. gboolean
  1261. rspamd_symcache_validate (struct rspamd_symcache *cache,
  1262. struct rspamd_config *cfg,
  1263. gboolean strict)
  1264. {
  1265. struct rspamd_symcache_item *item;
  1266. GHashTableIter it;
  1267. gpointer k, v;
  1268. struct rspamd_symbol *sym_def;
  1269. gboolean ignore_symbol = FALSE, ret = TRUE;
  1270. if (cache == NULL) {
  1271. msg_err ("empty cache is invalid");
  1272. return FALSE;
  1273. }
  1274. g_hash_table_foreach (cache->items_by_symbol,
  1275. rspamd_symcache_validate_cb,
  1276. cache);
  1277. /* Now check each metric item and find corresponding symbol in a cache */
  1278. g_hash_table_iter_init (&it, cfg->symbols);
  1279. while (g_hash_table_iter_next (&it, &k, &v)) {
  1280. ignore_symbol = FALSE;
  1281. sym_def = v;
  1282. if (sym_def && (sym_def->flags &
  1283. (RSPAMD_SYMBOL_FLAG_IGNORE_METRIC|RSPAMD_SYMBOL_FLAG_DISABLED))) {
  1284. ignore_symbol = TRUE;
  1285. }
  1286. if (!ignore_symbol) {
  1287. item = g_hash_table_lookup (cache->items_by_symbol, k);
  1288. if (item == NULL) {
  1289. msg_warn_cache (
  1290. "symbol '%s' has its score defined but there is no "
  1291. "corresponding rule registered",
  1292. k);
  1293. if (strict) {
  1294. ret = FALSE;
  1295. }
  1296. }
  1297. }
  1298. else if (sym_def->flags & RSPAMD_SYMBOL_FLAG_DISABLED) {
  1299. item = g_hash_table_lookup (cache->items_by_symbol, k);
  1300. if (item) {
  1301. item->enabled = FALSE;
  1302. }
  1303. }
  1304. }
  1305. return ret;
  1306. }
  1307. /* Return true if metric has score that is more than spam score for it */
  1308. static gboolean
  1309. rspamd_symcache_metric_limit (struct rspamd_task *task,
  1310. struct cache_savepoint *cp)
  1311. {
  1312. struct rspamd_scan_result *res;
  1313. double ms;
  1314. if (task->flags & RSPAMD_TASK_FLAG_PASS_ALL) {
  1315. return FALSE;
  1316. }
  1317. if (cp->lim == 0.0) {
  1318. res = task->result;
  1319. if (res) {
  1320. ms = rspamd_task_get_required_score (task, res);
  1321. if (!isnan (ms) && cp->lim < ms) {
  1322. cp->rs = res;
  1323. cp->lim = ms;
  1324. }
  1325. }
  1326. }
  1327. if (cp->rs) {
  1328. if (cp->rs->score > cp->lim) {
  1329. return TRUE;
  1330. }
  1331. }
  1332. else {
  1333. /* No reject score define, always check all rules */
  1334. cp->lim = -1;
  1335. }
  1336. return FALSE;
  1337. }
  1338. static inline gboolean
  1339. rspamd_symcache_check_id_list (const struct rspamd_symcache_id_list *ls, guint32 id)
  1340. {
  1341. guint i;
  1342. if (ls->dyn.e == -1) {
  1343. guint *res = bsearch (&id, ls->dyn.n, ls->dyn.len, sizeof (guint32),
  1344. rspamd_id_cmp);
  1345. if (res) {
  1346. return TRUE;
  1347. }
  1348. }
  1349. else {
  1350. for (i = 0; i < G_N_ELEMENTS (ls->st); i ++) {
  1351. if (ls->st[i] == id) {
  1352. return TRUE;
  1353. }
  1354. else if (ls->st[i] == 0) {
  1355. return FALSE;
  1356. }
  1357. }
  1358. }
  1359. return FALSE;
  1360. }
  1361. gboolean
  1362. rspamd_symcache_is_item_allowed (struct rspamd_task *task,
  1363. struct rspamd_symcache_item *item,
  1364. gboolean exec_only)
  1365. {
  1366. const gchar *what = "execution";
  1367. if (!exec_only) {
  1368. what = "symbol insertion";
  1369. }
  1370. /* Static checks */
  1371. if (!item->enabled ||
  1372. (RSPAMD_TASK_IS_EMPTY (task) && !(item->type & SYMBOL_TYPE_EMPTY)) ||
  1373. (item->type & SYMBOL_TYPE_MIME_ONLY && !RSPAMD_TASK_IS_MIME(task))) {
  1374. if (!item->enabled) {
  1375. msg_debug_cache_task ("skipping %s of %s as it is permanently disabled; symbol type=%s",
  1376. what, item->symbol, item->type_descr);
  1377. return FALSE;
  1378. }
  1379. else {
  1380. /*
  1381. * Exclude virtual symbols
  1382. */
  1383. if (exec_only) {
  1384. msg_debug_cache_task ("skipping check of %s as it cannot be "
  1385. "executed for this task type; symbol type=%s",
  1386. item->symbol, item->type_descr);
  1387. return FALSE;
  1388. }
  1389. }
  1390. }
  1391. /* Settings checks */
  1392. if (task->settings_elt != 0) {
  1393. guint32 id = task->settings_elt->id;
  1394. if (item->forbidden_ids.st[0] != 0 &&
  1395. rspamd_symcache_check_id_list (&item->forbidden_ids,
  1396. id)) {
  1397. msg_debug_cache_task ("deny %s of %s as it is forbidden for "
  1398. "settings id %ud; symbol type=%s",
  1399. what,
  1400. item->symbol,
  1401. id,
  1402. item->type_descr);
  1403. return FALSE;
  1404. }
  1405. if (!(item->type & SYMBOL_TYPE_EXPLICIT_DISABLE)) {
  1406. if (item->allowed_ids.st[0] == 0 ||
  1407. !rspamd_symcache_check_id_list (&item->allowed_ids,
  1408. id)) {
  1409. if (task->settings_elt->policy == RSPAMD_SETTINGS_POLICY_IMPLICIT_ALLOW) {
  1410. msg_debug_cache_task ("allow execution of %s settings id %ud "
  1411. "allows implicit execution of the symbols;"
  1412. "symbol type=%s",
  1413. item->symbol,
  1414. id,
  1415. item->type_descr);
  1416. return TRUE;
  1417. }
  1418. if (exec_only) {
  1419. /*
  1420. * Special case if any of our virtual children are enabled
  1421. */
  1422. if (rspamd_symcache_check_id_list (&item->exec_only_ids, id)) {
  1423. return TRUE;
  1424. }
  1425. }
  1426. msg_debug_cache_task ("deny %s of %s as it is not listed "
  1427. "as allowed for settings id %ud; symbol type=%s",
  1428. what,
  1429. item->symbol,
  1430. id,
  1431. item->type_descr);
  1432. return FALSE;
  1433. }
  1434. }
  1435. else {
  1436. msg_debug_cache_task ("allow %s of %s for "
  1437. "settings id %ud as it can be only disabled explicitly;"
  1438. " symbol type=%s",
  1439. what,
  1440. item->symbol,
  1441. id,
  1442. item->type_descr);
  1443. }
  1444. }
  1445. else if (item->type & SYMBOL_TYPE_EXPLICIT_ENABLE) {
  1446. msg_debug_cache_task ("deny %s of %s as it must be explicitly enabled; symbol type=%s",
  1447. what,
  1448. item->symbol,
  1449. item->type_descr);
  1450. return FALSE;
  1451. }
  1452. /* Allow all symbols with no settings id */
  1453. return TRUE;
  1454. }
  1455. static gboolean
  1456. rspamd_symcache_check_symbol (struct rspamd_task *task,
  1457. struct rspamd_symcache *cache,
  1458. struct rspamd_symcache_item *item,
  1459. struct cache_savepoint *checkpoint)
  1460. {
  1461. struct rspamd_task **ptask;
  1462. lua_State *L;
  1463. gboolean check = TRUE;
  1464. struct rspamd_symcache_dynamic_item *dyn_item =
  1465. rspamd_symcache_get_dynamic (checkpoint, item);
  1466. if (item->type & (SYMBOL_TYPE_CLASSIFIER|SYMBOL_TYPE_COMPOSITE)) {
  1467. /* Classifiers are special :( */
  1468. return TRUE;
  1469. }
  1470. if (rspamd_session_blocked (task->s)) {
  1471. /*
  1472. * We cannot add new events as session is either destroyed or
  1473. * being cleaned up.
  1474. */
  1475. return TRUE;
  1476. }
  1477. g_assert (!item->is_virtual);
  1478. g_assert (item->specific.normal.func != NULL);
  1479. if (CHECK_START_BIT (checkpoint, dyn_item)) {
  1480. /*
  1481. * This can actually happen when deps span over different layers
  1482. */
  1483. return CHECK_FINISH_BIT (checkpoint, dyn_item);
  1484. }
  1485. /* Check has been started */
  1486. SET_START_BIT (checkpoint, dyn_item);
  1487. if (!rspamd_symcache_is_item_allowed (task, item, TRUE)) {
  1488. check = FALSE;
  1489. }
  1490. else if (item->specific.normal.conditions) {
  1491. struct rspamd_symcache_condition *cur_cond;
  1492. DL_FOREACH (item->specific.normal.conditions, cur_cond) {
  1493. /* We also executes condition callback to check if we need this symbol */
  1494. L = task->cfg->lua_state;
  1495. lua_rawgeti (L, LUA_REGISTRYINDEX, cur_cond->cb);
  1496. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  1497. rspamd_lua_setclass (L, "rspamd{task}", -1);
  1498. *ptask = task;
  1499. if (lua_pcall (L, 1, 1, 0) != 0) {
  1500. msg_info_task ("call to condition for %s failed: %s",
  1501. item->symbol, lua_tostring (L, -1));
  1502. lua_pop (L, 1);
  1503. }
  1504. else {
  1505. check = lua_toboolean (L, -1);
  1506. lua_pop (L, 1);
  1507. }
  1508. if (!check) {
  1509. break;
  1510. }
  1511. }
  1512. if (!check) {
  1513. msg_debug_cache_task ("skipping check of %s as its start condition is false; "
  1514. "symbol type = %s",
  1515. item->symbol, item->type_descr);
  1516. }
  1517. }
  1518. if (check) {
  1519. msg_debug_cache_task ("execute %s, %d; symbol type = %s", item->symbol,
  1520. item->id, item->type_descr);
  1521. if (checkpoint->profile) {
  1522. ev_now_update_if_cheap (task->event_loop);
  1523. dyn_item->start_msec = (ev_now (task->event_loop) -
  1524. checkpoint->profile_start) * 1e3;
  1525. }
  1526. dyn_item->async_events = 0;
  1527. checkpoint->cur_item = item;
  1528. checkpoint->items_inflight ++;
  1529. /* Callback now must finalize itself */
  1530. item->specific.normal.func (task, item, item->specific.normal.user_data);
  1531. checkpoint->cur_item = NULL;
  1532. if (checkpoint->items_inflight == 0) {
  1533. return TRUE;
  1534. }
  1535. if (dyn_item->async_events == 0 && !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1536. msg_err_cache ("critical error: item %s has no async events pending, "
  1537. "but it is not finalised", item->symbol);
  1538. g_assert_not_reached ();
  1539. }
  1540. return FALSE;
  1541. }
  1542. else {
  1543. SET_FINISH_BIT (checkpoint, dyn_item);
  1544. }
  1545. return TRUE;
  1546. }
  1547. static gboolean
  1548. rspamd_symcache_check_deps (struct rspamd_task *task,
  1549. struct rspamd_symcache *cache,
  1550. struct rspamd_symcache_item *item,
  1551. struct cache_savepoint *checkpoint,
  1552. guint recursion,
  1553. gboolean check_only)
  1554. {
  1555. struct cache_dependency *dep;
  1556. guint i;
  1557. gboolean ret = TRUE;
  1558. static const guint max_recursion = 20;
  1559. struct rspamd_symcache_dynamic_item *dyn_item;
  1560. if (recursion > max_recursion) {
  1561. msg_err_task ("cyclic dependencies: maximum check level %ud exceed when "
  1562. "checking dependencies for %s", max_recursion, item->symbol);
  1563. return TRUE;
  1564. }
  1565. if (item->deps != NULL && item->deps->len > 0) {
  1566. for (i = 0; i < item->deps->len; i ++) {
  1567. dep = g_ptr_array_index (item->deps, i);
  1568. if (dep->item == NULL) {
  1569. /* Assume invalid deps as done */
  1570. msg_debug_cache_task ("symbol %d(%s) has invalid dependencies on %d(%s)",
  1571. item->id, item->symbol, dep->id, dep->sym);
  1572. continue;
  1573. }
  1574. dyn_item = rspamd_symcache_get_dynamic (checkpoint, dep->item);
  1575. if (!CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1576. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  1577. /* Not started */
  1578. if (!check_only) {
  1579. if (!rspamd_symcache_check_deps (task, cache,
  1580. dep->item,
  1581. checkpoint,
  1582. recursion + 1,
  1583. check_only)) {
  1584. ret = FALSE;
  1585. msg_debug_cache_task ("delayed dependency %d(%s) for "
  1586. "symbol %d(%s)",
  1587. dep->id, dep->sym, item->id, item->symbol);
  1588. }
  1589. else if (!rspamd_symcache_check_symbol (task, cache,
  1590. dep->item,
  1591. checkpoint)) {
  1592. /* Now started, but has events pending */
  1593. ret = FALSE;
  1594. msg_debug_cache_task ("started check of %d(%s) symbol "
  1595. "as dep for "
  1596. "%d(%s)",
  1597. dep->id, dep->sym, item->id, item->symbol);
  1598. }
  1599. else {
  1600. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) is "
  1601. "already processed",
  1602. dep->id, dep->sym, item->id, item->symbol);
  1603. }
  1604. }
  1605. else {
  1606. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) "
  1607. "cannot be started now",
  1608. dep->id, dep->sym,
  1609. item->id, item->symbol);
  1610. ret = FALSE;
  1611. }
  1612. }
  1613. else {
  1614. /* Started but not finished */
  1615. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) is "
  1616. "still executing",
  1617. dep->id, dep->sym,
  1618. item->id, item->symbol);
  1619. ret = FALSE;
  1620. }
  1621. }
  1622. else {
  1623. msg_debug_cache_task ("dependency %d(%s) for symbol %d(%s) is already "
  1624. "checked",
  1625. dep->id, dep->sym,
  1626. item->id, item->symbol);
  1627. }
  1628. }
  1629. }
  1630. return ret;
  1631. }
  1632. static struct cache_savepoint *
  1633. rspamd_symcache_make_checkpoint (struct rspamd_task *task,
  1634. struct rspamd_symcache *cache)
  1635. {
  1636. struct cache_savepoint *checkpoint;
  1637. if (cache->items_by_order->id != cache->id) {
  1638. /*
  1639. * Cache has been modified, need to resort it
  1640. */
  1641. msg_info_cache ("symbols cache has been modified since last check:"
  1642. " old id: %ud, new id: %ud",
  1643. cache->items_by_order->id, cache->id);
  1644. rspamd_symcache_resort (cache);
  1645. }
  1646. checkpoint = rspamd_mempool_alloc0 (task->task_pool,
  1647. sizeof (*checkpoint) +
  1648. sizeof (struct rspamd_symcache_dynamic_item) * cache->items_by_id->len);
  1649. g_assert (cache->items_by_order != NULL);
  1650. checkpoint->version = cache->items_by_order->d->len;
  1651. checkpoint->order = cache->items_by_order;
  1652. REF_RETAIN (checkpoint->order);
  1653. rspamd_mempool_add_destructor (task->task_pool,
  1654. rspamd_symcache_order_unref, checkpoint->order);
  1655. /* Calculate profile probability */
  1656. ev_now_update_if_cheap (task->event_loop);
  1657. ev_tstamp now = ev_now (task->event_loop);
  1658. checkpoint->profile_start = now;
  1659. if ((cache->last_profile == 0.0 || now > cache->last_profile + PROFILE_MAX_TIME) ||
  1660. (task->msg.len >= PROFILE_MESSAGE_SIZE_THRESHOLD) ||
  1661. (rspamd_random_double_fast () >= (1 - PROFILE_PROBABILITY))) {
  1662. msg_debug_cache_task ("enable profiling of symbols for task");
  1663. checkpoint->profile = TRUE;
  1664. cache->last_profile = now;
  1665. }
  1666. task->checkpoint = checkpoint;
  1667. return checkpoint;
  1668. }
  1669. gboolean
  1670. rspamd_symcache_process_settings (struct rspamd_task *task,
  1671. struct rspamd_symcache *cache)
  1672. {
  1673. const ucl_object_t *wl, *cur, *disabled, *enabled;
  1674. struct rspamd_symbols_group *gr;
  1675. GHashTableIter gr_it;
  1676. ucl_object_iter_t it = NULL;
  1677. gboolean already_disabled = FALSE;
  1678. gpointer k, v;
  1679. wl = ucl_object_lookup (task->settings, "whitelist");
  1680. if (wl != NULL) {
  1681. msg_info_task ("task is whitelisted");
  1682. task->flags |= RSPAMD_TASK_FLAG_SKIP;
  1683. return TRUE;
  1684. }
  1685. enabled = ucl_object_lookup (task->settings, "symbols_enabled");
  1686. if (enabled) {
  1687. /* Disable all symbols but selected */
  1688. rspamd_symcache_disable_all_symbols (task, cache,
  1689. SYMBOL_TYPE_EXPLICIT_DISABLE);
  1690. already_disabled = TRUE;
  1691. it = NULL;
  1692. while ((cur = ucl_iterate_object (enabled, &it, true)) != NULL) {
  1693. rspamd_symcache_enable_symbol_checkpoint (task, cache,
  1694. ucl_object_tostring (cur));
  1695. }
  1696. }
  1697. /* Enable groups of symbols */
  1698. enabled = ucl_object_lookup (task->settings, "groups_enabled");
  1699. if (enabled) {
  1700. it = NULL;
  1701. if (!already_disabled) {
  1702. rspamd_symcache_disable_all_symbols (task, cache,
  1703. SYMBOL_TYPE_EXPLICIT_DISABLE);
  1704. }
  1705. while ((cur = ucl_iterate_object (enabled, &it, true)) != NULL) {
  1706. if (ucl_object_type (cur) == UCL_STRING) {
  1707. gr = g_hash_table_lookup (task->cfg->groups,
  1708. ucl_object_tostring (cur));
  1709. if (gr) {
  1710. g_hash_table_iter_init (&gr_it, gr->symbols);
  1711. while (g_hash_table_iter_next (&gr_it, &k, &v)) {
  1712. rspamd_symcache_enable_symbol_checkpoint (task, cache, k);
  1713. }
  1714. }
  1715. }
  1716. }
  1717. }
  1718. disabled = ucl_object_lookup (task->settings, "symbols_disabled");
  1719. if (disabled) {
  1720. it = NULL;
  1721. while ((cur = ucl_iterate_object (disabled, &it, true)) != NULL) {
  1722. rspamd_symcache_disable_symbol_checkpoint (task, cache,
  1723. ucl_object_tostring (cur));
  1724. }
  1725. }
  1726. /* Disable groups of symbols */
  1727. disabled = ucl_object_lookup (task->settings, "groups_disabled");
  1728. if (disabled) {
  1729. it = NULL;
  1730. while ((cur = ucl_iterate_object (disabled, &it, true)) != NULL) {
  1731. if (ucl_object_type (cur) == UCL_STRING) {
  1732. gr = g_hash_table_lookup (task->cfg->groups,
  1733. ucl_object_tostring (cur));
  1734. if (gr) {
  1735. g_hash_table_iter_init (&gr_it, gr->symbols);
  1736. while (g_hash_table_iter_next (&gr_it, &k, &v)) {
  1737. rspamd_symcache_disable_symbol_checkpoint (task, cache, k);
  1738. }
  1739. }
  1740. }
  1741. }
  1742. }
  1743. return FALSE;
  1744. }
  1745. gboolean
  1746. rspamd_symcache_process_symbols (struct rspamd_task *task,
  1747. struct rspamd_symcache *cache,
  1748. gint stage)
  1749. {
  1750. struct rspamd_symcache_item *item = NULL;
  1751. struct rspamd_symcache_dynamic_item *dyn_item;
  1752. struct cache_savepoint *checkpoint;
  1753. gint i;
  1754. gboolean all_done = TRUE;
  1755. gint saved_priority;
  1756. guint start_events_pending;
  1757. g_assert (cache != NULL);
  1758. if (task->checkpoint == NULL) {
  1759. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  1760. task->checkpoint = checkpoint;
  1761. }
  1762. else {
  1763. checkpoint = task->checkpoint;
  1764. }
  1765. msg_debug_cache_task ("symbols processing stage at pass: %d", stage);
  1766. start_events_pending = rspamd_session_events_pending (task->s);
  1767. switch (stage) {
  1768. case RSPAMD_TASK_STAGE_CONNFILTERS:
  1769. /* Check for connection filters */
  1770. saved_priority = G_MININT;
  1771. all_done = TRUE;
  1772. for (i = 0; i < (gint) cache->connfilters->len; i++) {
  1773. item = g_ptr_array_index (cache->connfilters, i);
  1774. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1775. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1776. return TRUE;
  1777. }
  1778. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1779. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1780. if (checkpoint->has_slow) {
  1781. /* Delay */
  1782. checkpoint->has_slow = FALSE;
  1783. return FALSE;
  1784. }
  1785. /* Check priorities */
  1786. if (saved_priority == G_MININT) {
  1787. saved_priority = item->priority;
  1788. }
  1789. else {
  1790. if (item->priority < saved_priority &&
  1791. rspamd_session_events_pending (task->s) > start_events_pending) {
  1792. /*
  1793. * Delay further checks as we have higher
  1794. * priority filters to be processed
  1795. */
  1796. return FALSE;
  1797. }
  1798. }
  1799. rspamd_symcache_check_symbol (task, cache, item,
  1800. checkpoint);
  1801. all_done = FALSE;
  1802. }
  1803. }
  1804. break;
  1805. case RSPAMD_TASK_STAGE_PRE_FILTERS:
  1806. /* Check for prefilters */
  1807. saved_priority = G_MININT;
  1808. all_done = TRUE;
  1809. for (i = 0; i < (gint) cache->prefilters->len; i++) {
  1810. item = g_ptr_array_index (cache->prefilters, i);
  1811. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1812. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1813. return TRUE;
  1814. }
  1815. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1816. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1817. /* Check priorities */
  1818. if (checkpoint->has_slow) {
  1819. /* Delay */
  1820. checkpoint->has_slow = FALSE;
  1821. return FALSE;
  1822. }
  1823. if (saved_priority == G_MININT) {
  1824. saved_priority = item->priority;
  1825. }
  1826. else {
  1827. if (item->priority < saved_priority &&
  1828. rspamd_session_events_pending (task->s) > start_events_pending) {
  1829. /*
  1830. * Delay further checks as we have higher
  1831. * priority filters to be processed
  1832. */
  1833. return FALSE;
  1834. }
  1835. }
  1836. rspamd_symcache_check_symbol (task, cache, item,
  1837. checkpoint);
  1838. all_done = FALSE;
  1839. }
  1840. }
  1841. break;
  1842. case RSPAMD_TASK_STAGE_FILTERS:
  1843. all_done = TRUE;
  1844. for (i = 0; i < (gint) checkpoint->version; i++) {
  1845. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1846. return TRUE;
  1847. }
  1848. item = g_ptr_array_index (checkpoint->order->d, i);
  1849. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1850. if (item->type & SYMBOL_TYPE_CLASSIFIER) {
  1851. continue;
  1852. }
  1853. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  1854. all_done = FALSE;
  1855. if (!rspamd_symcache_check_deps (task, cache, item,
  1856. checkpoint, 0, FALSE)) {
  1857. msg_debug_cache_task ("blocked execution of %d(%s) unless deps are "
  1858. "resolved",
  1859. item->id, item->symbol);
  1860. continue;
  1861. }
  1862. rspamd_symcache_check_symbol (task, cache, item,
  1863. checkpoint);
  1864. if (checkpoint->has_slow) {
  1865. /* Delay */
  1866. checkpoint->has_slow = FALSE;
  1867. return FALSE;
  1868. }
  1869. }
  1870. if (!(item->type & SYMBOL_TYPE_FINE)) {
  1871. if (rspamd_symcache_metric_limit (task, checkpoint)) {
  1872. msg_info_task ("task has already scored more than %.2f, so do "
  1873. "not "
  1874. "plan more checks",
  1875. checkpoint->rs->score);
  1876. all_done = TRUE;
  1877. break;
  1878. }
  1879. }
  1880. }
  1881. break;
  1882. case RSPAMD_TASK_STAGE_POST_FILTERS:
  1883. /* Check for postfilters */
  1884. saved_priority = G_MININT;
  1885. all_done = TRUE;
  1886. for (i = 0; i < (gint) cache->postfilters->len; i++) {
  1887. if (RSPAMD_TASK_IS_SKIPPED (task)) {
  1888. return TRUE;
  1889. }
  1890. item = g_ptr_array_index (cache->postfilters, i);
  1891. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1892. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1893. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1894. /* Check priorities */
  1895. all_done = FALSE;
  1896. if (checkpoint->has_slow) {
  1897. /* Delay */
  1898. checkpoint->has_slow = FALSE;
  1899. return FALSE;
  1900. }
  1901. if (saved_priority == G_MININT) {
  1902. saved_priority = item->priority;
  1903. }
  1904. else {
  1905. if (item->priority > saved_priority &&
  1906. rspamd_session_events_pending (task->s) > start_events_pending) {
  1907. /*
  1908. * Delay further checks as we have higher
  1909. * priority filters to be processed
  1910. */
  1911. return FALSE;
  1912. }
  1913. }
  1914. rspamd_symcache_check_symbol (task, cache, item,
  1915. checkpoint);
  1916. }
  1917. }
  1918. break;
  1919. case RSPAMD_TASK_STAGE_IDEMPOTENT:
  1920. /* Check for postfilters */
  1921. saved_priority = G_MININT;
  1922. for (i = 0; i < (gint) cache->idempotent->len; i++) {
  1923. item = g_ptr_array_index (cache->idempotent, i);
  1924. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  1925. if (!CHECK_START_BIT (checkpoint, dyn_item) &&
  1926. !CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  1927. /* Check priorities */
  1928. if (checkpoint->has_slow) {
  1929. /* Delay */
  1930. checkpoint->has_slow = FALSE;
  1931. return FALSE;
  1932. }
  1933. if (saved_priority == G_MININT) {
  1934. saved_priority = item->priority;
  1935. }
  1936. else {
  1937. if (item->priority > saved_priority &&
  1938. rspamd_session_events_pending (task->s) > start_events_pending) {
  1939. /*
  1940. * Delay further checks as we have higher
  1941. * priority filters to be processed
  1942. */
  1943. return FALSE;
  1944. }
  1945. }
  1946. rspamd_symcache_check_symbol (task, cache, item,
  1947. checkpoint);
  1948. }
  1949. }
  1950. break;
  1951. default:
  1952. g_assert_not_reached ();
  1953. }
  1954. return all_done;
  1955. }
  1956. struct counters_cbdata {
  1957. ucl_object_t *top;
  1958. struct rspamd_symcache *cache;
  1959. };
  /*
   * Helpers that keep only a few decimal digits of a double.
   * P10(N) pastes a literal power of ten (N must be a plain digit sequence);
   * ROUND_DOUBLE_DIGITS() drops the remaining digits with floor(), i.e. it
   * rounds towards negative infinity; ROUND_DOUBLE() keeps three digits.
   */
  #define P10(EXP) (1e##EXP)
  #define ROUND_DOUBLE_DIGITS(val, ndigits) (floor((val) * P10(ndigits)) / P10(ndigits))
  #define ROUND_DOUBLE(val) ROUND_DOUBLE_DIGITS(val, 3)
  1964. static void
  1965. rspamd_symcache_counters_cb (gpointer k, gpointer v, gpointer ud)
  1966. {
  1967. struct counters_cbdata *cbd = ud;
  1968. ucl_object_t *obj, *top;
  1969. struct rspamd_symcache_item *item = v, *parent;
  1970. const gchar *symbol = k;
  1971. top = cbd->top;
  1972. obj = ucl_object_typed_new (UCL_OBJECT);
  1973. ucl_object_insert_key (obj, ucl_object_fromstring (symbol ? symbol : "unknown"),
  1974. "symbol", 0, false);
  1975. if (item->is_virtual) {
  1976. if (!(item->type & SYMBOL_TYPE_GHOST)) {
  1977. parent = g_ptr_array_index (cbd->cache->items_by_id,
  1978. item->specific.virtual.parent);
  1979. ucl_object_insert_key (obj,
  1980. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  1981. "weight", 0, false);
  1982. ucl_object_insert_key (obj,
  1983. ucl_object_fromdouble (ROUND_DOUBLE (parent->st->avg_frequency)),
  1984. "frequency", 0, false);
  1985. ucl_object_insert_key (obj,
  1986. ucl_object_fromint (parent->st->total_hits),
  1987. "hits", 0, false);
  1988. ucl_object_insert_key (obj,
  1989. ucl_object_fromdouble (ROUND_DOUBLE (parent->st->avg_time)),
  1990. "time", 0, false);
  1991. }
  1992. else {
  1993. ucl_object_insert_key (obj,
  1994. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  1995. "weight", 0, false);
  1996. ucl_object_insert_key (obj,
  1997. ucl_object_fromdouble (0.0),
  1998. "frequency", 0, false);
  1999. ucl_object_insert_key (obj,
  2000. ucl_object_fromdouble (0.0),
  2001. "hits", 0, false);
  2002. ucl_object_insert_key (obj,
  2003. ucl_object_fromdouble (0.0),
  2004. "time", 0, false);
  2005. }
  2006. }
  2007. else {
  2008. ucl_object_insert_key (obj,
  2009. ucl_object_fromdouble (ROUND_DOUBLE (item->st->weight)),
  2010. "weight", 0, false);
  2011. ucl_object_insert_key (obj,
  2012. ucl_object_fromdouble (ROUND_DOUBLE (item->st->avg_frequency)),
  2013. "frequency", 0, false);
  2014. ucl_object_insert_key (obj,
  2015. ucl_object_fromint (item->st->total_hits),
  2016. "hits", 0, false);
  2017. ucl_object_insert_key (obj,
  2018. ucl_object_fromdouble (ROUND_DOUBLE (item->st->avg_time)),
  2019. "time", 0, false);
  2020. }
  2021. ucl_array_append (top, obj);
  2022. }
  2023. #undef ROUND_DOUBLE
  2024. ucl_object_t *
  2025. rspamd_symcache_counters (struct rspamd_symcache *cache)
  2026. {
  2027. ucl_object_t *top;
  2028. struct counters_cbdata cbd;
  2029. g_assert (cache != NULL);
  2030. top = ucl_object_typed_new (UCL_ARRAY);
  2031. cbd.top = top;
  2032. cbd.cache = cache;
  2033. g_hash_table_foreach (cache->items_by_symbol,
  2034. rspamd_symcache_counters_cb, &cbd);
  2035. return top;
  2036. }
  2037. static void
  2038. rspamd_symcache_call_peak_cb (struct ev_loop *ev_base,
  2039. struct rspamd_symcache *cache,
  2040. struct rspamd_symcache_item *item,
  2041. gdouble cur_value,
  2042. gdouble cur_err)
  2043. {
  2044. lua_State *L = cache->cfg->lua_state;
  2045. struct ev_loop **pbase;
  2046. lua_rawgeti (L, LUA_REGISTRYINDEX, cache->peak_cb);
  2047. pbase = lua_newuserdata (L, sizeof (*pbase));
  2048. *pbase = ev_base;
  2049. rspamd_lua_setclass (L, "rspamd{ev_base}", -1);
  2050. lua_pushstring (L, item->symbol);
  2051. lua_pushnumber (L, item->st->avg_frequency);
  2052. lua_pushnumber (L, sqrt (item->st->stddev_frequency));
  2053. lua_pushnumber (L, cur_value);
  2054. lua_pushnumber (L, cur_err);
  2055. if (lua_pcall (L, 6, 0, 0) != 0) {
  2056. msg_info_cache ("call to peak function for %s failed: %s",
  2057. item->symbol, lua_tostring (L, -1));
  2058. lua_pop (L, 1);
  2059. }
  2060. }
  2061. static void
  2062. rspamd_symcache_resort_cb (EV_P_ ev_timer *w, int revents)
  2063. {
  2064. gdouble tm;
  2065. struct rspamd_cache_refresh_cbdata *cbdata =
  2066. (struct rspamd_cache_refresh_cbdata *)w->data;
  2067. struct rspamd_symcache *cache;
  2068. struct rspamd_symcache_item *item;
  2069. guint i;
  2070. gdouble cur_ticks;
  2071. static const double decay_rate = 0.25;
  2072. cache = cbdata->cache;
  2073. /* Plan new event */
  2074. tm = rspamd_time_jitter (cache->reload_time, 0);
  2075. cur_ticks = rspamd_get_ticks (FALSE);
  2076. msg_debug_cache ("resort symbols cache, next reload in %.2f seconds", tm);
  2077. g_assert (cache != NULL);
  2078. cbdata->resort_ev.repeat = tm;
  2079. ev_timer_again (EV_A_ w);
  2080. if (rspamd_worker_is_primary_controller (cbdata->w)) {
  2081. /* Gather stats from shared execution times */
  2082. for (i = 0; i < cache->filters->len; i ++) {
  2083. item = g_ptr_array_index (cache->filters, i);
  2084. item->st->total_hits += item->st->hits;
  2085. g_atomic_int_set (&item->st->hits, 0);
  2086. if (item->last_count > 0 && cbdata->w->index == 0) {
  2087. /* Calculate frequency */
  2088. gdouble cur_err, cur_value;
  2089. cur_value = (item->st->total_hits - item->last_count) /
  2090. (cur_ticks - cbdata->last_resort);
  2091. rspamd_set_counter_ema (&item->st->frequency_counter,
  2092. cur_value, decay_rate);
  2093. item->st->avg_frequency = item->st->frequency_counter.mean;
  2094. item->st->stddev_frequency = item->st->frequency_counter.stddev;
  2095. if (cur_value > 0) {
  2096. msg_debug_cache ("frequency for %s is %.2f, avg: %.2f",
  2097. item->symbol, cur_value, item->st->avg_frequency);
  2098. }
  2099. cur_err = (item->st->avg_frequency - cur_value);
  2100. cur_err *= cur_err;
  2101. /*
  2102. * TODO: replace magic number
  2103. */
  2104. if (item->st->frequency_counter.number > 10 &&
  2105. cur_err > sqrt (item->st->stddev_frequency) * 3) {
  2106. item->frequency_peaks ++;
  2107. msg_debug_cache ("peak found for %s is %.2f, avg: %.2f, "
  2108. "stddev: %.2f, error: %.2f, peaks: %d",
  2109. item->symbol, cur_value,
  2110. item->st->avg_frequency,
  2111. item->st->stddev_frequency,
  2112. cur_err,
  2113. item->frequency_peaks);
  2114. if (cache->peak_cb != -1) {
  2115. rspamd_symcache_call_peak_cb (cbdata->event_loop,
  2116. cache, item,
  2117. cur_value, cur_err);
  2118. }
  2119. }
  2120. }
  2121. item->last_count = item->st->total_hits;
  2122. if (item->cd->number > 0) {
  2123. if (item->type & (SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_NORMAL)) {
  2124. item->st->avg_time = item->cd->mean;
  2125. rspamd_set_counter_ema (&item->st->time_counter,
  2126. item->st->avg_time, decay_rate);
  2127. item->st->avg_time = item->st->time_counter.mean;
  2128. memset (item->cd, 0, sizeof (*item->cd));
  2129. }
  2130. }
  2131. }
  2132. cbdata->last_resort = cur_ticks;
  2133. /* We don't do actual sorting due to topological guarantees */
  2134. }
  2135. }
  2136. static void
  2137. rspamd_symcache_refresh_dtor (void *d)
  2138. {
  2139. struct rspamd_cache_refresh_cbdata *cbdata =
  2140. (struct rspamd_cache_refresh_cbdata *)d;
  2141. ev_timer_stop (cbdata->event_loop, &cbdata->resort_ev);
  2142. }
  2143. void
  2144. rspamd_symcache_start_refresh (struct rspamd_symcache *cache,
  2145. struct ev_loop *ev_base, struct rspamd_worker *w)
  2146. {
  2147. gdouble tm;
  2148. struct rspamd_cache_refresh_cbdata *cbdata;
  2149. cbdata = rspamd_mempool_alloc0 (cache->static_pool, sizeof (*cbdata));
  2150. cbdata->last_resort = rspamd_get_ticks (TRUE);
  2151. cbdata->event_loop = ev_base;
  2152. cbdata->w = w;
  2153. cbdata->cache = cache;
  2154. tm = rspamd_time_jitter (cache->reload_time, 0);
  2155. msg_debug_cache ("next reload in %.2f seconds", tm);
  2156. g_assert (cache != NULL);
  2157. cbdata->resort_ev.data = cbdata;
  2158. ev_timer_init (&cbdata->resort_ev, rspamd_symcache_resort_cb,
  2159. tm, tm);
  2160. ev_timer_start (cbdata->event_loop, &cbdata->resort_ev);
  2161. rspamd_mempool_add_destructor (cache->static_pool,
  2162. rspamd_symcache_refresh_dtor, cbdata);
  2163. }
  2164. void
  2165. rspamd_symcache_inc_frequency (struct rspamd_symcache *cache,
  2166. struct rspamd_symcache_item *item)
  2167. {
  2168. if (item != NULL) {
  2169. g_atomic_int_inc (&item->st->hits);
  2170. }
  2171. }
  2172. void
  2173. rspamd_symcache_add_dependency (struct rspamd_symcache *cache,
  2174. gint id_from, const gchar *to,
  2175. gint virtual_id_from)
  2176. {
  2177. struct rspamd_symcache_item *source, *vsource;
  2178. struct cache_dependency *dep;
  2179. g_assert (id_from >= 0 && id_from < (gint)cache->items_by_id->len);
  2180. source = (struct rspamd_symcache_item *)g_ptr_array_index (cache->items_by_id, id_from);
  2181. dep = rspamd_mempool_alloc (cache->static_pool, sizeof (*dep));
  2182. dep->id = id_from;
  2183. dep->sym = rspamd_mempool_strdup (cache->static_pool, to);
  2184. /* Will be filled later */
  2185. dep->item = NULL;
  2186. dep->vid = -1;
  2187. g_ptr_array_add (source->deps, dep);
  2188. if (virtual_id_from >= 0) {
  2189. g_assert (virtual_id_from < (gint)cache->virtual->len);
  2190. /* We need that for settings id propagation */
  2191. vsource = (struct rspamd_symcache_item *)
  2192. g_ptr_array_index (cache->virtual, virtual_id_from);
  2193. dep = rspamd_mempool_alloc (cache->static_pool, sizeof (*dep));
  2194. dep->vid = virtual_id_from;
  2195. dep->id = -1;
  2196. dep->sym = rspamd_mempool_strdup (cache->static_pool, to);
  2197. /* Will be filled later */
  2198. dep->item = NULL;
  2199. g_ptr_array_add (vsource->deps, dep);
  2200. }
  2201. }
  2202. void
  2203. rspamd_symcache_add_delayed_dependency (struct rspamd_symcache *cache,
  2204. const gchar *from, const gchar *to)
  2205. {
  2206. struct delayed_cache_dependency *ddep;
  2207. g_assert (from != NULL);
  2208. g_assert (to != NULL);
  2209. ddep = g_malloc0 (sizeof (*ddep));
  2210. ddep->from = g_strdup (from);
  2211. ddep->to = g_strdup (to);
  2212. cache->delayed_deps = g_list_prepend (cache->delayed_deps, ddep);
  2213. }
  2214. gint
  2215. rspamd_symcache_find_symbol (struct rspamd_symcache *cache, const gchar *name)
  2216. {
  2217. struct rspamd_symcache_item *item;
  2218. g_assert (cache != NULL);
  2219. if (name == NULL) {
  2220. return -1;
  2221. }
  2222. item = g_hash_table_lookup (cache->items_by_symbol, name);
  2223. if (item != NULL) {
  2224. return item->id;
  2225. }
  2226. return -1;
  2227. }
  2228. gboolean
  2229. rspamd_symcache_stat_symbol (struct rspamd_symcache *cache,
  2230. const gchar *name,
  2231. gdouble *frequency,
  2232. gdouble *freq_stddev,
  2233. gdouble *tm,
  2234. guint *nhits)
  2235. {
  2236. struct rspamd_symcache_item *item;
  2237. g_assert (cache != NULL);
  2238. if (name == NULL) {
  2239. return FALSE;
  2240. }
  2241. item = g_hash_table_lookup (cache->items_by_symbol, name);
  2242. if (item != NULL) {
  2243. *frequency = item->st->avg_frequency;
  2244. *freq_stddev = sqrt (item->st->stddev_frequency);
  2245. *tm = item->st->time_counter.mean;
  2246. if (nhits) {
  2247. *nhits = item->st->hits;
  2248. }
  2249. return TRUE;
  2250. }
  2251. return FALSE;
  2252. }
  2253. const gchar *
  2254. rspamd_symcache_symbol_by_id (struct rspamd_symcache *cache,
  2255. gint id)
  2256. {
  2257. struct rspamd_symcache_item *item;
  2258. g_assert (cache != NULL);
  2259. if (id < 0 || id >= (gint)cache->items_by_id->len) {
  2260. return NULL;
  2261. }
  2262. item = g_ptr_array_index (cache->items_by_id, id);
  2263. return item->symbol;
  2264. }
  2265. guint
  2266. rspamd_symcache_stats_symbols_count (struct rspamd_symcache *cache)
  2267. {
  2268. g_assert (cache != NULL);
  2269. return cache->stats_symbols_count;
  2270. }
  2271. void
  2272. rspamd_symcache_disable_all_symbols (struct rspamd_task *task,
  2273. struct rspamd_symcache *cache,
  2274. guint skip_mask)
  2275. {
  2276. struct cache_savepoint *checkpoint;
  2277. guint i;
  2278. struct rspamd_symcache_item *item;
  2279. struct rspamd_symcache_dynamic_item *dyn_item;
  2280. if (task->checkpoint == NULL) {
  2281. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2282. task->checkpoint = checkpoint;
  2283. }
  2284. else {
  2285. checkpoint = task->checkpoint;
  2286. }
  2287. /* Enable for squeezed symbols */
  2288. PTR_ARRAY_FOREACH (cache->items_by_id, i, item) {
  2289. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2290. if (!(item->type & (skip_mask))) {
  2291. SET_FINISH_BIT (checkpoint, dyn_item);
  2292. SET_START_BIT (checkpoint, dyn_item);
  2293. }
  2294. }
  2295. }
  2296. static void
  2297. rspamd_symcache_disable_symbol_checkpoint (struct rspamd_task *task,
  2298. struct rspamd_symcache *cache, const gchar *symbol)
  2299. {
  2300. struct cache_savepoint *checkpoint;
  2301. struct rspamd_symcache_item *item;
  2302. struct rspamd_symcache_dynamic_item *dyn_item;
  2303. if (task->checkpoint == NULL) {
  2304. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2305. task->checkpoint = checkpoint;
  2306. }
  2307. else {
  2308. checkpoint = task->checkpoint;
  2309. }
  2310. item = rspamd_symcache_find_filter (cache, symbol, true);
  2311. if (item) {
  2312. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2313. SET_FINISH_BIT (checkpoint, dyn_item);
  2314. SET_START_BIT (checkpoint, dyn_item);
  2315. msg_debug_cache_task ("disable execution of %s", symbol);
  2316. }
  2317. else {
  2318. msg_info_task ("cannot disable %s: not found", symbol);
  2319. }
  2320. }
  2321. static void
  2322. rspamd_symcache_enable_symbol_checkpoint (struct rspamd_task *task,
  2323. struct rspamd_symcache *cache, const gchar *symbol)
  2324. {
  2325. struct cache_savepoint *checkpoint;
  2326. struct rspamd_symcache_item *item;
  2327. struct rspamd_symcache_dynamic_item *dyn_item;
  2328. if (task->checkpoint == NULL) {
  2329. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2330. task->checkpoint = checkpoint;
  2331. }
  2332. else {
  2333. checkpoint = task->checkpoint;
  2334. }
  2335. item = rspamd_symcache_find_filter (cache, symbol, true);
  2336. if (item) {
  2337. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2338. dyn_item->finished = 0;
  2339. dyn_item->started = 0;
  2340. msg_debug_cache_task ("enable execution of %s", symbol);
  2341. }
  2342. else {
  2343. msg_info_task ("cannot enable %s: not found", symbol);
  2344. }
  2345. }
  2346. struct rspamd_abstract_callback_data*
  2347. rspamd_symcache_get_cbdata (struct rspamd_symcache *cache,
  2348. const gchar *symbol)
  2349. {
  2350. struct rspamd_symcache_item *item;
  2351. g_assert (cache != NULL);
  2352. g_assert (symbol != NULL);
  2353. item = rspamd_symcache_find_filter (cache, symbol, true);
  2354. if (item) {
  2355. return item->specific.normal.user_data;
  2356. }
  2357. return NULL;
  2358. }
  2359. gboolean
  2360. rspamd_symcache_is_checked (struct rspamd_task *task,
  2361. struct rspamd_symcache *cache, const gchar *symbol)
  2362. {
  2363. struct cache_savepoint *checkpoint;
  2364. struct rspamd_symcache_item *item;
  2365. struct rspamd_symcache_dynamic_item *dyn_item;
  2366. g_assert (cache != NULL);
  2367. g_assert (symbol != NULL);
  2368. if (task->checkpoint == NULL) {
  2369. checkpoint = rspamd_symcache_make_checkpoint (task, cache);
  2370. task->checkpoint = checkpoint;
  2371. }
  2372. else {
  2373. checkpoint = task->checkpoint;
  2374. }
  2375. item = rspamd_symcache_find_filter (cache, symbol, true);
  2376. if (item) {
  2377. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2378. return dyn_item->started;
  2379. }
  2380. return FALSE;
  2381. }
  2382. void
  2383. rspamd_symcache_disable_symbol_perm (struct rspamd_symcache *cache,
  2384. const gchar *symbol,
  2385. gboolean resolve_parent)
  2386. {
  2387. struct rspamd_symcache_item *item;
  2388. g_assert (cache != NULL);
  2389. g_assert (symbol != NULL);
  2390. item = rspamd_symcache_find_filter (cache, symbol, resolve_parent);
  2391. if (item) {
  2392. item->enabled = FALSE;
  2393. }
  2394. }
  2395. void
  2396. rspamd_symcache_enable_symbol_perm (struct rspamd_symcache *cache,
  2397. const gchar *symbol)
  2398. {
  2399. struct rspamd_symcache_item *item;
  2400. g_assert (cache != NULL);
  2401. g_assert (symbol != NULL);
  2402. item = rspamd_symcache_find_filter (cache, symbol, true);
  2403. if (item) {
  2404. item->enabled = TRUE;
  2405. }
  2406. }
  2407. guint64
  2408. rspamd_symcache_get_cksum (struct rspamd_symcache *cache)
  2409. {
  2410. g_assert (cache != NULL);
  2411. return cache->cksum;
  2412. }
  2413. gboolean
  2414. rspamd_symcache_is_symbol_enabled (struct rspamd_task *task,
  2415. struct rspamd_symcache *cache,
  2416. const gchar *symbol)
  2417. {
  2418. struct cache_savepoint *checkpoint;
  2419. struct rspamd_symcache_item *item;
  2420. struct rspamd_symcache_dynamic_item *dyn_item;
  2421. lua_State *L;
  2422. struct rspamd_task **ptask;
  2423. gboolean ret = TRUE;
  2424. g_assert (cache != NULL);
  2425. g_assert (symbol != NULL);
  2426. checkpoint = task->checkpoint;
  2427. if (checkpoint) {
  2428. item = rspamd_symcache_find_filter (cache, symbol, true);
  2429. if (item) {
  2430. if (!rspamd_symcache_is_item_allowed (task, item, TRUE)) {
  2431. ret = FALSE;
  2432. }
  2433. else {
  2434. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2435. if (CHECK_START_BIT (checkpoint, dyn_item)) {
  2436. ret = FALSE;
  2437. }
  2438. else {
  2439. if (item->specific.normal.conditions) {
  2440. struct rspamd_symcache_condition *cur_cond;
  2441. DL_FOREACH (item->specific.normal.conditions, cur_cond) {
  2442. /*
  2443. * We also executes condition callback to check
  2444. * if we need this symbol
  2445. */
  2446. L = task->cfg->lua_state;
  2447. lua_rawgeti (L, LUA_REGISTRYINDEX, cur_cond->cb);
  2448. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  2449. rspamd_lua_setclass (L, "rspamd{task}", -1);
  2450. *ptask = task;
  2451. if (lua_pcall (L, 1, 1, 0) != 0) {
  2452. msg_info_task ("call to condition for %s failed: %s",
  2453. item->symbol, lua_tostring (L, -1));
  2454. lua_pop (L, 1);
  2455. }
  2456. else {
  2457. ret = lua_toboolean (L, -1);
  2458. lua_pop (L, 1);
  2459. }
  2460. if (!ret) {
  2461. break;
  2462. }
  2463. }
  2464. }
  2465. }
  2466. }
  2467. }
  2468. }
  2469. return ret;
  2470. }
  2471. gboolean
  2472. rspamd_symcache_enable_symbol (struct rspamd_task *task,
  2473. struct rspamd_symcache *cache,
  2474. const gchar *symbol)
  2475. {
  2476. struct cache_savepoint *checkpoint;
  2477. struct rspamd_symcache_item *item;
  2478. struct rspamd_symcache_dynamic_item *dyn_item;
  2479. gboolean ret = FALSE;
  2480. g_assert (cache != NULL);
  2481. g_assert (symbol != NULL);
  2482. checkpoint = task->checkpoint;
  2483. if (checkpoint) {
  2484. item = rspamd_symcache_find_filter (cache, symbol, true);
  2485. if (item) {
  2486. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2487. if (!CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  2488. ret = TRUE;
  2489. CLR_START_BIT (checkpoint, dyn_item);
  2490. CLR_FINISH_BIT (checkpoint, dyn_item);
  2491. }
  2492. else {
  2493. msg_debug_task ("cannot enable symbol %s: already started", symbol);
  2494. }
  2495. }
  2496. }
  2497. return ret;
  2498. }
  2499. gboolean
  2500. rspamd_symcache_disable_symbol (struct rspamd_task *task,
  2501. struct rspamd_symcache *cache,
  2502. const gchar *symbol)
  2503. {
  2504. struct cache_savepoint *checkpoint;
  2505. struct rspamd_symcache_item *item;
  2506. struct rspamd_symcache_dynamic_item *dyn_item;
  2507. gboolean ret = FALSE;
  2508. g_assert (cache != NULL);
  2509. g_assert (symbol != NULL);
  2510. checkpoint = task->checkpoint;
  2511. if (checkpoint) {
  2512. item = rspamd_symcache_find_filter (cache, symbol, true);
  2513. if (item) {
  2514. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2515. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  2516. ret = TRUE;
  2517. SET_START_BIT (checkpoint, dyn_item);
  2518. SET_FINISH_BIT (checkpoint, dyn_item);
  2519. }
  2520. else {
  2521. if (!CHECK_FINISH_BIT (checkpoint, dyn_item)) {
  2522. msg_warn_task ("cannot disable symbol %s: already started",
  2523. symbol);
  2524. }
  2525. }
  2526. }
  2527. }
  2528. return ret;
  2529. }
  2530. void
  2531. rspamd_symcache_foreach (struct rspamd_symcache *cache,
  2532. void (*func) (struct rspamd_symcache_item *, gpointer),
  2533. gpointer ud)
  2534. {
  2535. struct rspamd_symcache_item *item;
  2536. GHashTableIter it;
  2537. gpointer k, v;
  2538. g_hash_table_iter_init (&it, cache->items_by_symbol);
  2539. while (g_hash_table_iter_next (&it, &k, &v)) {
  2540. item = (struct rspamd_symcache_item *)v;
  2541. func (item, ud);
  2542. }
  2543. }
  2544. struct rspamd_symcache_item *
  2545. rspamd_symcache_get_cur_item (struct rspamd_task *task)
  2546. {
  2547. struct cache_savepoint *checkpoint = task->checkpoint;
  2548. if (checkpoint == NULL) {
  2549. return NULL;
  2550. }
  2551. return checkpoint->cur_item;
  2552. }
  2553. /**
  2554. * Replaces the current item being processed.
  2555. * Returns the current item being processed (if any)
  2556. * @param task
  2557. * @param item
  2558. * @return
  2559. */
  2560. struct rspamd_symcache_item *
  2561. rspamd_symcache_set_cur_item (struct rspamd_task *task,
  2562. struct rspamd_symcache_item *item)
  2563. {
  2564. struct cache_savepoint *checkpoint = task->checkpoint;
  2565. struct rspamd_symcache_item *ex;
  2566. ex = checkpoint->cur_item;
  2567. checkpoint->cur_item = item;
  2568. return ex;
  2569. }
  2570. struct rspamd_symcache_delayed_cbdata {
  2571. struct rspamd_symcache_item *item;
  2572. struct rspamd_task *task;
  2573. struct rspamd_async_event *event;
  2574. struct ev_timer tm;
  2575. };
  2576. static void
  2577. rspamd_symcache_delayed_item_fin (gpointer ud)
  2578. {
  2579. struct rspamd_symcache_delayed_cbdata *cbd =
  2580. (struct rspamd_symcache_delayed_cbdata *)ud;
  2581. struct rspamd_task *task;
  2582. struct cache_savepoint *checkpoint;
  2583. task = cbd->task;
  2584. checkpoint = task->checkpoint;
  2585. checkpoint->has_slow = FALSE;
  2586. ev_timer_stop (task->event_loop, &cbd->tm);
  2587. }
  2588. static void
  2589. rspamd_symcache_delayed_item_cb (EV_P_ ev_timer *w, int what)
  2590. {
  2591. struct rspamd_symcache_delayed_cbdata *cbd =
  2592. (struct rspamd_symcache_delayed_cbdata *)w->data;
  2593. struct rspamd_symcache_item *item;
  2594. struct rspamd_task *task;
  2595. struct cache_dependency *rdep;
  2596. struct cache_savepoint *checkpoint;
  2597. struct rspamd_symcache_dynamic_item *dyn_item;
  2598. guint i;
  2599. item = cbd->item;
  2600. task = cbd->task;
  2601. checkpoint = task->checkpoint;
  2602. cbd->event = NULL;
  2603. /* Timer will be stopped here */
  2604. rspamd_session_remove_event (task->s,
  2605. rspamd_symcache_delayed_item_fin, cbd);
  2606. /* Process all reverse dependencies */
  2607. PTR_ARRAY_FOREACH (item->rdeps, i, rdep) {
  2608. if (rdep->item) {
  2609. dyn_item = rspamd_symcache_get_dynamic (checkpoint, rdep->item);
  2610. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  2611. msg_debug_cache_task ("check item %d(%s) rdep of %s ",
  2612. rdep->item->id, rdep->item->symbol, item->symbol);
  2613. if (!rspamd_symcache_check_deps (task, task->cfg->cache,
  2614. rdep->item,
  2615. checkpoint, 0, FALSE)) {
  2616. msg_debug_cache_task ("blocked execution of %d(%s) rdep of %s "
  2617. "unless deps are resolved",
  2618. rdep->item->id, rdep->item->symbol, item->symbol);
  2619. }
  2620. else {
  2621. rspamd_symcache_check_symbol (task, task->cfg->cache,
  2622. rdep->item,
  2623. checkpoint);
  2624. }
  2625. }
  2626. }
  2627. }
  2628. }
  2629. static void
  2630. rspamd_delayed_timer_dtor (gpointer d)
  2631. {
  2632. struct rspamd_symcache_delayed_cbdata *cbd =
  2633. (struct rspamd_symcache_delayed_cbdata *)d;
  2634. if (cbd->event) {
  2635. /* Event has not been executed */
  2636. rspamd_session_remove_event (cbd->task->s,
  2637. rspamd_symcache_delayed_item_fin, cbd);
  2638. cbd->event = NULL;
  2639. }
  2640. }
  2641. /**
  2642. * Finalize the current async element potentially calling its deps
  2643. */
  2644. void
  2645. rspamd_symcache_finalize_item (struct rspamd_task *task,
  2646. struct rspamd_symcache_item *item)
  2647. {
  2648. struct cache_savepoint *checkpoint = task->checkpoint;
  2649. struct cache_dependency *rdep;
  2650. struct rspamd_symcache_dynamic_item *dyn_item;
  2651. gdouble diff;
  2652. guint i;
  2653. gboolean enable_slow_timer = FALSE;
  2654. const gdouble slow_diff_limit = 300;
  2655. /* Sanity checks */
  2656. g_assert (checkpoint->items_inflight > 0);
  2657. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2658. if (dyn_item->async_events > 0) {
  2659. /*
  2660. * XXX: Race condition
  2661. *
  2662. * It is possible that some async event is still in flight, but we
  2663. * already know its result, however, it is the responsibility of that
  2664. * event to decrease async events count and call this function
  2665. * one more time
  2666. */
  2667. msg_debug_cache_task ("postpone finalisation of %s(%d) as there are %d "
  2668. "async events pendning",
  2669. item->symbol, item->id, dyn_item->async_events);
  2670. return;
  2671. }
  2672. msg_debug_cache_task ("process finalize for item %s(%d)", item->symbol, item->id);
  2673. SET_FINISH_BIT (checkpoint, dyn_item);
  2674. checkpoint->items_inflight --;
  2675. checkpoint->cur_item = NULL;
  2676. if (checkpoint->profile) {
  2677. ev_now_update_if_cheap (task->event_loop);
  2678. diff = ((ev_now (task->event_loop) - checkpoint->profile_start) * 1e3 -
  2679. dyn_item->start_msec);
  2680. if (diff > slow_diff_limit) {
  2681. if (!checkpoint->has_slow) {
  2682. checkpoint->has_slow = TRUE;
  2683. enable_slow_timer = TRUE;
  2684. msg_info_task ("slow rule: %s(%d): %.2f ms; enable slow timer delay",
  2685. item->symbol, item->id,
  2686. diff);
  2687. }
  2688. else {
  2689. msg_info_task ("slow rule: %s(%d): %.2f ms",
  2690. item->symbol, item->id,
  2691. diff);
  2692. }
  2693. }
  2694. if (G_UNLIKELY (RSPAMD_TASK_IS_PROFILING (task))) {
  2695. rspamd_task_profile_set (task, item->symbol, diff);
  2696. }
  2697. if (rspamd_worker_is_scanner (task->worker)) {
  2698. rspamd_set_counter (item->cd, diff);
  2699. }
  2700. }
  2701. if (enable_slow_timer) {
  2702. struct rspamd_symcache_delayed_cbdata *cbd =
  2703. rspamd_mempool_alloc (task->task_pool,sizeof (*cbd));
  2704. /* Add timer to allow something else to be executed */
  2705. ev_timer *tm = &cbd->tm;
  2706. cbd->event = rspamd_session_add_event (task->s,
  2707. rspamd_symcache_delayed_item_fin, cbd,
  2708. "symcache");
  2709. /*
  2710. * If no event could be added, then we are already in the destruction
  2711. * phase. So the main issue is to deal with has slow here
  2712. */
  2713. if (cbd->event) {
  2714. ev_timer_init (tm, rspamd_symcache_delayed_item_cb, 0.1, 0.0);
  2715. ev_set_priority (tm, EV_MINPRI);
  2716. rspamd_mempool_add_destructor (task->task_pool,
  2717. rspamd_delayed_timer_dtor, cbd);
  2718. cbd->task = task;
  2719. cbd->item = item;
  2720. tm->data = cbd;
  2721. ev_timer_start (task->event_loop, tm);
  2722. }
  2723. else {
  2724. /* Just reset as no timer is added */
  2725. checkpoint->has_slow = FALSE;
  2726. }
  2727. return;
  2728. }
  2729. /* Process all reverse dependencies */
  2730. PTR_ARRAY_FOREACH (item->rdeps, i, rdep) {
  2731. if (rdep->item) {
  2732. dyn_item = rspamd_symcache_get_dynamic (checkpoint, rdep->item);
  2733. if (!CHECK_START_BIT (checkpoint, dyn_item)) {
  2734. msg_debug_cache_task ("check item %d(%s) rdep of %s ",
  2735. rdep->item->id, rdep->item->symbol, item->symbol);
  2736. if (!rspamd_symcache_check_deps (task, task->cfg->cache,
  2737. rdep->item,
  2738. checkpoint, 0, FALSE)) {
  2739. msg_debug_cache_task ("blocked execution of %d(%s) rdep of %s "
  2740. "unless deps are resolved",
  2741. rdep->item->id, rdep->item->symbol, item->symbol);
  2742. }
  2743. else {
  2744. rspamd_symcache_check_symbol (task, task->cfg->cache,
  2745. rdep->item,
  2746. checkpoint);
  2747. }
  2748. }
  2749. }
  2750. }
  2751. }
  2752. guint
  2753. rspamd_symcache_item_async_inc_full (struct rspamd_task *task,
  2754. struct rspamd_symcache_item *item,
  2755. const gchar *subsystem,
  2756. const gchar *loc)
  2757. {
  2758. struct rspamd_symcache_dynamic_item *dyn_item;
  2759. struct cache_savepoint *checkpoint = task->checkpoint;
  2760. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2761. msg_debug_cache_task ("increase async events counter for %s(%d) = %d + 1; "
  2762. "subsystem %s (%s)",
  2763. item->symbol, item->id, dyn_item->async_events, subsystem, loc);
  2764. return ++dyn_item->async_events;
  2765. }
  2766. guint
  2767. rspamd_symcache_item_async_dec_full (struct rspamd_task *task,
  2768. struct rspamd_symcache_item *item,
  2769. const gchar *subsystem,
  2770. const gchar *loc)
  2771. {
  2772. struct rspamd_symcache_dynamic_item *dyn_item;
  2773. struct cache_savepoint *checkpoint = task->checkpoint;
  2774. dyn_item = rspamd_symcache_get_dynamic (checkpoint, item);
  2775. msg_debug_cache_task ("decrease async events counter for %s(%d) = %d - 1; "
  2776. "subsystem %s (%s)",
  2777. item->symbol, item->id, dyn_item->async_events, subsystem, loc);
  2778. g_assert (dyn_item->async_events > 0);
  2779. return --dyn_item->async_events;
  2780. }
  2781. gboolean
  2782. rspamd_symcache_item_async_dec_check_full (struct rspamd_task *task,
  2783. struct rspamd_symcache_item *item,
  2784. const gchar *subsystem,
  2785. const gchar *loc)
  2786. {
  2787. if (rspamd_symcache_item_async_dec_full (task, item, subsystem, loc) == 0) {
  2788. rspamd_symcache_finalize_item (task, item);
  2789. return TRUE;
  2790. }
  2791. return FALSE;
  2792. }
  2793. gboolean
  2794. rspamd_symcache_add_symbol_flags (struct rspamd_symcache *cache,
  2795. const gchar *symbol,
  2796. guint flags)
  2797. {
  2798. struct rspamd_symcache_item *item;
  2799. g_assert (cache != NULL);
  2800. g_assert (symbol != NULL);
  2801. item = rspamd_symcache_find_filter (cache, symbol, true);
  2802. if (item) {
  2803. item->type |= flags;
  2804. return TRUE;
  2805. }
  2806. return FALSE;
  2807. }
  2808. gboolean
  2809. rspamd_symcache_set_symbol_flags (struct rspamd_symcache *cache,
  2810. const gchar *symbol,
  2811. guint flags)
  2812. {
  2813. struct rspamd_symcache_item *item;
  2814. g_assert (cache != NULL);
  2815. g_assert (symbol != NULL);
  2816. item = rspamd_symcache_find_filter (cache, symbol, true);
  2817. if (item) {
  2818. item->type = flags;
  2819. return TRUE;
  2820. }
  2821. return FALSE;
  2822. }
  2823. guint
  2824. rspamd_symcache_get_symbol_flags (struct rspamd_symcache *cache,
  2825. const gchar *symbol)
  2826. {
  2827. struct rspamd_symcache_item *item;
  2828. g_assert (cache != NULL);
  2829. g_assert (symbol != NULL);
  2830. item = rspamd_symcache_find_filter (cache, symbol, true);
  2831. if (item) {
  2832. return item->type;
  2833. }
  2834. return 0;
  2835. }
  2836. void
  2837. rspamd_symcache_composites_foreach (struct rspamd_task *task,
  2838. struct rspamd_symcache *cache,
  2839. GHFunc func,
  2840. gpointer fd)
  2841. {
  2842. guint i;
  2843. struct rspamd_symcache_item *item;
  2844. struct rspamd_symcache_dynamic_item *dyn_item;
  2845. if (task->checkpoint == NULL) {
  2846. return;
  2847. }
  2848. PTR_ARRAY_FOREACH (cache->composites, i, item) {
  2849. dyn_item = rspamd_symcache_get_dynamic (task->checkpoint, item);
  2850. if (!CHECK_START_BIT (task->checkpoint, dyn_item)) {
  2851. /* Cannot do it due to 2 passes */
  2852. /* SET_START_BIT (task->checkpoint, dyn_item); */
  2853. func (item->symbol, item->specific.normal.user_data, fd);
  2854. SET_FINISH_BIT (task->checkpoint, dyn_item);
  2855. }
  2856. }
  2857. }
  2858. bool
  2859. rspamd_symcache_set_allowed_settings_ids (struct rspamd_symcache *cache,
  2860. const gchar *symbol,
  2861. const guint32 *ids,
  2862. guint nids)
  2863. {
  2864. struct rspamd_symcache_item *item;
  2865. item = rspamd_symcache_find_filter (cache, symbol, false);
  2866. if (item == NULL) {
  2867. return false;
  2868. }
  2869. if (nids <= G_N_ELEMENTS (item->allowed_ids.st)) {
  2870. /* Use static version */
  2871. memset (&item->allowed_ids, 0, sizeof (item->allowed_ids));
  2872. for (guint i = 0; i < nids; i++) {
  2873. item->allowed_ids.st[i] = ids[i];
  2874. }
  2875. }
  2876. else {
  2877. /* Need to use a separate list */
  2878. item->allowed_ids.dyn.e = -1; /* Flag */
  2879. item->allowed_ids.dyn.n = rspamd_mempool_alloc (cache->static_pool,
  2880. sizeof (guint32) * nids);
  2881. item->allowed_ids.dyn.len = nids;
  2882. item->allowed_ids.dyn.allocated = nids;
  2883. for (guint i = 0; i < nids; i++) {
  2884. item->allowed_ids.dyn.n[i] = ids[i];
  2885. }
  2886. /* Keep sorted */
  2887. qsort (item->allowed_ids.dyn.n, nids, sizeof (guint32), rspamd_id_cmp);
  2888. }
  2889. return true;
  2890. }
  2891. bool
  2892. rspamd_symcache_set_forbidden_settings_ids (struct rspamd_symcache *cache,
  2893. const gchar *symbol,
  2894. const guint32 *ids,
  2895. guint nids)
  2896. {
  2897. struct rspamd_symcache_item *item;
  2898. item = rspamd_symcache_find_filter (cache, symbol, false);
  2899. if (item == NULL) {
  2900. return false;
  2901. }
  2902. g_assert (nids < G_MAXUINT16);
  2903. if (nids <= G_N_ELEMENTS (item->forbidden_ids.st)) {
  2904. /* Use static version */
  2905. memset (&item->forbidden_ids, 0, sizeof (item->forbidden_ids));
  2906. for (guint i = 0; i < nids; i++) {
  2907. item->forbidden_ids.st[i] = ids[i];
  2908. }
  2909. }
  2910. else {
  2911. /* Need to use a separate list */
  2912. item->forbidden_ids.dyn.e = -1; /* Flag */
  2913. item->forbidden_ids.dyn.n = rspamd_mempool_alloc (cache->static_pool,
  2914. sizeof (guint32) * nids);
  2915. item->forbidden_ids.dyn.len = nids;
  2916. item->forbidden_ids.dyn.allocated = nids;
  2917. for (guint i = 0; i < nids; i++) {
  2918. item->forbidden_ids.dyn.n[i] = ids[i];
  2919. }
  2920. /* Keep sorted */
  2921. qsort (item->forbidden_ids.dyn.n, nids, sizeof (guint32), rspamd_id_cmp);
  2922. }
  2923. return true;
  2924. }
  2925. const guint32*
  2926. rspamd_symcache_get_allowed_settings_ids (struct rspamd_symcache *cache,
  2927. const gchar *symbol,
  2928. guint *nids)
  2929. {
  2930. struct rspamd_symcache_item *item;
  2931. guint cnt = 0;
  2932. item = rspamd_symcache_find_filter (cache, symbol, false);
  2933. if (item == NULL) {
  2934. return NULL;
  2935. }
  2936. if (item->allowed_ids.dyn.e == -1) {
  2937. /* Dynamic list */
  2938. *nids = item->allowed_ids.dyn.len;
  2939. return item->allowed_ids.dyn.n;
  2940. }
  2941. else {
  2942. while (item->allowed_ids.st[cnt] != 0 && cnt < G_N_ELEMENTS (item->allowed_ids.st)) {
  2943. cnt ++;
  2944. }
  2945. *nids = cnt;
  2946. return item->allowed_ids.st;
  2947. }
  2948. }
  2949. const guint32*
  2950. rspamd_symcache_get_forbidden_settings_ids (struct rspamd_symcache *cache,
  2951. const gchar *symbol,
  2952. guint *nids)
  2953. {
  2954. struct rspamd_symcache_item *item;
  2955. guint cnt = 0;
  2956. item = rspamd_symcache_find_filter (cache, symbol, false);
  2957. if (item == NULL) {
  2958. return NULL;
  2959. }
  2960. if (item->forbidden_ids.dyn.e == -1) {
  2961. /* Dynamic list */
  2962. *nids = item->allowed_ids.dyn.len;
  2963. return item->allowed_ids.dyn.n;
  2964. }
  2965. else {
  2966. while (item->forbidden_ids.st[cnt] != 0 && cnt < G_N_ELEMENTS (item->allowed_ids.st)) {
  2967. cnt ++;
  2968. }
  2969. *nids = cnt;
  2970. return item->forbidden_ids.st;
  2971. }
  2972. }
  2973. /* Insertion sort: usable for near-sorted ids list */
  2974. static inline void
  2975. rspamd_ids_insertion_sort (guint *a, guint n)
  2976. {
  2977. for (guint i = 1; i < n; i++) {
  2978. guint32 tmp = a[i];
  2979. guint j = i;
  2980. while (j > 0 && tmp < a[j - 1]) {
  2981. a[j] = a[j - 1];
  2982. j --;
  2983. }
  2984. a[j] = tmp;
  2985. }
  2986. }
  2987. static inline void
  2988. rspamd_symcache_add_id_to_list (rspamd_mempool_t *pool,
  2989. struct rspamd_symcache_id_list *ls,
  2990. guint32 id)
  2991. {
  2992. guint cnt = 0;
  2993. guint *new_array;
  2994. if (ls->st[0] == -1) {
  2995. /* Dynamic array */
  2996. if (ls->dyn.len < ls->dyn.allocated) {
  2997. /* Trivial, append + sort */
  2998. ls->dyn.n[ls->dyn.len++] = id;
  2999. }
  3000. else {
  3001. /* Reallocate */
  3002. g_assert (ls->dyn.allocated <= G_MAXINT16);
  3003. ls->dyn.allocated *= 2;
  3004. new_array = rspamd_mempool_alloc (pool,
  3005. ls->dyn.allocated * sizeof (guint32));
  3006. memcpy (new_array, ls->dyn.n, ls->dyn.len * sizeof (guint32));
  3007. ls->dyn.n = new_array;
  3008. ls->dyn.n[ls->dyn.len++] = id;
  3009. }
  3010. rspamd_ids_insertion_sort (ls->dyn.n, ls->dyn.len);
  3011. }
  3012. else {
  3013. /* Static part */
  3014. while (ls->st[cnt] != 0 && cnt < G_N_ELEMENTS (ls->st)) {
  3015. cnt ++;
  3016. }
  3017. if (cnt < G_N_ELEMENTS (ls->st)) {
  3018. ls->st[cnt] = id;
  3019. }
  3020. else {
  3021. /* Switch to dynamic */
  3022. new_array = rspamd_mempool_alloc (pool,
  3023. G_N_ELEMENTS (ls->st) * 2 * sizeof (guint32));
  3024. memcpy (new_array, ls->st, G_N_ELEMENTS (ls->st) * sizeof (guint32));
  3025. ls->dyn.n = new_array;
  3026. ls->dyn.e = -1;
  3027. ls->dyn.allocated = G_N_ELEMENTS (ls->st) * 2;
  3028. ls->dyn.len = G_N_ELEMENTS (ls->st);
  3029. /* Recursively jump to dynamic branch that will handle insertion + sorting */
  3030. rspamd_symcache_add_id_to_list (pool, ls, id);
  3031. }
  3032. }
  3033. }
  3034. void
  3035. rspamd_symcache_process_settings_elt (struct rspamd_symcache *cache,
  3036. struct rspamd_config_settings_elt *elt)
  3037. {
  3038. guint32 id = elt->id;
  3039. ucl_object_iter_t iter;
  3040. struct rspamd_symcache_item *item, *parent;
  3041. const ucl_object_t *cur;
  3042. if (elt->symbols_disabled) {
  3043. /* Process denied symbols */
  3044. iter = NULL;
  3045. while ((cur = ucl_object_iterate (elt->symbols_disabled, &iter, true)) != NULL) {
  3046. const gchar *sym = ucl_object_key (cur);
  3047. item = rspamd_symcache_find_filter (cache, sym, false);
  3048. if (item) {
  3049. if (item->is_virtual) {
  3050. /*
  3051. * Virtual symbols are special:
  3052. * we ignore them in symcache but prevent them from being
  3053. * inserted.
  3054. */
  3055. rspamd_symcache_add_id_to_list (cache->static_pool,
  3056. &item->forbidden_ids, id);
  3057. msg_debug_cache ("deny virtual symbol %s for settings %ud (%s); "
  3058. "parent can still be executed",
  3059. sym, id, elt->name);
  3060. }
  3061. else {
  3062. /* Normal symbol, disable it */
  3063. rspamd_symcache_add_id_to_list (cache->static_pool,
  3064. &item->forbidden_ids, id);
  3065. msg_debug_cache ("deny symbol %s for settings %ud (%s)",
  3066. sym, id, elt->name);
  3067. }
  3068. }
  3069. else {
  3070. msg_warn_cache ("cannot find a symbol to disable %s "
  3071. "when processing settings %ud (%s)",
  3072. sym, id, elt->name);
  3073. }
  3074. }
  3075. }
  3076. if (elt->symbols_enabled) {
  3077. iter = NULL;
  3078. while ((cur = ucl_object_iterate (elt->symbols_enabled, &iter, true)) != NULL) {
  3079. /* Here, we resolve parent and explicitly allow it */
  3080. const gchar *sym = ucl_object_key (cur);
  3081. item = rspamd_symcache_find_filter (cache, sym, false);
  3082. if (item) {
  3083. if (item->is_virtual) {
  3084. if (!(item->type & SYMBOL_TYPE_GHOST)) {
  3085. parent = rspamd_symcache_find_filter (cache, sym, true);
  3086. if (parent) {
  3087. if (elt->symbols_disabled &&
  3088. ucl_object_lookup (elt->symbols_disabled, parent->symbol)) {
  3089. msg_err_cache ("conflict in %s: cannot enable disabled symbol %s, "
  3090. "wanted to enable symbol %s",
  3091. elt->name, parent->symbol, sym);
  3092. continue;
  3093. }
  3094. rspamd_symcache_add_id_to_list (cache->static_pool,
  3095. &parent->exec_only_ids, id);
  3096. msg_debug_cache ("allow just execution of symbol %s for settings %ud (%s)",
  3097. parent->symbol, id, elt->name);
  3098. }
  3099. }
  3100. /* Ignore ghosts */
  3101. }
  3102. rspamd_symcache_add_id_to_list (cache->static_pool,
  3103. &item->allowed_ids, id);
  3104. msg_debug_cache ("allow execution of symbol %s for settings %ud (%s)",
  3105. sym, id, elt->name);
  3106. }
  3107. else {
  3108. msg_warn_cache ("cannot find a symbol to enable %s "
  3109. "when processing settings %ud (%s)",
  3110. sym, id, elt->name);
  3111. }
  3112. }
  3113. }
  3114. }
  3115. gint
  3116. rspamd_symcache_item_flags (struct rspamd_symcache_item *item)
  3117. {
  3118. if (item) {
  3119. return item->type;
  3120. }
  3121. return 0;
  3122. }
  3123. const gchar*
  3124. rspamd_symcache_item_name (struct rspamd_symcache_item *item)
  3125. {
  3126. return item ? item->symbol : NULL;
  3127. }
  3128. const struct rspamd_symcache_item_stat *
  3129. rspamd_symcache_item_stat (struct rspamd_symcache_item *item)
  3130. {
  3131. return item ? item->st : NULL;
  3132. }
  3133. gboolean
  3134. rspamd_symcache_item_is_enabled (struct rspamd_symcache_item *item)
  3135. {
  3136. if (item) {
  3137. if (!item->enabled) {
  3138. return FALSE;
  3139. }
  3140. if (item->is_virtual && item->specific.virtual.parent_item != NULL) {
  3141. return rspamd_symcache_item_is_enabled (item->specific.virtual.parent_item);
  3142. }
  3143. return TRUE;
  3144. }
  3145. return FALSE;
  3146. }
  3147. struct rspamd_symcache_item * rspamd_symcache_item_get_parent (
  3148. struct rspamd_symcache_item *item)
  3149. {
  3150. if (item && item->is_virtual && item->specific.virtual.parent_item != NULL) {
  3151. return item->specific.virtual.parent_item;
  3152. }
  3153. return NULL;
  3154. }
  3155. const GPtrArray*
  3156. rspamd_symcache_item_get_deps (struct rspamd_symcache_item *item)
  3157. {
  3158. struct rspamd_symcache_item *parent;
  3159. if (item) {
  3160. parent = rspamd_symcache_item_get_parent (item);
  3161. if (parent) {
  3162. item = parent;
  3163. }
  3164. return item->deps;
  3165. }
  3166. return NULL;
  3167. }
  3168. const GPtrArray*
  3169. rspamd_symcache_item_get_rdeps (struct rspamd_symcache_item *item)
  3170. {
  3171. struct rspamd_symcache_item *parent;
  3172. if (item) {
  3173. parent = rspamd_symcache_item_get_parent (item);
  3174. if (parent) {
  3175. item = parent;
  3176. }
  3177. return item->rdeps;
  3178. }
  3179. return NULL;
  3180. }
  3181. void
  3182. rspamd_symcache_enable_profile (struct rspamd_task *task)
  3183. {
  3184. struct cache_savepoint *checkpoint = task->checkpoint;
  3185. if (checkpoint && !checkpoint->profile) {
  3186. ev_now_update_if_cheap (task->event_loop);
  3187. ev_tstamp now = ev_now (task->event_loop);
  3188. checkpoint->profile_start = now;
  3189. msg_debug_cache_task ("enable profiling of symbols for task");
  3190. checkpoint->profile = TRUE;
  3191. }
  3192. }