You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fuzzy_check.c 115KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  /***MODULE:fuzzy
   * rspamd module that checks fuzzy checksums for messages
   *
   * Allowed options:
   * - symbol (string): symbol to insert (default: 'R_FUZZY')
   * - max_score (double): maximum score to which the weights of hashes are normalized (default: 0 - no normalization)
   *
   * - fuzzy_map (string): a string that contains a map in the format { fuzzy_key => [ symbol, weight ] }, where fuzzy_key is the number of
   * the fuzzy list. The string itself should be in the format 1:R_FUZZY_SAMPLE1:10,2:R_FUZZY_SAMPLE2:1 etc., where the first number is the fuzzy
   * key, the second is the symbol to insert and the third is the weight used for normalization
   *
   * - min_length (integer): minimum length (in characters) of a text part to be checked for a fuzzy hash (default: 0 - no limit)
   * - whitelist (map string): map of IP addresses that should not be checked by this module
   * - servers (string): list of fuzzy servers in the format "server1:port,server2:port" - these servers are used for checking and storing
   * fuzzy hashes
   */
  32. #include "config.h"
  33. #include "libmime/message.h"
  34. #include "libserver/maps/map.h"
  35. #include "libserver/maps/map_helpers.h"
  36. #include "libmime/images.h"
  37. #include "libserver/worker_util.h"
  38. #include "libserver/mempool_vars_internal.h"
  39. #include "fuzzy_wire.h"
  40. #include "utlist.h"
  41. #include "ottery.h"
  42. #include "lua/lua_common.h"
  43. #include "unix-std.h"
  44. #include "libserver/http/http_private.h"
  45. #include "libserver/http/http_router.h"
  46. #include "libstat/stat_api.h"
  47. #include <math.h>
  48. #include "libutil/libev_helper.h"
  /*
   * Compile-time defaults of the fuzzy check plugin.
   *
   * DEFAULT_PORT is the port handed to rspamd_upstreams_from_ucl() when the
   * "servers" option of a rule is parsed.
   * NOTE(review): the remaining defaults are presumably applied by the module
   * configuration code - confirm at their use sites.
   */
  #define DEFAULT_SYMBOL      "R_FUZZY_HASH"
  #define DEFAULT_IO_TIMEOUT  1.0
  #define DEFAULT_RETRANSMITS 3
  #define DEFAULT_MAX_ERRORS  4
  #define DEFAULT_REVIVE_TIME 60
  #define DEFAULT_PORT        11335

  /* Alias of the fuzzy protocol version under a plugin-specific name */
  #define RSPAMD_FUZZY_PLUGIN_VERSION RSPAMD_FUZZY_VERSION
  56. static const gint rspamd_fuzzy_hash_len = 5;
  57. static const gchar *M = "fuzzy check";
  58. struct fuzzy_ctx;
  59. struct fuzzy_mapping {
  60. guint64 fuzzy_flag;
  61. const gchar *symbol;
  62. double weight;
  63. };
  64. struct fuzzy_rule {
  65. struct upstream_list *servers;
  66. const gchar *symbol;
  67. const gchar *algorithm_str;
  68. const gchar *name;
  69. const ucl_object_t *ucl_obj;
  70. enum rspamd_shingle_alg alg;
  71. GHashTable *mappings;
  72. GPtrArray *fuzzy_headers;
  73. GString *hash_key;
  74. GString *shingles_key;
  75. gdouble io_timeout;
  76. struct rspamd_cryptobox_keypair *local_key;
  77. struct rspamd_cryptobox_pubkey *peer_key;
  78. double max_score;
  79. double weight_threshold;
  80. gboolean read_only;
  81. gboolean skip_unknown;
  82. gboolean no_share;
  83. gboolean no_subject;
  84. gint learn_condition_cb;
  85. guint32 retransmits;
  86. struct rspamd_hash_map_helper *skip_map;
  87. struct fuzzy_ctx *ctx;
  88. gint lua_id;
  89. };
  90. struct fuzzy_ctx {
  91. struct module_ctx ctx;
  92. rspamd_mempool_t *fuzzy_pool;
  93. GPtrArray *fuzzy_rules;
  94. struct rspamd_config *cfg;
  95. const gchar *default_symbol;
  96. struct rspamd_radix_map_helper *whitelist;
  97. struct rspamd_keypair_cache *keypairs_cache;
  98. guint max_errors;
  99. gdouble revive_time;
  100. gdouble io_timeout;
  101. gint check_mime_part_ref; /* Lua callback */
  102. gint process_rule_ref; /* Lua callback */
  103. gint cleanup_rules_ref;
  104. guint32 retransmits;
  105. gboolean enabled;
  106. };
  /*
   * Kind tag stored in struct fuzzy_client_result.
   * The enumerators keep their implicit values 0..3.
   */
  enum fuzzy_result_type {
      FUZZY_RESULT_TXT,     /* 0 */
      FUZZY_RESULT_IMG,     /* 1 */
      FUZZY_RESULT_CONTENT, /* 2 */
      FUZZY_RESULT_BIN      /* 3 */
  };
  113. struct fuzzy_client_result {
  114. const gchar *symbol;
  115. gchar *option;
  116. gdouble score;
  117. gdouble prob;
  118. enum fuzzy_result_type type;
  119. };
  120. struct fuzzy_client_session {
  121. GPtrArray *commands;
  122. GPtrArray *results;
  123. struct rspamd_task *task;
  124. struct rspamd_symcache_dynamic_item *item;
  125. struct upstream *server;
  126. struct fuzzy_rule *rule;
  127. struct ev_loop *event_loop;
  128. struct rspamd_io_ev ev;
  129. gint state;
  130. gint fd;
  131. guint retransmits;
  132. };
  133. struct fuzzy_learn_session {
  134. GPtrArray *commands;
  135. gint *saved;
  136. struct {
  137. const gchar *error_message;
  138. gint error_code;
  139. } err;
  140. struct rspamd_http_connection_entry *http_entry;
  141. struct rspamd_async_session *session;
  142. struct upstream *server;
  143. struct fuzzy_rule *rule;
  144. struct rspamd_task *task;
  145. struct ev_loop *event_loop;
  146. struct rspamd_io_ev ev;
  147. gint fd;
  148. guint retransmits;
  149. };
  /* Bit flags with the FUZZY_CMD_FLAG_ prefix (one bit each, combinable with |) */
  #define FUZZY_CMD_FLAG_REPLIED (1 << 0)
  #define FUZZY_CMD_FLAG_SENT    (1 << 1)
  #define FUZZY_CMD_FLAG_IMAGE   (1 << 2)
  #define FUZZY_CMD_FLAG_CONTENT (1 << 3)

  /* Bit flags with the FUZZY_CHECK_FLAG_ prefix (one bit each, combinable with |) */
  #define FUZZY_CHECK_FLAG_NOIMAGES      (1 << 0)
  #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
  #define FUZZY_CHECK_FLAG_NOTEXT        (1 << 2)
  157. struct fuzzy_cmd_io {
  158. guint32 tag;
  159. guint32 flags;
  160. struct iovec io;
  161. struct rspamd_mime_part *part;
  162. struct rspamd_fuzzy_cmd cmd;
  163. };
  /* Comma-separated header names used when a rule has no "headers" option */
  static const char *default_headers =
      "Subject,Content-Type,Reply-To,X-Mailer";
  165. static void fuzzy_symbol_callback(struct rspamd_task *task,
  166. struct rspamd_symcache_dynamic_item *item,
  167. void *unused);
  168. /* Initialization */
  169. gint fuzzy_check_module_init(struct rspamd_config *cfg,
  170. struct module_ctx **ctx);
  171. gint fuzzy_check_module_config(struct rspamd_config *cfg, bool valdate);
  172. gint fuzzy_check_module_reconfig(struct rspamd_config *cfg);
  173. static gint fuzzy_attach_controller(struct module_ctx *ctx,
  174. GHashTable *commands);
  175. static gint fuzzy_lua_learn_handler(lua_State *L);
  176. static gint fuzzy_lua_unlearn_handler(lua_State *L);
  177. static gint fuzzy_lua_gen_hashes_handler(lua_State *L);
  178. static gint fuzzy_lua_hex_hashes_handler(lua_State *L);
  179. static gint fuzzy_lua_list_storages(lua_State *L);
  180. static gint fuzzy_lua_ping_storage(lua_State *L);
  181. module_t fuzzy_check_module = {
  182. "fuzzy_check",
  183. fuzzy_check_module_init,
  184. fuzzy_check_module_config,
  185. fuzzy_check_module_reconfig,
  186. fuzzy_attach_controller,
  187. RSPAMD_MODULE_VER,
  188. (guint) -1,
  189. };
  190. static inline struct fuzzy_ctx *
  191. fuzzy_get_context(struct rspamd_config *cfg)
  192. {
  193. return (struct fuzzy_ctx *) g_ptr_array_index(cfg->c_modules,
  194. fuzzy_check_module.ctx_offset);
  195. }
  196. static void
  197. parse_flags(struct fuzzy_rule *rule,
  198. struct rspamd_config *cfg,
  199. const ucl_object_t *val,
  200. gint cb_id)
  201. {
  202. const ucl_object_t *elt;
  203. struct fuzzy_mapping *map;
  204. const gchar *sym = NULL;
  205. if (val->type == UCL_STRING) {
  206. msg_err_config(
  207. "string mappings are deprecated and no longer supported, use new style configuration");
  208. }
  209. else if (val->type == UCL_OBJECT) {
  210. elt = ucl_object_lookup(val, "symbol");
  211. if (elt == NULL || !ucl_object_tostring_safe(elt, &sym)) {
  212. sym = ucl_object_key(val);
  213. }
  214. if (sym != NULL) {
  215. map =
  216. rspamd_mempool_alloc(cfg->cfg_pool,
  217. sizeof(struct fuzzy_mapping));
  218. map->symbol = sym;
  219. elt = ucl_object_lookup(val, "flag");
  220. if (elt != NULL) {
  221. map->fuzzy_flag = ucl_obj_toint(elt);
  222. elt = ucl_object_lookup(val, "max_score");
  223. if (elt != NULL) {
  224. map->weight = ucl_obj_todouble(elt);
  225. }
  226. else {
  227. map->weight = rule->max_score;
  228. }
  229. /* Add flag to hash table */
  230. g_hash_table_insert(rule->mappings,
  231. GINT_TO_POINTER(map->fuzzy_flag), map);
  232. rspamd_symcache_add_symbol(cfg->cache,
  233. map->symbol, 0,
  234. NULL, NULL,
  235. SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE,
  236. cb_id);
  237. }
  238. else {
  239. msg_err_config("fuzzy_map parameter has no flag definition");
  240. }
  241. }
  242. else {
  243. msg_err_config("fuzzy_map parameter has no symbol definition");
  244. }
  245. }
  246. else {
  247. msg_err_config("fuzzy_map parameter is of an unsupported type");
  248. }
  249. }
  250. static GPtrArray *
  251. parse_fuzzy_headers(struct rspamd_config *cfg, const gchar *str)
  252. {
  253. gchar **strvec;
  254. gint num, i;
  255. GPtrArray *res;
  256. strvec = g_strsplit_set(str, ",", 0);
  257. num = g_strv_length(strvec);
  258. res = g_ptr_array_sized_new(num);
  259. for (i = 0; i < num; i++) {
  260. g_strstrip(strvec[i]);
  261. g_ptr_array_add(res, rspamd_mempool_strdup(
  262. cfg->cfg_pool, strvec[i]));
  263. }
  264. g_strfreev(strvec);
  265. return res;
  266. }
  267. static double
  268. fuzzy_normalize(gint32 in, double weight)
  269. {
  270. if (weight == 0) {
  271. return 0;
  272. }
  273. #ifdef HAVE_TANH
  274. return tanh(G_E * (double) in / weight);
  275. #else
  276. return (in < weight ? in / weight : weight);
  277. #endif
  278. }
  279. static struct fuzzy_rule *
  280. fuzzy_rule_new(const char *default_symbol, rspamd_mempool_t *pool)
  281. {
  282. struct fuzzy_rule *rule;
  283. rule = rspamd_mempool_alloc0(pool, sizeof(struct fuzzy_rule));
  284. rule->mappings = g_hash_table_new(g_direct_hash, g_direct_equal);
  285. rule->symbol = default_symbol;
  286. rspamd_mempool_add_destructor(pool,
  287. (rspamd_mempool_destruct_t) g_hash_table_unref,
  288. rule->mappings);
  289. rule->read_only = FALSE;
  290. rule->weight_threshold = NAN;
  291. return rule;
  292. }
  293. static void
  294. fuzzy_free_rule(gpointer r)
  295. {
  296. struct fuzzy_rule *rule = (struct fuzzy_rule *) r;
  297. g_string_free(rule->hash_key, TRUE);
  298. g_string_free(rule->shingles_key, TRUE);
  299. if (rule->local_key) {
  300. rspamd_keypair_unref(rule->local_key);
  301. }
  302. if (rule->peer_key) {
  303. rspamd_pubkey_unref(rule->peer_key);
  304. }
  305. }
  306. static gint
  307. fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj,
  308. const gchar *name, gint cb_id)
  309. {
  310. const ucl_object_t *value, *cur;
  311. struct fuzzy_rule *rule;
  312. ucl_object_iter_t it = NULL;
  313. const char *k = NULL, *key_str = NULL, *shingles_key_str = NULL, *lua_script;
  314. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(cfg);
  315. if (obj->type != UCL_OBJECT) {
  316. msg_err_config("invalid rule definition");
  317. return -1;
  318. }
  319. if ((value = ucl_object_lookup_any(obj, "enabled", "enable", NULL)) != NULL) {
  320. if (!ucl_object_toboolean(value)) {
  321. msg_info_config("fuzzy rule %s is disabled by configuration", name);
  322. return 0;
  323. }
  324. }
  325. rule = fuzzy_rule_new(fuzzy_module_ctx->default_symbol,
  326. cfg->cfg_pool);
  327. rule->ucl_obj = obj;
  328. rule->ctx = fuzzy_module_ctx;
  329. rule->learn_condition_cb = -1;
  330. rule->alg = RSPAMD_SHINGLES_OLD;
  331. rule->skip_map = NULL;
  332. if ((value = ucl_object_lookup(obj, "skip_hashes")) != NULL) {
  333. rspamd_map_add_from_ucl(cfg, value,
  334. "Fuzzy hashes whitelist",
  335. rspamd_kv_list_read,
  336. rspamd_kv_list_fin,
  337. rspamd_kv_list_dtor,
  338. (void **) &rule->skip_map,
  339. NULL, RSPAMD_MAP_DEFAULT);
  340. }
  341. if ((value = ucl_object_lookup(obj, "headers")) != NULL) {
  342. it = NULL;
  343. while ((cur = ucl_object_iterate(value, &it, value->type == UCL_ARRAY)) != NULL) {
  344. GPtrArray *tmp;
  345. guint i;
  346. gpointer ptr;
  347. tmp = parse_fuzzy_headers(cfg, ucl_obj_tostring(cur));
  348. if (tmp) {
  349. if (rule->fuzzy_headers) {
  350. PTR_ARRAY_FOREACH(tmp, i, ptr)
  351. {
  352. g_ptr_array_add(rule->fuzzy_headers, ptr);
  353. }
  354. g_ptr_array_free(tmp, TRUE);
  355. }
  356. else {
  357. rule->fuzzy_headers = tmp;
  358. }
  359. }
  360. }
  361. }
  362. else {
  363. rule->fuzzy_headers = parse_fuzzy_headers(cfg, default_headers);
  364. }
  365. if (rule->fuzzy_headers != NULL) {
  366. rspamd_mempool_add_destructor(cfg->cfg_pool,
  367. (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard,
  368. rule->fuzzy_headers);
  369. }
  370. if ((value = ucl_object_lookup(obj, "max_score")) != NULL) {
  371. rule->max_score = ucl_obj_todouble(value);
  372. }
  373. if ((value = ucl_object_lookup(obj, "retransmits")) != NULL) {
  374. rule->retransmits = ucl_obj_toint(value);
  375. }
  376. else {
  377. rule->retransmits = fuzzy_module_ctx->retransmits;
  378. }
  379. if ((value = ucl_object_lookup(obj, "timeout")) != NULL) {
  380. rule->io_timeout = ucl_obj_todouble(value);
  381. }
  382. else {
  383. rule->io_timeout = fuzzy_module_ctx->io_timeout;
  384. }
  385. if ((value = ucl_object_lookup(obj, "symbol")) != NULL) {
  386. rule->symbol = ucl_obj_tostring(value);
  387. }
  388. if (name) {
  389. rule->name = name;
  390. }
  391. else {
  392. rule->name = rule->symbol;
  393. }
  394. if ((value = ucl_object_lookup(obj, "read_only")) != NULL) {
  395. rule->read_only = ucl_obj_toboolean(value);
  396. }
  397. if ((value = ucl_object_lookup(obj, "skip_unknown")) != NULL) {
  398. rule->skip_unknown = ucl_obj_toboolean(value);
  399. }
  400. if ((value = ucl_object_lookup(obj, "no_share")) != NULL) {
  401. rule->no_share = ucl_obj_toboolean(value);
  402. }
  403. if ((value = ucl_object_lookup(obj, "no_subject")) != NULL) {
  404. rule->no_subject = ucl_obj_toboolean(value);
  405. }
  406. if ((value = ucl_object_lookup(obj, "algorithm")) != NULL) {
  407. rule->algorithm_str = ucl_object_tostring(value);
  408. if (rule->algorithm_str) {
  409. if (g_ascii_strcasecmp(rule->algorithm_str, "old") == 0 ||
  410. g_ascii_strcasecmp(rule->algorithm_str, "siphash") == 0) {
  411. rule->alg = RSPAMD_SHINGLES_OLD;
  412. }
  413. else if (g_ascii_strcasecmp(rule->algorithm_str, "xxhash") == 0) {
  414. rule->alg = RSPAMD_SHINGLES_XXHASH;
  415. }
  416. else if (g_ascii_strcasecmp(rule->algorithm_str, "mumhash") == 0) {
  417. rule->alg = RSPAMD_SHINGLES_MUMHASH;
  418. }
  419. else if (g_ascii_strcasecmp(rule->algorithm_str, "fasthash") == 0 ||
  420. g_ascii_strcasecmp(rule->algorithm_str, "fast") == 0) {
  421. rule->alg = RSPAMD_SHINGLES_FAST;
  422. }
  423. else {
  424. msg_warn_config("unknown algorithm: %s, use siphash by default",
  425. rule->algorithm_str);
  426. }
  427. }
  428. }
  429. /* Set a consistent and short string name */
  430. switch (rule->alg) {
  431. case RSPAMD_SHINGLES_OLD:
  432. rule->algorithm_str = "sip";
  433. break;
  434. case RSPAMD_SHINGLES_XXHASH:
  435. rule->algorithm_str = "xx";
  436. break;
  437. case RSPAMD_SHINGLES_MUMHASH:
  438. rule->algorithm_str = "mum";
  439. break;
  440. case RSPAMD_SHINGLES_FAST:
  441. rule->algorithm_str = "fast";
  442. break;
  443. }
  444. if ((value = ucl_object_lookup(obj, "servers")) != NULL) {
  445. rule->servers = rspamd_upstreams_create(cfg->ups_ctx);
  446. /* pass max_error and revive_time configuration in upstream for fuzzy storage
  447. * it allows to configure error_rate threshold and upstream dead timer
  448. */
  449. rspamd_upstreams_set_limits(rule->servers,
  450. (gdouble) fuzzy_module_ctx->revive_time, NAN, NAN, NAN,
  451. (guint) fuzzy_module_ctx->max_errors, 0);
  452. rspamd_mempool_add_destructor(cfg->cfg_pool,
  453. (rspamd_mempool_destruct_t) rspamd_upstreams_destroy,
  454. rule->servers);
  455. if (!rspamd_upstreams_from_ucl(rule->servers, value, DEFAULT_PORT, NULL)) {
  456. msg_err_config("cannot read servers definition");
  457. return -1;
  458. }
  459. }
  460. if ((value = ucl_object_lookup(obj, "fuzzy_map")) != NULL) {
  461. it = NULL;
  462. while ((cur = ucl_object_iterate(value, &it, true)) != NULL) {
  463. parse_flags(rule, cfg, cur, cb_id);
  464. }
  465. }
  466. if ((value = ucl_object_lookup(obj, "encryption_key")) != NULL) {
  467. /* Create key from user's input */
  468. k = ucl_object_tostring(value);
  469. if (k == NULL || (rule->peer_key =
  470. rspamd_pubkey_from_base32(k, 0, RSPAMD_KEYPAIR_KEX,
  471. RSPAMD_CRYPTOBOX_MODE_25519)) == NULL) {
  472. msg_err_config("bad encryption key value: %s",
  473. k);
  474. return -1;
  475. }
  476. rule->local_key = rspamd_keypair_new(RSPAMD_KEYPAIR_KEX,
  477. RSPAMD_CRYPTOBOX_MODE_25519);
  478. }
  479. if ((value = ucl_object_lookup(obj, "learn_condition")) != NULL) {
  480. lua_script = ucl_object_tostring(value);
  481. if (lua_script) {
  482. if (luaL_dostring(cfg->lua_state, lua_script) != 0) {
  483. msg_err_config("cannot execute lua script for fuzzy "
  484. "learn condition: %s",
  485. lua_tostring(cfg->lua_state, -1));
  486. }
  487. else {
  488. if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) {
  489. rule->learn_condition_cb = luaL_ref(cfg->lua_state,
  490. LUA_REGISTRYINDEX);
  491. msg_info_config("loaded learn condition script for fuzzy rule:"
  492. " %s",
  493. rule->name);
  494. }
  495. else {
  496. msg_err_config("lua script must return "
  497. "function(task) and not %s",
  498. lua_typename(cfg->lua_state,
  499. lua_type(cfg->lua_state, -1)));
  500. }
  501. }
  502. }
  503. }
  504. key_str = NULL;
  505. if ((value = ucl_object_lookup(obj, "fuzzy_key")) != NULL) {
  506. /* Create key from user's input */
  507. key_str = ucl_object_tostring(value);
  508. }
  509. /* Setup keys */
  510. if (key_str == NULL) {
  511. /* Use some default key for all ops */
  512. key_str = "rspamd";
  513. }
  514. rule->hash_key = g_string_sized_new(rspamd_cryptobox_HASHBYTES);
  515. rspamd_cryptobox_hash(rule->hash_key->str, key_str, strlen(key_str), NULL, 0);
  516. rule->hash_key->len = rspamd_cryptobox_HASHKEYBYTES;
  517. shingles_key_str = NULL;
  518. if ((value = ucl_object_lookup(obj, "fuzzy_shingles_key")) != NULL) {
  519. shingles_key_str = ucl_object_tostring(value);
  520. }
  521. if (shingles_key_str == NULL) {
  522. shingles_key_str = "rspamd";
  523. }
  524. rule->shingles_key = g_string_sized_new(rspamd_cryptobox_HASHBYTES);
  525. rspamd_cryptobox_hash(rule->shingles_key->str, shingles_key_str,
  526. strlen(shingles_key_str), NULL, 0);
  527. rule->shingles_key->len = 16;
  528. if (rspamd_upstreams_count(rule->servers) == 0) {
  529. msg_err_config("no servers defined for fuzzy rule with name: %s",
  530. rule->name);
  531. return -1;
  532. }
  533. else {
  534. g_ptr_array_add(fuzzy_module_ctx->fuzzy_rules, rule);
  535. if (rule->symbol != fuzzy_module_ctx->default_symbol) {
  536. int vid = rspamd_symcache_add_symbol(cfg->cache, rule->symbol,
  537. 0,
  538. NULL, NULL,
  539. SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE,
  540. cb_id);
  541. if (rule->io_timeout > 0) {
  542. char timeout_buf[32];
  543. rspamd_snprintf(timeout_buf, sizeof(timeout_buf), "%f",
  544. rule->io_timeout);
  545. rspamd_symcache_add_symbol_augmentation(cfg->cache,
  546. vid, "timeout",
  547. timeout_buf);
  548. }
  549. }
  550. msg_info_config("added fuzzy rule %s, key: %*xs, "
  551. "shingles_key: %*xs, algorithm: %s",
  552. rule->symbol,
  553. 6, rule->hash_key->str,
  554. 6, rule->shingles_key->str,
  555. rule->algorithm_str);
  556. }
  557. if ((value = ucl_object_lookup(obj, "weight_threshold")) != NULL) {
  558. rule->weight_threshold = ucl_object_todouble(value);
  559. }
  560. /*
  561. * Process rule in Lua
  562. */
  563. gint err_idx, ret;
  564. lua_State *L = (lua_State *) cfg->lua_state;
  565. lua_pushcfunction(L, &rspamd_lua_traceback);
  566. err_idx = lua_gettop(L);
  567. lua_rawgeti(L, LUA_REGISTRYINDEX, fuzzy_module_ctx->process_rule_ref);
  568. ucl_object_push_lua(L, obj, true);
  569. if ((ret = lua_pcall(L, 1, 1, err_idx)) != 0) {
  570. msg_err_config("call to process_rule lua "
  571. "script failed (%d): %s",
  572. ret, lua_tostring(L, -1));
  573. rule->lua_id = -1;
  574. }
  575. else {
  576. rule->lua_id = lua_tonumber(L, -1);
  577. }
  578. lua_settop(L, err_idx - 1);
  579. rspamd_mempool_add_destructor(cfg->cfg_pool, fuzzy_free_rule,
  580. rule);
  581. return 0;
  582. }
  583. gint fuzzy_check_module_init(struct rspamd_config *cfg, struct module_ctx **ctx)
  584. {
  585. struct fuzzy_ctx *fuzzy_module_ctx;
  586. fuzzy_module_ctx = rspamd_mempool_alloc0(cfg->cfg_pool,
  587. sizeof(struct fuzzy_ctx));
  588. fuzzy_module_ctx->fuzzy_pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
  589. NULL, 0);
  590. /* TODO: this should match rules count actually */
  591. fuzzy_module_ctx->keypairs_cache = rspamd_keypair_cache_new(32);
  592. fuzzy_module_ctx->fuzzy_rules = g_ptr_array_new();
  593. fuzzy_module_ctx->cfg = cfg;
  594. fuzzy_module_ctx->process_rule_ref = -1;
  595. fuzzy_module_ctx->check_mime_part_ref = -1;
  596. fuzzy_module_ctx->cleanup_rules_ref = -1;
  597. rspamd_mempool_add_destructor(cfg->cfg_pool,
  598. (rspamd_mempool_destruct_t) rspamd_mempool_delete,
  599. fuzzy_module_ctx->fuzzy_pool);
  600. rspamd_mempool_add_destructor(cfg->cfg_pool,
  601. (rspamd_mempool_destruct_t) rspamd_keypair_cache_destroy,
  602. fuzzy_module_ctx->keypairs_cache);
  603. rspamd_mempool_add_destructor(cfg->cfg_pool,
  604. (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard,
  605. fuzzy_module_ctx->fuzzy_rules);
  606. *ctx = (struct module_ctx *) fuzzy_module_ctx;
  607. rspamd_rcl_add_doc_by_path(cfg,
  608. NULL,
  609. "Fuzzy check plugin",
  610. "fuzzy_check",
  611. UCL_OBJECT,
  612. NULL,
  613. 0,
  614. NULL,
  615. 0);
  616. rspamd_rcl_add_doc_by_path(cfg,
  617. "fuzzy_check",
  618. "Default symbol",
  619. "symbol",
  620. UCL_STRING,
  621. NULL,
  622. 0,
  623. NULL,
  624. 0);
  625. rspamd_rcl_add_doc_by_path(cfg,
  626. "fuzzy_check",
  627. "Minimum number of *words* to check a text part",
  628. "min_length",
  629. UCL_INT,
  630. NULL,
  631. 0,
  632. NULL,
  633. 0);
  634. rspamd_rcl_add_doc_by_path(cfg,
  635. "fuzzy_check",
  636. "Minimum number of *bytes* to check a non-text part",
  637. "min_bytes",
  638. UCL_INT,
  639. NULL,
  640. 0,
  641. NULL,
  642. 0);
  643. rspamd_rcl_add_doc_by_path(cfg,
  644. "fuzzy_check",
  645. "Multiplier for bytes limit when checking for text parts",
  646. "text_multiplier",
  647. UCL_FLOAT,
  648. NULL,
  649. 0,
  650. NULL,
  651. 0);
  652. rspamd_rcl_add_doc_by_path(cfg,
  653. "fuzzy_check",
  654. "Minimum height in pixels for embedded images to check using fuzzy storage",
  655. "min_height",
  656. UCL_INT,
  657. NULL,
  658. 0,
  659. NULL,
  660. 0);
  661. rspamd_rcl_add_doc_by_path(cfg,
  662. "fuzzy_check",
  663. "Minimum width in pixels for embedded images to check using fuzzy storage",
  664. "min_width",
  665. UCL_INT,
  666. NULL,
  667. 0,
  668. NULL,
  669. 0);
  670. rspamd_rcl_add_doc_by_path(cfg,
  671. "fuzzy_check",
  672. "Timeout for waiting reply from a fuzzy server",
  673. "timeout",
  674. UCL_TIME,
  675. NULL,
  676. 0,
  677. NULL,
  678. 0);
  679. rspamd_rcl_add_doc_by_path(cfg,
  680. "fuzzy_check",
  681. "Maximum number of retransmits for a single request",
  682. "retransmits",
  683. UCL_INT,
  684. NULL,
  685. 0,
  686. NULL,
  687. 0);
  688. rspamd_rcl_add_doc_by_path(cfg,
  689. "fuzzy_check",
  690. "Maximum number of upstream errors, affects error rate threshold",
  691. "max_errors",
  692. UCL_INT,
  693. NULL,
  694. 0,
  695. NULL,
  696. 0);
  697. rspamd_rcl_add_doc_by_path(cfg,
  698. "fuzzy_check",
  699. "Time to lapse before re-resolve faulty upstream",
  700. "revive_time",
  701. UCL_FLOAT,
  702. NULL,
  703. 0,
  704. NULL,
  705. 0);
  706. rspamd_rcl_add_doc_by_path(cfg,
  707. "fuzzy_check",
  708. "Whitelisted IPs map",
  709. "whitelist",
  710. UCL_STRING,
  711. NULL,
  712. 0,
  713. NULL,
  714. 0);
  715. /* Rules doc strings */
  716. rspamd_rcl_add_doc_by_path(cfg,
  717. "fuzzy_check",
  718. "Fuzzy check rule",
  719. "rule",
  720. UCL_OBJECT,
  721. NULL,
  722. 0,
  723. NULL,
  724. 0);
  725. rspamd_rcl_add_doc_by_path(cfg,
  726. "fuzzy_check.rule",
  727. "Headers that are used to make a separate hash",
  728. "headers",
  729. UCL_ARRAY,
  730. NULL,
  731. 0,
  732. NULL,
  733. 0);
  734. rspamd_rcl_add_doc_by_path(cfg,
  735. "fuzzy_check.rule",
  736. "Whitelisted hashes map",
  737. "skip_hashes",
  738. UCL_STRING,
  739. NULL,
  740. 0,
  741. NULL,
  742. 0);
  743. rspamd_rcl_add_doc_by_path(cfg,
  744. "fuzzy_check.rule",
  745. "Set of mime types (in form type/subtype, or type/*, or *) to check with fuzzy",
  746. "mime_types",
  747. UCL_ARRAY,
  748. NULL,
  749. 0,
  750. NULL,
  751. 0);
  752. rspamd_rcl_add_doc_by_path(cfg,
  753. "fuzzy_check.rule",
  754. "Maximum value for fuzzy hash when weight of symbol is exactly 1.0 (if value is higher then score is still 1.0)",
  755. "max_score",
  756. UCL_INT,
  757. NULL,
  758. 0,
  759. NULL,
  760. 0);
  761. rspamd_rcl_add_doc_by_path(cfg,
  762. "fuzzy_check.rule",
  763. "List of servers to check (or learn)",
  764. "servers",
  765. UCL_STRING,
  766. NULL,
  767. 0,
  768. NULL,
  769. 0);
  770. rspamd_rcl_add_doc_by_path(cfg,
  771. "fuzzy_check.rule",
  772. "If true then never try to learn this fuzzy storage",
  773. "read_only",
  774. UCL_BOOLEAN,
  775. NULL,
  776. 0,
  777. NULL,
  778. 0);
  779. rspamd_rcl_add_doc_by_path(cfg,
  780. "fuzzy_check.rule",
  781. "If true then ignore unknown flags and not add the default fuzzy symbol",
  782. "skip_unknown",
  783. UCL_BOOLEAN,
  784. NULL,
  785. 0,
  786. NULL,
  787. 0);
  788. rspamd_rcl_add_doc_by_path(cfg,
  789. "fuzzy_check.rule",
  790. "Default symbol for rule (if no flags defined or matched)",
  791. "symbol",
  792. UCL_STRING,
  793. NULL,
  794. 0,
  795. NULL,
  796. 0);
  797. rspamd_rcl_add_doc_by_path(cfg,
  798. "fuzzy_check.rule",
  799. "Base32 value for the protocol encryption public key",
  800. "encryption_key",
  801. UCL_STRING,
  802. NULL,
  803. 0,
  804. NULL,
  805. 0);
  806. rspamd_rcl_add_doc_by_path(cfg,
  807. "fuzzy_check.rule",
  808. "Base32 value for the hashing key (for private storages)",
  809. "fuzzy_key",
  810. UCL_STRING,
  811. NULL,
  812. 0,
  813. NULL,
  814. 0);
  815. rspamd_rcl_add_doc_by_path(cfg,
  816. "fuzzy_check.rule",
  817. "Base32 value for the shingles hashing key (for private storages)",
  818. "fuzzy_shingles_key",
  819. UCL_STRING,
  820. NULL,
  821. 0,
  822. NULL,
  823. 0);
  824. rspamd_rcl_add_doc_by_path(cfg,
  825. "fuzzy_check.rule",
  826. "Lua script that returns boolean function to check if this task "
  827. "should be considered when learning fuzzy storage",
  828. "learn_condition",
  829. UCL_STRING,
  830. NULL,
  831. 0,
  832. NULL,
  833. 0);
  834. rspamd_rcl_add_doc_by_path(cfg,
  835. "fuzzy_check.rule",
  836. "Map of SYMBOL -> data for flags configuration",
  837. "fuzzy_map",
  838. UCL_OBJECT,
  839. NULL,
  840. 0,
  841. NULL,
  842. 0);
  843. rspamd_rcl_add_doc_by_path(cfg,
  844. "fuzzy_check.rule",
  845. "Use direct hash for short texts",
  846. "short_text_direct_hash",
  847. UCL_BOOLEAN,
  848. NULL,
  849. 0,
  850. "true",
  851. 0);
  852. rspamd_rcl_add_doc_by_path(cfg,
  853. "fuzzy_check.rule",
  854. "Override module default min bytes for this rule",
  855. "min_bytes",
  856. UCL_INT,
  857. NULL,
  858. 0,
  859. NULL,
  860. 0);
  861. /* Fuzzy map doc strings */
  862. rspamd_rcl_add_doc_by_path(cfg,
  863. "fuzzy_check.rule.fuzzy_map",
  864. "Maximum score for this flag",
  865. "max_score",
  866. UCL_INT,
  867. NULL,
  868. 0,
  869. NULL,
  870. 0);
  871. rspamd_rcl_add_doc_by_path(cfg,
  872. "fuzzy_check.rule.fuzzy_map",
  873. "Flag number",
  874. "flag",
  875. UCL_INT,
  876. NULL,
  877. 0,
  878. NULL,
  879. 0);
  880. rspamd_rcl_add_doc_by_path(cfg,
  881. "fuzzy_check.rule",
  882. "Do no use subject to distinguish short text hashes",
  883. "no_subject",
  884. UCL_BOOLEAN,
  885. NULL,
  886. 0,
  887. "false",
  888. 0);
  889. rspamd_rcl_add_doc_by_path(cfg,
  890. "fuzzy_check.rule",
  891. "Disable sharing message stats with the fuzzy server",
  892. "no_share",
  893. UCL_BOOLEAN,
  894. NULL,
  895. 0,
  896. "false",
  897. 0);
  898. return 0;
  899. }
  900. gint fuzzy_check_module_config(struct rspamd_config *cfg, bool validate)
  901. {
  902. const ucl_object_t *value, *cur, *elt;
  903. ucl_object_iter_t it;
  904. gint res = TRUE, cb_id, nrules = 0;
  905. lua_State *L = cfg->lua_state;
  906. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(cfg);
  907. if (!rspamd_config_is_module_enabled(cfg, "fuzzy_check")) {
  908. return TRUE;
  909. }
  910. fuzzy_module_ctx->enabled = TRUE;
  911. fuzzy_module_ctx->check_mime_part_ref = -1;
  912. fuzzy_module_ctx->process_rule_ref = -1;
  913. fuzzy_module_ctx->cleanup_rules_ref = -1;
  914. /* Interact with lua_fuzzy */
  915. if (luaL_dostring(L, "return require \"lua_fuzzy\"") != 0) {
  916. msg_err_config("cannot require lua_fuzzy: %s",
  917. lua_tostring(L, -1));
  918. fuzzy_module_ctx->enabled = FALSE;
  919. }
  920. else {
  921. #if LUA_VERSION_NUM >= 504
  922. lua_settop(L, -2);
  923. #endif
  924. if (lua_type(L, -1) != LUA_TTABLE) {
  925. msg_err_config("lua fuzzy must return "
  926. "table and not %s",
  927. lua_typename(L, lua_type(L, -1)));
  928. fuzzy_module_ctx->enabled = FALSE;
  929. }
  930. else {
  931. lua_pushstring(L, "process_rule");
  932. lua_gettable(L, -2);
  933. if (lua_type(L, -1) != LUA_TFUNCTION) {
  934. msg_err_config("process_rule must return "
  935. "function and not %s",
  936. lua_typename(L, lua_type(L, -1)));
  937. fuzzy_module_ctx->enabled = FALSE;
  938. }
  939. else {
  940. fuzzy_module_ctx->process_rule_ref = luaL_ref(L, LUA_REGISTRYINDEX);
  941. }
  942. lua_pushstring(L, "check_mime_part");
  943. lua_gettable(L, -2);
  944. if (lua_type(L, -1) != LUA_TFUNCTION) {
  945. msg_err_config("check_mime_part must return "
  946. "function and not %s",
  947. lua_typename(L, lua_type(L, -1)));
  948. fuzzy_module_ctx->enabled = FALSE;
  949. }
  950. else {
  951. fuzzy_module_ctx->check_mime_part_ref = luaL_ref(L, LUA_REGISTRYINDEX);
  952. }
  953. lua_pushstring(L, "cleanup_rules");
  954. lua_gettable(L, -2);
  955. if (lua_type(L, -1) != LUA_TFUNCTION) {
  956. msg_err_config("cleanup_rules must return "
  957. "function and not %s",
  958. lua_typename(L, lua_type(L, -1)));
  959. fuzzy_module_ctx->enabled = FALSE;
  960. }
  961. else {
  962. fuzzy_module_ctx->cleanup_rules_ref = luaL_ref(L, LUA_REGISTRYINDEX);
  963. }
  964. }
  965. }
  966. lua_settop(L, 0);
  967. if (!fuzzy_module_ctx->enabled) {
  968. return TRUE;
  969. }
  970. if ((value =
  971. rspamd_config_get_module_opt(cfg, "fuzzy_check", "symbol")) != NULL) {
  972. fuzzy_module_ctx->default_symbol = ucl_obj_tostring(value);
  973. }
  974. else {
  975. fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL;
  976. }
  977. if ((value =
  978. rspamd_config_get_module_opt(cfg, "fuzzy_check", "timeout")) != NULL) {
  979. fuzzy_module_ctx->io_timeout = ucl_obj_todouble(value);
  980. }
  981. else {
  982. fuzzy_module_ctx->io_timeout = DEFAULT_IO_TIMEOUT;
  983. }
  984. if ((value =
  985. rspamd_config_get_module_opt(cfg,
  986. "fuzzy_check",
  987. "retransmits")) != NULL) {
  988. fuzzy_module_ctx->retransmits = ucl_obj_toint(value);
  989. }
  990. else {
  991. fuzzy_module_ctx->retransmits = DEFAULT_RETRANSMITS;
  992. }
  993. if ((value =
  994. rspamd_config_get_module_opt(cfg, "fuzzy_check",
  995. "max_errors")) != NULL) {
  996. fuzzy_module_ctx->max_errors = ucl_obj_toint(value);
  997. }
  998. else {
  999. fuzzy_module_ctx->max_errors = DEFAULT_MAX_ERRORS;
  1000. }
  1001. if ((value =
  1002. rspamd_config_get_module_opt(cfg, "fuzzy_check",
  1003. "revive_time")) != NULL) {
  1004. fuzzy_module_ctx->revive_time = ucl_obj_todouble(value);
  1005. }
  1006. else {
  1007. fuzzy_module_ctx->revive_time = DEFAULT_REVIVE_TIME;
  1008. }
  1009. if ((value =
  1010. rspamd_config_get_module_opt(cfg, "fuzzy_check",
  1011. "whitelist")) != NULL) {
  1012. rspamd_config_radix_from_ucl(cfg, value, "Fuzzy whitelist",
  1013. &fuzzy_module_ctx->whitelist,
  1014. NULL,
  1015. NULL, "fuzzy ip whitelist");
  1016. }
  1017. else {
  1018. fuzzy_module_ctx->whitelist = NULL;
  1019. }
  1020. if ((value =
  1021. rspamd_config_get_module_opt(cfg, "fuzzy_check", "rule")) != NULL) {
  1022. cb_id = rspamd_symcache_add_symbol(cfg->cache,
  1023. "FUZZY_CALLBACK", 0, fuzzy_symbol_callback, NULL,
  1024. SYMBOL_TYPE_CALLBACK | SYMBOL_TYPE_FINE,
  1025. -1);
  1026. rspamd_config_add_symbol(cfg,
  1027. "FUZZY_CALLBACK",
  1028. 0.0,
  1029. "Fuzzy check callback",
  1030. "fuzzy",
  1031. RSPAMD_SYMBOL_FLAG_IGNORE_METRIC,
  1032. 1,
  1033. 1);
  1034. /*
  1035. * Here we can have 2 possibilities:
  1036. *
  1037. * unnamed rules:
  1038. *
  1039. * rule {
  1040. * ...
  1041. * }
  1042. * rule {
  1043. * ...
  1044. * }
  1045. *
  1046. * - or - named rules:
  1047. *
  1048. * rule {
  1049. * "rule1": {
  1050. * ...
  1051. * }
  1052. * "rule2": {
  1053. * ...
  1054. * }
  1055. * }
  1056. *
  1057. * So, for each element, we check, if there 'servers' key. If 'servers' is
  1058. * presented, then we treat it as unnamed rule, otherwise we treat it as
  1059. * named rule.
  1060. */
  1061. LL_FOREACH(value, cur)
  1062. {
  1063. if (ucl_object_lookup(cur, "servers")) {
  1064. /* Unnamed rule */
  1065. fuzzy_parse_rule(cfg, cur, NULL, cb_id);
  1066. nrules++;
  1067. }
  1068. else {
  1069. /* Named rule */
  1070. it = NULL;
  1071. while ((elt = ucl_object_iterate(cur, &it, true)) != NULL) {
  1072. fuzzy_parse_rule(cfg, elt, ucl_object_key(elt), cb_id);
  1073. nrules++;
  1074. }
  1075. }
  1076. }
  1077. /* We want that to check bad mime attachments */
  1078. rspamd_symcache_add_delayed_dependency(cfg->cache,
  1079. "FUZZY_CALLBACK", "MIME_TYPES_CALLBACK");
  1080. }
  1081. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  1082. msg_warn_config("fuzzy module is enabled but no rules are defined");
  1083. }
  1084. msg_info_config("init internal fuzzy_check module, %d rules loaded",
  1085. nrules);
  1086. /* Register global methods */
  1087. lua_getglobal(L, "rspamd_plugins");
  1088. if (lua_type(L, -1) == LUA_TTABLE) {
  1089. lua_pushstring(L, "fuzzy_check");
  1090. lua_createtable(L, 0, 3);
  1091. /* Set methods */
  1092. lua_pushstring(L, "unlearn");
  1093. lua_pushcfunction(L, fuzzy_lua_unlearn_handler);
  1094. lua_settable(L, -3);
  1095. lua_pushstring(L, "learn");
  1096. lua_pushcfunction(L, fuzzy_lua_learn_handler);
  1097. lua_settable(L, -3);
  1098. lua_pushstring(L, "gen_hashes");
  1099. lua_pushcfunction(L, fuzzy_lua_gen_hashes_handler);
  1100. lua_settable(L, -3);
  1101. lua_pushstring(L, "hex_hashes");
  1102. lua_pushcfunction(L, fuzzy_lua_hex_hashes_handler);
  1103. lua_settable(L, -3);
  1104. lua_pushstring(L, "list_storages");
  1105. lua_pushcfunction(L, fuzzy_lua_list_storages);
  1106. lua_settable(L, -3);
  1107. lua_pushstring(L, "ping_storage");
  1108. lua_pushcfunction(L, fuzzy_lua_ping_storage);
  1109. lua_settable(L, -3);
  1110. /* Finish fuzzy_check key */
  1111. lua_settable(L, -3);
  1112. }
  1113. lua_settop(L, 0);
  1114. return res;
  1115. }
  1116. gint fuzzy_check_module_reconfig(struct rspamd_config *cfg)
  1117. {
  1118. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(cfg);
  1119. if (fuzzy_module_ctx->cleanup_rules_ref != -1) {
  1120. /* Sync lua_fuzzy rules */
  1121. gint err_idx, ret;
  1122. lua_State *L = (lua_State *) cfg->lua_state;
  1123. lua_pushcfunction(L, &rspamd_lua_traceback);
  1124. err_idx = lua_gettop(L);
  1125. lua_rawgeti(L, LUA_REGISTRYINDEX, fuzzy_module_ctx->cleanup_rules_ref);
  1126. if ((ret = lua_pcall(L, 0, 0, err_idx)) != 0) {
  1127. msg_err_config("call to cleanup_rules lua "
  1128. "script failed (%d): %s",
  1129. ret, lua_tostring(L, -1));
  1130. }
  1131. luaL_unref(cfg->lua_state, LUA_REGISTRYINDEX,
  1132. fuzzy_module_ctx->cleanup_rules_ref);
  1133. lua_settop(L, 0);
  1134. }
  1135. if (fuzzy_module_ctx->check_mime_part_ref != -1) {
  1136. luaL_unref(cfg->lua_state, LUA_REGISTRYINDEX,
  1137. fuzzy_module_ctx->check_mime_part_ref);
  1138. }
  1139. if (fuzzy_module_ctx->process_rule_ref != -1) {
  1140. luaL_unref(cfg->lua_state, LUA_REGISTRYINDEX,
  1141. fuzzy_module_ctx->process_rule_ref);
  1142. }
  1143. return fuzzy_check_module_config(cfg, false);
  1144. }
  1145. /* Finalize IO */
  1146. static void
  1147. fuzzy_io_fin(void *ud)
  1148. {
  1149. struct fuzzy_client_session *session = ud;
  1150. if (session->commands) {
  1151. g_ptr_array_free(session->commands, TRUE);
  1152. }
  1153. if (session->results) {
  1154. g_ptr_array_free(session->results, TRUE);
  1155. }
  1156. rspamd_ev_watcher_stop(session->event_loop, &session->ev);
  1157. close(session->fd);
  1158. }
  1159. static GArray *
  1160. fuzzy_preprocess_words(struct rspamd_mime_text_part *part, rspamd_mempool_t *pool)
  1161. {
  1162. return part->utf_words;
  1163. }
  1164. static void
  1165. fuzzy_encrypt_cmd(struct fuzzy_rule *rule,
  1166. struct rspamd_fuzzy_encrypted_req_hdr *hdr,
  1167. guchar *data, gsize datalen)
  1168. {
  1169. const guchar *pk;
  1170. guint pklen;
  1171. g_assert(hdr != NULL);
  1172. g_assert(data != NULL);
  1173. g_assert(rule != NULL);
  1174. /* Encrypt data */
  1175. memcpy(hdr->magic,
  1176. fuzzy_encrypted_magic,
  1177. sizeof(hdr->magic));
  1178. ottery_rand_bytes(hdr->nonce, sizeof(hdr->nonce));
  1179. pk = rspamd_keypair_component(rule->local_key,
  1180. RSPAMD_KEYPAIR_COMPONENT_PK, &pklen);
  1181. memcpy(hdr->pubkey, pk, MIN(pklen, sizeof(hdr->pubkey)));
  1182. pk = rspamd_pubkey_get_pk(rule->peer_key, &pklen);
  1183. memcpy(hdr->key_id, pk, MIN(sizeof(hdr->key_id), pklen));
  1184. rspamd_keypair_cache_process(rule->ctx->keypairs_cache,
  1185. rule->local_key, rule->peer_key);
  1186. rspamd_cryptobox_encrypt_nm_inplace(data, datalen,
  1187. hdr->nonce, rspamd_pubkey_get_nm(rule->peer_key, rule->local_key),
  1188. hdr->mac,
  1189. rspamd_pubkey_alg(rule->peer_key));
  1190. }
  1191. static struct fuzzy_cmd_io *
  1192. fuzzy_cmd_stat(struct fuzzy_rule *rule,
  1193. int c,
  1194. gint flag,
  1195. guint32 weight,
  1196. rspamd_mempool_t *pool)
  1197. {
  1198. struct rspamd_fuzzy_cmd *cmd;
  1199. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1200. struct fuzzy_cmd_io *io;
  1201. if (rule->peer_key) {
  1202. enccmd = rspamd_mempool_alloc0(pool, sizeof(*enccmd));
  1203. cmd = &enccmd->cmd;
  1204. }
  1205. else {
  1206. cmd = rspamd_mempool_alloc0(pool, sizeof(*cmd));
  1207. }
  1208. cmd->cmd = c;
  1209. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1210. cmd->shingles_count = 0;
  1211. cmd->tag = ottery_rand_uint32();
  1212. io = rspamd_mempool_alloc(pool, sizeof(*io));
  1213. io->flags = 0;
  1214. io->tag = cmd->tag;
  1215. memcpy(&io->cmd, cmd, sizeof(io->cmd));
  1216. if (rule->peer_key && enccmd) {
  1217. fuzzy_encrypt_cmd(rule, &enccmd->hdr, (guchar *) cmd, sizeof(*cmd));
  1218. io->io.iov_base = enccmd;
  1219. io->io.iov_len = sizeof(*enccmd);
  1220. }
  1221. else {
  1222. io->io.iov_base = cmd;
  1223. io->io.iov_len = sizeof(*cmd);
  1224. }
  1225. return io;
  1226. }
  1227. static inline double
  1228. fuzzy_milliseconds_since_midnight(void)
  1229. {
  1230. double now = rspamd_get_calendar_ticks();
  1231. double ms = now - (int64_t) now;
  1232. now = (((int64_t) now % 86400) + ms) * 1000;
  1233. return now;
  1234. }
  1235. static struct fuzzy_cmd_io *
  1236. fuzzy_cmd_ping(struct fuzzy_rule *rule,
  1237. rspamd_mempool_t *pool)
  1238. {
  1239. struct rspamd_fuzzy_cmd *cmd;
  1240. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1241. struct fuzzy_cmd_io *io;
  1242. if (rule->peer_key) {
  1243. enccmd = rspamd_mempool_alloc0(pool, sizeof(*enccmd));
  1244. cmd = &enccmd->cmd;
  1245. }
  1246. else {
  1247. cmd = rspamd_mempool_alloc0(pool, sizeof(*cmd));
  1248. }
  1249. /* Get milliseconds since midnight */
  1250. cmd->cmd = FUZZY_PING;
  1251. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1252. cmd->shingles_count = 0;
  1253. cmd->value = fuzzy_milliseconds_since_midnight(); /* Record timestamp */
  1254. cmd->tag = ottery_rand_uint32();
  1255. io = rspamd_mempool_alloc(pool, sizeof(*io));
  1256. io->flags = 0;
  1257. io->tag = cmd->tag;
  1258. memcpy(&io->cmd, cmd, sizeof(io->cmd));
  1259. if (rule->peer_key && enccmd) {
  1260. fuzzy_encrypt_cmd(rule, &enccmd->hdr, (guchar *) cmd, sizeof(*cmd));
  1261. io->io.iov_base = enccmd;
  1262. io->io.iov_len = sizeof(*enccmd);
  1263. }
  1264. else {
  1265. io->io.iov_base = cmd;
  1266. io->io.iov_len = sizeof(*cmd);
  1267. }
  1268. return io;
  1269. }
  1270. static struct fuzzy_cmd_io *
  1271. fuzzy_cmd_hash(struct fuzzy_rule *rule,
  1272. int c,
  1273. const rspamd_ftok_t *hash,
  1274. gint flag,
  1275. guint32 weight,
  1276. rspamd_mempool_t *pool)
  1277. {
  1278. struct rspamd_fuzzy_cmd *cmd;
  1279. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1280. struct fuzzy_cmd_io *io;
  1281. if (rule->peer_key) {
  1282. enccmd = rspamd_mempool_alloc0(pool, sizeof(*enccmd));
  1283. cmd = &enccmd->cmd;
  1284. }
  1285. else {
  1286. cmd = rspamd_mempool_alloc0(pool, sizeof(*cmd));
  1287. }
  1288. if (hash->len == sizeof(cmd->digest) * 2) {
  1289. /* It is hex encoding */
  1290. if (rspamd_decode_hex_buf(hash->begin, hash->len, cmd->digest,
  1291. sizeof(cmd->digest)) == -1) {
  1292. msg_err_pool("cannot decode hash, wrong encoding");
  1293. return NULL;
  1294. }
  1295. }
  1296. else {
  1297. msg_err_pool("cannot decode hash, wrong length: %z", hash->len);
  1298. return NULL;
  1299. }
  1300. cmd->cmd = c;
  1301. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1302. cmd->shingles_count = 0;
  1303. cmd->tag = ottery_rand_uint32();
  1304. io = rspamd_mempool_alloc(pool, sizeof(*io));
  1305. io->flags = 0;
  1306. io->tag = cmd->tag;
  1307. memcpy(&io->cmd, cmd, sizeof(io->cmd));
  1308. if (rule->peer_key && enccmd) {
  1309. fuzzy_encrypt_cmd(rule, &enccmd->hdr, (guchar *) cmd, sizeof(*cmd));
  1310. io->io.iov_base = enccmd;
  1311. io->io.iov_len = sizeof(*enccmd);
  1312. }
  1313. else {
  1314. io->io.iov_base = cmd;
  1315. io->io.iov_len = sizeof(*cmd);
  1316. }
  1317. return io;
  1318. }
  1319. struct rspamd_cached_shingles {
  1320. struct rspamd_shingle *sh;
  1321. guchar digest[rspamd_cryptobox_HASHBYTES];
  1322. guint additional_length;
  1323. guchar *additional_data;
  1324. };
  1325. static struct rspamd_cached_shingles *
  1326. fuzzy_cmd_get_cached(struct fuzzy_rule *rule,
  1327. struct rspamd_task *task,
  1328. struct rspamd_mime_part *mp)
  1329. {
  1330. gchar key[32];
  1331. gint key_part;
  1332. struct rspamd_cached_shingles **cached;
  1333. memcpy(&key_part, rule->shingles_key->str, sizeof(key_part));
  1334. rspamd_snprintf(key, sizeof(key), "%s%d", rule->algorithm_str,
  1335. key_part);
  1336. cached = (struct rspamd_cached_shingles **) rspamd_mempool_get_variable(
  1337. task->task_pool, key);
  1338. if (cached && cached[mp->part_number]) {
  1339. return cached[mp->part_number];
  1340. }
  1341. return NULL;
  1342. }
  1343. static void
  1344. fuzzy_cmd_set_cached(struct fuzzy_rule *rule,
  1345. struct rspamd_task *task,
  1346. struct rspamd_mime_part *mp,
  1347. struct rspamd_cached_shingles *data)
  1348. {
  1349. gchar key[32];
  1350. gint key_part;
  1351. struct rspamd_cached_shingles **cached;
  1352. memcpy(&key_part, rule->shingles_key->str, sizeof(key_part));
  1353. rspamd_snprintf(key, sizeof(key), "%s%d", rule->algorithm_str,
  1354. key_part);
  1355. cached = (struct rspamd_cached_shingles **) rspamd_mempool_get_variable(
  1356. task->task_pool, key);
  1357. if (cached) {
  1358. cached[mp->part_number] = data;
  1359. }
  1360. else {
  1361. cached = rspamd_mempool_alloc0(task->task_pool, sizeof(*cached) *
  1362. (MESSAGE_FIELD(task, parts)->len + 1));
  1363. cached[mp->part_number] = data;
  1364. rspamd_mempool_set_variable(task->task_pool, key, cached, NULL);
  1365. }
  1366. }
  1367. static gboolean
  1368. fuzzy_rule_check_mimepart(struct rspamd_task *task,
  1369. struct fuzzy_rule *rule,
  1370. struct rspamd_mime_part *part,
  1371. gboolean *need_check,
  1372. gboolean *fuzzy_check)
  1373. {
  1374. lua_State *L = (lua_State *) task->cfg->lua_state;
  1375. gint old_top = lua_gettop(L);
  1376. if (rule->lua_id != -1 && rule->ctx->check_mime_part_ref != -1) {
  1377. gint err_idx, ret;
  1378. struct rspamd_task **ptask;
  1379. struct rspamd_mime_part **ppart;
  1380. lua_pushcfunction(L, &rspamd_lua_traceback);
  1381. err_idx = lua_gettop(L);
  1382. lua_rawgeti(L, LUA_REGISTRYINDEX, rule->ctx->check_mime_part_ref);
  1383. ptask = lua_newuserdata(L, sizeof(*ptask));
  1384. *ptask = task;
  1385. rspamd_lua_setclass(L, rspamd_task_classname, -1);
  1386. ppart = lua_newuserdata(L, sizeof(*ppart));
  1387. *ppart = part;
  1388. rspamd_lua_setclass(L, "rspamd{mimepart}", -1);
  1389. lua_pushnumber(L, rule->lua_id);
  1390. if ((ret = lua_pcall(L, 3, 2, err_idx)) != 0) {
  1391. msg_err_task("call to check_mime_part lua "
  1392. "script failed (%d): %s",
  1393. ret, lua_tostring(L, -1));
  1394. ret = FALSE;
  1395. }
  1396. else {
  1397. ret = TRUE;
  1398. *need_check = lua_toboolean(L, -2);
  1399. *fuzzy_check = lua_toboolean(L, -1);
  1400. }
  1401. lua_settop(L, old_top);
  1402. return ret;
  1403. }
  1404. return FALSE;
  1405. }
  1406. #define MAX_FUZZY_DOMAIN 64
  1407. static guint
  1408. fuzzy_cmd_extension_length(struct rspamd_task *task,
  1409. struct fuzzy_rule *rule)
  1410. {
  1411. guint total = 0;
  1412. if (rule->no_share) {
  1413. return 0;
  1414. }
  1415. /* From domain */
  1416. if (MESSAGE_FIELD(task, from_mime) && MESSAGE_FIELD(task, from_mime)->len > 0) {
  1417. struct rspamd_email_address *addr = g_ptr_array_index(MESSAGE_FIELD(task,
  1418. from_mime),
  1419. 0);
  1420. if (addr->domain_len > 0) {
  1421. total += 2; /* 2 bytes: type + length */
  1422. total += MIN(MAX_FUZZY_DOMAIN, addr->domain_len);
  1423. }
  1424. }
  1425. if (task->from_addr && rspamd_inet_address_get_af(task->from_addr) == AF_INET) {
  1426. total += sizeof(struct in_addr) + 1;
  1427. }
  1428. else if (task->from_addr && rspamd_inet_address_get_af(task->from_addr) == AF_INET6) {
  1429. total += sizeof(struct in6_addr) + 1;
  1430. }
  1431. return total;
  1432. }
  1433. static guint
  1434. fuzzy_cmd_write_extensions(struct rspamd_task *task,
  1435. struct fuzzy_rule *rule,
  1436. guchar *dest,
  1437. gsize available)
  1438. {
  1439. guint written = 0;
  1440. if (rule->no_share) {
  1441. return 0;
  1442. }
  1443. if (MESSAGE_FIELD(task, from_mime) && MESSAGE_FIELD(task, from_mime)->len > 0) {
  1444. struct rspamd_email_address *addr = g_ptr_array_index(MESSAGE_FIELD(task,
  1445. from_mime),
  1446. 0);
  1447. guint to_write = MIN(MAX_FUZZY_DOMAIN, addr->domain_len) + 2;
  1448. if (to_write > 0 && to_write <= available) {
  1449. *dest++ = RSPAMD_FUZZY_EXT_SOURCE_DOMAIN;
  1450. *dest++ = to_write - 2;
  1451. if (addr->domain_len < MAX_FUZZY_DOMAIN) {
  1452. memcpy(dest, addr->domain, addr->domain_len);
  1453. dest += addr->domain_len;
  1454. }
  1455. else {
  1456. /* Trim from left */
  1457. memcpy(dest,
  1458. addr->domain + (addr->domain_len - MAX_FUZZY_DOMAIN),
  1459. MAX_FUZZY_DOMAIN);
  1460. dest += MAX_FUZZY_DOMAIN;
  1461. }
  1462. available -= to_write;
  1463. written += to_write;
  1464. }
  1465. }
  1466. if (task->from_addr && rspamd_inet_address_get_af(task->from_addr) == AF_INET) {
  1467. if (available >= sizeof(struct in_addr) + 1) {
  1468. guint klen;
  1469. guchar *inet_data = rspamd_inet_address_get_hash_key(task->from_addr, &klen);
  1470. *dest++ = RSPAMD_FUZZY_EXT_SOURCE_IP4;
  1471. memcpy(dest, inet_data, klen);
  1472. dest += klen;
  1473. available -= klen + 1;
  1474. written += klen + 1;
  1475. }
  1476. }
  1477. else if (task->from_addr && rspamd_inet_address_get_af(task->from_addr) == AF_INET6) {
  1478. if (available >= sizeof(struct in6_addr) + 1) {
  1479. guint klen;
  1480. guchar *inet_data = rspamd_inet_address_get_hash_key(task->from_addr, &klen);
  1481. *dest++ = RSPAMD_FUZZY_EXT_SOURCE_IP6;
  1482. memcpy(dest, inet_data, klen);
  1483. dest += klen;
  1484. available -= klen + 1;
  1485. written += klen + 1;
  1486. }
  1487. }
  1488. return written;
  1489. }
  1490. /*
  1491. * Create fuzzy command from a text part
  1492. */
  1493. static struct fuzzy_cmd_io *
  1494. fuzzy_cmd_from_text_part(struct rspamd_task *task,
  1495. struct fuzzy_rule *rule,
  1496. int c,
  1497. gint flag,
  1498. guint32 weight,
  1499. gboolean short_text,
  1500. struct rspamd_mime_text_part *part,
  1501. struct rspamd_mime_part *mp)
  1502. {
  1503. struct rspamd_fuzzy_shingle_cmd *shcmd = NULL;
  1504. struct rspamd_fuzzy_cmd *cmd = NULL;
  1505. struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd = NULL;
  1506. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1507. struct rspamd_cached_shingles *cached = NULL;
  1508. struct rspamd_shingle *sh = NULL;
  1509. guint i;
  1510. rspamd_cryptobox_hash_state_t st;
  1511. rspamd_stat_token_t *word;
  1512. GArray *words;
  1513. struct fuzzy_cmd_io *io;
  1514. guint additional_length;
  1515. guchar *additional_data;
  1516. cached = fuzzy_cmd_get_cached(rule, task, mp);
  1517. /*
  1518. * Important note:
  1519. *
  1520. * We assume that fuzzy io is a consistent memory layout to fit into
  1521. * iov structure of size 1
  1522. *
  1523. * However, there are 4 possibilities:
  1524. * 1) non encrypted, non shingle command - just one cmd
  1525. * 2) encrypted, non shingle command - encryption hdr + cmd
  1526. * 3) non encrypted, shingle command - cmd + shingle
  1527. * 4) encrypted, shingle command - encryption hdr + cmd + shingle
  1528. *
  1529. * Extensions are always at the end, but since we also have caching (sigh, meh...)
  1530. * then we have one piece that looks like cmd (+ shingle) + extensions
  1531. * To encrypt it optionally we take this memory and prepend encryption header
  1532. *
  1533. * In case of cached version we do the same: allocate, copy from cached (including extra)
  1534. * and optionally encrypt.
  1535. *
  1536. * However, there should be no extensions in case of unencrypted connection
  1537. * (for sanity + privacy).
  1538. */
  1539. if (cached) {
  1540. additional_length = cached->additional_length;
  1541. additional_data = cached->additional_data;
  1542. /* Copy cached */
  1543. if (short_text) {
  1544. enccmd = rspamd_mempool_alloc0(task->task_pool,
  1545. sizeof(*enccmd) + additional_length);
  1546. cmd = &enccmd->cmd;
  1547. memcpy(cmd->digest, cached->digest,
  1548. sizeof(cached->digest));
  1549. cmd->shingles_count = 0;
  1550. memcpy(((guchar *) enccmd) + sizeof(*enccmd), additional_data,
  1551. additional_length);
  1552. }
  1553. else if (cached->sh) {
  1554. encshcmd = rspamd_mempool_alloc0(task->task_pool,
  1555. additional_length + sizeof(*encshcmd));
  1556. shcmd = &encshcmd->cmd;
  1557. memcpy(&shcmd->sgl, cached->sh, sizeof(struct rspamd_shingle));
  1558. memcpy(shcmd->basic.digest, cached->digest,
  1559. sizeof(cached->digest));
  1560. memcpy(((guchar *) encshcmd) + sizeof(*encshcmd), additional_data,
  1561. additional_length);
  1562. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1563. }
  1564. else {
  1565. return NULL;
  1566. }
  1567. }
  1568. else {
  1569. additional_length = fuzzy_cmd_extension_length(task, rule);
  1570. cached = rspamd_mempool_alloc0(task->task_pool, sizeof(*cached) +
  1571. additional_length);
  1572. /*
  1573. * Allocate extensions and never touch it except copying to avoid
  1574. * occasional encryption
  1575. */
  1576. cached->additional_length = additional_length;
  1577. cached->additional_data = ((guchar *) cached) + sizeof(*cached);
  1578. if (additional_length > 0) {
  1579. fuzzy_cmd_write_extensions(task, rule, cached->additional_data,
  1580. additional_length);
  1581. }
  1582. if (short_text) {
  1583. enccmd = rspamd_mempool_alloc0(task->task_pool,
  1584. sizeof(*enccmd) + additional_length);
  1585. cmd = &enccmd->cmd;
  1586. rspamd_cryptobox_hash_init(&st, rule->hash_key->str,
  1587. rule->hash_key->len);
  1588. rspamd_cryptobox_hash_update(&st, part->utf_stripped_content->data,
  1589. part->utf_stripped_content->len);
  1590. if (!rule->no_subject && (MESSAGE_FIELD(task, subject))) {
  1591. /* We also include subject */
  1592. rspamd_cryptobox_hash_update(&st, MESSAGE_FIELD(task, subject),
  1593. strlen(MESSAGE_FIELD(task, subject)));
  1594. }
  1595. rspamd_cryptobox_hash_final(&st, cmd->digest);
  1596. memcpy(cached->digest, cmd->digest, sizeof(cached->digest));
  1597. cached->sh = NULL;
  1598. additional_data = ((guchar *) enccmd) + sizeof(*enccmd);
  1599. memcpy(additional_data, cached->additional_data, additional_length);
  1600. }
  1601. else {
  1602. encshcmd = rspamd_mempool_alloc0(task->task_pool,
  1603. sizeof(*encshcmd) + additional_length);
  1604. shcmd = &encshcmd->cmd;
  1605. /*
  1606. * Generate hash from all words in the part
  1607. */
  1608. rspamd_cryptobox_hash_init(&st, rule->hash_key->str, rule->hash_key->len);
  1609. words = fuzzy_preprocess_words(part, task->task_pool);
  1610. for (i = 0; i < words->len; i++) {
  1611. word = &g_array_index(words, rspamd_stat_token_t, i);
  1612. if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0)) {
  1613. rspamd_cryptobox_hash_update(&st, word->stemmed.begin,
  1614. word->stemmed.len);
  1615. }
  1616. }
  1617. rspamd_cryptobox_hash_final(&st, shcmd->basic.digest);
  1618. msg_debug_task("loading shingles of type %s with key %*xs",
  1619. rule->algorithm_str,
  1620. 16, rule->shingles_key->str);
  1621. sh = rspamd_shingles_from_text(words,
  1622. rule->shingles_key->str, task->task_pool,
  1623. rspamd_shingles_default_filter, NULL,
  1624. rule->alg);
  1625. if (sh != NULL) {
  1626. memcpy(&shcmd->sgl, sh, sizeof(shcmd->sgl));
  1627. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1628. }
  1629. else {
  1630. /* No shingles, no check */
  1631. return NULL;
  1632. }
  1633. cached->sh = sh;
  1634. memcpy(cached->digest, shcmd->basic.digest, sizeof(cached->digest));
  1635. additional_data = ((guchar *) encshcmd) + sizeof(*encshcmd);
  1636. memcpy(additional_data, cached->additional_data, additional_length);
  1637. }
  1638. /*
  1639. * We always save encrypted command as it can handle both
  1640. * encrypted and unencrypted requests.
  1641. *
  1642. * Since it is copied when obtained from the cache, it is safe to use
  1643. * it this way.
  1644. */
  1645. fuzzy_cmd_set_cached(rule, task, mp, cached);
  1646. }
  1647. io = rspamd_mempool_alloc(task->task_pool, sizeof(*io));
  1648. io->part = mp;
  1649. if (!short_text) {
  1650. shcmd->basic.tag = ottery_rand_uint32();
  1651. shcmd->basic.cmd = c;
  1652. shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1653. if (c != FUZZY_CHECK) {
  1654. shcmd->basic.flag = flag;
  1655. shcmd->basic.value = weight;
  1656. }
  1657. io->tag = shcmd->basic.tag;
  1658. memcpy(&io->cmd, &shcmd->basic, sizeof(io->cmd));
  1659. }
  1660. else {
  1661. cmd->tag = ottery_rand_uint32();
  1662. cmd->cmd = c;
  1663. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1664. if (c != FUZZY_CHECK) {
  1665. cmd->flag = flag;
  1666. cmd->value = weight;
  1667. }
  1668. io->tag = cmd->tag;
  1669. memcpy(&io->cmd, cmd, sizeof(io->cmd));
  1670. }
  1671. io->flags = 0;
  1672. if (rule->peer_key) {
  1673. /* Encrypt data */
  1674. if (!short_text) {
  1675. fuzzy_encrypt_cmd(rule, &encshcmd->hdr, (guchar *) shcmd,
  1676. sizeof(*shcmd) + additional_length);
  1677. io->io.iov_base = encshcmd;
  1678. io->io.iov_len = sizeof(*encshcmd) + additional_length;
  1679. }
  1680. else {
  1681. fuzzy_encrypt_cmd(rule, &enccmd->hdr, (guchar *) cmd,
  1682. sizeof(*cmd) + additional_length);
  1683. io->io.iov_base = enccmd;
  1684. io->io.iov_len = sizeof(*enccmd) + additional_length;
  1685. }
  1686. }
  1687. else {
  1688. if (!short_text) {
  1689. io->io.iov_base = shcmd;
  1690. io->io.iov_len = sizeof(*shcmd) + additional_length;
  1691. }
  1692. else {
  1693. io->io.iov_base = cmd;
  1694. io->io.iov_len = sizeof(*cmd) + additional_length;
  1695. }
  1696. }
  1697. return io;
  1698. }
  #if 0
  /*
   * Create fuzzy command from an image part (this code is compiled out).
   *
   * Shingles are generated from the image DCT data and the digest is a keyed
   * hash of the same data; both are cached per part.
   */
  static struct fuzzy_cmd_io *
  fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
                             int c,
                             gint flag,
                             guint32 weight,
                             struct rspamd_task *task,
                             struct rspamd_image *img,
                             struct rspamd_mime_part *mp)
  {
      struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd;
      struct rspamd_fuzzy_shingle_cmd *shcmd;
      struct rspamd_cached_shingles *cached;
      struct rspamd_shingle *sh;
      struct fuzzy_cmd_io *io;

      cached = fuzzy_cmd_get_cached (rule, task, mp);

      /* Both paths start from a zeroed encrypted shingle command */
      encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
      shcmd = &encshcmd->cmd;

      if (cached == NULL) {
          /*
           * Generate shingles
           */
          sh = rspamd_shingles_from_image (img->dct,
                  rule->shingles_key->str, task->task_pool,
                  rspamd_shingles_default_filter, NULL,
                  rule->alg);

          if (sh != NULL) {
              memcpy (&shcmd->sgl, sh->hashes, sizeof (shcmd->sgl));
              shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  #if 0
              for (unsigned int i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
                  msg_err ("shingle %d: %L", i, sh->hashes[i]);
              }
  #endif
          }

          rspamd_cryptobox_hash (shcmd->basic.digest,
                  (const guchar *)img->dct, RSPAMD_DCT_LEN / NBBY,
                  rule->hash_key->str, rule->hash_key->len);

          msg_debug_task ("loading shingles of type %s with key %*xs",
                  rule->algorithm_str,
                  16, rule->shingles_key->str);

          /*
           * The cache entry is copied on each use, so it can serve both
           * encrypted and unencrypted requests.
           */
          cached = rspamd_mempool_alloc (task->task_pool, sizeof (*cached));
          cached->sh = sh;
          memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest));
          fuzzy_cmd_set_cached (rule, task, mp, cached);
      }
      else {
          /* Copy cached */
          memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
          memcpy (shcmd->basic.digest, cached->digest,
                  sizeof (cached->digest));
          shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
      }

      /* Command header */
      shcmd->basic.tag = ottery_rand_uint32 ();
      shcmd->basic.cmd = c;
      shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;

      if (c != FUZZY_CHECK) {
          shcmd->basic.flag = flag;
          shcmd->basic.value = weight;
      }

      io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
      io->part = mp;
      io->tag = shcmd->basic.tag;
      io->flags = FUZZY_CMD_FLAG_IMAGE;
      memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd));

      if (!rule->peer_key) {
          io->io.iov_base = shcmd;
          io->io.iov_len = sizeof (*shcmd);
      }
      else {
          /* Encrypt data */
          fuzzy_encrypt_cmd (rule, &encshcmd->hdr, (guchar *) shcmd, sizeof (*shcmd));
          io->io.iov_base = encshcmd;
          io->io.iov_len = sizeof (*encshcmd);
      }

      return io;
  }
  #endif
  1786. static struct fuzzy_cmd_io *
  1787. fuzzy_cmd_from_data_part(struct fuzzy_rule *rule,
  1788. int c,
  1789. gint flag,
  1790. guint32 weight,
  1791. struct rspamd_task *task,
  1792. guchar digest[rspamd_cryptobox_HASHBYTES],
  1793. struct rspamd_mime_part *mp)
  1794. {
  1795. struct rspamd_fuzzy_cmd *cmd;
  1796. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1797. struct fuzzy_cmd_io *io;
  1798. guint additional_length;
  1799. guchar *additional_data;
  1800. additional_length = fuzzy_cmd_extension_length(task, rule);
  1801. if (rule->peer_key) {
  1802. enccmd = rspamd_mempool_alloc0(task->task_pool,
  1803. sizeof(*enccmd) + additional_length);
  1804. cmd = &enccmd->cmd;
  1805. additional_data = ((guchar *) enccmd) + sizeof(*enccmd);
  1806. }
  1807. else {
  1808. cmd = rspamd_mempool_alloc0(task->task_pool,
  1809. sizeof(*cmd) + additional_length);
  1810. additional_data = ((guchar *) cmd) + sizeof(*cmd);
  1811. }
  1812. cmd->cmd = c;
  1813. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1814. if (c != FUZZY_CHECK) {
  1815. cmd->flag = flag;
  1816. cmd->value = weight;
  1817. }
  1818. cmd->shingles_count = 0;
  1819. cmd->tag = ottery_rand_uint32();
  1820. memcpy(cmd->digest, digest, sizeof(cmd->digest));
  1821. io = rspamd_mempool_alloc(task->task_pool, sizeof(*io));
  1822. io->flags = 0;
  1823. io->tag = cmd->tag;
  1824. io->part = mp;
  1825. memcpy(&io->cmd, cmd, sizeof(io->cmd));
  1826. if (additional_length > 0) {
  1827. fuzzy_cmd_write_extensions(task, rule, additional_data,
  1828. additional_length);
  1829. }
  1830. if (rule->peer_key) {
  1831. g_assert(enccmd != NULL);
  1832. fuzzy_encrypt_cmd(rule, &enccmd->hdr, (guchar *) cmd,
  1833. sizeof(*cmd) + additional_length);
  1834. io->io.iov_base = enccmd;
  1835. io->io.iov_len = sizeof(*enccmd) + additional_length;
  1836. }
  1837. else {
  1838. io->io.iov_base = cmd;
  1839. io->io.iov_len = sizeof(*cmd) + additional_length;
  1840. }
  1841. return io;
  1842. }
  1843. static gboolean
  1844. fuzzy_cmd_to_wire(gint fd, struct iovec *io)
  1845. {
  1846. struct msghdr msg;
  1847. memset(&msg, 0, sizeof(msg));
  1848. msg.msg_iov = io;
  1849. msg.msg_iovlen = 1;
  1850. while (sendmsg(fd, &msg, 0) == -1) {
  1851. if (errno == EINTR) {
  1852. continue;
  1853. }
  1854. return FALSE;
  1855. }
  1856. return TRUE;
  1857. }
  1858. static gboolean
  1859. fuzzy_cmd_vector_to_wire(gint fd, GPtrArray *v)
  1860. {
  1861. guint i;
  1862. gboolean all_sent = TRUE, all_replied = TRUE;
  1863. struct fuzzy_cmd_io *io;
  1864. gboolean processed = FALSE;
  1865. /* First try to resend unsent commands */
  1866. for (i = 0; i < v->len; i++) {
  1867. io = g_ptr_array_index(v, i);
  1868. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  1869. continue;
  1870. }
  1871. all_replied = FALSE;
  1872. if (!(io->flags & FUZZY_CMD_FLAG_SENT)) {
  1873. if (!fuzzy_cmd_to_wire(fd, &io->io)) {
  1874. return FALSE;
  1875. }
  1876. processed = TRUE;
  1877. io->flags |= FUZZY_CMD_FLAG_SENT;
  1878. all_sent = FALSE;
  1879. }
  1880. }
  1881. if (all_sent && !all_replied) {
  1882. /* Now try to resend each command in the vector */
  1883. for (i = 0; i < v->len; i++) {
  1884. io = g_ptr_array_index(v, i);
  1885. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1886. io->flags &= ~FUZZY_CMD_FLAG_SENT;
  1887. }
  1888. }
  1889. return fuzzy_cmd_vector_to_wire(fd, v);
  1890. }
  1891. return processed;
  1892. }
  1893. /*
  1894. * Read replies one-by-one and remove them from req array
  1895. */
  1896. static const struct rspamd_fuzzy_reply *
  1897. fuzzy_process_reply(guchar **pos, gint *r, GPtrArray *req,
  1898. struct fuzzy_rule *rule, struct rspamd_fuzzy_cmd **pcmd,
  1899. struct fuzzy_cmd_io **pio)
  1900. {
  1901. guchar *p = *pos;
  1902. gint remain = *r;
  1903. guint i, required_size;
  1904. struct fuzzy_cmd_io *io;
  1905. const struct rspamd_fuzzy_reply *rep;
  1906. struct rspamd_fuzzy_encrypted_reply encrep;
  1907. gboolean found = FALSE;
  1908. if (rule->peer_key) {
  1909. required_size = sizeof(encrep);
  1910. }
  1911. else {
  1912. required_size = sizeof(*rep);
  1913. }
  1914. if (remain <= 0 || (guint) remain < required_size) {
  1915. return NULL;
  1916. }
  1917. if (rule->peer_key) {
  1918. memcpy(&encrep, p, sizeof(encrep));
  1919. *pos += required_size;
  1920. *r -= required_size;
  1921. /* Try to decrypt reply */
  1922. rspamd_keypair_cache_process(rule->ctx->keypairs_cache,
  1923. rule->local_key, rule->peer_key);
  1924. if (!rspamd_cryptobox_decrypt_nm_inplace((guchar *) &encrep.rep,
  1925. sizeof(encrep.rep),
  1926. encrep.hdr.nonce,
  1927. rspamd_pubkey_get_nm(rule->peer_key, rule->local_key),
  1928. encrep.hdr.mac,
  1929. rspamd_pubkey_alg(rule->peer_key))) {
  1930. msg_info("cannot decrypt reply");
  1931. return NULL;
  1932. }
  1933. /* Copy decrypted over the input wire */
  1934. memcpy(p, &encrep.rep, sizeof(encrep.rep));
  1935. }
  1936. else {
  1937. *pos += required_size;
  1938. *r -= required_size;
  1939. }
  1940. rep = (const struct rspamd_fuzzy_reply *) p;
  1941. /*
  1942. * Search for tag
  1943. */
  1944. for (i = 0; i < req->len; i++) {
  1945. io = g_ptr_array_index(req, i);
  1946. if (io->tag == rep->v1.tag) {
  1947. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1948. io->flags |= FUZZY_CMD_FLAG_REPLIED;
  1949. if (pcmd) {
  1950. *pcmd = &io->cmd;
  1951. }
  1952. if (pio) {
  1953. *pio = io;
  1954. }
  1955. return rep;
  1956. }
  1957. found = TRUE;
  1958. }
  1959. }
  1960. if (!found) {
  1961. msg_info("unexpected tag: %ud", rep->v1.tag);
  1962. }
  1963. return NULL;
  1964. }
  1965. static void
  1966. fuzzy_insert_result(struct fuzzy_client_session *session,
  1967. const struct rspamd_fuzzy_reply *rep,
  1968. struct rspamd_fuzzy_cmd *cmd,
  1969. struct fuzzy_cmd_io *io,
  1970. guint flag)
  1971. {
  1972. const gchar *symbol;
  1973. struct fuzzy_mapping *map;
  1974. struct rspamd_task *task = session->task;
  1975. double weight;
  1976. double nval;
  1977. guchar buf[2048];
  1978. const gchar *type = "bin";
  1979. struct fuzzy_client_result *res;
  1980. gboolean is_fuzzy = FALSE;
  1981. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  1982. /* Discriminate scores for small images */
  1983. static const guint short_image_limit = 32 * 1024;
  1984. /* Get mapping by flag */
  1985. if ((map =
  1986. g_hash_table_lookup(session->rule->mappings,
  1987. GINT_TO_POINTER(rep->v1.flag))) == NULL) {
  1988. /* Default symbol and default weight */
  1989. symbol = session->rule->symbol;
  1990. weight = session->rule->max_score;
  1991. }
  1992. else {
  1993. /* Get symbol and weight from map */
  1994. symbol = map->symbol;
  1995. weight = map->weight;
  1996. }
  1997. res = rspamd_mempool_alloc0(task->task_pool, sizeof(*res));
  1998. res->prob = rep->v1.prob;
  1999. res->symbol = symbol;
  2000. /*
  2001. * Hash is assumed to be found if probability is more than 0.5
  2002. * In that case `value` means number of matches
  2003. * Otherwise `value` means error code
  2004. */
  2005. nval = fuzzy_normalize(rep->v1.value, weight);
  2006. if (io) {
  2007. if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  2008. if (!io->part || io->part->parsed_data.len <= short_image_limit) {
  2009. nval *= rspamd_normalize_probability(rep->v1.prob, 0.5);
  2010. }
  2011. type = "img";
  2012. res->type = FUZZY_RESULT_IMG;
  2013. }
  2014. else {
  2015. /* Calc real probability */
  2016. nval *= sqrtf(rep->v1.prob);
  2017. if (cmd->shingles_count > 0) {
  2018. type = "txt";
  2019. res->type = FUZZY_RESULT_TXT;
  2020. }
  2021. else {
  2022. if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
  2023. type = "content";
  2024. res->type = FUZZY_RESULT_CONTENT;
  2025. }
  2026. else {
  2027. res->type = FUZZY_RESULT_BIN;
  2028. }
  2029. }
  2030. }
  2031. }
  2032. res->score = nval;
  2033. if (memcmp(rep->digest, cmd->digest, sizeof(rep->digest)) != 0) {
  2034. is_fuzzy = TRUE;
  2035. }
  2036. if (map != NULL || !session->rule->skip_unknown) {
  2037. GList *fuzzy_var;
  2038. rspamd_fstring_t *hex_result;
  2039. gchar timebuf[64];
  2040. struct tm tm_split;
  2041. if (session->rule->skip_map) {
  2042. rspamd_encode_hex_buf(cmd->digest, sizeof(cmd->digest),
  2043. hexbuf, sizeof(hexbuf) - 1);
  2044. hexbuf[sizeof(hexbuf) - 1] = '\0';
  2045. if (rspamd_match_hash_map(session->rule->skip_map, hexbuf,
  2046. sizeof(hexbuf) - 1)) {
  2047. return;
  2048. }
  2049. }
  2050. rspamd_encode_hex_buf(rep->digest, sizeof(rep->digest),
  2051. hexbuf, sizeof(hexbuf) - 1);
  2052. hexbuf[sizeof(hexbuf) - 1] = '\0';
  2053. rspamd_gmtime(rep->ts, &tm_split);
  2054. rspamd_snprintf(timebuf, sizeof(timebuf), "%02d.%02d.%4d %02d:%02d:%02d GMT",
  2055. tm_split.tm_mday,
  2056. tm_split.tm_mon + 1,
  2057. tm_split.tm_year + 1900,
  2058. tm_split.tm_hour, tm_split.tm_min, tm_split.tm_sec);
  2059. if (is_fuzzy) {
  2060. msg_notice_task(
  2061. "found fuzzy hash(%s) %s (%*xs requested) with weight: "
  2062. "%.2f, probability %.2f, in list: %s:%d%s; added on %s",
  2063. type,
  2064. hexbuf,
  2065. (gint) sizeof(cmd->digest), cmd->digest,
  2066. nval,
  2067. (gdouble) rep->v1.prob,
  2068. symbol,
  2069. rep->v1.flag,
  2070. map == NULL ? "(unknown)" : "",
  2071. timebuf);
  2072. }
  2073. else {
  2074. msg_notice_task(
  2075. "found exact fuzzy hash(%s) %s with weight: "
  2076. "%.2f, probability %.2f, in list: %s:%d%s; added on %s",
  2077. type,
  2078. hexbuf,
  2079. nval,
  2080. (gdouble) rep->v1.prob,
  2081. symbol,
  2082. rep->v1.flag,
  2083. map == NULL ? "(unknown)" : "",
  2084. timebuf);
  2085. }
  2086. rspamd_snprintf(buf,
  2087. sizeof(buf),
  2088. "%d:%*s:%.2f:%s",
  2089. rep->v1.flag,
  2090. (gint) MIN(rspamd_fuzzy_hash_len * 2, sizeof(rep->digest) * 2), hexbuf,
  2091. rep->v1.prob,
  2092. type);
  2093. res->option = rspamd_mempool_strdup(task->task_pool, buf);
  2094. g_ptr_array_add(session->results, res);
  2095. /* Store hex string in pool variable */
  2096. hex_result = rspamd_mempool_alloc(task->task_pool,
  2097. sizeof(rspamd_fstring_t) + sizeof(hexbuf));
  2098. memcpy(hex_result->str, hexbuf, sizeof(hexbuf));
  2099. hex_result->len = sizeof(hexbuf) - 1;
  2100. hex_result->allocated = (gsize) -1;
  2101. fuzzy_var = rspamd_mempool_get_variable(task->task_pool,
  2102. RSPAMD_MEMPOOL_FUZZY_RESULT);
  2103. if (fuzzy_var == NULL) {
  2104. fuzzy_var = g_list_prepend(NULL, hex_result);
  2105. rspamd_mempool_set_variable(task->task_pool,
  2106. RSPAMD_MEMPOOL_FUZZY_RESULT, fuzzy_var,
  2107. (rspamd_mempool_destruct_t) g_list_free);
  2108. }
  2109. else {
  2110. /* Not very efficient, but we don't really use it intensively */
  2111. fuzzy_var = g_list_append(fuzzy_var, hex_result);
  2112. }
  2113. }
  2114. }
  2115. static gint
  2116. fuzzy_check_try_read(struct fuzzy_client_session *session)
  2117. {
  2118. struct rspamd_task *task;
  2119. const struct rspamd_fuzzy_reply *rep;
  2120. struct rspamd_fuzzy_cmd *cmd = NULL;
  2121. struct fuzzy_cmd_io *io = NULL;
  2122. gint r, ret;
  2123. guchar buf[2048], *p;
  2124. task = session->task;
  2125. if ((r = read(session->fd, buf, sizeof(buf) - 1)) == -1) {
  2126. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  2127. return 0;
  2128. }
  2129. else {
  2130. return -1;
  2131. }
  2132. }
  2133. else {
  2134. p = buf;
  2135. ret = 0;
  2136. while ((rep = fuzzy_process_reply(&p, &r,
  2137. session->commands, session->rule, &cmd, &io)) != NULL) {
  2138. if (rep->v1.prob > 0.5) {
  2139. if (cmd->cmd == FUZZY_CHECK) {
  2140. fuzzy_insert_result(session, rep, cmd, io, rep->v1.flag);
  2141. }
  2142. else if (cmd->cmd == FUZZY_STAT) {
  2143. /*
  2144. * We store fuzzy stat in the following way:
  2145. * 1) We store fuzzy hashes as a hash of rspamd_fuzzy_stat_entry
  2146. * 2) We store the resulting hash table inside pool variable `fuzzy_stat`
  2147. */
  2148. struct rspamd_fuzzy_stat_entry *pval;
  2149. GHashTable *stats_hash;
  2150. stats_hash = (GHashTable *) rspamd_mempool_get_variable(task->task_pool,
  2151. RSPAMD_MEMPOOL_FUZZY_STAT);
  2152. if (stats_hash == NULL) {
  2153. stats_hash = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
  2154. rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_FUZZY_STAT,
  2155. stats_hash,
  2156. (rspamd_mempool_destruct_t) g_hash_table_destroy);
  2157. }
  2158. pval = g_hash_table_lookup(stats_hash, session->rule->name);
  2159. if (pval == NULL) {
  2160. pval = rspamd_mempool_alloc(task->task_pool,
  2161. sizeof(*pval));
  2162. pval->name = rspamd_mempool_strdup(task->task_pool,
  2163. session->rule->name);
  2164. /* Safe, as pval->name is owned by the pool */
  2165. g_hash_table_insert(stats_hash, (char *) pval->name, pval);
  2166. }
  2167. pval->fuzzy_cnt = (((guint64) rep->v1.value) << 32) + rep->v1.flag;
  2168. }
  2169. }
  2170. else if (rep->v1.value == 403) {
  2171. rspamd_task_insert_result(task, "FUZZY_BLOCKED", 0.0,
  2172. session->rule->name);
  2173. }
  2174. else if (rep->v1.value == 401) {
  2175. if (cmd->cmd != FUZZY_CHECK) {
  2176. msg_info_task(
  2177. "fuzzy check error for %d: skipped by server",
  2178. rep->v1.flag);
  2179. }
  2180. }
  2181. else if (rep->v1.value != 0) {
  2182. msg_info_task(
  2183. "fuzzy check error for %d: unknown error (%d)",
  2184. rep->v1.flag,
  2185. rep->v1.value);
  2186. }
  2187. ret = 1;
  2188. }
  2189. }
  2190. return ret;
  2191. }
  2192. static void
  2193. fuzzy_insert_metric_results(struct rspamd_task *task, struct fuzzy_rule *rule,
  2194. GPtrArray *results)
  2195. {
  2196. struct fuzzy_client_result *res;
  2197. guint i;
  2198. gboolean seen_text_hash = FALSE,
  2199. seen_img_hash = FALSE,
  2200. seen_text_part = FALSE,
  2201. seen_long_text = FALSE;
  2202. gdouble prob_txt = 0.0, mult;
  2203. struct rspamd_mime_text_part *tp;
  2204. /* About 5 words */
  2205. static const unsigned int text_length_cutoff = 25;
  2206. PTR_ARRAY_FOREACH(results, i, res)
  2207. {
  2208. if (res->type == FUZZY_RESULT_TXT) {
  2209. seen_text_hash = TRUE;
  2210. prob_txt = MAX(prob_txt, res->prob);
  2211. }
  2212. else if (res->type == FUZZY_RESULT_IMG) {
  2213. seen_img_hash = TRUE;
  2214. }
  2215. }
  2216. if (task->message) {
  2217. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp)
  2218. {
  2219. if (!IS_TEXT_PART_EMPTY(tp) && tp->utf_words != NULL && tp->utf_words->len > 0) {
  2220. seen_text_part = TRUE;
  2221. if (tp->utf_stripped_text.magic == UTEXT_MAGIC) {
  2222. if (utext_isLengthExpensive(&tp->utf_stripped_text)) {
  2223. seen_long_text =
  2224. utext_nativeLength(&tp->utf_stripped_text) >
  2225. text_length_cutoff;
  2226. }
  2227. else {
  2228. /* Cannot directly calculate length */
  2229. seen_long_text =
  2230. (tp->utf_stripped_content->len / 2) >
  2231. text_length_cutoff;
  2232. }
  2233. }
  2234. }
  2235. }
  2236. }
  2237. PTR_ARRAY_FOREACH(results, i, res)
  2238. {
  2239. mult = 1.0;
  2240. if (res->type == FUZZY_RESULT_IMG) {
  2241. if (!seen_text_hash) {
  2242. if (seen_long_text) {
  2243. mult *= 0.25;
  2244. }
  2245. else if (seen_text_part) {
  2246. /* We have some short text + image */
  2247. mult *= 0.9;
  2248. }
  2249. /* Otherwise apply full score */
  2250. }
  2251. else if (prob_txt < 0.75) {
  2252. /* Penalize sole image without matching text */
  2253. if (prob_txt > 0.5) {
  2254. mult *= prob_txt;
  2255. }
  2256. else {
  2257. mult *= 0.5; /* cutoff */
  2258. }
  2259. }
  2260. }
  2261. else if (res->type == FUZZY_RESULT_TXT) {
  2262. if (seen_img_hash) {
  2263. /* Slightly increase score */
  2264. mult = 1.1;
  2265. }
  2266. }
  2267. gdouble weight = res->score * mult;
  2268. if (!isnan(rule->weight_threshold)) {
  2269. if (weight >= rule->weight_threshold) {
  2270. rspamd_task_insert_result_single(task, res->symbol,
  2271. weight, res->option);
  2272. }
  2273. else {
  2274. msg_info_task("%s is not added: weight=%.4f below threshold",
  2275. res->symbol, weight);
  2276. }
  2277. }
  2278. else {
  2279. rspamd_task_insert_result_single(task, res->symbol,
  2280. weight, res->option);
  2281. }
  2282. }
  2283. }
  2284. static gboolean
  2285. fuzzy_check_session_is_completed(struct fuzzy_client_session *session)
  2286. {
  2287. struct fuzzy_cmd_io *io;
  2288. guint nreplied = 0, i;
  2289. rspamd_upstream_ok(session->server);
  2290. for (i = 0; i < session->commands->len; i++) {
  2291. io = g_ptr_array_index(session->commands, i);
  2292. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  2293. nreplied++;
  2294. }
  2295. }
  2296. if (nreplied == session->commands->len) {
  2297. fuzzy_insert_metric_results(session->task, session->rule, session->results);
  2298. if (session->item) {
  2299. rspamd_symcache_item_async_dec_check(session->task, session->item, M);
  2300. }
  2301. rspamd_session_remove_event(session->task->s, fuzzy_io_fin, session);
  2302. return TRUE;
  2303. }
  2304. return FALSE;
  2305. }
  2306. /* Fuzzy check timeout callback */
  2307. static void
  2308. fuzzy_check_timer_callback(gint fd, short what, void *arg)
  2309. {
  2310. struct fuzzy_client_session *session = arg;
  2311. struct rspamd_task *task;
  2312. task = session->task;
  2313. /* We might be here because of other checks being slow */
  2314. if (fuzzy_check_try_read(session) > 0) {
  2315. if (fuzzy_check_session_is_completed(session)) {
  2316. return;
  2317. }
  2318. }
  2319. if (session->retransmits >= session->rule->retransmits) {
  2320. msg_err_task("got IO timeout with server %s(%s), after %d/%d retransmits",
  2321. rspamd_upstream_name(session->server),
  2322. rspamd_inet_address_to_string_pretty(
  2323. rspamd_upstream_addr_cur(session->server)),
  2324. session->retransmits,
  2325. session->rule->retransmits);
  2326. rspamd_upstream_fail(session->server, TRUE, "timeout");
  2327. if (session->item) {
  2328. rspamd_symcache_item_async_dec_check(session->task, session->item, M);
  2329. }
  2330. rspamd_session_remove_event(session->task->s, fuzzy_io_fin, session);
  2331. }
  2332. else {
  2333. /* Plan write event */
  2334. rspamd_ev_watcher_reschedule(session->event_loop,
  2335. &session->ev, EV_READ | EV_WRITE);
  2336. session->retransmits++;
  2337. }
  2338. }
  2339. /* Fuzzy check callback */
  2340. static void
  2341. fuzzy_check_io_callback(gint fd, short what, void *arg)
  2342. {
  2343. struct fuzzy_client_session *session = arg;
  2344. struct rspamd_task *task;
  2345. gint r;
  2346. enum {
  2347. return_error = 0,
  2348. return_want_more,
  2349. return_finished
  2350. } ret = return_error;
  2351. task = session->task;
  2352. if ((what & EV_READ) || session->state == 1) {
  2353. /* Try to read reply */
  2354. r = fuzzy_check_try_read(session);
  2355. switch (r) {
  2356. case 0:
  2357. if (what & EV_READ) {
  2358. ret = return_want_more;
  2359. }
  2360. else {
  2361. if (what & EV_WRITE) {
  2362. /* Retransmit attempt */
  2363. if (!fuzzy_cmd_vector_to_wire(fd, session->commands)) {
  2364. ret = return_error;
  2365. }
  2366. else {
  2367. session->state = 1;
  2368. ret = return_want_more;
  2369. }
  2370. }
  2371. else {
  2372. /* It is actually time out */
  2373. fuzzy_check_timer_callback(fd, what, arg);
  2374. return;
  2375. }
  2376. }
  2377. break;
  2378. case 1:
  2379. ret = return_finished;
  2380. break;
  2381. default:
  2382. ret = return_error;
  2383. break;
  2384. }
  2385. }
  2386. else if (what & EV_WRITE) {
  2387. if (!fuzzy_cmd_vector_to_wire(fd, session->commands)) {
  2388. ret = return_error;
  2389. }
  2390. else {
  2391. session->state = 1;
  2392. ret = return_want_more;
  2393. }
  2394. }
  2395. else {
  2396. fuzzy_check_timer_callback(fd, what, arg);
  2397. return;
  2398. }
  2399. if (ret == return_want_more) {
  2400. /* Processed write, switch to reading */
  2401. rspamd_ev_watcher_reschedule(session->event_loop,
  2402. &session->ev, EV_READ);
  2403. }
  2404. else if (ret == return_error) {
  2405. /* Error state */
  2406. msg_err_task("got error on IO with server %s(%s), on %s, %d, %s",
  2407. rspamd_upstream_name(session->server),
  2408. rspamd_inet_address_to_string_pretty(
  2409. rspamd_upstream_addr_cur(session->server)),
  2410. session->state == 1 ? "read" : "write",
  2411. errno,
  2412. strerror(errno));
  2413. rspamd_upstream_fail(session->server, TRUE, strerror(errno));
  2414. if (session->item) {
  2415. rspamd_symcache_item_async_dec_check(session->task, session->item, M);
  2416. }
  2417. rspamd_session_remove_event(session->task->s, fuzzy_io_fin, session);
  2418. }
  2419. else {
  2420. /* Read something from network */
  2421. if (!fuzzy_check_session_is_completed(session)) {
  2422. /* Need to read more */
  2423. rspamd_ev_watcher_reschedule(session->event_loop,
  2424. &session->ev, EV_READ);
  2425. }
  2426. }
  2427. }
  2428. static void
  2429. fuzzy_controller_lua_fin(void *ud)
  2430. {
  2431. struct fuzzy_learn_session *session = ud;
  2432. (*session->saved)--;
  2433. rspamd_ev_watcher_stop(session->event_loop, &session->ev);
  2434. close(session->fd);
  2435. }
  2436. /* Controller IO */
  2437. static void
  2438. fuzzy_controller_timer_callback(gint fd, short what, void *arg)
  2439. {
  2440. struct fuzzy_learn_session *session = arg;
  2441. struct rspamd_task *task;
  2442. task = session->task;
  2443. if (session->retransmits >= session->rule->retransmits) {
  2444. rspamd_upstream_fail(session->server, TRUE, "timeout");
  2445. msg_err_task_check("got IO timeout with server %s(%s), "
  2446. "after %d/%d retransmits",
  2447. rspamd_upstream_name(session->server),
  2448. rspamd_inet_address_to_string_pretty(
  2449. rspamd_upstream_addr_cur(session->server)),
  2450. session->retransmits,
  2451. session->rule->retransmits);
  2452. if (session->session) {
  2453. rspamd_session_remove_event(session->session, fuzzy_controller_lua_fin,
  2454. session);
  2455. }
  2456. else {
  2457. if (session->http_entry) {
  2458. rspamd_controller_send_error(session->http_entry,
  2459. 500, "IO timeout with fuzzy storage");
  2460. }
  2461. if (*session->saved > 0) {
  2462. (*session->saved)--;
  2463. if (*session->saved == 0) {
  2464. if (session->http_entry) {
  2465. rspamd_task_free(session->task);
  2466. }
  2467. session->task = NULL;
  2468. }
  2469. }
  2470. if (session->http_entry) {
  2471. rspamd_http_connection_unref(session->http_entry->conn);
  2472. }
  2473. rspamd_ev_watcher_stop(session->event_loop,
  2474. &session->ev);
  2475. close(session->fd);
  2476. }
  2477. }
  2478. else {
  2479. /* Plan write event */
  2480. rspamd_ev_watcher_reschedule(session->event_loop,
  2481. &session->ev, EV_READ | EV_WRITE);
  2482. session->retransmits++;
  2483. }
  2484. }
  2485. static void
  2486. fuzzy_controller_io_callback(gint fd, short what, void *arg)
  2487. {
  2488. struct fuzzy_learn_session *session = arg;
  2489. const struct rspamd_fuzzy_reply *rep;
  2490. struct fuzzy_mapping *map;
  2491. struct rspamd_task *task;
  2492. guchar buf[2048], *p;
  2493. struct fuzzy_cmd_io *io;
  2494. struct rspamd_fuzzy_cmd *cmd = NULL;
  2495. const gchar *symbol, *ftype;
  2496. gint r;
  2497. enum {
  2498. return_error = 0,
  2499. return_want_more,
  2500. return_finished
  2501. } ret = return_want_more;
  2502. guint i, nreplied;
  2503. const gchar *op = "process";
  2504. task = session->task;
  2505. if (what & EV_READ) {
  2506. if ((r = read(fd, buf, sizeof(buf) - 1)) == -1) {
  2507. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  2508. rspamd_ev_watcher_reschedule(session->event_loop,
  2509. &session->ev, EV_READ);
  2510. return;
  2511. }
  2512. msg_info_task("cannot process fuzzy hash for message: %s",
  2513. strerror(errno));
  2514. session->err.error_message = "read socket error";
  2515. session->err.error_code = errno;
  2516. ret = return_error;
  2517. }
  2518. else {
  2519. p = buf;
  2520. ret = return_want_more;
  2521. while ((rep = fuzzy_process_reply(&p, &r,
  2522. session->commands, session->rule, &cmd, &io)) != NULL) {
  2523. if ((map =
  2524. g_hash_table_lookup(session->rule->mappings,
  2525. GINT_TO_POINTER(rep->v1.flag))) == NULL) {
  2526. /* Default symbol and default weight */
  2527. symbol = session->rule->symbol;
  2528. }
  2529. else {
  2530. /* Get symbol and weight from map */
  2531. symbol = map->symbol;
  2532. }
  2533. ftype = "bin";
  2534. if (io) {
  2535. if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  2536. ftype = "img";
  2537. }
  2538. else if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
  2539. ftype = "content";
  2540. }
  2541. else if (cmd->shingles_count > 0) {
  2542. ftype = "txt";
  2543. }
  2544. if (io->cmd.cmd == FUZZY_WRITE) {
  2545. op = "added";
  2546. }
  2547. else if (io->cmd.cmd == FUZZY_DEL) {
  2548. op = "deleted";
  2549. }
  2550. }
  2551. if (rep->v1.prob > 0.5) {
  2552. msg_info_task("%s fuzzy hash (%s) %*xs, list: %s:%d for "
  2553. "message <%s>",
  2554. op,
  2555. ftype,
  2556. (gint) sizeof(rep->digest), rep->digest,
  2557. symbol,
  2558. rep->v1.flag,
  2559. MESSAGE_FIELD_CHECK(session->task, message_id));
  2560. }
  2561. else {
  2562. if (rep->v1.value == 401) {
  2563. msg_info_task(
  2564. "fuzzy hash (%s) for message cannot be %s"
  2565. "<%s>, %*xs, "
  2566. "list %s:%d, skipped by server",
  2567. ftype,
  2568. op,
  2569. MESSAGE_FIELD_CHECK(session->task, message_id),
  2570. (gint) sizeof(rep->digest), rep->digest,
  2571. symbol,
  2572. rep->v1.flag);
  2573. session->err.error_message = "fuzzy hash is skipped";
  2574. session->err.error_code = rep->v1.value;
  2575. }
  2576. else {
  2577. msg_info_task(
  2578. "fuzzy hash (%s) for message cannot be %s"
  2579. "<%s>, %*xs, "
  2580. "list %s:%d, error: %d",
  2581. ftype,
  2582. op,
  2583. MESSAGE_FIELD_CHECK(session->task, message_id),
  2584. (gint) sizeof(rep->digest), rep->digest,
  2585. symbol,
  2586. rep->v1.flag,
  2587. rep->v1.value);
  2588. session->err.error_message = "process fuzzy error";
  2589. session->err.error_code = rep->v1.value;
  2590. }
  2591. ret = return_finished;
  2592. }
  2593. }
  2594. nreplied = 0;
  2595. for (i = 0; i < session->commands->len; i++) {
  2596. io = g_ptr_array_index(session->commands, i);
  2597. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  2598. nreplied++;
  2599. }
  2600. }
  2601. if (nreplied == session->commands->len) {
  2602. ret = return_finished;
  2603. }
  2604. }
  2605. }
  2606. else if (what & EV_WRITE) {
  2607. /* Send commands to storage */
  2608. if (!fuzzy_cmd_vector_to_wire(fd, session->commands)) {
  2609. session->err.error_message = "write socket error";
  2610. session->err.error_code = errno;
  2611. ret = return_error;
  2612. }
  2613. }
  2614. else {
  2615. fuzzy_controller_timer_callback(fd, what, arg);
  2616. return;
  2617. }
  2618. if (ret == return_want_more) {
  2619. rspamd_ev_watcher_reschedule(session->event_loop,
  2620. &session->ev, EV_READ);
  2621. return;
  2622. }
  2623. else if (ret == return_error) {
  2624. msg_err_task("got error in IO with server %s(%s), %d, %s",
  2625. rspamd_upstream_name(session->server),
  2626. rspamd_inet_address_to_string_pretty(
  2627. rspamd_upstream_addr_cur(session->server)),
  2628. errno, strerror(errno));
  2629. rspamd_upstream_fail(session->server, FALSE, strerror(errno));
  2630. }
  2631. /*
  2632. * XXX: actually, we check merely a single reply, which is not correct...
  2633. * XXX: when we send a command, we do not check if *all* commands have been
  2634. * written
  2635. * XXX: please, please, change this code some day
  2636. */
  2637. if (session->session == NULL) {
  2638. (*session->saved)--;
  2639. if (session->http_entry) {
  2640. rspamd_http_connection_unref(session->http_entry->conn);
  2641. }
  2642. rspamd_ev_watcher_stop(session->event_loop, &session->ev);
  2643. close(session->fd);
  2644. if (*session->saved == 0) {
  2645. goto cleanup;
  2646. }
  2647. }
  2648. else {
  2649. /* Lua handler */
  2650. rspamd_session_remove_event(session->session, fuzzy_controller_lua_fin, session);
  2651. }
  2652. return;
  2653. cleanup:
  2654. /*
  2655. * When we send learn commands to fuzzy storages, this code is executed
  2656. * *once* when we have queried all storages. We also don't know which
  2657. * storage has been failed.
  2658. *
  2659. * Therefore, we cleanup sessions earlier and actually this code is wrong.
  2660. */
  2661. if (session->err.error_code != 0) {
  2662. if (session->http_entry) {
  2663. rspamd_controller_send_error(session->http_entry,
  2664. session->err.error_code, session->err.error_message);
  2665. }
  2666. }
  2667. else {
  2668. rspamd_upstream_ok(session->server);
  2669. if (session->http_entry) {
  2670. ucl_object_t *reply, *hashes;
  2671. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  2672. reply = ucl_object_typed_new(UCL_OBJECT);
  2673. ucl_object_insert_key(reply, ucl_object_frombool(true),
  2674. "success", 0, false);
  2675. hashes = ucl_object_typed_new(UCL_ARRAY);
  2676. for (i = 0; i < session->commands->len; i++) {
  2677. io = g_ptr_array_index(session->commands, i);
  2678. rspamd_snprintf(hexbuf, sizeof(hexbuf), "%*xs",
  2679. (gint) sizeof(io->cmd.digest), io->cmd.digest);
  2680. ucl_array_append(hashes, ucl_object_fromstring(hexbuf));
  2681. }
  2682. ucl_object_insert_key(reply, hashes, "hashes", 0, false);
  2683. rspamd_controller_send_ucl(session->http_entry, reply);
  2684. ucl_object_unref(reply);
  2685. }
  2686. }
  2687. if (session->task != NULL) {
  2688. if (session->http_entry) {
  2689. rspamd_task_free(session->task);
  2690. }
  2691. session->task = NULL;
  2692. }
  2693. }
  2694. static GPtrArray *
  2695. fuzzy_generate_commands(struct rspamd_task *task, struct fuzzy_rule *rule,
  2696. gint c, gint flag, guint32 value, guint flags)
  2697. {
  2698. struct rspamd_mime_text_part *part;
  2699. struct rspamd_mime_part *mime_part;
  2700. struct rspamd_image *image;
  2701. struct fuzzy_cmd_io *io, *cur;
  2702. guint i, j;
  2703. GPtrArray *res = NULL;
  2704. gboolean check_part, fuzzy_check;
  2705. if (c == FUZZY_STAT) {
  2706. res = g_ptr_array_sized_new(1);
  2707. io = fuzzy_cmd_stat(rule, c, flag, value, task->task_pool);
  2708. if (io) {
  2709. g_ptr_array_add(res, io);
  2710. }
  2711. goto end;
  2712. }
  2713. else if (c == FUZZY_PING) {
  2714. res = g_ptr_array_sized_new(1);
  2715. io = fuzzy_cmd_ping(rule, task->task_pool);
  2716. if (io) {
  2717. g_ptr_array_add(res, io);
  2718. }
  2719. goto end;
  2720. }
  2721. if (task->message == NULL) {
  2722. goto end;
  2723. }
  2724. res = g_ptr_array_sized_new(MESSAGE_FIELD(task, parts)->len + 1);
  2725. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, mime_part)
  2726. {
  2727. check_part = FALSE;
  2728. fuzzy_check = FALSE;
  2729. if (fuzzy_rule_check_mimepart(task, rule, mime_part, &check_part,
  2730. &fuzzy_check)) {
  2731. io = NULL;
  2732. if (check_part) {
  2733. if (mime_part->part_type == RSPAMD_MIME_PART_TEXT &&
  2734. !(flags & FUZZY_CHECK_FLAG_NOTEXT)) {
  2735. part = mime_part->specific.txt;
  2736. io = fuzzy_cmd_from_text_part(task, rule,
  2737. c,
  2738. flag,
  2739. value,
  2740. !fuzzy_check,
  2741. part,
  2742. mime_part);
  2743. }
  2744. else if (mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&
  2745. !(flags & FUZZY_CHECK_FLAG_NOIMAGES)) {
  2746. image = mime_part->specific.img;
  2747. io = fuzzy_cmd_from_data_part(rule, c, flag, value,
  2748. task,
  2749. image->parent->digest,
  2750. mime_part);
  2751. io->flags |= FUZZY_CMD_FLAG_IMAGE;
  2752. }
  2753. else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
  2754. const struct rspamd_lua_specific_part *lua_spec;
  2755. lua_spec = &mime_part->specific.lua_specific;
  2756. if (lua_spec->type == RSPAMD_LUA_PART_TABLE) {
  2757. lua_State *L = (lua_State *) task->cfg->lua_state;
  2758. gint old_top;
  2759. old_top = lua_gettop(L);
  2760. /* Push table */
  2761. lua_rawgeti(L, LUA_REGISTRYINDEX, lua_spec->cbref);
  2762. lua_pushstring(L, "fuzzy_hashes");
  2763. lua_gettable(L, -2);
  2764. if (lua_type(L, -1) == LUA_TTABLE) {
  2765. gint tbl_pos = lua_gettop(L);
  2766. for (lua_pushnil(L); lua_next(L, tbl_pos);
  2767. lua_pop(L, 1)) {
  2768. const gchar *h = NULL;
  2769. gsize hlen = 0;
  2770. if (lua_isstring(L, -1)) {
  2771. h = lua_tolstring(L, -1, &hlen);
  2772. }
  2773. else if (lua_type(L, -1) == LUA_TUSERDATA) {
  2774. struct rspamd_lua_text *t;
  2775. t = lua_check_text(L, -1);
  2776. if (t) {
  2777. h = t->start;
  2778. hlen = t->len;
  2779. }
  2780. }
  2781. if (hlen == rspamd_cryptobox_HASHBYTES) {
  2782. io = fuzzy_cmd_from_data_part(rule, c,
  2783. flag, value,
  2784. task,
  2785. (guchar *) h,
  2786. mime_part);
  2787. if (io) {
  2788. io->flags |= FUZZY_CMD_FLAG_CONTENT;
  2789. g_ptr_array_add(res, io);
  2790. }
  2791. }
  2792. }
  2793. }
  2794. lua_settop(L, old_top);
  2795. /*
  2796. * Add part itself as well
  2797. */
  2798. io = fuzzy_cmd_from_data_part(rule, c,
  2799. flag, value,
  2800. task,
  2801. mime_part->digest,
  2802. mime_part);
  2803. }
  2804. }
  2805. else {
  2806. io = fuzzy_cmd_from_data_part(rule, c, flag, value,
  2807. task,
  2808. mime_part->digest, mime_part);
  2809. }
  2810. if (io) {
  2811. gboolean skip_existing = FALSE;
  2812. PTR_ARRAY_FOREACH(res, j, cur)
  2813. {
  2814. if (memcmp(cur->cmd.digest, io->cmd.digest,
  2815. sizeof(io->cmd.digest)) == 0) {
  2816. skip_existing = TRUE;
  2817. break;
  2818. }
  2819. }
  2820. if (!skip_existing) {
  2821. g_ptr_array_add(res, io);
  2822. }
  2823. }
  2824. }
  2825. }
  2826. }
  2827. end:
  2828. if (res && res->len == 0) {
  2829. g_ptr_array_free(res, TRUE);
  2830. return NULL;
  2831. }
  2832. return res;
  2833. }
  2834. static inline void
  2835. register_fuzzy_client_call(struct rspamd_task *task,
  2836. struct fuzzy_rule *rule,
  2837. GPtrArray *commands)
  2838. {
  2839. struct fuzzy_client_session *session;
  2840. struct upstream *selected;
  2841. rspamd_inet_addr_t *addr;
  2842. gint sock;
  2843. if (!rspamd_session_blocked(task->s)) {
  2844. /* Get upstream */
  2845. selected = rspamd_upstream_get(rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN,
  2846. NULL, 0);
  2847. if (selected) {
  2848. addr = rspamd_upstream_addr_next(selected);
  2849. if ((sock = rspamd_inet_address_connect(addr, SOCK_DGRAM, TRUE)) == -1) {
  2850. msg_warn_task("cannot connect to %s(%s), %d, %s",
  2851. rspamd_upstream_name(selected),
  2852. rspamd_inet_address_to_string_pretty(addr),
  2853. errno,
  2854. strerror(errno));
  2855. rspamd_upstream_fail(selected, TRUE, strerror(errno));
  2856. g_ptr_array_free(commands, TRUE);
  2857. }
  2858. else {
  2859. /* Create session for a socket */
  2860. session =
  2861. rspamd_mempool_alloc0(task->task_pool,
  2862. sizeof(struct fuzzy_client_session));
  2863. session->state = 0;
  2864. session->commands = commands;
  2865. session->task = task;
  2866. session->fd = sock;
  2867. session->server = selected;
  2868. session->rule = rule;
  2869. session->results = g_ptr_array_sized_new(32);
  2870. session->event_loop = task->event_loop;
  2871. rspamd_ev_watcher_init(&session->ev,
  2872. sock,
  2873. EV_WRITE,
  2874. fuzzy_check_io_callback,
  2875. session);
  2876. rspamd_ev_watcher_start(session->event_loop, &session->ev,
  2877. rule->io_timeout);
  2878. rspamd_session_add_event(task->s, fuzzy_io_fin, session, M);
  2879. session->item = rspamd_symcache_get_cur_item(task);
  2880. if (session->item) {
  2881. rspamd_symcache_item_async_inc(task, session->item, M);
  2882. }
  2883. }
  2884. }
  2885. }
  2886. }
  2887. /* This callback is called when we check message in fuzzy hashes storage */
  2888. static void
  2889. fuzzy_symbol_callback(struct rspamd_task *task,
  2890. struct rspamd_symcache_dynamic_item *item,
  2891. void *unused)
  2892. {
  2893. struct fuzzy_rule *rule;
  2894. guint i;
  2895. GPtrArray *commands;
  2896. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  2897. if (!fuzzy_module_ctx->enabled) {
  2898. rspamd_symcache_finalize_item(task, item);
  2899. return;
  2900. }
  2901. /* Check whitelist */
  2902. if (fuzzy_module_ctx->whitelist) {
  2903. if (rspamd_match_radix_map_addr(fuzzy_module_ctx->whitelist,
  2904. task->from_addr) != NULL) {
  2905. msg_info_task("<%s>, address %s is whitelisted, skip fuzzy check",
  2906. MESSAGE_FIELD(task, message_id),
  2907. rspamd_inet_address_to_string(task->from_addr));
  2908. rspamd_symcache_finalize_item(task, item);
  2909. return;
  2910. }
  2911. }
  2912. rspamd_symcache_item_async_inc(task, item, M);
  2913. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  2914. {
  2915. commands = fuzzy_generate_commands(task, rule, FUZZY_CHECK, 0, 0, 0);
  2916. if (commands != NULL) {
  2917. register_fuzzy_client_call(task, rule, commands);
  2918. }
  2919. }
  2920. rspamd_symcache_item_async_dec_check(task, item, M);
  2921. }
  2922. void fuzzy_stat_command(struct rspamd_task *task)
  2923. {
  2924. struct fuzzy_rule *rule;
  2925. guint i;
  2926. GPtrArray *commands;
  2927. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  2928. if (!fuzzy_module_ctx->enabled) {
  2929. return;
  2930. }
  2931. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  2932. {
  2933. commands = fuzzy_generate_commands(task, rule, FUZZY_STAT, 0, 0, 0);
  2934. if (commands != NULL) {
  2935. register_fuzzy_client_call(task, rule, commands);
  2936. }
  2937. }
  2938. }
  2939. static inline gint
  2940. register_fuzzy_controller_call(struct rspamd_http_connection_entry *entry,
  2941. struct fuzzy_rule *rule,
  2942. struct rspamd_task *task,
  2943. GPtrArray *commands,
  2944. gint *saved)
  2945. {
  2946. struct fuzzy_learn_session *s;
  2947. struct upstream *selected;
  2948. rspamd_inet_addr_t *addr;
  2949. struct rspamd_controller_session *session = entry->ud;
  2950. gint sock;
  2951. gint ret = -1;
  2952. /* Get upstream */
  2953. while ((selected = rspamd_upstream_get_forced(rule->servers,
  2954. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  2955. /* Create UDP socket */
  2956. addr = rspamd_upstream_addr_next(selected);
  2957. if ((sock = rspamd_inet_address_connect(addr,
  2958. SOCK_DGRAM, TRUE)) == -1) {
  2959. msg_warn_task("cannot connect to fuzzy storage %s (%s rule): %s",
  2960. rspamd_inet_address_to_string_pretty(addr),
  2961. rule->name,
  2962. strerror(errno));
  2963. rspamd_upstream_fail(selected, TRUE, strerror(errno));
  2964. }
  2965. else {
  2966. s =
  2967. rspamd_mempool_alloc0(session->pool,
  2968. sizeof(struct fuzzy_learn_session));
  2969. s->task = task;
  2970. s->commands = commands;
  2971. s->http_entry = entry;
  2972. s->server = selected;
  2973. s->saved = saved;
  2974. s->fd = sock;
  2975. s->rule = rule;
  2976. s->event_loop = task->event_loop;
  2977. /* We ref connection to avoid freeing before we process fuzzy rule */
  2978. rspamd_http_connection_ref(entry->conn);
  2979. rspamd_ev_watcher_init(&s->ev,
  2980. sock,
  2981. EV_WRITE,
  2982. fuzzy_controller_io_callback,
  2983. s);
  2984. rspamd_ev_watcher_start(s->event_loop, &s->ev, rule->io_timeout);
  2985. (*saved)++;
  2986. ret = 1;
  2987. }
  2988. }
  2989. return ret;
  2990. }
  2991. static void
  2992. fuzzy_process_handler(struct rspamd_http_connection_entry *conn_ent,
  2993. struct rspamd_http_message *msg, gint cmd, gint value, gint flag,
  2994. struct fuzzy_ctx *ctx, gboolean is_hash, guint flags)
  2995. {
  2996. struct fuzzy_rule *rule;
  2997. struct rspamd_controller_session *session = conn_ent->ud;
  2998. struct rspamd_task *task, **ptask;
  2999. gboolean processed = FALSE, skip = FALSE;
  3000. gint res = 0;
  3001. guint i;
  3002. GPtrArray *commands;
  3003. lua_State *L;
  3004. gint r, *saved, rules = 0, err_idx;
  3005. struct fuzzy_ctx *fuzzy_module_ctx;
  3006. /* Prepare task */
  3007. task = rspamd_task_new(session->wrk, session->cfg, NULL,
  3008. session->lang_det, conn_ent->rt->event_loop, FALSE);
  3009. task->cfg = ctx->cfg;
  3010. saved = rspamd_mempool_alloc0(session->pool, sizeof(gint));
  3011. fuzzy_module_ctx = fuzzy_get_context(ctx->cfg);
  3012. if (!is_hash) {
  3013. /* Allocate message from string */
  3014. /* XXX: what about encrypted messages ? */
  3015. task->msg.begin = msg->body_buf.begin;
  3016. task->msg.len = msg->body_buf.len;
  3017. r = rspamd_message_parse(task);
  3018. if (r == -1) {
  3019. msg_warn_task("<%s>: cannot process message for fuzzy",
  3020. MESSAGE_FIELD(task, message_id));
  3021. rspamd_task_free(task);
  3022. rspamd_controller_send_error(conn_ent, 400,
  3023. "Message processing error");
  3024. return;
  3025. }
  3026. rspamd_message_process(task);
  3027. }
  3028. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3029. {
  3030. if (rule->read_only) {
  3031. continue;
  3032. }
  3033. /* Check for flag */
  3034. if (g_hash_table_lookup(rule->mappings,
  3035. GINT_TO_POINTER(flag)) == NULL) {
  3036. msg_info_task("skip rule %s as it has no flag %d defined"
  3037. " false",
  3038. rule->name, flag);
  3039. continue;
  3040. }
  3041. /* Check learn condition */
  3042. if (rule->learn_condition_cb != -1) {
  3043. skip = FALSE;
  3044. L = session->cfg->lua_state;
  3045. lua_pushcfunction(L, &rspamd_lua_traceback);
  3046. err_idx = lua_gettop(L);
  3047. lua_rawgeti(L, LUA_REGISTRYINDEX, rule->learn_condition_cb);
  3048. ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
  3049. *ptask = task;
  3050. rspamd_lua_setclass(L, rspamd_task_classname, -1);
  3051. if (lua_pcall(L, 1, LUA_MULTRET, err_idx) != 0) {
  3052. msg_err_task("call to fuzzy learn condition failed: %s",
  3053. lua_tostring(L, -1));
  3054. }
  3055. else {
  3056. if (lua_gettop(L) > err_idx + 1) {
  3057. /* 2 return values */
  3058. skip = !(lua_toboolean(L, err_idx + 1));
  3059. if (lua_isnumber(L, err_idx + 2)) {
  3060. msg_info_task("learn condition changed flag from %d to "
  3061. "%d",
  3062. flag,
  3063. (gint) lua_tonumber(L, err_idx + 2));
  3064. flag = lua_tonumber(L, err_idx + 2);
  3065. }
  3066. }
  3067. else {
  3068. if (lua_isboolean(L, err_idx + 1)) {
  3069. skip = !(lua_toboolean(L, err_idx + 1));
  3070. }
  3071. else {
  3072. msg_warn_task("set skip for rule %s as its condition "
  3073. "callback returned"
  3074. " a valid boolean",
  3075. rule->name);
  3076. skip = TRUE;
  3077. }
  3078. }
  3079. }
  3080. /* Result + error function */
  3081. lua_settop(L, err_idx - 1);
  3082. if (skip) {
  3083. msg_info_task("skip rule %s by condition callback",
  3084. rule->name);
  3085. continue;
  3086. }
  3087. }
  3088. rules++;
  3089. res = 0;
  3090. if (is_hash) {
  3091. GPtrArray *args;
  3092. const rspamd_ftok_t *arg;
  3093. guint j;
  3094. args = rspamd_http_message_find_header_multiple(msg, "Hash");
  3095. if (args) {
  3096. struct fuzzy_cmd_io *io;
  3097. commands = g_ptr_array_sized_new(args->len);
  3098. for (j = 0; j < args->len; j++) {
  3099. arg = g_ptr_array_index(args, j);
  3100. io = fuzzy_cmd_hash(rule, cmd, arg, flag, value,
  3101. task->task_pool);
  3102. if (io) {
  3103. g_ptr_array_add(commands, io);
  3104. }
  3105. }
  3106. res = register_fuzzy_controller_call(conn_ent,
  3107. rule,
  3108. task,
  3109. commands,
  3110. saved);
  3111. rspamd_mempool_add_destructor(task->task_pool,
  3112. rspamd_ptr_array_free_hard, commands);
  3113. g_ptr_array_free(args, TRUE);
  3114. }
  3115. else {
  3116. rspamd_controller_send_error(conn_ent, 400,
  3117. "No hash defined");
  3118. rspamd_task_free(task);
  3119. return;
  3120. }
  3121. }
  3122. else {
  3123. commands = fuzzy_generate_commands(task, rule, cmd, flag, value,
  3124. flags);
  3125. if (commands != NULL) {
  3126. res = register_fuzzy_controller_call(conn_ent,
  3127. rule,
  3128. task,
  3129. commands,
  3130. saved);
  3131. rspamd_mempool_add_destructor(task->task_pool,
  3132. rspamd_ptr_array_free_hard, commands);
  3133. }
  3134. }
  3135. if (res > 0) {
  3136. processed = TRUE;
  3137. }
  3138. }
  3139. if (res == -1) {
  3140. if (!processed) {
  3141. msg_warn_task("cannot send fuzzy request: %s",
  3142. strerror(errno));
  3143. rspamd_controller_send_error(conn_ent, 400, "Message sending error");
  3144. rspamd_task_free(task);
  3145. return;
  3146. }
  3147. else {
  3148. /* Some rules failed and some rules are OK */
  3149. msg_warn_task("some rules are not processed, but we still sent this request");
  3150. }
  3151. }
  3152. else if (!processed) {
  3153. if (rules) {
  3154. msg_warn_task("no content to generate fuzzy");
  3155. rspamd_controller_send_error(conn_ent, 404,
  3156. "No content to generate fuzzy for flag %d", flag);
  3157. }
  3158. else {
  3159. if (skip) {
  3160. rspamd_controller_send_error(conn_ent, 403,
  3161. "Message is conditionally skipped for flag %d", flag);
  3162. }
  3163. else {
  3164. msg_warn_task("no fuzzy rules found for flag %d", flag);
  3165. rspamd_controller_send_error(conn_ent, 404,
  3166. "No fuzzy rules matched for flag %d", flag);
  3167. }
  3168. }
  3169. rspamd_task_free(task);
  3170. }
  3171. }
  3172. static int
  3173. fuzzy_controller_handler(struct rspamd_http_connection_entry *conn_ent,
  3174. struct rspamd_http_message *msg, struct module_ctx *ctx, gint cmd,
  3175. gboolean is_hash)
  3176. {
  3177. const rspamd_ftok_t *arg;
  3178. glong value = 1, flag = 0, send_flags = 0;
  3179. struct fuzzy_ctx *fuzzy_module_ctx = (struct fuzzy_ctx *) ctx;
  3180. if (!fuzzy_module_ctx->enabled) {
  3181. msg_err("fuzzy_check module is not enabled");
  3182. rspamd_controller_send_error(conn_ent, 500, "Module disabled");
  3183. return 0;
  3184. }
  3185. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  3186. msg_err("fuzzy_check module has no rules defined");
  3187. rspamd_controller_send_error(conn_ent, 500, "Module has no rules");
  3188. return 0;
  3189. }
  3190. /* Get size */
  3191. arg = rspamd_http_message_find_header(msg, "Weight");
  3192. if (arg) {
  3193. errno = 0;
  3194. if (!rspamd_strtol(arg->begin, arg->len, &value)) {
  3195. msg_info("error converting numeric argument %T", arg);
  3196. }
  3197. }
  3198. arg = rspamd_http_message_find_header(msg, "Flag");
  3199. if (arg) {
  3200. errno = 0;
  3201. if (!rspamd_strtol(arg->begin, arg->len, &flag)) {
  3202. msg_info("error converting numeric argument %T", arg);
  3203. flag = 0;
  3204. }
  3205. }
  3206. else {
  3207. flag = 0;
  3208. arg = rspamd_http_message_find_header(msg, "Symbol");
  3209. /* Search flag by symbol */
  3210. if (arg) {
  3211. struct fuzzy_rule *rule;
  3212. guint i;
  3213. GHashTableIter it;
  3214. gpointer k, v;
  3215. struct fuzzy_mapping *map;
  3216. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3217. {
  3218. if (flag != 0) {
  3219. break;
  3220. }
  3221. g_hash_table_iter_init(&it, rule->mappings);
  3222. while (g_hash_table_iter_next(&it, &k, &v)) {
  3223. map = v;
  3224. if (strlen(map->symbol) == arg->len &&
  3225. rspamd_lc_cmp(map->symbol, arg->begin, arg->len) == 0) {
  3226. flag = map->fuzzy_flag;
  3227. break;
  3228. }
  3229. }
  3230. }
  3231. }
  3232. }
  3233. if (flag == 0) {
  3234. msg_err("no flag defined to learn fuzzy");
  3235. rspamd_controller_send_error(conn_ent, 404, "Unknown or missing flag");
  3236. return 0;
  3237. }
  3238. arg = rspamd_http_message_find_header(msg, "Skip-Images");
  3239. if (arg) {
  3240. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3241. }
  3242. arg = rspamd_http_message_find_header(msg, "Skip-Attachments");
  3243. if (arg) {
  3244. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3245. }
  3246. arg = rspamd_http_message_find_header(msg, "Skip-Text");
  3247. if (arg) {
  3248. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3249. }
  3250. fuzzy_process_handler(conn_ent, msg, cmd, value, flag,
  3251. (struct fuzzy_ctx *) ctx, is_hash, send_flags);
  3252. return 0;
  3253. }
  3254. static inline gint
  3255. fuzzy_check_send_lua_learn(struct fuzzy_rule *rule,
  3256. struct rspamd_task *task,
  3257. GPtrArray *commands,
  3258. gint *saved)
  3259. {
  3260. struct fuzzy_learn_session *s;
  3261. struct upstream *selected;
  3262. rspamd_inet_addr_t *addr;
  3263. gint sock;
  3264. gint ret = -1;
  3265. /* Get upstream */
  3266. if (!rspamd_session_blocked(task->s)) {
  3267. while ((selected = rspamd_upstream_get(rule->servers,
  3268. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  3269. /* Create UDP socket */
  3270. addr = rspamd_upstream_addr_next(selected);
  3271. if ((sock = rspamd_inet_address_connect(addr,
  3272. SOCK_DGRAM, TRUE)) == -1) {
  3273. rspamd_upstream_fail(selected, TRUE, strerror(errno));
  3274. }
  3275. else {
  3276. s =
  3277. rspamd_mempool_alloc0(task->task_pool,
  3278. sizeof(struct fuzzy_learn_session));
  3279. s->task = task;
  3280. s->commands = commands;
  3281. s->http_entry = NULL;
  3282. s->server = selected;
  3283. s->saved = saved;
  3284. s->fd = sock;
  3285. s->rule = rule;
  3286. s->session = task->s;
  3287. s->event_loop = task->event_loop;
  3288. rspamd_ev_watcher_init(&s->ev,
  3289. sock,
  3290. EV_WRITE,
  3291. fuzzy_controller_io_callback,
  3292. s);
  3293. rspamd_ev_watcher_start(s->event_loop, &s->ev,
  3294. rule->io_timeout);
  3295. rspamd_session_add_event(task->s,
  3296. fuzzy_controller_lua_fin,
  3297. s,
  3298. M);
  3299. (*saved)++;
  3300. ret = 1;
  3301. }
  3302. }
  3303. }
  3304. return ret;
  3305. }
  3306. static gboolean
  3307. fuzzy_check_lua_process_learn(struct rspamd_task *task,
  3308. gint cmd, gint value, gint flag, guint send_flags)
  3309. {
  3310. struct fuzzy_rule *rule;
  3311. gboolean processed = FALSE, res = TRUE;
  3312. guint i;
  3313. GPtrArray *commands;
  3314. gint *saved, rules = 0;
  3315. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  3316. saved = rspamd_mempool_alloc0(task->task_pool, sizeof(gint));
  3317. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3318. {
  3319. if (!res) {
  3320. break;
  3321. }
  3322. if (rule->read_only) {
  3323. continue;
  3324. }
  3325. /* Check for flag */
  3326. if (g_hash_table_lookup(rule->mappings,
  3327. GINT_TO_POINTER(flag)) == NULL) {
  3328. msg_info_task("skip rule %s as it has no flag %d defined"
  3329. " false",
  3330. rule->name, flag);
  3331. continue;
  3332. }
  3333. rules++;
  3334. res = 0;
  3335. commands = fuzzy_generate_commands(task, rule, cmd, flag,
  3336. value, send_flags);
  3337. if (commands != NULL) {
  3338. res = fuzzy_check_send_lua_learn(rule, task, commands,
  3339. saved);
  3340. rspamd_mempool_add_destructor(task->task_pool,
  3341. rspamd_ptr_array_free_hard, commands);
  3342. }
  3343. if (res) {
  3344. processed = TRUE;
  3345. }
  3346. }
  3347. if (res == -1) {
  3348. msg_warn_task("cannot send fuzzy request: %s",
  3349. strerror(errno));
  3350. }
  3351. else if (!processed) {
  3352. if (rules) {
  3353. msg_warn_task("no content to generate fuzzy");
  3354. return FALSE;
  3355. }
  3356. else {
  3357. msg_warn_task("no fuzzy rules found for flag %d", flag);
  3358. return FALSE;
  3359. }
  3360. }
  3361. return TRUE;
  3362. }
  3363. static gint
  3364. fuzzy_lua_learn_handler(lua_State *L)
  3365. {
  3366. struct rspamd_task *task = lua_check_task(L, 1);
  3367. if (task == NULL) {
  3368. return luaL_error(L, "invalid arguments");
  3369. }
  3370. guint flag = 0, weight = 1, send_flags = 0;
  3371. const gchar *symbol;
  3372. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  3373. if (lua_type(L, 2) == LUA_TNUMBER) {
  3374. flag = lua_tointeger(L, 2);
  3375. }
  3376. else if (lua_type(L, 2) == LUA_TSTRING) {
  3377. struct fuzzy_rule *rule;
  3378. guint i;
  3379. GHashTableIter it;
  3380. gpointer k, v;
  3381. struct fuzzy_mapping *map;
  3382. symbol = lua_tostring(L, 2);
  3383. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3384. {
  3385. if (flag != 0) {
  3386. break;
  3387. }
  3388. g_hash_table_iter_init(&it, rule->mappings);
  3389. while (g_hash_table_iter_next(&it, &k, &v)) {
  3390. map = v;
  3391. if (g_ascii_strcasecmp(symbol, map->symbol) == 0) {
  3392. flag = map->fuzzy_flag;
  3393. break;
  3394. }
  3395. }
  3396. }
  3397. }
  3398. if (flag == 0) {
  3399. return luaL_error(L, "bad flag");
  3400. }
  3401. if (lua_type(L, 3) == LUA_TNUMBER) {
  3402. weight = lua_tonumber(L, 3);
  3403. }
  3404. if (lua_type(L, 4) == LUA_TTABLE) {
  3405. const gchar *sf;
  3406. for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
  3407. sf = lua_tostring(L, -1);
  3408. if (sf) {
  3409. if (g_ascii_strcasecmp(sf, "noimages") == 0) {
  3410. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3411. }
  3412. else if (g_ascii_strcasecmp(sf, "noattachments") == 0) {
  3413. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3414. }
  3415. else if (g_ascii_strcasecmp(sf, "notext") == 0) {
  3416. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3417. }
  3418. }
  3419. }
  3420. }
  3421. lua_pushboolean(L,
  3422. fuzzy_check_lua_process_learn(task, FUZZY_WRITE, weight, flag,
  3423. send_flags));
  3424. return 1;
  3425. }
  3426. static gint
  3427. fuzzy_lua_unlearn_handler(lua_State *L)
  3428. {
  3429. struct rspamd_task *task = lua_check_task(L, 1);
  3430. if (task == NULL) {
  3431. return luaL_error(L, "invalid arguments");
  3432. }
  3433. guint flag = 0, weight = 1.0, send_flags = 0;
  3434. const gchar *symbol;
  3435. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  3436. if (lua_type(L, 2) == LUA_TNUMBER) {
  3437. flag = lua_tointeger(L, 2);
  3438. }
  3439. else if (lua_type(L, 2) == LUA_TSTRING) {
  3440. struct fuzzy_rule *rule;
  3441. guint i;
  3442. GHashTableIter it;
  3443. gpointer k, v;
  3444. struct fuzzy_mapping *map;
  3445. symbol = lua_tostring(L, 2);
  3446. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3447. {
  3448. if (flag != 0) {
  3449. break;
  3450. }
  3451. g_hash_table_iter_init(&it, rule->mappings);
  3452. while (g_hash_table_iter_next(&it, &k, &v)) {
  3453. map = v;
  3454. if (g_ascii_strcasecmp(symbol, map->symbol) == 0) {
  3455. flag = map->fuzzy_flag;
  3456. break;
  3457. }
  3458. }
  3459. }
  3460. }
  3461. if (flag == 0) {
  3462. return luaL_error(L, "bad flag");
  3463. }
  3464. if (lua_type(L, 3) == LUA_TNUMBER) {
  3465. weight = lua_tonumber(L, 3);
  3466. }
  3467. if (lua_type(L, 4) == LUA_TTABLE) {
  3468. const gchar *sf;
  3469. for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
  3470. sf = lua_tostring(L, -1);
  3471. if (sf) {
  3472. if (g_ascii_strcasecmp(sf, "noimages") == 0) {
  3473. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3474. }
  3475. else if (g_ascii_strcasecmp(sf, "noattachments") == 0) {
  3476. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3477. }
  3478. else if (g_ascii_strcasecmp(sf, "notext") == 0) {
  3479. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3480. }
  3481. }
  3482. }
  3483. }
  3484. lua_pushboolean(L,
  3485. fuzzy_check_lua_process_learn(task, FUZZY_DEL, weight, flag,
  3486. send_flags));
  3487. return 1;
  3488. }
  3489. static gint
  3490. fuzzy_lua_gen_hashes_handler(lua_State *L)
  3491. {
  3492. struct rspamd_task *task = lua_check_task(L, 1);
  3493. if (task == NULL) {
  3494. return luaL_error(L, "invalid arguments");
  3495. }
  3496. guint flag = 0, weight = 1, send_flags = 0;
  3497. const gchar *symbol;
  3498. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  3499. struct fuzzy_rule *rule;
  3500. GPtrArray *commands;
  3501. gint cmd = FUZZY_WRITE;
  3502. gint i;
  3503. if (lua_type(L, 2) == LUA_TNUMBER) {
  3504. flag = lua_tonumber(L, 2);
  3505. }
  3506. else if (lua_type(L, 2) == LUA_TSTRING) {
  3507. struct fuzzy_rule *rule;
  3508. GHashTableIter it;
  3509. gpointer k, v;
  3510. struct fuzzy_mapping *map;
  3511. symbol = lua_tostring(L, 2);
  3512. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3513. {
  3514. if (flag != 0) {
  3515. break;
  3516. }
  3517. g_hash_table_iter_init(&it, rule->mappings);
  3518. while (g_hash_table_iter_next(&it, &k, &v)) {
  3519. map = v;
  3520. if (g_ascii_strcasecmp(symbol, map->symbol) == 0) {
  3521. flag = map->fuzzy_flag;
  3522. break;
  3523. }
  3524. }
  3525. }
  3526. }
  3527. if (flag == 0) {
  3528. return luaL_error(L, "bad flag");
  3529. }
  3530. if (lua_type(L, 3) == LUA_TNUMBER) {
  3531. weight = lua_tonumber(L, 3);
  3532. }
  3533. /* Flags */
  3534. if (lua_type(L, 4) == LUA_TTABLE) {
  3535. const gchar *sf;
  3536. for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
  3537. sf = lua_tostring(L, -1);
  3538. if (sf) {
  3539. if (g_ascii_strcasecmp(sf, "noimages") == 0) {
  3540. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3541. }
  3542. else if (g_ascii_strcasecmp(sf, "noattachments") == 0) {
  3543. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3544. }
  3545. else if (g_ascii_strcasecmp(sf, "notext") == 0) {
  3546. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3547. }
  3548. }
  3549. }
  3550. }
  3551. /* Type */
  3552. if (lua_type(L, 5) == LUA_TSTRING) {
  3553. const gchar *cmd_name = lua_tostring(L, 5);
  3554. if (strcmp(cmd_name, "add") == 0 || strcmp(cmd_name, "write") == 0) {
  3555. cmd = FUZZY_WRITE;
  3556. }
  3557. else if (strcmp(cmd_name, "delete") == 0 || strcmp(cmd_name, "remove") == 0) {
  3558. cmd = FUZZY_DEL;
  3559. }
  3560. else {
  3561. return luaL_error(L, "invalid command: %s", cmd_name);
  3562. }
  3563. }
  3564. lua_createtable(L, 0, fuzzy_module_ctx->fuzzy_rules->len);
  3565. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3566. {
  3567. if (rule->read_only) {
  3568. continue;
  3569. }
  3570. /* Check for flag */
  3571. if (g_hash_table_lookup(rule->mappings,
  3572. GINT_TO_POINTER(flag)) == NULL) {
  3573. msg_info_task("skip rule %s as it has no flag %d defined"
  3574. " false",
  3575. rule->name, flag);
  3576. continue;
  3577. }
  3578. commands = fuzzy_generate_commands(task, rule, cmd, flag,
  3579. weight, send_flags);
  3580. if (commands != NULL) {
  3581. struct fuzzy_cmd_io *io;
  3582. gint j;
  3583. lua_pushstring(L, rule->name);
  3584. lua_createtable(L, commands->len, 0);
  3585. PTR_ARRAY_FOREACH(commands, j, io)
  3586. {
  3587. lua_pushlstring(L, io->io.iov_base, io->io.iov_len);
  3588. lua_rawseti(L, -2, j + 1);
  3589. }
  3590. lua_settable(L, -3); /* ret[rule->name] = {raw_fuzzy1, ..., raw_fuzzyn} */
  3591. g_ptr_array_free(commands, TRUE);
  3592. }
  3593. }
  3594. return 1;
  3595. }
  3596. static gint
  3597. fuzzy_lua_hex_hashes_handler(lua_State *L)
  3598. {
  3599. struct rspamd_task *task = lua_check_task(L, 1);
  3600. if (task == NULL) {
  3601. return luaL_error(L, "invalid arguments");
  3602. }
  3603. guint flag = 0, weight = 1, send_flags = 0;
  3604. const gchar *symbol;
  3605. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  3606. struct fuzzy_rule *rule;
  3607. GPtrArray *commands;
  3608. gint i;
  3609. if (lua_type(L, 2) == LUA_TNUMBER) {
  3610. flag = lua_tonumber(L, 2);
  3611. }
  3612. else if (lua_type(L, 2) == LUA_TSTRING) {
  3613. struct fuzzy_rule *rule;
  3614. GHashTableIter it;
  3615. gpointer k, v;
  3616. struct fuzzy_mapping *map;
  3617. symbol = lua_tostring(L, 2);
  3618. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3619. {
  3620. if (flag != 0) {
  3621. break;
  3622. }
  3623. g_hash_table_iter_init(&it, rule->mappings);
  3624. while (g_hash_table_iter_next(&it, &k, &v)) {
  3625. map = v;
  3626. if (g_ascii_strcasecmp(symbol, map->symbol) == 0) {
  3627. flag = map->fuzzy_flag;
  3628. break;
  3629. }
  3630. }
  3631. }
  3632. }
  3633. if (flag == 0) {
  3634. return luaL_error(L, "bad flag");
  3635. }
  3636. lua_createtable(L, 0, fuzzy_module_ctx->fuzzy_rules->len);
  3637. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3638. {
  3639. /* Check for flag */
  3640. if (g_hash_table_lookup(rule->mappings,
  3641. GINT_TO_POINTER(flag)) == NULL) {
  3642. msg_debug_task("skip rule %s as it has no flag %d defined"
  3643. " false",
  3644. rule->name, flag);
  3645. continue;
  3646. }
  3647. commands = fuzzy_generate_commands(task, rule, FUZZY_CHECK, flag,
  3648. weight, send_flags);
  3649. lua_pushstring(L, rule->name);
  3650. if (commands != NULL) {
  3651. lua_createtable(L, commands->len, 0);
  3652. /*
  3653. * We have all commands cached, so we can just read their cached value to
  3654. * get hex hashes
  3655. */
  3656. struct rspamd_mime_part *mp;
  3657. gint j, part_idx = 1;
  3658. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), j, mp)
  3659. {
  3660. struct rspamd_cached_shingles *cached;
  3661. cached = fuzzy_cmd_get_cached(rule, task, mp);
  3662. if (cached) {
  3663. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  3664. gint r = rspamd_encode_hex_buf(cached->digest, sizeof(cached->digest), hexbuf,
  3665. sizeof(hexbuf));
  3666. lua_pushlstring(L, hexbuf, r);
  3667. lua_rawseti(L, -2, part_idx++);
  3668. }
  3669. }
  3670. g_ptr_array_free(commands, TRUE);
  3671. }
  3672. else {
  3673. lua_pushnil(L);
  3674. }
  3675. /* res[rule->name] = {hex_hash1, ..., hex_hashn} */
  3676. lua_settable(L, -3);
  3677. }
  3678. return 1;
  3679. }
  3680. static gboolean
  3681. fuzzy_add_handler(struct rspamd_http_connection_entry *conn_ent,
  3682. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3683. {
  3684. return fuzzy_controller_handler(conn_ent, msg,
  3685. ctx, FUZZY_WRITE, FALSE);
  3686. }
  3687. static gboolean
  3688. fuzzy_delete_handler(struct rspamd_http_connection_entry *conn_ent,
  3689. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3690. {
  3691. return fuzzy_controller_handler(conn_ent, msg,
  3692. ctx, FUZZY_DEL, FALSE);
  3693. }
  3694. static gboolean
  3695. fuzzy_deletehash_handler(struct rspamd_http_connection_entry *conn_ent,
  3696. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3697. {
  3698. return fuzzy_controller_handler(conn_ent, msg,
  3699. ctx, FUZZY_DEL, TRUE);
  3700. }
  3701. static int
  3702. fuzzy_attach_controller(struct module_ctx *ctx, GHashTable *commands)
  3703. {
  3704. struct fuzzy_ctx *fctx = (struct fuzzy_ctx *) ctx;
  3705. struct rspamd_custom_controller_command *cmd;
  3706. cmd = rspamd_mempool_alloc(fctx->fuzzy_pool, sizeof(*cmd));
  3707. cmd->privileged = TRUE;
  3708. cmd->require_message = TRUE;
  3709. cmd->handler = fuzzy_add_handler;
  3710. cmd->ctx = ctx;
  3711. g_hash_table_insert(commands, "/fuzzyadd", cmd);
  3712. cmd = rspamd_mempool_alloc(fctx->fuzzy_pool, sizeof(*cmd));
  3713. cmd->privileged = TRUE;
  3714. cmd->require_message = TRUE;
  3715. cmd->handler = fuzzy_delete_handler;
  3716. cmd->ctx = ctx;
  3717. g_hash_table_insert(commands, "/fuzzydel", cmd);
  3718. cmd = rspamd_mempool_alloc(fctx->fuzzy_pool, sizeof(*cmd));
  3719. cmd->privileged = TRUE;
  3720. cmd->require_message = FALSE;
  3721. cmd->handler = fuzzy_deletehash_handler;
  3722. cmd->ctx = ctx;
  3723. g_hash_table_insert(commands, "/fuzzydelhash", cmd);
  3724. return 0;
  3725. }
  3726. /* Lua handlers */
  3727. /* TODO: move to a separate unit, as this file is now a bit too hard to read */
  3728. static void
  3729. lua_upstream_str_inserter(struct upstream *up, guint idx, void *ud)
  3730. {
  3731. lua_State *L = (lua_State *) ud;
  3732. lua_pushstring(L, rspamd_upstream_name(up));
  3733. lua_rawseti(L, -2, idx + 1);
  3734. }
  3735. static gint
  3736. fuzzy_lua_list_storages(lua_State *L)
  3737. {
  3738. struct rspamd_config *cfg = lua_check_config(L, 1);
  3739. if (cfg == NULL) {
  3740. return luaL_error(L, "invalid arguments");
  3741. }
  3742. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(cfg);
  3743. struct fuzzy_rule *rule;
  3744. guint i;
  3745. lua_createtable(L, 0, fuzzy_module_ctx->fuzzy_rules->len);
  3746. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3747. {
  3748. lua_newtable(L);
  3749. lua_pushboolean(L, rule->read_only);
  3750. lua_setfield(L, -2, "read_only");
  3751. /* Push servers */
  3752. lua_createtable(L, rspamd_upstreams_count(rule->servers), 0);
  3753. rspamd_upstreams_foreach(rule->servers, lua_upstream_str_inserter, L);
  3754. lua_setfield(L, -2, "servers");
  3755. /* Push flags */
  3756. GHashTableIter it;
  3757. lua_createtable(L, 0, g_hash_table_size(rule->mappings));
  3758. gpointer k, v;
  3759. struct fuzzy_mapping *map;
  3760. g_hash_table_iter_init(&it, rule->mappings);
  3761. while (g_hash_table_iter_next(&it, &k, &v)) {
  3762. map = v;
  3763. lua_pushinteger(L, map->fuzzy_flag);
  3764. lua_setfield(L, -2, map->symbol);
  3765. }
  3766. lua_setfield(L, -2, "flags");
  3767. /* Final table */
  3768. lua_setfield(L, -2, rule->name);
  3769. }
  3770. return 1;
  3771. }
/*
 * State of one asynchronous request started from Lua by
 * fuzzy_lua_ping_storage(); allocated from the task memory pool.
 */
struct fuzzy_lua_session {
	struct rspamd_task *task; /* task that owns the session and the event loop used */
	lua_State *L;             /* Lua state in which the callback is invoked */
	rspamd_inet_addr_t *addr; /* address of the server; passed to the callback */
	GPtrArray *commands;      /* commands to send; freed when the session finishes */
	struct fuzzy_rule *rule;  /* rule used to process replies */
	struct rspamd_io_ev ev;   /* I/O watcher (with timeout) for the socket */
	gint cbref;               /* registry reference to the Lua callback */
	gint fd;                  /* UDP socket connected to `addr` */
};
  3782. static void
  3783. fuzzy_lua_session_fin(void *ud)
  3784. {
  3785. struct fuzzy_lua_session *session = ud;
  3786. if (session->commands) {
  3787. g_ptr_array_free(session->commands, TRUE);
  3788. }
  3789. rspamd_ev_watcher_stop(session->task->event_loop, &session->ev);
  3790. luaL_unref(session->L, LUA_REGISTRYINDEX, session->cbref);
  3791. }
  3792. static gboolean
  3793. fuzzy_lua_session_is_completed(struct fuzzy_lua_session *session)
  3794. {
  3795. struct fuzzy_cmd_io *io;
  3796. guint nreplied = 0, i;
  3797. for (i = 0; i < session->commands->len; i++) {
  3798. io = g_ptr_array_index(session->commands, i);
  3799. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  3800. nreplied++;
  3801. }
  3802. }
  3803. if (nreplied == session->commands->len) {
  3804. rspamd_session_remove_event(session->task->s, fuzzy_lua_session_fin, session);
  3805. return TRUE;
  3806. }
  3807. return FALSE;
  3808. }
  3809. static void
  3810. fuzzy_lua_push_result(struct fuzzy_lua_session *session, gdouble latency)
  3811. {
  3812. lua_rawgeti(session->L, LUA_REGISTRYINDEX, session->cbref);
  3813. lua_pushboolean(session->L, TRUE);
  3814. rspamd_lua_ip_push(session->L, session->addr);
  3815. lua_pushnumber(session->L, latency);
  3816. /* TODO: check results maybe? */
  3817. lua_pcall(session->L, 3, 0, 0);
  3818. }
  3819. #ifdef __GNUC__
  3820. static void
  3821. fuzzy_lua_push_error(struct fuzzy_lua_session *session, const gchar *err_fmt, ...) __attribute__((format(printf, 2, 3)));
  3822. #endif
  3823. static void
  3824. fuzzy_lua_push_error(struct fuzzy_lua_session *session, const gchar *err_fmt, ...)
  3825. {
  3826. va_list v;
  3827. va_start(v, err_fmt);
  3828. lua_rawgeti(session->L, LUA_REGISTRYINDEX, session->cbref);
  3829. lua_pushboolean(session->L, FALSE);
  3830. rspamd_lua_ip_push(session->L, session->addr);
  3831. lua_pushvfstring(session->L, err_fmt, v);
  3832. va_end(v);
  3833. /* TODO: check results maybe? */
  3834. lua_pcall(session->L, 3, 0, 0);
  3835. }
  3836. static gint
  3837. fuzzy_lua_try_read(struct fuzzy_lua_session *session)
  3838. {
  3839. const struct rspamd_fuzzy_reply *rep;
  3840. struct rspamd_fuzzy_cmd *cmd = NULL;
  3841. struct fuzzy_cmd_io *io = NULL;
  3842. gint r, ret;
  3843. guchar buf[2048], *p;
  3844. if ((r = read(session->fd, buf, sizeof(buf) - 1)) == -1) {
  3845. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  3846. return 0;
  3847. }
  3848. else {
  3849. fuzzy_lua_push_error(session, "cannot read from socket: %s", strerror(errno));
  3850. return -1;
  3851. }
  3852. }
  3853. else {
  3854. p = buf;
  3855. ret = 0;
  3856. while ((rep = fuzzy_process_reply(&p, &r,
  3857. session->commands, session->rule, &cmd, &io)) != NULL) {
  3858. if (rep->v1.prob > 0.5) {
  3859. if (cmd->cmd == FUZZY_PING) {
  3860. fuzzy_lua_push_result(session, fuzzy_milliseconds_since_midnight() - rep->v1.value);
  3861. }
  3862. else {
  3863. fuzzy_lua_push_error(session, "unsupported");
  3864. }
  3865. }
  3866. else {
  3867. fuzzy_lua_push_error(session, "invalid reply from server: %d", rep->v1.value);
  3868. }
  3869. ret = 1;
  3870. }
  3871. }
  3872. return ret;
  3873. }
  3874. /* Fuzzy check callback */
  3875. static void
  3876. fuzzy_lua_io_callback(gint fd, short what, void *arg)
  3877. {
  3878. struct fuzzy_lua_session *session = arg;
  3879. gint r;
  3880. enum {
  3881. return_error = 0,
  3882. return_want_more,
  3883. return_finished
  3884. } ret = return_error;
  3885. if (what & EV_READ) {
  3886. /* Try to read reply */
  3887. r = fuzzy_lua_try_read(session);
  3888. switch (r) {
  3889. case 0:
  3890. if (what & EV_READ) {
  3891. ret = return_want_more;
  3892. }
  3893. else {
  3894. if (what & EV_WRITE) {
  3895. /* Retransmit attempt */
  3896. if (!fuzzy_cmd_vector_to_wire(fd, session->commands)) {
  3897. fuzzy_lua_push_error(session, "cannot write to socket");
  3898. ret = return_error;
  3899. }
  3900. else {
  3901. ret = return_want_more;
  3902. }
  3903. }
  3904. }
  3905. break;
  3906. case 1:
  3907. ret = return_finished;
  3908. break;
  3909. default:
  3910. ret = return_error;
  3911. break;
  3912. }
  3913. }
  3914. else if (what & EV_WRITE) {
  3915. if (!fuzzy_cmd_vector_to_wire(fd, session->commands)) {
  3916. fuzzy_lua_push_error(session, "cannot write to socket");
  3917. ret = return_error;
  3918. }
  3919. else {
  3920. ret = return_want_more;
  3921. }
  3922. }
  3923. else {
  3924. /* Timeout */
  3925. fuzzy_lua_push_error(session, "timeout waiting for the reply");
  3926. ret = return_error;
  3927. }
  3928. if (ret == return_want_more) {
  3929. /* Processed write, switch to reading */
  3930. rspamd_ev_watcher_reschedule(session->task->event_loop,
  3931. &session->ev, EV_READ);
  3932. }
  3933. else if (ret == return_error) {
  3934. rspamd_session_remove_event(session->task->s, fuzzy_lua_session_fin, session);
  3935. }
  3936. else {
  3937. /* Read something from network */
  3938. if (!fuzzy_lua_session_is_completed(session)) {
  3939. /* Need to read more */
  3940. rspamd_ev_watcher_reschedule(session->task->event_loop,
  3941. &session->ev, EV_READ);
  3942. }
  3943. }
  3944. }
  3945. /***
  3946. * @function fuzzy_check.ping_storage(task, callback, rule, timeout[, server_override])
  3947. * @return
  3948. */
  3949. static gint
  3950. fuzzy_lua_ping_storage(lua_State *L)
  3951. {
  3952. struct rspamd_task *task = lua_check_task(L, 1);
  3953. if (task == NULL) {
  3954. return luaL_error(L, "invalid arguments: task");
  3955. }
  3956. /* Other arguments sanity */
  3957. if (lua_type(L, 2) != LUA_TFUNCTION || lua_type(L, 3) != LUA_TSTRING || lua_type(L, 4) != LUA_TNUMBER) {
  3958. return luaL_error(L, "invalid arguments: callback/rule/timeout argument");
  3959. }
  3960. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context(task->cfg);
  3961. struct fuzzy_rule *rule, *rule_found = NULL;
  3962. int i;
  3963. const char *rule_name = lua_tostring(L, 3);
  3964. PTR_ARRAY_FOREACH(fuzzy_module_ctx->fuzzy_rules, i, rule)
  3965. {
  3966. if (strcmp(rule->name, rule_name) == 0) {
  3967. rule_found = rule;
  3968. break;
  3969. }
  3970. }
  3971. if (rule_found == NULL) {
  3972. return luaL_error(L, "invalid arguments: no such rule defined");
  3973. }
  3974. rspamd_inet_addr_t *addr = NULL;
  3975. if (lua_type(L, 5) == LUA_TSTRING) {
  3976. const gchar *server_name = lua_tostring(L, 5);
  3977. enum rspamd_parse_host_port_result res;
  3978. GPtrArray *addrs = g_ptr_array_new();
  3979. /* We resolve address synchronously here! Why? Because it is an override... */
  3980. res = rspamd_parse_host_port_priority(server_name, &addrs, 0, NULL,
  3981. 11335, FALSE, task->task_pool);
  3982. if (res == RSPAMD_PARSE_ADDR_FAIL) {
  3983. lua_pushboolean(L, FALSE);
  3984. lua_pushfstring(L, "invalid arguments: cannot resolve %s", server_name);
  3985. return 2;
  3986. }
  3987. /* Get random address */
  3988. addr = rspamd_inet_address_copy(g_ptr_array_index(addrs, rspamd_random_uint64_fast() % addrs->len),
  3989. task->task_pool);
  3990. rspamd_mempool_add_destructor(task->task_pool,
  3991. rspamd_ptr_array_free_hard, addrs);
  3992. }
  3993. else {
  3994. struct upstream *selected = rspamd_upstream_get(rule_found->servers,
  3995. RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0);
  3996. addr = rspamd_upstream_addr_next(selected);
  3997. }
  3998. if (addr != NULL) {
  3999. int sock;
  4000. GPtrArray *commands = fuzzy_generate_commands(task, rule, FUZZY_PING, 0, 0, 0);
  4001. if ((sock = rspamd_inet_address_connect(addr, SOCK_DGRAM, TRUE)) == -1) {
  4002. lua_pushboolean(L, FALSE);
  4003. lua_pushfstring(L, "cannot connect to %s, %s",
  4004. rspamd_inet_address_to_string_pretty(addr),
  4005. strerror(errno));
  4006. return 2;
  4007. }
  4008. else {
  4009. /* Create a dedicated ping session for a socket */
  4010. struct fuzzy_lua_session *session =
  4011. rspamd_mempool_alloc0(task->task_pool,
  4012. sizeof(struct fuzzy_lua_session));
  4013. session->task = task;
  4014. session->fd = sock;
  4015. session->addr = addr;
  4016. session->commands = commands;
  4017. session->L = L;
  4018. session->rule = rule_found;
  4019. /* Store callback */
  4020. lua_pushvalue(L, 2);
  4021. session->cbref = luaL_ref(L, LUA_REGISTRYINDEX);
  4022. rspamd_session_add_event(task->s, fuzzy_lua_session_fin, session, M);
  4023. rspamd_ev_watcher_init(&session->ev,
  4024. sock,
  4025. EV_WRITE,
  4026. fuzzy_lua_io_callback,
  4027. session);
  4028. rspamd_ev_watcher_start(session->task->event_loop, &session->ev,
  4029. lua_tonumber(L, 4));
  4030. }
  4031. }
  4032. lua_pushboolean(L, TRUE);
  4033. return 1;
  4034. }