You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fuzzy_check.c 99KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /***MODULE:fuzzy
  17. * rspamd module that checks fuzzy checksums for messages
  18. *
  19. * Allowed options:
  20. * - symbol (string): symbol to insert (default: 'R_FUZZY_HASH')
  21. * - max_score (double): maximum score against which the weights of hashes are normalized (default: 0 - no normalization)
  22. *
  23. * - fuzzy_map (object): map in format { name => { symbol, flag, max_score } } where flag is the number of the
  24. * fuzzy list, symbol is the symbol to insert (the entry name is used if omitted) and max_score is the weight for normalization.
  25. * The legacy string format 1:R_FUZZY_SAMPLE1:10,2:R_FUZZY_SAMPLE2:1 (key:symbol:weight) is deprecated and no longer supported
  26. *
  27. * - min_length (integer): minimum length (in characters) for text part to be checked for fuzzy hash (default: 0 - no limit)
  28. * - whitelist (map string): map of ip addresses that should not be checked with this module
  29. * - servers (string): list of fuzzy servers in format "server1:port,server2:port" - these servers would be used for checking and storing
  30. * fuzzy hashes
  31. */
  32. #include "config.h"
  33. #include "libmime/message.h"
  34. #include "libserver/maps/map.h"
  35. #include "libserver/maps/map_helpers.h"
  36. #include "libmime/images.h"
  37. #include "libserver/worker_util.h"
  38. #include "libserver/mempool_vars_internal.h"
  39. #include "fuzzy_wire.h"
  40. #include "utlist.h"
  41. #include "ottery.h"
  42. #include "lua/lua_common.h"
  43. #include "unix-std.h"
  44. #include "libserver/http/http_private.h"
  45. #include "libserver/http/http_router.h"
  46. #include "libstat/stat_api.h"
  47. #include <math.h>
  48. #include "libutil/libev_helper.h"
  49. #define DEFAULT_SYMBOL "R_FUZZY_HASH"
  50. #define DEFAULT_IO_TIMEOUT 500
  51. #define DEFAULT_RETRANSMITS 3
  52. #define DEFAULT_MAX_ERRORS 4
  53. #define DEFAULT_REVIVE_TIME 60
  54. #define DEFAULT_PORT 11335
  55. #define RSPAMD_FUZZY_PLUGIN_VERSION RSPAMD_FUZZY_VERSION
  56. static const gint rspamd_fuzzy_hash_len = 5;
  57. static const gchar *M = "fuzzy check";
  58. struct fuzzy_ctx;
  59. struct fuzzy_mapping {
  60. guint64 fuzzy_flag;
  61. const gchar *symbol;
  62. double weight;
  63. };
  64. struct fuzzy_rule {
  65. struct upstream_list *servers;
  66. const gchar *symbol;
  67. const gchar *algorithm_str;
  68. const gchar *name;
  69. const ucl_object_t *ucl_obj;
  70. enum rspamd_shingle_alg alg;
  71. GHashTable *mappings;
  72. GPtrArray *fuzzy_headers;
  73. GString *hash_key;
  74. GString *shingles_key;
  75. struct rspamd_cryptobox_keypair *local_key;
  76. struct rspamd_cryptobox_pubkey *peer_key;
  77. double max_score;
  78. double weight_threshold;
  79. gboolean read_only;
  80. gboolean skip_unknown;
  81. gboolean no_share;
  82. gboolean no_subject;
  83. gint learn_condition_cb;
  84. struct rspamd_hash_map_helper *skip_map;
  85. struct fuzzy_ctx *ctx;
  86. gint lua_id;
  87. };
  88. struct fuzzy_ctx {
  89. struct module_ctx ctx;
  90. rspamd_mempool_t *fuzzy_pool;
  91. GPtrArray *fuzzy_rules;
  92. struct rspamd_config *cfg;
  93. const gchar *default_symbol;
  94. struct rspamd_radix_map_helper *whitelist;
  95. struct rspamd_keypair_cache *keypairs_cache;
  96. guint32 io_timeout;
  97. guint32 retransmits;
  98. guint max_errors;
  99. gdouble revive_time;
  100. gint check_mime_part_ref; /* Lua callback */
  101. gint process_rule_ref; /* Lua callback */
  102. gint cleanup_rules_ref;
  103. gboolean enabled;
  104. };
  105. enum fuzzy_result_type {
  106. FUZZY_RESULT_TXT,
  107. FUZZY_RESULT_IMG,
  108. FUZZY_RESULT_CONTENT,
  109. FUZZY_RESULT_BIN
  110. };
  111. struct fuzzy_client_result {
  112. const gchar *symbol;
  113. gchar *option;
  114. gdouble score;
  115. gdouble prob;
  116. enum fuzzy_result_type type;
  117. };
  118. struct fuzzy_client_session {
  119. GPtrArray *commands;
  120. GPtrArray *results;
  121. struct rspamd_task *task;
  122. struct rspamd_symcache_item *item;
  123. struct upstream *server;
  124. struct fuzzy_rule *rule;
  125. struct ev_loop *event_loop;
  126. struct rspamd_io_ev ev;
  127. gint state;
  128. gint fd;
  129. guint retransmits;
  130. };
  131. struct fuzzy_learn_session {
  132. GPtrArray *commands;
  133. gint *saved;
  134. struct {
  135. const gchar *error_message;
  136. gint error_code;
  137. } err;
  138. struct rspamd_http_connection_entry *http_entry;
  139. struct rspamd_async_session *session;
  140. struct upstream *server;
  141. struct fuzzy_rule *rule;
  142. struct rspamd_task *task;
  143. struct ev_loop *event_loop;
  144. struct rspamd_io_ev ev;
  145. gint fd;
  146. guint retransmits;
  147. };
  148. #define FUZZY_CMD_FLAG_REPLIED (1 << 0)
  149. #define FUZZY_CMD_FLAG_SENT (1 << 1)
  150. #define FUZZY_CMD_FLAG_IMAGE (1 << 2)
  151. #define FUZZY_CMD_FLAG_CONTENT (1 << 3)
  152. #define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
  153. #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
  154. #define FUZZY_CHECK_FLAG_NOTEXT (1 << 2)
  155. struct fuzzy_cmd_io {
  156. guint32 tag;
  157. guint32 flags;
  158. struct iovec io;
  159. struct rspamd_mime_part *part;
  160. struct rspamd_fuzzy_cmd cmd;
  161. };
  162. static const char *default_headers = "Subject,Content-Type,Reply-To,X-Mailer";
  163. static void fuzzy_symbol_callback (struct rspamd_task *task,
  164. struct rspamd_symcache_item *item,
  165. void *unused);
  166. /* Initialization */
  167. gint fuzzy_check_module_init (struct rspamd_config *cfg,
  168. struct module_ctx **ctx);
  169. gint fuzzy_check_module_config (struct rspamd_config *cfg, bool valdate);
  170. gint fuzzy_check_module_reconfig (struct rspamd_config *cfg);
  171. static gint fuzzy_attach_controller (struct module_ctx *ctx,
  172. GHashTable *commands);
  173. static gint fuzzy_lua_learn_handler (lua_State *L);
  174. static gint fuzzy_lua_unlearn_handler (lua_State *L);
  175. static gint fuzzy_lua_gen_hashes_handler (lua_State *L);
  176. module_t fuzzy_check_module = {
  177. "fuzzy_check",
  178. fuzzy_check_module_init,
  179. fuzzy_check_module_config,
  180. fuzzy_check_module_reconfig,
  181. fuzzy_attach_controller,
  182. RSPAMD_MODULE_VER,
  183. (guint)-1,
  184. };
  185. static inline struct fuzzy_ctx *
  186. fuzzy_get_context (struct rspamd_config *cfg)
  187. {
  188. return (struct fuzzy_ctx *)g_ptr_array_index (cfg->c_modules,
  189. fuzzy_check_module.ctx_offset);
  190. }
  191. static void
  192. parse_flags (struct fuzzy_rule *rule,
  193. struct rspamd_config *cfg,
  194. const ucl_object_t *val,
  195. gint cb_id)
  196. {
  197. const ucl_object_t *elt;
  198. struct fuzzy_mapping *map;
  199. const gchar *sym = NULL;
  200. if (val->type == UCL_STRING) {
  201. msg_err_config (
  202. "string mappings are deprecated and no longer supported, use new style configuration");
  203. }
  204. else if (val->type == UCL_OBJECT) {
  205. elt = ucl_object_lookup (val, "symbol");
  206. if (elt == NULL || !ucl_object_tostring_safe (elt, &sym)) {
  207. sym = ucl_object_key (val);
  208. }
  209. if (sym != NULL) {
  210. map =
  211. rspamd_mempool_alloc (cfg->cfg_pool,
  212. sizeof (struct fuzzy_mapping));
  213. map->symbol = sym;
  214. elt = ucl_object_lookup (val, "flag");
  215. if (elt != NULL) {
  216. map->fuzzy_flag = ucl_obj_toint (elt);
  217. elt = ucl_object_lookup (val, "max_score");
  218. if (elt != NULL) {
  219. map->weight = ucl_obj_todouble (elt);
  220. }
  221. else {
  222. map->weight = rule->max_score;
  223. }
  224. /* Add flag to hash table */
  225. g_hash_table_insert (rule->mappings,
  226. GINT_TO_POINTER (map->fuzzy_flag), map);
  227. rspamd_symcache_add_symbol (cfg->cache,
  228. map->symbol, 0,
  229. NULL, NULL,
  230. SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE,
  231. cb_id);
  232. }
  233. else {
  234. msg_err_config ("fuzzy_map parameter has no flag definition");
  235. }
  236. }
  237. else {
  238. msg_err_config ("fuzzy_map parameter has no symbol definition");
  239. }
  240. }
  241. else {
  242. msg_err_config ("fuzzy_map parameter is of an unsupported type");
  243. }
  244. }
  245. static GPtrArray *
  246. parse_fuzzy_headers (struct rspamd_config *cfg, const gchar *str)
  247. {
  248. gchar **strvec;
  249. gint num, i;
  250. GPtrArray *res;
  251. strvec = g_strsplit_set (str, ",", 0);
  252. num = g_strv_length (strvec);
  253. res = g_ptr_array_sized_new (num);
  254. for (i = 0; i < num; i++) {
  255. g_strstrip (strvec[i]);
  256. g_ptr_array_add (res, rspamd_mempool_strdup (
  257. cfg->cfg_pool, strvec[i]));
  258. }
  259. g_strfreev (strvec);
  260. return res;
  261. }
  262. static double
  263. fuzzy_normalize (gint32 in, double weight)
  264. {
  265. if (weight == 0) {
  266. return 0;
  267. }
  268. #ifdef HAVE_TANH
  269. return tanh (G_E * (double)in / weight);
  270. #else
  271. return (in < weight ? in / weight : weight);
  272. #endif
  273. }
  274. static struct fuzzy_rule *
  275. fuzzy_rule_new (const char *default_symbol, rspamd_mempool_t *pool)
  276. {
  277. struct fuzzy_rule *rule;
  278. rule = rspamd_mempool_alloc0 (pool, sizeof (struct fuzzy_rule));
  279. rule->mappings = g_hash_table_new (g_direct_hash, g_direct_equal);
  280. rule->symbol = default_symbol;
  281. rspamd_mempool_add_destructor (pool,
  282. (rspamd_mempool_destruct_t)g_hash_table_unref,
  283. rule->mappings);
  284. rule->read_only = FALSE;
  285. rule->weight_threshold = NAN;
  286. return rule;
  287. }
  288. static void
  289. fuzzy_free_rule (gpointer r)
  290. {
  291. struct fuzzy_rule *rule = (struct fuzzy_rule *)r;
  292. g_string_free (rule->hash_key, TRUE);
  293. g_string_free (rule->shingles_key, TRUE);
  294. if (rule->local_key) {
  295. rspamd_keypair_unref (rule->local_key);
  296. }
  297. if (rule->peer_key) {
  298. rspamd_pubkey_unref (rule->peer_key);
  299. }
  300. }
  301. static gint
  302. fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj,
  303. const gchar *name, gint cb_id)
  304. {
  305. const ucl_object_t *value, *cur;
  306. struct fuzzy_rule *rule;
  307. ucl_object_iter_t it = NULL;
  308. const char *k = NULL, *key_str = NULL, *shingles_key_str = NULL, *lua_script;
  309. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg);
  310. if (obj->type != UCL_OBJECT) {
  311. msg_err_config ("invalid rule definition");
  312. return -1;
  313. }
  314. if ((value = ucl_object_lookup_any (obj, "enabled", "enable", NULL)) != NULL) {
  315. if (!ucl_object_toboolean (value)) {
  316. msg_info_config ("fuzzy rule %s is disabled by configuration", name);
  317. return 0;
  318. }
  319. }
  320. rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol,
  321. cfg->cfg_pool);
  322. rule->ucl_obj = obj;
  323. rule->ctx = fuzzy_module_ctx;
  324. rule->learn_condition_cb = -1;
  325. rule->alg = RSPAMD_SHINGLES_OLD;
  326. rule->skip_map = NULL;
  327. if ((value = ucl_object_lookup (obj, "skip_hashes")) != NULL) {
  328. rspamd_map_add_from_ucl (cfg, value,
  329. "Fuzzy hashes whitelist",
  330. rspamd_kv_list_read,
  331. rspamd_kv_list_fin,
  332. rspamd_kv_list_dtor,
  333. (void **)&rule->skip_map,
  334. NULL, RSPAMD_MAP_DEFAULT);
  335. }
  336. if ((value = ucl_object_lookup (obj, "headers")) != NULL) {
  337. it = NULL;
  338. while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY))
  339. != NULL) {
  340. GPtrArray *tmp;
  341. guint i;
  342. gpointer ptr;
  343. tmp = parse_fuzzy_headers (cfg, ucl_obj_tostring (cur));
  344. if (tmp) {
  345. if (rule->fuzzy_headers) {
  346. PTR_ARRAY_FOREACH (tmp, i, ptr) {
  347. g_ptr_array_add (rule->fuzzy_headers, ptr);
  348. }
  349. g_ptr_array_free (tmp, TRUE);
  350. }
  351. else {
  352. rule->fuzzy_headers = tmp;
  353. }
  354. }
  355. }
  356. }
  357. else {
  358. rule->fuzzy_headers = parse_fuzzy_headers (cfg, default_headers);
  359. }
  360. if (rule->fuzzy_headers != NULL) {
  361. rspamd_mempool_add_destructor (cfg->cfg_pool,
  362. (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard,
  363. rule->fuzzy_headers);
  364. }
  365. if ((value = ucl_object_lookup (obj, "max_score")) != NULL) {
  366. rule->max_score = ucl_obj_todouble (value);
  367. }
  368. if ((value = ucl_object_lookup (obj, "symbol")) != NULL) {
  369. rule->symbol = ucl_obj_tostring (value);
  370. }
  371. if (name) {
  372. rule->name = name;
  373. }
  374. else {
  375. rule->name = rule->symbol;
  376. }
  377. if ((value = ucl_object_lookup (obj, "read_only")) != NULL) {
  378. rule->read_only = ucl_obj_toboolean (value);
  379. }
  380. if ((value = ucl_object_lookup (obj, "skip_unknown")) != NULL) {
  381. rule->skip_unknown = ucl_obj_toboolean (value);
  382. }
  383. if ((value = ucl_object_lookup (obj, "no_share")) != NULL) {
  384. rule->no_share = ucl_obj_toboolean (value);
  385. }
  386. if ((value = ucl_object_lookup (obj, "no_subject")) != NULL) {
  387. rule->no_subject = ucl_obj_toboolean (value);
  388. }
  389. if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) {
  390. rule->algorithm_str = ucl_object_tostring (value);
  391. if (rule->algorithm_str) {
  392. if (g_ascii_strcasecmp (rule->algorithm_str, "old") == 0 ||
  393. g_ascii_strcasecmp (rule->algorithm_str, "siphash") == 0) {
  394. rule->alg = RSPAMD_SHINGLES_OLD;
  395. }
  396. else if (g_ascii_strcasecmp (rule->algorithm_str, "xxhash") == 0) {
  397. rule->alg = RSPAMD_SHINGLES_XXHASH;
  398. }
  399. else if (g_ascii_strcasecmp (rule->algorithm_str, "mumhash") == 0) {
  400. rule->alg = RSPAMD_SHINGLES_MUMHASH;
  401. }
  402. else if (g_ascii_strcasecmp (rule->algorithm_str, "fasthash") == 0 ||
  403. g_ascii_strcasecmp (rule->algorithm_str, "fast") == 0) {
  404. rule->alg = RSPAMD_SHINGLES_FAST;
  405. }
  406. else {
  407. msg_warn_config ("unknown algorithm: %s, use siphash by default",
  408. rule->algorithm_str);
  409. }
  410. }
  411. }
  412. /* Set a consistent and short string name */
  413. switch (rule->alg) {
  414. case RSPAMD_SHINGLES_OLD:
  415. rule->algorithm_str = "sip";
  416. break;
  417. case RSPAMD_SHINGLES_XXHASH:
  418. rule->algorithm_str = "xx";
  419. break;
  420. case RSPAMD_SHINGLES_MUMHASH:
  421. rule->algorithm_str = "mum";
  422. break;
  423. case RSPAMD_SHINGLES_FAST:
  424. rule->algorithm_str = "fast";
  425. break;
  426. }
  427. if ((value = ucl_object_lookup (obj, "servers")) != NULL) {
  428. rule->servers = rspamd_upstreams_create (cfg->ups_ctx);
  429. /* pass max_error and revive_time configuration in upstream for fuzzy storage
  430. * it allows to configure error_rate threshold and upstream dead timer
  431. */
  432. rspamd_upstreams_set_limits (rule->servers,
  433. (gdouble) fuzzy_module_ctx->revive_time, NAN, NAN, NAN,
  434. (guint) fuzzy_module_ctx->max_errors, 0);
  435. rspamd_mempool_add_destructor (cfg->cfg_pool,
  436. (rspamd_mempool_destruct_t)rspamd_upstreams_destroy,
  437. rule->servers);
  438. if (!rspamd_upstreams_from_ucl (rule->servers, value, DEFAULT_PORT, NULL)) {
  439. msg_err_config ("cannot read servers definition");
  440. return -1;
  441. }
  442. }
  443. if ((value = ucl_object_lookup (obj, "fuzzy_map")) != NULL) {
  444. it = NULL;
  445. while ((cur = ucl_object_iterate (value, &it, true)) != NULL) {
  446. parse_flags (rule, cfg, cur, cb_id);
  447. }
  448. }
  449. if ((value = ucl_object_lookup (obj, "encryption_key")) != NULL) {
  450. /* Create key from user's input */
  451. k = ucl_object_tostring (value);
  452. if (k == NULL || (rule->peer_key =
  453. rspamd_pubkey_from_base32 (k, 0, RSPAMD_KEYPAIR_KEX,
  454. RSPAMD_CRYPTOBOX_MODE_25519)) == NULL) {
  455. msg_err_config ("bad encryption key value: %s",
  456. k);
  457. return -1;
  458. }
  459. rule->local_key = rspamd_keypair_new (RSPAMD_KEYPAIR_KEX,
  460. RSPAMD_CRYPTOBOX_MODE_25519);
  461. }
  462. if ((value = ucl_object_lookup (obj, "learn_condition")) != NULL) {
  463. lua_script = ucl_object_tostring (value);
  464. if (lua_script) {
  465. if (luaL_dostring (cfg->lua_state, lua_script) != 0) {
  466. msg_err_config ("cannot execute lua script for fuzzy "
  467. "learn condition: %s", lua_tostring (cfg->lua_state, -1));
  468. }
  469. else {
  470. if (lua_type (cfg->lua_state, -1) == LUA_TFUNCTION) {
  471. rule->learn_condition_cb = luaL_ref (cfg->lua_state,
  472. LUA_REGISTRYINDEX);
  473. msg_info_config ("loaded learn condition script for fuzzy rule:"
  474. " %s", rule->name);
  475. }
  476. else {
  477. msg_err_config ("lua script must return "
  478. "function(task) and not %s",
  479. lua_typename (cfg->lua_state,
  480. lua_type (cfg->lua_state, -1)));
  481. }
  482. }
  483. }
  484. }
  485. key_str = NULL;
  486. if ((value = ucl_object_lookup (obj, "fuzzy_key")) != NULL) {
  487. /* Create key from user's input */
  488. key_str = ucl_object_tostring (value);
  489. }
  490. /* Setup keys */
  491. if (key_str == NULL) {
  492. /* Use some default key for all ops */
  493. key_str = "rspamd";
  494. }
  495. rule->hash_key = g_string_sized_new (rspamd_cryptobox_HASHBYTES);
  496. rspamd_cryptobox_hash (rule->hash_key->str, key_str, strlen (key_str), NULL, 0);
  497. rule->hash_key->len = rspamd_cryptobox_HASHKEYBYTES;
  498. shingles_key_str = NULL;
  499. if ((value = ucl_object_lookup (obj, "fuzzy_shingles_key")) != NULL) {
  500. shingles_key_str = ucl_object_tostring (value);
  501. }
  502. if (shingles_key_str == NULL) {
  503. shingles_key_str = "rspamd";
  504. }
  505. rule->shingles_key = g_string_sized_new (rspamd_cryptobox_HASHBYTES);
  506. rspamd_cryptobox_hash (rule->shingles_key->str, shingles_key_str,
  507. strlen (shingles_key_str), NULL, 0);
  508. rule->shingles_key->len = 16;
  509. if (rspamd_upstreams_count (rule->servers) == 0) {
  510. msg_err_config ("no servers defined for fuzzy rule with name: %s",
  511. rule->name);
  512. return -1;
  513. }
  514. else {
  515. g_ptr_array_add (fuzzy_module_ctx->fuzzy_rules, rule);
  516. if (rule->symbol != fuzzy_module_ctx->default_symbol) {
  517. rspamd_symcache_add_symbol (cfg->cache, rule->symbol,
  518. 0,
  519. NULL, NULL,
  520. SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE,
  521. cb_id);
  522. }
  523. msg_info_config ("added fuzzy rule %s, key: %*xs, "
  524. "shingles_key: %*xs, algorithm: %s",
  525. rule->symbol,
  526. 6, rule->hash_key->str,
  527. 6, rule->shingles_key->str,
  528. rule->algorithm_str);
  529. }
  530. if ((value = ucl_object_lookup (obj, "weight_threshold")) != NULL) {
  531. rule->weight_threshold = ucl_object_todouble (value);
  532. }
  533. /*
  534. * Process rule in Lua
  535. */
  536. gint err_idx, ret;
  537. lua_State *L = (lua_State *)cfg->lua_state;
  538. lua_pushcfunction (L, &rspamd_lua_traceback);
  539. err_idx = lua_gettop (L);
  540. lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->process_rule_ref);
  541. ucl_object_push_lua (L, obj, true);
  542. if ((ret = lua_pcall (L, 1, 1, err_idx)) != 0) {
  543. msg_err_config ("call to process_rule lua "
  544. "script failed (%d): %s", ret, lua_tostring (L, -1));
  545. rule->lua_id = -1;
  546. }
  547. else {
  548. rule->lua_id = lua_tonumber (L, -1);
  549. }
  550. lua_settop (L, err_idx - 1);
  551. rspamd_mempool_add_destructor (cfg->cfg_pool, fuzzy_free_rule,
  552. rule);
  553. return 0;
  554. }
  555. gint
  556. fuzzy_check_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  557. {
  558. struct fuzzy_ctx *fuzzy_module_ctx;
  559. fuzzy_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
  560. sizeof (struct fuzzy_ctx));
  561. fuzzy_module_ctx->fuzzy_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  562. NULL, 0);
  563. /* TODO: this should match rules count actually */
  564. fuzzy_module_ctx->keypairs_cache = rspamd_keypair_cache_new (32);
  565. fuzzy_module_ctx->fuzzy_rules = g_ptr_array_new ();
  566. fuzzy_module_ctx->cfg = cfg;
  567. fuzzy_module_ctx->process_rule_ref = -1;
  568. fuzzy_module_ctx->check_mime_part_ref = -1;
  569. fuzzy_module_ctx->cleanup_rules_ref = -1;
  570. rspamd_mempool_add_destructor (cfg->cfg_pool,
  571. (rspamd_mempool_destruct_t)rspamd_mempool_delete,
  572. fuzzy_module_ctx->fuzzy_pool);
  573. rspamd_mempool_add_destructor (cfg->cfg_pool,
  574. (rspamd_mempool_destruct_t)rspamd_keypair_cache_destroy,
  575. fuzzy_module_ctx->keypairs_cache);
  576. rspamd_mempool_add_destructor (cfg->cfg_pool,
  577. (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard,
  578. fuzzy_module_ctx->fuzzy_rules);
  579. *ctx = (struct module_ctx *)fuzzy_module_ctx;
  580. rspamd_rcl_add_doc_by_path (cfg,
  581. NULL,
  582. "Fuzzy check plugin",
  583. "fuzzy_check",
  584. UCL_OBJECT,
  585. NULL,
  586. 0,
  587. NULL,
  588. 0);
  589. rspamd_rcl_add_doc_by_path (cfg,
  590. "fuzzy_check",
  591. "Default symbol",
  592. "symbol",
  593. UCL_STRING,
  594. NULL,
  595. 0,
  596. NULL,
  597. 0);
  598. rspamd_rcl_add_doc_by_path (cfg,
  599. "fuzzy_check",
  600. "Minimum number of *words* to check a text part",
  601. "min_length",
  602. UCL_INT,
  603. NULL,
  604. 0,
  605. NULL,
  606. 0);
  607. rspamd_rcl_add_doc_by_path (cfg,
  608. "fuzzy_check",
  609. "Minimum number of *bytes* to check a non-text part",
  610. "min_bytes",
  611. UCL_INT,
  612. NULL,
  613. 0,
  614. NULL,
  615. 0);
  616. rspamd_rcl_add_doc_by_path (cfg,
  617. "fuzzy_check",
  618. "Multiplier for bytes limit when checking for text parts",
  619. "text_multiplier",
  620. UCL_FLOAT,
  621. NULL,
  622. 0,
  623. NULL,
  624. 0);
  625. rspamd_rcl_add_doc_by_path (cfg,
  626. "fuzzy_check",
  627. "Minimum height in pixels for embedded images to check using fuzzy storage",
  628. "min_height",
  629. UCL_INT,
  630. NULL,
  631. 0,
  632. NULL,
  633. 0);
  634. rspamd_rcl_add_doc_by_path (cfg,
  635. "fuzzy_check",
  636. "Minimum width in pixels for embedded images to check using fuzzy storage",
  637. "min_width",
  638. UCL_INT,
  639. NULL,
  640. 0,
  641. NULL,
  642. 0);
  643. rspamd_rcl_add_doc_by_path (cfg,
  644. "fuzzy_check",
  645. "Timeout for waiting reply from a fuzzy server",
  646. "timeout",
  647. UCL_TIME,
  648. NULL,
  649. 0,
  650. NULL,
  651. 0);
  652. rspamd_rcl_add_doc_by_path (cfg,
  653. "fuzzy_check",
  654. "Maximum number of retransmits for a single request",
  655. "retransmits",
  656. UCL_INT,
  657. NULL,
  658. 0,
  659. NULL,
  660. 0);
  661. rspamd_rcl_add_doc_by_path (cfg,
  662. "fuzzy_check",
  663. "Maximum number of upstream errors, affects error rate threshold",
  664. "max_errors",
  665. UCL_INT,
  666. NULL,
  667. 0,
  668. NULL,
  669. 0);
  670. rspamd_rcl_add_doc_by_path (cfg,
  671. "fuzzy_check",
  672. "Time to lapse before re-resolve faulty upstream",
  673. "revive_time",
  674. UCL_FLOAT,
  675. NULL,
  676. 0,
  677. NULL,
  678. 0);
  679. rspamd_rcl_add_doc_by_path (cfg,
  680. "fuzzy_check",
  681. "Whitelisted IPs map",
  682. "whitelist",
  683. UCL_STRING,
  684. NULL,
  685. 0,
  686. NULL,
  687. 0);
  688. /* Rules doc strings */
  689. rspamd_rcl_add_doc_by_path (cfg,
  690. "fuzzy_check",
  691. "Fuzzy check rule",
  692. "rule",
  693. UCL_OBJECT,
  694. NULL,
  695. 0,
  696. NULL,
  697. 0);
  698. rspamd_rcl_add_doc_by_path (cfg,
  699. "fuzzy_check.rule",
  700. "Headers that are used to make a separate hash",
  701. "headers",
  702. UCL_ARRAY,
  703. NULL,
  704. 0,
  705. NULL,
  706. 0);
  707. rspamd_rcl_add_doc_by_path (cfg,
  708. "fuzzy_check.rule",
  709. "Whitelisted hashes map",
  710. "skip_hashes",
  711. UCL_STRING,
  712. NULL,
  713. 0,
  714. NULL,
  715. 0);
  716. rspamd_rcl_add_doc_by_path (cfg,
  717. "fuzzy_check.rule",
  718. "Set of mime types (in form type/subtype, or type/*, or *) to check with fuzzy",
  719. "mime_types",
  720. UCL_ARRAY,
  721. NULL,
  722. 0,
  723. NULL,
  724. 0);
  725. rspamd_rcl_add_doc_by_path (cfg,
  726. "fuzzy_check.rule",
  727. "Maximum value for fuzzy hash when weight of symbol is exactly 1.0 (if value is higher then score is still 1.0)",
  728. "max_score",
  729. UCL_INT,
  730. NULL,
  731. 0,
  732. NULL,
  733. 0);
  734. rspamd_rcl_add_doc_by_path (cfg,
  735. "fuzzy_check.rule",
  736. "List of servers to check (or learn)",
  737. "servers",
  738. UCL_STRING,
  739. NULL,
  740. 0,
  741. NULL,
  742. 0);
  743. rspamd_rcl_add_doc_by_path (cfg,
  744. "fuzzy_check.rule",
  745. "If true then never try to learn this fuzzy storage",
  746. "read_only",
  747. UCL_BOOLEAN,
  748. NULL,
  749. 0,
  750. NULL,
  751. 0);
  752. rspamd_rcl_add_doc_by_path (cfg,
  753. "fuzzy_check.rule",
  754. "If true then ignore unknown flags and not add the default fuzzy symbol",
  755. "skip_unknown",
  756. UCL_BOOLEAN,
  757. NULL,
  758. 0,
  759. NULL,
  760. 0);
  761. rspamd_rcl_add_doc_by_path (cfg,
  762. "fuzzy_check.rule",
  763. "Default symbol for rule (if no flags defined or matched)",
  764. "symbol",
  765. UCL_STRING,
  766. NULL,
  767. 0,
  768. NULL,
  769. 0);
  770. rspamd_rcl_add_doc_by_path (cfg,
  771. "fuzzy_check.rule",
  772. "Base32 value for the protocol encryption public key",
  773. "encryption_key",
  774. UCL_STRING,
  775. NULL,
  776. 0,
  777. NULL,
  778. 0);
  779. rspamd_rcl_add_doc_by_path (cfg,
  780. "fuzzy_check.rule",
  781. "Base32 value for the hashing key (for private storages)",
  782. "fuzzy_key",
  783. UCL_STRING,
  784. NULL,
  785. 0,
  786. NULL,
  787. 0);
  788. rspamd_rcl_add_doc_by_path (cfg,
  789. "fuzzy_check.rule",
  790. "Base32 value for the shingles hashing key (for private storages)",
  791. "fuzzy_shingles_key",
  792. UCL_STRING,
  793. NULL,
  794. 0,
  795. NULL,
  796. 0);
  797. rspamd_rcl_add_doc_by_path (cfg,
  798. "fuzzy_check.rule",
  799. "Lua script that returns boolean function to check if this task "
  800. "should be considered when learning fuzzy storage",
  801. "learn_condition",
  802. UCL_STRING,
  803. NULL,
  804. 0,
  805. NULL,
  806. 0);
  807. rspamd_rcl_add_doc_by_path (cfg,
  808. "fuzzy_check.rule",
  809. "Map of SYMBOL -> data for flags configuration",
  810. "fuzzy_map",
  811. UCL_OBJECT,
  812. NULL,
  813. 0,
  814. NULL,
  815. 0);
  816. rspamd_rcl_add_doc_by_path (cfg,
  817. "fuzzy_check.rule",
  818. "Use direct hash for short texts",
  819. "short_text_direct_hash",
  820. UCL_BOOLEAN,
  821. NULL,
  822. 0,
  823. "true",
  824. 0);
  825. rspamd_rcl_add_doc_by_path (cfg,
  826. "fuzzy_check.rule",
  827. "Override module default min bytes for this rule",
  828. "min_bytes",
  829. UCL_INT,
  830. NULL,
  831. 0,
  832. NULL,
  833. 0);
  834. /* Fuzzy map doc strings */
  835. rspamd_rcl_add_doc_by_path (cfg,
  836. "fuzzy_check.rule.fuzzy_map",
  837. "Maximum score for this flag",
  838. "max_score",
  839. UCL_INT,
  840. NULL,
  841. 0,
  842. NULL,
  843. 0);
  844. rspamd_rcl_add_doc_by_path (cfg,
  845. "fuzzy_check.rule.fuzzy_map",
  846. "Flag number",
  847. "flag",
  848. UCL_INT,
  849. NULL,
  850. 0,
  851. NULL,
  852. 0);
  853. rspamd_rcl_add_doc_by_path (cfg,
  854. "fuzzy_check.rule",
  855. "Do no use subject to distinguish short text hashes",
  856. "no_subject",
  857. UCL_BOOLEAN,
  858. NULL,
  859. 0,
  860. "false",
  861. 0);
  862. rspamd_rcl_add_doc_by_path (cfg,
  863. "fuzzy_check.rule",
  864. "Disable sharing message stats with the fuzzy server",
  865. "no_share",
  866. UCL_BOOLEAN,
  867. NULL,
  868. 0,
  869. "false",
  870. 0);
  871. return 0;
  872. }
  873. gint
  874. fuzzy_check_module_config (struct rspamd_config *cfg, bool validate)
  875. {
  876. const ucl_object_t *value, *cur, *elt;
  877. ucl_object_iter_t it;
  878. gint res = TRUE, cb_id, nrules = 0;
  879. lua_State *L = cfg->lua_state;
  880. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg);
  881. if (!rspamd_config_is_module_enabled (cfg, "fuzzy_check")) {
  882. return TRUE;
  883. }
  884. fuzzy_module_ctx->enabled = TRUE;
  885. fuzzy_module_ctx->check_mime_part_ref = -1;
  886. fuzzy_module_ctx->process_rule_ref = -1;
  887. fuzzy_module_ctx->cleanup_rules_ref = -1;
  888. /* Interact with lua_fuzzy */
  889. if (luaL_dostring (L, "return require \"lua_fuzzy\"") != 0) {
  890. msg_err_config ("cannot require lua_fuzzy: %s",
  891. lua_tostring (L, -1));
  892. fuzzy_module_ctx->enabled = FALSE;
  893. }
  894. else {
  895. if (lua_type (L, -1) != LUA_TTABLE) {
  896. msg_err_config ("lua fuzzy must return "
  897. "table and not %s",
  898. lua_typename (L, lua_type (L, -1)));
  899. fuzzy_module_ctx->enabled = FALSE;
  900. }
  901. else {
  902. lua_pushstring (L, "process_rule");
  903. lua_gettable (L, -2);
  904. if (lua_type (L, -1) != LUA_TFUNCTION) {
  905. msg_err_config ("process_rule must return "
  906. "function and not %s",
  907. lua_typename (L, lua_type (L, -1)));
  908. fuzzy_module_ctx->enabled = FALSE;
  909. }
  910. else {
  911. fuzzy_module_ctx->process_rule_ref = luaL_ref (L, LUA_REGISTRYINDEX);
  912. }
  913. lua_pushstring (L, "check_mime_part");
  914. lua_gettable (L, -2);
  915. if (lua_type (L, -1) != LUA_TFUNCTION) {
  916. msg_err_config ("check_mime_part must return "
  917. "function and not %s",
  918. lua_typename (L, lua_type (L, -1)));
  919. fuzzy_module_ctx->enabled = FALSE;
  920. }
  921. else {
  922. fuzzy_module_ctx->check_mime_part_ref = luaL_ref (L, LUA_REGISTRYINDEX);
  923. }
  924. lua_pushstring (L, "cleanup_rules");
  925. lua_gettable (L, -2);
  926. if (lua_type (L, -1) != LUA_TFUNCTION) {
  927. msg_err_config ("cleanup_rules must return "
  928. "function and not %s",
  929. lua_typename (L, lua_type (L, -1)));
  930. fuzzy_module_ctx->enabled = FALSE;
  931. }
  932. else {
  933. fuzzy_module_ctx->cleanup_rules_ref = luaL_ref (L, LUA_REGISTRYINDEX);
  934. }
  935. }
  936. }
  937. lua_settop (L, 0);
  938. if (!fuzzy_module_ctx->enabled) {
  939. return TRUE;
  940. }
  941. if ((value =
  942. rspamd_config_get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) {
  943. fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value);
  944. }
  945. else {
  946. fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL;
  947. }
  948. if ((value =
  949. rspamd_config_get_module_opt (cfg, "fuzzy_check", "timeout")) != NULL) {
  950. fuzzy_module_ctx->io_timeout = ucl_obj_todouble (value) * 1000;
  951. }
  952. else {
  953. fuzzy_module_ctx->io_timeout = DEFAULT_IO_TIMEOUT;
  954. }
  955. if ((value =
  956. rspamd_config_get_module_opt (cfg,
  957. "fuzzy_check",
  958. "retransmits")) != NULL) {
  959. fuzzy_module_ctx->retransmits = ucl_obj_toint (value);
  960. }
  961. else {
  962. fuzzy_module_ctx->retransmits = DEFAULT_RETRANSMITS;
  963. }
  964. if ((value =
  965. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  966. "max_errors")) != NULL) {
  967. fuzzy_module_ctx->max_errors = ucl_obj_toint (value);
  968. }
  969. else {
  970. fuzzy_module_ctx->max_errors = DEFAULT_MAX_ERRORS;
  971. }
  972. if ((value =
  973. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  974. "revive_time")) != NULL) {
  975. fuzzy_module_ctx->revive_time = ucl_obj_todouble (value);
  976. }
  977. else {
  978. fuzzy_module_ctx->revive_time = DEFAULT_REVIVE_TIME;
  979. }
  980. if ((value =
  981. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  982. "whitelist")) != NULL) {
  983. rspamd_config_radix_from_ucl (cfg, value, "Fuzzy whitelist",
  984. &fuzzy_module_ctx->whitelist,
  985. NULL,
  986. NULL, "fuzzy ip whitelist");
  987. }
  988. else {
  989. fuzzy_module_ctx->whitelist = NULL;
  990. }
  991. if ((value =
  992. rspamd_config_get_module_opt (cfg, "fuzzy_check", "rule")) != NULL) {
  993. cb_id = rspamd_symcache_add_symbol (cfg->cache,
  994. "FUZZY_CALLBACK", 0, fuzzy_symbol_callback, NULL,
  995. SYMBOL_TYPE_CALLBACK | SYMBOL_TYPE_FINE,
  996. -1);
  997. rspamd_config_add_symbol (cfg,
  998. "FUZZY_CALLBACK",
  999. 0.0,
  1000. "Fuzzy check callback",
  1001. "fuzzy",
  1002. RSPAMD_SYMBOL_FLAG_IGNORE_METRIC,
  1003. 1,
  1004. 1);
  1005. /*
  1006. * Here we can have 2 possibilities:
  1007. *
  1008. * unnamed rules:
  1009. *
  1010. * rule {
  1011. * ...
  1012. * }
  1013. * rule {
  1014. * ...
  1015. * }
  1016. *
  1017. * - or - named rules:
  1018. *
  1019. * rule {
  1020. * "rule1": {
  1021. * ...
  1022. * }
  1023. * "rule2": {
  1024. * ...
  1025. * }
  1026. * }
  1027. *
  1028. * So, for each element, we check, if there 'servers' key. If 'servers' is
  1029. * presented, then we treat it as unnamed rule, otherwise we treat it as
  1030. * named rule.
  1031. */
  1032. LL_FOREACH (value, cur) {
  1033. if (ucl_object_lookup (cur, "servers")) {
  1034. /* Unnamed rule */
  1035. fuzzy_parse_rule (cfg, cur, NULL, cb_id);
  1036. nrules ++;
  1037. }
  1038. else {
  1039. /* Named rule */
  1040. it = NULL;
  1041. while ((elt = ucl_object_iterate (cur, &it, true)) != NULL) {
  1042. fuzzy_parse_rule (cfg, elt, ucl_object_key (elt), cb_id);
  1043. nrules ++;
  1044. }
  1045. }
  1046. }
  1047. /* We want that to check bad mime attachments */
  1048. rspamd_symcache_add_delayed_dependency (cfg->cache,
  1049. "FUZZY_CALLBACK", "MIME_TYPES_CALLBACK");
  1050. }
  1051. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  1052. msg_warn_config ("fuzzy module is enabled but no rules are defined");
  1053. }
  1054. msg_info_config ("init internal fuzzy_check module, %d rules loaded",
  1055. nrules);
  1056. /* Register global methods */
  1057. lua_getglobal (L, "rspamd_plugins");
  1058. if (lua_type (L, -1) == LUA_TTABLE) {
  1059. lua_pushstring (L, "fuzzy_check");
  1060. lua_createtable (L, 0, 3);
  1061. /* Set methods */
  1062. lua_pushstring (L, "unlearn");
  1063. lua_pushcfunction (L, fuzzy_lua_unlearn_handler);
  1064. lua_settable (L, -3);
  1065. lua_pushstring (L, "learn");
  1066. lua_pushcfunction (L, fuzzy_lua_learn_handler);
  1067. lua_settable (L, -3);
  1068. lua_pushstring (L, "gen_hashes");
  1069. lua_pushcfunction (L, fuzzy_lua_gen_hashes_handler);
  1070. lua_settable (L, -3);
  1071. /* Finish fuzzy_check key */
  1072. lua_settable (L, -3);
  1073. }
  1074. lua_settop (L, 0);
  1075. return res;
  1076. }
  1077. gint
  1078. fuzzy_check_module_reconfig (struct rspamd_config *cfg)
  1079. {
  1080. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg);
  1081. if (fuzzy_module_ctx->cleanup_rules_ref != -1) {
  1082. /* Sync lua_fuzzy rules */
  1083. gint err_idx, ret;
  1084. lua_State *L = (lua_State *)cfg->lua_state;
  1085. lua_pushcfunction (L, &rspamd_lua_traceback);
  1086. err_idx = lua_gettop (L);
  1087. lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->cleanup_rules_ref);
  1088. if ((ret = lua_pcall (L, 0, 0, err_idx)) != 0) {
  1089. msg_err_config ("call to cleanup_rules lua "
  1090. "script failed (%d): %s", ret, lua_tostring (L, -1));
  1091. }
  1092. luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX,
  1093. fuzzy_module_ctx->cleanup_rules_ref);
  1094. lua_settop (L, 0);
  1095. }
  1096. if (fuzzy_module_ctx->check_mime_part_ref != -1) {
  1097. luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX,
  1098. fuzzy_module_ctx->check_mime_part_ref);
  1099. }
  1100. if (fuzzy_module_ctx->process_rule_ref != -1) {
  1101. luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX,
  1102. fuzzy_module_ctx->process_rule_ref);
  1103. }
  1104. return fuzzy_check_module_config (cfg, false);
  1105. }
  1106. /* Finalize IO */
  1107. static void
  1108. fuzzy_io_fin (void *ud)
  1109. {
  1110. struct fuzzy_client_session *session = ud;
  1111. if (session->commands) {
  1112. g_ptr_array_free (session->commands, TRUE);
  1113. }
  1114. if (session->results) {
  1115. g_ptr_array_free (session->results, TRUE);
  1116. }
  1117. rspamd_ev_watcher_stop (session->event_loop, &session->ev);
  1118. close (session->fd);
  1119. }
  1120. static GArray *
  1121. fuzzy_preprocess_words (struct rspamd_mime_text_part *part, rspamd_mempool_t *pool)
  1122. {
  1123. return part->utf_words;
  1124. }
  1125. static void
  1126. fuzzy_encrypt_cmd (struct fuzzy_rule *rule,
  1127. struct rspamd_fuzzy_encrypted_req_hdr *hdr,
  1128. guchar *data, gsize datalen)
  1129. {
  1130. const guchar *pk;
  1131. guint pklen;
  1132. g_assert (hdr != NULL);
  1133. g_assert (data != NULL);
  1134. g_assert (rule != NULL);
  1135. /* Encrypt data */
  1136. memcpy (hdr->magic,
  1137. fuzzy_encrypted_magic,
  1138. sizeof (hdr->magic));
  1139. ottery_rand_bytes (hdr->nonce, sizeof (hdr->nonce));
  1140. pk = rspamd_keypair_component (rule->local_key,
  1141. RSPAMD_KEYPAIR_COMPONENT_PK, &pklen);
  1142. memcpy (hdr->pubkey, pk, MIN (pklen, sizeof (hdr->pubkey)));
  1143. pk = rspamd_pubkey_get_pk (rule->peer_key, &pklen);
  1144. memcpy (hdr->key_id, pk, MIN (sizeof (hdr->key_id), pklen));
  1145. rspamd_keypair_cache_process (rule->ctx->keypairs_cache,
  1146. rule->local_key, rule->peer_key);
  1147. rspamd_cryptobox_encrypt_nm_inplace (data, datalen,
  1148. hdr->nonce, rspamd_pubkey_get_nm (rule->peer_key, rule->local_key),
  1149. hdr->mac,
  1150. rspamd_pubkey_alg (rule->peer_key));
  1151. }
  1152. static struct fuzzy_cmd_io *
  1153. fuzzy_cmd_stat (struct fuzzy_rule *rule,
  1154. int c,
  1155. gint flag,
  1156. guint32 weight,
  1157. rspamd_mempool_t *pool)
  1158. {
  1159. struct rspamd_fuzzy_cmd *cmd;
  1160. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1161. struct fuzzy_cmd_io *io;
  1162. if (rule->peer_key) {
  1163. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1164. cmd = &enccmd->cmd;
  1165. }
  1166. else {
  1167. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1168. }
  1169. cmd->cmd = c;
  1170. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1171. cmd->shingles_count = 0;
  1172. cmd->tag = ottery_rand_uint32 ();
  1173. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1174. io->flags = 0;
  1175. io->tag = cmd->tag;
  1176. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1177. if (rule->peer_key && enccmd) {
  1178. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd, sizeof (*cmd));
  1179. io->io.iov_base = enccmd;
  1180. io->io.iov_len = sizeof (*enccmd);
  1181. }
  1182. else {
  1183. io->io.iov_base = cmd;
  1184. io->io.iov_len = sizeof (*cmd);
  1185. }
  1186. return io;
  1187. }
  1188. static struct fuzzy_cmd_io *
  1189. fuzzy_cmd_hash (struct fuzzy_rule *rule,
  1190. int c,
  1191. const rspamd_ftok_t *hash,
  1192. gint flag,
  1193. guint32 weight,
  1194. rspamd_mempool_t *pool)
  1195. {
  1196. struct rspamd_fuzzy_cmd *cmd;
  1197. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1198. struct fuzzy_cmd_io *io;
  1199. if (rule->peer_key) {
  1200. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1201. cmd = &enccmd->cmd;
  1202. }
  1203. else {
  1204. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1205. }
  1206. if (hash->len == sizeof (cmd->digest) * 2) {
  1207. /* It is hex encoding */
  1208. if (rspamd_decode_hex_buf (hash->begin, hash->len, cmd->digest,
  1209. sizeof (cmd->digest)) == -1) {
  1210. msg_err_pool ("cannot decode hash, wrong encoding");
  1211. return NULL;
  1212. }
  1213. }
  1214. else {
  1215. msg_err_pool ("cannot decode hash, wrong length: %z", hash->len);
  1216. return NULL;
  1217. }
  1218. cmd->cmd = c;
  1219. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1220. cmd->shingles_count = 0;
  1221. cmd->tag = ottery_rand_uint32 ();
  1222. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1223. io->flags = 0;
  1224. io->tag = cmd->tag;
  1225. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1226. if (rule->peer_key && enccmd) {
  1227. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd, sizeof (*cmd));
  1228. io->io.iov_base = enccmd;
  1229. io->io.iov_len = sizeof (*enccmd);
  1230. }
  1231. else {
  1232. io->io.iov_base = cmd;
  1233. io->io.iov_len = sizeof (*cmd);
  1234. }
  1235. return io;
  1236. }
  1237. struct rspamd_cached_shingles { /* hashing results cached per MIME part, see fuzzy_cmd_get_cached/fuzzy_cmd_set_cached */
  1238. struct rspamd_shingle *sh; /* cached shingles; copied into the shingle command when set */
  1239. guchar digest[rspamd_cryptobox_HASHBYTES]; /* cached digest, copied into the command digest */
  1240. guint additional_length; /* number of extension bytes in additional_data */
  1241. guchar *additional_data; /* extension bytes; the creator in this file points it just past this struct */
  1242. };
  1243. static struct rspamd_cached_shingles *
  1244. fuzzy_cmd_get_cached (struct fuzzy_rule *rule,
  1245. struct rspamd_task *task,
  1246. struct rspamd_mime_part *mp)
  1247. {
  1248. gchar key[32];
  1249. gint key_part;
  1250. struct rspamd_cached_shingles **cached;
  1251. memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
  1252. rspamd_snprintf (key, sizeof (key), "%s%d", rule->algorithm_str,
  1253. key_part);
  1254. cached = (struct rspamd_cached_shingles **)rspamd_mempool_get_variable (
  1255. task->task_pool, key);
  1256. if (cached && cached[mp->part_number]) {
  1257. return cached[mp->part_number];
  1258. }
  1259. return NULL;
  1260. }
  1261. static void
  1262. fuzzy_cmd_set_cached (struct fuzzy_rule *rule,
  1263. struct rspamd_task *task,
  1264. struct rspamd_mime_part *mp,
  1265. struct rspamd_cached_shingles *data)
  1266. {
  1267. gchar key[32];
  1268. gint key_part;
  1269. struct rspamd_cached_shingles **cached;
  1270. memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
  1271. rspamd_snprintf (key, sizeof (key), "%s%d", rule->algorithm_str,
  1272. key_part);
  1273. cached = (struct rspamd_cached_shingles **)rspamd_mempool_get_variable (
  1274. task->task_pool, key);
  1275. if (cached) {
  1276. cached[mp->part_number] = data;
  1277. }
  1278. else {
  1279. cached = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cached) *
  1280. (MESSAGE_FIELD (task, parts)->len + 1));
  1281. cached[mp->part_number] = data;
  1282. rspamd_mempool_set_variable (task->task_pool, key, cached, NULL);
  1283. }
  1284. }
  1285. static gboolean
  1286. fuzzy_rule_check_mimepart (struct rspamd_task *task,
  1287. struct fuzzy_rule *rule,
  1288. struct rspamd_mime_part *part,
  1289. gboolean *need_check,
  1290. gboolean *fuzzy_check)
  1291. {
  1292. lua_State *L = (lua_State *)task->cfg->lua_state;
  1293. gint old_top = lua_gettop (L);
  1294. if (rule->lua_id != -1 && rule->ctx->check_mime_part_ref != -1) {
  1295. gint err_idx, ret;
  1296. struct rspamd_task **ptask;
  1297. struct rspamd_mime_part **ppart;
  1298. lua_pushcfunction (L, &rspamd_lua_traceback);
  1299. err_idx = lua_gettop (L);
  1300. lua_rawgeti (L, LUA_REGISTRYINDEX, rule->ctx->check_mime_part_ref);
  1301. ptask = lua_newuserdata (L, sizeof (*ptask));
  1302. *ptask = task;
  1303. rspamd_lua_setclass (L, "rspamd{task}", -1);
  1304. ppart = lua_newuserdata (L, sizeof (*ppart));
  1305. *ppart = part;
  1306. rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
  1307. lua_pushnumber (L, rule->lua_id);
  1308. if ((ret = lua_pcall (L, 3, 2, err_idx)) != 0) {
  1309. msg_err_task ("call to check_mime_part lua "
  1310. "script failed (%d): %s", ret, lua_tostring (L, -1));
  1311. ret = FALSE;
  1312. }
  1313. else {
  1314. ret = TRUE;
  1315. *need_check = lua_toboolean (L, -2);
  1316. *fuzzy_check = lua_toboolean (L, -1);
  1317. }
  1318. lua_settop (L, old_top);
  1319. return ret;
  1320. }
  1321. return FALSE;
  1322. }
  1323. #define MAX_FUZZY_DOMAIN 64
  1324. static guint
  1325. fuzzy_cmd_extension_length (struct rspamd_task *task,
  1326. struct fuzzy_rule *rule)
  1327. {
  1328. guint total = 0;
  1329. if (rule->no_share) {
  1330. return 0;
  1331. }
  1332. /* From domain */
  1333. if (MESSAGE_FIELD (task, from_mime) && MESSAGE_FIELD (task, from_mime)->len > 0) {
  1334. struct rspamd_email_address *addr = g_ptr_array_index (MESSAGE_FIELD (task,
  1335. from_mime), 0);
  1336. if (addr->domain_len > 0) {
  1337. total += 2; /* 2 bytes: type + length */
  1338. total += MIN (MAX_FUZZY_DOMAIN, addr->domain_len);
  1339. }
  1340. }
  1341. if (task->from_addr && rspamd_inet_address_get_af (task->from_addr) == AF_INET) {
  1342. total += sizeof (struct in_addr) + 1;
  1343. }
  1344. else if (task->from_addr&& rspamd_inet_address_get_af (task->from_addr) == AF_INET6) {
  1345. total += sizeof (struct in6_addr) + 1;
  1346. }
  1347. return total;
  1348. }
  1349. static guint
  1350. fuzzy_cmd_write_extensions (struct rspamd_task *task,
  1351. struct fuzzy_rule *rule,
  1352. guchar *dest,
  1353. gsize available)
  1354. {
  1355. guint written = 0;
  1356. if (rule->no_share) {
  1357. return 0;
  1358. }
  1359. if (MESSAGE_FIELD (task, from_mime) && MESSAGE_FIELD (task, from_mime)->len > 0) {
  1360. struct rspamd_email_address *addr = g_ptr_array_index (MESSAGE_FIELD (task,
  1361. from_mime), 0);
  1362. guint to_write = MIN (MAX_FUZZY_DOMAIN, addr->domain_len) + 2;
  1363. if (to_write > 0 && to_write <= available) {
  1364. *dest++ = RSPAMD_FUZZY_EXT_SOURCE_DOMAIN;
  1365. *dest++ = to_write - 2;
  1366. if (addr->domain_len < MAX_FUZZY_DOMAIN) {
  1367. memcpy (dest, addr->domain, addr->domain_len);
  1368. dest += addr->domain_len;
  1369. }
  1370. else {
  1371. /* Trim from left */
  1372. memcpy (dest,
  1373. addr->domain + (addr->domain_len - MAX_FUZZY_DOMAIN),
  1374. MAX_FUZZY_DOMAIN);
  1375. dest += MAX_FUZZY_DOMAIN;
  1376. }
  1377. available -= to_write;
  1378. written += to_write;
  1379. }
  1380. }
  1381. if (task->from_addr && rspamd_inet_address_get_af (task->from_addr) == AF_INET) {
  1382. if (available >= sizeof (struct in_addr) + 1) {
  1383. guint klen;
  1384. guchar *inet_data = rspamd_inet_address_get_hash_key (task->from_addr, &klen);
  1385. *dest++ = RSPAMD_FUZZY_EXT_SOURCE_IP4;
  1386. memcpy (dest, inet_data, klen);
  1387. dest += klen;
  1388. available -= klen + 1;
  1389. written += klen + 1;
  1390. }
  1391. }
  1392. else if (task->from_addr && rspamd_inet_address_get_af (task->from_addr) == AF_INET6) {
  1393. if (available >= sizeof (struct in6_addr) + 1) {
  1394. guint klen;
  1395. guchar *inet_data = rspamd_inet_address_get_hash_key (task->from_addr, &klen);
  1396. *dest++ = RSPAMD_FUZZY_EXT_SOURCE_IP6;
  1397. memcpy (dest, inet_data, klen);
  1398. dest += klen;
  1399. available -= klen + 1;
  1400. written += klen + 1;
  1401. }
  1402. }
  1403. return written;
  1404. }
  1405. /*
  1406. * Create fuzzy command from a text part
  1407. */
  1408. static struct fuzzy_cmd_io *
  1409. fuzzy_cmd_from_text_part (struct rspamd_task *task,
  1410. struct fuzzy_rule *rule,
  1411. int c,
  1412. gint flag,
  1413. guint32 weight,
  1414. gboolean short_text,
  1415. struct rspamd_mime_text_part *part,
  1416. struct rspamd_mime_part *mp)
  1417. {
  1418. struct rspamd_fuzzy_shingle_cmd *shcmd = NULL;
  1419. struct rspamd_fuzzy_cmd *cmd = NULL;
  1420. struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd = NULL;
  1421. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1422. struct rspamd_cached_shingles *cached = NULL;
  1423. struct rspamd_shingle *sh = NULL;
  1424. guint i;
  1425. rspamd_cryptobox_hash_state_t st;
  1426. rspamd_stat_token_t *word;
  1427. GArray *words;
  1428. struct fuzzy_cmd_io *io;
  1429. guint additional_length;
  1430. guchar *additional_data;
  1431. cached = fuzzy_cmd_get_cached (rule, task, mp);
  1432. /*
  1433. * Important note:
  1434. *
  1435. * We assume that fuzzy io is a consistent memory layout to fit into
  1436. * iov structure of size 1
  1437. *
  1438. * However, there are 4 possibilities:
  1439. * 1) non encrypted, non shingle command - just one cmd
  1440. * 2) encrypted, non shingle command - encryption hdr + cmd
  1441. * 3) non encrypted, shingle command - cmd + shingle
  1442. * 4) encrypted, shingle command - encryption hdr + cmd + shingle
  1443. *
  1444. * Extensions are always at the end, but since we also have caching (sigh, meh...)
  1445. * then we have one piece that looks like cmd (+ shingle) + extensions
  1446. * To encrypt it optionally we take this memory and prepend encryption header
  1447. *
  1448. * In case of cached version we do the same: allocate, copy from cached (including extra)
  1449. * and optionally encrypt.
  1450. *
  1451. * However, there should be no extensions in case of unencrypted connection
  1452. * (for sanity + privacy).
  1453. */
  1454. if (cached) {
  1455. additional_length = cached->additional_length;
  1456. additional_data = cached->additional_data;
  1457. /* Copy cached */
  1458. if (short_text) {
  1459. enccmd = rspamd_mempool_alloc0 (task->task_pool,
  1460. sizeof (*enccmd) + additional_length);
  1461. cmd = &enccmd->cmd;
  1462. memcpy (cmd->digest, cached->digest,
  1463. sizeof (cached->digest));
  1464. cmd->shingles_count = 0;
  1465. memcpy (((guchar *)enccmd) + sizeof (*enccmd), additional_data,
  1466. additional_length);
  1467. }
  1468. else if (cached->sh) {
  1469. encshcmd = rspamd_mempool_alloc0 (task->task_pool,
  1470. additional_length + sizeof (*encshcmd));
  1471. shcmd = &encshcmd->cmd;
  1472. memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
  1473. memcpy (shcmd->basic.digest, cached->digest,
  1474. sizeof (cached->digest));
  1475. memcpy (((guchar *)encshcmd) + sizeof (*encshcmd), additional_data,
  1476. additional_length);
  1477. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1478. }
  1479. else {
  1480. return NULL;
  1481. }
  1482. }
  1483. else {
  1484. additional_length = fuzzy_cmd_extension_length (task, rule);
  1485. cached = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cached) +
  1486. additional_length);
  1487. /*
  1488. * Allocate extensions and never touch it except copying to avoid
  1489. * occasional encryption
  1490. */
  1491. cached->additional_length = additional_length;
  1492. cached->additional_data = ((guchar *)cached) + sizeof (*cached);
  1493. if (additional_length > 0) {
  1494. fuzzy_cmd_write_extensions (task, rule, cached->additional_data,
  1495. additional_length);
  1496. }
  1497. if (short_text) {
  1498. enccmd = rspamd_mempool_alloc0 (task->task_pool,
  1499. sizeof (*enccmd) + additional_length);
  1500. cmd = &enccmd->cmd;
  1501. rspamd_cryptobox_hash_init (&st, rule->hash_key->str,
  1502. rule->hash_key->len);
  1503. rspamd_cryptobox_hash_update (&st, part->utf_stripped_content->data,
  1504. part->utf_stripped_content->len);
  1505. if (!rule->no_subject && (MESSAGE_FIELD (task, subject))) {
  1506. /* We also include subject */
  1507. rspamd_cryptobox_hash_update (&st, MESSAGE_FIELD (task, subject),
  1508. strlen (MESSAGE_FIELD (task, subject)));
  1509. }
  1510. rspamd_cryptobox_hash_final (&st, cmd->digest);
  1511. memcpy (cached->digest, cmd->digest, sizeof (cached->digest));
  1512. cached->sh = NULL;
  1513. additional_data = ((guchar *)enccmd) + sizeof (*enccmd);
  1514. memcpy (additional_data, cached->additional_data, additional_length);
  1515. }
  1516. else {
  1517. encshcmd = rspamd_mempool_alloc0 (task->task_pool,
  1518. sizeof (*encshcmd) + additional_length);
  1519. shcmd = &encshcmd->cmd;
  1520. /*
  1521. * Generate hash from all words in the part
  1522. */
  1523. rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len);
  1524. words = fuzzy_preprocess_words (part, task->task_pool);
  1525. for (i = 0; i < words->len; i ++) {
  1526. word = &g_array_index (words, rspamd_stat_token_t, i);
  1527. if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED)
  1528. || word->stemmed.len == 0)) {
  1529. rspamd_cryptobox_hash_update (&st, word->stemmed.begin,
  1530. word->stemmed.len);
  1531. }
  1532. }
  1533. rspamd_cryptobox_hash_final (&st, shcmd->basic.digest);
  1534. msg_debug_task ("loading shingles of type %s with key %*xs",
  1535. rule->algorithm_str,
  1536. 16, rule->shingles_key->str);
  1537. sh = rspamd_shingles_from_text (words,
  1538. rule->shingles_key->str, task->task_pool,
  1539. rspamd_shingles_default_filter, NULL,
  1540. rule->alg);
  1541. if (sh != NULL) {
  1542. memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl));
  1543. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1544. }
  1545. else {
  1546. /* No shingles, no check */
  1547. return NULL;
  1548. }
  1549. cached->sh = sh;
  1550. memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest));
  1551. additional_data = ((guchar *)encshcmd) + sizeof (*encshcmd);
  1552. memcpy (additional_data, cached->additional_data, additional_length);
  1553. }
  1554. /*
  1555. * We always save encrypted command as it can handle both
  1556. * encrypted and unencrypted requests.
  1557. *
  1558. * Since it is copied when obtained from the cache, it is safe to use
  1559. * it this way.
  1560. */
  1561. fuzzy_cmd_set_cached (rule, task, mp, cached);
  1562. }
  1563. io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
  1564. io->part = mp;
  1565. if (!short_text) {
  1566. shcmd->basic.tag = ottery_rand_uint32 ();
  1567. shcmd->basic.cmd = c;
  1568. shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1569. if (c != FUZZY_CHECK) {
  1570. shcmd->basic.flag = flag;
  1571. shcmd->basic.value = weight;
  1572. }
  1573. io->tag = shcmd->basic.tag;
  1574. memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd));
  1575. }
  1576. else {
  1577. cmd->tag = ottery_rand_uint32 ();
  1578. cmd->cmd = c;
  1579. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1580. if (c != FUZZY_CHECK) {
  1581. cmd->flag = flag;
  1582. cmd->value = weight;
  1583. }
  1584. io->tag = cmd->tag;
  1585. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1586. }
  1587. io->flags = 0;
  1588. if (rule->peer_key) {
  1589. /* Encrypt data */
  1590. if (!short_text) {
  1591. fuzzy_encrypt_cmd (rule, &encshcmd->hdr, (guchar *) shcmd,
  1592. sizeof (*shcmd) + additional_length);
  1593. io->io.iov_base = encshcmd;
  1594. io->io.iov_len = sizeof (*encshcmd) + additional_length;
  1595. }
  1596. else {
  1597. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd,
  1598. sizeof (*cmd) + additional_length);
  1599. io->io.iov_base = enccmd;
  1600. io->io.iov_len = sizeof (*enccmd) + additional_length;
  1601. }
  1602. }
  1603. else {
  1604. if (!short_text) {
  1605. io->io.iov_base = shcmd;
  1606. io->io.iov_len = sizeof (*shcmd) + additional_length;
  1607. }
  1608. else {
  1609. io->io.iov_base = cmd;
  1610. io->io.iov_len = sizeof (*cmd) + additional_length;
  1611. }
  1612. }
  1613. return io;
  1614. }
#if 0
/*
 * NOTE: this function is compiled out by the surrounding `#if 0`.
 *
 * Builds a shingle command for an image part. Shingles and digest are taken
 * from the per-part cache when an entry exists; otherwise shingles are
 * generated from img->dct, the digest is computed over img->dct, and both
 * are stored in the cache. The command is encrypted when rule->peer_key is
 * set. Unlike the text/data variants visible in this file, no extension
 * bytes are appended here.
 *
 * NOTE(review): when rspamd_shingles_from_image() returns NULL the request is
 * still built (with shingles_count left at 0 by alloc0) and cached->sh is set
 * to NULL; a later cache hit would then memcpy() from that NULL pointer.
 * Confirm and fix before re-enabling this code.
 */
static struct fuzzy_cmd_io *
fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
		int c,
		gint flag,
		guint32 weight,
		struct rspamd_task *task,
		struct rspamd_image *img,
		struct rspamd_mime_part *mp)
{
	struct rspamd_fuzzy_shingle_cmd *shcmd;
	struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd;
	struct fuzzy_cmd_io *io;
	struct rspamd_shingle *sh;
	struct rspamd_cached_shingles *cached;

	cached = fuzzy_cmd_get_cached (rule, task, mp);

	if (cached) {
		/* Copy cached */
		encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
		shcmd = &encshcmd->cmd;
		memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
		memcpy (shcmd->basic.digest, cached->digest,
				sizeof (cached->digest));
		shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
	}
	else {
		encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
		shcmd = &encshcmd->cmd;

		/*
		 * Generate shingles
		 */
		sh = rspamd_shingles_from_image (img->dct,
				rule->shingles_key->str, task->task_pool,
				rspamd_shingles_default_filter, NULL,
				rule->alg);
		if (sh != NULL) {
			memcpy (&shcmd->sgl, sh->hashes, sizeof (shcmd->sgl));
			shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
#if 0
			for (unsigned int i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
				msg_err ("shingle %d: %L", i, sh->hashes[i]);
			}
#endif
		}

		/* Digest is a keyed hash over the DCT data of the image */
		rspamd_cryptobox_hash (shcmd->basic.digest,
				(const guchar *)img->dct, RSPAMD_DCT_LEN / NBBY,
				rule->hash_key->str, rule->hash_key->len);

		msg_debug_task ("loading shingles of type %s with key %*xs",
				rule->algorithm_str,
				16, rule->shingles_key->str);

		/*
		 * We always save encrypted command as it can handle both
		 * encrypted and unencrypted requests.
		 *
		 * Since it is copied when obtained from the cache, it is safe to use
		 * it this way.
		 */
		cached = rspamd_mempool_alloc (task->task_pool, sizeof (*cached));
		cached->sh = sh;
		memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest));
		fuzzy_cmd_set_cached (rule, task, mp, cached);
	}

	/* Fill the basic header; flag/value are only sent for non-check commands */
	shcmd->basic.tag = ottery_rand_uint32 ();
	shcmd->basic.cmd = c;
	shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;

	if (c != FUZZY_CHECK) {
		shcmd->basic.flag = flag;
		shcmd->basic.value = weight;
	}

	io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
	io->part = mp;
	io->tag = shcmd->basic.tag;
	io->flags = FUZZY_CMD_FLAG_IMAGE;
	/* Unencrypted copy of the basic command, kept alongside the tag */
	memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd));

	if (rule->peer_key) {
		/* Encrypt data */
		fuzzy_encrypt_cmd (rule, &encshcmd->hdr, (guchar *) shcmd, sizeof (*shcmd));
		io->io.iov_base = encshcmd;
		io->io.iov_len = sizeof (*encshcmd);
	}
	else {
		io->io.iov_base = shcmd;
		io->io.iov_len = sizeof (*shcmd);
	}

	return io;
}
#endif
  1702. static struct fuzzy_cmd_io *
  1703. fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
  1704. int c,
  1705. gint flag,
  1706. guint32 weight,
  1707. struct rspamd_task *task,
  1708. guchar digest[rspamd_cryptobox_HASHBYTES],
  1709. struct rspamd_mime_part *mp)
  1710. {
  1711. struct rspamd_fuzzy_cmd *cmd;
  1712. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1713. struct fuzzy_cmd_io *io;
  1714. guint additional_length;
  1715. guchar *additional_data;
  1716. additional_length = fuzzy_cmd_extension_length (task, rule);
  1717. if (rule->peer_key) {
  1718. enccmd = rspamd_mempool_alloc0 (task->task_pool,
  1719. sizeof (*enccmd) + additional_length);
  1720. cmd = &enccmd->cmd;
  1721. additional_data = ((guchar *)enccmd) + sizeof (*enccmd);
  1722. }
  1723. else {
  1724. cmd = rspamd_mempool_alloc0 (task->task_pool,
  1725. sizeof (*cmd) + additional_length);
  1726. additional_data = ((guchar *)cmd) + sizeof (*cmd);
  1727. }
  1728. cmd->cmd = c;
  1729. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1730. if (c != FUZZY_CHECK) {
  1731. cmd->flag = flag;
  1732. cmd->value = weight;
  1733. }
  1734. cmd->shingles_count = 0;
  1735. cmd->tag = ottery_rand_uint32 ();
  1736. memcpy (cmd->digest, digest, sizeof (cmd->digest));
  1737. io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
  1738. io->flags = 0;
  1739. io->tag = cmd->tag;
  1740. io->part = mp;
  1741. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1742. if (additional_length > 0) {
  1743. fuzzy_cmd_write_extensions (task, rule, additional_data,
  1744. additional_length);
  1745. }
  1746. if (rule->peer_key) {
  1747. g_assert (enccmd != NULL);
  1748. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd,
  1749. sizeof (*cmd) + additional_length);
  1750. io->io.iov_base = enccmd;
  1751. io->io.iov_len = sizeof (*enccmd) + additional_length;
  1752. }
  1753. else {
  1754. io->io.iov_base = cmd;
  1755. io->io.iov_len = sizeof (*cmd) + additional_length;
  1756. }
  1757. return io;
  1758. }
  1759. static gboolean
  1760. fuzzy_cmd_to_wire (gint fd, struct iovec *io)
  1761. {
  1762. struct msghdr msg;
  1763. memset (&msg, 0, sizeof (msg));
  1764. msg.msg_iov = io;
  1765. msg.msg_iovlen = 1;
  1766. while (sendmsg (fd, &msg, 0) == -1) {
  1767. if (errno == EINTR) {
  1768. continue;
  1769. }
  1770. return FALSE;
  1771. }
  1772. return TRUE;
  1773. }
  1774. static gboolean
  1775. fuzzy_cmd_vector_to_wire (gint fd, GPtrArray *v)
  1776. {
  1777. guint i;
  1778. gboolean all_sent = TRUE, all_replied = TRUE;
  1779. struct fuzzy_cmd_io *io;
  1780. gboolean processed = FALSE;
  1781. /* First try to resend unsent commands */
  1782. for (i = 0; i < v->len; i ++) {
  1783. io = g_ptr_array_index (v, i);
  1784. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  1785. continue;
  1786. }
  1787. all_replied = FALSE;
  1788. if (!(io->flags & FUZZY_CMD_FLAG_SENT)) {
  1789. if (!fuzzy_cmd_to_wire (fd, &io->io)) {
  1790. return FALSE;
  1791. }
  1792. processed = TRUE;
  1793. io->flags |= FUZZY_CMD_FLAG_SENT;
  1794. all_sent = FALSE;
  1795. }
  1796. }
  1797. if (all_sent && !all_replied) {
  1798. /* Now try to resend each command in the vector */
  1799. for (i = 0; i < v->len; i++) {
  1800. io = g_ptr_array_index (v, i);
  1801. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1802. io->flags &= ~FUZZY_CMD_FLAG_SENT;
  1803. }
  1804. }
  1805. return fuzzy_cmd_vector_to_wire (fd, v);
  1806. }
  1807. return processed;
  1808. }
  1809. /*
  1810. * Read replies one-by-one and remove them from req array
  1811. */
  1812. static const struct rspamd_fuzzy_reply *
  1813. fuzzy_process_reply (guchar **pos, gint *r, GPtrArray *req,
  1814. struct fuzzy_rule *rule, struct rspamd_fuzzy_cmd **pcmd,
  1815. struct fuzzy_cmd_io **pio)
  1816. {
  1817. guchar *p = *pos;
  1818. gint remain = *r;
  1819. guint i, required_size;
  1820. struct fuzzy_cmd_io *io;
  1821. const struct rspamd_fuzzy_reply *rep;
  1822. struct rspamd_fuzzy_encrypted_reply encrep;
  1823. gboolean found = FALSE;
  1824. if (rule->peer_key) {
  1825. required_size = sizeof (encrep);
  1826. }
  1827. else {
  1828. required_size = sizeof (*rep);
  1829. }
  1830. if (remain <= 0 || (guint)remain < required_size) {
  1831. return NULL;
  1832. }
  1833. if (rule->peer_key) {
  1834. memcpy (&encrep, p, sizeof (encrep));
  1835. *pos += required_size;
  1836. *r -= required_size;
  1837. /* Try to decrypt reply */
  1838. rspamd_keypair_cache_process (rule->ctx->keypairs_cache,
  1839. rule->local_key, rule->peer_key);
  1840. if (!rspamd_cryptobox_decrypt_nm_inplace ((guchar *)&encrep.rep,
  1841. sizeof (encrep.rep),
  1842. encrep.hdr.nonce,
  1843. rspamd_pubkey_get_nm (rule->peer_key, rule->local_key),
  1844. encrep.hdr.mac,
  1845. rspamd_pubkey_alg (rule->peer_key))) {
  1846. msg_info ("cannot decrypt reply");
  1847. return NULL;
  1848. }
  1849. /* Copy decrypted over the input wire */
  1850. memcpy (p, &encrep.rep, sizeof (encrep.rep));
  1851. }
  1852. else {
  1853. *pos += required_size;
  1854. *r -= required_size;
  1855. }
  1856. rep = (const struct rspamd_fuzzy_reply *) p;
  1857. /*
  1858. * Search for tag
  1859. */
  1860. for (i = 0; i < req->len; i ++) {
  1861. io = g_ptr_array_index (req, i);
  1862. if (io->tag == rep->v1.tag) {
  1863. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1864. io->flags |= FUZZY_CMD_FLAG_REPLIED;
  1865. if (pcmd) {
  1866. *pcmd = &io->cmd;
  1867. }
  1868. if (pio) {
  1869. *pio = io;
  1870. }
  1871. return rep;
  1872. }
  1873. found = TRUE;
  1874. }
  1875. }
  1876. if (!found) {
  1877. msg_info ("unexpected tag: %ud", rep->v1.tag);
  1878. }
  1879. return NULL;
  1880. }
  1881. static void
  1882. fuzzy_insert_result (struct fuzzy_client_session *session,
  1883. const struct rspamd_fuzzy_reply *rep,
  1884. struct rspamd_fuzzy_cmd *cmd,
  1885. struct fuzzy_cmd_io *io,
  1886. guint flag)
  1887. {
  1888. const gchar *symbol;
  1889. struct fuzzy_mapping *map;
  1890. struct rspamd_task *task = session->task;
  1891. double weight;
  1892. double nval;
  1893. guchar buf[2048];
  1894. const gchar *type = "bin";
  1895. struct fuzzy_client_result *res;
  1896. gboolean is_fuzzy = FALSE;
  1897. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  1898. /* Discriminate scores for small images */
  1899. static const guint short_image_limit = 32 * 1024;
  1900. /* Get mapping by flag */
  1901. if ((map =
  1902. g_hash_table_lookup (session->rule->mappings,
  1903. GINT_TO_POINTER (rep->v1.flag))) == NULL) {
  1904. /* Default symbol and default weight */
  1905. symbol = session->rule->symbol;
  1906. weight = session->rule->max_score;
  1907. }
  1908. else {
  1909. /* Get symbol and weight from map */
  1910. symbol = map->symbol;
  1911. weight = map->weight;
  1912. }
  1913. res = rspamd_mempool_alloc0 (task->task_pool, sizeof (*res));
  1914. res->prob = rep->v1.prob;
  1915. res->symbol = symbol;
  1916. /*
  1917. * Hash is assumed to be found if probability is more than 0.5
  1918. * In that case `value` means number of matches
  1919. * Otherwise `value` means error code
  1920. */
  1921. nval = fuzzy_normalize (rep->v1.value, weight);
  1922. if (io) {
  1923. if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  1924. if (!io->part || io->part->parsed_data.len <= short_image_limit) {
  1925. nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
  1926. }
  1927. type = "img";
  1928. res->type = FUZZY_RESULT_IMG;
  1929. }
  1930. else {
  1931. /* Calc real probability */
  1932. nval *= sqrtf (rep->v1.prob);
  1933. if (cmd->shingles_count > 0) {
  1934. type = "txt";
  1935. res->type = FUZZY_RESULT_TXT;
  1936. }
  1937. else {
  1938. if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
  1939. type = "content";
  1940. res->type = FUZZY_RESULT_CONTENT;
  1941. }
  1942. else {
  1943. res->type = FUZZY_RESULT_BIN;
  1944. }
  1945. }
  1946. }
  1947. }
  1948. res->score = nval;
  1949. if (memcmp (rep->digest, cmd->digest, sizeof (rep->digest)) != 0) {
  1950. is_fuzzy = TRUE;
  1951. }
  1952. if (map != NULL || !session->rule->skip_unknown) {
  1953. GList *fuzzy_var;
  1954. rspamd_fstring_t *hex_result;
  1955. gchar timebuf[64];
  1956. struct tm tm_split;
  1957. if (session->rule->skip_map) {
  1958. rspamd_encode_hex_buf (cmd->digest, sizeof (cmd->digest),
  1959. hexbuf, sizeof (hexbuf) - 1);
  1960. hexbuf[sizeof (hexbuf) - 1] = '\0';
  1961. if (rspamd_match_hash_map (session->rule->skip_map, hexbuf,
  1962. sizeof (hexbuf) - 1)) {
  1963. return;
  1964. }
  1965. }
  1966. rspamd_encode_hex_buf (rep->digest, sizeof (rep->digest),
  1967. hexbuf, sizeof (hexbuf) - 1);
  1968. hexbuf[sizeof (hexbuf) - 1] = '\0';
  1969. rspamd_gmtime (rep->ts, &tm_split);
  1970. rspamd_snprintf (timebuf, sizeof (timebuf), "%02d.%02d.%4d %02d:%02d:%02d GMT",
  1971. tm_split.tm_mday,
  1972. tm_split.tm_mon + 1,
  1973. tm_split.tm_year + 1900,
  1974. tm_split.tm_hour, tm_split.tm_min, tm_split.tm_sec);
  1975. if (is_fuzzy) {
  1976. msg_info_task (
  1977. "found fuzzy hash(%s) %s (%*xs requested) with weight: "
  1978. "%.2f, probability %.2f, in list: %s:%d%s; added on %s",
  1979. type,
  1980. hexbuf,
  1981. (gint) sizeof (cmd->digest), cmd->digest,
  1982. nval,
  1983. (gdouble) rep->v1.prob,
  1984. symbol,
  1985. rep->v1.flag,
  1986. map == NULL ? "(unknown)" : "",
  1987. timebuf);
  1988. }
  1989. else {
  1990. msg_info_task (
  1991. "found exact fuzzy hash(%s) %s with weight: "
  1992. "%.2f, probability %.2f, in list: %s:%d%s; added on %s",
  1993. type,
  1994. hexbuf,
  1995. nval,
  1996. (gdouble) rep->v1.prob,
  1997. symbol,
  1998. rep->v1.flag,
  1999. map == NULL ? "(unknown)" : "",
  2000. timebuf);
  2001. }
  2002. rspamd_snprintf (buf,
  2003. sizeof (buf),
  2004. "%d:%*s:%.2f:%s",
  2005. rep->v1.flag,
  2006. (gint)MIN(rspamd_fuzzy_hash_len * 2, sizeof (rep->digest) * 2), hexbuf,
  2007. rep->v1.prob,
  2008. type);
  2009. res->option = rspamd_mempool_strdup (task->task_pool, buf);
  2010. g_ptr_array_add (session->results, res);
  2011. /* Store hex string in pool variable */
  2012. hex_result = rspamd_mempool_alloc (task->task_pool,
  2013. sizeof (rspamd_fstring_t) + sizeof (hexbuf));
  2014. memcpy (hex_result->str, hexbuf, sizeof (hexbuf));
  2015. hex_result->len = sizeof (hexbuf) - 1;
  2016. hex_result->allocated = (gsize)-1;
  2017. fuzzy_var = rspamd_mempool_get_variable (task->task_pool,
  2018. RSPAMD_MEMPOOL_FUZZY_RESULT);
  2019. if (fuzzy_var == NULL) {
  2020. fuzzy_var = g_list_prepend (NULL, hex_result);
  2021. rspamd_mempool_set_variable (task->task_pool,
  2022. RSPAMD_MEMPOOL_FUZZY_RESULT, fuzzy_var,
  2023. (rspamd_mempool_destruct_t)g_list_free);
  2024. }
  2025. else {
  2026. /* Not very efficient, but we don't really use it intensively */
  2027. fuzzy_var = g_list_append (fuzzy_var, hex_result);
  2028. }
  2029. }
  2030. }
  2031. static gint
  2032. fuzzy_check_try_read (struct fuzzy_client_session *session)
  2033. {
  2034. struct rspamd_task *task;
  2035. const struct rspamd_fuzzy_reply *rep;
  2036. struct rspamd_fuzzy_cmd *cmd = NULL;
  2037. struct fuzzy_cmd_io *io = NULL;
  2038. gint r, ret;
  2039. guchar buf[2048], *p;
  2040. task = session->task;
  2041. if ((r = read (session->fd, buf, sizeof (buf) - 1)) == -1) {
  2042. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  2043. return 0;
  2044. }
  2045. else {
  2046. return -1;
  2047. }
  2048. }
  2049. else {
  2050. p = buf;
  2051. ret = 0;
  2052. while ((rep = fuzzy_process_reply (&p, &r,
  2053. session->commands, session->rule, &cmd, &io)) != NULL) {
  2054. if (rep->v1.prob > 0.5) {
  2055. if (cmd->cmd == FUZZY_CHECK) {
  2056. fuzzy_insert_result (session, rep, cmd, io, rep->v1.flag);
  2057. }
  2058. else if (cmd->cmd == FUZZY_STAT) {
  2059. /* Just set pool variable to extract it in further */
  2060. struct rspamd_fuzzy_stat_entry *pval;
  2061. GList *res;
  2062. pval = rspamd_mempool_alloc (task->task_pool, sizeof (*pval));
  2063. pval->fuzzy_cnt = rep->v1.flag;
  2064. pval->name = session->rule->name;
  2065. res = rspamd_mempool_get_variable (task->task_pool, "fuzzy_stat");
  2066. if (res == NULL) {
  2067. res = g_list_append (NULL, pval);
  2068. rspamd_mempool_set_variable (task->task_pool, "fuzzy_stat",
  2069. res, (rspamd_mempool_destruct_t)g_list_free);
  2070. }
  2071. else {
  2072. res = g_list_append (res, pval);
  2073. }
  2074. }
  2075. }
  2076. else if (rep->v1.value == 403) {
  2077. rspamd_task_insert_result (task, "FUZZY_BLOCKED", 0.0,
  2078. session->rule->name);
  2079. }
  2080. else if (rep->v1.value == 401) {
  2081. if (cmd->cmd != FUZZY_CHECK) {
  2082. msg_info_task (
  2083. "fuzzy check error for %d: skipped by server",
  2084. rep->v1.flag);
  2085. }
  2086. }
  2087. else if (rep->v1.value != 0) {
  2088. msg_info_task (
  2089. "fuzzy check error for %d: unknown error (%d)",
  2090. rep->v1.flag,
  2091. rep->v1.value);
  2092. }
  2093. ret = 1;
  2094. }
  2095. }
  2096. return ret;
  2097. }
  2098. static void
  2099. fuzzy_insert_metric_results (struct rspamd_task *task, struct fuzzy_rule *rule,
  2100. GPtrArray *results)
  2101. {
  2102. struct fuzzy_client_result *res;
  2103. guint i;
  2104. gboolean seen_text_hash = FALSE,
  2105. seen_img_hash = FALSE,
  2106. seen_text_part = FALSE,
  2107. seen_long_text = FALSE;
  2108. gdouble prob_txt = 0.0, mult;
  2109. struct rspamd_mime_text_part *tp;
  2110. /* About 5 words */
  2111. static const unsigned int text_length_cutoff = 25;
  2112. PTR_ARRAY_FOREACH (results, i, res) {
  2113. if (res->type == FUZZY_RESULT_TXT) {
  2114. seen_text_hash = TRUE;
  2115. prob_txt = MAX (prob_txt, res->prob);
  2116. }
  2117. else if (res->type == FUZZY_RESULT_IMG) {
  2118. seen_img_hash = TRUE;
  2119. }
  2120. }
  2121. if (task->message) {
  2122. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
  2123. if (!IS_TEXT_PART_EMPTY (tp) && tp->utf_words != NULL && tp->utf_words->len > 0) {
  2124. seen_text_part = TRUE;
  2125. if (tp->utf_stripped_text.magic == UTEXT_MAGIC) {
  2126. if (utext_isLengthExpensive (&tp->utf_stripped_text)) {
  2127. seen_long_text =
  2128. utext_nativeLength (&tp->utf_stripped_text) >
  2129. text_length_cutoff;
  2130. }
  2131. else {
  2132. /* Cannot directly calculate length */
  2133. seen_long_text =
  2134. (tp->utf_stripped_content->len / 2) >
  2135. text_length_cutoff;
  2136. }
  2137. }
  2138. }
  2139. }
  2140. }
  2141. PTR_ARRAY_FOREACH (results, i, res) {
  2142. mult = 1.0;
  2143. if (res->type == FUZZY_RESULT_IMG) {
  2144. if (!seen_text_hash) {
  2145. if (seen_long_text) {
  2146. mult *= 0.25;
  2147. }
  2148. else if (seen_text_part) {
  2149. /* We have some short text + image */
  2150. mult *= 0.9;
  2151. }
  2152. /* Otherwise apply full score */
  2153. }
  2154. else if (prob_txt < 0.75) {
  2155. /* Penalize sole image without matching text */
  2156. if (prob_txt > 0.5) {
  2157. mult *= prob_txt;
  2158. }
  2159. else {
  2160. mult *= 0.5; /* cutoff */
  2161. }
  2162. }
  2163. }
  2164. else if (res->type == FUZZY_RESULT_TXT) {
  2165. if (seen_img_hash) {
  2166. /* Slightly increase score */
  2167. mult = 1.1;
  2168. }
  2169. }
  2170. gdouble weight = res->score * mult;
  2171. if (!isnan (rule->weight_threshold)) {
  2172. if (weight >= rule->weight_threshold) {
  2173. rspamd_task_insert_result_single (task, res->symbol,
  2174. weight, res->option);
  2175. }
  2176. else {
  2177. msg_info_task ("%s is not added: weight=%.4f below threshold",
  2178. res->symbol, weight);
  2179. }
  2180. }
  2181. else {
  2182. rspamd_task_insert_result_single (task, res->symbol,
  2183. weight, res->option);
  2184. }
  2185. }
  2186. }
  2187. static gboolean
  2188. fuzzy_check_session_is_completed (struct fuzzy_client_session *session)
  2189. {
  2190. struct fuzzy_cmd_io *io;
  2191. guint nreplied = 0, i;
  2192. rspamd_upstream_ok (session->server);
  2193. for (i = 0; i < session->commands->len; i++) {
  2194. io = g_ptr_array_index (session->commands, i);
  2195. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  2196. nreplied++;
  2197. }
  2198. }
  2199. if (nreplied == session->commands->len) {
  2200. fuzzy_insert_metric_results (session->task, session->rule, session->results);
  2201. if (session->item) {
  2202. rspamd_symcache_item_async_dec_check (session->task, session->item, M);
  2203. }
  2204. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  2205. return TRUE;
  2206. }
  2207. return FALSE;
  2208. }
  2209. /* Fuzzy check timeout callback */
  2210. static void
  2211. fuzzy_check_timer_callback (gint fd, short what, void *arg)
  2212. {
  2213. struct fuzzy_client_session *session = arg;
  2214. struct rspamd_task *task;
  2215. task = session->task;
  2216. /* We might be here because of other checks being slow */
  2217. if (fuzzy_check_try_read (session) > 0) {
  2218. if (fuzzy_check_session_is_completed (session)) {
  2219. return;
  2220. }
  2221. }
  2222. if (session->retransmits >= session->rule->ctx->retransmits) {
  2223. msg_err_task ("got IO timeout with server %s(%s), after %d retransmits",
  2224. rspamd_upstream_name (session->server),
  2225. rspamd_inet_address_to_string_pretty (
  2226. rspamd_upstream_addr_cur (session->server)),
  2227. session->retransmits);
  2228. rspamd_upstream_fail (session->server, TRUE, "timeout");
  2229. if (session->item) {
  2230. rspamd_symcache_item_async_dec_check (session->task, session->item, M);
  2231. }
  2232. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  2233. }
  2234. else {
  2235. /* Plan write event */
  2236. rspamd_ev_watcher_reschedule (session->event_loop,
  2237. &session->ev, EV_READ|EV_WRITE);
  2238. session->retransmits ++;
  2239. }
  2240. }
  2241. /* Fuzzy check callback */
  2242. static void
  2243. fuzzy_check_io_callback (gint fd, short what, void *arg)
  2244. {
  2245. struct fuzzy_client_session *session = arg;
  2246. struct rspamd_task *task;
  2247. gint r;
  2248. enum {
  2249. return_error = 0,
  2250. return_want_more,
  2251. return_finished
  2252. } ret = return_error;
  2253. task = session->task;
  2254. if ((what & EV_READ) || session->state == 1) {
  2255. /* Try to read reply */
  2256. r = fuzzy_check_try_read (session);
  2257. switch (r) {
  2258. case 0:
  2259. if (what & EV_READ) {
  2260. ret = return_want_more;
  2261. }
  2262. else {
  2263. /* It is actually time out */
  2264. fuzzy_check_timer_callback (fd, what, arg);
  2265. return;
  2266. }
  2267. break;
  2268. case 1:
  2269. ret = return_finished;
  2270. break;
  2271. default:
  2272. ret = return_error;
  2273. break;
  2274. }
  2275. }
  2276. else if (what & EV_WRITE) {
  2277. if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
  2278. ret = return_error;
  2279. }
  2280. else {
  2281. session->state = 1;
  2282. ret = return_want_more;
  2283. }
  2284. }
  2285. else {
  2286. fuzzy_check_timer_callback (fd, what, arg);
  2287. return;
  2288. }
  2289. if (ret == return_want_more) {
  2290. /* Processed write, switch to reading */
  2291. rspamd_ev_watcher_reschedule (session->event_loop,
  2292. &session->ev, EV_READ);
  2293. }
  2294. else if (ret == return_error) {
  2295. /* Error state */
  2296. msg_err_task ("got error on IO with server %s(%s), on %s, %d, %s",
  2297. rspamd_upstream_name (session->server),
  2298. rspamd_inet_address_to_string_pretty (
  2299. rspamd_upstream_addr_cur (session->server)),
  2300. session->state == 1 ? "read" : "write",
  2301. errno,
  2302. strerror (errno));
  2303. rspamd_upstream_fail (session->server, TRUE, strerror (errno));
  2304. if (session->item) {
  2305. rspamd_symcache_item_async_dec_check (session->task, session->item, M);
  2306. }
  2307. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  2308. }
  2309. else {
  2310. /* Read something from network */
  2311. if (!fuzzy_check_session_is_completed (session)) {
  2312. /* Need to read more */
  2313. rspamd_ev_watcher_reschedule (session->event_loop,
  2314. &session->ev, EV_READ);
  2315. }
  2316. }
  2317. }
  2318. static void
  2319. fuzzy_lua_fin (void *ud)
  2320. {
  2321. struct fuzzy_learn_session *session = ud;
  2322. (*session->saved)--;
  2323. rspamd_ev_watcher_stop (session->event_loop, &session->ev);
  2324. close (session->fd);
  2325. }
  2326. /* Controller IO */
  2327. static void
  2328. fuzzy_controller_timer_callback (gint fd, short what, void *arg)
  2329. {
  2330. struct fuzzy_learn_session *session = arg;
  2331. struct rspamd_task *task;
  2332. task = session->task;
  2333. if (session->retransmits >= session->rule->ctx->retransmits) {
  2334. rspamd_upstream_fail (session->server, TRUE, "timeout");
  2335. msg_err_task_check ("got IO timeout with server %s(%s), "
  2336. "after %d retransmits",
  2337. rspamd_upstream_name (session->server),
  2338. rspamd_inet_address_to_string_pretty (
  2339. rspamd_upstream_addr_cur (session->server)),
  2340. session->retransmits);
  2341. if (session->session) {
  2342. rspamd_session_remove_event (session->session, fuzzy_lua_fin,
  2343. session);
  2344. }
  2345. else {
  2346. if (session->http_entry) {
  2347. rspamd_controller_send_error (session->http_entry,
  2348. 500, "IO timeout with fuzzy storage");
  2349. }
  2350. if (*session->saved > 0 ) {
  2351. (*session->saved)--;
  2352. if (*session->saved == 0) {
  2353. if (session->http_entry) {
  2354. rspamd_task_free (session->task);
  2355. }
  2356. session->task = NULL;
  2357. }
  2358. }
  2359. if (session->http_entry) {
  2360. rspamd_http_connection_unref (session->http_entry->conn);
  2361. }
  2362. rspamd_ev_watcher_stop (session->event_loop,
  2363. &session->ev);
  2364. close (session->fd);
  2365. }
  2366. }
  2367. else {
  2368. /* Plan write event */
  2369. rspamd_ev_watcher_reschedule (session->event_loop,
  2370. &session->ev, EV_READ|EV_WRITE);
  2371. session->retransmits ++;
  2372. }
  2373. }
  2374. static void
  2375. fuzzy_controller_io_callback (gint fd, short what, void *arg)
  2376. {
  2377. struct fuzzy_learn_session *session = arg;
  2378. const struct rspamd_fuzzy_reply *rep;
  2379. struct fuzzy_mapping *map;
  2380. struct rspamd_task *task;
  2381. guchar buf[2048], *p;
  2382. struct fuzzy_cmd_io *io;
  2383. struct rspamd_fuzzy_cmd *cmd = NULL;
  2384. const gchar *symbol, *ftype;
  2385. gint r;
  2386. enum {
  2387. return_error = 0,
  2388. return_want_more,
  2389. return_finished
  2390. } ret = return_want_more;
  2391. guint i, nreplied;
  2392. const gchar *op = "process";
  2393. task = session->task;
  2394. if (what & EV_READ) {
  2395. if ((r = read (fd, buf, sizeof (buf) - 1)) == -1) {
  2396. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  2397. rspamd_ev_watcher_reschedule (session->event_loop,
  2398. &session->ev, EV_READ);
  2399. return;
  2400. }
  2401. msg_info_task ("cannot process fuzzy hash for message: %s",
  2402. strerror (errno));
  2403. session->err.error_message = "read socket error";
  2404. session->err.error_code = errno;
  2405. ret = return_error;
  2406. }
  2407. else {
  2408. p = buf;
  2409. ret = return_want_more;
  2410. while ((rep = fuzzy_process_reply (&p, &r,
  2411. session->commands, session->rule, &cmd, &io)) != NULL) {
  2412. if ((map =
  2413. g_hash_table_lookup (session->rule->mappings,
  2414. GINT_TO_POINTER (rep->v1.flag))) == NULL) {
  2415. /* Default symbol and default weight */
  2416. symbol = session->rule->symbol;
  2417. }
  2418. else {
  2419. /* Get symbol and weight from map */
  2420. symbol = map->symbol;
  2421. }
  2422. ftype = "bin";
  2423. if (io) {
  2424. if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  2425. ftype = "img";
  2426. }
  2427. else if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
  2428. ftype = "content";
  2429. }
  2430. else if (cmd->shingles_count > 0) {
  2431. ftype = "txt";
  2432. }
  2433. if (io->cmd.cmd == FUZZY_WRITE) {
  2434. op = "added";
  2435. }
  2436. else if (io->cmd.cmd == FUZZY_DEL) {
  2437. op = "deleted";
  2438. }
  2439. }
  2440. if (rep->v1.prob > 0.5) {
  2441. msg_info_task ("%s fuzzy hash (%s) %*xs, list: %s:%d for "
  2442. "message <%s>",
  2443. op,
  2444. ftype,
  2445. (gint)sizeof (rep->digest), rep->digest,
  2446. symbol,
  2447. rep->v1.flag,
  2448. MESSAGE_FIELD_CHECK (session->task, message_id));
  2449. }
  2450. else {
  2451. if (rep->v1.value == 401) {
  2452. msg_info_task (
  2453. "fuzzy hash (%s) for message cannot be %s"
  2454. "<%s>, %*xs, "
  2455. "list %s:%d, skipped by server",
  2456. ftype,
  2457. op,
  2458. MESSAGE_FIELD_CHECK (session->task, message_id),
  2459. (gint)sizeof (rep->digest), rep->digest,
  2460. symbol,
  2461. rep->v1.flag);
  2462. session->err.error_message = "fuzzy hash is skipped";
  2463. session->err.error_code = rep->v1.value;
  2464. }
  2465. else {
  2466. msg_info_task (
  2467. "fuzzy hash (%s) for message cannot be %s"
  2468. "<%s>, %*xs, "
  2469. "list %s:%d, error: %d",
  2470. ftype,
  2471. op,
  2472. MESSAGE_FIELD_CHECK (session->task, message_id),
  2473. (gint)sizeof (rep->digest), rep->digest,
  2474. symbol,
  2475. rep->v1.flag,
  2476. rep->v1.value);
  2477. session->err.error_message = "process fuzzy error";
  2478. session->err.error_code = rep->v1.value;
  2479. }
  2480. ret = return_finished;
  2481. }
  2482. }
  2483. nreplied = 0;
  2484. for (i = 0; i < session->commands->len; i++) {
  2485. io = g_ptr_array_index (session->commands, i);
  2486. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  2487. nreplied++;
  2488. }
  2489. }
  2490. if (nreplied == session->commands->len) {
  2491. ret = return_finished;
  2492. }
  2493. }
  2494. }
  2495. else if (what & EV_WRITE) {
  2496. /* Send commands to storage */
  2497. if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
  2498. session->err.error_message = "write socket error";
  2499. session->err.error_code = errno;
  2500. ret = return_error;
  2501. }
  2502. }
  2503. else {
  2504. fuzzy_controller_timer_callback (fd, what, arg);
  2505. return;
  2506. }
  2507. if (ret == return_want_more) {
  2508. rspamd_ev_watcher_reschedule (session->event_loop,
  2509. &session->ev, EV_READ);
  2510. return;
  2511. }
  2512. else if (ret == return_error) {
  2513. msg_err_task ("got error in IO with server %s(%s), %d, %s",
  2514. rspamd_upstream_name (session->server),
  2515. rspamd_inet_address_to_string_pretty (
  2516. rspamd_upstream_addr_cur (session->server)),
  2517. errno, strerror (errno));
  2518. rspamd_upstream_fail (session->server, FALSE, strerror (errno));
  2519. }
  2520. /*
  2521. * XXX: actually, we check merely a single reply, which is not correct...
  2522. * XXX: when we send a command, we do not check if *all* commands have been
  2523. * written
  2524. * XXX: please, please, change this code some day
  2525. */
  2526. if (session->session == NULL) {
  2527. (*session->saved)--;
  2528. if (session->http_entry) {
  2529. rspamd_http_connection_unref (session->http_entry->conn);
  2530. }
  2531. rspamd_ev_watcher_stop (session->event_loop, &session->ev);
  2532. close (session->fd);
  2533. if (*session->saved == 0) {
  2534. goto cleanup;
  2535. }
  2536. }
  2537. else {
  2538. /* Lua handler */
  2539. rspamd_session_remove_event (session->session, fuzzy_lua_fin, session);
  2540. }
  2541. return;
  2542. cleanup:
  2543. /*
  2544. * When we send learn commands to fuzzy storages, this code is executed
  2545. * *once* when we have queried all storages. We also don't know which
  2546. * storage has been failed.
  2547. *
  2548. * Therefore, we cleanup sessions earlier and actually this code is wrong.
  2549. */
  2550. if (session->err.error_code != 0) {
  2551. if (session->http_entry) {
  2552. rspamd_controller_send_error (session->http_entry,
  2553. session->err.error_code, session->err.error_message);
  2554. }
  2555. }
  2556. else {
  2557. rspamd_upstream_ok (session->server);
  2558. if (session->http_entry) {
  2559. ucl_object_t *reply, *hashes;
  2560. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  2561. reply = ucl_object_typed_new (UCL_OBJECT);
  2562. ucl_object_insert_key (reply, ucl_object_frombool (true),
  2563. "success", 0, false);
  2564. hashes = ucl_object_typed_new (UCL_ARRAY);
  2565. for (i = 0; i < session->commands->len; i ++) {
  2566. io = g_ptr_array_index (session->commands, i);
  2567. rspamd_snprintf (hexbuf, sizeof (hexbuf), "%*xs",
  2568. (gint)sizeof (io->cmd.digest), io->cmd.digest);
  2569. ucl_array_append (hashes, ucl_object_fromstring (hexbuf));
  2570. }
  2571. ucl_object_insert_key (reply, hashes, "hashes", 0, false);
  2572. rspamd_controller_send_ucl (session->http_entry, reply);
  2573. ucl_object_unref (reply);
  2574. }
  2575. }
  2576. if (session->task != NULL) {
  2577. if (session->http_entry) {
  2578. rspamd_task_free (session->task);
  2579. }
  2580. session->task = NULL;
  2581. }
  2582. }
  2583. static GPtrArray *
  2584. fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
  2585. gint c, gint flag, guint32 value, guint flags)
  2586. {
  2587. struct rspamd_mime_text_part *part;
  2588. struct rspamd_mime_part *mime_part;
  2589. struct rspamd_image *image;
  2590. struct fuzzy_cmd_io *io, *cur;
  2591. guint i, j;
  2592. GPtrArray *res = NULL;
  2593. gboolean check_part, fuzzy_check;
  2594. if (c == FUZZY_STAT) {
  2595. res = g_ptr_array_sized_new (1);
  2596. io = fuzzy_cmd_stat (rule, c, flag, value, task->task_pool);
  2597. if (io) {
  2598. g_ptr_array_add (res, io);
  2599. }
  2600. goto end;
  2601. }
  2602. if (task->message == NULL) {
  2603. goto end;
  2604. }
  2605. res = g_ptr_array_sized_new (MESSAGE_FIELD (task, parts)->len + 1);
  2606. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, mime_part) {
  2607. check_part = FALSE;
  2608. fuzzy_check = FALSE;
  2609. if (fuzzy_rule_check_mimepart (task, rule, mime_part, &check_part,
  2610. &fuzzy_check)) {
  2611. io = NULL;
  2612. if (check_part) {
  2613. if (mime_part->part_type == RSPAMD_MIME_PART_TEXT &&
  2614. !(flags & FUZZY_CHECK_FLAG_NOTEXT)) {
  2615. part = mime_part->specific.txt;
  2616. io = fuzzy_cmd_from_text_part (task, rule,
  2617. c,
  2618. flag,
  2619. value,
  2620. !fuzzy_check,
  2621. part,
  2622. mime_part);
  2623. }
  2624. else if (mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&
  2625. !(flags & FUZZY_CHECK_FLAG_NOIMAGES)) {
  2626. image = mime_part->specific.img;
  2627. io = fuzzy_cmd_from_data_part (rule, c, flag, value,
  2628. task,
  2629. image->parent->digest,
  2630. mime_part);
  2631. io->flags |= FUZZY_CMD_FLAG_IMAGE;
  2632. }
  2633. else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
  2634. const struct rspamd_lua_specific_part *lua_spec;
  2635. lua_spec = &mime_part->specific.lua_specific;
  2636. if (lua_spec->type == RSPAMD_LUA_PART_TABLE) {
  2637. lua_State *L = (lua_State *)task->cfg->lua_state;
  2638. gint old_top;
  2639. old_top = lua_gettop (L);
  2640. /* Push table */
  2641. lua_rawgeti (L, LUA_REGISTRYINDEX, lua_spec->cbref);
  2642. lua_pushstring (L, "fuzzy_hashes");
  2643. lua_gettable (L, -2);
  2644. if (lua_type (L, -1) == LUA_TTABLE) {
  2645. gint tbl_pos = lua_gettop (L);
  2646. for (lua_pushnil (L); lua_next (L, tbl_pos);
  2647. lua_pop (L, 1)) {
  2648. const gchar *h = NULL;
  2649. gsize hlen = 0;
  2650. if (lua_isstring (L, -1)) {
  2651. h = lua_tolstring (L, -1, &hlen);
  2652. }
  2653. else if (lua_type (L, -1) == LUA_TUSERDATA) {
  2654. struct rspamd_lua_text *t;
  2655. t = lua_check_text (L, -1);
  2656. if (t) {
  2657. h = t->start;
  2658. hlen = t->len;
  2659. }
  2660. }
  2661. if (hlen == rspamd_cryptobox_HASHBYTES) {
  2662. io = fuzzy_cmd_from_data_part (rule, c,
  2663. flag, value,
  2664. task,
  2665. (guchar *)h,
  2666. mime_part);
  2667. if (io) {
  2668. io->flags |= FUZZY_CMD_FLAG_CONTENT;
  2669. g_ptr_array_add (res, io);
  2670. }
  2671. }
  2672. }
  2673. }
  2674. lua_settop (L, old_top);
  2675. /*
  2676. * Add part itself as well
  2677. */
  2678. io = fuzzy_cmd_from_data_part (rule, c,
  2679. flag, value,
  2680. task,
  2681. mime_part->digest,
  2682. mime_part);
  2683. }
  2684. }
  2685. else {
  2686. io = fuzzy_cmd_from_data_part (rule, c, flag, value,
  2687. task,
  2688. mime_part->digest, mime_part);
  2689. }
  2690. if (io) {
  2691. gboolean skip_existing = FALSE;
  2692. PTR_ARRAY_FOREACH (res, j, cur) {
  2693. if (memcmp (cur->cmd.digest, io->cmd.digest,
  2694. sizeof (io->cmd.digest)) == 0) {
  2695. skip_existing = TRUE;
  2696. break;
  2697. }
  2698. }
  2699. if (!skip_existing) {
  2700. g_ptr_array_add (res, io);
  2701. }
  2702. }
  2703. }
  2704. }
  2705. }
  2706. end:
  2707. if (res && res->len == 0) {
  2708. g_ptr_array_free (res, TRUE);
  2709. return NULL;
  2710. }
  2711. return res;
  2712. }
  2713. static inline void
  2714. register_fuzzy_client_call (struct rspamd_task *task,
  2715. struct fuzzy_rule *rule,
  2716. GPtrArray *commands)
  2717. {
  2718. struct fuzzy_client_session *session;
  2719. struct upstream *selected;
  2720. rspamd_inet_addr_t *addr;
  2721. gint sock;
  2722. if (!rspamd_session_blocked (task->s)) {
  2723. /* Get upstream */
  2724. selected = rspamd_upstream_get (rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN,
  2725. NULL, 0);
  2726. if (selected) {
  2727. addr = rspamd_upstream_addr_next (selected);
  2728. if ((sock = rspamd_inet_address_connect (addr, SOCK_DGRAM, TRUE)) == -1) {
  2729. msg_warn_task ("cannot connect to %s(%s), %d, %s",
  2730. rspamd_upstream_name (selected),
  2731. rspamd_inet_address_to_string_pretty (addr),
  2732. errno,
  2733. strerror (errno));
  2734. rspamd_upstream_fail (selected, TRUE, strerror (errno));
  2735. g_ptr_array_free (commands, TRUE);
  2736. } else {
  2737. /* Create session for a socket */
  2738. session =
  2739. rspamd_mempool_alloc0 (task->task_pool,
  2740. sizeof (struct fuzzy_client_session));
  2741. session->state = 0;
  2742. session->commands = commands;
  2743. session->task = task;
  2744. session->fd = sock;
  2745. session->server = selected;
  2746. session->rule = rule;
  2747. session->results = g_ptr_array_sized_new (32);
  2748. session->event_loop = task->event_loop;
  2749. rspamd_ev_watcher_init (&session->ev,
  2750. sock,
  2751. EV_WRITE,
  2752. fuzzy_check_io_callback,
  2753. session);
  2754. rspamd_ev_watcher_start (session->event_loop, &session->ev,
  2755. ((double)rule->ctx->io_timeout) / 1000.0);
  2756. rspamd_session_add_event (task->s, fuzzy_io_fin, session, M);
  2757. session->item = rspamd_symcache_get_cur_item (task);
  2758. if (session->item) {
  2759. rspamd_symcache_item_async_inc (task, session->item, M);
  2760. }
  2761. }
  2762. }
  2763. }
  2764. }
  2765. /* This callback is called when we check message in fuzzy hashes storage */
  2766. static void
  2767. fuzzy_symbol_callback (struct rspamd_task *task,
  2768. struct rspamd_symcache_item *item,
  2769. void *unused)
  2770. {
  2771. struct fuzzy_rule *rule;
  2772. guint i;
  2773. GPtrArray *commands;
  2774. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  2775. if (!fuzzy_module_ctx->enabled) {
  2776. rspamd_symcache_finalize_item (task, item);
  2777. return;
  2778. }
  2779. /* Check whitelist */
  2780. if (fuzzy_module_ctx->whitelist) {
  2781. if (rspamd_match_radix_map_addr (fuzzy_module_ctx->whitelist,
  2782. task->from_addr) != NULL) {
  2783. msg_info_task ("<%s>, address %s is whitelisted, skip fuzzy check",
  2784. MESSAGE_FIELD (task, message_id),
  2785. rspamd_inet_address_to_string (task->from_addr));
  2786. rspamd_symcache_finalize_item (task, item);
  2787. return;
  2788. }
  2789. }
  2790. rspamd_symcache_item_async_inc (task, item, M);
  2791. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2792. commands = fuzzy_generate_commands (task, rule, FUZZY_CHECK, 0, 0, 0);
  2793. if (commands != NULL) {
  2794. register_fuzzy_client_call (task, rule, commands);
  2795. }
  2796. }
  2797. rspamd_symcache_item_async_dec_check (task, item, M);
  2798. }
  2799. void
  2800. fuzzy_stat_command (struct rspamd_task *task)
  2801. {
  2802. struct fuzzy_rule *rule;
  2803. guint i;
  2804. GPtrArray *commands;
  2805. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  2806. if (!fuzzy_module_ctx->enabled) {
  2807. return;
  2808. }
  2809. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2810. commands = fuzzy_generate_commands (task, rule, FUZZY_STAT, 0, 0, 0);
  2811. if (commands != NULL) {
  2812. register_fuzzy_client_call (task, rule, commands);
  2813. }
  2814. }
  2815. }
  2816. static inline gint
  2817. register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry,
  2818. struct fuzzy_rule *rule,
  2819. struct rspamd_task *task,
  2820. GPtrArray *commands,
  2821. gint *saved)
  2822. {
  2823. struct fuzzy_learn_session *s;
  2824. struct upstream *selected;
  2825. rspamd_inet_addr_t *addr;
  2826. struct rspamd_controller_session *session = entry->ud;
  2827. gint sock;
  2828. gint ret = -1;
  2829. /* Get upstream */
  2830. while ((selected = rspamd_upstream_get_forced (rule->servers,
  2831. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  2832. /* Create UDP socket */
  2833. addr = rspamd_upstream_addr_next (selected);
  2834. if ((sock = rspamd_inet_address_connect (addr,
  2835. SOCK_DGRAM, TRUE)) == -1) {
  2836. msg_warn_task ("cannot connect to fuzzy storage %s (%s rule): %s",
  2837. rspamd_inet_address_to_string_pretty (addr),
  2838. rule->name,
  2839. strerror (errno));
  2840. rspamd_upstream_fail (selected, TRUE, strerror (errno));
  2841. }
  2842. else {
  2843. s =
  2844. rspamd_mempool_alloc0 (session->pool,
  2845. sizeof (struct fuzzy_learn_session));
  2846. s->task = task;
  2847. s->commands = commands;
  2848. s->http_entry = entry;
  2849. s->server = selected;
  2850. s->saved = saved;
  2851. s->fd = sock;
  2852. s->rule = rule;
  2853. s->event_loop = task->event_loop;
  2854. /* We ref connection to avoid freeing before we process fuzzy rule */
  2855. rspamd_http_connection_ref (entry->conn);
  2856. rspamd_ev_watcher_init (&s->ev,
  2857. sock,
  2858. EV_WRITE,
  2859. fuzzy_controller_io_callback,
  2860. s);
  2861. rspamd_ev_watcher_start (s->event_loop, &s->ev,
  2862. ((double)rule->ctx->io_timeout) / 1000.0);
  2863. (*saved)++;
  2864. ret = 1;
  2865. }
  2866. }
  2867. return ret;
  2868. }
  2869. static void
  2870. fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent,
  2871. struct rspamd_http_message *msg, gint cmd, gint value, gint flag,
  2872. struct fuzzy_ctx *ctx, gboolean is_hash, guint flags)
  2873. {
  2874. struct fuzzy_rule *rule;
  2875. struct rspamd_controller_session *session = conn_ent->ud;
  2876. struct rspamd_task *task, **ptask;
  2877. gboolean processed = FALSE, skip = FALSE;
  2878. gint res = 0;
  2879. guint i;
  2880. GPtrArray *commands;
  2881. lua_State *L;
  2882. gint r, *saved, rules = 0, err_idx;
  2883. struct fuzzy_ctx *fuzzy_module_ctx;
  2884. /* Prepare task */
  2885. task = rspamd_task_new (session->wrk, session->cfg, NULL,
  2886. session->lang_det, conn_ent->rt->event_loop, FALSE);
  2887. task->cfg = ctx->cfg;
  2888. saved = rspamd_mempool_alloc0 (session->pool, sizeof (gint));
  2889. fuzzy_module_ctx = fuzzy_get_context (ctx->cfg);
  2890. if (!is_hash) {
  2891. /* Allocate message from string */
  2892. /* XXX: what about encrypted messages ? */
  2893. task->msg.begin = msg->body_buf.begin;
  2894. task->msg.len = msg->body_buf.len;
  2895. r = rspamd_message_parse (task);
  2896. if (r == -1) {
  2897. msg_warn_task ("<%s>: cannot process message for fuzzy",
  2898. MESSAGE_FIELD (task, message_id));
  2899. rspamd_task_free (task);
  2900. rspamd_controller_send_error (conn_ent, 400,
  2901. "Message processing error");
  2902. return;
  2903. }
  2904. rspamd_message_process (task);
  2905. }
  2906. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2907. if (rule->read_only) {
  2908. continue;
  2909. }
  2910. /* Check for flag */
  2911. if (g_hash_table_lookup (rule->mappings,
  2912. GINT_TO_POINTER (flag)) == NULL) {
  2913. msg_info_task ("skip rule %s as it has no flag %d defined"
  2914. " false", rule->name, flag);
  2915. continue;
  2916. }
  2917. /* Check learn condition */
  2918. if (rule->learn_condition_cb != -1) {
  2919. skip = FALSE;
  2920. L = session->cfg->lua_state;
  2921. lua_pushcfunction (L, &rspamd_lua_traceback);
  2922. err_idx = lua_gettop (L);
  2923. lua_rawgeti (L, LUA_REGISTRYINDEX, rule->learn_condition_cb);
  2924. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  2925. *ptask = task;
  2926. rspamd_lua_setclass (L, "rspamd{task}", -1);
  2927. if (lua_pcall (L, 1, LUA_MULTRET, err_idx) != 0) {
  2928. msg_err_task ("call to fuzzy learn condition failed: %s",
  2929. lua_tostring (L, -1));
  2930. }
  2931. else {
  2932. if (lua_gettop (L) > err_idx + 1) {
  2933. /* 2 return values */
  2934. skip = !(lua_toboolean (L, err_idx + 1));
  2935. if (lua_isnumber (L, err_idx + 2)) {
  2936. msg_info_task ("learn condition changed flag from %d to "
  2937. "%d", flag,
  2938. (gint)lua_tonumber (L, err_idx + 2));
  2939. flag = lua_tonumber (L, err_idx + 2);
  2940. }
  2941. }
  2942. else {
  2943. if (lua_isboolean (L, err_idx + 1)) {
  2944. skip = !(lua_toboolean (L, err_idx + 1));
  2945. }
  2946. else {
  2947. msg_warn_task ("set skip for rule %s as its condition "
  2948. "callback returned"
  2949. " a valid boolean", rule->name);
  2950. skip = TRUE;
  2951. }
  2952. }
  2953. }
  2954. /* Result + error function */
  2955. lua_settop (L, err_idx - 1);
  2956. if (skip) {
  2957. msg_info_task ("skip rule %s by condition callback",
  2958. rule->name);
  2959. continue;
  2960. }
  2961. }
  2962. rules ++;
  2963. res = 0;
  2964. if (is_hash) {
  2965. GPtrArray *args;
  2966. const rspamd_ftok_t *arg;
  2967. guint j;
  2968. args = rspamd_http_message_find_header_multiple (msg, "Hash");
  2969. if (args) {
  2970. struct fuzzy_cmd_io *io;
  2971. commands = g_ptr_array_sized_new (args->len);
  2972. for (j = 0; j < args->len; j ++) {
  2973. arg = g_ptr_array_index (args, j);
  2974. io = fuzzy_cmd_hash (rule, cmd, arg, flag, value,
  2975. task->task_pool);
  2976. if (io) {
  2977. g_ptr_array_add (commands, io);
  2978. }
  2979. }
  2980. res = register_fuzzy_controller_call (conn_ent,
  2981. rule,
  2982. task,
  2983. commands,
  2984. saved);
  2985. rspamd_mempool_add_destructor (task->task_pool,
  2986. rspamd_ptr_array_free_hard, commands);
  2987. g_ptr_array_free (args, TRUE);
  2988. }
  2989. else {
  2990. rspamd_controller_send_error (conn_ent, 400,
  2991. "No hash defined");
  2992. rspamd_task_free (task);
  2993. return;
  2994. }
  2995. }
  2996. else {
  2997. commands = fuzzy_generate_commands (task, rule, cmd, flag, value,
  2998. flags);
  2999. if (commands != NULL) {
  3000. res = register_fuzzy_controller_call (conn_ent,
  3001. rule,
  3002. task,
  3003. commands,
  3004. saved);
  3005. rspamd_mempool_add_destructor (task->task_pool,
  3006. rspamd_ptr_array_free_hard, commands);
  3007. }
  3008. }
  3009. if (res > 0) {
  3010. processed = TRUE;
  3011. }
  3012. }
  3013. if (res == -1) {
  3014. if (!processed) {
  3015. msg_warn_task ("cannot send fuzzy request: %s",
  3016. strerror (errno));
  3017. rspamd_controller_send_error (conn_ent, 400, "Message sending error");
  3018. rspamd_task_free (task);
  3019. return;
  3020. }
  3021. else {
  3022. /* Some rules failed and some rules are OK */
  3023. msg_warn_task ("some rules are not processed, but we still sent this request");
  3024. }
  3025. }
  3026. else if (!processed) {
  3027. if (rules) {
  3028. msg_warn_task ("no content to generate fuzzy");
  3029. rspamd_controller_send_error (conn_ent, 404,
  3030. "No content to generate fuzzy for flag %d", flag);
  3031. }
  3032. else {
  3033. if (skip) {
  3034. rspamd_controller_send_error (conn_ent, 403,
  3035. "Message is conditionally skipped for flag %d", flag);
  3036. }
  3037. else {
  3038. msg_warn_task ("no fuzzy rules found for flag %d", flag);
  3039. rspamd_controller_send_error (conn_ent, 404,
  3040. "No fuzzy rules matched for flag %d", flag);
  3041. }
  3042. }
  3043. rspamd_task_free (task);
  3044. }
  3045. }
  3046. static int
  3047. fuzzy_controller_handler (struct rspamd_http_connection_entry *conn_ent,
  3048. struct rspamd_http_message *msg, struct module_ctx *ctx, gint cmd,
  3049. gboolean is_hash)
  3050. {
  3051. const rspamd_ftok_t *arg;
  3052. glong value = 1, flag = 0, send_flags = 0;
  3053. struct fuzzy_ctx *fuzzy_module_ctx = (struct fuzzy_ctx *)ctx;
  3054. if (!fuzzy_module_ctx->enabled) {
  3055. msg_err ("fuzzy_check module is not enabled");
  3056. rspamd_controller_send_error (conn_ent, 500, "Module disabled");
  3057. return 0;
  3058. }
  3059. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  3060. msg_err ("fuzzy_check module has no rules defined");
  3061. rspamd_controller_send_error (conn_ent, 500, "Module has no rules");
  3062. return 0;
  3063. }
  3064. /* Get size */
  3065. arg = rspamd_http_message_find_header (msg, "Weight");
  3066. if (arg) {
  3067. errno = 0;
  3068. if (!rspamd_strtol (arg->begin, arg->len, &value)) {
  3069. msg_info ("error converting numeric argument %T", arg);
  3070. }
  3071. }
  3072. arg = rspamd_http_message_find_header (msg, "Flag");
  3073. if (arg) {
  3074. errno = 0;
  3075. if (!rspamd_strtol (arg->begin, arg->len, &flag)) {
  3076. msg_info ("error converting numeric argument %T", arg);
  3077. flag = 0;
  3078. }
  3079. }
  3080. else {
  3081. flag = 0;
  3082. arg = rspamd_http_message_find_header (msg, "Symbol");
  3083. /* Search flag by symbol */
  3084. if (arg) {
  3085. struct fuzzy_rule *rule;
  3086. guint i;
  3087. GHashTableIter it;
  3088. gpointer k, v;
  3089. struct fuzzy_mapping *map;
  3090. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3091. if (flag != 0) {
  3092. break;
  3093. }
  3094. g_hash_table_iter_init (&it, rule->mappings);
  3095. while (g_hash_table_iter_next (&it, &k, &v)) {
  3096. map = v;
  3097. if (strlen (map->symbol) == arg->len &&
  3098. rspamd_lc_cmp (map->symbol, arg->begin, arg->len) == 0) {
  3099. flag = map->fuzzy_flag;
  3100. break;
  3101. }
  3102. }
  3103. }
  3104. }
  3105. }
  3106. if (flag == 0) {
  3107. msg_err ("no flag defined to learn fuzzy");
  3108. rspamd_controller_send_error (conn_ent, 404, "Unknown or missing flag");
  3109. return 0;
  3110. }
  3111. arg = rspamd_http_message_find_header (msg, "Skip-Images");
  3112. if (arg) {
  3113. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3114. }
  3115. arg = rspamd_http_message_find_header (msg, "Skip-Attachments");
  3116. if (arg) {
  3117. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3118. }
  3119. arg = rspamd_http_message_find_header (msg, "Skip-Text");
  3120. if (arg) {
  3121. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3122. }
  3123. fuzzy_process_handler (conn_ent, msg, cmd, value, flag,
  3124. (struct fuzzy_ctx *)ctx, is_hash, send_flags);
  3125. return 0;
  3126. }
  3127. static inline gint
  3128. fuzzy_check_send_lua_learn (struct fuzzy_rule *rule,
  3129. struct rspamd_task *task,
  3130. GPtrArray *commands,
  3131. gint *saved)
  3132. {
  3133. struct fuzzy_learn_session *s;
  3134. struct upstream *selected;
  3135. rspamd_inet_addr_t *addr;
  3136. gint sock;
  3137. gint ret = -1;
  3138. /* Get upstream */
  3139. if (!rspamd_session_blocked (task->s)) {
  3140. while ((selected = rspamd_upstream_get (rule->servers,
  3141. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  3142. /* Create UDP socket */
  3143. addr = rspamd_upstream_addr_next (selected);
  3144. if ((sock = rspamd_inet_address_connect (addr,
  3145. SOCK_DGRAM, TRUE)) == -1) {
  3146. rspamd_upstream_fail (selected, TRUE, strerror (errno));
  3147. } else {
  3148. s =
  3149. rspamd_mempool_alloc0 (task->task_pool,
  3150. sizeof (struct fuzzy_learn_session));
  3151. s->task = task;
  3152. s->commands = commands;
  3153. s->http_entry = NULL;
  3154. s->server = selected;
  3155. s->saved = saved;
  3156. s->fd = sock;
  3157. s->rule = rule;
  3158. s->session = task->s;
  3159. s->event_loop = task->event_loop;
  3160. rspamd_ev_watcher_init (&s->ev,
  3161. sock,
  3162. EV_WRITE,
  3163. fuzzy_controller_io_callback,
  3164. s);
  3165. rspamd_ev_watcher_start (s->event_loop, &s->ev,
  3166. ((double)rule->ctx->io_timeout) / 1000.0);
  3167. rspamd_session_add_event (task->s,
  3168. fuzzy_lua_fin,
  3169. s,
  3170. M);
  3171. (*saved)++;
  3172. ret = 1;
  3173. }
  3174. }
  3175. }
  3176. return ret;
  3177. }
  3178. static gboolean
  3179. fuzzy_check_lua_process_learn (struct rspamd_task *task,
  3180. gint cmd, gint value, gint flag, guint send_flags)
  3181. {
  3182. struct fuzzy_rule *rule;
  3183. gboolean processed = FALSE, res = TRUE;
  3184. guint i;
  3185. GPtrArray *commands;
  3186. gint *saved, rules = 0;
  3187. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  3188. saved = rspamd_mempool_alloc0 (task->task_pool, sizeof (gint));
  3189. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3190. if (!res) {
  3191. break;
  3192. }
  3193. if (rule->read_only) {
  3194. continue;
  3195. }
  3196. /* Check for flag */
  3197. if (g_hash_table_lookup (rule->mappings,
  3198. GINT_TO_POINTER (flag)) == NULL) {
  3199. msg_info_task ("skip rule %s as it has no flag %d defined"
  3200. " false", rule->name, flag);
  3201. continue;
  3202. }
  3203. rules ++;
  3204. res = 0;
  3205. commands = fuzzy_generate_commands (task, rule, cmd, flag,
  3206. value, send_flags);
  3207. if (commands != NULL) {
  3208. res = fuzzy_check_send_lua_learn (rule, task, commands,
  3209. saved);
  3210. rspamd_mempool_add_destructor (task->task_pool,
  3211. rspamd_ptr_array_free_hard, commands);
  3212. }
  3213. if (res) {
  3214. processed = TRUE;
  3215. }
  3216. }
  3217. if (res == -1) {
  3218. msg_warn_task ("cannot send fuzzy request: %s",
  3219. strerror (errno));
  3220. }
  3221. else if (!processed) {
  3222. if (rules) {
  3223. msg_warn_task ("no content to generate fuzzy");
  3224. return FALSE;
  3225. }
  3226. else {
  3227. msg_warn_task ("no fuzzy rules found for flag %d", flag);
  3228. return FALSE;
  3229. }
  3230. }
  3231. return TRUE;
  3232. }
  3233. static gint
  3234. fuzzy_lua_learn_handler (lua_State *L)
  3235. {
  3236. struct rspamd_task *task = lua_check_task (L, 1);
  3237. guint flag = 0, weight = 1, send_flags = 0;
  3238. const gchar *symbol;
  3239. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  3240. if (task) {
  3241. if (lua_type (L, 2) == LUA_TNUMBER) {
  3242. flag = lua_tonumber (L, 2);
  3243. }
  3244. else if (lua_type (L, 2) == LUA_TSTRING) {
  3245. struct fuzzy_rule *rule;
  3246. guint i;
  3247. GHashTableIter it;
  3248. gpointer k, v;
  3249. struct fuzzy_mapping *map;
  3250. symbol = lua_tostring (L, 2);
  3251. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3252. if (flag != 0) {
  3253. break;
  3254. }
  3255. g_hash_table_iter_init (&it, rule->mappings);
  3256. while (g_hash_table_iter_next (&it, &k, &v)) {
  3257. map = v;
  3258. if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
  3259. flag = map->fuzzy_flag;
  3260. break;
  3261. }
  3262. }
  3263. }
  3264. }
  3265. if (flag == 0) {
  3266. return luaL_error (L, "bad flag");
  3267. }
  3268. if (lua_type (L, 3) == LUA_TNUMBER) {
  3269. weight = lua_tonumber (L, 3);
  3270. }
  3271. if (lua_type (L, 4) == LUA_TTABLE) {
  3272. const gchar *sf;
  3273. for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
  3274. sf = lua_tostring (L, -1);
  3275. if (sf) {
  3276. if (g_ascii_strcasecmp (sf, "noimages") == 0) {
  3277. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3278. }
  3279. else if (g_ascii_strcasecmp (sf, "noattachments") == 0) {
  3280. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3281. }
  3282. else if (g_ascii_strcasecmp (sf, "notext") == 0) {
  3283. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3284. }
  3285. }
  3286. }
  3287. }
  3288. lua_pushboolean (L,
  3289. fuzzy_check_lua_process_learn (task, FUZZY_WRITE, weight, flag,
  3290. send_flags));
  3291. }
  3292. else {
  3293. return luaL_error (L, "invalid arguments");
  3294. }
  3295. return 1;
  3296. }
  3297. static gint
  3298. fuzzy_lua_unlearn_handler (lua_State *L)
  3299. {
  3300. struct rspamd_task *task = lua_check_task (L, 1);
  3301. guint flag = 0, weight = 1.0, send_flags = 0;
  3302. const gchar *symbol;
  3303. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  3304. if (task) {
  3305. if (lua_type (L, 2) == LUA_TNUMBER) {
  3306. flag = lua_tonumber (L, 1);
  3307. }
  3308. else if (lua_type (L, 2) == LUA_TSTRING) {
  3309. struct fuzzy_rule *rule;
  3310. guint i;
  3311. GHashTableIter it;
  3312. gpointer k, v;
  3313. struct fuzzy_mapping *map;
  3314. symbol = lua_tostring (L, 2);
  3315. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3316. if (flag != 0) {
  3317. break;
  3318. }
  3319. g_hash_table_iter_init (&it, rule->mappings);
  3320. while (g_hash_table_iter_next (&it, &k, &v)) {
  3321. map = v;
  3322. if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
  3323. flag = map->fuzzy_flag;
  3324. break;
  3325. }
  3326. }
  3327. }
  3328. }
  3329. if (flag == 0) {
  3330. return luaL_error (L, "bad flag");
  3331. }
  3332. if (lua_type (L, 3) == LUA_TNUMBER) {
  3333. weight = lua_tonumber (L, 3);
  3334. }
  3335. if (lua_type (L, 4) == LUA_TTABLE) {
  3336. const gchar *sf;
  3337. for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
  3338. sf = lua_tostring (L, -1);
  3339. if (sf) {
  3340. if (g_ascii_strcasecmp (sf, "noimages") == 0) {
  3341. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3342. }
  3343. else if (g_ascii_strcasecmp (sf, "noattachments") == 0) {
  3344. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3345. }
  3346. else if (g_ascii_strcasecmp (sf, "notext") == 0) {
  3347. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3348. }
  3349. }
  3350. }
  3351. }
  3352. lua_pushboolean (L,
  3353. fuzzy_check_lua_process_learn (task, FUZZY_DEL, weight, flag,
  3354. send_flags));
  3355. }
  3356. else {
  3357. return luaL_error (L, "invalid arguments");
  3358. }
  3359. return 1;
  3360. }
  3361. static gint
  3362. fuzzy_lua_gen_hashes_handler (lua_State *L)
  3363. {
  3364. struct rspamd_task *task = lua_check_task (L, 1);
  3365. guint flag = 0, weight = 1, send_flags = 0;
  3366. const gchar *symbol;
  3367. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  3368. struct fuzzy_rule *rule;
  3369. GPtrArray *commands;
  3370. gint cmd = FUZZY_WRITE;
  3371. gint i;
  3372. if (task) {
  3373. if (lua_type (L, 2) == LUA_TNUMBER) {
  3374. flag = lua_tonumber (L, 2);
  3375. }
  3376. else if (lua_type (L, 2) == LUA_TSTRING) {
  3377. struct fuzzy_rule *rule;
  3378. guint i;
  3379. GHashTableIter it;
  3380. gpointer k, v;
  3381. struct fuzzy_mapping *map;
  3382. symbol = lua_tostring (L, 2);
  3383. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3384. if (flag != 0) {
  3385. break;
  3386. }
  3387. g_hash_table_iter_init (&it, rule->mappings);
  3388. while (g_hash_table_iter_next (&it, &k, &v)) {
  3389. map = v;
  3390. if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
  3391. flag = map->fuzzy_flag;
  3392. break;
  3393. }
  3394. }
  3395. }
  3396. }
  3397. if (flag == 0) {
  3398. return luaL_error (L, "bad flag");
  3399. }
  3400. if (lua_type (L, 3) == LUA_TNUMBER) {
  3401. weight = lua_tonumber (L, 3);
  3402. }
  3403. /* Flags */
  3404. if (lua_type (L, 4) == LUA_TTABLE) {
  3405. const gchar *sf;
  3406. for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
  3407. sf = lua_tostring (L, -1);
  3408. if (sf) {
  3409. if (g_ascii_strcasecmp (sf, "noimages") == 0) {
  3410. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3411. }
  3412. else if (g_ascii_strcasecmp (sf, "noattachments") == 0) {
  3413. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3414. }
  3415. else if (g_ascii_strcasecmp (sf, "notext") == 0) {
  3416. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3417. }
  3418. }
  3419. }
  3420. }
  3421. /* Type */
  3422. if (lua_type (L, 5) == LUA_TSTRING) {
  3423. const gchar *cmd_name = lua_tostring (L, 5);
  3424. if (strcmp (cmd_name, "add") == 0 || strcmp (cmd_name, "write") == 0) {
  3425. cmd = FUZZY_WRITE;
  3426. }
  3427. else if (strcmp (cmd_name, "delete") == 0 || strcmp (cmd_name, "remove") == 0) {
  3428. cmd = FUZZY_DEL;
  3429. }
  3430. else {
  3431. return luaL_error (L, "invalid command: %s", cmd_name);
  3432. }
  3433. }
  3434. lua_createtable (L, 0, fuzzy_module_ctx->fuzzy_rules->len);
  3435. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3436. if (rule->read_only) {
  3437. continue;
  3438. }
  3439. /* Check for flag */
  3440. if (g_hash_table_lookup (rule->mappings,
  3441. GINT_TO_POINTER (flag)) == NULL) {
  3442. msg_info_task ("skip rule %s as it has no flag %d defined"
  3443. " false", rule->name, flag);
  3444. continue;
  3445. }
  3446. commands = fuzzy_generate_commands (task, rule, cmd, flag,
  3447. weight, send_flags);
  3448. if (commands != NULL) {
  3449. struct fuzzy_cmd_io *io;
  3450. gint j;
  3451. lua_pushstring (L, rule->name);
  3452. lua_createtable (L, commands->len, 0);
  3453. PTR_ARRAY_FOREACH (commands, j, io) {
  3454. lua_pushlstring (L, io->io.iov_base, io->io.iov_len);
  3455. lua_rawseti (L, -2, j + 1);
  3456. }
  3457. lua_settable (L, -3); /* ret[rule->name] = {raw_fuzzy1, ..., raw_fuzzyn} */
  3458. g_ptr_array_free (commands, TRUE);
  3459. }
  3460. }
  3461. }
  3462. else {
  3463. return luaL_error (L, "invalid arguments");
  3464. }
  3465. return 1;
  3466. }
  3467. static gboolean
  3468. fuzzy_add_handler (struct rspamd_http_connection_entry *conn_ent,
  3469. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3470. {
  3471. return fuzzy_controller_handler (conn_ent, msg,
  3472. ctx, FUZZY_WRITE, FALSE);
  3473. }
  3474. static gboolean
  3475. fuzzy_delete_handler (struct rspamd_http_connection_entry *conn_ent,
  3476. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3477. {
  3478. return fuzzy_controller_handler (conn_ent, msg,
  3479. ctx, FUZZY_DEL, FALSE);
  3480. }
  3481. static gboolean
  3482. fuzzy_deletehash_handler (struct rspamd_http_connection_entry *conn_ent,
  3483. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3484. {
  3485. return fuzzy_controller_handler (conn_ent, msg,
  3486. ctx, FUZZY_DEL, TRUE);
  3487. }
  3488. static int
  3489. fuzzy_attach_controller (struct module_ctx *ctx, GHashTable *commands)
  3490. {
  3491. struct fuzzy_ctx *fctx = (struct fuzzy_ctx *)ctx;
  3492. struct rspamd_custom_controller_command *cmd;
  3493. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  3494. cmd->privilleged = TRUE;
  3495. cmd->require_message = TRUE;
  3496. cmd->handler = fuzzy_add_handler;
  3497. cmd->ctx = ctx;
  3498. g_hash_table_insert (commands, "/fuzzyadd", cmd);
  3499. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  3500. cmd->privilleged = TRUE;
  3501. cmd->require_message = TRUE;
  3502. cmd->handler = fuzzy_delete_handler;
  3503. cmd->ctx = ctx;
  3504. g_hash_table_insert (commands, "/fuzzydel", cmd);
  3505. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  3506. cmd->privilleged = TRUE;
  3507. cmd->require_message = FALSE;
  3508. cmd->handler = fuzzy_deletehash_handler;
  3509. cmd->ctx = ctx;
  3510. g_hash_table_insert (commands, "/fuzzydelhash", cmd);
  3511. return 0;
  3512. }