You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fuzzy_check.c 88KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  /***MODULE:fuzzy
   * rspamd module that checks fuzzy checksums for messages
   *
   * Allowed options:
   * - symbol (string): symbol to insert (default: 'R_FUZZY_HASH')
   * - max_score (double): maximum score to which the weights of hashes are normalized (default: 0 - no normalization)
   *
   * - fuzzy_map (string): a string that contains a map in the format { fuzzy_key => [ symbol, weight ] } where fuzzy_key is the number of
   * the fuzzy list. This string itself should be in the format 1:R_FUZZY_SAMPLE1:10,2:R_FUZZY_SAMPLE2:1 etc, where the first number is the fuzzy
   * key, the second is the symbol to insert and the third is the weight for normalization.
   * NOTE: string mappings are deprecated and are rejected by the parser; each mapping should be an object with a `flag` key and
   * optional `symbol` and `max_score` keys.
   *
   * - min_length (integer): minimum length (in characters) for a text part to be checked for fuzzy hash (default: 0 - no limit)
   * - whitelist (map string): map of ip addresses that should not be checked with this module
   * - servers (string): list of fuzzy servers in the format "server1:port,server2:port" - these servers are used for checking and storing
   * fuzzy hashes
   */
  32. #include "config.h"
  33. #include "libmime/message.h"
  34. #include "libserver/maps/map.h"
  35. #include "libserver/maps/map_helpers.h"
  36. #include "libmime/images.h"
  37. #include "libserver/worker_util.h"
  38. #include "libserver/mempool_vars_internal.h"
  39. #include "fuzzy_wire.h"
  40. #include "utlist.h"
  41. #include "ottery.h"
  42. #include "lua/lua_common.h"
  43. #include "unix-std.h"
  44. #include "libserver/http/http_private.h"
  45. #include "libserver/http/http_router.h"
  46. #include "libstat/stat_api.h"
  47. #include <math.h>
  48. #include "libutil/libev_helper.h"
  /*
   * Module-wide defaults and file-level constants.
   * NOTE(review): the code that applies most of these defaults lives outside
   * this chunk; the per-constant notes below are hedged accordingly.
   */
  /* Default symbol name - presumably used when no `symbol` option is configured */
  49. #define DEFAULT_SYMBOL "R_FUZZY_HASH"
  /* Default I/O timeout - presumably milliseconds, TODO confirm */
  50. #define DEFAULT_IO_TIMEOUT 500
  /* Default number of retransmits for a request - TODO confirm against the sender code */
  51. #define DEFAULT_RETRANSMITS 3
  /* Default upstream error threshold - presumably the initial value of fuzzy_ctx.max_errors */
  52. #define DEFAULT_MAX_ERRORS 4
  /* Default upstream revive time - presumably the initial value of fuzzy_ctx.revive_time */
  53. #define DEFAULT_REVIVE_TIME 60
  /* Port used for servers that do not specify one (passed to rspamd_upstreams_from_ucl) */
  54. #define DEFAULT_PORT 11335
  /* Plugin protocol version is an alias of RSPAMD_FUZZY_VERSION */
  55. #define RSPAMD_FUZZY_PLUGIN_VERSION RSPAMD_FUZZY_VERSION
  /* NOTE(review): not referenced in the visible part of the file */
  56. static const gint rspamd_fuzzy_hash_len = 5;
  /* Short module tag string - presumably used to label async events/logs, TODO confirm */
  57. static const gchar *M = "fuzzy check";
  /* Forward declaration: struct fuzzy_rule keeps a back pointer to the module context */
  58. struct fuzzy_ctx;
  /*
   * One entry of a rule's `fuzzy_map`: associates a numeric fuzzy flag with
   * the symbol to insert and the weight used for normalization.
   * Instances are created by parse_flags() and stored in fuzzy_rule.mappings,
   * keyed by the flag value.
   */
  59. struct fuzzy_mapping {
  60. guint64 fuzzy_flag; /* value of the `flag` key */
  61. const gchar *symbol; /* `symbol` key, or the UCL key of the mapping object */
  62. double weight; /* `max_score` key, or the owning rule's max_score */
  63. };
  /*
   * A single configured fuzzy rule. Filled in by fuzzy_parse_rule() from the
   * rule's UCL object; the key material is released by fuzzy_free_rule().
   */
  64. struct fuzzy_rule {
  65. struct upstream_list *servers; /* upstreams built from the `servers` option */
  66. const gchar *symbol; /* `symbol` option or the module default symbol */
  67. const gchar *algorithm_str; /* short algorithm name: "sip", "xx", "mum" or "fast" */
  68. const gchar *name; /* rule name, falls back to the symbol when no name is given */
  69. const ucl_object_t *ucl_obj; /* configuration object this rule was parsed from */
  70. enum rspamd_shingle_alg alg; /* shingles algorithm selected by the `algorithm` option */
  71. GHashTable *mappings; /* flag -> struct fuzzy_mapping */
  72. GPtrArray *fuzzy_headers; /* header names from `headers` or default_headers */
  73. GString *hash_key; /* hash derived from `fuzzy_key` (default "rspamd") */
  74. GString *shingles_key; /* hash derived from `fuzzy_shingles_key` (default "rspamd") */
  75. struct rspamd_cryptobox_keypair *local_key; /* generated when `encryption_key` is set */
  76. struct rspamd_cryptobox_pubkey *peer_key; /* parsed from `encryption_key` */
  77. double max_score; /* `max_score` option; default weight for mappings */
  78. gboolean read_only; /* `read_only` option */
  79. gboolean skip_unknown; /* `skip_unknown` option */
  80. gint learn_condition_cb; /* Lua registry ref of the `learn_condition` function, -1 if unset */
  81. struct rspamd_hash_map_helper *skip_map; /* map loaded from `skip_hashes` */
  82. struct fuzzy_ctx *ctx; /* owning module context */
  83. gint lua_id; /* value returned by the Lua process_rule callback, -1 on failure */
  84. };
  /*
   * Module-wide context of the fuzzy check plugin. It is allocated from the
   * configuration pool in fuzzy_check_module_init() and looked up through
   * fuzzy_get_context().
   */
  85. struct fuzzy_ctx {
  86. struct module_ctx ctx; /* generic module header; the struct is cast to struct module_ctx * */
  87. rspamd_mempool_t *fuzzy_pool; /* private pool created in module init */
  88. GPtrArray *fuzzy_rules; /* struct fuzzy_rule * entries added by fuzzy_parse_rule() */
  89. struct rspamd_config *cfg; /* configuration this context belongs to */
  90. const gchar *default_symbol; /* symbol given to rules without their own `symbol` */
  91. struct rspamd_radix_map_helper *whitelist; /* presumably the `whitelist` IP map - TODO confirm */
  92. struct rspamd_keypair_cache *keypairs_cache; /* created in module init */
  93. guint32 io_timeout; /* NOTE(review): units not visible here, see DEFAULT_IO_TIMEOUT */
  94. guint32 retransmits;
  95. guint max_errors; /* passed to rspamd_upstreams_set_limits() for each rule */
  96. gdouble revive_time; /* passed to rspamd_upstreams_set_limits() for each rule */
  97. gint check_mime_part_ref; /* Lua callback */
  98. gint process_rule_ref; /* Lua callback */
  99. gint cleanup_rules_ref; /* Lua registry ref, initialised to -1 in module init */
  100. gboolean enabled;
  101. };
  /*
   * Kind of data a fuzzy result refers to.
   * NOTE(review): judging by the names these are text, image, content and
   * binary matches; the code assigning them is outside this chunk.
   */
  102. enum fuzzy_result_type {
  103. FUZZY_RESULT_TXT,
  104. FUZZY_RESULT_IMG,
  105. FUZZY_RESULT_CONTENT,
  106. FUZZY_RESULT_BIN
  107. };
  /*
   * A single result collected for a fuzzy check.
   * NOTE(review): producers/consumers of this struct are outside this chunk;
   * field meanings below are inferred from names and should be confirmed.
   */
  108. struct fuzzy_client_result {
  109. const gchar *symbol; /* presumably the symbol to insert */
  110. gchar *option; /* presumably the option string attached to the symbol */
  111. gdouble score;
  112. gdouble prob;
  113. enum fuzzy_result_type type; /* see enum fuzzy_result_type */
  114. };
  /*
   * State of one check session: the commands to send, the results gathered
   * so far and the I/O bookkeeping for the chosen server.
   * NOTE(review): the session logic is outside this chunk; per-field notes
   * are inferred from types and names.
   */
  115. struct fuzzy_client_session {
  116. GPtrArray *commands; /* presumably struct fuzzy_cmd_io * entries - TODO confirm */
  117. GPtrArray *results; /* presumably struct fuzzy_client_result * entries - TODO confirm */
  118. struct rspamd_task *task;
  119. struct rspamd_symcache_item *item;
  120. struct upstream *server;
  121. struct fuzzy_rule *rule;
  122. struct ev_loop *event_loop;
  123. struct rspamd_io_ev ev;
  124. gint state;
  125. gint fd;
  126. guint retransmits;
  127. };
  /*
   * State of one learn (store) session.
   * NOTE(review): the learn logic is outside this chunk; per-field notes are
   * inferred from types and names.
   */
  128. struct fuzzy_learn_session {
  129. GPtrArray *commands; /* presumably struct fuzzy_cmd_io * entries - TODO confirm */
  130. gint *saved; /* presumably a shared counter/flag owned by the caller - TODO confirm */
  131. GError **err; /* presumably where errors are reported to the caller - TODO confirm */
  132. struct rspamd_http_connection_entry *http_entry;
  133. struct rspamd_async_session *session;
  134. struct upstream *server;
  135. struct fuzzy_rule *rule;
  136. struct rspamd_task *task;
  137. struct ev_loop *event_loop;
  138. struct rspamd_io_ev ev;
  139. gint fd;
  140. guint retransmits;
  141. };
  /*
   * Bit flags. FUZZY_CMD_FLAG_* are presumably stored in fuzzy_cmd_io.flags;
   * FUZZY_CHECK_FLAG_* presumably restrict which message parts are hashed.
   * NOTE(review): neither set is used in the visible part of the file.
   */
  142. #define FUZZY_CMD_FLAG_REPLIED (1 << 0)
  143. #define FUZZY_CMD_FLAG_SENT (1 << 1)
  144. #define FUZZY_CMD_FLAG_IMAGE (1 << 2)
  145. #define FUZZY_CMD_FLAG_CONTENT (1 << 3)
  146. #define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
  147. #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
  148. #define FUZZY_CHECK_FLAG_NOTEXT (1 << 2)
  /*
   * One command together with its I/O vector and the MIME part it refers to.
   * NOTE(review): construction and use are outside this chunk.
   */
  149. struct fuzzy_cmd_io {
  150. guint32 tag;
  151. guint32 flags; /* presumably FUZZY_CMD_FLAG_* bits - TODO confirm */
  152. struct iovec io;
  153. struct rspamd_mime_part *part;
  154. struct rspamd_fuzzy_cmd cmd;
  155. };
  /* Comma separated header list used by fuzzy_parse_rule() when a rule has no `headers` option */
  156. static const char *default_headers = "Subject,Content-Type,Reply-To,X-Mailer";
  /* Symbol callback, defined later in the file */
  157. static void fuzzy_symbol_callback (struct rspamd_task *task,
  158. struct rspamd_symcache_item *item,
  159. void *unused);
  160. /* Initialization */
  161. gint fuzzy_check_module_init (struct rspamd_config *cfg,
  162. struct module_ctx **ctx);
  163. gint fuzzy_check_module_config (struct rspamd_config *cfg);
  164. gint fuzzy_check_module_reconfig (struct rspamd_config *cfg);
  /* Controller hook referenced from the module descriptor below */
  165. static gint fuzzy_attach_controller (struct module_ctx *ctx,
  166. GHashTable *commands);
  /* Lua entry points for learning/unlearning, defined later in the file */
  167. static gint fuzzy_lua_learn_handler (lua_State *L);
  168. static gint fuzzy_lua_unlearn_handler (lua_State *L);
  /*
   * Module descriptor: name, init/config/reconfig hooks, controller attach
   * hook and module version. The last initializer starts as (guint)-1;
   * fuzzy_get_context() reads fuzzy_check_module.ctx_offset, so it is
   * presumably that field and is filled in at registration - TODO confirm
   * against the module_t definition.
   */
  169. module_t fuzzy_check_module = {
  170. "fuzzy_check",
  171. fuzzy_check_module_init,
  172. fuzzy_check_module_config,
  173. fuzzy_check_module_reconfig,
  174. fuzzy_attach_controller,
  175. RSPAMD_MODULE_VER,
  176. (guint)-1,
  177. };
  178. static inline struct fuzzy_ctx *
  179. fuzzy_get_context (struct rspamd_config *cfg)
  180. {
  181. return (struct fuzzy_ctx *)g_ptr_array_index (cfg->c_modules,
  182. fuzzy_check_module.ctx_offset);
  183. }
  184. static void
  185. parse_flags (struct fuzzy_rule *rule,
  186. struct rspamd_config *cfg,
  187. const ucl_object_t *val,
  188. gint cb_id)
  189. {
  190. const ucl_object_t *elt;
  191. struct fuzzy_mapping *map;
  192. const gchar *sym = NULL;
  193. if (val->type == UCL_STRING) {
  194. msg_err_config (
  195. "string mappings are deprecated and no longer supported, use new style configuration");
  196. }
  197. else if (val->type == UCL_OBJECT) {
  198. elt = ucl_object_lookup (val, "symbol");
  199. if (elt == NULL || !ucl_object_tostring_safe (elt, &sym)) {
  200. sym = ucl_object_key (val);
  201. }
  202. if (sym != NULL) {
  203. map =
  204. rspamd_mempool_alloc (cfg->cfg_pool,
  205. sizeof (struct fuzzy_mapping));
  206. map->symbol = sym;
  207. elt = ucl_object_lookup (val, "flag");
  208. if (elt != NULL) {
  209. map->fuzzy_flag = ucl_obj_toint (elt);
  210. elt = ucl_object_lookup (val, "max_score");
  211. if (elt != NULL) {
  212. map->weight = ucl_obj_todouble (elt);
  213. }
  214. else {
  215. map->weight = rule->max_score;
  216. }
  217. /* Add flag to hash table */
  218. g_hash_table_insert (rule->mappings,
  219. GINT_TO_POINTER (map->fuzzy_flag), map);
  220. rspamd_symcache_add_symbol (cfg->cache,
  221. map->symbol, 0,
  222. NULL, NULL,
  223. SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE,
  224. cb_id);
  225. }
  226. else {
  227. msg_err_config ("fuzzy_map parameter has no flag definition");
  228. }
  229. }
  230. else {
  231. msg_err_config ("fuzzy_map parameter has no symbol definition");
  232. }
  233. }
  234. else {
  235. msg_err_config ("fuzzy_map parameter is of an unsupported type");
  236. }
  237. }
  238. static GPtrArray *
  239. parse_fuzzy_headers (struct rspamd_config *cfg, const gchar *str)
  240. {
  241. gchar **strvec;
  242. gint num, i;
  243. GPtrArray *res;
  244. strvec = g_strsplit_set (str, ",", 0);
  245. num = g_strv_length (strvec);
  246. res = g_ptr_array_sized_new (num);
  247. for (i = 0; i < num; i++) {
  248. g_strstrip (strvec[i]);
  249. g_ptr_array_add (res, rspamd_mempool_strdup (
  250. cfg->cfg_pool, strvec[i]));
  251. }
  252. g_strfreev (strvec);
  253. return res;
  254. }
  255. static double
  256. fuzzy_normalize (gint32 in, double weight)
  257. {
  258. if (weight == 0) {
  259. return 0;
  260. }
  261. #ifdef HAVE_TANH
  262. return tanh (G_E * (double)in / weight);
  263. #else
  264. return (in < weight ? in / weight : weight);
  265. #endif
  266. }
  267. static struct fuzzy_rule *
  268. fuzzy_rule_new (const char *default_symbol, rspamd_mempool_t *pool)
  269. {
  270. struct fuzzy_rule *rule;
  271. rule = rspamd_mempool_alloc0 (pool, sizeof (struct fuzzy_rule));
  272. rule->mappings = g_hash_table_new (g_direct_hash, g_direct_equal);
  273. rule->symbol = default_symbol;
  274. rspamd_mempool_add_destructor (pool,
  275. (rspamd_mempool_destruct_t)g_hash_table_unref,
  276. rule->mappings);
  277. rule->read_only = FALSE;
  278. return rule;
  279. }
  280. static void
  281. fuzzy_free_rule (gpointer r)
  282. {
  283. struct fuzzy_rule *rule = (struct fuzzy_rule *)r;
  284. g_string_free (rule->hash_key, TRUE);
  285. g_string_free (rule->shingles_key, TRUE);
  286. if (rule->local_key) {
  287. rspamd_keypair_unref (rule->local_key);
  288. }
  289. if (rule->peer_key) {
  290. rspamd_pubkey_unref (rule->peer_key);
  291. }
  292. }
  293. static gint
  294. fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj,
  295. const gchar *name, gint cb_id)
  296. {
  297. const ucl_object_t *value, *cur;
  298. struct fuzzy_rule *rule;
  299. ucl_object_iter_t it = NULL;
  300. const char *k = NULL, *key_str = NULL, *shingles_key_str = NULL, *lua_script;
  301. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg);
  302. if (obj->type != UCL_OBJECT) {
  303. msg_err_config ("invalid rule definition");
  304. return -1;
  305. }
  306. rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol,
  307. cfg->cfg_pool);
  308. rule->ucl_obj = obj;
  309. rule->ctx = fuzzy_module_ctx;
  310. rule->learn_condition_cb = -1;
  311. rule->alg = RSPAMD_SHINGLES_OLD;
  312. rule->skip_map = NULL;
  313. if ((value = ucl_object_lookup (obj, "skip_hashes")) != NULL) {
  314. rspamd_map_add_from_ucl (cfg, value,
  315. "Fuzzy hashes whitelist",
  316. rspamd_kv_list_read,
  317. rspamd_kv_list_fin,
  318. rspamd_kv_list_dtor,
  319. (void **)&rule->skip_map,
  320. NULL);
  321. }
  322. if ((value = ucl_object_lookup (obj, "headers")) != NULL) {
  323. it = NULL;
  324. while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY))
  325. != NULL) {
  326. GPtrArray *tmp;
  327. guint i;
  328. gpointer ptr;
  329. tmp = parse_fuzzy_headers (cfg, ucl_obj_tostring (cur));
  330. if (tmp) {
  331. if (rule->fuzzy_headers) {
  332. PTR_ARRAY_FOREACH (tmp, i, ptr) {
  333. g_ptr_array_add (rule->fuzzy_headers, ptr);
  334. }
  335. g_ptr_array_free (tmp, TRUE);
  336. }
  337. else {
  338. rule->fuzzy_headers = tmp;
  339. }
  340. }
  341. }
  342. }
  343. else {
  344. rule->fuzzy_headers = parse_fuzzy_headers (cfg, default_headers);
  345. }
  346. if (rule->fuzzy_headers != NULL) {
  347. rspamd_mempool_add_destructor (cfg->cfg_pool,
  348. (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard,
  349. rule->fuzzy_headers);
  350. }
  351. if ((value = ucl_object_lookup (obj, "max_score")) != NULL) {
  352. rule->max_score = ucl_obj_todouble (value);
  353. }
  354. if ((value = ucl_object_lookup (obj, "symbol")) != NULL) {
  355. rule->symbol = ucl_obj_tostring (value);
  356. }
  357. if (name) {
  358. rule->name = name;
  359. }
  360. else {
  361. rule->name = rule->symbol;
  362. }
  363. if ((value = ucl_object_lookup (obj, "read_only")) != NULL) {
  364. rule->read_only = ucl_obj_toboolean (value);
  365. }
  366. if ((value = ucl_object_lookup (obj, "skip_unknown")) != NULL) {
  367. rule->skip_unknown = ucl_obj_toboolean (value);
  368. }
  369. if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) {
  370. rule->algorithm_str = ucl_object_tostring (value);
  371. if (rule->algorithm_str) {
  372. if (g_ascii_strcasecmp (rule->algorithm_str, "old") == 0 ||
  373. g_ascii_strcasecmp (rule->algorithm_str, "siphash") == 0) {
  374. rule->alg = RSPAMD_SHINGLES_OLD;
  375. }
  376. else if (g_ascii_strcasecmp (rule->algorithm_str, "xxhash") == 0) {
  377. rule->alg = RSPAMD_SHINGLES_XXHASH;
  378. }
  379. else if (g_ascii_strcasecmp (rule->algorithm_str, "mumhash") == 0) {
  380. rule->alg = RSPAMD_SHINGLES_MUMHASH;
  381. }
  382. else if (g_ascii_strcasecmp (rule->algorithm_str, "fasthash") == 0 ||
  383. g_ascii_strcasecmp (rule->algorithm_str, "fast") == 0) {
  384. rule->alg = RSPAMD_SHINGLES_FAST;
  385. }
  386. else {
  387. msg_warn_config ("unknown algorithm: %s, use siphash by default",
  388. rule->algorithm_str);
  389. }
  390. }
  391. }
  392. /* Set a consistent and short string name */
  393. switch (rule->alg) {
  394. case RSPAMD_SHINGLES_OLD:
  395. rule->algorithm_str = "sip";
  396. break;
  397. case RSPAMD_SHINGLES_XXHASH:
  398. rule->algorithm_str = "xx";
  399. break;
  400. case RSPAMD_SHINGLES_MUMHASH:
  401. rule->algorithm_str = "mum";
  402. break;
  403. case RSPAMD_SHINGLES_FAST:
  404. rule->algorithm_str = "fast";
  405. break;
  406. }
  407. if ((value = ucl_object_lookup (obj, "servers")) != NULL) {
  408. rule->servers = rspamd_upstreams_create (cfg->ups_ctx);
  409. /* pass max_error and revive_time configuration in upstream for fuzzy storage
  410. * it allows to configure error_rate threshold and upstream dead timer
  411. */
  412. rspamd_upstreams_set_limits (rule->servers,
  413. (gdouble) fuzzy_module_ctx->revive_time, NAN, NAN, NAN,
  414. (guint) fuzzy_module_ctx->max_errors, 0);
  415. rspamd_mempool_add_destructor (cfg->cfg_pool,
  416. (rspamd_mempool_destruct_t)rspamd_upstreams_destroy,
  417. rule->servers);
  418. if (!rspamd_upstreams_from_ucl (rule->servers, value, DEFAULT_PORT, NULL)) {
  419. msg_err_config ("cannot read servers definition");
  420. return -1;
  421. }
  422. }
  423. if ((value = ucl_object_lookup (obj, "fuzzy_map")) != NULL) {
  424. it = NULL;
  425. while ((cur = ucl_object_iterate (value, &it, true)) != NULL) {
  426. parse_flags (rule, cfg, cur, cb_id);
  427. }
  428. }
  429. if ((value = ucl_object_lookup (obj, "encryption_key")) != NULL) {
  430. /* Create key from user's input */
  431. k = ucl_object_tostring (value);
  432. if (k == NULL || (rule->peer_key =
  433. rspamd_pubkey_from_base32 (k, 0, RSPAMD_KEYPAIR_KEX,
  434. RSPAMD_CRYPTOBOX_MODE_25519)) == NULL) {
  435. msg_err_config ("bad encryption key value: %s",
  436. k);
  437. return -1;
  438. }
  439. rule->local_key = rspamd_keypair_new (RSPAMD_KEYPAIR_KEX,
  440. RSPAMD_CRYPTOBOX_MODE_25519);
  441. }
  442. if ((value = ucl_object_lookup (obj, "learn_condition")) != NULL) {
  443. lua_script = ucl_object_tostring (value);
  444. if (lua_script) {
  445. if (luaL_dostring (cfg->lua_state, lua_script) != 0) {
  446. msg_err_config ("cannot execute lua script for fuzzy "
  447. "learn condition: %s", lua_tostring (cfg->lua_state, -1));
  448. }
  449. else {
  450. if (lua_type (cfg->lua_state, -1) == LUA_TFUNCTION) {
  451. rule->learn_condition_cb = luaL_ref (cfg->lua_state,
  452. LUA_REGISTRYINDEX);
  453. msg_info_config ("loaded learn condition script for fuzzy rule:"
  454. " %s", rule->name);
  455. }
  456. else {
  457. msg_err_config ("lua script must return "
  458. "function(task) and not %s",
  459. lua_typename (cfg->lua_state,
  460. lua_type (cfg->lua_state, -1)));
  461. }
  462. }
  463. }
  464. }
  465. key_str = NULL;
  466. if ((value = ucl_object_lookup (obj, "fuzzy_key")) != NULL) {
  467. /* Create key from user's input */
  468. key_str = ucl_object_tostring (value);
  469. }
  470. /* Setup keys */
  471. if (key_str == NULL) {
  472. /* Use some default key for all ops */
  473. key_str = "rspamd";
  474. }
  475. rule->hash_key = g_string_sized_new (rspamd_cryptobox_HASHBYTES);
  476. rspamd_cryptobox_hash (rule->hash_key->str, key_str, strlen (key_str), NULL, 0);
  477. rule->hash_key->len = rspamd_cryptobox_HASHKEYBYTES;
  478. shingles_key_str = NULL;
  479. if ((value = ucl_object_lookup (obj, "fuzzy_shingles_key")) != NULL) {
  480. shingles_key_str = ucl_object_tostring (value);
  481. }
  482. if (shingles_key_str == NULL) {
  483. shingles_key_str = "rspamd";
  484. }
  485. rule->shingles_key = g_string_sized_new (rspamd_cryptobox_HASHBYTES);
  486. rspamd_cryptobox_hash (rule->shingles_key->str, shingles_key_str,
  487. strlen (shingles_key_str), NULL, 0);
  488. rule->shingles_key->len = 16;
  489. if (rspamd_upstreams_count (rule->servers) == 0) {
  490. msg_err_config ("no servers defined for fuzzy rule with name: %s",
  491. rule->name);
  492. return -1;
  493. }
  494. else {
  495. g_ptr_array_add (fuzzy_module_ctx->fuzzy_rules, rule);
  496. if (rule->symbol != fuzzy_module_ctx->default_symbol) {
  497. rspamd_symcache_add_symbol (cfg->cache, rule->symbol,
  498. 0,
  499. NULL, NULL,
  500. SYMBOL_TYPE_VIRTUAL | SYMBOL_TYPE_FINE,
  501. cb_id);
  502. }
  503. msg_info_config ("added fuzzy rule %s, key: %*xs, "
  504. "shingles_key: %*xs, algorithm: %s",
  505. rule->symbol,
  506. 6, rule->hash_key->str,
  507. 6, rule->shingles_key->str,
  508. rule->algorithm_str);
  509. }
  510. /*
  511. * Process rule in Lua
  512. */
  513. gint err_idx, ret;
  514. lua_State *L = (lua_State *)cfg->lua_state;
  515. lua_pushcfunction (L, &rspamd_lua_traceback);
  516. err_idx = lua_gettop (L);
  517. lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->process_rule_ref);
  518. ucl_object_push_lua (L, obj, true);
  519. if ((ret = lua_pcall (L, 1, 1, err_idx)) != 0) {
  520. msg_err_config ("call to process_rule lua "
  521. "script failed (%d): %s", ret, lua_tostring (L, -1));
  522. rule->lua_id = -1;
  523. }
  524. else {
  525. rule->lua_id = lua_tonumber (L, -1);
  526. }
  527. lua_settop (L, 0);
  528. rspamd_mempool_add_destructor (cfg->cfg_pool, fuzzy_free_rule,
  529. rule);
  530. return 0;
  531. }
  532. gint
  533. fuzzy_check_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  534. {
  535. struct fuzzy_ctx *fuzzy_module_ctx;
  536. fuzzy_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
  537. sizeof (struct fuzzy_ctx));
  538. fuzzy_module_ctx->fuzzy_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  539. NULL, 0);
  540. /* TODO: this should match rules count actually */
  541. fuzzy_module_ctx->keypairs_cache = rspamd_keypair_cache_new (32);
  542. fuzzy_module_ctx->fuzzy_rules = g_ptr_array_new ();
  543. fuzzy_module_ctx->cfg = cfg;
  544. fuzzy_module_ctx->process_rule_ref = -1;
  545. fuzzy_module_ctx->check_mime_part_ref = -1;
  546. fuzzy_module_ctx->cleanup_rules_ref = -1;
  547. rspamd_mempool_add_destructor (cfg->cfg_pool,
  548. (rspamd_mempool_destruct_t)rspamd_mempool_delete,
  549. fuzzy_module_ctx->fuzzy_pool);
  550. rspamd_mempool_add_destructor (cfg->cfg_pool,
  551. (rspamd_mempool_destruct_t)rspamd_keypair_cache_destroy,
  552. fuzzy_module_ctx->keypairs_cache);
  553. rspamd_mempool_add_destructor (cfg->cfg_pool,
  554. (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard,
  555. fuzzy_module_ctx->fuzzy_rules);
  556. *ctx = (struct module_ctx *)fuzzy_module_ctx;
  557. rspamd_rcl_add_doc_by_path (cfg,
  558. NULL,
  559. "Fuzzy check plugin",
  560. "fuzzy_check",
  561. UCL_OBJECT,
  562. NULL,
  563. 0,
  564. NULL,
  565. 0);
  566. rspamd_rcl_add_doc_by_path (cfg,
  567. "fuzzy_check",
  568. "Default symbol",
  569. "symbol",
  570. UCL_STRING,
  571. NULL,
  572. 0,
  573. NULL,
  574. 0);
  575. rspamd_rcl_add_doc_by_path (cfg,
  576. "fuzzy_check",
  577. "Minimum number of *words* to check a text part",
  578. "min_length",
  579. UCL_INT,
  580. NULL,
  581. 0,
  582. NULL,
  583. 0);
  584. rspamd_rcl_add_doc_by_path (cfg,
  585. "fuzzy_check",
  586. "Minimum number of *bytes* to check a non-text part",
  587. "min_bytes",
  588. UCL_INT,
  589. NULL,
  590. 0,
  591. NULL,
  592. 0);
  593. rspamd_rcl_add_doc_by_path (cfg,
  594. "fuzzy_check",
  595. "Multiplier for bytes limit when checking for text parts",
  596. "text_multiplier",
  597. UCL_FLOAT,
  598. NULL,
  599. 0,
  600. NULL,
  601. 0);
  602. rspamd_rcl_add_doc_by_path (cfg,
  603. "fuzzy_check",
  604. "Minimum height in pixels for embedded images to check using fuzzy storage",
  605. "min_height",
  606. UCL_INT,
  607. NULL,
  608. 0,
  609. NULL,
  610. 0);
  611. rspamd_rcl_add_doc_by_path (cfg,
  612. "fuzzy_check",
  613. "Minimum width in pixels for embedded images to check using fuzzy storage",
  614. "min_width",
  615. UCL_INT,
  616. NULL,
  617. 0,
  618. NULL,
  619. 0);
  620. rspamd_rcl_add_doc_by_path (cfg,
  621. "fuzzy_check",
  622. "Timeout for waiting reply from a fuzzy server",
  623. "timeout",
  624. UCL_TIME,
  625. NULL,
  626. 0,
  627. NULL,
  628. 0);
  629. rspamd_rcl_add_doc_by_path (cfg,
  630. "fuzzy_check",
  631. "Maximum number of retransmits for a single request",
  632. "retransmits",
  633. UCL_INT,
  634. NULL,
  635. 0,
  636. NULL,
  637. 0);
  638. rspamd_rcl_add_doc_by_path (cfg,
  639. "fuzzy_check",
  640. "Maximum number of upstream errors, affects error rate threshold",
  641. "max_errors",
  642. UCL_INT,
  643. NULL,
  644. 0,
  645. NULL,
  646. 0);
  647. rspamd_rcl_add_doc_by_path (cfg,
  648. "fuzzy_check",
  649. "Time to lapse before re-resolve faulty upstream",
  650. "revive_time",
  651. UCL_FLOAT,
  652. NULL,
  653. 0,
  654. NULL,
  655. 0);
  656. rspamd_rcl_add_doc_by_path (cfg,
  657. "fuzzy_check",
  658. "Whitelisted IPs map",
  659. "whitelist",
  660. UCL_STRING,
  661. NULL,
  662. 0,
  663. NULL,
  664. 0);
  665. /* Rules doc strings */
  666. rspamd_rcl_add_doc_by_path (cfg,
  667. "fuzzy_check",
  668. "Fuzzy check rule",
  669. "rule",
  670. UCL_OBJECT,
  671. NULL,
  672. 0,
  673. NULL,
  674. 0);
  675. rspamd_rcl_add_doc_by_path (cfg,
  676. "fuzzy_check.rule",
  677. "Headers that are used to make a separate hash",
  678. "headers",
  679. UCL_ARRAY,
  680. NULL,
  681. 0,
  682. NULL,
  683. 0);
  684. rspamd_rcl_add_doc_by_path (cfg,
  685. "fuzzy_check.rule",
  686. "Whitelisted hashes map",
  687. "skip_hashes",
  688. UCL_STRING,
  689. NULL,
  690. 0,
  691. NULL,
  692. 0);
  693. rspamd_rcl_add_doc_by_path (cfg,
  694. "fuzzy_check.rule",
  695. "Set of mime types (in form type/subtype, or type/*, or *) to check with fuzzy",
  696. "mime_types",
  697. UCL_ARRAY,
  698. NULL,
  699. 0,
  700. NULL,
  701. 0);
  702. rspamd_rcl_add_doc_by_path (cfg,
  703. "fuzzy_check.rule",
  704. "Maximum value for fuzzy hash when weight of symbol is exactly 1.0 (if value is higher then score is still 1.0)",
  705. "max_score",
  706. UCL_INT,
  707. NULL,
  708. 0,
  709. NULL,
  710. 0);
  711. rspamd_rcl_add_doc_by_path (cfg,
  712. "fuzzy_check.rule",
  713. "List of servers to check (or learn)",
  714. "servers",
  715. UCL_STRING,
  716. NULL,
  717. 0,
  718. NULL,
  719. 0);
  720. rspamd_rcl_add_doc_by_path (cfg,
  721. "fuzzy_check.rule",
  722. "If true then never try to learn this fuzzy storage",
  723. "read_only",
  724. UCL_BOOLEAN,
  725. NULL,
  726. 0,
  727. NULL,
  728. 0);
  729. rspamd_rcl_add_doc_by_path (cfg,
  730. "fuzzy_check.rule",
  731. "If true then ignore unknown flags and not add the default fuzzy symbol",
  732. "skip_unknown",
  733. UCL_BOOLEAN,
  734. NULL,
  735. 0,
  736. NULL,
  737. 0);
  738. rspamd_rcl_add_doc_by_path (cfg,
  739. "fuzzy_check.rule",
  740. "Default symbol for rule (if no flags defined or matched)",
  741. "symbol",
  742. UCL_STRING,
  743. NULL,
  744. 0,
  745. NULL,
  746. 0);
  747. rspamd_rcl_add_doc_by_path (cfg,
  748. "fuzzy_check.rule",
  749. "Base32 value for the protocol encryption public key",
  750. "encryption_key",
  751. UCL_STRING,
  752. NULL,
  753. 0,
  754. NULL,
  755. 0);
  756. rspamd_rcl_add_doc_by_path (cfg,
  757. "fuzzy_check.rule",
  758. "Base32 value for the hashing key (for private storages)",
  759. "fuzzy_key",
  760. UCL_STRING,
  761. NULL,
  762. 0,
  763. NULL,
  764. 0);
  765. rspamd_rcl_add_doc_by_path (cfg,
  766. "fuzzy_check.rule",
  767. "Base32 value for the shingles hashing key (for private storages)",
  768. "fuzzy_shingles_key",
  769. UCL_STRING,
  770. NULL,
  771. 0,
  772. NULL,
  773. 0);
  774. rspamd_rcl_add_doc_by_path (cfg,
  775. "fuzzy_check.rule",
  776. "Lua script that returns boolean function to check if this task "
  777. "should be considered when learning fuzzy storage",
  778. "learn_condition",
  779. UCL_STRING,
  780. NULL,
  781. 0,
  782. NULL,
  783. 0);
  784. rspamd_rcl_add_doc_by_path (cfg,
  785. "fuzzy_check.rule",
  786. "Map of SYMBOL -> data for flags configuration",
  787. "fuzzy_map",
  788. UCL_OBJECT,
  789. NULL,
  790. 0,
  791. NULL,
  792. 0);
  793. rspamd_rcl_add_doc_by_path (cfg,
  794. "fuzzy_check.rule",
  795. "Use direct hash for short texts",
  796. "short_text_direct_hash",
  797. UCL_BOOLEAN,
  798. NULL,
  799. 0,
  800. NULL,
  801. 0);
  802. rspamd_rcl_add_doc_by_path (cfg,
  803. "fuzzy_check.rule",
  804. "Override module default min bytes for this rule",
  805. "min_bytes",
  806. UCL_INT,
  807. NULL,
  808. 0,
  809. NULL,
  810. 0);
  811. /* Fuzzy map doc strings */
  812. rspamd_rcl_add_doc_by_path (cfg,
  813. "fuzzy_check.rule.fuzzy_map",
  814. "Maximum score for this flag",
  815. "max_score",
  816. UCL_INT,
  817. NULL,
  818. 0,
  819. NULL,
  820. 0);
  821. rspamd_rcl_add_doc_by_path (cfg,
  822. "fuzzy_check.rule.fuzzy_map",
  823. "Flag number",
  824. "flag",
  825. UCL_INT,
  826. NULL,
  827. 0,
  828. NULL,
  829. 0);
  830. return 0;
  831. }
  832. gint
  833. fuzzy_check_module_config (struct rspamd_config *cfg)
  834. {
  835. const ucl_object_t *value, *cur, *elt;
  836. ucl_object_iter_t it;
  837. gint res = TRUE, cb_id, nrules = 0;
  838. lua_State *L = cfg->lua_state;
  839. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg);
  840. if (!rspamd_config_is_module_enabled (cfg, "fuzzy_check")) {
  841. return TRUE;
  842. }
  843. fuzzy_module_ctx->enabled = TRUE;
  844. fuzzy_module_ctx->check_mime_part_ref = -1;
  845. fuzzy_module_ctx->process_rule_ref = -1;
  846. fuzzy_module_ctx->cleanup_rules_ref = -1;
  847. /* Interact with lua_fuzzy */
  848. if (luaL_dostring (L, "return require \"lua_fuzzy\"") != 0) {
  849. msg_err_config ("cannot require lua_fuzzy: %s",
  850. lua_tostring (L, -1));
  851. fuzzy_module_ctx->enabled = FALSE;
  852. }
  853. else {
  854. if (lua_type (L, -1) != LUA_TTABLE) {
  855. msg_err_config ("lua fuzzy must return "
  856. "table and not %s",
  857. lua_typename (L, lua_type (L, -1)));
  858. fuzzy_module_ctx->enabled = FALSE;
  859. } else {
  860. lua_pushstring (L, "process_rule");
  861. lua_gettable (L, -2);
  862. if (lua_type (L, -1) != LUA_TFUNCTION) {
  863. msg_err_config ("process_rule must return "
  864. "function and not %s",
  865. lua_typename (L, lua_type (L, -1)));
  866. fuzzy_module_ctx->enabled = FALSE;
  867. }
  868. else {
  869. fuzzy_module_ctx->process_rule_ref = luaL_ref (L, LUA_REGISTRYINDEX);
  870. }
  871. lua_pushstring (L, "check_mime_part");
  872. lua_gettable (L, -2);
  873. if (lua_type (L, -1) != LUA_TFUNCTION) {
  874. msg_err_config ("check_mime_part must return "
  875. "function and not %s",
  876. lua_typename (L, lua_type (L, -1)));
  877. fuzzy_module_ctx->enabled = FALSE;
  878. }
  879. else {
  880. fuzzy_module_ctx->check_mime_part_ref = luaL_ref (L, LUA_REGISTRYINDEX);
  881. }
  882. lua_pushstring (L, "cleanup_rules");
  883. lua_gettable (L, -2);
  884. if (lua_type (L, -1) != LUA_TFUNCTION) {
  885. msg_err_config ("cleanup_rules must return "
  886. "function and not %s",
  887. lua_typename (L, lua_type (L, -1)));
  888. fuzzy_module_ctx->enabled = FALSE;
  889. }
  890. else {
  891. fuzzy_module_ctx->cleanup_rules_ref = luaL_ref (L, LUA_REGISTRYINDEX);
  892. }
  893. }
  894. }
  895. lua_settop (L, 0);
  896. if (!fuzzy_module_ctx->enabled) {
  897. return TRUE;
  898. }
  899. if ((value =
  900. rspamd_config_get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) {
  901. fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value);
  902. }
  903. else {
  904. fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL;
  905. }
  906. if ((value =
  907. rspamd_config_get_module_opt (cfg, "fuzzy_check", "timeout")) != NULL) {
  908. fuzzy_module_ctx->io_timeout = ucl_obj_todouble (value) * 1000;
  909. }
  910. else {
  911. fuzzy_module_ctx->io_timeout = DEFAULT_IO_TIMEOUT;
  912. }
  913. if ((value =
  914. rspamd_config_get_module_opt (cfg,
  915. "fuzzy_check",
  916. "retransmits")) != NULL) {
  917. fuzzy_module_ctx->retransmits = ucl_obj_toint (value);
  918. }
  919. else {
  920. fuzzy_module_ctx->retransmits = DEFAULT_RETRANSMITS;
  921. }
  922. if ((value =
  923. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  924. "max_errors")) != NULL) {
  925. fuzzy_module_ctx->max_errors = ucl_obj_toint (value);
  926. }
  927. else {
  928. fuzzy_module_ctx->max_errors = DEFAULT_MAX_ERRORS;
  929. }
  930. if ((value =
  931. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  932. "revive_time")) != NULL) {
  933. fuzzy_module_ctx->revive_time = ucl_obj_todouble (value);
  934. }
  935. else {
  936. fuzzy_module_ctx->revive_time = DEFAULT_REVIVE_TIME;
  937. }
  938. if ((value =
  939. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  940. "whitelist")) != NULL) {
  941. rspamd_config_radix_from_ucl (cfg, value, "Fuzzy whitelist",
  942. &fuzzy_module_ctx->whitelist,
  943. NULL,
  944. NULL);
  945. }
  946. else {
  947. fuzzy_module_ctx->whitelist = NULL;
  948. }
  949. if ((value =
  950. rspamd_config_get_module_opt (cfg, "fuzzy_check", "rule")) != NULL) {
  951. cb_id = rspamd_symcache_add_symbol (cfg->cache,
  952. "FUZZY_CALLBACK", 0, fuzzy_symbol_callback, NULL,
  953. SYMBOL_TYPE_CALLBACK | SYMBOL_TYPE_FINE,
  954. -1);
  955. rspamd_config_add_symbol (cfg,
  956. "FUZZY_CALLBACK",
  957. 0.0,
  958. "Fuzzy check callback",
  959. "fuzzy",
  960. RSPAMD_SYMBOL_FLAG_IGNORE_METRIC,
  961. 1,
  962. 1);
  963. /*
  964. * Here we can have 2 possibilities:
  965. *
  966. * unnamed rules:
  967. *
  968. * rule {
  969. * ...
  970. * }
  971. * rule {
  972. * ...
  973. * }
  974. *
  975. * - or - named rules:
  976. *
  977. * rule {
  978. * "rule1": {
  979. * ...
  980. * }
  981. * "rule2": {
  982. * ...
  983. * }
  984. * }
  985. *
  986. * So, for each element, we check, if there 'servers' key. If 'servers' is
  987. * presented, then we treat it as unnamed rule, otherwise we treat it as
  988. * named rule.
  989. */
  990. LL_FOREACH (value, cur) {
  991. if (ucl_object_lookup (cur, "servers")) {
  992. /* Unnamed rule */
  993. fuzzy_parse_rule (cfg, cur, NULL, cb_id);
  994. nrules ++;
  995. }
  996. else {
  997. /* Named rule */
  998. it = NULL;
  999. while ((elt = ucl_object_iterate (cur, &it, true)) != NULL) {
  1000. fuzzy_parse_rule (cfg, elt, ucl_object_key (elt), cb_id);
  1001. nrules ++;
  1002. }
  1003. }
  1004. }
  1005. /* We want that to check bad mime attachments */
  1006. rspamd_symcache_add_delayed_dependency (cfg->cache,
  1007. "FUZZY_CALLBACK", "MIME_TYPES_CALLBACK");
  1008. }
  1009. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  1010. msg_warn_config ("fuzzy module is enabled but no rules are defined");
  1011. }
  1012. msg_info_config ("init internal fuzzy_check module, %d rules loaded",
  1013. nrules);
  1014. /* Register global methods */
  1015. lua_getglobal (L, "rspamd_plugins");
  1016. if (lua_type (L, -1) == LUA_TTABLE) {
  1017. lua_pushstring (L, "fuzzy_check");
  1018. lua_createtable (L, 0, 2);
  1019. /* Set methods */
  1020. lua_pushstring (L, "unlearn");
  1021. lua_pushcfunction (L, fuzzy_lua_unlearn_handler);
  1022. lua_settable (L, -3);
  1023. lua_pushstring (L, "learn");
  1024. lua_pushcfunction (L, fuzzy_lua_learn_handler);
  1025. lua_settable (L, -3);
  1026. /* Finish fuzzy_check key */
  1027. lua_settable (L, -3);
  1028. }
  1029. lua_settop (L, 0);
  1030. return res;
  1031. }
  1032. gint
  1033. fuzzy_check_module_reconfig (struct rspamd_config *cfg)
  1034. {
  1035. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (cfg);
  1036. if (fuzzy_module_ctx->cleanup_rules_ref != -1) {
  1037. /* Sync lua_fuzzy rules */
  1038. gint err_idx, ret;
  1039. lua_State *L = (lua_State *)cfg->lua_state;
  1040. lua_pushcfunction (L, &rspamd_lua_traceback);
  1041. err_idx = lua_gettop (L);
  1042. lua_rawgeti (L, LUA_REGISTRYINDEX, fuzzy_module_ctx->cleanup_rules_ref);
  1043. if ((ret = lua_pcall (L, 0, 0, err_idx)) != 0) {
  1044. msg_err_config ("call to cleanup_rules lua "
  1045. "script failed (%d): %s", ret, lua_tostring (L, -1));
  1046. }
  1047. luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX,
  1048. fuzzy_module_ctx->cleanup_rules_ref);
  1049. lua_settop (L, 0);
  1050. }
  1051. if (fuzzy_module_ctx->check_mime_part_ref != -1) {
  1052. luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX,
  1053. fuzzy_module_ctx->check_mime_part_ref);
  1054. }
  1055. if (fuzzy_module_ctx->process_rule_ref != -1) {
  1056. luaL_unref (cfg->lua_state, LUA_REGISTRYINDEX,
  1057. fuzzy_module_ctx->process_rule_ref);
  1058. }
  1059. return fuzzy_check_module_config (cfg);
  1060. }
  1061. /* Finalize IO */
  1062. static void
  1063. fuzzy_io_fin (void *ud)
  1064. {
  1065. struct fuzzy_client_session *session = ud;
  1066. if (session->commands) {
  1067. g_ptr_array_free (session->commands, TRUE);
  1068. }
  1069. if (session->results) {
  1070. g_ptr_array_free (session->results, TRUE);
  1071. }
  1072. rspamd_ev_watcher_stop (session->event_loop, &session->ev);
  1073. close (session->fd);
  1074. }
  1075. static GArray *
  1076. fuzzy_preprocess_words (struct rspamd_mime_text_part *part, rspamd_mempool_t *pool)
  1077. {
  1078. return part->utf_words;
  1079. }
  1080. static void
  1081. fuzzy_encrypt_cmd (struct fuzzy_rule *rule,
  1082. struct rspamd_fuzzy_encrypted_req_hdr *hdr,
  1083. guchar *data, gsize datalen)
  1084. {
  1085. const guchar *pk;
  1086. guint pklen;
  1087. g_assert (hdr != NULL);
  1088. g_assert (data != NULL);
  1089. g_assert (rule != NULL);
  1090. /* Encrypt data */
  1091. memcpy (hdr->magic,
  1092. fuzzy_encrypted_magic,
  1093. sizeof (hdr->magic));
  1094. ottery_rand_bytes (hdr->nonce, sizeof (hdr->nonce));
  1095. pk = rspamd_keypair_component (rule->local_key,
  1096. RSPAMD_KEYPAIR_COMPONENT_PK, &pklen);
  1097. memcpy (hdr->pubkey, pk, MIN (pklen, sizeof (hdr->pubkey)));
  1098. pk = rspamd_pubkey_get_pk (rule->peer_key, &pklen);
  1099. memcpy (hdr->key_id, pk, MIN (sizeof (hdr->key_id), pklen));
  1100. rspamd_keypair_cache_process (rule->ctx->keypairs_cache,
  1101. rule->local_key, rule->peer_key);
  1102. rspamd_cryptobox_encrypt_nm_inplace (data, datalen,
  1103. hdr->nonce, rspamd_pubkey_get_nm (rule->peer_key, rule->local_key),
  1104. hdr->mac,
  1105. rspamd_pubkey_alg (rule->peer_key));
  1106. }
  1107. static struct fuzzy_cmd_io *
  1108. fuzzy_cmd_stat (struct fuzzy_rule *rule,
  1109. int c,
  1110. gint flag,
  1111. guint32 weight,
  1112. rspamd_mempool_t *pool)
  1113. {
  1114. struct rspamd_fuzzy_cmd *cmd;
  1115. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1116. struct fuzzy_cmd_io *io;
  1117. if (rule->peer_key) {
  1118. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1119. cmd = &enccmd->cmd;
  1120. }
  1121. else {
  1122. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1123. }
  1124. cmd->cmd = c;
  1125. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1126. cmd->shingles_count = 0;
  1127. cmd->tag = ottery_rand_uint32 ();
  1128. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1129. io->flags = 0;
  1130. io->tag = cmd->tag;
  1131. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1132. if (rule->peer_key && enccmd) {
  1133. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd, sizeof (*cmd));
  1134. io->io.iov_base = enccmd;
  1135. io->io.iov_len = sizeof (*enccmd);
  1136. }
  1137. else {
  1138. io->io.iov_base = cmd;
  1139. io->io.iov_len = sizeof (*cmd);
  1140. }
  1141. return io;
  1142. }
  1143. static struct fuzzy_cmd_io *
  1144. fuzzy_cmd_hash (struct fuzzy_rule *rule,
  1145. int c,
  1146. const rspamd_ftok_t *hash,
  1147. gint flag,
  1148. guint32 weight,
  1149. rspamd_mempool_t *pool)
  1150. {
  1151. struct rspamd_fuzzy_cmd *cmd;
  1152. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1153. struct fuzzy_cmd_io *io;
  1154. if (rule->peer_key) {
  1155. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1156. cmd = &enccmd->cmd;
  1157. }
  1158. else {
  1159. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1160. }
  1161. if (hash->len == sizeof (cmd->digest) * 2) {
  1162. /* It is hex encoding */
  1163. if (rspamd_decode_hex_buf (hash->begin, hash->len, cmd->digest,
  1164. sizeof (cmd->digest)) == -1) {
  1165. msg_err_pool ("cannot decode hash, wrong encoding");
  1166. return NULL;
  1167. }
  1168. }
  1169. else {
  1170. msg_err_pool ("cannot decode hash, wrong length: %z", hash->len);
  1171. return NULL;
  1172. }
  1173. cmd->cmd = c;
  1174. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1175. cmd->shingles_count = 0;
  1176. cmd->tag = ottery_rand_uint32 ();
  1177. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1178. io->flags = 0;
  1179. io->tag = cmd->tag;
  1180. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1181. if (rule->peer_key && enccmd) {
  1182. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd, sizeof (*cmd));
  1183. io->io.iov_base = enccmd;
  1184. io->io.iov_len = sizeof (*enccmd);
  1185. }
  1186. else {
  1187. io->io.iov_base = cmd;
  1188. io->io.iov_len = sizeof (*cmd);
  1189. }
  1190. return io;
  1191. }
  1192. struct rspamd_cached_shingles {
  1193. struct rspamd_shingle *sh;
  1194. guchar digest[rspamd_cryptobox_HASHBYTES];
  1195. };
  1196. static struct rspamd_cached_shingles *
  1197. fuzzy_cmd_get_cached (struct fuzzy_rule *rule,
  1198. rspamd_mempool_t *pool,
  1199. gpointer p)
  1200. {
  1201. gchar key[32];
  1202. gint key_part;
  1203. memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
  1204. rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
  1205. key_part);
  1206. return rspamd_mempool_get_variable (pool, key);
  1207. }
  1208. static void
  1209. fuzzy_cmd_set_cached (struct fuzzy_rule *rule,
  1210. rspamd_mempool_t *pool,
  1211. gpointer p,
  1212. struct rspamd_cached_shingles *data)
  1213. {
  1214. gchar key[32];
  1215. gint key_part;
  1216. memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
  1217. rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
  1218. key_part);
  1219. /* Key is copied */
  1220. rspamd_mempool_set_variable (pool, key, data, NULL);
  1221. }
  1222. static gboolean
  1223. fuzzy_rule_check_mimepart (struct rspamd_task *task,
  1224. struct fuzzy_rule *rule,
  1225. struct rspamd_mime_part *part,
  1226. gboolean *need_check,
  1227. gboolean *fuzzy_check)
  1228. {
  1229. if (rule->lua_id != -1 && rule->ctx->check_mime_part_ref != -1) {
  1230. gint err_idx, ret;
  1231. lua_State *L = (lua_State *)task->cfg->lua_state;
  1232. struct rspamd_task **ptask;
  1233. struct rspamd_mime_part **ppart;
  1234. lua_pushcfunction (L, &rspamd_lua_traceback);
  1235. err_idx = lua_gettop (L);
  1236. lua_rawgeti (L, LUA_REGISTRYINDEX, rule->ctx->check_mime_part_ref);
  1237. ptask = lua_newuserdata (L, sizeof (*ptask));
  1238. *ptask = task;
  1239. rspamd_lua_setclass (L, "rspamd{task}", -1);
  1240. ppart = lua_newuserdata (L, sizeof (*ppart));
  1241. *ppart = part;
  1242. rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
  1243. lua_pushnumber (L, rule->lua_id);
  1244. if ((ret = lua_pcall (L, 3, 2, err_idx)) != 0) {
  1245. msg_err_task ("call to check_mime_part lua "
  1246. "script failed (%d): %s", ret, lua_tostring (L, -1));
  1247. ret = FALSE;
  1248. }
  1249. else {
  1250. ret = TRUE;
  1251. *need_check = lua_toboolean (L, -2);
  1252. *fuzzy_check = lua_toboolean (L, -1);
  1253. }
  1254. lua_settop (L, 0);
  1255. return ret;
  1256. }
  1257. return FALSE;
  1258. }
  1259. /*
  1260. * Create fuzzy command from a text part
  1261. */
  1262. static struct fuzzy_cmd_io *
  1263. fuzzy_cmd_from_text_part (struct rspamd_task *task,
  1264. struct fuzzy_rule *rule,
  1265. int c,
  1266. gint flag,
  1267. guint32 weight,
  1268. gboolean short_text,
  1269. rspamd_mempool_t *pool,
  1270. struct rspamd_mime_text_part *part,
  1271. struct rspamd_mime_part *mp)
  1272. {
  1273. struct rspamd_fuzzy_shingle_cmd *shcmd = NULL;
  1274. struct rspamd_fuzzy_cmd *cmd = NULL;
  1275. struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd = NULL;
  1276. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1277. struct rspamd_cached_shingles *cached = NULL;
  1278. struct rspamd_shingle *sh = NULL;
  1279. guint i;
  1280. rspamd_cryptobox_hash_state_t st;
  1281. rspamd_stat_token_t *word;
  1282. GArray *words;
  1283. struct fuzzy_cmd_io *io;
  1284. cached = fuzzy_cmd_get_cached (rule, pool, mp);
  1285. if (cached) {
  1286. /* Copy cached */
  1287. if (short_text) {
  1288. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1289. cmd = &enccmd->cmd;
  1290. memcpy (cmd->digest, cached->digest,
  1291. sizeof (cached->digest));
  1292. cmd->shingles_count = 0;
  1293. }
  1294. else if (cached->sh) {
  1295. encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
  1296. shcmd = &encshcmd->cmd;
  1297. memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
  1298. memcpy (shcmd->basic.digest, cached->digest,
  1299. sizeof (cached->digest));
  1300. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1301. }
  1302. else {
  1303. return NULL;
  1304. }
  1305. }
  1306. else {
  1307. cached = rspamd_mempool_alloc (pool, sizeof (*cached));
  1308. if (short_text) {
  1309. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
  1310. cmd = &enccmd->cmd;
  1311. rspamd_cryptobox_hash_init (&st, rule->hash_key->str,
  1312. rule->hash_key->len);
  1313. rspamd_cryptobox_hash_update (&st, part->utf_stripped_content->data,
  1314. part->utf_stripped_content->len);
  1315. if (MESSAGE_FIELD (task, subject)) {
  1316. /* We also include subject */
  1317. rspamd_cryptobox_hash_update (&st, MESSAGE_FIELD (task, subject),
  1318. strlen (MESSAGE_FIELD (task, subject)));
  1319. }
  1320. rspamd_cryptobox_hash_final (&st, cmd->digest);
  1321. memcpy (cached->digest, cmd->digest, sizeof (cached->digest));
  1322. cached->sh = NULL;
  1323. }
  1324. else {
  1325. encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
  1326. shcmd = &encshcmd->cmd;
  1327. /*
  1328. * Generate hash from all words in the part
  1329. */
  1330. rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len);
  1331. words = fuzzy_preprocess_words (part, pool);
  1332. for (i = 0; i < words->len; i ++) {
  1333. word = &g_array_index (words, rspamd_stat_token_t, i);
  1334. if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED)
  1335. || word->stemmed.len == 0)) {
  1336. rspamd_cryptobox_hash_update (&st, word->stemmed.begin,
  1337. word->stemmed.len);
  1338. }
  1339. }
  1340. rspamd_cryptobox_hash_final (&st, shcmd->basic.digest);
  1341. msg_debug_pool ("loading shingles of type %s with key %*xs",
  1342. rule->algorithm_str,
  1343. 16, rule->shingles_key->str);
  1344. sh = rspamd_shingles_from_text (words,
  1345. rule->shingles_key->str, pool,
  1346. rspamd_shingles_default_filter, NULL,
  1347. rule->alg);
  1348. if (sh != NULL) {
  1349. memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl));
  1350. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1351. }
  1352. cached->sh = sh;
  1353. memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest));
  1354. }
  1355. /*
  1356. * We always save encrypted command as it can handle both
  1357. * encrypted and unencrypted requests.
  1358. *
  1359. * Since it is copied when obtained from the cache, it is safe to use
  1360. * it this way.
  1361. */
  1362. fuzzy_cmd_set_cached (rule, pool, mp, cached);
  1363. }
  1364. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1365. io->part = mp;
  1366. if (!short_text) {
  1367. shcmd->basic.tag = ottery_rand_uint32 ();
  1368. shcmd->basic.cmd = c;
  1369. shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1370. if (c != FUZZY_CHECK) {
  1371. shcmd->basic.flag = flag;
  1372. shcmd->basic.value = weight;
  1373. }
  1374. io->tag = shcmd->basic.tag;
  1375. memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd));
  1376. }
  1377. else {
  1378. cmd->tag = ottery_rand_uint32 ();
  1379. cmd->cmd = c;
  1380. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1381. if (c != FUZZY_CHECK) {
  1382. cmd->flag = flag;
  1383. cmd->value = weight;
  1384. }
  1385. io->tag = cmd->tag;
  1386. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1387. }
  1388. io->flags = 0;
  1389. if (rule->peer_key) {
  1390. /* Encrypt data */
  1391. if (!short_text) {
  1392. fuzzy_encrypt_cmd (rule, &encshcmd->hdr, (guchar *) shcmd,
  1393. sizeof (*shcmd));
  1394. io->io.iov_base = encshcmd;
  1395. io->io.iov_len = sizeof (*encshcmd);
  1396. }
  1397. else {
  1398. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd,
  1399. sizeof (*cmd));
  1400. io->io.iov_base = enccmd;
  1401. io->io.iov_len = sizeof (*enccmd);
  1402. }
  1403. }
  1404. else {
  1405. if (!short_text) {
  1406. io->io.iov_base = shcmd;
  1407. io->io.iov_len = sizeof (*shcmd);
  1408. }
  1409. else {
  1410. io->io.iov_base = cmd;
  1411. io->io.iov_len = sizeof (*cmd);
  1412. }
  1413. }
  1414. return io;
  1415. }
  1416. static struct fuzzy_cmd_io *
  1417. fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
  1418. int c,
  1419. gint flag,
  1420. guint32 weight,
  1421. rspamd_mempool_t *pool,
  1422. struct rspamd_image *img,
  1423. struct rspamd_mime_part *mp)
  1424. {
  1425. struct rspamd_fuzzy_shingle_cmd *shcmd;
  1426. struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd;
  1427. struct fuzzy_cmd_io *io;
  1428. struct rspamd_shingle *sh;
  1429. struct rspamd_cached_shingles *cached;
  1430. cached = fuzzy_cmd_get_cached (rule, pool, mp);
  1431. if (cached) {
  1432. /* Copy cached */
  1433. encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
  1434. shcmd = &encshcmd->cmd;
  1435. memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
  1436. memcpy (shcmd->basic.digest, cached->digest,
  1437. sizeof (cached->digest));
  1438. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1439. }
  1440. else {
  1441. encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
  1442. shcmd = &encshcmd->cmd;
  1443. /*
  1444. * Generate shingles
  1445. */
  1446. sh = rspamd_shingles_from_image (img->dct,
  1447. rule->shingles_key->str, pool,
  1448. rspamd_shingles_default_filter, NULL,
  1449. rule->alg);
  1450. if (sh != NULL) {
  1451. memcpy (&shcmd->sgl, sh->hashes, sizeof (shcmd->sgl));
  1452. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1453. #if 0
  1454. for (unsigned int i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
  1455. msg_err ("shingle %d: %L", i, sh->hashes[i]);
  1456. }
  1457. #endif
  1458. }
  1459. rspamd_cryptobox_hash (shcmd->basic.digest,
  1460. (const guchar *)img->dct, RSPAMD_DCT_LEN / NBBY,
  1461. rule->hash_key->str, rule->hash_key->len);
  1462. msg_debug_pool ("loading shingles of type %s with key %*xs",
  1463. rule->algorithm_str,
  1464. 16, rule->shingles_key->str);
  1465. /*
  1466. * We always save encrypted command as it can handle both
  1467. * encrypted and unencrypted requests.
  1468. *
  1469. * Since it is copied when obtained from the cache, it is safe to use
  1470. * it this way.
  1471. */
  1472. cached = rspamd_mempool_alloc (pool, sizeof (*cached));
  1473. cached->sh = sh;
  1474. memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest));
  1475. fuzzy_cmd_set_cached (rule, pool, mp, cached);
  1476. }
  1477. shcmd->basic.tag = ottery_rand_uint32 ();
  1478. shcmd->basic.cmd = c;
  1479. shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1480. if (c != FUZZY_CHECK) {
  1481. shcmd->basic.flag = flag;
  1482. shcmd->basic.value = weight;
  1483. }
  1484. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1485. io->part = mp;
  1486. io->tag = shcmd->basic.tag;
  1487. io->flags = FUZZY_CMD_FLAG_IMAGE;
  1488. memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd));
  1489. if (rule->peer_key) {
  1490. /* Encrypt data */
  1491. fuzzy_encrypt_cmd (rule, &encshcmd->hdr, (guchar *) shcmd, sizeof (*shcmd));
  1492. io->io.iov_base = encshcmd;
  1493. io->io.iov_len = sizeof (*encshcmd);
  1494. }
  1495. else {
  1496. io->io.iov_base = shcmd;
  1497. io->io.iov_len = sizeof (*shcmd);
  1498. }
  1499. return io;
  1500. }
  1501. static struct fuzzy_cmd_io *
  1502. fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
  1503. int c,
  1504. gint flag,
  1505. guint32 weight,
  1506. rspamd_mempool_t *pool,
  1507. guchar digest[rspamd_cryptobox_HASHBYTES],
  1508. struct rspamd_mime_part *mp)
  1509. {
  1510. struct rspamd_fuzzy_cmd *cmd;
  1511. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1512. struct fuzzy_cmd_io *io;
  1513. if (rule->peer_key) {
  1514. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1515. cmd = &enccmd->cmd;
  1516. }
  1517. else {
  1518. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1519. }
  1520. cmd->cmd = c;
  1521. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1522. if (c != FUZZY_CHECK) {
  1523. cmd->flag = flag;
  1524. cmd->value = weight;
  1525. }
  1526. cmd->shingles_count = 0;
  1527. cmd->tag = ottery_rand_uint32 ();
  1528. memcpy (cmd->digest, digest, sizeof (cmd->digest));
  1529. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1530. io->flags = 0;
  1531. io->tag = cmd->tag;
  1532. io->part = mp;
  1533. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1534. if (rule->peer_key) {
  1535. g_assert (enccmd != NULL);
  1536. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *) cmd, sizeof (*cmd));
  1537. io->io.iov_base = enccmd;
  1538. io->io.iov_len = sizeof (*enccmd);
  1539. }
  1540. else {
  1541. io->io.iov_base = cmd;
  1542. io->io.iov_len = sizeof (*cmd);
  1543. }
  1544. return io;
  1545. }
  1546. static gboolean
  1547. fuzzy_cmd_to_wire (gint fd, struct iovec *io)
  1548. {
  1549. struct msghdr msg;
  1550. memset (&msg, 0, sizeof (msg));
  1551. msg.msg_iov = io;
  1552. msg.msg_iovlen = 1;
  1553. while (sendmsg (fd, &msg, 0) == -1) {
  1554. if (errno == EINTR) {
  1555. continue;
  1556. }
  1557. return FALSE;
  1558. }
  1559. return TRUE;
  1560. }
  1561. static gboolean
  1562. fuzzy_cmd_vector_to_wire (gint fd, GPtrArray *v)
  1563. {
  1564. guint i;
  1565. gboolean all_sent = TRUE, all_replied = TRUE;
  1566. struct fuzzy_cmd_io *io;
  1567. gboolean processed = FALSE;
  1568. /* First try to resend unsent commands */
  1569. for (i = 0; i < v->len; i ++) {
  1570. io = g_ptr_array_index (v, i);
  1571. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  1572. continue;
  1573. }
  1574. all_replied = FALSE;
  1575. if (!(io->flags & FUZZY_CMD_FLAG_SENT)) {
  1576. if (!fuzzy_cmd_to_wire (fd, &io->io)) {
  1577. return FALSE;
  1578. }
  1579. processed = TRUE;
  1580. io->flags |= FUZZY_CMD_FLAG_SENT;
  1581. all_sent = FALSE;
  1582. }
  1583. }
  1584. if (all_sent && !all_replied) {
  1585. /* Now try to resend each command in the vector */
  1586. for (i = 0; i < v->len; i++) {
  1587. io = g_ptr_array_index (v, i);
  1588. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1589. io->flags &= ~FUZZY_CMD_FLAG_SENT;
  1590. }
  1591. }
  1592. return fuzzy_cmd_vector_to_wire (fd, v);
  1593. }
  1594. return processed;
  1595. }
  1596. /*
  1597. * Read replies one-by-one and remove them from req array
  1598. */
  1599. static const struct rspamd_fuzzy_reply *
  1600. fuzzy_process_reply (guchar **pos, gint *r, GPtrArray *req,
  1601. struct fuzzy_rule *rule, struct rspamd_fuzzy_cmd **pcmd,
  1602. struct fuzzy_cmd_io **pio)
  1603. {
  1604. guchar *p = *pos;
  1605. gint remain = *r;
  1606. guint i, required_size;
  1607. struct fuzzy_cmd_io *io;
  1608. const struct rspamd_fuzzy_reply *rep;
  1609. struct rspamd_fuzzy_encrypted_reply encrep;
  1610. gboolean found = FALSE;
  1611. if (rule->peer_key) {
  1612. required_size = sizeof (encrep);
  1613. }
  1614. else {
  1615. required_size = sizeof (*rep);
  1616. }
  1617. if (remain <= 0 || (guint)remain < required_size) {
  1618. return NULL;
  1619. }
  1620. if (rule->peer_key) {
  1621. memcpy (&encrep, p, sizeof (encrep));
  1622. *pos += required_size;
  1623. *r -= required_size;
  1624. /* Try to decrypt reply */
  1625. rspamd_keypair_cache_process (rule->ctx->keypairs_cache,
  1626. rule->local_key, rule->peer_key);
  1627. if (!rspamd_cryptobox_decrypt_nm_inplace ((guchar *)&encrep.rep,
  1628. sizeof (encrep.rep),
  1629. encrep.hdr.nonce,
  1630. rspamd_pubkey_get_nm (rule->peer_key, rule->local_key),
  1631. encrep.hdr.mac,
  1632. rspamd_pubkey_alg (rule->peer_key))) {
  1633. msg_info ("cannot decrypt reply");
  1634. return NULL;
  1635. }
  1636. /* Copy decrypted over the input wire */
  1637. memcpy (p, &encrep.rep, sizeof (encrep.rep));
  1638. }
  1639. else {
  1640. *pos += required_size;
  1641. *r -= required_size;
  1642. }
  1643. rep = (const struct rspamd_fuzzy_reply *) p;
  1644. /*
  1645. * Search for tag
  1646. */
  1647. for (i = 0; i < req->len; i ++) {
  1648. io = g_ptr_array_index (req, i);
  1649. if (io->tag == rep->v1.tag) {
  1650. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1651. io->flags |= FUZZY_CMD_FLAG_REPLIED;
  1652. if (pcmd) {
  1653. *pcmd = &io->cmd;
  1654. }
  1655. if (pio) {
  1656. *pio = io;
  1657. }
  1658. return rep;
  1659. }
  1660. found = TRUE;
  1661. }
  1662. }
  1663. if (!found) {
  1664. msg_info ("unexpected tag: %ud", rep->v1.tag);
  1665. }
  1666. return NULL;
  1667. }
  1668. static void
  1669. fuzzy_insert_result (struct fuzzy_client_session *session,
  1670. const struct rspamd_fuzzy_reply *rep,
  1671. struct rspamd_fuzzy_cmd *cmd,
  1672. struct fuzzy_cmd_io *io,
  1673. guint flag)
  1674. {
  1675. const gchar *symbol;
  1676. struct fuzzy_mapping *map;
  1677. struct rspamd_task *task = session->task;
  1678. double weight;
  1679. double nval;
  1680. guchar buf[2048];
  1681. const gchar *type = "bin";
  1682. struct fuzzy_client_result *res;
  1683. gboolean is_fuzzy = FALSE;
  1684. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  1685. /* Discriminate scores for small images */
  1686. static const guint short_image_limit = 32 * 1024;
  1687. /* Get mapping by flag */
  1688. if ((map =
  1689. g_hash_table_lookup (session->rule->mappings,
  1690. GINT_TO_POINTER (rep->v1.flag))) == NULL) {
  1691. /* Default symbol and default weight */
  1692. symbol = session->rule->symbol;
  1693. weight = session->rule->max_score;
  1694. }
  1695. else {
  1696. /* Get symbol and weight from map */
  1697. symbol = map->symbol;
  1698. weight = map->weight;
  1699. }
  1700. res = rspamd_mempool_alloc0 (task->task_pool, sizeof (*res));
  1701. res->prob = rep->v1.prob;
  1702. res->symbol = symbol;
  1703. /*
  1704. * Hash is assumed to be found if probability is more than 0.5
  1705. * In that case `value` means number of matches
  1706. * Otherwise `value` means error code
  1707. */
  1708. nval = fuzzy_normalize (rep->v1.value, weight);
  1709. if (io) {
  1710. if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  1711. if (!io->part || io->part->parsed_data.len <= short_image_limit) {
  1712. nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
  1713. }
  1714. type = "img";
  1715. res->type = FUZZY_RESULT_IMG;
  1716. }
  1717. else {
  1718. /* Calc real probability */
  1719. nval *= sqrtf (rep->v1.prob);
  1720. if (cmd->shingles_count > 0) {
  1721. type = "txt";
  1722. res->type = FUZZY_RESULT_TXT;
  1723. }
  1724. else {
  1725. if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
  1726. type = "content";
  1727. res->type = FUZZY_RESULT_CONTENT;
  1728. }
  1729. else {
  1730. res->type = FUZZY_RESULT_BIN;
  1731. }
  1732. }
  1733. }
  1734. }
  1735. res->score = nval;
  1736. if (memcmp (rep->digest, cmd->digest, sizeof (rep->digest)) != 0) {
  1737. is_fuzzy = TRUE;
  1738. }
  1739. if (map != NULL || !session->rule->skip_unknown) {
  1740. GList *fuzzy_var;
  1741. rspamd_fstring_t *hex_result;
  1742. if (session->rule->skip_map) {
  1743. rspamd_encode_hex_buf (cmd->digest, sizeof (cmd->digest),
  1744. hexbuf, sizeof (hexbuf) - 1);
  1745. hexbuf[sizeof (hexbuf) - 1] = '\0';
  1746. if (rspamd_match_hash_map (session->rule->skip_map, hexbuf)) {
  1747. return;
  1748. }
  1749. }
  1750. rspamd_encode_hex_buf (rep->digest, sizeof (rep->digest),
  1751. hexbuf, sizeof (hexbuf) - 1);
  1752. hexbuf[sizeof (hexbuf) - 1] = '\0';
  1753. if (is_fuzzy) {
  1754. msg_info_task (
  1755. "found fuzzy hash(%s) %s (%*xs requested) with weight: "
  1756. "%.2f, probability %.2f, in list: %s:%d%s",
  1757. type,
  1758. hexbuf,
  1759. (gint) sizeof (cmd->digest), cmd->digest,
  1760. nval,
  1761. (gdouble) rep->v1.prob,
  1762. symbol,
  1763. rep->v1.flag,
  1764. map == NULL ? "(unknown)" : "");
  1765. }
  1766. else {
  1767. msg_info_task (
  1768. "found exact fuzzy hash(%s) %s with weight: "
  1769. "%.2f, probability %.2f, in list: %s:%d%s",
  1770. type,
  1771. hexbuf,
  1772. nval,
  1773. (gdouble) rep->v1.prob,
  1774. symbol,
  1775. rep->v1.flag,
  1776. map == NULL ? "(unknown)" : "");
  1777. }
  1778. rspamd_snprintf (buf,
  1779. sizeof (buf),
  1780. "%d:%*s:%.2f:%s",
  1781. rep->v1.flag,
  1782. (gint)MIN(rspamd_fuzzy_hash_len * 2, sizeof (rep->digest) * 2), hexbuf,
  1783. rep->v1.prob,
  1784. type);
  1785. res->option = rspamd_mempool_strdup (task->task_pool, buf);
  1786. g_ptr_array_add (session->results, res);
  1787. /* Store hex string in pool variable */
  1788. hex_result = rspamd_mempool_alloc (task->task_pool,
  1789. sizeof (rspamd_fstring_t) + sizeof (hexbuf));
  1790. memcpy (hex_result->str, hexbuf, sizeof (hexbuf));
  1791. hex_result->len = sizeof (hexbuf) - 1;
  1792. hex_result->allocated = (gsize)-1;
  1793. fuzzy_var = rspamd_mempool_get_variable (task->task_pool,
  1794. RSPAMD_MEMPOOL_FUZZY_RESULT);
  1795. if (fuzzy_var == NULL) {
  1796. fuzzy_var = g_list_prepend (NULL, hex_result);
  1797. rspamd_mempool_set_variable (task->task_pool,
  1798. RSPAMD_MEMPOOL_FUZZY_RESULT, fuzzy_var,
  1799. (rspamd_mempool_destruct_t)g_list_free);
  1800. }
  1801. else {
  1802. /* Not very efficient, but we don't really use it intensively */
  1803. fuzzy_var = g_list_append (fuzzy_var, hex_result);
  1804. }
  1805. }
  1806. }
  1807. static gint
  1808. fuzzy_check_try_read (struct fuzzy_client_session *session)
  1809. {
  1810. struct rspamd_task *task;
  1811. const struct rspamd_fuzzy_reply *rep;
  1812. struct rspamd_fuzzy_cmd *cmd = NULL;
  1813. struct fuzzy_cmd_io *io = NULL;
  1814. gint r, ret;
  1815. guchar buf[2048], *p;
  1816. task = session->task;
  1817. if ((r = read (session->fd, buf, sizeof (buf) - 1)) == -1) {
  1818. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  1819. return 0;
  1820. }
  1821. else {
  1822. return -1;
  1823. }
  1824. }
  1825. else {
  1826. p = buf;
  1827. ret = 0;
  1828. while ((rep = fuzzy_process_reply (&p, &r,
  1829. session->commands, session->rule, &cmd, &io)) != NULL) {
  1830. if (rep->v1.prob > 0.5) {
  1831. if (cmd->cmd == FUZZY_CHECK) {
  1832. fuzzy_insert_result (session, rep, cmd, io, rep->v1.flag);
  1833. }
  1834. else if (cmd->cmd == FUZZY_STAT) {
  1835. /* Just set pool variable to extract it in further */
  1836. struct rspamd_fuzzy_stat_entry *pval;
  1837. GList *res;
  1838. pval = rspamd_mempool_alloc (task->task_pool, sizeof (*pval));
  1839. pval->fuzzy_cnt = rep->v1.flag;
  1840. pval->name = session->rule->name;
  1841. res = rspamd_mempool_get_variable (task->task_pool, "fuzzy_stat");
  1842. if (res == NULL) {
  1843. res = g_list_append (NULL, pval);
  1844. rspamd_mempool_set_variable (task->task_pool, "fuzzy_stat",
  1845. res, (rspamd_mempool_destruct_t)g_list_free);
  1846. }
  1847. else {
  1848. res = g_list_append (res, pval);
  1849. }
  1850. }
  1851. }
  1852. else if (rep->v1.value == 403) {
  1853. rspamd_task_insert_result (task, "FUZZY_BLOCKED", 0.0,
  1854. session->rule->name);
  1855. }
  1856. else if (rep->v1.value == 401) {
  1857. if (cmd->cmd != FUZZY_CHECK) {
  1858. msg_info_task (
  1859. "fuzzy check error for %d: skipped by server",
  1860. rep->v1.flag);
  1861. }
  1862. }
  1863. else if (rep->v1.value != 0) {
  1864. msg_info_task (
  1865. "fuzzy check error for %d: unknown error (%d)",
  1866. rep->v1.flag,
  1867. rep->v1.value);
  1868. }
  1869. ret = 1;
  1870. }
  1871. }
  1872. return ret;
  1873. }
  1874. static void
  1875. fuzzy_insert_metric_results (struct rspamd_task *task, GPtrArray *results)
  1876. {
  1877. struct fuzzy_client_result *res;
  1878. guint i;
  1879. gboolean seen_text_hash = FALSE,
  1880. seen_img_hash = FALSE,
  1881. seen_text_part = FALSE,
  1882. seen_long_text = FALSE;
  1883. gdouble prob_txt = 0.0, mult;
  1884. struct rspamd_mime_text_part *tp;
  1885. /* About 5 words */
  1886. static const unsigned int text_length_cutoff = 25;
  1887. PTR_ARRAY_FOREACH (results, i, res) {
  1888. if (res->type == FUZZY_RESULT_TXT) {
  1889. seen_text_hash = TRUE;
  1890. prob_txt = MAX (prob_txt, res->prob);
  1891. }
  1892. else if (res->type == FUZZY_RESULT_IMG) {
  1893. seen_img_hash = TRUE;
  1894. }
  1895. }
  1896. if (task->message) {
  1897. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
  1898. if (!IS_PART_EMPTY (tp) && tp->utf_words != NULL && tp->utf_words->len > 0) {
  1899. seen_text_part = TRUE;
  1900. if (tp->utf_stripped_text.magic == UTEXT_MAGIC) {
  1901. if (utext_isLengthExpensive (&tp->utf_stripped_text)) {
  1902. seen_long_text =
  1903. utext_nativeLength (&tp->utf_stripped_text) >
  1904. text_length_cutoff;
  1905. }
  1906. else {
  1907. /* Cannot directly calculate length */
  1908. seen_long_text =
  1909. (tp->utf_stripped_content->len / 2) >
  1910. text_length_cutoff;
  1911. }
  1912. }
  1913. }
  1914. }
  1915. }
  1916. PTR_ARRAY_FOREACH (results, i, res) {
  1917. mult = 1.0;
  1918. if (res->type == FUZZY_RESULT_IMG) {
  1919. if (!seen_text_hash) {
  1920. if (seen_long_text) {
  1921. mult *= 0.25;
  1922. }
  1923. else if (seen_text_part) {
  1924. /* We have some short text + image */
  1925. mult *= 0.9;
  1926. }
  1927. /* Otherwise apply full score */
  1928. }
  1929. else if (prob_txt < 0.75) {
  1930. /* Penalize sole image without matching text */
  1931. if (prob_txt > 0.5) {
  1932. mult *= prob_txt;
  1933. }
  1934. else {
  1935. mult *= 0.5; /* cutoff */
  1936. }
  1937. }
  1938. }
  1939. else if (res->type == FUZZY_RESULT_TXT) {
  1940. if (seen_img_hash) {
  1941. /* Slightly increase score */
  1942. mult = 1.1;
  1943. }
  1944. }
  1945. rspamd_task_insert_result_single (task, res->symbol,
  1946. res->score * mult, res->option);
  1947. }
  1948. }
  1949. static gboolean
  1950. fuzzy_check_session_is_completed (struct fuzzy_client_session *session)
  1951. {
  1952. struct fuzzy_cmd_io *io;
  1953. guint nreplied = 0, i;
  1954. rspamd_upstream_ok (session->server);
  1955. for (i = 0; i < session->commands->len; i++) {
  1956. io = g_ptr_array_index (session->commands, i);
  1957. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  1958. nreplied++;
  1959. }
  1960. }
  1961. if (nreplied == session->commands->len) {
  1962. fuzzy_insert_metric_results (session->task, session->results);
  1963. if (session->item) {
  1964. rspamd_symcache_item_async_dec_check (session->task, session->item, M);
  1965. }
  1966. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  1967. return TRUE;
  1968. }
  1969. return FALSE;
  1970. }
  1971. /* Fuzzy check timeout callback */
  1972. static void
  1973. fuzzy_check_timer_callback (gint fd, short what, void *arg)
  1974. {
  1975. struct fuzzy_client_session *session = arg;
  1976. struct rspamd_task *task;
  1977. task = session->task;
  1978. /* We might be here because of other checks being slow */
  1979. if (fuzzy_check_try_read (session) > 0) {
  1980. if (fuzzy_check_session_is_completed (session)) {
  1981. return;
  1982. }
  1983. }
  1984. if (session->retransmits >= session->rule->ctx->retransmits) {
  1985. msg_err_task ("got IO timeout with server %s(%s), after %d retransmits",
  1986. rspamd_upstream_name (session->server),
  1987. rspamd_inet_address_to_string_pretty (
  1988. rspamd_upstream_addr_cur (session->server)),
  1989. session->retransmits);
  1990. rspamd_upstream_fail (session->server, TRUE, "timeout");
  1991. if (session->item) {
  1992. rspamd_symcache_item_async_dec_check (session->task, session->item, M);
  1993. }
  1994. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  1995. }
  1996. else {
  1997. /* Plan write event */
  1998. rspamd_ev_watcher_reschedule (session->event_loop,
  1999. &session->ev, EV_READ|EV_WRITE);
  2000. session->retransmits ++;
  2001. }
  2002. }
  2003. /* Fuzzy check callback */
  2004. static void
  2005. fuzzy_check_io_callback (gint fd, short what, void *arg)
  2006. {
  2007. struct fuzzy_client_session *session = arg;
  2008. struct rspamd_task *task;
  2009. gint r;
  2010. enum {
  2011. return_error = 0,
  2012. return_want_more,
  2013. return_finished
  2014. } ret = return_error;
  2015. task = session->task;
  2016. if ((what & EV_READ) || session->state == 1) {
  2017. /* Try to read reply */
  2018. r = fuzzy_check_try_read (session);
  2019. switch (r) {
  2020. case 0:
  2021. if (what & EV_READ) {
  2022. ret = return_want_more;
  2023. }
  2024. else {
  2025. /* It is actually time out */
  2026. fuzzy_check_timer_callback (fd, what, arg);
  2027. return;
  2028. }
  2029. break;
  2030. case 1:
  2031. ret = return_finished;
  2032. break;
  2033. default:
  2034. ret = return_error;
  2035. break;
  2036. }
  2037. }
  2038. else if (what & EV_WRITE) {
  2039. if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
  2040. ret = return_error;
  2041. }
  2042. else {
  2043. session->state = 1;
  2044. ret = return_want_more;
  2045. }
  2046. }
  2047. else {
  2048. fuzzy_check_timer_callback (fd, what, arg);
  2049. return;
  2050. }
  2051. if (ret == return_want_more) {
  2052. /* Processed write, switch to reading */
  2053. rspamd_ev_watcher_reschedule (session->event_loop,
  2054. &session->ev, EV_READ);
  2055. }
  2056. else if (ret == return_error) {
  2057. /* Error state */
  2058. msg_err_task ("got error on IO with server %s(%s), on %s, %d, %s",
  2059. rspamd_upstream_name (session->server),
  2060. rspamd_inet_address_to_string_pretty (
  2061. rspamd_upstream_addr_cur (session->server)),
  2062. session->state == 1 ? "read" : "write",
  2063. errno,
  2064. strerror (errno));
  2065. rspamd_upstream_fail (session->server, TRUE, strerror (errno));
  2066. if (session->item) {
  2067. rspamd_symcache_item_async_dec_check (session->task, session->item, M);
  2068. }
  2069. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  2070. }
  2071. else {
  2072. /* Read something from network */
  2073. if (!fuzzy_check_session_is_completed (session)) {
  2074. /* Need to read more */
  2075. rspamd_ev_watcher_reschedule (session->event_loop,
  2076. &session->ev, EV_READ);
  2077. }
  2078. }
  2079. }
  2080. static void
  2081. fuzzy_lua_fin (void *ud)
  2082. {
  2083. struct fuzzy_learn_session *session = ud;
  2084. (*session->saved)--;
  2085. rspamd_ev_watcher_stop (session->event_loop, &session->ev);
  2086. close (session->fd);
  2087. }
  2088. /* Controller IO */
  2089. static void
  2090. fuzzy_controller_timer_callback (gint fd, short what, void *arg)
  2091. {
  2092. struct fuzzy_learn_session *session = arg;
  2093. struct rspamd_task *task;
  2094. task = session->task;
  2095. if (session->retransmits >= session->rule->ctx->retransmits) {
  2096. rspamd_upstream_fail (session->server, TRUE, "timeout");
  2097. msg_err_task_check ("got IO timeout with server %s(%s), "
  2098. "after %d retransmits",
  2099. rspamd_upstream_name (session->server),
  2100. rspamd_inet_address_to_string_pretty (
  2101. rspamd_upstream_addr_cur (session->server)),
  2102. session->retransmits);
  2103. if (session->session) {
  2104. rspamd_session_remove_event (session->session, fuzzy_lua_fin,
  2105. session);
  2106. }
  2107. else {
  2108. if (session->http_entry) {
  2109. rspamd_controller_send_error (session->http_entry,
  2110. 500, "IO timeout with fuzzy storage");
  2111. }
  2112. if (*session->saved > 0 ) {
  2113. (*session->saved)--;
  2114. if (*session->saved == 0) {
  2115. if (session->http_entry) {
  2116. rspamd_task_free (session->task);
  2117. }
  2118. session->task = NULL;
  2119. }
  2120. }
  2121. if (session->http_entry) {
  2122. rspamd_http_connection_unref (session->http_entry->conn);
  2123. }
  2124. rspamd_ev_watcher_stop (session->event_loop,
  2125. &session->ev);
  2126. close (session->fd);
  2127. }
  2128. }
  2129. else {
  2130. /* Plan write event */
  2131. rspamd_ev_watcher_reschedule (session->event_loop,
  2132. &session->ev, EV_READ|EV_WRITE);
  2133. session->retransmits ++;
  2134. }
  2135. }
  2136. static void
  2137. fuzzy_controller_io_callback (gint fd, short what, void *arg)
  2138. {
  2139. struct fuzzy_learn_session *session = arg;
  2140. const struct rspamd_fuzzy_reply *rep;
  2141. struct fuzzy_mapping *map;
  2142. struct rspamd_task *task;
  2143. guchar buf[2048], *p;
  2144. struct fuzzy_cmd_io *io;
  2145. struct rspamd_fuzzy_cmd *cmd = NULL;
  2146. const gchar *symbol, *ftype;
  2147. gint r;
  2148. enum {
  2149. return_error = 0,
  2150. return_want_more,
  2151. return_finished
  2152. } ret = return_want_more;
  2153. guint i, nreplied;
  2154. const gchar *op = "process";
  2155. task = session->task;
  2156. if (what & EV_READ) {
  2157. if ((r = read (fd, buf, sizeof (buf) - 1)) == -1) {
  2158. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  2159. rspamd_ev_watcher_reschedule (session->event_loop,
  2160. &session->ev, EV_READ);
  2161. return;
  2162. }
  2163. msg_info_task ("cannot process fuzzy hash for message: %s",
  2164. strerror (errno));
  2165. if (*(session->err) == NULL) {
  2166. g_set_error (session->err,
  2167. g_quark_from_static_string (M),
  2168. errno, "read socket error: %s", strerror (errno));
  2169. }
  2170. ret = return_error;
  2171. }
  2172. else {
  2173. p = buf;
  2174. ret = return_want_more;
  2175. while ((rep = fuzzy_process_reply (&p, &r,
  2176. session->commands, session->rule, &cmd, &io)) != NULL) {
  2177. if ((map =
  2178. g_hash_table_lookup (session->rule->mappings,
  2179. GINT_TO_POINTER (rep->v1.flag))) == NULL) {
  2180. /* Default symbol and default weight */
  2181. symbol = session->rule->symbol;
  2182. }
  2183. else {
  2184. /* Get symbol and weight from map */
  2185. symbol = map->symbol;
  2186. }
  2187. ftype = "bin";
  2188. if (io) {
  2189. if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  2190. ftype = "img";
  2191. }
  2192. else if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
  2193. ftype = "content";
  2194. }
  2195. else if (cmd->shingles_count > 0) {
  2196. ftype = "txt";
  2197. }
  2198. if (io->cmd.cmd == FUZZY_WRITE) {
  2199. op = "added";
  2200. }
  2201. else if (io->cmd.cmd == FUZZY_DEL) {
  2202. op = "deleted";
  2203. }
  2204. }
  2205. if (rep->v1.prob > 0.5) {
  2206. msg_info_task ("%s fuzzy hash (%s) %*xs, list: %s:%d for "
  2207. "message <%s>",
  2208. op,
  2209. ftype,
  2210. (gint)sizeof (rep->digest), rep->digest,
  2211. symbol,
  2212. rep->v1.flag,
  2213. MESSAGE_FIELD_CHECK (session->task, message_id));
  2214. }
  2215. else {
  2216. if (rep->v1.value == 401) {
  2217. msg_info_task (
  2218. "fuzzy hash (%s) for message cannot be %s"
  2219. "<%s>, %*xs, "
  2220. "list %s:%d, skipped by server",
  2221. ftype,
  2222. op,
  2223. MESSAGE_FIELD (session->task, message_id),
  2224. (gint)sizeof (rep->digest), rep->digest,
  2225. symbol,
  2226. rep->v1.flag);
  2227. if (*(session->err) == NULL) {
  2228. g_set_error (session->err,
  2229. g_quark_from_static_string (M),
  2230. rep->v1.value, "fuzzy hash is skipped");
  2231. }
  2232. }
  2233. else {
  2234. msg_info_task (
  2235. "fuzzy hash (%s) for message cannot be %s"
  2236. "<%s>, %*xs, "
  2237. "list %s:%d, error: %d",
  2238. ftype,
  2239. op,
  2240. MESSAGE_FIELD (session->task, message_id),
  2241. (gint)sizeof (rep->digest), rep->digest,
  2242. symbol,
  2243. rep->v1.flag,
  2244. rep->v1.value);
  2245. if (*(session->err) == NULL) {
  2246. g_set_error (session->err,
  2247. g_quark_from_static_string (M),
  2248. rep->v1.value, "process fuzzy error");
  2249. }
  2250. }
  2251. ret = return_finished;
  2252. }
  2253. }
  2254. nreplied = 0;
  2255. for (i = 0; i < session->commands->len; i++) {
  2256. io = g_ptr_array_index (session->commands, i);
  2257. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  2258. nreplied++;
  2259. }
  2260. }
  2261. if (nreplied == session->commands->len) {
  2262. ret = return_finished;
  2263. }
  2264. }
  2265. }
  2266. else if (what & EV_WRITE) {
  2267. /* Send commands to storage */
  2268. if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
  2269. if (*(session->err) == NULL) {
  2270. g_set_error (session->err,
  2271. g_quark_from_static_string (M),
  2272. errno, "write socket error: %s", strerror (errno));
  2273. }
  2274. ret = return_error;
  2275. }
  2276. }
  2277. else {
  2278. fuzzy_controller_timer_callback (fd, what, arg);
  2279. return;
  2280. }
  2281. if (ret == return_want_more) {
  2282. rspamd_ev_watcher_reschedule (session->event_loop,
  2283. &session->ev, EV_READ);
  2284. return;
  2285. }
  2286. else if (ret == return_error) {
  2287. msg_err_task ("got error in IO with server %s(%s), %d, %s",
  2288. rspamd_upstream_name (session->server),
  2289. rspamd_inet_address_to_string_pretty (
  2290. rspamd_upstream_addr_cur (session->server)),
  2291. errno, strerror (errno));
  2292. rspamd_upstream_fail (session->server, FALSE, strerror (errno));
  2293. }
  2294. /*
  2295. * XXX: actually, we check merely a single reply, which is not correct...
  2296. * XXX: when we send a command, we do not check if *all* commands have been
  2297. * written
  2298. * XXX: please, please, change this code some day
  2299. */
  2300. if (session->session == NULL) {
  2301. (*session->saved)--;
  2302. if (session->http_entry) {
  2303. rspamd_http_connection_unref (session->http_entry->conn);
  2304. }
  2305. rspamd_ev_watcher_stop (session->event_loop, &session->ev);
  2306. close (session->fd);
  2307. if (*session->saved == 0) {
  2308. goto cleanup;
  2309. }
  2310. }
  2311. else {
  2312. /* Lua handler */
  2313. rspamd_session_remove_event (session->session, fuzzy_lua_fin, session);
  2314. }
  2315. return;
  2316. cleanup:
  2317. /*
  2318. * When we send learn commands to fuzzy storages, this code is executed
  2319. * *once* when we have queried all storages. We also don't know which
  2320. * storage has been failed.
  2321. *
  2322. * Therefore, we cleanup sessions earlier and actually this code is wrong.
  2323. */
  2324. if (*(session->err) != NULL) {
  2325. if (session->http_entry) {
  2326. rspamd_controller_send_error (session->http_entry,
  2327. (*session->err)->code, (*session->err)->message);
  2328. }
  2329. g_error_free (*session->err);
  2330. }
  2331. else {
  2332. rspamd_upstream_ok (session->server);
  2333. if (session->http_entry) {
  2334. ucl_object_t *reply, *hashes;
  2335. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  2336. reply = ucl_object_typed_new (UCL_OBJECT);
  2337. ucl_object_insert_key (reply, ucl_object_frombool (true),
  2338. "success", 0, false);
  2339. hashes = ucl_object_typed_new (UCL_ARRAY);
  2340. for (i = 0; i < session->commands->len; i ++) {
  2341. io = g_ptr_array_index (session->commands, i);
  2342. rspamd_snprintf (hexbuf, sizeof (hexbuf), "%*xs",
  2343. (gint)sizeof (io->cmd.digest), io->cmd.digest);
  2344. ucl_array_append (hashes, ucl_object_fromstring (hexbuf));
  2345. }
  2346. ucl_object_insert_key (reply, hashes, "hashes", 0, false);
  2347. rspamd_controller_send_ucl (session->http_entry, reply);
  2348. ucl_object_unref (reply);
  2349. }
  2350. }
  2351. if (session->task != NULL) {
  2352. if (session->http_entry) {
  2353. rspamd_task_free (session->task);
  2354. }
  2355. session->task = NULL;
  2356. }
  2357. }
  2358. static GPtrArray *
  2359. fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
  2360. gint c, gint flag, guint32 value, guint flags)
  2361. {
  2362. struct rspamd_mime_text_part *part;
  2363. struct rspamd_mime_part *mime_part;
  2364. struct rspamd_image *image;
  2365. struct fuzzy_cmd_io *io, *cur;
  2366. guint i, j;
  2367. GPtrArray *res = NULL;
  2368. gboolean check_part, fuzzy_check;
  2369. if (c == FUZZY_STAT) {
  2370. res = g_ptr_array_sized_new (1);
  2371. io = fuzzy_cmd_stat (rule, c, flag, value, task->task_pool);
  2372. if (io) {
  2373. g_ptr_array_add (res, io);
  2374. }
  2375. goto end;
  2376. }
  2377. if (task->message == NULL) {
  2378. goto end;
  2379. }
  2380. res = g_ptr_array_sized_new (MESSAGE_FIELD (task, parts)->len + 1);
  2381. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, mime_part) {
  2382. check_part = FALSE;
  2383. fuzzy_check = FALSE;
  2384. if (fuzzy_rule_check_mimepart (task, rule, mime_part, &check_part,
  2385. &fuzzy_check)) {
  2386. io = NULL;
  2387. if (check_part) {
  2388. if (mime_part->part_type == RSPAMD_MIME_PART_TEXT &&
  2389. !(flags & FUZZY_CHECK_FLAG_NOTEXT)) {
  2390. part = mime_part->specific.txt;
  2391. io = fuzzy_cmd_from_text_part (task, rule,
  2392. c,
  2393. flag,
  2394. value,
  2395. !fuzzy_check,
  2396. task->task_pool,
  2397. part,
  2398. mime_part);
  2399. }
  2400. else if (mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&
  2401. !(flags & FUZZY_CHECK_FLAG_NOIMAGES)) {
  2402. image = mime_part->specific.img;
  2403. io = fuzzy_cmd_from_data_part (rule, c, flag, value,
  2404. task->task_pool,
  2405. image->parent->digest,
  2406. mime_part);
  2407. io->flags |= FUZZY_CMD_FLAG_IMAGE;
  2408. }
  2409. else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
  2410. const struct rspamd_lua_specific_part *lua_spec;
  2411. lua_spec = &mime_part->specific.lua_specific;
  2412. if (lua_spec->type == RSPAMD_LUA_PART_TABLE) {
  2413. lua_State *L = (lua_State *)task->cfg->lua_state;
  2414. gint old_top;
  2415. old_top = lua_gettop (L);
  2416. /* Push table */
  2417. lua_rawgeti (L, LUA_REGISTRYINDEX, lua_spec->cbref);
  2418. lua_pushstring (L, "fuzzy_hashes");
  2419. lua_gettable (L, -2);
  2420. if (lua_type (L, -1) == LUA_TTABLE) {
  2421. for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 1)) {
  2422. const gchar *h = NULL;
  2423. gsize hlen = 0;
  2424. if (lua_isstring (L, -1)) {
  2425. h = lua_tolstring (L, -1, &hlen);
  2426. }
  2427. else if (lua_type (L, -1) == LUA_TUSERDATA) {
  2428. struct rspamd_lua_text *t;
  2429. t = lua_check_text (L, -1);
  2430. if (t) {
  2431. h = t->start;
  2432. hlen = t->len;
  2433. }
  2434. }
  2435. if (hlen == rspamd_cryptobox_HASHBYTES) {
  2436. io = fuzzy_cmd_from_data_part (rule, c,
  2437. flag, value,
  2438. task->task_pool,
  2439. (guchar *)h,
  2440. mime_part);
  2441. if (io) {
  2442. io->flags |= FUZZY_CMD_FLAG_CONTENT;
  2443. g_ptr_array_add (res, io);
  2444. }
  2445. }
  2446. }
  2447. }
  2448. lua_settop (L, old_top);
  2449. /*
  2450. * Add part itself as well
  2451. */
  2452. io = fuzzy_cmd_from_data_part (rule, c,
  2453. flag, value,
  2454. task->task_pool,
  2455. mime_part->digest,
  2456. mime_part);
  2457. }
  2458. }
  2459. else {
  2460. io = fuzzy_cmd_from_data_part (rule, c, flag, value,
  2461. task->task_pool,
  2462. mime_part->digest, mime_part);
  2463. }
  2464. if (io) {
  2465. gboolean skip_existing = FALSE;
  2466. PTR_ARRAY_FOREACH (res, j, cur) {
  2467. if (memcmp (cur->cmd.digest, io->cmd.digest,
  2468. sizeof (io->cmd.digest)) == 0) {
  2469. skip_existing = TRUE;
  2470. break;
  2471. }
  2472. }
  2473. if (!skip_existing) {
  2474. g_ptr_array_add (res, io);
  2475. }
  2476. }
  2477. }
  2478. }
  2479. }
  2480. end:
  2481. if (res && res->len == 0) {
  2482. g_ptr_array_free (res, TRUE);
  2483. return NULL;
  2484. }
  2485. return res;
  2486. }
  2487. static inline void
  2488. register_fuzzy_client_call (struct rspamd_task *task,
  2489. struct fuzzy_rule *rule,
  2490. GPtrArray *commands)
  2491. {
  2492. struct fuzzy_client_session *session;
  2493. struct upstream *selected;
  2494. rspamd_inet_addr_t *addr;
  2495. gint sock;
  2496. if (!rspamd_session_blocked (task->s)) {
  2497. /* Get upstream */
  2498. selected = rspamd_upstream_get (rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN,
  2499. NULL, 0);
  2500. if (selected) {
  2501. addr = rspamd_upstream_addr_next (selected);
  2502. if ((sock = rspamd_inet_address_connect (addr, SOCK_DGRAM, TRUE)) == -1) {
  2503. msg_warn_task ("cannot connect to %s(%s), %d, %s",
  2504. rspamd_upstream_name (selected),
  2505. rspamd_inet_address_to_string_pretty (addr),
  2506. errno,
  2507. strerror (errno));
  2508. rspamd_upstream_fail (selected, TRUE, strerror (errno));
  2509. g_ptr_array_free (commands, TRUE);
  2510. } else {
  2511. /* Create session for a socket */
  2512. session =
  2513. rspamd_mempool_alloc0 (task->task_pool,
  2514. sizeof (struct fuzzy_client_session));
  2515. session->state = 0;
  2516. session->commands = commands;
  2517. session->task = task;
  2518. session->fd = sock;
  2519. session->server = selected;
  2520. session->rule = rule;
  2521. session->results = g_ptr_array_sized_new (32);
  2522. session->event_loop = task->event_loop;
  2523. rspamd_ev_watcher_init (&session->ev,
  2524. sock,
  2525. EV_WRITE,
  2526. fuzzy_check_io_callback,
  2527. session);
  2528. rspamd_ev_watcher_start (session->event_loop, &session->ev,
  2529. ((double)rule->ctx->io_timeout) / 1000.0);
  2530. rspamd_session_add_event (task->s, fuzzy_io_fin, session, M);
  2531. session->item = rspamd_symcache_get_cur_item (task);
  2532. if (session->item) {
  2533. rspamd_symcache_item_async_inc (task, session->item, M);
  2534. }
  2535. }
  2536. }
  2537. }
  2538. }
  2539. /* This callback is called when we check message in fuzzy hashes storage */
  2540. static void
  2541. fuzzy_symbol_callback (struct rspamd_task *task,
  2542. struct rspamd_symcache_item *item,
  2543. void *unused)
  2544. {
  2545. struct fuzzy_rule *rule;
  2546. guint i;
  2547. GPtrArray *commands;
  2548. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  2549. if (!fuzzy_module_ctx->enabled) {
  2550. rspamd_symcache_finalize_item (task, item);
  2551. return;
  2552. }
  2553. /* Check whitelist */
  2554. if (fuzzy_module_ctx->whitelist) {
  2555. if (rspamd_match_radix_map_addr (fuzzy_module_ctx->whitelist,
  2556. task->from_addr) != NULL) {
  2557. msg_info_task ("<%s>, address %s is whitelisted, skip fuzzy check",
  2558. MESSAGE_FIELD (task, message_id),
  2559. rspamd_inet_address_to_string (task->from_addr));
  2560. rspamd_symcache_finalize_item (task, item);
  2561. return;
  2562. }
  2563. }
  2564. rspamd_symcache_item_async_inc (task, item, M);
  2565. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2566. commands = fuzzy_generate_commands (task, rule, FUZZY_CHECK, 0, 0, 0);
  2567. if (commands != NULL) {
  2568. register_fuzzy_client_call (task, rule, commands);
  2569. }
  2570. }
  2571. rspamd_symcache_item_async_dec_check (task, item, M);
  2572. }
  2573. void
  2574. fuzzy_stat_command (struct rspamd_task *task)
  2575. {
  2576. struct fuzzy_rule *rule;
  2577. guint i;
  2578. GPtrArray *commands;
  2579. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  2580. if (!fuzzy_module_ctx->enabled) {
  2581. return;
  2582. }
  2583. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2584. commands = fuzzy_generate_commands (task, rule, FUZZY_STAT, 0, 0, 0);
  2585. if (commands != NULL) {
  2586. register_fuzzy_client_call (task, rule, commands);
  2587. }
  2588. }
  2589. }
  2590. static inline gint
  2591. register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry,
  2592. struct fuzzy_rule *rule,
  2593. struct rspamd_task *task,
  2594. GPtrArray *commands,
  2595. gint *saved,
  2596. GError **err)
  2597. {
  2598. struct fuzzy_learn_session *s;
  2599. struct upstream *selected;
  2600. rspamd_inet_addr_t *addr;
  2601. struct rspamd_controller_session *session = entry->ud;
  2602. gint sock;
  2603. gint ret = -1;
  2604. /* Get upstream */
  2605. while ((selected = rspamd_upstream_get_forced (rule->servers,
  2606. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  2607. /* Create UDP socket */
  2608. addr = rspamd_upstream_addr_next (selected);
  2609. if ((sock = rspamd_inet_address_connect (addr,
  2610. SOCK_DGRAM, TRUE)) == -1) {
  2611. msg_warn_task ("cannot connect to fuzzy storage %s (%s rule): %s",
  2612. rspamd_inet_address_to_string_pretty (addr),
  2613. rule->name,
  2614. strerror (errno));
  2615. rspamd_upstream_fail (selected, TRUE, strerror (errno));
  2616. }
  2617. else {
  2618. s =
  2619. rspamd_mempool_alloc0 (session->pool,
  2620. sizeof (struct fuzzy_learn_session));
  2621. s->task = task;
  2622. s->commands = commands;
  2623. s->http_entry = entry;
  2624. s->server = selected;
  2625. s->saved = saved;
  2626. s->fd = sock;
  2627. s->err = err;
  2628. s->rule = rule;
  2629. s->event_loop = task->event_loop;
  2630. /* We ref connection to avoid freeing before we process fuzzy rule */
  2631. rspamd_http_connection_ref (entry->conn);
  2632. rspamd_ev_watcher_init (&s->ev,
  2633. sock,
  2634. EV_WRITE,
  2635. fuzzy_controller_io_callback,
  2636. s);
  2637. rspamd_ev_watcher_start (s->event_loop, &s->ev,
  2638. ((double)rule->ctx->io_timeout) / 1000.0);
  2639. (*saved)++;
  2640. ret = 1;
  2641. }
  2642. }
  2643. return ret;
  2644. }
  2645. static void
  2646. fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent,
  2647. struct rspamd_http_message *msg, gint cmd, gint value, gint flag,
  2648. struct fuzzy_ctx *ctx, gboolean is_hash, guint flags)
  2649. {
  2650. struct fuzzy_rule *rule;
  2651. struct rspamd_controller_session *session = conn_ent->ud;
  2652. struct rspamd_task *task, **ptask;
  2653. gboolean processed = FALSE, skip = FALSE;
  2654. gint res = 0;
  2655. guint i;
  2656. GError **err;
  2657. GPtrArray *commands;
  2658. lua_State *L;
  2659. gint r, *saved, rules = 0, err_idx;
  2660. struct fuzzy_ctx *fuzzy_module_ctx;
  2661. /* Prepare task */
  2662. task = rspamd_task_new (session->wrk, session->cfg, NULL,
  2663. session->lang_det, conn_ent->rt->event_loop, FALSE);
  2664. task->cfg = ctx->cfg;
  2665. saved = rspamd_mempool_alloc0 (session->pool, sizeof (gint));
  2666. err = rspamd_mempool_alloc0 (session->pool, sizeof (GError *));
  2667. fuzzy_module_ctx = fuzzy_get_context (ctx->cfg);
  2668. if (!is_hash) {
  2669. /* Allocate message from string */
  2670. /* XXX: what about encrypted messages ? */
  2671. task->msg.begin = msg->body_buf.begin;
  2672. task->msg.len = msg->body_buf.len;
  2673. r = rspamd_message_parse (task);
  2674. if (r == -1) {
  2675. msg_warn_task ("<%s>: cannot process message for fuzzy",
  2676. MESSAGE_FIELD (task, message_id));
  2677. rspamd_task_free (task);
  2678. rspamd_controller_send_error (conn_ent, 400,
  2679. "Message processing error");
  2680. return;
  2681. }
  2682. rspamd_message_process (task);
  2683. }
  2684. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2685. if (rule->read_only) {
  2686. continue;
  2687. }
  2688. /* Check for flag */
  2689. if (g_hash_table_lookup (rule->mappings,
  2690. GINT_TO_POINTER (flag)) == NULL) {
  2691. msg_info_task ("skip rule %s as it has no flag %d defined"
  2692. " false", rule->name, flag);
  2693. continue;
  2694. }
  2695. /* Check learn condition */
  2696. if (rule->learn_condition_cb != -1) {
  2697. skip = FALSE;
  2698. L = session->cfg->lua_state;
  2699. lua_pushcfunction (L, &rspamd_lua_traceback);
  2700. err_idx = lua_gettop (L);
  2701. lua_rawgeti (L, LUA_REGISTRYINDEX, rule->learn_condition_cb);
  2702. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  2703. *ptask = task;
  2704. rspamd_lua_setclass (L, "rspamd{task}", -1);
  2705. if (lua_pcall (L, 1, LUA_MULTRET, err_idx) != 0) {
  2706. msg_err_task ("call to fuzzy learn condition failed: %s",
  2707. lua_tostring (L, -1));
  2708. }
  2709. else {
  2710. if (lua_gettop (L) > err_idx + 1) {
  2711. /* 2 return values */
  2712. skip = !(lua_toboolean (L, err_idx + 1));
  2713. if (lua_isnumber (L, err_idx + 2)) {
  2714. msg_info_task ("learn condition changed flag from %d to "
  2715. "%d", flag,
  2716. (gint)lua_tonumber (L, err_idx + 2));
  2717. flag = lua_tonumber (L, err_idx + 2);
  2718. }
  2719. }
  2720. else {
  2721. if (lua_isboolean (L, err_idx + 1)) {
  2722. skip = !(lua_toboolean (L, err_idx + 1));
  2723. }
  2724. else {
  2725. msg_warn_task ("set skip for rule %s as its condition "
  2726. "callback returned"
  2727. " a valid boolean", rule->name);
  2728. skip = TRUE;
  2729. }
  2730. }
  2731. }
  2732. /* Result + error function */
  2733. lua_settop (L, 0);
  2734. if (skip) {
  2735. msg_info_task ("skip rule %s by condition callback",
  2736. rule->name);
  2737. continue;
  2738. }
  2739. }
  2740. rules ++;
  2741. res = 0;
  2742. if (is_hash) {
  2743. GPtrArray *args;
  2744. const rspamd_ftok_t *arg;
  2745. guint j;
  2746. args = rspamd_http_message_find_header_multiple (msg, "Hash");
  2747. if (args) {
  2748. struct fuzzy_cmd_io *io;
  2749. commands = g_ptr_array_sized_new (args->len);
  2750. for (j = 0; j < args->len; j ++) {
  2751. arg = g_ptr_array_index (args, j);
  2752. io = fuzzy_cmd_hash (rule, cmd, arg, flag, value,
  2753. task->task_pool);
  2754. if (io) {
  2755. g_ptr_array_add (commands, io);
  2756. }
  2757. }
  2758. res = register_fuzzy_controller_call (conn_ent,
  2759. rule,
  2760. task,
  2761. commands,
  2762. saved,
  2763. err);
  2764. rspamd_mempool_add_destructor (task->task_pool,
  2765. rspamd_ptr_array_free_hard, commands);
  2766. g_ptr_array_free (args, TRUE);
  2767. }
  2768. else {
  2769. rspamd_controller_send_error (conn_ent, 400,
  2770. "No hash defined");
  2771. rspamd_task_free (task);
  2772. return;
  2773. }
  2774. }
  2775. else {
  2776. commands = fuzzy_generate_commands (task, rule, cmd, flag, value,
  2777. flags);
  2778. if (commands != NULL) {
  2779. res = register_fuzzy_controller_call (conn_ent,
  2780. rule,
  2781. task,
  2782. commands,
  2783. saved,
  2784. err);
  2785. rspamd_mempool_add_destructor (task->task_pool,
  2786. rspamd_ptr_array_free_hard, commands);
  2787. }
  2788. }
  2789. if (res > 0) {
  2790. processed = TRUE;
  2791. }
  2792. }
  2793. if (res == -1) {
  2794. if (!processed) {
  2795. msg_warn_task ("cannot send fuzzy request: %s",
  2796. strerror (errno));
  2797. rspamd_controller_send_error (conn_ent, 400, "Message sending error");
  2798. rspamd_task_free (task);
  2799. return;
  2800. }
  2801. else {
  2802. /* Some rules failed and some rules are OK */
  2803. msg_warn_task ("some rules are not processed, but we still sent this request");
  2804. }
  2805. }
  2806. else if (!processed) {
  2807. if (rules) {
  2808. msg_warn_task ("no content to generate fuzzy");
  2809. rspamd_controller_send_error (conn_ent, 404,
  2810. "No content to generate fuzzy for flag %d", flag);
  2811. }
  2812. else {
  2813. if (skip) {
  2814. rspamd_controller_send_error (conn_ent, 403,
  2815. "Message is conditionally skipped for flag %d", flag);
  2816. }
  2817. else {
  2818. msg_warn_task ("no fuzzy rules found for flag %d", flag);
  2819. rspamd_controller_send_error (conn_ent, 404,
  2820. "No fuzzy rules matched for flag %d", flag);
  2821. }
  2822. }
  2823. rspamd_task_free (task);
  2824. }
  2825. }
  2826. static int
  2827. fuzzy_controller_handler (struct rspamd_http_connection_entry *conn_ent,
  2828. struct rspamd_http_message *msg, struct module_ctx *ctx, gint cmd,
  2829. gboolean is_hash)
  2830. {
  2831. const rspamd_ftok_t *arg;
  2832. glong value = 1, flag = 0, send_flags = 0;
  2833. struct fuzzy_ctx *fuzzy_module_ctx = (struct fuzzy_ctx *)ctx;
  2834. if (!fuzzy_module_ctx->enabled) {
  2835. msg_err ("fuzzy_check module is not enabled");
  2836. rspamd_controller_send_error (conn_ent, 500, "Module disabled");
  2837. return 0;
  2838. }
  2839. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  2840. msg_err ("fuzzy_check module has no rules defined");
  2841. rspamd_controller_send_error (conn_ent, 500, "Module has no rules");
  2842. return 0;
  2843. }
  2844. /* Get size */
  2845. arg = rspamd_http_message_find_header (msg, "Weight");
  2846. if (arg) {
  2847. errno = 0;
  2848. if (!rspamd_strtol (arg->begin, arg->len, &value)) {
  2849. msg_info ("error converting numeric argument %T", arg);
  2850. }
  2851. }
  2852. arg = rspamd_http_message_find_header (msg, "Flag");
  2853. if (arg) {
  2854. errno = 0;
  2855. if (!rspamd_strtol (arg->begin, arg->len, &flag)) {
  2856. msg_info ("error converting numeric argument %T", arg);
  2857. flag = 0;
  2858. }
  2859. }
  2860. else {
  2861. flag = 0;
  2862. arg = rspamd_http_message_find_header (msg, "Symbol");
  2863. /* Search flag by symbol */
  2864. if (arg) {
  2865. struct fuzzy_rule *rule;
  2866. guint i;
  2867. GHashTableIter it;
  2868. gpointer k, v;
  2869. struct fuzzy_mapping *map;
  2870. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2871. if (flag != 0) {
  2872. break;
  2873. }
  2874. g_hash_table_iter_init (&it, rule->mappings);
  2875. while (g_hash_table_iter_next (&it, &k, &v)) {
  2876. map = v;
  2877. if (strlen (map->symbol) == arg->len &&
  2878. rspamd_lc_cmp (map->symbol, arg->begin, arg->len) == 0) {
  2879. flag = map->fuzzy_flag;
  2880. break;
  2881. }
  2882. }
  2883. }
  2884. }
  2885. }
  2886. if (flag == 0) {
  2887. msg_err ("no flag defined to learn fuzzy");
  2888. rspamd_controller_send_error (conn_ent, 404, "Unknown or missing flag");
  2889. return 0;
  2890. }
  2891. arg = rspamd_http_message_find_header (msg, "Skip-Images");
  2892. if (arg) {
  2893. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  2894. }
  2895. arg = rspamd_http_message_find_header (msg, "Skip-Attachments");
  2896. if (arg) {
  2897. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  2898. }
  2899. arg = rspamd_http_message_find_header (msg, "Skip-Text");
  2900. if (arg) {
  2901. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  2902. }
  2903. fuzzy_process_handler (conn_ent, msg, cmd, value, flag,
  2904. (struct fuzzy_ctx *)ctx, is_hash, send_flags);
  2905. return 0;
  2906. }
  2907. static inline gint
  2908. fuzzy_check_send_lua_learn (struct fuzzy_rule *rule,
  2909. struct rspamd_task *task,
  2910. GPtrArray *commands,
  2911. gint *saved,
  2912. GError **err)
  2913. {
  2914. struct fuzzy_learn_session *s;
  2915. struct upstream *selected;
  2916. rspamd_inet_addr_t *addr;
  2917. gint sock;
  2918. gint ret = -1;
  2919. /* Get upstream */
  2920. if (!rspamd_session_blocked (task->s)) {
  2921. while ((selected = rspamd_upstream_get (rule->servers,
  2922. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  2923. /* Create UDP socket */
  2924. addr = rspamd_upstream_addr_next (selected);
  2925. if ((sock = rspamd_inet_address_connect (addr,
  2926. SOCK_DGRAM, TRUE)) == -1) {
  2927. rspamd_upstream_fail (selected, TRUE, strerror (errno));
  2928. } else {
  2929. s =
  2930. rspamd_mempool_alloc0 (task->task_pool,
  2931. sizeof (struct fuzzy_learn_session));
  2932. s->task = task;
  2933. s->commands = commands;
  2934. s->http_entry = NULL;
  2935. s->server = selected;
  2936. s->saved = saved;
  2937. s->fd = sock;
  2938. s->err = err;
  2939. s->rule = rule;
  2940. s->session = task->s;
  2941. s->event_loop = task->event_loop;
  2942. rspamd_ev_watcher_init (&s->ev,
  2943. sock,
  2944. EV_WRITE,
  2945. fuzzy_controller_io_callback,
  2946. s);
  2947. rspamd_ev_watcher_start (s->event_loop, &s->ev,
  2948. ((double)rule->ctx->io_timeout) / 1000.0);
  2949. rspamd_session_add_event (task->s,
  2950. fuzzy_lua_fin,
  2951. s,
  2952. M);
  2953. (*saved)++;
  2954. ret = 1;
  2955. }
  2956. }
  2957. }
  2958. return ret;
  2959. }
  2960. static gboolean
  2961. fuzzy_check_lua_process_learn (struct rspamd_task *task,
  2962. gint cmd, gint value, gint flag, guint send_flags)
  2963. {
  2964. struct fuzzy_rule *rule;
  2965. gboolean processed = FALSE, res = TRUE;
  2966. guint i;
  2967. GError **err;
  2968. GPtrArray *commands;
  2969. gint *saved, rules = 0;
  2970. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  2971. saved = rspamd_mempool_alloc0 (task->task_pool, sizeof (gint));
  2972. err = rspamd_mempool_alloc0 (task->task_pool, sizeof (GError *));
  2973. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2974. if (!res) {
  2975. break;
  2976. }
  2977. if (rule->read_only) {
  2978. continue;
  2979. }
  2980. /* Check for flag */
  2981. if (g_hash_table_lookup (rule->mappings,
  2982. GINT_TO_POINTER (flag)) == NULL) {
  2983. msg_info_task ("skip rule %s as it has no flag %d defined"
  2984. " false", rule->name, flag);
  2985. continue;
  2986. }
  2987. rules ++;
  2988. res = 0;
  2989. commands = fuzzy_generate_commands (task, rule, cmd, flag,
  2990. value, send_flags);
  2991. if (commands != NULL) {
  2992. res = fuzzy_check_send_lua_learn (rule, task, commands,
  2993. saved, err);
  2994. rspamd_mempool_add_destructor (task->task_pool,
  2995. rspamd_ptr_array_free_hard, commands);
  2996. }
  2997. if (res) {
  2998. processed = TRUE;
  2999. }
  3000. }
  3001. if (res == -1) {
  3002. msg_warn_task ("cannot send fuzzy request: %s",
  3003. strerror (errno));
  3004. }
  3005. else if (!processed) {
  3006. if (rules) {
  3007. msg_warn_task ("no content to generate fuzzy");
  3008. return FALSE;
  3009. }
  3010. else {
  3011. msg_warn_task ("no fuzzy rules found for flag %d", flag);
  3012. return FALSE;
  3013. }
  3014. }
  3015. return TRUE;
  3016. }
  3017. static gint
  3018. fuzzy_lua_learn_handler (lua_State *L)
  3019. {
  3020. struct rspamd_task *task = lua_check_task (L, 1);
  3021. guint flag = 0, weight = 1, send_flags = 0;
  3022. const gchar *symbol;
  3023. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  3024. if (task) {
  3025. if (lua_type (L, 2) == LUA_TNUMBER) {
  3026. flag = lua_tonumber (L, 2);
  3027. }
  3028. else if (lua_type (L, 2) == LUA_TSTRING) {
  3029. struct fuzzy_rule *rule;
  3030. guint i;
  3031. GHashTableIter it;
  3032. gpointer k, v;
  3033. struct fuzzy_mapping *map;
  3034. symbol = lua_tostring (L, 2);
  3035. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3036. if (flag != 0) {
  3037. break;
  3038. }
  3039. g_hash_table_iter_init (&it, rule->mappings);
  3040. while (g_hash_table_iter_next (&it, &k, &v)) {
  3041. map = v;
  3042. if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
  3043. flag = map->fuzzy_flag;
  3044. break;
  3045. }
  3046. }
  3047. }
  3048. }
  3049. if (flag == 0) {
  3050. return luaL_error (L, "bad flag");
  3051. }
  3052. if (lua_type (L, 3) == LUA_TNUMBER) {
  3053. weight = lua_tonumber (L, 3);
  3054. }
  3055. if (lua_type (L, 4) == LUA_TTABLE) {
  3056. const gchar *sf;
  3057. for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
  3058. sf = lua_tostring (L, -1);
  3059. if (sf) {
  3060. if (g_ascii_strcasecmp (sf, "noimages") == 0) {
  3061. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3062. }
  3063. else if (g_ascii_strcasecmp (sf, "noattachments") == 0) {
  3064. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3065. }
  3066. else if (g_ascii_strcasecmp (sf, "notext") == 0) {
  3067. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3068. }
  3069. }
  3070. }
  3071. }
  3072. lua_pushboolean (L,
  3073. fuzzy_check_lua_process_learn (task, FUZZY_WRITE, weight, flag,
  3074. send_flags));
  3075. }
  3076. else {
  3077. return luaL_error (L, "invalid arguments");
  3078. }
  3079. return 1;
  3080. }
  3081. static gint
  3082. fuzzy_lua_unlearn_handler (lua_State *L)
  3083. {
  3084. struct rspamd_task *task = lua_check_task (L, 1);
  3085. guint flag = 0, weight = 1.0, send_flags = 0;
  3086. const gchar *symbol;
  3087. struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
  3088. if (task) {
  3089. if (lua_type (L, 2) == LUA_TNUMBER) {
  3090. flag = lua_tonumber (L, 1);
  3091. }
  3092. else if (lua_type (L, 2) == LUA_TSTRING) {
  3093. struct fuzzy_rule *rule;
  3094. guint i;
  3095. GHashTableIter it;
  3096. gpointer k, v;
  3097. struct fuzzy_mapping *map;
  3098. symbol = lua_tostring (L, 2);
  3099. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  3100. if (flag != 0) {
  3101. break;
  3102. }
  3103. g_hash_table_iter_init (&it, rule->mappings);
  3104. while (g_hash_table_iter_next (&it, &k, &v)) {
  3105. map = v;
  3106. if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
  3107. flag = map->fuzzy_flag;
  3108. break;
  3109. }
  3110. }
  3111. }
  3112. }
  3113. if (flag == 0) {
  3114. return luaL_error (L, "bad flag");
  3115. }
  3116. if (lua_type (L, 3) == LUA_TNUMBER) {
  3117. weight = lua_tonumber (L, 3);
  3118. }
  3119. if (lua_type (L, 4) == LUA_TTABLE) {
  3120. const gchar *sf;
  3121. for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
  3122. sf = lua_tostring (L, -1);
  3123. if (sf) {
  3124. if (g_ascii_strcasecmp (sf, "noimages") == 0) {
  3125. send_flags |= FUZZY_CHECK_FLAG_NOIMAGES;
  3126. }
  3127. else if (g_ascii_strcasecmp (sf, "noattachments") == 0) {
  3128. send_flags |= FUZZY_CHECK_FLAG_NOATTACHMENTS;
  3129. }
  3130. else if (g_ascii_strcasecmp (sf, "notext") == 0) {
  3131. send_flags |= FUZZY_CHECK_FLAG_NOTEXT;
  3132. }
  3133. }
  3134. }
  3135. }
  3136. lua_pushboolean (L,
  3137. fuzzy_check_lua_process_learn (task, FUZZY_DEL, weight, flag,
  3138. send_flags));
  3139. }
  3140. else {
  3141. return luaL_error (L, "invalid arguments");
  3142. }
  3143. return 1;
  3144. }
  3145. static gboolean
  3146. fuzzy_add_handler (struct rspamd_http_connection_entry *conn_ent,
  3147. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3148. {
  3149. return fuzzy_controller_handler (conn_ent, msg,
  3150. ctx, FUZZY_WRITE, FALSE);
  3151. }
  3152. static gboolean
  3153. fuzzy_delete_handler (struct rspamd_http_connection_entry *conn_ent,
  3154. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3155. {
  3156. return fuzzy_controller_handler (conn_ent, msg,
  3157. ctx, FUZZY_DEL, FALSE);
  3158. }
  3159. static gboolean
  3160. fuzzy_deletehash_handler (struct rspamd_http_connection_entry *conn_ent,
  3161. struct rspamd_http_message *msg, struct module_ctx *ctx)
  3162. {
  3163. return fuzzy_controller_handler (conn_ent, msg,
  3164. ctx, FUZZY_DEL, TRUE);
  3165. }
  3166. static int
  3167. fuzzy_attach_controller (struct module_ctx *ctx, GHashTable *commands)
  3168. {
  3169. struct fuzzy_ctx *fctx = (struct fuzzy_ctx *)ctx;
  3170. struct rspamd_custom_controller_command *cmd;
  3171. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  3172. cmd->privilleged = TRUE;
  3173. cmd->require_message = TRUE;
  3174. cmd->handler = fuzzy_add_handler;
  3175. cmd->ctx = ctx;
  3176. g_hash_table_insert (commands, "/fuzzyadd", cmd);
  3177. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  3178. cmd->privilleged = TRUE;
  3179. cmd->require_message = TRUE;
  3180. cmd->handler = fuzzy_delete_handler;
  3181. cmd->ctx = ctx;
  3182. g_hash_table_insert (commands, "/fuzzydel", cmd);
  3183. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  3184. cmd->privilleged = TRUE;
  3185. cmd->require_message = FALSE;
  3186. cmd->handler = fuzzy_deletehash_handler;
  3187. cmd->ctx = ctx;
  3188. g_hash_table_insert (commands, "/fuzzydelhash", cmd);
  3189. return 0;
  3190. }