You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fuzzy_check.c 73KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /***MODULE:fuzzy
  17. * rspamd module that checks fuzzy checksums for messages
  18. *
  19. * Allowed options:
  20. * - symbol (string): symbol to insert (default: 'R_FUZZY')
  21. * - max_score (double): maximum score to which hash weights are normalized (default: 0 - no normalization)
  22. *
  23. * - fuzzy_map (string): a string that contains map in format { fuzzy_key => [ symbol, weight ] } where fuzzy_key is number of
  24. * fuzzy list. This string itself should be in format 1:R_FUZZY_SAMPLE1:10,2:R_FUZZY_SAMPLE2:1 etc, where first number is fuzzy
  25. * key, second is symbol to insert and third - weight for normalization
  26. *
  27. * - min_length (integer): minimum length (in words) for text part to be checked for fuzzy hash (default: 0 - no limit)
  28. * - whitelist (map string): map of ip addresses that should not be checked with this module
  29. * - servers (string): list of fuzzy servers in format "server1:port,server2:port" - these servers would be used for checking and storing
  30. * fuzzy hashes
  31. */
  32. #include "config.h"
  33. #include "libmime/message.h"
  34. #include "libutil/map.h"
  35. #include "libmime/images.h"
  36. #include "libserver/worker_util.h"
  37. #include "fuzzy_wire.h"
  38. #include "utlist.h"
  39. #include "cryptobox.h"
  40. #include "ottery.h"
  41. #include "keypair.h"
  42. #include "lua/lua_common.h"
  43. #include "unix-std.h"
  44. #include "libutil/http_private.h"
  45. #include <math.h>
  46. #define DEFAULT_SYMBOL "R_FUZZY_HASH"
  47. #define DEFAULT_UPSTREAM_ERROR_TIME 10
  48. #define DEFAULT_UPSTREAM_DEAD_TIME 300
  49. #define DEFAULT_UPSTREAM_MAXERRORS 10
  50. #define DEFAULT_IO_TIMEOUT 500
  51. #define DEFAULT_RETRANSMITS 3
  52. #define DEFAULT_PORT 11335
  53. #define RSPAMD_FUZZY_PLUGIN_VERSION RSPAMD_FUZZY_VERSION
  54. static const gint rspamd_fuzzy_hash_len = 5;
  55. struct fuzzy_mapping {
  56. guint64 fuzzy_flag;
  57. const gchar *symbol;
  58. double weight;
  59. };
  60. struct fuzzy_mime_type {
  61. rspamd_regexp_t *type_re;
  62. rspamd_regexp_t *subtype_re;
  63. };
  64. struct fuzzy_rule {
  65. struct upstream_list *servers;
  66. const gchar *symbol;
  67. const gchar *algorithm_str;
  68. const gchar *name;
  69. enum rspamd_shingle_alg alg;
  70. GHashTable *mappings;
  71. GPtrArray *mime_types;
  72. GPtrArray *fuzzy_headers;
  73. GString *hash_key;
  74. GString *shingles_key;
  75. struct rspamd_cryptobox_keypair *local_key;
  76. struct rspamd_cryptobox_pubkey *peer_key;
  77. double max_score;
  78. gboolean read_only;
  79. gboolean skip_unknown;
  80. gint learn_condition_cb;
  81. };
  82. struct fuzzy_ctx {
  83. struct module_ctx ctx;
  84. rspamd_mempool_t *fuzzy_pool;
  85. GPtrArray *fuzzy_rules;
  86. struct rspamd_config *cfg;
  87. const gchar *default_symbol;
  88. guint32 min_hash_len;
  89. radix_compressed_t *whitelist;
  90. struct rspamd_keypair_cache *keypairs_cache;
  91. guint32 min_bytes;
  92. guint32 min_height;
  93. guint32 min_width;
  94. guint32 io_timeout;
  95. guint32 retransmits;
  96. gboolean enabled;
  97. };
  98. struct fuzzy_client_session {
  99. GPtrArray *commands;
  100. struct rspamd_task *task;
  101. struct upstream *server;
  102. rspamd_inet_addr_t *addr;
  103. struct fuzzy_rule *rule;
  104. struct event ev;
  105. struct event timev;
  106. struct timeval tv;
  107. gint state;
  108. gint fd;
  109. guint retransmits;
  110. };
  111. struct fuzzy_learn_session {
  112. GPtrArray *commands;
  113. gint *saved;
  114. GError **err;
  115. struct rspamd_http_connection_entry *http_entry;
  116. struct rspamd_async_session *session;
  117. struct upstream *server;
  118. rspamd_inet_addr_t *addr;
  119. struct fuzzy_rule *rule;
  120. struct rspamd_task *task;
  121. struct event ev;
  122. struct event timev;
  123. struct timeval tv;
  124. gint fd;
  125. guint retransmits;
  126. };
  127. #define FUZZY_CMD_FLAG_REPLIED (1 << 0)
  128. #define FUZZY_CMD_FLAG_SENT (1 << 1)
  129. #define FUZZY_CMD_FLAG_IMAGE (1 << 2)
  130. struct fuzzy_cmd_io {
  131. guint32 tag;
  132. guint32 flags;
  133. struct rspamd_fuzzy_cmd cmd;
  134. struct iovec io;
  135. };
  136. static struct fuzzy_ctx *fuzzy_module_ctx = NULL;
  137. static const char *default_headers = "Subject,Content-Type,Reply-To,X-Mailer";
  138. static void fuzzy_symbol_callback (struct rspamd_task *task, void *unused);
  139. /* Initialization */
  140. gint fuzzy_check_module_init (struct rspamd_config *cfg,
  141. struct module_ctx **ctx);
  142. gint fuzzy_check_module_config (struct rspamd_config *cfg);
  143. gint fuzzy_check_module_reconfig (struct rspamd_config *cfg);
  144. static gint fuzzy_attach_controller (struct module_ctx *ctx,
  145. GHashTable *commands);
  146. static gint fuzzy_lua_learn_handler (lua_State *L);
  147. static gint fuzzy_lua_unlearn_handler (lua_State *L);
  148. module_t fuzzy_check_module = {
  149. "fuzzy_check",
  150. fuzzy_check_module_init,
  151. fuzzy_check_module_config,
  152. fuzzy_check_module_reconfig,
  153. fuzzy_attach_controller,
  154. RSPAMD_MODULE_VER
  155. };
  156. static void
  157. parse_flags (struct fuzzy_rule *rule,
  158. struct rspamd_config *cfg,
  159. const ucl_object_t *val,
  160. gint cb_id)
  161. {
  162. const ucl_object_t *elt;
  163. struct fuzzy_mapping *map;
  164. const gchar *sym = NULL;
  165. if (val->type == UCL_STRING) {
  166. msg_err_config (
  167. "string mappings are deprecated and no longer supported, use new style configuration");
  168. }
  169. else if (val->type == UCL_OBJECT) {
  170. elt = ucl_object_lookup (val, "symbol");
  171. if (elt == NULL || !ucl_object_tostring_safe (elt, &sym)) {
  172. sym = ucl_object_key (val);
  173. }
  174. if (sym != NULL) {
  175. map =
  176. rspamd_mempool_alloc (fuzzy_module_ctx->fuzzy_pool,
  177. sizeof (struct fuzzy_mapping));
  178. map->symbol = sym;
  179. elt = ucl_object_lookup (val, "flag");
  180. if (elt != NULL) {
  181. map->fuzzy_flag = ucl_obj_toint (elt);
  182. elt = ucl_object_lookup (val, "max_score");
  183. if (elt != NULL) {
  184. map->weight = ucl_obj_todouble (elt);
  185. }
  186. else {
  187. map->weight = rule->max_score;
  188. }
  189. /* Add flag to hash table */
  190. g_hash_table_insert (rule->mappings,
  191. GINT_TO_POINTER (map->fuzzy_flag), map);
  192. rspamd_symbols_cache_add_symbol (cfg->cache,
  193. map->symbol, 0,
  194. NULL, NULL,
  195. SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE,
  196. cb_id);
  197. }
  198. else {
  199. msg_err_config ("fuzzy_map parameter has no flag definition");
  200. }
  201. }
  202. else {
  203. msg_err_config ("fuzzy_map parameter has no symbol definition");
  204. }
  205. }
  206. else {
  207. msg_err_config ("fuzzy_map parameter is of an unsupported type");
  208. }
  209. }
  210. static GPtrArray *
  211. parse_mime_types (const gchar *str)
  212. {
  213. gchar **strvec, *p;
  214. gint num, i;
  215. struct fuzzy_mime_type *type;
  216. GPtrArray *res = g_ptr_array_new ();
  217. strvec = g_strsplit_set (str, ",", 0);
  218. num = g_strv_length (strvec);
  219. res = g_ptr_array_sized_new (num);
  220. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
  221. rspamd_ptr_array_free_hard, res);
  222. for (i = 0; i < num; i++) {
  223. g_strstrip (strvec[i]);
  224. if ((p = strchr (strvec[i], '/')) != NULL) {
  225. type = rspamd_mempool_alloc (fuzzy_module_ctx->fuzzy_pool,
  226. sizeof (struct fuzzy_mime_type));
  227. type->type_re = rspamd_regexp_from_glob (strvec[i], p - strvec[i],
  228. NULL);
  229. type->subtype_re = rspamd_regexp_from_glob (p + 1, 0, NULL);
  230. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
  231. (rspamd_mempool_destruct_t)rspamd_regexp_unref,
  232. type->type_re);
  233. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
  234. (rspamd_mempool_destruct_t)rspamd_regexp_unref,
  235. type->subtype_re);
  236. g_ptr_array_add (res, type);
  237. }
  238. else {
  239. type = rspamd_mempool_alloc (fuzzy_module_ctx->fuzzy_pool,
  240. sizeof (struct fuzzy_mime_type));
  241. type->type_re = rspamd_regexp_from_glob (strvec[i], 0, NULL);
  242. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
  243. (rspamd_mempool_destruct_t)rspamd_regexp_unref,
  244. type->type_re);
  245. type->subtype_re = NULL;
  246. g_ptr_array_add (res, type);
  247. }
  248. }
  249. g_strfreev (strvec);
  250. return res;
  251. }
  252. static GPtrArray *
  253. parse_fuzzy_headers (const gchar *str)
  254. {
  255. gchar **strvec;
  256. gint num, i;
  257. GPtrArray *res;
  258. strvec = g_strsplit_set (str, ",", 0);
  259. num = g_strv_length (strvec);
  260. res = g_ptr_array_sized_new (num);
  261. for (i = 0; i < num; i++) {
  262. g_strstrip (strvec[i]);
  263. g_ptr_array_add (res, rspamd_mempool_strdup (
  264. fuzzy_module_ctx->fuzzy_pool, strvec[i]));
  265. }
  266. g_strfreev (strvec);
  267. return res;
  268. }
  269. static gboolean
  270. fuzzy_check_content_type (struct fuzzy_rule *rule, struct rspamd_content_type *ct)
  271. {
  272. struct fuzzy_mime_type *ft;
  273. guint i;
  274. PTR_ARRAY_FOREACH (rule->mime_types, i, ft) {
  275. if (ft->type_re) {
  276. if (rspamd_regexp_match (ft->type_re, ct->type.begin, ct->type.len,
  277. TRUE)) {
  278. if (ft->subtype_re) {
  279. if (rspamd_regexp_match (ft->subtype_re, ct->subtype.begin,
  280. ct->subtype.len, TRUE)) {
  281. return TRUE;
  282. }
  283. }
  284. else {
  285. return TRUE;
  286. }
  287. }
  288. }
  289. }
  290. return FALSE;
  291. }
  292. static double
  293. fuzzy_normalize (gint32 in, double weight)
  294. {
  295. if (weight == 0) {
  296. return 0;
  297. }
  298. #ifdef HAVE_TANH
  299. return tanh (G_E * (double)in / weight);
  300. #else
  301. return (in < weight ? in / weight : weight);
  302. #endif
  303. }
  304. static struct fuzzy_rule *
  305. fuzzy_rule_new (const char *default_symbol, rspamd_mempool_t *pool)
  306. {
  307. struct fuzzy_rule *rule;
  308. rule = rspamd_mempool_alloc0 (pool, sizeof (struct fuzzy_rule));
  309. rule->mappings = g_hash_table_new (g_direct_hash, g_direct_equal);
  310. rule->symbol = default_symbol;
  311. rspamd_mempool_add_destructor (pool,
  312. (rspamd_mempool_destruct_t)g_hash_table_unref,
  313. rule->mappings);
  314. rule->read_only = FALSE;
  315. return rule;
  316. }
  317. static void
  318. fuzzy_free_rule (gpointer r)
  319. {
  320. struct fuzzy_rule *rule = (struct fuzzy_rule *)r;
  321. g_string_free (rule->hash_key, TRUE);
  322. g_string_free (rule->shingles_key, TRUE);
  323. if (rule->local_key) {
  324. rspamd_keypair_unref (rule->local_key);
  325. }
  326. if (rule->peer_key) {
  327. rspamd_pubkey_unref (rule->peer_key);
  328. }
  329. }
  330. static gint
  331. fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj,
  332. const gchar *name, gint cb_id)
  333. {
  334. const ucl_object_t *value, *cur;
  335. struct fuzzy_rule *rule;
  336. ucl_object_iter_t it = NULL;
  337. const char *k = NULL, *key_str = NULL, *shingles_key_str = NULL, *lua_script;
  338. if (obj->type != UCL_OBJECT) {
  339. msg_err_config ("invalid rule definition");
  340. return -1;
  341. }
  342. rule = fuzzy_rule_new (fuzzy_module_ctx->default_symbol,
  343. fuzzy_module_ctx->fuzzy_pool);
  344. rule->learn_condition_cb = -1;
  345. rule->alg = RSPAMD_SHINGLES_OLD;
  346. if ((value = ucl_object_lookup (obj, "mime_types")) != NULL) {
  347. it = NULL;
  348. while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY))
  349. != NULL) {
  350. GPtrArray *tmp;
  351. guint i;
  352. gpointer ptr;
  353. tmp = parse_mime_types (ucl_obj_tostring (cur));
  354. if (tmp) {
  355. if (rule->mime_types) {
  356. PTR_ARRAY_FOREACH (tmp, i, ptr) {
  357. g_ptr_array_add (rule->mime_types, ptr);
  358. }
  359. g_ptr_array_free (tmp, TRUE);
  360. }
  361. else {
  362. rule->mime_types = tmp;
  363. }
  364. }
  365. }
  366. }
  367. if (rule->mime_types != NULL) {
  368. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
  369. (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard,
  370. rule->mime_types);
  371. }
  372. if ((value = ucl_object_lookup (obj, "headers")) != NULL) {
  373. it = NULL;
  374. while ((cur = ucl_object_iterate (value, &it, value->type == UCL_ARRAY))
  375. != NULL) {
  376. GPtrArray *tmp;
  377. guint i;
  378. gpointer ptr;
  379. tmp = parse_fuzzy_headers (ucl_obj_tostring (cur));
  380. if (tmp) {
  381. if (rule->fuzzy_headers) {
  382. PTR_ARRAY_FOREACH (tmp, i, ptr) {
  383. g_ptr_array_add (rule->fuzzy_headers, ptr);
  384. }
  385. g_ptr_array_free (tmp, TRUE);
  386. }
  387. else {
  388. rule->fuzzy_headers = tmp;
  389. }
  390. }
  391. }
  392. }
  393. else {
  394. rule->fuzzy_headers = parse_fuzzy_headers (default_headers);
  395. }
  396. if (rule->fuzzy_headers != NULL) {
  397. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
  398. (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard,
  399. rule->fuzzy_headers);
  400. }
  401. if ((value = ucl_object_lookup (obj, "max_score")) != NULL) {
  402. rule->max_score = ucl_obj_todouble (value);
  403. }
  404. if ((value = ucl_object_lookup (obj, "symbol")) != NULL) {
  405. rule->symbol = ucl_obj_tostring (value);
  406. }
  407. if (name) {
  408. rule->name = name;
  409. }
  410. else {
  411. rule->name = rule->symbol;
  412. }
  413. if ((value = ucl_object_lookup (obj, "read_only")) != NULL) {
  414. rule->read_only = ucl_obj_toboolean (value);
  415. }
  416. if ((value = ucl_object_lookup (obj, "skip_unknown")) != NULL) {
  417. rule->skip_unknown = ucl_obj_toboolean (value);
  418. }
  419. if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) {
  420. rule->algorithm_str = ucl_object_tostring (value);
  421. if (rule->algorithm_str) {
  422. if (g_ascii_strcasecmp (rule->algorithm_str, "old") == 0 ||
  423. g_ascii_strcasecmp (rule->algorithm_str, "siphash") == 0) {
  424. rule->alg = RSPAMD_SHINGLES_OLD;
  425. }
  426. else if (g_ascii_strcasecmp (rule->algorithm_str, "xxhash") == 0) {
  427. rule->alg = RSPAMD_SHINGLES_XXHASH;
  428. }
  429. else if (g_ascii_strcasecmp (rule->algorithm_str, "mumhash") == 0) {
  430. rule->alg = RSPAMD_SHINGLES_MUMHASH;
  431. }
  432. else if (g_ascii_strcasecmp (rule->algorithm_str, "fasthash") == 0 ||
  433. g_ascii_strcasecmp (rule->algorithm_str, "fast") == 0) {
  434. rule->alg = RSPAMD_SHINGLES_FAST;
  435. }
  436. else {
  437. msg_warn_config ("unknown algorithm: %s, use siphash by default",
  438. rule->algorithm_str);
  439. }
  440. }
  441. }
  442. /* Set a consistent and short string name */
  443. switch (rule->alg) {
  444. case RSPAMD_SHINGLES_OLD:
  445. rule->algorithm_str = "sip";
  446. break;
  447. case RSPAMD_SHINGLES_XXHASH:
  448. rule->algorithm_str = "xx";
  449. break;
  450. case RSPAMD_SHINGLES_MUMHASH:
  451. rule->algorithm_str = "mum";
  452. break;
  453. case RSPAMD_SHINGLES_FAST:
  454. rule->algorithm_str = "fast";
  455. break;
  456. }
  457. if ((value = ucl_object_lookup (obj, "servers")) != NULL) {
  458. rule->servers = rspamd_upstreams_create (cfg->ups_ctx);
  459. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool,
  460. (rspamd_mempool_destruct_t)rspamd_upstreams_destroy,
  461. rule->servers);
  462. if (!rspamd_upstreams_from_ucl (rule->servers, value, DEFAULT_PORT, NULL)) {
  463. msg_err_config ("cannot read servers definition");
  464. return -1;
  465. }
  466. }
  467. if ((value = ucl_object_lookup (obj, "fuzzy_map")) != NULL) {
  468. it = NULL;
  469. while ((cur = ucl_object_iterate (value, &it, true)) != NULL) {
  470. parse_flags (rule, cfg, cur, cb_id);
  471. }
  472. }
  473. if ((value = ucl_object_lookup (obj, "encryption_key")) != NULL) {
  474. /* Create key from user's input */
  475. k = ucl_object_tostring (value);
  476. if (k == NULL || (rule->peer_key =
  477. rspamd_pubkey_from_base32 (k, 0, RSPAMD_KEYPAIR_KEX,
  478. RSPAMD_CRYPTOBOX_MODE_25519)) == NULL) {
  479. msg_err_config ("bad encryption key value: %s",
  480. k);
  481. return -1;
  482. }
  483. rule->local_key = rspamd_keypair_new (RSPAMD_KEYPAIR_KEX,
  484. RSPAMD_CRYPTOBOX_MODE_25519);
  485. }
  486. if ((value = ucl_object_lookup (obj, "learn_condition")) != NULL) {
  487. lua_script = ucl_object_tostring (value);
  488. if (lua_script) {
  489. if (luaL_dostring (cfg->lua_state, lua_script) != 0) {
  490. msg_err_config ("cannot execute lua script for fuzzy "
  491. "learn condition: %s", lua_tostring (cfg->lua_state, -1));
  492. }
  493. else {
  494. if (lua_type (cfg->lua_state, -1) == LUA_TFUNCTION) {
  495. rule->learn_condition_cb = luaL_ref (cfg->lua_state,
  496. LUA_REGISTRYINDEX);
  497. msg_info_config ("loaded learn condition script for fuzzy rule:"
  498. " %s", rule->name);
  499. }
  500. else {
  501. msg_err_config ("lua script must return "
  502. "function(task) and not %s",
  503. lua_typename (cfg->lua_state,
  504. lua_type (cfg->lua_state, -1)));
  505. }
  506. }
  507. }
  508. }
  509. key_str = NULL;
  510. if ((value = ucl_object_lookup (obj, "fuzzy_key")) != NULL) {
  511. /* Create key from user's input */
  512. key_str = ucl_object_tostring (value);
  513. }
  514. /* Setup keys */
  515. if (key_str == NULL) {
  516. /* Use some default key for all ops */
  517. key_str = "rspamd";
  518. }
  519. rule->hash_key = g_string_sized_new (rspamd_cryptobox_HASHBYTES);
  520. rspamd_cryptobox_hash (rule->hash_key->str, key_str, strlen (key_str), NULL, 0);
  521. rule->hash_key->len = rspamd_cryptobox_HASHKEYBYTES;
  522. shingles_key_str = NULL;
  523. if ((value = ucl_object_lookup (obj, "fuzzy_shingles_key")) != NULL) {
  524. shingles_key_str = ucl_object_tostring (value);
  525. }
  526. if (shingles_key_str == NULL) {
  527. shingles_key_str = "rspamd";
  528. }
  529. rule->shingles_key = g_string_sized_new (rspamd_cryptobox_HASHBYTES);
  530. rspamd_cryptobox_hash (rule->shingles_key->str, shingles_key_str,
  531. strlen (shingles_key_str), NULL, 0);
  532. rule->shingles_key->len = 16;
  533. if (rspamd_upstreams_count (rule->servers) == 0) {
  534. msg_err_config ("no servers defined for fuzzy rule with name: %s",
  535. rule->name);
  536. return -1;
  537. }
  538. else {
  539. g_ptr_array_add (fuzzy_module_ctx->fuzzy_rules, rule);
  540. if (rule->symbol != fuzzy_module_ctx->default_symbol) {
  541. rspamd_symbols_cache_add_symbol (cfg->cache, rule->symbol,
  542. 0,
  543. NULL, NULL,
  544. SYMBOL_TYPE_VIRTUAL|SYMBOL_TYPE_FINE,
  545. cb_id);
  546. }
  547. msg_info_config ("added fuzzy rule %s, key: %*xs, "
  548. "shingles_key: %*xs, algorithm: %s",
  549. rule->symbol,
  550. 6, rule->hash_key->str,
  551. 6, rule->shingles_key->str,
  552. rule->algorithm_str);
  553. }
  554. rspamd_mempool_add_destructor (fuzzy_module_ctx->fuzzy_pool, fuzzy_free_rule,
  555. rule);
  556. return 0;
  557. }
  558. gint
  559. fuzzy_check_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  560. {
  561. lua_State *L = cfg->lua_state;
  562. if (fuzzy_module_ctx == NULL) {
  563. fuzzy_module_ctx = g_malloc0 (sizeof (struct fuzzy_ctx));
  564. fuzzy_module_ctx->fuzzy_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), NULL);
  565. /* TODO: this should match rules count actually */
  566. fuzzy_module_ctx->keypairs_cache = rspamd_keypair_cache_new (32);
  567. fuzzy_module_ctx->fuzzy_rules = g_ptr_array_new ();
  568. }
  569. fuzzy_module_ctx->cfg = cfg;
  570. *ctx = (struct module_ctx *)fuzzy_module_ctx;
  571. rspamd_rcl_add_doc_by_path (cfg,
  572. NULL,
  573. "Fuzzy check plugin",
  574. "fuzzy_check",
  575. UCL_OBJECT,
  576. NULL,
  577. 0,
  578. NULL,
  579. 0);
  580. rspamd_rcl_add_doc_by_path (cfg,
  581. "fuzzy_check",
  582. "Default symbol",
  583. "symbol",
  584. UCL_STRING,
  585. NULL,
  586. 0,
  587. NULL,
  588. 0);
  589. rspamd_rcl_add_doc_by_path (cfg,
  590. "fuzzy_check",
  591. "Minimum number of *words* to check a text part",
  592. "min_length",
  593. UCL_INT,
  594. NULL,
  595. 0,
  596. NULL,
  597. 0);
  598. rspamd_rcl_add_doc_by_path (cfg,
  599. "fuzzy_check",
  600. "Minimum number of *bytes* to check a non-text part",
  601. "min_bytes",
  602. UCL_INT,
  603. NULL,
  604. 0,
  605. NULL,
  606. 0);
  607. rspamd_rcl_add_doc_by_path (cfg,
  608. "fuzzy_check",
  609. "Minimum height in pixels for embedded images to check using fuzzy storage",
  610. "min_height",
  611. UCL_INT,
  612. NULL,
  613. 0,
  614. NULL,
  615. 0);
  616. rspamd_rcl_add_doc_by_path (cfg,
  617. "fuzzy_check",
  618. "Minimum width in pixels for embedded images to check using fuzzy storage",
  619. "min_width",
  620. UCL_INT,
  621. NULL,
  622. 0,
  623. NULL,
  624. 0);
  625. rspamd_rcl_add_doc_by_path (cfg,
  626. "fuzzy_check",
  627. "Timeout for waiting reply from a fuzzy server",
  628. "timeout",
  629. UCL_TIME,
  630. NULL,
  631. 0,
  632. NULL,
  633. 0);
  634. rspamd_rcl_add_doc_by_path (cfg,
  635. "fuzzy_check",
  636. "Maximum number of retransmits for a single request",
  637. "retransmits",
  638. UCL_INT,
  639. NULL,
  640. 0,
  641. NULL,
  642. 0);
  643. rspamd_rcl_add_doc_by_path (cfg,
  644. "fuzzy_check",
  645. "Whitelisted IPs map",
  646. "whitelist",
  647. UCL_STRING,
  648. NULL,
  649. 0,
  650. NULL,
  651. 0);
  652. /* Rules doc strings */
  653. rspamd_rcl_add_doc_by_path (cfg,
  654. "fuzzy_check",
  655. "Fuzzy check rule",
  656. "rule",
  657. UCL_OBJECT,
  658. NULL,
  659. 0,
  660. NULL,
  661. 0);
  662. rspamd_rcl_add_doc_by_path (cfg,
  663. "fuzzy_check.rule",
  664. "Headers that are used to make a separate hash",
  665. "headers",
  666. UCL_ARRAY,
  667. NULL,
  668. 0,
  669. NULL,
  670. 0);
  671. rspamd_rcl_add_doc_by_path (cfg,
  672. "fuzzy_check.rule",
  673. "Set of mime types (in form type/subtype, or type/*, or *) to check with fuzzy",
  674. "mime_types",
  675. UCL_ARRAY,
  676. NULL,
  677. 0,
  678. NULL,
  679. 0);
  680. rspamd_rcl_add_doc_by_path (cfg,
  681. "fuzzy_check.rule",
  682. "Maximum value for fuzzy hash when weight of symbol is exactly 1.0 (if value is higher then score is still 1.0)",
  683. "max_score",
  684. UCL_INT,
  685. NULL,
  686. 0,
  687. NULL,
  688. 0);
  689. rspamd_rcl_add_doc_by_path (cfg,
  690. "fuzzy_check.rule",
  691. "List of servers to check (or learn)",
  692. "servers",
  693. UCL_STRING,
  694. NULL,
  695. 0,
  696. NULL,
  697. 0);
  698. rspamd_rcl_add_doc_by_path (cfg,
  699. "fuzzy_check.rule",
  700. "If true then never try to learn this fuzzy storage",
  701. "read_only",
  702. UCL_BOOLEAN,
  703. NULL,
  704. 0,
  705. NULL,
  706. 0);
  707. rspamd_rcl_add_doc_by_path (cfg,
  708. "fuzzy_check.rule",
  709. "If true then ignore unknown flags and not add the default fuzzy symbol",
  710. "skip_unknown",
  711. UCL_BOOLEAN,
  712. NULL,
  713. 0,
  714. NULL,
  715. 0);
  716. rspamd_rcl_add_doc_by_path (cfg,
  717. "fuzzy_check.rule",
  718. "Default symbol for rule (if no flags defined or matched)",
  719. "symbol",
  720. UCL_STRING,
  721. NULL,
  722. 0,
  723. NULL,
  724. 0);
  725. rspamd_rcl_add_doc_by_path (cfg,
  726. "fuzzy_check.rule",
  727. "Base32 value for the protocol encryption public key",
  728. "encryption_key",
  729. UCL_STRING,
  730. NULL,
  731. 0,
  732. NULL,
  733. 0);
  734. rspamd_rcl_add_doc_by_path (cfg,
  735. "fuzzy_check.rule",
  736. "Base32 value for the hashing key (for private storages)",
  737. "fuzzy_key",
  738. UCL_STRING,
  739. NULL,
  740. 0,
  741. NULL,
  742. 0);
  743. rspamd_rcl_add_doc_by_path (cfg,
  744. "fuzzy_check.rule",
  745. "Base32 value for the shingles hashing key (for private storages)",
  746. "fuzzy_shingles_key",
  747. UCL_STRING,
  748. NULL,
  749. 0,
  750. NULL,
  751. 0);
  752. rspamd_rcl_add_doc_by_path (cfg,
  753. "fuzzy_check.rule",
  754. "Lua script that returns boolean function to check if this task "
  755. "should be considered when learning fuzzy storage",
  756. "learn_condition",
  757. UCL_STRING,
  758. NULL,
  759. 0,
  760. NULL,
  761. 0);
  762. rspamd_rcl_add_doc_by_path (cfg,
  763. "fuzzy_check.rule",
  764. "Map of SYMBOL -> data for flags configuration",
  765. "fuzzy_map",
  766. UCL_OBJECT,
  767. NULL,
  768. 0,
  769. NULL,
  770. 0);
  771. /* Fuzzy map doc strings */
  772. rspamd_rcl_add_doc_by_path (cfg,
  773. "fuzzy_check.rule.fuzzy_map",
  774. "Maximum score for this flag",
  775. "max_score",
  776. UCL_INT,
  777. NULL,
  778. 0,
  779. NULL,
  780. 0);
  781. rspamd_rcl_add_doc_by_path (cfg,
  782. "fuzzy_check.rule.fuzzy_map",
  783. "Flag number",
  784. "flag",
  785. UCL_INT,
  786. NULL,
  787. 0,
  788. NULL,
  789. 0);
  790. /* Register global methods */
  791. lua_getglobal (L, "rspamd_plugins");
  792. if (lua_type (L, -1) == LUA_TTABLE) {
  793. lua_pushstring (L, "fuzzy_check");
  794. lua_createtable (L, 0, 2);
  795. /* Set methods */
  796. lua_pushstring (L, "unlearn");
  797. lua_pushcfunction (L, fuzzy_lua_unlearn_handler);
  798. lua_settable (L, -3);
  799. lua_pushstring (L, "learn");
  800. lua_pushcfunction (L, fuzzy_lua_learn_handler);
  801. lua_settable (L, -3);
  802. /* Finish fuzzy_check key */
  803. lua_settable (L, -3);
  804. }
  805. lua_pop (L, 1); /* Remove global function */
  806. return 0;
  807. }
  808. gint
  809. fuzzy_check_module_config (struct rspamd_config *cfg)
  810. {
  811. const ucl_object_t *value, *cur, *elt;
  812. ucl_object_iter_t it;
  813. gint res = TRUE, cb_id, nrules = 0;
  814. if (!rspamd_config_is_module_enabled (cfg, "fuzzy_check")) {
  815. return TRUE;
  816. }
  817. fuzzy_module_ctx->enabled = TRUE;
  818. if ((value =
  819. rspamd_config_get_module_opt (cfg, "fuzzy_check", "symbol")) != NULL) {
  820. fuzzy_module_ctx->default_symbol = ucl_obj_tostring (value);
  821. }
  822. else {
  823. fuzzy_module_ctx->default_symbol = DEFAULT_SYMBOL;
  824. }
  825. if ((value =
  826. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  827. "min_length")) != NULL) {
  828. fuzzy_module_ctx->min_hash_len = ucl_obj_toint (value);
  829. }
  830. else {
  831. fuzzy_module_ctx->min_hash_len = 0;
  832. }
  833. if ((value =
  834. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  835. "min_bytes")) != NULL) {
  836. fuzzy_module_ctx->min_bytes = ucl_obj_toint (value);
  837. }
  838. else {
  839. fuzzy_module_ctx->min_bytes = 0;
  840. }
  841. if ((value =
  842. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  843. "min_height")) != NULL) {
  844. fuzzy_module_ctx->min_height = ucl_obj_toint (value);
  845. }
  846. else {
  847. fuzzy_module_ctx->min_height = 0;
  848. }
  849. if ((value =
  850. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  851. "min_width")) != NULL) {
  852. fuzzy_module_ctx->min_width = ucl_obj_toint (value);
  853. }
  854. else {
  855. fuzzy_module_ctx->min_width = 0;
  856. }
  857. if ((value =
  858. rspamd_config_get_module_opt (cfg, "fuzzy_check", "timeout")) != NULL) {
  859. fuzzy_module_ctx->io_timeout = ucl_obj_todouble (value) * 1000;
  860. }
  861. else {
  862. fuzzy_module_ctx->io_timeout = DEFAULT_IO_TIMEOUT;
  863. }
  864. if ((value =
  865. rspamd_config_get_module_opt (cfg,
  866. "fuzzy_check",
  867. "retransmits")) != NULL) {
  868. fuzzy_module_ctx->retransmits = ucl_obj_toint (value);
  869. }
  870. else {
  871. fuzzy_module_ctx->retransmits = DEFAULT_RETRANSMITS;
  872. }
  873. if ((value =
  874. rspamd_config_get_module_opt (cfg, "fuzzy_check",
  875. "whitelist")) != NULL) {
  876. rspamd_config_radix_from_ucl (cfg, value, "Fuzzy whitelist",
  877. &fuzzy_module_ctx->whitelist, NULL);
  878. }
  879. else {
  880. fuzzy_module_ctx->whitelist = NULL;
  881. }
  882. if ((value =
  883. rspamd_config_get_module_opt (cfg, "fuzzy_check", "rule")) != NULL) {
  884. cb_id = rspamd_symbols_cache_add_symbol (cfg->cache,
  885. "FUZZY_CALLBACK", 0, fuzzy_symbol_callback, NULL,
  886. SYMBOL_TYPE_CALLBACK|SYMBOL_TYPE_FINE,
  887. -1);
  888. /*
  889. * Here we can have 2 possibilities:
  890. *
  891. * unnamed rules:
  892. *
  893. * rule {
  894. * ...
  895. * }
  896. * rule {
  897. * ...
  898. * }
  899. *
  900. * - or - named rules:
  901. *
  902. * rule {
  903. * "rule1": {
  904. * ...
  905. * }
  906. * "rule2": {
  907. * ...
  908. * }
  909. * }
  910. *
  911. * So, for each element, we check, if there 'servers' key. If 'servers' is
  912. * presented, then we treat it as unnamed rule, otherwise we treat it as
  913. * named rule.
  914. */
  915. LL_FOREACH (value, cur) {
  916. if (ucl_object_lookup (cur, "servers")) {
  917. /* Unnamed rule */
  918. fuzzy_parse_rule (cfg, cur, NULL, cb_id);
  919. nrules ++;
  920. }
  921. else {
  922. /* Named rule */
  923. it = NULL;
  924. while ((elt = ucl_object_iterate (cur, &it, true)) != NULL) {
  925. fuzzy_parse_rule (cfg, elt, ucl_object_key (elt), cb_id);
  926. nrules ++;
  927. }
  928. }
  929. }
  930. }
  931. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  932. msg_warn_config ("fuzzy module is enabled but no rules are defined");
  933. }
  934. msg_info_config ("init internal fuzzy_check module, %d rules loaded",
  935. nrules);
  936. return res;
  937. }
  938. gint
  939. fuzzy_check_module_reconfig (struct rspamd_config *cfg)
  940. {
  941. struct module_ctx saved_ctx;
  942. saved_ctx = fuzzy_module_ctx->ctx;
  943. rspamd_mempool_delete (fuzzy_module_ctx->fuzzy_pool);
  944. rspamd_keypair_cache_destroy (fuzzy_module_ctx->keypairs_cache);
  945. g_ptr_array_free (fuzzy_module_ctx->fuzzy_rules, TRUE);
  946. memset (fuzzy_module_ctx, 0, sizeof (*fuzzy_module_ctx));
  947. fuzzy_module_ctx->ctx = saved_ctx;
  948. fuzzy_module_ctx->fuzzy_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), NULL);
  949. fuzzy_module_ctx->cfg = cfg;
  950. fuzzy_module_ctx->fuzzy_rules = g_ptr_array_new ();
  951. fuzzy_module_ctx->keypairs_cache = rspamd_keypair_cache_new (32);
  952. return fuzzy_check_module_config (cfg);
  953. }
  954. /* Finalize IO */
  955. static void
  956. fuzzy_io_fin (void *ud)
  957. {
  958. struct fuzzy_client_session *session = ud;
  959. if (session->commands) {
  960. g_ptr_array_free (session->commands, TRUE);
  961. }
  962. event_del (&session->ev);
  963. event_del (&session->timev);
  964. close (session->fd);
  965. }
  966. static GArray *
  967. fuzzy_preprocess_words (struct rspamd_mime_text_part *part, rspamd_mempool_t *pool)
  968. {
  969. return part->normalized_words;
  970. }
  971. static void
  972. fuzzy_encrypt_cmd (struct fuzzy_rule *rule,
  973. struct rspamd_fuzzy_encrypted_req_hdr *hdr,
  974. guchar *data, gsize datalen)
  975. {
  976. const guchar *pk;
  977. guint pklen;
  978. g_assert (hdr != NULL);
  979. g_assert (data != NULL);
  980. g_assert (rule != NULL);
  981. /* Encrypt data */
  982. memcpy (hdr->magic,
  983. fuzzy_encrypted_magic,
  984. sizeof (hdr->magic));
  985. ottery_rand_bytes (hdr->nonce, sizeof (hdr->nonce));
  986. pk = rspamd_keypair_component (rule->local_key,
  987. RSPAMD_KEYPAIR_COMPONENT_PK, &pklen);
  988. memcpy (hdr->pubkey, pk, MIN (pklen, sizeof (hdr->pubkey)));
  989. pk = rspamd_pubkey_get_pk (rule->peer_key, &pklen);
  990. memcpy (hdr->key_id, pk, MIN (sizeof (hdr->key_id), pklen));
  991. rspamd_keypair_cache_process (fuzzy_module_ctx->keypairs_cache,
  992. rule->local_key, rule->peer_key);
  993. rspamd_cryptobox_encrypt_nm_inplace (data, datalen,
  994. hdr->nonce, rspamd_pubkey_get_nm (rule->peer_key), hdr->mac,
  995. rspamd_pubkey_alg (rule->peer_key));
  996. }
  997. static struct fuzzy_cmd_io *
  998. fuzzy_cmd_stat (struct fuzzy_rule *rule,
  999. int c,
  1000. gint flag,
  1001. guint32 weight,
  1002. rspamd_mempool_t *pool)
  1003. {
  1004. struct rspamd_fuzzy_cmd *cmd;
  1005. struct rspamd_fuzzy_encrypted_cmd *enccmd;
  1006. struct fuzzy_cmd_io *io;
  1007. if (rule->peer_key) {
  1008. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1009. cmd = &enccmd->cmd;
  1010. }
  1011. else {
  1012. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1013. }
  1014. cmd->cmd = c;
  1015. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1016. cmd->shingles_count = 0;
  1017. cmd->tag = ottery_rand_uint32 ();
  1018. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1019. io->flags = 0;
  1020. io->tag = cmd->tag;
  1021. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1022. if (rule->peer_key) {
  1023. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd, sizeof (*cmd));
  1024. io->io.iov_base = enccmd;
  1025. io->io.iov_len = sizeof (*enccmd);
  1026. }
  1027. else {
  1028. io->io.iov_base = cmd;
  1029. io->io.iov_len = sizeof (*cmd);
  1030. }
  1031. return io;
  1032. }
  1033. static struct fuzzy_cmd_io *
  1034. fuzzy_cmd_hash (struct fuzzy_rule *rule,
  1035. int c,
  1036. const rspamd_ftok_t *hash,
  1037. gint flag,
  1038. guint32 weight,
  1039. rspamd_mempool_t *pool)
  1040. {
  1041. struct rspamd_fuzzy_cmd *cmd;
  1042. struct rspamd_fuzzy_encrypted_cmd *enccmd;
  1043. struct fuzzy_cmd_io *io;
  1044. if (rule->peer_key) {
  1045. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1046. cmd = &enccmd->cmd;
  1047. }
  1048. else {
  1049. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1050. }
  1051. if (hash->len == sizeof (cmd->digest) * 2) {
  1052. /* It is hex encoding */
  1053. if (rspamd_decode_hex_buf (hash->begin, hash->len, cmd->digest,
  1054. sizeof (cmd->digest)) == -1) {
  1055. msg_err_pool ("cannot decode hash, wrong encoding");
  1056. return NULL;
  1057. }
  1058. }
  1059. else {
  1060. msg_err_pool ("cannot decode hash, wrong length: %z", hash->len);
  1061. return NULL;
  1062. }
  1063. cmd->cmd = c;
  1064. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1065. cmd->shingles_count = 0;
  1066. cmd->tag = ottery_rand_uint32 ();
  1067. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1068. io->flags = 0;
  1069. io->tag = cmd->tag;
  1070. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1071. if (rule->peer_key) {
  1072. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *)cmd, sizeof (*cmd));
  1073. io->io.iov_base = enccmd;
  1074. io->io.iov_len = sizeof (*enccmd);
  1075. }
  1076. else {
  1077. io->io.iov_base = cmd;
  1078. io->io.iov_len = sizeof (*cmd);
  1079. }
  1080. return io;
  1081. }
  1082. static void *
  1083. fuzzy_cmd_get_cached (struct fuzzy_rule *rule,
  1084. rspamd_mempool_t *pool,
  1085. gpointer p)
  1086. {
  1087. gchar key[32];
  1088. gint key_part;
  1089. memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
  1090. rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
  1091. key_part);
  1092. return rspamd_mempool_get_variable (pool, key);
  1093. }
  1094. static void
  1095. fuzzy_cmd_set_cached (struct fuzzy_rule *rule,
  1096. rspamd_mempool_t *pool,
  1097. gpointer p,
  1098. struct rspamd_fuzzy_encrypted_shingle_cmd *data)
  1099. {
  1100. gchar key[32];
  1101. gint key_part;
  1102. memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
  1103. rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
  1104. key_part);
  1105. /* Key is copied */
  1106. rspamd_mempool_set_variable (pool, key, data, NULL);
  1107. }
  1108. /*
  1109. * Create fuzzy command from a text part
  1110. */
  1111. static struct fuzzy_cmd_io *
  1112. fuzzy_cmd_from_text_part (struct fuzzy_rule *rule,
  1113. int c,
  1114. gint flag,
  1115. guint32 weight,
  1116. rspamd_mempool_t *pool,
  1117. struct rspamd_mime_text_part *part)
  1118. {
  1119. struct rspamd_fuzzy_shingle_cmd *shcmd;
  1120. struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached;
  1121. struct rspamd_shingle *sh;
  1122. guint i;
  1123. rspamd_cryptobox_hash_state_t st;
  1124. rspamd_ftok_t *word;
  1125. GArray *words;
  1126. struct fuzzy_cmd_io *io;
  1127. cached = fuzzy_cmd_get_cached (rule, pool, part);
  1128. if (cached) {
  1129. /* Copy cached */
  1130. encshcmd = rspamd_mempool_alloc (pool, sizeof (*encshcmd));
  1131. memcpy (encshcmd, cached, sizeof (*encshcmd));
  1132. shcmd = &encshcmd->cmd;
  1133. }
  1134. else {
  1135. encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
  1136. shcmd = &encshcmd->cmd;
  1137. /*
  1138. * Generate hash from all words in the part
  1139. */
  1140. rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len);
  1141. words = fuzzy_preprocess_words (part, pool);
  1142. for (i = 0; i < words->len; i ++) {
  1143. word = &g_array_index (words, rspamd_ftok_t, i);
  1144. rspamd_cryptobox_hash_update (&st, word->begin, word->len);
  1145. }
  1146. rspamd_cryptobox_hash_final (&st, shcmd->basic.digest);
  1147. msg_debug_pool ("loading shingles of type %s with key %*xs",
  1148. rule->algorithm_str,
  1149. 16, rule->shingles_key->str);
  1150. sh = rspamd_shingles_from_text (words,
  1151. rule->shingles_key->str, pool,
  1152. rspamd_shingles_default_filter, NULL,
  1153. rule->alg);
  1154. if (sh != NULL) {
  1155. memcpy (&shcmd->sgl, sh, sizeof (shcmd->sgl));
  1156. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1157. }
  1158. /*
  1159. * We always save encrypted command as it can handle both
  1160. * encrypted and unencrypted requests.
  1161. *
  1162. * Since it is copied when obtained from the cache, it is safe to use
  1163. * it this way.
  1164. */
  1165. fuzzy_cmd_set_cached (rule, pool, part, encshcmd);
  1166. }
  1167. shcmd->basic.tag = ottery_rand_uint32 ();
  1168. shcmd->basic.cmd = c;
  1169. shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1170. if (c != FUZZY_CHECK) {
  1171. shcmd->basic.flag = flag;
  1172. shcmd->basic.value = weight;
  1173. }
  1174. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1175. io->tag = shcmd->basic.tag;
  1176. io->flags = 0;
  1177. memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd));
  1178. if (rule->peer_key) {
  1179. /* Encrypt data */
  1180. fuzzy_encrypt_cmd (rule, &encshcmd->hdr, (guchar *) shcmd, sizeof (*shcmd));
  1181. io->io.iov_base = encshcmd;
  1182. io->io.iov_len = sizeof (*encshcmd);
  1183. }
  1184. else {
  1185. io->io.iov_base = shcmd;
  1186. io->io.iov_len = sizeof (*shcmd);
  1187. }
  1188. return io;
  1189. }
  1190. static struct fuzzy_cmd_io *
  1191. fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
  1192. int c,
  1193. gint flag,
  1194. guint32 weight,
  1195. rspamd_mempool_t *pool,
  1196. struct rspamd_image *img)
  1197. {
  1198. struct rspamd_fuzzy_shingle_cmd *shcmd;
  1199. struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached;
  1200. struct fuzzy_cmd_io *io;
  1201. struct rspamd_shingle *sh;
  1202. cached = fuzzy_cmd_get_cached (rule, pool, img);
  1203. if (cached) {
  1204. /* Copy cached */
  1205. encshcmd = rspamd_mempool_alloc (pool, sizeof (*encshcmd));
  1206. memcpy (encshcmd, cached, sizeof (*encshcmd));
  1207. shcmd = &encshcmd->cmd;
  1208. }
  1209. else {
  1210. encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
  1211. shcmd = &encshcmd->cmd;
  1212. /*
  1213. * Generate shingles
  1214. */
  1215. sh = rspamd_shingles_from_image (img->dct,
  1216. rule->shingles_key->str, pool,
  1217. rspamd_shingles_default_filter, NULL,
  1218. rule->alg);
  1219. if (sh != NULL) {
  1220. memcpy (&shcmd->sgl, sh->hashes, sizeof (shcmd->sgl));
  1221. shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
  1222. #if 0
  1223. for (unsigned int i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
  1224. msg_err ("shingle %d: %L", i, sh->hashes[i]);
  1225. }
  1226. #endif
  1227. }
  1228. rspamd_cryptobox_hash (shcmd->basic.digest,
  1229. (const guchar *)img->dct, RSPAMD_DCT_LEN / NBBY,
  1230. rule->hash_key->str, rule->hash_key->len);
  1231. msg_debug_pool ("loading shingles of type %s with key %*xs",
  1232. rule->algorithm_str,
  1233. 16, rule->shingles_key->str);
  1234. /*
  1235. * We always save encrypted command as it can handle both
  1236. * encrypted and unencrypted requests.
  1237. *
  1238. * Since it is copied when obtained from the cache, it is safe to use
  1239. * it this way.
  1240. */
  1241. fuzzy_cmd_set_cached (rule, pool, img, encshcmd);
  1242. }
  1243. shcmd->basic.tag = ottery_rand_uint32 ();
  1244. shcmd->basic.cmd = c;
  1245. shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1246. if (c != FUZZY_CHECK) {
  1247. shcmd->basic.flag = flag;
  1248. shcmd->basic.value = weight;
  1249. }
  1250. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1251. io->tag = shcmd->basic.tag;
  1252. io->flags = FUZZY_CMD_FLAG_IMAGE;
  1253. memcpy (&io->cmd, &shcmd->basic, sizeof (io->cmd));
  1254. if (rule->peer_key) {
  1255. /* Encrypt data */
  1256. fuzzy_encrypt_cmd (rule, &encshcmd->hdr, (guchar *) shcmd, sizeof (*shcmd));
  1257. io->io.iov_base = encshcmd;
  1258. io->io.iov_len = sizeof (*encshcmd);
  1259. }
  1260. else {
  1261. io->io.iov_base = shcmd;
  1262. io->io.iov_len = sizeof (*shcmd);
  1263. }
  1264. return io;
  1265. }
  1266. static struct fuzzy_cmd_io *
  1267. fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
  1268. int c,
  1269. gint flag,
  1270. guint32 weight,
  1271. rspamd_mempool_t *pool,
  1272. guchar digest[rspamd_cryptobox_HASHBYTES])
  1273. {
  1274. struct rspamd_fuzzy_cmd *cmd;
  1275. struct rspamd_fuzzy_encrypted_cmd *enccmd = NULL;
  1276. struct fuzzy_cmd_io *io;
  1277. if (rule->peer_key) {
  1278. enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
  1279. cmd = &enccmd->cmd;
  1280. }
  1281. else {
  1282. cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
  1283. }
  1284. cmd->cmd = c;
  1285. cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION;
  1286. if (c != FUZZY_CHECK) {
  1287. cmd->flag = flag;
  1288. cmd->value = weight;
  1289. }
  1290. cmd->shingles_count = 0;
  1291. cmd->tag = ottery_rand_uint32 ();
  1292. memcpy (cmd->digest, digest, sizeof (cmd->digest));
  1293. io = rspamd_mempool_alloc (pool, sizeof (*io));
  1294. io->flags = 0;
  1295. io->tag = cmd->tag;
  1296. memcpy (&io->cmd, cmd, sizeof (io->cmd));
  1297. if (rule->peer_key) {
  1298. g_assert (enccmd != NULL);
  1299. fuzzy_encrypt_cmd (rule, &enccmd->hdr, (guchar *) cmd, sizeof (*cmd));
  1300. io->io.iov_base = enccmd;
  1301. io->io.iov_len = sizeof (*enccmd);
  1302. }
  1303. else {
  1304. io->io.iov_base = cmd;
  1305. io->io.iov_len = sizeof (*cmd);
  1306. }
  1307. return io;
  1308. }
  1309. static gboolean
  1310. fuzzy_cmd_to_wire (gint fd, struct iovec *io)
  1311. {
  1312. struct msghdr msg;
  1313. memset (&msg, 0, sizeof (msg));
  1314. msg.msg_iov = io;
  1315. msg.msg_iovlen = 1;
  1316. while (sendmsg (fd, &msg, 0) == -1) {
  1317. if (errno == EINTR) {
  1318. continue;
  1319. }
  1320. return FALSE;
  1321. }
  1322. return TRUE;
  1323. }
  1324. static gboolean
  1325. fuzzy_cmd_vector_to_wire (gint fd, GPtrArray *v)
  1326. {
  1327. guint i;
  1328. gboolean all_sent = TRUE, all_replied = TRUE;
  1329. struct fuzzy_cmd_io *io;
  1330. gboolean processed = FALSE;
  1331. /* First try to resend unsent commands */
  1332. for (i = 0; i < v->len; i ++) {
  1333. io = g_ptr_array_index (v, i);
  1334. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  1335. continue;
  1336. }
  1337. all_replied = FALSE;
  1338. if (!(io->flags & FUZZY_CMD_FLAG_SENT)) {
  1339. if (!fuzzy_cmd_to_wire (fd, &io->io)) {
  1340. return FALSE;
  1341. }
  1342. processed = TRUE;
  1343. io->flags |= FUZZY_CMD_FLAG_SENT;
  1344. all_sent = FALSE;
  1345. }
  1346. }
  1347. if (all_sent && !all_replied) {
  1348. /* Now try to resend each command in the vector */
  1349. for (i = 0; i < v->len; i++) {
  1350. io = g_ptr_array_index (v, i);
  1351. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1352. io->flags &= ~FUZZY_CMD_FLAG_SENT;
  1353. }
  1354. }
  1355. return fuzzy_cmd_vector_to_wire (fd, v);
  1356. }
  1357. return processed;
  1358. }
  1359. /*
  1360. * Read replies one-by-one and remove them from req array
  1361. */
  1362. static const struct rspamd_fuzzy_reply *
  1363. fuzzy_process_reply (guchar **pos, gint *r, GPtrArray *req,
  1364. struct fuzzy_rule *rule, struct rspamd_fuzzy_cmd **pcmd,
  1365. struct fuzzy_cmd_io **pio)
  1366. {
  1367. guchar *p = *pos;
  1368. gint remain = *r;
  1369. guint i, required_size;
  1370. struct fuzzy_cmd_io *io;
  1371. const struct rspamd_fuzzy_reply *rep;
  1372. struct rspamd_fuzzy_encrypted_reply encrep;
  1373. gboolean found = FALSE;
  1374. if (rule->peer_key) {
  1375. required_size = sizeof (encrep);
  1376. }
  1377. else {
  1378. required_size = sizeof (*rep);
  1379. }
  1380. if (remain <= 0 || (guint)remain < required_size) {
  1381. return NULL;
  1382. }
  1383. if (rule->peer_key) {
  1384. memcpy (&encrep, p, sizeof (encrep));
  1385. *pos += required_size;
  1386. *r -= required_size;
  1387. /* Try to decrypt reply */
  1388. rspamd_keypair_cache_process (fuzzy_module_ctx->keypairs_cache,
  1389. rule->local_key, rule->peer_key);
  1390. if (!rspamd_cryptobox_decrypt_nm_inplace ((guchar *)&encrep.rep,
  1391. sizeof (encrep.rep),
  1392. encrep.hdr.nonce,
  1393. rspamd_pubkey_get_nm (rule->peer_key),
  1394. encrep.hdr.mac,
  1395. rspamd_pubkey_alg (rule->peer_key))) {
  1396. msg_info ("cannot decrypt reply");
  1397. return NULL;
  1398. }
  1399. /* Copy decrypted over the input wire */
  1400. memcpy (p, &encrep.rep, sizeof (encrep.rep));
  1401. }
  1402. else {
  1403. *pos += required_size;
  1404. *r -= required_size;
  1405. }
  1406. rep = (const struct rspamd_fuzzy_reply *) p;
  1407. /*
  1408. * Search for tag
  1409. */
  1410. for (i = 0; i < req->len; i ++) {
  1411. io = g_ptr_array_index (req, i);
  1412. if (io->tag == rep->tag) {
  1413. if (!(io->flags & FUZZY_CMD_FLAG_REPLIED)) {
  1414. io->flags |= FUZZY_CMD_FLAG_REPLIED;
  1415. if (pcmd) {
  1416. *pcmd = &io->cmd;
  1417. }
  1418. if (pio) {
  1419. *pio = io;
  1420. }
  1421. return rep;
  1422. }
  1423. found = TRUE;
  1424. }
  1425. }
  1426. if (!found) {
  1427. msg_info ("unexpected tag: %ud", rep->tag);
  1428. }
  1429. return NULL;
  1430. }
  1431. static void
  1432. fuzzy_insert_result (struct fuzzy_client_session *session,
  1433. const struct rspamd_fuzzy_reply *rep,
  1434. struct rspamd_fuzzy_cmd *cmd,
  1435. struct fuzzy_cmd_io *io,
  1436. guint flag)
  1437. {
  1438. const gchar *symbol;
  1439. struct fuzzy_mapping *map;
  1440. struct rspamd_task *task = session->task;
  1441. double nval;
  1442. guchar buf[2048];
  1443. const gchar *type = "bin";
  1444. /* Get mapping by flag */
  1445. if ((map =
  1446. g_hash_table_lookup (session->rule->mappings,
  1447. GINT_TO_POINTER (rep->flag))) == NULL) {
  1448. /* Default symbol and default weight */
  1449. symbol = session->rule->symbol;
  1450. }
  1451. else {
  1452. /* Get symbol and weight from map */
  1453. symbol = map->symbol;
  1454. }
  1455. /*
  1456. * Hash is assumed to be found if probability is more than 0.5
  1457. * In that case `value` means number of matches
  1458. * Otherwise `value` means error code
  1459. */
  1460. nval = fuzzy_normalize (rep->value,
  1461. session->rule->max_score);
  1462. if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  1463. nval *= rspamd_normalize_probability (rep->prob, 0.5);
  1464. type = "img";
  1465. }
  1466. else {
  1467. /* XXX: we need something better here */
  1468. if (cmd->shingles_count > 0) {
  1469. type = "txt";
  1470. }
  1471. nval *= rep->prob;
  1472. }
  1473. msg_info_task (
  1474. "found fuzzy hash(%s) %*xs with weight: "
  1475. "%.2f, probability %.2f, in list: %s:%d%s",
  1476. type,
  1477. (gint)sizeof (cmd->digest), cmd->digest,
  1478. nval,
  1479. (gdouble)rep->prob,
  1480. symbol,
  1481. rep->flag,
  1482. map == NULL ? "(unknown)" : "");
  1483. if (map != NULL || !session->rule->skip_unknown) {
  1484. rspamd_snprintf (buf,
  1485. sizeof (buf),
  1486. "%d:%*xs:%.2f:%s",
  1487. rep->flag,
  1488. rspamd_fuzzy_hash_len, cmd->digest,
  1489. rep->prob,
  1490. type);
  1491. rspamd_task_insert_result_single (session->task,
  1492. symbol,
  1493. nval,
  1494. buf);
  1495. }
  1496. }
  1497. static gint
  1498. fuzzy_check_try_read (struct fuzzy_client_session *session)
  1499. {
  1500. struct rspamd_task *task;
  1501. const struct rspamd_fuzzy_reply *rep;
  1502. struct rspamd_fuzzy_cmd *cmd = NULL;
  1503. struct fuzzy_cmd_io *io = NULL;
  1504. gint r, ret;
  1505. guchar buf[2048], *p;
  1506. task = session->task;
  1507. if ((r = read (session->fd, buf, sizeof (buf) - 1)) == -1) {
  1508. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  1509. return 0;
  1510. }
  1511. else {
  1512. return -1;
  1513. }
  1514. }
  1515. else {
  1516. p = buf;
  1517. ret = 0;
  1518. while ((rep = fuzzy_process_reply (&p, &r,
  1519. session->commands, session->rule, &cmd, &io)) != NULL) {
  1520. if (rep->prob > 0.5) {
  1521. if (cmd->cmd == FUZZY_CHECK) {
  1522. fuzzy_insert_result (session, rep, cmd, io, rep->flag);
  1523. }
  1524. else if (cmd->cmd == FUZZY_STAT) {
  1525. /* Just set pool variable to extract it in further */
  1526. struct rspamd_fuzzy_stat_entry *pval;
  1527. GList *res;
  1528. pval = rspamd_mempool_alloc (task->task_pool, sizeof (*pval));
  1529. pval->fuzzy_cnt = rep->flag;
  1530. pval->name = session->rule->name;
  1531. res = rspamd_mempool_get_variable (task->task_pool, "fuzzy_stat");
  1532. if (res == NULL) {
  1533. res = g_list_append (NULL, pval);
  1534. rspamd_mempool_set_variable (task->task_pool, "fuzzy_stat",
  1535. res, (rspamd_mempool_destruct_t)g_list_free);
  1536. }
  1537. else {
  1538. res = g_list_append (res, pval);
  1539. }
  1540. }
  1541. }
  1542. else if (rep->value == 403) {
  1543. msg_info_task (
  1544. "fuzzy check error for %d: forbidden",
  1545. rep->flag);
  1546. }
  1547. else if (rep->value != 0) {
  1548. msg_info_task (
  1549. "fuzzy check error for %d: unknown error (%d)",
  1550. rep->flag,
  1551. rep->value);
  1552. }
  1553. ret = 1;
  1554. }
  1555. }
  1556. return ret;
  1557. }
  1558. static gboolean
  1559. fuzzy_check_session_is_completed (struct fuzzy_client_session *session)
  1560. {
  1561. struct fuzzy_cmd_io *io;
  1562. guint nreplied = 0, i;
  1563. rspamd_upstream_ok (session->server);
  1564. for (i = 0; i < session->commands->len; i++) {
  1565. io = g_ptr_array_index (session->commands, i);
  1566. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  1567. nreplied++;
  1568. }
  1569. }
  1570. if (nreplied == session->commands->len) {
  1571. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  1572. return TRUE;
  1573. }
  1574. return FALSE;
  1575. }
  1576. /* Fuzzy check callback */
  1577. static void
  1578. fuzzy_check_io_callback (gint fd, short what, void *arg)
  1579. {
  1580. struct fuzzy_client_session *session = arg;
  1581. struct rspamd_task *task;
  1582. struct event_base *ev_base;
  1583. gint r;
  1584. enum {
  1585. return_error = 0,
  1586. return_want_more,
  1587. return_finished
  1588. } ret = return_error;
  1589. task = session->task;
  1590. if ((what & EV_READ) || session->state == 1) {
  1591. /* Try to read reply */
  1592. r = fuzzy_check_try_read (session);
  1593. switch (r) {
  1594. case 0:
  1595. ret = return_want_more;
  1596. break;
  1597. case 1:
  1598. ret = return_finished;
  1599. break;
  1600. default:
  1601. ret = return_error;
  1602. break;
  1603. }
  1604. }
  1605. else if (what & EV_WRITE) {
  1606. if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
  1607. ret = return_error;
  1608. }
  1609. else {
  1610. session->state = 1;
  1611. ret = return_want_more;
  1612. }
  1613. }
  1614. else {
  1615. /* Should not happen */
  1616. g_assert (0);
  1617. }
  1618. if (ret == return_want_more) {
  1619. /* Processed write, switch to reading */
  1620. ev_base = event_get_base (&session->ev);
  1621. event_del (&session->ev);
  1622. event_set (&session->ev, fd, EV_READ,
  1623. fuzzy_check_io_callback, session);
  1624. event_base_set (ev_base, &session->ev);
  1625. event_add (&session->ev, NULL);
  1626. }
  1627. else if (ret == return_error) {
  1628. /* Error state */
  1629. msg_err_task ("got error on IO with server %s(%s), on %s, %d, %s",
  1630. rspamd_upstream_name (session->server),
  1631. rspamd_inet_address_to_string (session->addr),
  1632. session->state == 1 ? "read" : "write",
  1633. errno,
  1634. strerror (errno));
  1635. rspamd_upstream_fail (session->server);
  1636. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  1637. }
  1638. else {
  1639. /* Read something from network */
  1640. if (!fuzzy_check_session_is_completed (session)) {
  1641. /* Need to read more */
  1642. ev_base = event_get_base (&session->ev);
  1643. event_del (&session->ev);
  1644. event_set (&session->ev, session->fd, EV_READ,
  1645. fuzzy_check_io_callback, session);
  1646. event_base_set (ev_base, &session->ev);
  1647. event_add (&session->ev, NULL);
  1648. }
  1649. }
  1650. }
  1651. /* Fuzzy check timeout callback */
  1652. static void
  1653. fuzzy_check_timer_callback (gint fd, short what, void *arg)
  1654. {
  1655. struct fuzzy_client_session *session = arg;
  1656. struct rspamd_task *task;
  1657. struct event_base *ev_base;
  1658. task = session->task;
  1659. /* We might be here because of other checks being slow */
  1660. if (fuzzy_check_try_read (session) > 0) {
  1661. if (fuzzy_check_session_is_completed (session)) {
  1662. return;
  1663. }
  1664. }
  1665. if (session->retransmits >= fuzzy_module_ctx->retransmits) {
  1666. msg_err_task ("got IO timeout with server %s(%s), after %d retransmits",
  1667. rspamd_upstream_name (session->server),
  1668. rspamd_inet_address_to_string (session->addr),
  1669. session->retransmits);
  1670. rspamd_upstream_fail (session->server);
  1671. rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session);
  1672. }
  1673. else {
  1674. /* Plan write event */
  1675. ev_base = event_get_base (&session->ev);
  1676. event_del (&session->ev);
  1677. event_set (&session->ev, fd, EV_WRITE|EV_READ,
  1678. fuzzy_check_io_callback, session);
  1679. event_base_set (ev_base, &session->ev);
  1680. event_add (&session->ev, NULL);
  1681. /* Plan new retransmit timer */
  1682. ev_base = event_get_base (&session->timev);
  1683. event_del (&session->timev);
  1684. event_base_set (ev_base, &session->timev);
  1685. event_add (&session->timev, &session->tv);
  1686. session->retransmits ++;
  1687. }
  1688. }
  1689. static void
  1690. fuzzy_lua_fin (void *ud)
  1691. {
  1692. struct fuzzy_learn_session *session = ud;
  1693. (*session->saved)--;
  1694. event_del (&session->ev);
  1695. event_del (&session->timev);
  1696. close (session->fd);
  1697. }
  1698. /* Controller IO */
  1699. static void
  1700. fuzzy_controller_io_callback (gint fd, short what, void *arg)
  1701. {
  1702. struct fuzzy_learn_session *session = arg;
  1703. const struct rspamd_fuzzy_reply *rep;
  1704. struct fuzzy_mapping *map;
  1705. struct rspamd_task *task;
  1706. guchar buf[2048], *p;
  1707. struct fuzzy_cmd_io *io;
  1708. struct rspamd_fuzzy_cmd *cmd = NULL;
  1709. const gchar *symbol, *ftype;
  1710. struct event_base *ev_base;
  1711. gint r;
  1712. enum {
  1713. return_error = 0,
  1714. return_want_more,
  1715. return_finished
  1716. } ret = return_want_more;
  1717. guint i, nreplied;
  1718. task = session->task;
  1719. if (what & EV_READ) {
  1720. if ((r = read (fd, buf, sizeof (buf) - 1)) == -1) {
  1721. if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
  1722. event_add (&session->ev, NULL);
  1723. return;
  1724. }
  1725. msg_info_task ("cannot process fuzzy hash for message <%s>: %s",
  1726. session->task->message_id, strerror (errno));
  1727. if (*(session->err) == NULL) {
  1728. g_set_error (session->err,
  1729. g_quark_from_static_string ("fuzzy check"),
  1730. errno, "read socket error: %s", strerror (errno));
  1731. }
  1732. ret = return_error;
  1733. }
  1734. else {
  1735. p = buf;
  1736. ret = return_want_more;
  1737. while ((rep = fuzzy_process_reply (&p, &r,
  1738. session->commands, session->rule, &cmd, &io)) != NULL) {
  1739. if ((map =
  1740. g_hash_table_lookup (session->rule->mappings,
  1741. GINT_TO_POINTER (rep->flag))) == NULL) {
  1742. /* Default symbol and default weight */
  1743. symbol = session->rule->symbol;
  1744. }
  1745. else {
  1746. /* Get symbol and weight from map */
  1747. symbol = map->symbol;
  1748. }
  1749. ftype = "bin";
  1750. if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
  1751. ftype = "img";
  1752. }
  1753. else if (cmd->shingles_count > 0) {
  1754. ftype = "txt";
  1755. }
  1756. if (rep->prob > 0.5) {
  1757. msg_info_task ("processed fuzzy hash (%s) %*xs, list: %s:%d for "
  1758. "message <%s>",
  1759. ftype,
  1760. (gint)sizeof (cmd->digest), cmd->digest,
  1761. symbol,
  1762. rep->flag,
  1763. session->task->message_id);
  1764. }
  1765. else {
  1766. msg_info_task ("cannot process fuzzy hash (%s) for message "
  1767. "<%s>, %*xs, "
  1768. "list %s:%d, error: %d",
  1769. ftype,
  1770. session->task->message_id,
  1771. (gint)sizeof (cmd->digest), cmd->digest,
  1772. symbol,
  1773. rep->flag,
  1774. rep->value);
  1775. if (*(session->err) == NULL) {
  1776. g_set_error (session->err,
  1777. g_quark_from_static_string ("fuzzy check"),
  1778. rep->value, "process fuzzy error");
  1779. }
  1780. ret = return_finished;
  1781. }
  1782. }
  1783. nreplied = 0;
  1784. for (i = 0; i < session->commands->len; i++) {
  1785. io = g_ptr_array_index (session->commands, i);
  1786. if (io->flags & FUZZY_CMD_FLAG_REPLIED) {
  1787. nreplied++;
  1788. }
  1789. }
  1790. if (nreplied == session->commands->len) {
  1791. ret = return_finished;
  1792. }
  1793. }
  1794. }
  1795. else if (what & EV_WRITE) {
  1796. /* Send commands to storage */
  1797. if (!fuzzy_cmd_vector_to_wire (fd, session->commands)) {
  1798. if (*(session->err) == NULL) {
  1799. g_set_error (session->err,
  1800. g_quark_from_static_string ("fuzzy check"),
  1801. errno, "write socket error: %s", strerror (errno));
  1802. }
  1803. ret = return_error;
  1804. }
  1805. }
  1806. else {
  1807. g_assert (0);
  1808. }
  1809. if (ret == return_want_more) {
  1810. ev_base = event_get_base (&session->ev);
  1811. event_del (&session->ev);
  1812. event_set (&session->ev, fd, EV_READ,
  1813. fuzzy_controller_io_callback, session);
  1814. event_base_set (ev_base, &session->ev);
  1815. event_add (&session->ev, NULL);
  1816. return;
  1817. }
  1818. else if (ret == return_error) {
  1819. msg_err_task ("got error in IO with server %s(%s), %d, %s",
  1820. rspamd_upstream_name (session->server),
  1821. rspamd_inet_address_to_string (session->addr),
  1822. errno, strerror (errno));
  1823. rspamd_upstream_fail (session->server);
  1824. }
  1825. /*
  1826. * XXX: actually, we check merely a single reply, which is not correct...
  1827. * XXX: when we send a command, we do not check if *all* commands have been
  1828. * written
  1829. * XXX: please, please, change this code some day
  1830. */
  1831. if (session->session == NULL) {
  1832. (*session->saved)--;
  1833. if (session->http_entry) {
  1834. rspamd_http_connection_unref (session->http_entry->conn);
  1835. }
  1836. event_del (&session->ev);
  1837. event_del (&session->timev);
  1838. close (session->fd);
  1839. if (*session->saved == 0) {
  1840. goto cleanup;
  1841. }
  1842. }
  1843. else {
  1844. /* Lua handler */
  1845. rspamd_session_remove_event (session->session, fuzzy_lua_fin, session);
  1846. }
  1847. return;
  1848. cleanup:
  1849. /*
  1850. * When we send learn commands to fuzzy storages, this code is executed
  1851. * *once* when we have queried all storages. We also don't know which
  1852. * storage has been failed.
  1853. *
  1854. * Therefore, we cleanup sessions earlier and actually this code is wrong.
  1855. */
  1856. if (*(session->err) != NULL) {
  1857. if (session->http_entry) {
  1858. rspamd_controller_send_error (session->http_entry,
  1859. (*session->err)->code, (*session->err)->message);
  1860. }
  1861. g_error_free (*session->err);
  1862. }
  1863. else {
  1864. rspamd_upstream_ok (session->server);
  1865. if (session->http_entry) {
  1866. ucl_object_t *reply, *hashes;
  1867. guint i;
  1868. gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
  1869. reply = ucl_object_typed_new (UCL_OBJECT);
  1870. ucl_object_insert_key (reply, ucl_object_frombool (true),
  1871. "success", 0, false);
  1872. hashes = ucl_object_typed_new (UCL_ARRAY);
  1873. for (i = 0; i < session->commands->len; i ++) {
  1874. io = g_ptr_array_index (session->commands, i);
  1875. rspamd_snprintf (hexbuf, sizeof (hexbuf), "%*xs",
  1876. (gint)sizeof (io->cmd.digest), io->cmd.digest);
  1877. ucl_array_append (hashes, ucl_object_fromstring (hexbuf));
  1878. }
  1879. ucl_object_insert_key (reply, hashes, "hashes", 0, false);
  1880. rspamd_controller_send_ucl (session->http_entry, reply);
  1881. ucl_object_unref (reply);
  1882. }
  1883. }
  1884. if (session->task != NULL) {
  1885. if (session->http_entry) {
  1886. rspamd_task_free (session->task);
  1887. }
  1888. session->task = NULL;
  1889. }
  1890. }
  1891. static void
  1892. fuzzy_controller_timer_callback (gint fd, short what, void *arg)
  1893. {
  1894. struct fuzzy_learn_session *session = arg;
  1895. struct rspamd_task *task;
  1896. struct event_base *ev_base;
  1897. task = session->task;
  1898. if (session->retransmits >= fuzzy_module_ctx->retransmits) {
  1899. rspamd_upstream_fail (session->server);
  1900. msg_err_task_check ("got IO timeout with server %s(%s), "
  1901. "after %d retransmits",
  1902. rspamd_upstream_name (session->server),
  1903. rspamd_inet_address_to_string (session->addr),
  1904. session->retransmits);
  1905. if (session->session) {
  1906. rspamd_session_remove_event (session->session, fuzzy_lua_fin,
  1907. session);
  1908. }
  1909. else {
  1910. if (session->http_entry) {
  1911. rspamd_controller_send_error (session->http_entry,
  1912. 500, "IO timeout with fuzzy storage");
  1913. }
  1914. if (*session->saved > 0 ) {
  1915. (*session->saved)--;
  1916. if (*session->saved == 0) {
  1917. if (session->http_entry) {
  1918. rspamd_task_free (session->task);
  1919. }
  1920. session->task = NULL;
  1921. }
  1922. }
  1923. if (session->http_entry) {
  1924. rspamd_http_connection_unref (session->http_entry->conn);
  1925. }
  1926. event_del (&session->ev);
  1927. event_del (&session->timev);
  1928. close (session->fd);
  1929. }
  1930. }
  1931. else {
  1932. /* Plan write event */
  1933. ev_base = event_get_base (&session->ev);
  1934. event_del (&session->ev);
  1935. event_set (&session->ev, fd, EV_WRITE|EV_READ,
  1936. fuzzy_controller_io_callback, session);
  1937. event_base_set (ev_base, &session->ev);
  1938. event_add (&session->ev, NULL);
  1939. /* Plan new retransmit timer */
  1940. ev_base = event_get_base (&session->timev);
  1941. event_del (&session->timev);
  1942. event_base_set (ev_base, &session->timev);
  1943. event_add (&session->timev, &session->tv);
  1944. session->retransmits ++;
  1945. }
  1946. }
  1947. static GPtrArray *
  1948. fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
  1949. gint c, gint flag, guint32 value)
  1950. {
  1951. struct rspamd_mime_text_part *part;
  1952. struct rspamd_mime_part *mime_part;
  1953. struct rspamd_image *image;
  1954. struct fuzzy_cmd_io *io;
  1955. guint i;
  1956. GPtrArray *res;
  1957. res = g_ptr_array_sized_new (task->parts->len + 1);
  1958. if (c == FUZZY_STAT) {
  1959. io = fuzzy_cmd_stat (rule, c, flag, value, task->task_pool);
  1960. if (io) {
  1961. g_ptr_array_add (res, io);
  1962. }
  1963. goto end;
  1964. }
  1965. for (i = 0; i < task->text_parts->len; i ++) {
  1966. part = g_ptr_array_index (task->text_parts, i);
  1967. if (IS_PART_EMPTY (part)) {
  1968. continue;
  1969. }
  1970. /* Check length of part */
  1971. if (fuzzy_module_ctx->min_bytes > part->content->len) {
  1972. msg_info_task ("<%s>, part is shorter than %d bytes (%d bytes), "
  1973. "skip fuzzy check",
  1974. task->message_id, fuzzy_module_ctx->min_bytes,
  1975. part->content->len);
  1976. continue;
  1977. }
  1978. if (part->normalized_words == NULL || part->normalized_words->len == 0) {
  1979. msg_info_task ("<%s>, part hash empty, skip fuzzy check",
  1980. task->message_id);
  1981. continue;
  1982. }
  1983. if (fuzzy_module_ctx->min_hash_len != 0 &&
  1984. part->normalized_words->len < fuzzy_module_ctx->min_hash_len) {
  1985. msg_info_task (
  1986. "<%s>, part hash is shorter than %d symbols, skip fuzzy check",
  1987. task->message_id,
  1988. fuzzy_module_ctx->min_hash_len);
  1989. continue;
  1990. }
  1991. io = fuzzy_cmd_from_text_part (rule, c, flag, value, task->task_pool,
  1992. part);
  1993. if (io) {
  1994. g_ptr_array_add (res, io);
  1995. }
  1996. }
  1997. /* Process other parts and images */
  1998. for (i = 0; i < task->parts->len; i ++) {
  1999. mime_part = g_ptr_array_index (task->parts, i);
  2000. if (mime_part->flags & RSPAMD_MIME_PART_IMAGE) {
  2001. image = mime_part->specific.img;
  2002. if (image->data->len > 0) {
  2003. if (fuzzy_module_ctx->min_height <= 0 || image->height >=
  2004. fuzzy_module_ctx->min_height) {
  2005. if (fuzzy_module_ctx->min_width <= 0 || image->width >=
  2006. fuzzy_module_ctx->min_width) {
  2007. io = fuzzy_cmd_from_data_part (rule, c, flag, value,
  2008. task->task_pool,
  2009. image->parent->digest);
  2010. if (io) {
  2011. g_ptr_array_add (res, io);
  2012. }
  2013. if (image->is_normalized) {
  2014. io = fuzzy_cmd_from_image_part (rule, c, flag, value,
  2015. task->task_pool,
  2016. image);
  2017. if (io) {
  2018. g_ptr_array_add (res, io);
  2019. }
  2020. }
  2021. }
  2022. }
  2023. }
  2024. }
  2025. if (mime_part->parsed_data.len > 0 &&
  2026. fuzzy_check_content_type (rule, mime_part->ct)) {
  2027. if (fuzzy_module_ctx->min_bytes <= 0 || mime_part->parsed_data.len >=
  2028. fuzzy_module_ctx->min_bytes) {
  2029. io = fuzzy_cmd_from_data_part (rule, c, flag, value,
  2030. task->task_pool,
  2031. mime_part->digest);
  2032. if (io) {
  2033. g_ptr_array_add (res, io);
  2034. }
  2035. }
  2036. }
  2037. }
  2038. /* Process metadata */
  2039. #if 0
  2040. io = fuzzy_cmd_from_task_meta (rule, c, flag, value,
  2041. task->task_pool, task);
  2042. if (io) {
  2043. g_ptr_array_add (res, io);
  2044. }
  2045. #endif
  2046. end:
  2047. if (res->len == 0) {
  2048. g_ptr_array_free (res, FALSE);
  2049. return NULL;
  2050. }
  2051. return res;
  2052. }
  2053. static inline void
  2054. register_fuzzy_client_call (struct rspamd_task *task,
  2055. struct fuzzy_rule *rule,
  2056. GPtrArray *commands)
  2057. {
  2058. struct fuzzy_client_session *session;
  2059. struct upstream *selected;
  2060. rspamd_inet_addr_t *addr;
  2061. gint sock;
  2062. /* Get upstream */
  2063. selected = rspamd_upstream_get (rule->servers, RSPAMD_UPSTREAM_ROUND_ROBIN,
  2064. NULL, 0);
  2065. if (selected) {
  2066. addr = rspamd_upstream_addr (selected);
  2067. if ((sock = rspamd_inet_address_connect (addr, SOCK_DGRAM, TRUE)) == -1) {
  2068. msg_warn_task ("cannot connect to %s(%s), %d, %s",
  2069. rspamd_upstream_name (selected),
  2070. rspamd_inet_address_to_string (addr),
  2071. errno,
  2072. strerror (errno));
  2073. rspamd_upstream_fail (selected);
  2074. g_ptr_array_free (commands, TRUE);
  2075. }
  2076. else {
  2077. /* Create session for a socket */
  2078. session =
  2079. rspamd_mempool_alloc0 (task->task_pool,
  2080. sizeof (struct fuzzy_client_session));
  2081. msec_to_tv (fuzzy_module_ctx->io_timeout, &session->tv);
  2082. session->state = 0;
  2083. session->commands = commands;
  2084. session->task = task;
  2085. session->fd = sock;
  2086. session->server = selected;
  2087. session->rule = rule;
  2088. session->addr = addr;
  2089. event_set (&session->ev, sock, EV_WRITE, fuzzy_check_io_callback,
  2090. session);
  2091. event_base_set (session->task->ev_base, &session->ev);
  2092. event_add (&session->ev, NULL);
  2093. evtimer_set (&session->timev, fuzzy_check_timer_callback,
  2094. session);
  2095. event_base_set (session->task->ev_base, &session->timev);
  2096. event_add (&session->timev, &session->tv);
  2097. rspamd_session_add_event (task->s,
  2098. fuzzy_io_fin,
  2099. session,
  2100. g_quark_from_static_string ("fuzzy check"));
  2101. }
  2102. }
  2103. }
  2104. /* This callback is called when we check message in fuzzy hashes storage */
  2105. static void
  2106. fuzzy_symbol_callback (struct rspamd_task *task, void *unused)
  2107. {
  2108. struct fuzzy_rule *rule;
  2109. guint i;
  2110. GPtrArray *commands;
  2111. if (!fuzzy_module_ctx->enabled) {
  2112. return;
  2113. }
  2114. /* Check whitelist */
  2115. if (fuzzy_module_ctx->whitelist) {
  2116. if (radix_find_compressed_addr (fuzzy_module_ctx->whitelist,
  2117. task->from_addr) != RADIX_NO_VALUE) {
  2118. msg_info_task ("<%s>, address %s is whitelisted, skip fuzzy check",
  2119. task->message_id,
  2120. rspamd_inet_address_to_string (task->from_addr));
  2121. return;
  2122. }
  2123. }
  2124. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2125. commands = fuzzy_generate_commands (task, rule, FUZZY_CHECK, 0, 0);
  2126. if (commands != NULL) {
  2127. register_fuzzy_client_call (task, rule, commands);
  2128. }
  2129. }
  2130. }
  2131. void
  2132. fuzzy_stat_command (struct rspamd_task *task)
  2133. {
  2134. struct fuzzy_rule *rule;
  2135. guint i;
  2136. GPtrArray *commands;
  2137. if (!fuzzy_module_ctx->enabled) {
  2138. return;
  2139. }
  2140. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2141. commands = fuzzy_generate_commands (task, rule, FUZZY_STAT, 0, 0);
  2142. if (commands != NULL) {
  2143. register_fuzzy_client_call (task, rule, commands);
  2144. }
  2145. }
  2146. }
  2147. static inline gint
  2148. register_fuzzy_controller_call (struct rspamd_http_connection_entry *entry,
  2149. struct fuzzy_rule *rule,
  2150. struct rspamd_task *task,
  2151. GPtrArray *commands,
  2152. gint *saved,
  2153. GError **err)
  2154. {
  2155. struct fuzzy_learn_session *s;
  2156. struct upstream *selected;
  2157. rspamd_inet_addr_t *addr;
  2158. struct rspamd_controller_session *session = entry->ud;
  2159. gint sock;
  2160. gint ret = -1;
  2161. /* Get upstream */
  2162. while ((selected = rspamd_upstream_get (rule->servers,
  2163. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  2164. /* Create UDP socket */
  2165. addr = rspamd_upstream_addr (selected);
  2166. if ((sock = rspamd_inet_address_connect (addr,
  2167. SOCK_DGRAM, TRUE)) == -1) {
  2168. rspamd_upstream_fail (selected);
  2169. }
  2170. else {
  2171. s =
  2172. rspamd_mempool_alloc0 (session->pool,
  2173. sizeof (struct fuzzy_learn_session));
  2174. msec_to_tv (fuzzy_module_ctx->io_timeout, &s->tv);
  2175. s->task = task;
  2176. s->addr = addr;
  2177. s->commands = commands;
  2178. s->http_entry = entry;
  2179. s->server = selected;
  2180. s->saved = saved;
  2181. s->fd = sock;
  2182. s->err = err;
  2183. s->rule = rule;
  2184. /* We ref connection to avoid freeing before we process fuzzy rule */
  2185. rspamd_http_connection_ref (entry->conn);
  2186. event_set (&s->ev, sock, EV_WRITE, fuzzy_controller_io_callback, s);
  2187. event_base_set (entry->rt->ev_base, &s->ev);
  2188. event_add (&s->ev, NULL);
  2189. evtimer_set (&s->timev, fuzzy_controller_timer_callback,
  2190. s);
  2191. event_base_set (s->task->ev_base, &s->timev);
  2192. event_add (&s->timev, &s->tv);
  2193. (*saved)++;
  2194. ret = 1;
  2195. }
  2196. }
  2197. return ret;
  2198. }
  2199. static void
  2200. fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent,
  2201. struct rspamd_http_message *msg, gint cmd, gint value, gint flag,
  2202. struct fuzzy_ctx *ctx, gboolean is_hash)
  2203. {
  2204. struct fuzzy_rule *rule;
  2205. struct rspamd_controller_session *session = conn_ent->ud;
  2206. struct rspamd_task *task, **ptask;
  2207. gboolean processed = FALSE, res = TRUE, skip;
  2208. guint i;
  2209. GError **err;
  2210. GPtrArray *commands;
  2211. GString *tb;
  2212. lua_State *L;
  2213. gint r, *saved, rules = 0, err_idx;
  2214. /* Prepare task */
  2215. task = rspamd_task_new (session->wrk, session->cfg);
  2216. task->cfg = ctx->cfg;
  2217. task->ev_base = conn_ent->rt->ev_base;
  2218. saved = rspamd_mempool_alloc0 (session->pool, sizeof (gint));
  2219. err = rspamd_mempool_alloc0 (session->pool, sizeof (GError *));
  2220. if (!is_hash) {
  2221. /* Allocate message from string */
  2222. /* XXX: what about encrypted messsages ? */
  2223. task->msg.begin = msg->body_buf.begin;
  2224. task->msg.len = msg->body_buf.len;
  2225. r = rspamd_message_parse (task);
  2226. if (r == -1) {
  2227. msg_warn_task ("<%s>: cannot process message for fuzzy",
  2228. task->message_id);
  2229. rspamd_task_free (task);
  2230. rspamd_controller_send_error (conn_ent, 400,
  2231. "Message processing error");
  2232. return;
  2233. }
  2234. }
  2235. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2236. if (rule->read_only) {
  2237. continue;
  2238. }
  2239. /* Check for flag */
  2240. if (g_hash_table_lookup (rule->mappings,
  2241. GINT_TO_POINTER (flag)) == NULL) {
  2242. msg_info_task ("skip rule %s as it has no flag %d defined"
  2243. " false", rule->name, flag);
  2244. continue;
  2245. }
  2246. /* Check learn condition */
  2247. if (rule->learn_condition_cb != -1) {
  2248. skip = FALSE;
  2249. L = session->cfg->lua_state;
  2250. lua_pushcfunction (L, &rspamd_lua_traceback);
  2251. err_idx = lua_gettop (L);
  2252. lua_rawgeti (L, LUA_REGISTRYINDEX, rule->learn_condition_cb);
  2253. ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
  2254. *ptask = task;
  2255. rspamd_lua_setclass (L, "rspamd{task}", -1);
  2256. if (lua_pcall (L, 1, LUA_MULTRET, err_idx) != 0) {
  2257. tb = lua_touserdata (L, -1);
  2258. msg_err_task ("call to user extraction script failed: %v", tb);
  2259. g_string_free (tb, TRUE);
  2260. }
  2261. else {
  2262. if (lua_gettop (L) > 1) {
  2263. skip = !(lua_toboolean (L, -2));
  2264. if (lua_isnumber (L, -1)) {
  2265. msg_info_task ("learn condition changed flag from %d to "
  2266. "%d", flag, (guint)lua_tonumber (L, -1));
  2267. flag = lua_tonumber (L, -1);
  2268. }
  2269. }
  2270. else {
  2271. skip = !(lua_toboolean (L, -1));
  2272. }
  2273. }
  2274. /* Result + error function */
  2275. lua_settop (L, 0);
  2276. if (skip) {
  2277. msg_info_task ("skip rule %s as its condition callback returned"
  2278. " false", rule->name);
  2279. continue;
  2280. }
  2281. }
  2282. rules ++;
  2283. res = 0;
  2284. if (is_hash) {
  2285. GPtrArray *args;
  2286. const rspamd_ftok_t *arg;
  2287. guint i;
  2288. args = rspamd_http_message_find_header_multiple (msg, "Hash");
  2289. if (args) {
  2290. struct fuzzy_cmd_io *io;
  2291. commands = g_ptr_array_sized_new (args->len);
  2292. for (i = 0; i < args->len; i ++) {
  2293. arg = g_ptr_array_index (args, i);
  2294. io = fuzzy_cmd_hash (rule, cmd, arg, flag, value,
  2295. task->task_pool);
  2296. if (io) {
  2297. g_ptr_array_add (commands, io);
  2298. }
  2299. }
  2300. res = register_fuzzy_controller_call (conn_ent,
  2301. rule,
  2302. task,
  2303. commands,
  2304. saved,
  2305. err);
  2306. rspamd_mempool_add_destructor (task->task_pool,
  2307. rspamd_ptr_array_free_hard, commands);
  2308. g_ptr_array_free (args, TRUE);
  2309. }
  2310. else {
  2311. rspamd_controller_send_error (conn_ent, 400,
  2312. "No hash defined");
  2313. rspamd_task_free (task);
  2314. return;
  2315. }
  2316. }
  2317. else {
  2318. commands = fuzzy_generate_commands (task, rule, cmd, flag, value);
  2319. if (commands != NULL) {
  2320. res = register_fuzzy_controller_call (conn_ent,
  2321. rule,
  2322. task,
  2323. commands,
  2324. saved,
  2325. err);
  2326. rspamd_mempool_add_destructor (task->task_pool,
  2327. rspamd_ptr_array_free_hard, commands);
  2328. }
  2329. }
  2330. if (res) {
  2331. processed = TRUE;
  2332. }
  2333. }
  2334. if (res == -1) {
  2335. msg_warn_task ("<%s>: cannot send fuzzy request: %s", task->message_id,
  2336. strerror (errno));
  2337. rspamd_controller_send_error (conn_ent, 400, "Message sending error");
  2338. rspamd_task_free (task);
  2339. return;
  2340. }
  2341. else if (!processed) {
  2342. if (rules) {
  2343. msg_warn_task ("<%s>: no content to generate fuzzy",
  2344. task->message_id);
  2345. rspamd_controller_send_error (conn_ent, 404,
  2346. "No content to generate fuzzy for flag %d", flag);
  2347. }
  2348. else {
  2349. msg_warn_task ("<%s>: no fuzzy rules found for flag %d",
  2350. task->message_id,
  2351. flag);
  2352. rspamd_controller_send_error (conn_ent, 404,
  2353. "No fuzzy rules matched for flag %d", flag);
  2354. }
  2355. rspamd_task_free (task);
  2356. return;
  2357. }
  2358. return;
  2359. }
  2360. static int
  2361. fuzzy_controller_handler (struct rspamd_http_connection_entry *conn_ent,
  2362. struct rspamd_http_message *msg, struct module_ctx *ctx, gint cmd,
  2363. gboolean is_hash)
  2364. {
  2365. const rspamd_ftok_t *arg;
  2366. glong value = 1, flag = 0;
  2367. if (!fuzzy_module_ctx->enabled) {
  2368. msg_err ("fuzzy_check module is not enabled");
  2369. rspamd_controller_send_error (conn_ent, 500, "Module disabled");
  2370. return 0;
  2371. }
  2372. if (fuzzy_module_ctx->fuzzy_rules == NULL) {
  2373. msg_err ("fuzzy_check module has no rules defined");
  2374. rspamd_controller_send_error (conn_ent, 500, "Module has no rules");
  2375. return 0;
  2376. }
  2377. /* Get size */
  2378. arg = rspamd_http_message_find_header (msg, "Weight");
  2379. if (arg) {
  2380. errno = 0;
  2381. if (!rspamd_strtol (arg->begin, arg->len, &value)) {
  2382. msg_info ("error converting numeric argument %T", arg);
  2383. }
  2384. }
  2385. arg = rspamd_http_message_find_header (msg, "Flag");
  2386. if (arg) {
  2387. errno = 0;
  2388. if (!rspamd_strtol (arg->begin, arg->len, &flag)) {
  2389. msg_info ("error converting numeric argument %T", arg);
  2390. flag = 0;
  2391. }
  2392. }
  2393. else {
  2394. flag = 0;
  2395. arg = rspamd_http_message_find_header (msg, "Symbol");
  2396. /* Search flag by symbol */
  2397. if (arg) {
  2398. struct fuzzy_rule *rule;
  2399. guint i;
  2400. GHashTableIter it;
  2401. gpointer k, v;
  2402. struct fuzzy_mapping *map;
  2403. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2404. if (flag != 0) {
  2405. break;
  2406. }
  2407. g_hash_table_iter_init (&it, rule->mappings);
  2408. while (g_hash_table_iter_next (&it, &k, &v)) {
  2409. map = v;
  2410. if (strlen (map->symbol) == arg->len &&
  2411. rspamd_lc_cmp (map->symbol, arg->begin, arg->len) == 0) {
  2412. flag = map->fuzzy_flag;
  2413. break;
  2414. }
  2415. }
  2416. }
  2417. }
  2418. }
  2419. if (flag == 0) {
  2420. msg_err ("no flag defined to learn fuzzy");
  2421. rspamd_controller_send_error (conn_ent, 404, "Unknown or missing flag");
  2422. return 0;
  2423. }
  2424. fuzzy_process_handler (conn_ent, msg, cmd, value, flag,
  2425. (struct fuzzy_ctx *)ctx, is_hash);
  2426. return 0;
  2427. }
  2428. static inline gint
  2429. fuzzy_check_send_lua_learn (struct fuzzy_rule *rule,
  2430. struct rspamd_task *task,
  2431. GPtrArray *commands,
  2432. gint *saved,
  2433. GError **err)
  2434. {
  2435. struct fuzzy_learn_session *s;
  2436. struct upstream *selected;
  2437. rspamd_inet_addr_t *addr;
  2438. gint sock;
  2439. gint ret = -1;
  2440. /* Get upstream */
  2441. while ((selected = rspamd_upstream_get (rule->servers,
  2442. RSPAMD_UPSTREAM_SEQUENTIAL, NULL, 0))) {
  2443. /* Create UDP socket */
  2444. addr = rspamd_upstream_addr (selected);
  2445. if ((sock = rspamd_inet_address_connect (addr,
  2446. SOCK_DGRAM, TRUE)) == -1) {
  2447. rspamd_upstream_fail (selected);
  2448. }
  2449. else {
  2450. s =
  2451. rspamd_mempool_alloc0 (task->task_pool,
  2452. sizeof (struct fuzzy_learn_session));
  2453. msec_to_tv (fuzzy_module_ctx->io_timeout, &s->tv);
  2454. s->task = task;
  2455. s->addr = addr;
  2456. s->commands = commands;
  2457. s->http_entry = NULL;
  2458. s->server = selected;
  2459. s->saved = saved;
  2460. s->fd = sock;
  2461. s->err = err;
  2462. s->rule = rule;
  2463. s->session = task->s;
  2464. event_set (&s->ev, sock, EV_WRITE, fuzzy_controller_io_callback, s);
  2465. event_base_set (task->ev_base, &s->ev);
  2466. event_add (&s->ev, NULL);
  2467. evtimer_set (&s->timev, fuzzy_controller_timer_callback, s);
  2468. event_base_set (s->task->ev_base, &s->timev);
  2469. event_add (&s->timev, &s->tv);
  2470. rspamd_session_add_event (task->s,
  2471. fuzzy_lua_fin,
  2472. s,
  2473. g_quark_from_static_string ("fuzzy check"));
  2474. (*saved)++;
  2475. ret = 1;
  2476. }
  2477. }
  2478. return ret;
  2479. }
  2480. static gboolean
  2481. fuzzy_check_lua_process_learn (struct rspamd_task *task,
  2482. gint cmd, gint value, gint flag)
  2483. {
  2484. struct fuzzy_rule *rule;
  2485. gboolean processed = FALSE, res = TRUE;
  2486. guint i;
  2487. GError **err;
  2488. GPtrArray *commands;
  2489. gint *saved, rules = 0;
  2490. saved = rspamd_mempool_alloc0 (task->task_pool, sizeof (gint));
  2491. err = rspamd_mempool_alloc0 (task->task_pool, sizeof (GError *));
  2492. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2493. if (!res) {
  2494. break;
  2495. }
  2496. if (rule->read_only) {
  2497. continue;
  2498. }
  2499. /* Check for flag */
  2500. if (g_hash_table_lookup (rule->mappings,
  2501. GINT_TO_POINTER (flag)) == NULL) {
  2502. msg_info_task ("skip rule %s as it has no flag %d defined"
  2503. " false", rule->name, flag);
  2504. continue;
  2505. }
  2506. rules ++;
  2507. res = 0;
  2508. commands = fuzzy_generate_commands (task, rule, cmd, flag, value);
  2509. if (commands != NULL) {
  2510. res = fuzzy_check_send_lua_learn (rule, task, commands,
  2511. saved, err);
  2512. rspamd_mempool_add_destructor (task->task_pool,
  2513. rspamd_ptr_array_free_hard, commands);
  2514. }
  2515. if (res) {
  2516. processed = TRUE;
  2517. }
  2518. }
  2519. if (res == -1) {
  2520. msg_warn_task ("<%s>: cannot send fuzzy request: %s", task->message_id,
  2521. strerror (errno));
  2522. }
  2523. else if (!processed) {
  2524. if (rules) {
  2525. msg_warn_task ("<%s>: no content to generate fuzzy",
  2526. task->message_id);
  2527. return FALSE;
  2528. }
  2529. else {
  2530. msg_warn_task ("<%s>: no fuzzy rules found for flag %d",
  2531. task->message_id,
  2532. flag);
  2533. return FALSE;
  2534. }
  2535. }
  2536. return TRUE;
  2537. }
  2538. static gint
  2539. fuzzy_lua_learn_handler (lua_State *L)
  2540. {
  2541. struct rspamd_task *task = lua_check_task (L, 1);
  2542. guint flag = 0, weight = 1.0;
  2543. const gchar *symbol;
  2544. if (task) {
  2545. if (lua_type (L, 2) == LUA_TNUMBER) {
  2546. flag = lua_tonumber (L, 2);
  2547. }
  2548. else if (lua_type (L, 2) == LUA_TSTRING) {
  2549. struct fuzzy_rule *rule;
  2550. guint i;
  2551. GHashTableIter it;
  2552. gpointer k, v;
  2553. struct fuzzy_mapping *map;
  2554. symbol = lua_tostring (L, 2);
  2555. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2556. if (flag != 0) {
  2557. break;
  2558. }
  2559. g_hash_table_iter_init (&it, rule->mappings);
  2560. while (g_hash_table_iter_next (&it, &k, &v)) {
  2561. map = v;
  2562. if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
  2563. flag = map->fuzzy_flag;
  2564. break;
  2565. }
  2566. }
  2567. }
  2568. }
  2569. if (flag == 0) {
  2570. return luaL_error (L, "bad flag");
  2571. }
  2572. if (lua_type (L, 3) == LUA_TNUMBER) {
  2573. weight = lua_tonumber (L, 3);
  2574. }
  2575. lua_pushboolean (L,
  2576. fuzzy_check_lua_process_learn (task, FUZZY_WRITE, weight, flag));
  2577. }
  2578. else {
  2579. return luaL_error (L, "invalid arguments");
  2580. }
  2581. return 1;
  2582. }
  2583. static gint
  2584. fuzzy_lua_unlearn_handler (lua_State *L)
  2585. {
  2586. struct rspamd_task *task = lua_check_task (L, 1);
  2587. guint flag = 0, weight = 1.0;
  2588. const gchar *symbol;
  2589. if (task) {
  2590. if (lua_type (L, 2) == LUA_TNUMBER) {
  2591. flag = lua_tonumber (L, 1);
  2592. }
  2593. else if (lua_type (L, 2) == LUA_TSTRING) {
  2594. struct fuzzy_rule *rule;
  2595. guint i;
  2596. GHashTableIter it;
  2597. gpointer k, v;
  2598. struct fuzzy_mapping *map;
  2599. symbol = lua_tostring (L, 2);
  2600. PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
  2601. if (flag != 0) {
  2602. break;
  2603. }
  2604. g_hash_table_iter_init (&it, rule->mappings);
  2605. while (g_hash_table_iter_next (&it, &k, &v)) {
  2606. map = v;
  2607. if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
  2608. flag = map->fuzzy_flag;
  2609. break;
  2610. }
  2611. }
  2612. }
  2613. }
  2614. if (flag == 0) {
  2615. return luaL_error (L, "bad flag");
  2616. }
  2617. if (lua_type (L, 3) == LUA_TNUMBER) {
  2618. weight = lua_tonumber (L, 3);
  2619. }
  2620. lua_pushboolean (L,
  2621. fuzzy_check_lua_process_learn (task, FUZZY_DEL, weight, flag));
  2622. }
  2623. else {
  2624. return luaL_error (L, "invalid arguments");
  2625. }
  2626. return 1;
  2627. }
  2628. static gboolean
  2629. fuzzy_add_handler (struct rspamd_http_connection_entry *conn_ent,
  2630. struct rspamd_http_message *msg, struct module_ctx *ctx)
  2631. {
  2632. return fuzzy_controller_handler (conn_ent, msg,
  2633. ctx, FUZZY_WRITE, FALSE);
  2634. }
  2635. static gboolean
  2636. fuzzy_delete_handler (struct rspamd_http_connection_entry *conn_ent,
  2637. struct rspamd_http_message *msg, struct module_ctx *ctx)
  2638. {
  2639. return fuzzy_controller_handler (conn_ent, msg,
  2640. ctx, FUZZY_DEL, FALSE);
  2641. }
  2642. static gboolean
  2643. fuzzy_deletehash_handler (struct rspamd_http_connection_entry *conn_ent,
  2644. struct rspamd_http_message *msg, struct module_ctx *ctx)
  2645. {
  2646. return fuzzy_controller_handler (conn_ent, msg,
  2647. ctx, FUZZY_DEL, TRUE);
  2648. }
  2649. static int
  2650. fuzzy_attach_controller (struct module_ctx *ctx, GHashTable *commands)
  2651. {
  2652. struct fuzzy_ctx *fctx = (struct fuzzy_ctx *)ctx;
  2653. struct rspamd_custom_controller_command *cmd;
  2654. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  2655. cmd->privilleged = TRUE;
  2656. cmd->require_message = TRUE;
  2657. cmd->handler = fuzzy_add_handler;
  2658. cmd->ctx = ctx;
  2659. g_hash_table_insert (commands, "/fuzzyadd", cmd);
  2660. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  2661. cmd->privilleged = TRUE;
  2662. cmd->require_message = TRUE;
  2663. cmd->handler = fuzzy_delete_handler;
  2664. cmd->ctx = ctx;
  2665. g_hash_table_insert (commands, "/fuzzydel", cmd);
  2666. cmd = rspamd_mempool_alloc (fctx->fuzzy_pool, sizeof (*cmd));
  2667. cmd->privilleged = TRUE;
  2668. cmd->require_message = FALSE;
  2669. cmd->handler = fuzzy_deletehash_handler;
  2670. cmd->ctx = ctx;
  2671. g_hash_table_insert (commands, "/fuzzydelhash", cmd);
  2672. return 0;
  2673. }