Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

map_helpers.c 44KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996
  1. /*-
  2. * Copyright 2018 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "map_helpers.h"
  17. #include "map_private.h"
  18. #include "khash.h"
  19. #include "radix.h"
  20. #include "rspamd.h"
  21. #include "cryptobox.h"
  22. #include "mempool_vars_internal.h"
  23. #include "contrib/fastutf8/fastutf8.h"
  24. #include "contrib/cdb/cdb.h"
  25. #ifdef WITH_HYPERSCAN
  26. #include "hs.h"
  27. #endif
  28. #ifndef WITH_PCRE2
  29. #include <pcre.h>
  30. #else
  31. #include <pcre2.h>
  32. #endif
  33. static const guint64 map_hash_seed = 0xdeadbabeULL;
  34. static const gchar * const hash_fill = "1";
  35. struct rspamd_map_helper_value {
  36. gsize hits;
  37. gconstpointer key;
  38. gchar value[]; /* Null terminated */
  39. };
  40. #define rspamd_map_ftok_hash(t) (rspamd_icase_hash((t).begin, (t).len, rspamd_hash_seed ()))
  41. #define rspamd_map_ftok_equal(a, b) ((a).len == (b).len && rspamd_lc_cmp((a).begin, (b).begin, (a).len) == 0)
  42. KHASH_INIT (rspamd_map_hash, rspamd_ftok_t,
  43. struct rspamd_map_helper_value *, true,
  44. rspamd_map_ftok_hash, rspamd_map_ftok_equal);
  45. struct rspamd_radix_map_helper {
  46. rspamd_mempool_t *pool;
  47. khash_t(rspamd_map_hash) *htb;
  48. radix_compressed_t *trie;
  49. struct rspamd_map *map;
  50. rspamd_cryptobox_fast_hash_state_t hst;
  51. };
  52. struct rspamd_hash_map_helper {
  53. rspamd_mempool_t *pool;
  54. khash_t(rspamd_map_hash) *htb;
  55. struct rspamd_map *map;
  56. rspamd_cryptobox_fast_hash_state_t hst;
  57. };
  58. struct rspamd_cdb_map_helper {
  59. GQueue cdbs;
  60. struct rspamd_map *map;
  61. rspamd_cryptobox_fast_hash_state_t hst;
  62. gsize total_size;
  63. };
  64. struct rspamd_regexp_map_helper {
  65. rspamd_cryptobox_hash_state_t hst;
  66. guchar re_digest[rspamd_cryptobox_HASHBYTES];
  67. rspamd_mempool_t *pool;
  68. struct rspamd_map *map;
  69. GPtrArray *regexps;
  70. GPtrArray *values;
  71. khash_t(rspamd_map_hash) *htb;
  72. enum rspamd_regexp_map_flags map_flags;
  73. #ifdef WITH_HYPERSCAN
  74. hs_database_t *hs_db;
  75. hs_scratch_t *hs_scratch;
  76. gchar **patterns;
  77. gint *flags;
  78. gint *ids;
  79. #endif
  80. };
  /**
   * FSM for parsing lists
   */

  /*
   * Helpers for rspamd_parse_kv_list (): both expect the locals `c` (start
   * of the token) and `p` (one past its end). Leading spaces are skipped,
   * the range [c, p) is copied to a fresh g_malloc'ed buffer (`key` or
   * `value`, to be released with g_free ()) and the stripped view of that
   * buffer is stored in `stripped_key` / `stripped_value`.
   *
   * The bound is tested before `*c` is read, so the loop never looks at
   * the byte at `p`, which may lie outside of the chunk.
   */
  #define MAP_STORE_KEY do { \
      while (p > c && g_ascii_isspace (*c)) { c ++; } \
      key = g_malloc (p - c + 1); \
      rspamd_strlcpy (key, c, p - c + 1); \
      stripped_key = g_strstrip (key); \
  } while (0)

  #define MAP_STORE_VALUE do { \
      while (p > c && g_ascii_isspace (*c)) { c ++; } \
      value = g_malloc (p - c + 1); \
      rspamd_strlcpy (value, c, p - c + 1); \
      stripped_value = g_strstrip (value); \
  } while (0)
  96. gchar *
  97. rspamd_parse_kv_list (
  98. gchar * chunk,
  99. gint len,
  100. struct map_cb_data *data,
  101. rspamd_map_insert_func func,
  102. const gchar *default_value,
  103. gboolean final)
  104. {
  105. enum {
  106. map_skip_spaces_before_key = 0,
  107. map_read_key,
  108. map_read_key_quoted,
  109. map_read_key_slashed,
  110. map_skip_spaces_after_key,
  111. map_backslash_quoted,
  112. map_backslash_slashed,
  113. map_read_key_after_slash,
  114. map_read_value,
  115. map_read_comment_start,
  116. map_skip_comment,
  117. map_read_eol,
  118. };
  119. gchar *c, *p, *key = NULL, *value = NULL, *stripped_key, *stripped_value, *end;
  120. struct rspamd_map *map = data->map;
  121. guint line_number = 0;
  122. p = chunk;
  123. c = p;
  124. end = p + len;
  125. while (p < end) {
  126. switch (data->state) {
  127. case map_skip_spaces_before_key:
  128. if (g_ascii_isspace (*p)) {
  129. p ++;
  130. }
  131. else {
  132. if (*p == '"') {
  133. p++;
  134. c = p;
  135. data->state = map_read_key_quoted;
  136. }
  137. else if (*p == '/') {
  138. /* Note that c is on '/' here as '/' is a part of key */
  139. c = p;
  140. p++;
  141. data->state = map_read_key_slashed;
  142. }
  143. else {
  144. c = p;
  145. data->state = map_read_key;
  146. }
  147. }
  148. break;
  149. case map_read_key:
  150. /* read key */
  151. /* Check here comments, eol and end of buffer */
  152. if (*p == '#' && (p == c || *(p - 1) != '\\')) {
  153. if (p - c > 0) {
  154. /* Store a single key */
  155. MAP_STORE_KEY;
  156. func (data->cur_data, stripped_key, default_value);
  157. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  158. stripped_key, default_value, line_number);
  159. g_free (key);
  160. }
  161. key = NULL;
  162. data->state = map_read_comment_start;
  163. }
  164. else if (*p == '\r' || *p == '\n') {
  165. if (p - c > 0) {
  166. /* Store a single key */
  167. MAP_STORE_KEY;
  168. func (data->cur_data, stripped_key, default_value);
  169. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  170. stripped_key, default_value, line_number);
  171. g_free (key);
  172. }
  173. data->state = map_read_eol;
  174. key = NULL;
  175. }
  176. else if (g_ascii_isspace (*p)) {
  177. if (p - c > 0) {
  178. MAP_STORE_KEY;
  179. data->state = map_skip_spaces_after_key;
  180. }
  181. else {
  182. msg_err_map ("empty or invalid key found on line %d", line_number);
  183. data->state = map_skip_comment;
  184. }
  185. }
  186. else {
  187. p++;
  188. }
  189. break;
  190. case map_read_key_quoted:
  191. if (*p == '\\') {
  192. data->state = map_backslash_quoted;
  193. p ++;
  194. }
  195. else if (*p == '"') {
  196. /* Allow empty keys in this case */
  197. if (p - c >= 0) {
  198. MAP_STORE_KEY;
  199. data->state = map_skip_spaces_after_key;
  200. }
  201. else {
  202. g_assert_not_reached ();
  203. }
  204. p ++;
  205. }
  206. else {
  207. p ++;
  208. }
  209. break;
  210. case map_read_key_slashed:
  211. if (*p == '\\') {
  212. data->state = map_backslash_slashed;
  213. p ++;
  214. }
  215. else if (*p == '/') {
  216. /* Allow empty keys in this case */
  217. if (p - c >= 0) {
  218. data->state = map_read_key_after_slash;
  219. }
  220. else {
  221. g_assert_not_reached ();
  222. }
  223. }
  224. else {
  225. p ++;
  226. }
  227. break;
  228. case map_read_key_after_slash:
  229. /*
  230. * This state is equal to reading of key but '/' is not
  231. * treated specially
  232. */
  233. if (*p == '#') {
  234. if (p - c > 0) {
  235. /* Store a single key */
  236. MAP_STORE_KEY;
  237. func (data->cur_data, stripped_key, default_value);
  238. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  239. stripped_key, default_value, line_number);
  240. g_free (key);
  241. key = NULL;
  242. }
  243. data->state = map_read_comment_start;
  244. }
  245. else if (*p == '\r' || *p == '\n') {
  246. if (p - c > 0) {
  247. /* Store a single key */
  248. MAP_STORE_KEY;
  249. func (data->cur_data, stripped_key, default_value);
  250. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  251. stripped_key, default_value, line_number);
  252. g_free (key);
  253. key = NULL;
  254. }
  255. data->state = map_read_eol;
  256. key = NULL;
  257. }
  258. else if (g_ascii_isspace (*p)) {
  259. if (p - c > 0) {
  260. MAP_STORE_KEY;
  261. data->state = map_skip_spaces_after_key;
  262. }
  263. else {
  264. msg_err_map ("empty or invalid key found on line %d", line_number);
  265. data->state = map_skip_comment;
  266. }
  267. }
  268. else {
  269. p ++;
  270. }
  271. break;
  272. case map_backslash_quoted:
  273. p ++;
  274. data->state = map_read_key_quoted;
  275. break;
  276. case map_backslash_slashed:
  277. p ++;
  278. data->state = map_read_key_slashed;
  279. break;
  280. case map_skip_spaces_after_key:
  281. if (*p == ' ' || *p == '\t') {
  282. p ++;
  283. }
  284. else {
  285. c = p;
  286. data->state = map_read_value;
  287. }
  288. break;
  289. case map_read_value:
  290. if (key == NULL) {
  291. /* Ignore line */
  292. msg_err_map ("empty or invalid key found on line %d", line_number);
  293. data->state = map_skip_comment;
  294. }
  295. else {
  296. if (*p == '#') {
  297. if (p - c > 0) {
  298. /* Store a single key */
  299. MAP_STORE_VALUE;
  300. func (data->cur_data, stripped_key, stripped_value);
  301. msg_debug_map ("insert key value pair: %s -> %s; line: %d",
  302. stripped_key, stripped_value, line_number);
  303. g_free (key);
  304. g_free (value);
  305. key = NULL;
  306. value = NULL;
  307. } else {
  308. func (data->cur_data, stripped_key, default_value);
  309. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  310. stripped_key, default_value, line_number);
  311. g_free (key);
  312. key = NULL;
  313. }
  314. data->state = map_read_comment_start;
  315. } else if (*p == '\r' || *p == '\n') {
  316. if (p - c > 0) {
  317. /* Store a single key */
  318. MAP_STORE_VALUE;
  319. func (data->cur_data, stripped_key, stripped_value);
  320. msg_debug_map ("insert key value pair: %s -> %s",
  321. stripped_key, stripped_value);
  322. g_free (key);
  323. g_free (value);
  324. key = NULL;
  325. value = NULL;
  326. } else {
  327. func (data->cur_data, stripped_key, default_value);
  328. msg_debug_map ("insert key only pair: %s -> %s",
  329. stripped_key, default_value);
  330. g_free (key);
  331. key = NULL;
  332. }
  333. data->state = map_read_eol;
  334. key = NULL;
  335. }
  336. else {
  337. p++;
  338. }
  339. }
  340. break;
  341. case map_read_comment_start:
  342. if (*p == '#') {
  343. data->state = map_skip_comment;
  344. p ++;
  345. key = NULL;
  346. value = NULL;
  347. }
  348. else {
  349. g_assert_not_reached ();
  350. }
  351. break;
  352. case map_skip_comment:
  353. if (*p == '\r' || *p == '\n') {
  354. data->state = map_read_eol;
  355. }
  356. else {
  357. p ++;
  358. }
  359. break;
  360. case map_read_eol:
  361. /* Skip \r\n and whitespaces */
  362. if (*p == '\r' || *p == '\n') {
  363. if (*p == '\n') {
  364. /* We don't care about \r only line separators, they are too rare */
  365. line_number ++;
  366. }
  367. p++;
  368. }
  369. else {
  370. data->state = map_skip_spaces_before_key;
  371. }
  372. break;
  373. default:
  374. g_assert_not_reached ();
  375. break;
  376. }
  377. }
  378. if (final) {
  379. /* Examine the state */
  380. switch (data->state) {
  381. case map_read_key:
  382. if (p - c > 0) {
  383. /* Store a single key */
  384. MAP_STORE_KEY;
  385. func (data->cur_data, stripped_key, default_value);
  386. msg_debug_map ("insert key only pair: %s -> %s",
  387. stripped_key, default_value);
  388. g_free (key);
  389. key = NULL;
  390. }
  391. break;
  392. case map_read_value:
  393. if (key == NULL) {
  394. /* Ignore line */
  395. msg_err_map ("empty or invalid key found on line %d", line_number);
  396. data->state = map_skip_comment;
  397. }
  398. else {
  399. if (p - c > 0) {
  400. /* Store a single key */
  401. MAP_STORE_VALUE;
  402. func (data->cur_data, stripped_key, stripped_value);
  403. msg_debug_map ("insert key value pair: %s -> %s",
  404. stripped_key, stripped_value);
  405. g_free (key);
  406. g_free (value);
  407. key = NULL;
  408. value = NULL;
  409. } else {
  410. func (data->cur_data, stripped_key, default_value);
  411. msg_debug_map ("insert key only pair: %s -> %s",
  412. stripped_key, default_value);
  413. g_free (key);
  414. key = NULL;
  415. }
  416. }
  417. break;
  418. }
  419. data->state = map_skip_spaces_before_key;
  420. }
  421. return c;
  422. }
  423. /**
  424. * Radix tree helper function
  425. */
  426. void
  427. rspamd_map_helper_insert_radix (gpointer st, gconstpointer key, gconstpointer value)
  428. {
  429. struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
  430. struct rspamd_map_helper_value *val;
  431. gsize vlen;
  432. khiter_t k;
  433. gconstpointer nk;
  434. rspamd_ftok_t tok;
  435. gint res;
  436. struct rspamd_map *map;
  437. map = r->map;
  438. tok.begin = key;
  439. tok.len = strlen (key);
  440. k = kh_get (rspamd_map_hash, r->htb, tok);
  441. if (k == kh_end (r->htb)) {
  442. nk = rspamd_mempool_strdup (r->pool, key);
  443. tok.begin = nk;
  444. k = kh_put (rspamd_map_hash, r->htb, tok, &res);
  445. }
  446. else {
  447. val = kh_value (r->htb, k);
  448. if (strcmp (value, val->value) == 0) {
  449. /* Same element, skip */
  450. return;
  451. }
  452. else {
  453. msg_warn_map ("duplicate radix entry found for map %s: %s (old value: '%s', new: '%s')",
  454. map->name, key, val->value, value);
  455. }
  456. nk = kh_key (r->htb, k).begin;
  457. val->key = nk;
  458. kh_value (r->htb, k) = val;
  459. return; /* do not touch radix in case of exact duplicate */
  460. }
  461. vlen = strlen (value);
  462. val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
  463. vlen + 1);
  464. memcpy (val->value, value, vlen);
  465. nk = kh_key (r->htb, k).begin;
  466. val->key = nk;
  467. kh_value (r->htb, k) = val;
  468. rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE,
  469. r->map->name);
  470. rspamd_cryptobox_fast_hash_update (&r->hst, nk, tok.len);
  471. }
  472. void
  473. rspamd_map_helper_insert_radix_resolve (gpointer st, gconstpointer key, gconstpointer value)
  474. {
  475. struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
  476. struct rspamd_map_helper_value *val;
  477. gsize vlen;
  478. khiter_t k;
  479. gconstpointer nk;
  480. rspamd_ftok_t tok;
  481. gint res;
  482. struct rspamd_map *map;
  483. map = r->map;
  484. tok.begin = key;
  485. tok.len = strlen (key);
  486. k = kh_get (rspamd_map_hash, r->htb, tok);
  487. if (k == kh_end (r->htb)) {
  488. nk = rspamd_mempool_strdup (r->pool, key);
  489. tok.begin = nk;
  490. k = kh_put (rspamd_map_hash, r->htb, tok, &res);
  491. }
  492. else {
  493. val = kh_value (r->htb, k);
  494. if (strcmp (value, val->value) == 0) {
  495. /* Same element, skip */
  496. return;
  497. }
  498. else {
  499. msg_warn_map ("duplicate radix entry found for map %s: %s (old value: '%s', new: '%s')",
  500. map->name, key, val->value, value);
  501. }
  502. nk = kh_key (r->htb, k).begin;
  503. val->key = nk;
  504. kh_value (r->htb, k) = val;
  505. return; /* do not touch radix in case of exact duplicate */
  506. }
  507. vlen = strlen (value);
  508. val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
  509. vlen + 1);
  510. memcpy (val->value, value, vlen);
  511. nk = kh_key (r->htb, k).begin;
  512. val->key = nk;
  513. kh_value (r->htb, k) = val;
  514. rspamd_radix_add_iplist (key, ",", r->trie, val, TRUE,
  515. r->map->name);
  516. rspamd_cryptobox_fast_hash_update (&r->hst, nk, tok.len);
  517. }
  518. void
  519. rspamd_map_helper_insert_hash (gpointer st, gconstpointer key, gconstpointer value)
  520. {
  521. struct rspamd_hash_map_helper *ht = st;
  522. struct rspamd_map_helper_value *val;
  523. khiter_t k;
  524. gconstpointer nk;
  525. gsize vlen;
  526. gint r;
  527. rspamd_ftok_t tok;
  528. struct rspamd_map *map;
  529. tok.begin = key;
  530. tok.len = strlen (key);
  531. map = ht->map;
  532. k = kh_get (rspamd_map_hash, ht->htb, tok);
  533. if (k == kh_end (ht->htb)) {
  534. nk = rspamd_mempool_strdup (ht->pool, key);
  535. tok.begin = nk;
  536. k = kh_put (rspamd_map_hash, ht->htb, tok, &r);
  537. }
  538. else {
  539. val = kh_value (ht->htb, k);
  540. if (strcmp (value, val->value) == 0) {
  541. /* Same element, skip */
  542. return;
  543. }
  544. else {
  545. msg_warn_map ("duplicate hash entry found for map %s: %s (old value: '%s', new: '%s')",
  546. map->name, key, val->value, value);
  547. }
  548. }
  549. /* Null termination due to alloc0 */
  550. vlen = strlen (value);
  551. val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) + vlen + 1);
  552. memcpy (val->value, value, vlen);
  553. tok = kh_key (ht->htb, k);
  554. nk = tok.begin;
  555. val->key = nk;
  556. kh_value (ht->htb, k) = val;
  557. rspamd_cryptobox_fast_hash_update (&ht->hst, nk, tok.len);
  558. }
  559. void
  560. rspamd_map_helper_insert_re (gpointer st, gconstpointer key, gconstpointer value)
  561. {
  562. struct rspamd_regexp_map_helper *re_map = st;
  563. struct rspamd_map *map;
  564. rspamd_regexp_t *re;
  565. gchar *escaped;
  566. GError *err = NULL;
  567. gint pcre_flags;
  568. gsize escaped_len;
  569. struct rspamd_map_helper_value *val;
  570. khiter_t k;
  571. rspamd_ftok_t tok;
  572. gconstpointer nk;
  573. gsize vlen;
  574. gint r;
  575. map = re_map->map;
  576. tok.begin = key;
  577. tok.len = strlen (key);
  578. k = kh_get (rspamd_map_hash, re_map->htb, tok);
  579. if (k == kh_end (re_map->htb)) {
  580. nk = rspamd_mempool_strdup (re_map->pool, key);
  581. tok.begin = nk;
  582. k = kh_put (rspamd_map_hash, re_map->htb, tok, &r);
  583. }
  584. else {
  585. val = kh_value (re_map->htb, k);
  586. /* Always warn about regexp duplicate as it's likely a bad mistake */
  587. msg_warn_map ("duplicate re entry found for map %s: %s (old value: '%s', new: '%s')",
  588. map->name, key, val->value, value);
  589. if (strcmp (val->value, value) == 0) {
  590. /* Same value, skip */
  591. return;
  592. }
  593. /* Replace value but do not touch regexp */
  594. nk = kh_key (re_map->htb, k).begin;
  595. val->key = nk;
  596. kh_value (re_map->htb, k) = val;
  597. return;
  598. }
  599. /* Check regexp stuff */
  600. if (re_map->map_flags & RSPAMD_REGEXP_MAP_FLAG_GLOB) {
  601. escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len,
  602. RSPAMD_REGEXP_ESCAPE_GLOB|RSPAMD_REGEXP_ESCAPE_UTF);
  603. re = rspamd_regexp_new (escaped, NULL, &err);
  604. g_free (escaped);
  605. }
  606. else {
  607. re = rspamd_regexp_new (key, NULL, &err);
  608. }
  609. if (re == NULL) {
  610. msg_err_map ("cannot parse regexp %s: %e", key, err);
  611. if (err) {
  612. g_error_free (err);
  613. }
  614. return;
  615. }
  616. vlen = strlen (value);
  617. val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) +
  618. vlen + 1);
  619. memcpy (val->value, value, vlen); /* Null terminated due to alloc0 previously */
  620. nk = kh_key (re_map->htb, k).begin;
  621. val->key = nk;
  622. kh_value (re_map->htb, k) = val;
  623. rspamd_cryptobox_hash_update (&re_map->hst, nk, tok.len);
  624. pcre_flags = rspamd_regexp_get_pcre_flags (re);
  625. #ifndef WITH_PCRE2
  626. if (pcre_flags & PCRE_FLAG(UTF8)) {
  627. re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
  628. }
  629. #else
  630. if (pcre_flags & PCRE_FLAG(UTF)) {
  631. re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
  632. }
  633. #endif
  634. g_ptr_array_add (re_map->regexps, re);
  635. g_ptr_array_add (re_map->values, val);
  636. }
  637. static void
  638. rspamd_map_helper_traverse_regexp (void *data,
  639. rspamd_map_traverse_cb cb,
  640. gpointer cbdata,
  641. gboolean reset_hits)
  642. {
  643. rspamd_ftok_t tok;
  644. struct rspamd_map_helper_value *val;
  645. struct rspamd_regexp_map_helper *re_map = data;
  646. kh_foreach (re_map->htb, tok, val, {
  647. if (!cb (tok.begin, val->value, val->hits, cbdata)) {
  648. break;
  649. }
  650. if (reset_hits) {
  651. val->hits = 0;
  652. }
  653. });
  654. }
  655. struct rspamd_hash_map_helper *
  656. rspamd_map_helper_new_hash (struct rspamd_map *map)
  657. {
  658. struct rspamd_hash_map_helper *htb;
  659. rspamd_mempool_t *pool;
  660. if (map) {
  661. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  662. map->tag, 0);
  663. }
  664. else {
  665. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  666. NULL, 0);
  667. }
  668. htb = rspamd_mempool_alloc0_type(pool, struct rspamd_hash_map_helper);
  669. htb->htb = kh_init (rspamd_map_hash);
  670. htb->pool = pool;
  671. htb->map = map;
  672. rspamd_cryptobox_fast_hash_init (&htb->hst, map_hash_seed);
  673. return htb;
  674. }
  675. void
  676. rspamd_map_helper_destroy_hash (struct rspamd_hash_map_helper *r)
  677. {
  678. if (r == NULL || r->pool == NULL) {
  679. return;
  680. }
  681. rspamd_mempool_t *pool = r->pool;
  682. kh_destroy (rspamd_map_hash, r->htb);
  683. memset (r, 0, sizeof (*r));
  684. rspamd_mempool_delete (pool);
  685. }
  686. static void
  687. rspamd_map_helper_traverse_hash (void *data,
  688. rspamd_map_traverse_cb cb,
  689. gpointer cbdata,
  690. gboolean reset_hits)
  691. {
  692. rspamd_ftok_t tok;
  693. struct rspamd_map_helper_value *val;
  694. struct rspamd_hash_map_helper *ht = data;
  695. kh_foreach (ht->htb, tok, val, {
  696. if (!cb (tok.begin, val->value, val->hits, cbdata)) {
  697. break;
  698. }
  699. if (reset_hits) {
  700. val->hits = 0;
  701. }
  702. });
  703. }
  704. struct rspamd_radix_map_helper *
  705. rspamd_map_helper_new_radix (struct rspamd_map *map)
  706. {
  707. struct rspamd_radix_map_helper *r;
  708. rspamd_mempool_t *pool;
  709. const gchar *name = "unnamed";
  710. if (map) {
  711. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  712. map->tag, 0);
  713. name = map->name;
  714. }
  715. else {
  716. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  717. NULL, 0);
  718. }
  719. r = rspamd_mempool_alloc0_type (pool, struct rspamd_radix_map_helper);
  720. r->trie = radix_create_compressed_with_pool (pool, name);
  721. r->htb = kh_init (rspamd_map_hash);
  722. r->pool = pool;
  723. r->map = map;
  724. rspamd_cryptobox_fast_hash_init (&r->hst, map_hash_seed);
  725. return r;
  726. }
  727. void
  728. rspamd_map_helper_destroy_radix (struct rspamd_radix_map_helper *r)
  729. {
  730. if (r == NULL || !r->pool) {
  731. return;
  732. }
  733. kh_destroy (rspamd_map_hash, r->htb);
  734. rspamd_mempool_t *pool = r->pool;
  735. memset (r, 0, sizeof (*r));
  736. rspamd_mempool_delete (pool);
  737. }
  738. static void
  739. rspamd_map_helper_traverse_radix (void *data,
  740. rspamd_map_traverse_cb cb,
  741. gpointer cbdata,
  742. gboolean reset_hits)
  743. {
  744. rspamd_ftok_t tok;
  745. struct rspamd_map_helper_value *val;
  746. struct rspamd_radix_map_helper *r = data;
  747. kh_foreach (r->htb, tok, val, {
  748. if (!cb (tok.begin, val->value, val->hits, cbdata)) {
  749. break;
  750. }
  751. if (reset_hits) {
  752. val->hits = 0;
  753. }
  754. });
  755. }
  756. struct rspamd_regexp_map_helper *
  757. rspamd_map_helper_new_regexp (struct rspamd_map *map,
  758. enum rspamd_regexp_map_flags flags)
  759. {
  760. struct rspamd_regexp_map_helper *re_map;
  761. rspamd_mempool_t *pool;
  762. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  763. map->tag, 0);
  764. re_map = rspamd_mempool_alloc0_type (pool, struct rspamd_regexp_map_helper);
  765. re_map->pool = pool;
  766. re_map->values = g_ptr_array_new ();
  767. re_map->regexps = g_ptr_array_new ();
  768. re_map->map = map;
  769. re_map->map_flags = flags;
  770. re_map->htb = kh_init (rspamd_map_hash);
  771. rspamd_cryptobox_hash_init (&re_map->hst, NULL, 0);
  772. return re_map;
  773. }
  774. void
  775. rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map)
  776. {
  777. rspamd_regexp_t *re;
  778. guint i;
  779. if (!re_map || !re_map->regexps) {
  780. return;
  781. }
  782. #ifdef WITH_HYPERSCAN
  783. if (re_map->hs_scratch) {
  784. hs_free_scratch (re_map->hs_scratch);
  785. }
  786. if (re_map->hs_db) {
  787. hs_free_database (re_map->hs_db);
  788. }
  789. if (re_map->patterns) {
  790. for (i = 0; i < re_map->regexps->len; i ++) {
  791. g_free (re_map->patterns[i]);
  792. }
  793. g_free (re_map->patterns);
  794. }
  795. if (re_map->flags) {
  796. g_free (re_map->flags);
  797. }
  798. if (re_map->ids) {
  799. g_free (re_map->ids);
  800. }
  801. #endif
  802. for (i = 0; i < re_map->regexps->len; i ++) {
  803. re = g_ptr_array_index (re_map->regexps, i);
  804. rspamd_regexp_unref (re);
  805. }
  806. g_ptr_array_free (re_map->regexps, TRUE);
  807. g_ptr_array_free (re_map->values, TRUE);
  808. kh_destroy (rspamd_map_hash, re_map->htb);
  809. rspamd_mempool_t *pool = re_map->pool;
  810. memset (re_map, 0, sizeof (*re_map));
  811. rspamd_mempool_delete (pool);
  812. }
  813. gchar *
  814. rspamd_kv_list_read (
  815. gchar * chunk,
  816. gint len,
  817. struct map_cb_data *data,
  818. gboolean final)
  819. {
  820. if (data->cur_data == NULL) {
  821. data->cur_data = rspamd_map_helper_new_hash (data->map);
  822. }
  823. return rspamd_parse_kv_list (
  824. chunk,
  825. len,
  826. data,
  827. rspamd_map_helper_insert_hash,
  828. "",
  829. final);
  830. }
  831. void
  832. rspamd_kv_list_fin (struct map_cb_data *data, void **target)
  833. {
  834. struct rspamd_map *map = data->map;
  835. struct rspamd_hash_map_helper *htb;
  836. if (data->errored) {
  837. /* Clean up the current data and do not touch prev data */
  838. if (data->cur_data) {
  839. msg_info_map ("cleanup unfinished new data as error occurred for %s",
  840. map->name);
  841. htb = (struct rspamd_hash_map_helper *) data->cur_data;
  842. rspamd_map_helper_destroy_hash(htb);
  843. data->cur_data = NULL;
  844. }
  845. }
  846. else {
  847. if (data->cur_data) {
  848. htb = (struct rspamd_hash_map_helper *) data->cur_data;
  849. msg_info_map ("read hash of %d elements from %s", kh_size(htb->htb),
  850. map->name);
  851. data->map->traverse_function = rspamd_map_helper_traverse_hash;
  852. data->map->nelts = kh_size (htb->htb);
  853. data->map->digest = rspamd_cryptobox_fast_hash_final(&htb->hst);
  854. }
  855. if (target) {
  856. *target = data->cur_data;
  857. }
  858. if (data->prev_data) {
  859. htb = (struct rspamd_hash_map_helper *) data->prev_data;
  860. rspamd_map_helper_destroy_hash(htb);
  861. }
  862. }
  863. }
  864. void
  865. rspamd_kv_list_dtor (struct map_cb_data *data)
  866. {
  867. struct rspamd_hash_map_helper *htb;
  868. if (data->cur_data) {
  869. htb = (struct rspamd_hash_map_helper *)data->cur_data;
  870. rspamd_map_helper_destroy_hash (htb);
  871. }
  872. }
  873. gchar *
  874. rspamd_radix_read (
  875. gchar * chunk,
  876. gint len,
  877. struct map_cb_data *data,
  878. gboolean final)
  879. {
  880. struct rspamd_radix_map_helper *r;
  881. struct rspamd_map *map = data->map;
  882. if (data->cur_data == NULL) {
  883. r = rspamd_map_helper_new_radix (map);
  884. data->cur_data = r;
  885. }
  886. return rspamd_parse_kv_list (
  887. chunk,
  888. len,
  889. data,
  890. rspamd_map_helper_insert_radix,
  891. hash_fill,
  892. final);
  893. }
  894. void
  895. rspamd_radix_fin (struct map_cb_data *data, void **target)
  896. {
  897. struct rspamd_map *map = data->map;
  898. struct rspamd_radix_map_helper *r;
  899. if (data->errored) {
  900. /* Clean up the current data and do not touch prev data */
  901. if (data->cur_data) {
  902. msg_info_map ("cleanup unfinished new data as error occurred for %s",
  903. map->name);
  904. r = (struct rspamd_radix_map_helper *) data->cur_data;
  905. rspamd_map_helper_destroy_radix(r);
  906. data->cur_data = NULL;
  907. }
  908. }
  909. else {
  910. if (data->cur_data) {
  911. r = (struct rspamd_radix_map_helper *) data->cur_data;
  912. msg_info_map ("read radix trie of %z elements: %s",
  913. radix_get_size(r->trie), radix_get_info(r->trie));
  914. data->map->traverse_function = rspamd_map_helper_traverse_radix;
  915. data->map->nelts = kh_size (r->htb);
  916. data->map->digest = rspamd_cryptobox_fast_hash_final(&r->hst);
  917. }
  918. if (target) {
  919. *target = data->cur_data;
  920. }
  921. if (data->prev_data) {
  922. r = (struct rspamd_radix_map_helper *) data->prev_data;
  923. rspamd_map_helper_destroy_radix(r);
  924. }
  925. }
  926. }
  927. void
  928. rspamd_radix_dtor (struct map_cb_data *data)
  929. {
  930. struct rspamd_radix_map_helper *r;
  931. if (data->cur_data) {
  932. r = (struct rspamd_radix_map_helper *)data->cur_data;
  933. rspamd_map_helper_destroy_radix (r);
  934. }
  935. }
  936. #ifdef WITH_HYPERSCAN
/* Context handed to rspamd_re_maps_cache_cleanup_dtor */
struct rspamd_re_maps_cache_dtor_cbdata {
	/* Config whose cur_worker field is inspected by the destructor */
	struct rspamd_config *cfg;
	/* Referenced table of cache file basenames that must be kept */
	GHashTable *valid_re_hashes;
	/* Directory scanned for "*.hsmc" files; freed by the destructor */
	gchar *dirname;
};
  942. static void
  943. rspamd_re_maps_cache_cleanup_dtor (gpointer ud)
  944. {
  945. struct rspamd_re_maps_cache_dtor_cbdata *cbd =
  946. (struct rspamd_re_maps_cache_dtor_cbdata *)ud;
  947. GPtrArray *cache_files;
  948. GError *err = NULL;
  949. struct rspamd_config *cfg;
  950. cfg = cbd->cfg;
  951. if (cfg->cur_worker != NULL) {
  952. /* Skip dtor, limit it to main process only */
  953. return;
  954. }
  955. cache_files = rspamd_glob_path (cbd->dirname, "*.hsmc", FALSE, &err);
  956. if (!cache_files) {
  957. msg_err_config ("cannot glob files in %s: %e", cbd->dirname, err);
  958. g_error_free (err);
  959. }
  960. else {
  961. const gchar *fname;
  962. guint i;
  963. PTR_ARRAY_FOREACH (cache_files, i, fname) {
  964. gchar *basename = g_path_get_basename (fname);
  965. if (g_hash_table_lookup (cbd->valid_re_hashes, basename) == NULL) {
  966. gchar *dir;
  967. dir = g_path_get_dirname (fname);
  968. /* Sanity check to avoid removal of something bad */
  969. if (strcmp (dir, cbd->dirname) != 0) {
  970. msg_err_config ("bogus file found: %s in %s, skip deleting",
  971. fname, dir);
  972. }
  973. else {
  974. if (unlink (fname) == -1) {
  975. msg_err_config ("cannot delete obsolete file %s in %s: %s",
  976. fname, dir, strerror (errno));
  977. }
  978. else {
  979. msg_info_config ("deleted obsolete file %s in %s",
  980. fname, dir);
  981. }
  982. }
  983. g_free (dir);
  984. }
  985. else {
  986. msg_debug_config ("valid re cache file %s", fname);
  987. }
  988. g_free (basename);
  989. }
  990. g_ptr_array_free (cache_files, TRUE);
  991. }
  992. g_hash_table_unref (cbd->valid_re_hashes);
  993. g_free (cbd->dirname);
  994. }
  995. static void
  996. rspamd_re_map_cache_update (const gchar *fname, struct rspamd_config *cfg)
  997. {
  998. GHashTable *valid_re_hashes;
  999. valid_re_hashes = rspamd_mempool_get_variable (cfg->cfg_pool,
  1000. RSPAMD_MEMPOOL_RE_MAPS_CACHE);
  1001. if (!valid_re_hashes) {
  1002. valid_re_hashes = g_hash_table_new_full (g_str_hash, g_str_equal,
  1003. g_free, NULL);
  1004. rspamd_mempool_set_variable (cfg->cfg_pool,
  1005. RSPAMD_MEMPOOL_RE_MAPS_CACHE,
  1006. valid_re_hashes, (rspamd_mempool_destruct_t)g_hash_table_unref);
  1007. /* We also add a cleanup dtor for all hashes */
  1008. static struct rspamd_re_maps_cache_dtor_cbdata cbd;
  1009. cbd.valid_re_hashes = g_hash_table_ref (valid_re_hashes);
  1010. cbd.cfg = cfg;
  1011. cbd.dirname = g_path_get_dirname (fname);
  1012. rspamd_mempool_add_destructor (cfg->cfg_pool,
  1013. rspamd_re_maps_cache_cleanup_dtor, &cbd);
  1014. }
  1015. g_hash_table_insert (valid_re_hashes, g_path_get_basename (fname), "1");
  1016. }
  1017. static gboolean
  1018. rspamd_try_load_re_map_cache (struct rspamd_regexp_map_helper *re_map)
  1019. {
  1020. gchar fp[PATH_MAX];
  1021. gpointer data;
  1022. gsize len;
  1023. struct rspamd_map *map;
  1024. map = re_map->map;
  1025. if (!map->cfg->hs_cache_dir) {
  1026. return FALSE;
  1027. }
  1028. rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc",
  1029. map->cfg->hs_cache_dir,
  1030. (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
  1031. if ((data = rspamd_file_xmap (fp, PROT_READ, &len, TRUE)) != NULL) {
  1032. if (hs_deserialize_database (data, len, &re_map->hs_db) == HS_SUCCESS) {
  1033. rspamd_re_map_cache_update (fp, map->cfg);
  1034. munmap (data, len);
  1035. msg_info_map ("loaded hypersan cache from %s (%Hz length) for %s",
  1036. fp, len, map->name);
  1037. return TRUE;
  1038. }
  1039. msg_info_map ("invalid hypersan cache in %s (%Hz length) for %s, removing file",
  1040. fp, len, map->name);
  1041. munmap (data, len);
  1042. /* Remove stale file */
  1043. (void)unlink (fp);
  1044. }
  1045. return FALSE;
  1046. }
  1047. static gboolean
  1048. rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
  1049. {
  1050. gchar fp[PATH_MAX], np[PATH_MAX];
  1051. gsize len;
  1052. gint fd;
  1053. char *bytes = NULL;
  1054. struct rspamd_map *map;
  1055. map = re_map->map;
  1056. if (!map->cfg->hs_cache_dir) {
  1057. return FALSE;
  1058. }
  1059. rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc.tmp",
  1060. re_map->map->cfg->hs_cache_dir,
  1061. (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
  1062. if ((fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0)) != -1) {
  1063. if (hs_serialize_database (re_map->hs_db, &bytes, &len) == HS_SUCCESS) {
  1064. if (write (fd, bytes, len) == -1) {
  1065. msg_warn_map ("cannot write hyperscan cache to %s: %s",
  1066. fp, strerror (errno));
  1067. unlink (fp);
  1068. free (bytes);
  1069. }
  1070. else {
  1071. free (bytes);
  1072. fsync (fd);
  1073. rspamd_snprintf (np, sizeof (np), "%s/%*xs.hsmc",
  1074. re_map->map->cfg->hs_cache_dir,
  1075. (gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
  1076. if (rename (fp, np) == -1) {
  1077. msg_warn_map ("cannot rename hyperscan cache from %s to %s: %s",
  1078. fp, np, strerror (errno));
  1079. unlink (fp);
  1080. }
  1081. else {
  1082. msg_info_map ("written cached hyperscan data for %s to %s (%Hz length)",
  1083. map->name, np, len);
  1084. rspamd_re_map_cache_update (np, map->cfg);
  1085. }
  1086. }
  1087. }
  1088. else {
  1089. msg_warn_map ("cannot serialize hyperscan cache to %s: %s",
  1090. fp, strerror (errno));
  1091. unlink (fp);
  1092. }
  1093. close (fd);
  1094. }
  1095. return FALSE;
  1096. }
  1097. static gboolean
  1098. rspamd_re_map_cache_cleanup_old (struct rspamd_regexp_map_helper *old_re_map)
  1099. {
  1100. gchar fp[PATH_MAX];
  1101. struct rspamd_map *map;
  1102. map = old_re_map->map;
  1103. if (!map->cfg->hs_cache_dir) {
  1104. return FALSE;
  1105. }
  1106. rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc",
  1107. map->cfg->hs_cache_dir,
  1108. (gint)rspamd_cryptobox_HASHBYTES / 2, old_re_map->re_digest);
  1109. msg_info_map ("unlink stale cache file for %s: %s", map->name, fp);
  1110. if (unlink (fp) == -1) {
  1111. msg_warn_map ("cannot unlink stale cache file for %s (%s): %s",
  1112. map->name, fp, strerror (errno));
  1113. return FALSE;
  1114. }
  1115. GHashTable *valid_re_hashes;
  1116. valid_re_hashes = rspamd_mempool_get_variable (map->cfg->cfg_pool,
  1117. RSPAMD_MEMPOOL_RE_MAPS_CACHE);
  1118. if (valid_re_hashes) {
  1119. g_hash_table_remove (valid_re_hashes, fp);
  1120. }
  1121. return TRUE;
  1122. }
  1123. #endif
  1124. static void
  1125. rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
  1126. {
  1127. #ifdef WITH_HYPERSCAN
  1128. guint i;
  1129. hs_platform_info_t plt;
  1130. hs_compile_error_t *err;
  1131. struct rspamd_map *map;
  1132. rspamd_regexp_t *re;
  1133. gint pcre_flags;
  1134. map = re_map->map;
  1135. #if !defined(__aarch64__) && !defined(__powerpc64__)
  1136. if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
  1137. msg_info_map ("disable hyperscan for map %s, ssse3 instructions are not supported by CPU",
  1138. map->name);
  1139. return;
  1140. }
  1141. #endif
  1142. if (hs_populate_platform (&plt) != HS_SUCCESS) {
  1143. msg_err_map ("cannot populate hyperscan platform");
  1144. return;
  1145. }
  1146. re_map->patterns = g_new (gchar *, re_map->regexps->len);
  1147. re_map->flags = g_new (gint, re_map->regexps->len);
  1148. re_map->ids = g_new (gint, re_map->regexps->len);
  1149. for (i = 0; i < re_map->regexps->len; i ++) {
  1150. const gchar *pat;
  1151. gchar *escaped;
  1152. gint pat_flags;
  1153. re = g_ptr_array_index (re_map->regexps, i);
  1154. pcre_flags = rspamd_regexp_get_pcre_flags (re);
  1155. pat = rspamd_regexp_get_pattern (re);
  1156. pat_flags = rspamd_regexp_get_flags (re);
  1157. if (pat_flags & RSPAMD_REGEXP_FLAG_UTF) {
  1158. escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
  1159. RSPAMD_REGEXP_ESCAPE_RE|RSPAMD_REGEXP_ESCAPE_UTF);
  1160. re_map->flags[i] |= HS_FLAG_UTF8;
  1161. }
  1162. else {
  1163. escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
  1164. RSPAMD_REGEXP_ESCAPE_RE);
  1165. }
  1166. re_map->patterns[i] = escaped;
  1167. re_map->flags[i] = HS_FLAG_SINGLEMATCH;
  1168. #ifndef WITH_PCRE2
  1169. if (pcre_flags & PCRE_FLAG(UTF8)) {
  1170. re_map->flags[i] |= HS_FLAG_UTF8;
  1171. }
  1172. #else
  1173. if (pcre_flags & PCRE_FLAG(UTF)) {
  1174. re_map->flags[i] |= HS_FLAG_UTF8;
  1175. }
  1176. #endif
  1177. if (pcre_flags & PCRE_FLAG(CASELESS)) {
  1178. re_map->flags[i] |= HS_FLAG_CASELESS;
  1179. }
  1180. if (pcre_flags & PCRE_FLAG(MULTILINE)) {
  1181. re_map->flags[i] |= HS_FLAG_MULTILINE;
  1182. }
  1183. if (pcre_flags & PCRE_FLAG(DOTALL)) {
  1184. re_map->flags[i] |= HS_FLAG_DOTALL;
  1185. }
  1186. if (rspamd_regexp_get_maxhits (re) == 1) {
  1187. re_map->flags[i] |= HS_FLAG_SINGLEMATCH;
  1188. }
  1189. re_map->ids[i] = i;
  1190. }
  1191. if (re_map->regexps->len > 0 && re_map->patterns) {
  1192. if (!rspamd_try_load_re_map_cache (re_map)) {
  1193. gdouble ts1 = rspamd_get_ticks (FALSE);
  1194. if (hs_compile_multi ((const gchar **) re_map->patterns,
  1195. re_map->flags,
  1196. re_map->ids,
  1197. re_map->regexps->len,
  1198. HS_MODE_BLOCK,
  1199. &plt,
  1200. &re_map->hs_db,
  1201. &err) != HS_SUCCESS) {
  1202. msg_err_map ("cannot create tree of regexp when processing '%s': %s",
  1203. err->expression >= 0 ?
  1204. re_map->patterns[err->expression] :
  1205. "unknown regexp", err->message);
  1206. re_map->hs_db = NULL;
  1207. hs_free_compile_error (err);
  1208. return;
  1209. }
  1210. ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
  1211. msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
  1212. re_map->regexps->len, re_map->map->name, ts1);
  1213. rspamd_try_save_re_map_cache (re_map);
  1214. }
  1215. else {
  1216. msg_info_map ("hyperscan read %d cached regular expressions from %s",
  1217. re_map->regexps->len, re_map->map->name);
  1218. }
  1219. if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
  1220. msg_err_map ("cannot allocate scratch space for hyperscan");
  1221. hs_free_database (re_map->hs_db);
  1222. re_map->hs_db = NULL;
  1223. }
  1224. }
  1225. else {
  1226. msg_err_map ("regexp map is empty");
  1227. }
  1228. #endif
  1229. }
  1230. gchar *
  1231. rspamd_regexp_list_read_single (
  1232. gchar *chunk,
  1233. gint len,
  1234. struct map_cb_data *data,
  1235. gboolean final)
  1236. {
  1237. struct rspamd_regexp_map_helper *re_map;
  1238. if (data->cur_data == NULL) {
  1239. re_map = rspamd_map_helper_new_regexp (data->map, 0);
  1240. data->cur_data = re_map;
  1241. }
  1242. return rspamd_parse_kv_list (
  1243. chunk,
  1244. len,
  1245. data,
  1246. rspamd_map_helper_insert_re,
  1247. hash_fill,
  1248. final);
  1249. }
  1250. gchar *
  1251. rspamd_glob_list_read_single (
  1252. gchar *chunk,
  1253. gint len,
  1254. struct map_cb_data *data,
  1255. gboolean final)
  1256. {
  1257. struct rspamd_regexp_map_helper *re_map;
  1258. if (data->cur_data == NULL) {
  1259. re_map = rspamd_map_helper_new_regexp (data->map, RSPAMD_REGEXP_MAP_FLAG_GLOB);
  1260. data->cur_data = re_map;
  1261. }
  1262. return rspamd_parse_kv_list (
  1263. chunk,
  1264. len,
  1265. data,
  1266. rspamd_map_helper_insert_re,
  1267. hash_fill,
  1268. final);
  1269. }
  1270. gchar *
  1271. rspamd_regexp_list_read_multiple (
  1272. gchar *chunk,
  1273. gint len,
  1274. struct map_cb_data *data,
  1275. gboolean final)
  1276. {
  1277. struct rspamd_regexp_map_helper *re_map;
  1278. if (data->cur_data == NULL) {
  1279. re_map = rspamd_map_helper_new_regexp (data->map,
  1280. RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
  1281. data->cur_data = re_map;
  1282. }
  1283. return rspamd_parse_kv_list (
  1284. chunk,
  1285. len,
  1286. data,
  1287. rspamd_map_helper_insert_re,
  1288. hash_fill,
  1289. final);
  1290. }
  1291. gchar *
  1292. rspamd_glob_list_read_multiple (
  1293. gchar *chunk,
  1294. gint len,
  1295. struct map_cb_data *data,
  1296. gboolean final)
  1297. {
  1298. struct rspamd_regexp_map_helper *re_map;
  1299. if (data->cur_data == NULL) {
  1300. re_map = rspamd_map_helper_new_regexp (data->map,
  1301. RSPAMD_REGEXP_MAP_FLAG_GLOB|RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
  1302. data->cur_data = re_map;
  1303. }
  1304. return rspamd_parse_kv_list (
  1305. chunk,
  1306. len,
  1307. data,
  1308. rspamd_map_helper_insert_re,
  1309. hash_fill,
  1310. final);
  1311. }
  1312. void
  1313. rspamd_regexp_list_fin (struct map_cb_data *data, void **target)
  1314. {
  1315. struct rspamd_regexp_map_helper *re_map = NULL, *old_re_map;
  1316. struct rspamd_map *map = data->map;
  1317. if (data->errored) {
  1318. /* Clean up the current data and do not touch prev data */
  1319. if (data->cur_data) {
  1320. msg_info_map ("cleanup unfinished new data as error occurred for %s",
  1321. map->name);
  1322. re_map = (struct rspamd_regexp_map_helper *)data->cur_data;
  1323. rspamd_map_helper_destroy_regexp (re_map);
  1324. data->cur_data = NULL;
  1325. }
  1326. }
  1327. else {
  1328. if (data->cur_data) {
  1329. re_map = data->cur_data;
  1330. rspamd_cryptobox_hash_final(&re_map->hst, re_map->re_digest);
  1331. memcpy(&data->map->digest, re_map->re_digest, sizeof(data->map->digest));
  1332. rspamd_re_map_finalize(re_map);
  1333. msg_info_map ("read regexp list of %ud elements",
  1334. re_map->regexps->len);
  1335. data->map->traverse_function = rspamd_map_helper_traverse_regexp;
  1336. data->map->nelts = kh_size (re_map->htb);
  1337. }
  1338. if (target) {
  1339. *target = data->cur_data;
  1340. }
  1341. if (data->prev_data) {
  1342. old_re_map = data->prev_data;
  1343. #ifdef WITH_HYPERSCAN
  1344. if (re_map && memcmp(re_map->re_digest, old_re_map->re_digest,
  1345. sizeof(re_map->re_digest)) != 0) {
  1346. /* Cleanup old stuff */
  1347. rspamd_re_map_cache_cleanup_old(old_re_map);
  1348. }
  1349. #endif
  1350. rspamd_map_helper_destroy_regexp(old_re_map);
  1351. }
  1352. }
  1353. }
  1354. void
  1355. rspamd_regexp_list_dtor (struct map_cb_data *data)
  1356. {
  1357. if (data->cur_data) {
  1358. rspamd_map_helper_destroy_regexp (data->cur_data);
  1359. }
  1360. }
  1361. #ifdef WITH_HYPERSCAN
  1362. static int
  1363. rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
  1364. unsigned long long to,
  1365. unsigned int flags, void *context)
  1366. {
  1367. guint *i = context;
  1368. /* Always return non-zero as we need a single match here */
  1369. *i = id;
  1370. return 1;
  1371. }
  1372. #endif
  1373. gconstpointer
  1374. rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
  1375. const gchar *in, gsize len)
  1376. {
  1377. guint i;
  1378. rspamd_regexp_t *re;
  1379. gint res = 0;
  1380. gpointer ret = NULL;
  1381. struct rspamd_map_helper_value *val;
  1382. gboolean validated = FALSE;
  1383. g_assert (in != NULL);
  1384. if (map == NULL || len == 0 || map->regexps == NULL) {
  1385. return NULL;
  1386. }
  1387. if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
  1388. if (rspamd_fast_utf8_validate (in, len) == 0) {
  1389. validated = TRUE;
  1390. }
  1391. }
  1392. else {
  1393. validated = TRUE;
  1394. }
  1395. #ifdef WITH_HYPERSCAN
  1396. if (map->hs_db && map->hs_scratch) {
  1397. if (validated) {
  1398. res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
  1399. rspamd_match_hs_single_handler, (void *)&i);
  1400. if (res == HS_SCAN_TERMINATED) {
  1401. res = 1;
  1402. val = g_ptr_array_index (map->values, i);
  1403. ret = val->value;
  1404. val->hits ++;
  1405. }
  1406. return ret;
  1407. }
  1408. }
  1409. #endif
  1410. if (!res) {
  1411. /* PCRE version */
  1412. for (i = 0; i < map->regexps->len; i ++) {
  1413. re = g_ptr_array_index (map->regexps, i);
  1414. if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) {
  1415. val = g_ptr_array_index (map->values, i);
  1416. ret = val->value;
  1417. val->hits ++;
  1418. break;
  1419. }
  1420. }
  1421. }
  1422. return ret;
  1423. }
  1424. #ifdef WITH_HYPERSCAN
/* Context passed to rspamd_match_hs_multiple_handler through hs_scan */
struct rspamd_multiple_cbdata {
	/* Array that receives the value of every matched entry */
	GPtrArray *ar;
	/* Map whose values array is indexed by the hyperscan match id */
	struct rspamd_regexp_map_helper *map;
};
  1429. static int
  1430. rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
  1431. unsigned long long to,
  1432. unsigned int flags, void *context)
  1433. {
  1434. struct rspamd_multiple_cbdata *cbd = context;
  1435. struct rspamd_map_helper_value *val;
  1436. if (id < cbd->map->values->len) {
  1437. val = g_ptr_array_index (cbd->map->values, id);
  1438. val->hits ++;
  1439. g_ptr_array_add (cbd->ar, val->value);
  1440. }
  1441. /* Always return zero as we need all matches here */
  1442. return 0;
  1443. }
  1444. #endif
  1445. GPtrArray*
  1446. rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
  1447. const gchar *in, gsize len)
  1448. {
  1449. guint i;
  1450. rspamd_regexp_t *re;
  1451. GPtrArray *ret;
  1452. gint res = 0;
  1453. gboolean validated = FALSE;
  1454. struct rspamd_map_helper_value *val;
  1455. if (map == NULL || map->regexps == NULL || len == 0) {
  1456. return NULL;
  1457. }
  1458. g_assert (in != NULL);
  1459. if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
  1460. if (rspamd_fast_utf8_validate (in, len) == 0) {
  1461. validated = TRUE;
  1462. }
  1463. }
  1464. else {
  1465. validated = TRUE;
  1466. }
  1467. ret = g_ptr_array_new ();
  1468. #ifdef WITH_HYPERSCAN
  1469. if (map->hs_db && map->hs_scratch) {
  1470. if (validated) {
  1471. struct rspamd_multiple_cbdata cbd;
  1472. cbd.ar = ret;
  1473. cbd.map = map;
  1474. if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
  1475. rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
  1476. res = 1;
  1477. }
  1478. }
  1479. }
  1480. #endif
  1481. if (!res) {
  1482. /* PCRE version */
  1483. for (i = 0; i < map->regexps->len; i ++) {
  1484. re = g_ptr_array_index (map->regexps, i);
  1485. if (rspamd_regexp_search (re, in, len, NULL, NULL,
  1486. !validated, NULL)) {
  1487. val = g_ptr_array_index (map->values, i);
  1488. val->hits ++;
  1489. g_ptr_array_add (ret, val->value);
  1490. }
  1491. }
  1492. }
  1493. if (ret->len > 0) {
  1494. return ret;
  1495. }
  1496. g_ptr_array_free (ret, TRUE);
  1497. return NULL;
  1498. }
  1499. gconstpointer
  1500. rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in,
  1501. gsize len)
  1502. {
  1503. khiter_t k;
  1504. struct rspamd_map_helper_value *val;
  1505. rspamd_ftok_t tok;
  1506. if (map == NULL || map->htb == NULL) {
  1507. return NULL;
  1508. }
  1509. tok.begin = in;
  1510. tok.len = len;
  1511. k = kh_get (rspamd_map_hash, map->htb, tok);
  1512. if (k != kh_end (map->htb)) {
  1513. val = kh_value (map->htb, k);
  1514. val->hits ++;
  1515. return val->value;
  1516. }
  1517. return NULL;
  1518. }
  1519. gconstpointer
  1520. rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
  1521. const guchar *in, gsize inlen)
  1522. {
  1523. struct rspamd_map_helper_value *val;
  1524. if (map == NULL || map->trie == NULL) {
  1525. return NULL;
  1526. }
  1527. val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie,
  1528. in, inlen);
  1529. if (val != (gconstpointer)RADIX_NO_VALUE) {
  1530. val->hits ++;
  1531. return val->value;
  1532. }
  1533. return NULL;
  1534. }
  1535. gconstpointer
  1536. rspamd_match_radix_map_addr (struct rspamd_radix_map_helper *map,
  1537. const rspamd_inet_addr_t *addr)
  1538. {
  1539. struct rspamd_map_helper_value *val;
  1540. if (map == NULL || map->trie == NULL) {
  1541. return NULL;
  1542. }
  1543. val = (struct rspamd_map_helper_value *)radix_find_compressed_addr (map->trie, addr);
  1544. if (val != (gconstpointer)RADIX_NO_VALUE) {
  1545. val->hits ++;
  1546. return val->value;
  1547. }
  1548. return NULL;
  1549. }
/*
 * CDB stuff
 */
  1553. struct rspamd_cdb_map_helper *
  1554. rspamd_map_helper_new_cdb (struct rspamd_map *map)
  1555. {
  1556. struct rspamd_cdb_map_helper *n;
  1557. n = g_malloc0 (sizeof (*n));
  1558. n->cdbs = (GQueue)G_QUEUE_INIT;
  1559. n->map = map;
  1560. rspamd_cryptobox_fast_hash_init (&n->hst, map_hash_seed);
  1561. return n;
  1562. }
  1563. void
  1564. rspamd_map_helper_destroy_cdb (struct rspamd_cdb_map_helper *c)
  1565. {
  1566. if (c == NULL) {
  1567. return;
  1568. }
  1569. GList *cur = c->cdbs.head;
  1570. while (cur) {
  1571. struct cdb *cdb = (struct cdb *)cur->data;
  1572. cdb_free (cdb);
  1573. g_free (cdb->filename);
  1574. close (cdb->cdb_fd);
  1575. g_free (cdb);
  1576. cur = g_list_next (cur);
  1577. }
  1578. g_queue_clear (&c->cdbs);
  1579. g_free (c);
  1580. }
  1581. gchar *
  1582. rspamd_cdb_list_read (gchar *chunk,
  1583. gint len,
  1584. struct map_cb_data *data,
  1585. gboolean final)
  1586. {
  1587. struct rspamd_cdb_map_helper *cdb_data;
  1588. struct cdb *found = NULL;
  1589. struct rspamd_map *map = data->map;
  1590. g_assert (map->no_file_read);
  1591. if (data->cur_data == NULL) {
  1592. cdb_data = rspamd_map_helper_new_cdb (data->map);
  1593. data->cur_data = cdb_data;
  1594. }
  1595. else {
  1596. cdb_data = (struct rspamd_cdb_map_helper *)data->cur_data;
  1597. }
  1598. GList *cur = cdb_data->cdbs.head;
  1599. while (cur) {
  1600. struct cdb *elt = (struct cdb *)cur->data;
  1601. if (strcmp (elt->filename, chunk) == 0) {
  1602. found = elt;
  1603. break;
  1604. }
  1605. cur = g_list_next (cur);
  1606. }
  1607. if (found == NULL) {
  1608. /* New cdb */
  1609. gint fd;
  1610. struct cdb *cdb;
  1611. fd = rspamd_file_xopen (chunk, O_RDONLY, 0, TRUE);
  1612. if (fd == -1) {
  1613. msg_err_map ("cannot open cdb map from %s: %s", chunk, strerror (errno));
  1614. return NULL;
  1615. }
  1616. cdb = g_malloc0 (sizeof (struct cdb));
  1617. if (cdb_init (cdb, fd) == -1) {
  1618. g_free (cdb);
  1619. msg_err_map ("cannot init cdb map from %s: %s", chunk, strerror (errno));
  1620. return NULL;
  1621. }
  1622. cdb->filename = g_strdup (chunk);
  1623. g_queue_push_tail (&cdb_data->cdbs, cdb);
  1624. cdb_data->total_size += cdb->cdb_fsize;
  1625. rspamd_cryptobox_fast_hash_update (&cdb_data->hst, chunk, len);
  1626. }
  1627. return chunk + len;
  1628. }
  1629. void
  1630. rspamd_cdb_list_fin (struct map_cb_data *data, void **target)
  1631. {
  1632. struct rspamd_map *map = data->map;
  1633. struct rspamd_cdb_map_helper *cdb_data;
  1634. if (data->errored) {
  1635. /* Clean up the current data and do not touch prev data */
  1636. if (data->cur_data) {
  1637. msg_info_map ("cleanup unfinished new data as error occurred for %s",
  1638. map->name);
  1639. cdb_data = (struct rspamd_cdb_map_helper *) data->cur_data;
  1640. rspamd_map_helper_destroy_cdb(cdb_data);
  1641. data->cur_data = NULL;
  1642. }
  1643. }
  1644. else {
  1645. if (data->cur_data) {
  1646. cdb_data = (struct rspamd_cdb_map_helper *) data->cur_data;
  1647. msg_info_map ("read cdb of %Hz size", cdb_data->total_size);
  1648. data->map->traverse_function = NULL;
  1649. data->map->nelts = 0;
  1650. data->map->digest = rspamd_cryptobox_fast_hash_final(&cdb_data->hst);
  1651. }
  1652. if (target) {
  1653. *target = data->cur_data;
  1654. }
  1655. if (data->prev_data) {
  1656. cdb_data = (struct rspamd_cdb_map_helper *) data->prev_data;
  1657. rspamd_map_helper_destroy_cdb(cdb_data);
  1658. }
  1659. }
  1660. }
  1661. void
  1662. rspamd_cdb_list_dtor (struct map_cb_data *data)
  1663. {
  1664. if (data->cur_data) {
  1665. rspamd_map_helper_destroy_cdb (data->cur_data);
  1666. }
  1667. }
  1668. gconstpointer
  1669. rspamd_match_cdb_map (struct rspamd_cdb_map_helper *map,
  1670. const gchar *in, gsize inlen)
  1671. {
  1672. if (map == NULL || map->cdbs.head == NULL) {
  1673. return NULL;
  1674. }
  1675. GList *cur = map->cdbs.head;
  1676. static rspamd_ftok_t found;
  1677. while (cur) {
  1678. struct cdb *cdb = (struct cdb *)cur->data;
  1679. if (cdb_find (cdb, in, inlen) > 0) {
  1680. /* Extract and push value to lua as string */
  1681. unsigned vlen;
  1682. gconstpointer vpos;
  1683. vpos = cdb->cdb_mem + cdb_datapos (cdb);
  1684. vlen = cdb_datalen (cdb);
  1685. found.len = vlen;
  1686. found.begin = vpos;
  1687. return &found; /* Do not reuse! */
  1688. }
  1689. cur = g_list_next (cur);
  1690. }
  1691. return NULL;
  1692. }