You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

map_helpers.c 29KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397
  1. /*-
  2. * Copyright 2018 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "map_helpers.h"
  17. #include "map_private.h"
  18. #include "khash.h"
  19. #include "radix.h"
  20. #include "rspamd.h"
  21. #include "cryptobox.h"
  22. #include "contrib/fastutf8/fastutf8.h"
  23. #ifdef WITH_HYPERSCAN
  24. #include "hs.h"
  25. #endif
  26. #ifndef WITH_PCRE2
  27. #include <pcre.h>
  28. #else
  29. #include <pcre2.h>
  30. #endif
  31. static const guint64 map_hash_seed = 0xdeadbabeULL;
  32. static const gchar *hash_fill = "1";
  33. struct rspamd_map_helper_value {
  34. gsize hits;
  35. gconstpointer key;
  36. gchar value[]; /* Null terminated */
  37. };
  38. KHASH_INIT (rspamd_map_hash, const gchar *,
  39. struct rspamd_map_helper_value *, true,
  40. rspamd_strcase_hash, rspamd_strcase_equal);
  41. struct rspamd_radix_map_helper {
  42. rspamd_mempool_t *pool;
  43. khash_t(rspamd_map_hash) *htb;
  44. radix_compressed_t *trie;
  45. rspamd_cryptobox_fast_hash_state_t hst;
  46. };
  47. struct rspamd_hash_map_helper {
  48. rspamd_mempool_t *pool;
  49. khash_t(rspamd_map_hash) *htb;
  50. rspamd_cryptobox_fast_hash_state_t hst;
  51. };
  52. struct rspamd_regexp_map_helper {
  53. rspamd_mempool_t *pool;
  54. struct rspamd_map *map;
  55. GPtrArray *regexps;
  56. GPtrArray *values;
  57. khash_t(rspamd_map_hash) *htb;
  58. rspamd_cryptobox_fast_hash_state_t hst;
  59. enum rspamd_regexp_map_flags map_flags;
  60. #ifdef WITH_HYPERSCAN
  61. hs_database_t *hs_db;
  62. hs_scratch_t *hs_scratch;
  63. gchar **patterns;
  64. gint *flags;
  65. gint *ids;
  66. #endif
  67. };
  /**
   * FSM for parsing lists
   *
   * MAP_STORE_KEY / MAP_STORE_VALUE copy the [c, p) span of the input into a
   * freshly g_malloc'ed, NUL terminated string and strip surrounding
   * whitespace from it. They expect `c`, `p` and `key` (or `value`) to be in
   * scope at the expansion site, advance `c` past leading whitespace as a side
   * effect, and leave the caller responsible for g_free'ing the result.
   *
   * The bound (`p > c`) is tested before `*c` is read so that `c` is never
   * dereferenced at or beyond `p`.
   */
  #define MAP_STORE_KEY do { \
      while (p > c && g_ascii_isspace (*c)) { c ++; } \
      key = g_malloc (p - c + 1); \
      rspamd_strlcpy (key, c, p - c + 1); \
      key = g_strstrip (key); \
  } while (0)
  #define MAP_STORE_VALUE do { \
      while (p > c && g_ascii_isspace (*c)) { c ++; } \
      value = g_malloc (p - c + 1); \
      rspamd_strlcpy (value, c, p - c + 1); \
      value = g_strstrip (value); \
  } while (0)
  83. gchar *
  84. rspamd_parse_kv_list (
  85. gchar * chunk,
  86. gint len,
  87. struct map_cb_data *data,
  88. insert_func func,
  89. const gchar *default_value,
  90. gboolean final)
  91. {
  92. enum {
  93. map_skip_spaces_before_key = 0,
  94. map_read_key,
  95. map_read_key_quoted,
  96. map_read_key_slashed,
  97. map_skip_spaces_after_key,
  98. map_backslash_quoted,
  99. map_backslash_slashed,
  100. map_read_key_after_slash,
  101. map_read_value,
  102. map_read_comment_start,
  103. map_skip_comment,
  104. map_read_eol,
  105. };
  106. gchar *c, *p, *key = NULL, *value = NULL, *end;
  107. struct rspamd_map *map = data->map;
  108. guint line_number = 0;
  109. p = chunk;
  110. c = p;
  111. end = p + len;
  112. while (p < end) {
  113. switch (data->state) {
  114. case map_skip_spaces_before_key:
  115. if (g_ascii_isspace (*p)) {
  116. p ++;
  117. }
  118. else {
  119. if (*p == '"') {
  120. p++;
  121. c = p;
  122. data->state = map_read_key_quoted;
  123. }
  124. else if (*p == '/') {
  125. /* Note that c is on '/' here as '/' is a part of key */
  126. c = p;
  127. p++;
  128. data->state = map_read_key_slashed;
  129. }
  130. else {
  131. c = p;
  132. data->state = map_read_key;
  133. }
  134. }
  135. break;
  136. case map_read_key:
  137. /* read key */
  138. /* Check here comments, eol and end of buffer */
  139. if (*p == '#' && (p == c || *(p - 1) != '\\')) {
  140. if (p - c > 0) {
  141. /* Store a single key */
  142. MAP_STORE_KEY;
  143. func (data->cur_data, key, default_value);
  144. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  145. key, default_value, line_number);
  146. g_free (key);
  147. }
  148. key = NULL;
  149. data->state = map_read_comment_start;
  150. }
  151. else if (*p == '\r' || *p == '\n') {
  152. if (p - c > 0) {
  153. /* Store a single key */
  154. MAP_STORE_KEY;
  155. func (data->cur_data, key, default_value);
  156. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  157. key, default_value, line_number);
  158. g_free (key);
  159. }
  160. data->state = map_read_eol;
  161. key = NULL;
  162. }
  163. else if (g_ascii_isspace (*p)) {
  164. if (p - c > 0) {
  165. MAP_STORE_KEY;
  166. data->state = map_skip_spaces_after_key;
  167. }
  168. else {
  169. msg_err_map ("empty or invalid key found on line %d", line_number);
  170. data->state = map_skip_comment;
  171. }
  172. }
  173. else {
  174. p++;
  175. }
  176. break;
  177. case map_read_key_quoted:
  178. if (*p == '\\') {
  179. data->state = map_backslash_quoted;
  180. p ++;
  181. }
  182. else if (*p == '"') {
  183. /* Allow empty keys in this case */
  184. if (p - c >= 0) {
  185. MAP_STORE_KEY;
  186. data->state = map_skip_spaces_after_key;
  187. }
  188. else {
  189. g_assert_not_reached ();
  190. }
  191. p ++;
  192. }
  193. else {
  194. p ++;
  195. }
  196. break;
  197. case map_read_key_slashed:
  198. if (*p == '\\') {
  199. data->state = map_backslash_slashed;
  200. p ++;
  201. }
  202. else if (*p == '/') {
  203. /* Allow empty keys in this case */
  204. if (p - c >= 0) {
  205. data->state = map_read_key_after_slash;
  206. }
  207. else {
  208. g_assert_not_reached ();
  209. }
  210. }
  211. else {
  212. p ++;
  213. }
  214. break;
  215. case map_read_key_after_slash:
  216. /*
  217. * This state is equal to reading of key but '/' is not
  218. * treated specially
  219. */
  220. if (*p == '#') {
  221. if (p - c > 0) {
  222. /* Store a single key */
  223. MAP_STORE_KEY;
  224. func (data->cur_data, key, default_value);
  225. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  226. key, default_value, line_number);
  227. g_free (key);
  228. key = NULL;
  229. }
  230. data->state = map_read_comment_start;
  231. }
  232. else if (*p == '\r' || *p == '\n') {
  233. if (p - c > 0) {
  234. /* Store a single key */
  235. MAP_STORE_KEY;
  236. func (data->cur_data, key, default_value);
  237. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  238. key, default_value, line_number);
  239. g_free (key);
  240. key = NULL;
  241. }
  242. data->state = map_read_eol;
  243. key = NULL;
  244. }
  245. else if (g_ascii_isspace (*p)) {
  246. if (p - c > 0) {
  247. MAP_STORE_KEY;
  248. data->state = map_skip_spaces_after_key;
  249. }
  250. else {
  251. msg_err_map ("empty or invalid key found on line %d", line_number);
  252. data->state = map_skip_comment;
  253. }
  254. }
  255. else {
  256. p ++;
  257. }
  258. break;
  259. case map_backslash_quoted:
  260. p ++;
  261. data->state = map_read_key_quoted;
  262. break;
  263. case map_backslash_slashed:
  264. p ++;
  265. data->state = map_read_key_slashed;
  266. break;
  267. case map_skip_spaces_after_key:
  268. if (*p == ' ' || *p == '\t') {
  269. p ++;
  270. }
  271. else {
  272. c = p;
  273. data->state = map_read_value;
  274. }
  275. break;
  276. case map_read_value:
  277. if (key == NULL) {
  278. /* Ignore line */
  279. msg_err_map ("empty or invalid key found on line %d", line_number);
  280. data->state = map_skip_comment;
  281. }
  282. else {
  283. if (*p == '#') {
  284. if (p - c > 0) {
  285. /* Store a single key */
  286. MAP_STORE_VALUE;
  287. func (data->cur_data, key, value);
  288. msg_debug_map ("insert key value pair: %s -> %s; line: %d",
  289. key, value, line_number);
  290. g_free (key);
  291. g_free (value);
  292. key = NULL;
  293. value = NULL;
  294. } else {
  295. func (data->cur_data, key, default_value);
  296. msg_debug_map ("insert key only pair: %s -> %s; line: %d",
  297. key, default_value, line_number);
  298. g_free (key);
  299. key = NULL;
  300. }
  301. data->state = map_read_comment_start;
  302. } else if (*p == '\r' || *p == '\n') {
  303. if (p - c > 0) {
  304. /* Store a single key */
  305. MAP_STORE_VALUE;
  306. func (data->cur_data, key, value);
  307. msg_debug_map ("insert key value pair: %s -> %s",
  308. key, value);
  309. g_free (key);
  310. g_free (value);
  311. key = NULL;
  312. value = NULL;
  313. } else {
  314. func (data->cur_data, key, default_value);
  315. msg_debug_map ("insert key only pair: %s -> %s",
  316. key, default_value);
  317. g_free (key);
  318. key = NULL;
  319. }
  320. data->state = map_read_eol;
  321. key = NULL;
  322. }
  323. else {
  324. p++;
  325. }
  326. }
  327. break;
  328. case map_read_comment_start:
  329. if (*p == '#') {
  330. data->state = map_skip_comment;
  331. p ++;
  332. key = NULL;
  333. value = NULL;
  334. }
  335. else {
  336. g_assert_not_reached ();
  337. }
  338. break;
  339. case map_skip_comment:
  340. if (*p == '\r' || *p == '\n') {
  341. data->state = map_read_eol;
  342. }
  343. else {
  344. p ++;
  345. }
  346. break;
  347. case map_read_eol:
  348. /* Skip \r\n and whitespaces */
  349. if (*p == '\r' || *p == '\n') {
  350. if (*p == '\n') {
  351. /* We don't care about \r only line separators, they are too rare */
  352. line_number ++;
  353. }
  354. p++;
  355. }
  356. else {
  357. data->state = map_skip_spaces_before_key;
  358. }
  359. break;
  360. default:
  361. g_assert_not_reached ();
  362. break;
  363. }
  364. }
  365. if (final) {
  366. /* Examine the state */
  367. switch (data->state) {
  368. case map_read_key:
  369. if (p - c > 0) {
  370. /* Store a single key */
  371. MAP_STORE_KEY;
  372. func (data->cur_data, key, default_value);
  373. msg_debug_map ("insert key only pair: %s -> %s",
  374. key, default_value);
  375. g_free (key);
  376. key = NULL;
  377. }
  378. break;
  379. case map_read_value:
  380. if (key == NULL) {
  381. /* Ignore line */
  382. msg_err_map ("empty or invalid key found on line %d", line_number);
  383. data->state = map_skip_comment;
  384. }
  385. else {
  386. if (p - c > 0) {
  387. /* Store a single key */
  388. MAP_STORE_VALUE;
  389. func (data->cur_data, key, value);
  390. msg_debug_map ("insert key value pair: %s -> %s",
  391. key, value);
  392. g_free (key);
  393. g_free (value);
  394. key = NULL;
  395. value = NULL;
  396. } else {
  397. func (data->cur_data, key, default_value);
  398. msg_debug_map ("insert key only pair: %s -> %s",
  399. key, default_value);
  400. g_free (key);
  401. key = NULL;
  402. }
  403. }
  404. break;
  405. }
  406. data->state = map_skip_spaces_before_key;
  407. }
  408. return c;
  409. }
  410. /**
  411. * Radix tree helper function
  412. */
  413. void
  414. rspamd_map_helper_insert_radix (gpointer st, gconstpointer key, gconstpointer value)
  415. {
  416. struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
  417. struct rspamd_map_helper_value *val;
  418. gsize vlen;
  419. khiter_t k;
  420. gconstpointer nk;
  421. gint res;
  422. vlen = strlen (value);
  423. val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
  424. vlen + 1);
  425. memcpy (val->value, value, vlen);
  426. k = kh_get (rspamd_map_hash, r->htb, key);
  427. if (k == kh_end (r->htb)) {
  428. nk = rspamd_mempool_strdup (r->pool, key);
  429. k = kh_put (rspamd_map_hash, r->htb, nk, &res);
  430. }
  431. nk = kh_key (r->htb, k);
  432. val->key = nk;
  433. kh_value (r->htb, k) = val;
  434. rspamd_radix_add_iplist (key, ",", r->trie, val, FALSE);
  435. rspamd_cryptobox_fast_hash_update (&r->hst, nk, strlen (nk));
  436. }
  437. void
  438. rspamd_map_helper_insert_radix_resolve (gpointer st, gconstpointer key, gconstpointer value)
  439. {
  440. struct rspamd_radix_map_helper *r = (struct rspamd_radix_map_helper *)st;
  441. struct rspamd_map_helper_value *val;
  442. gsize vlen;
  443. khiter_t k;
  444. gconstpointer nk;
  445. gint res;
  446. vlen = strlen (value);
  447. val = rspamd_mempool_alloc0 (r->pool, sizeof (*val) +
  448. vlen + 1);
  449. memcpy (val->value, value, vlen);
  450. k = kh_get (rspamd_map_hash, r->htb, key);
  451. if (k == kh_end (r->htb)) {
  452. nk = rspamd_mempool_strdup (r->pool, key);
  453. k = kh_put (rspamd_map_hash, r->htb, nk, &res);
  454. }
  455. nk = kh_key (r->htb, k);
  456. val->key = nk;
  457. kh_value (r->htb, k) = val;
  458. rspamd_radix_add_iplist (key, ",", r->trie, val, TRUE);
  459. rspamd_cryptobox_fast_hash_update (&r->hst, nk, strlen (nk));
  460. }
  461. void
  462. rspamd_map_helper_insert_hash (gpointer st, gconstpointer key, gconstpointer value)
  463. {
  464. struct rspamd_hash_map_helper *ht = st;
  465. struct rspamd_map_helper_value *val;
  466. khiter_t k;
  467. gconstpointer nk;
  468. gsize vlen;
  469. gint r;
  470. k = kh_get (rspamd_map_hash, ht->htb, key);
  471. vlen = strlen (value);
  472. if (k == kh_end (ht->htb)) {
  473. nk = rspamd_mempool_strdup (ht->pool, key);
  474. k = kh_put (rspamd_map_hash, ht->htb, nk, &r);
  475. }
  476. else {
  477. val = kh_value (ht->htb, k);
  478. if (strcmp (value, val->value) == 0) {
  479. /* Same element, skip */
  480. return;
  481. }
  482. }
  483. /* Null termination due to alloc0 */
  484. val = rspamd_mempool_alloc0 (ht->pool, sizeof (*val) + vlen + 1);
  485. memcpy (val->value, value, vlen);
  486. nk = kh_key (ht->htb, k);
  487. val->key = nk;
  488. kh_value (ht->htb, k) = val;
  489. rspamd_cryptobox_fast_hash_update (&ht->hst, nk, strlen (nk));
  490. }
  491. void
  492. rspamd_map_helper_insert_re (gpointer st, gconstpointer key, gconstpointer value)
  493. {
  494. struct rspamd_regexp_map_helper *re_map = st;
  495. struct rspamd_map *map;
  496. rspamd_regexp_t *re;
  497. gchar *escaped;
  498. GError *err = NULL;
  499. gint pcre_flags;
  500. gsize escaped_len;
  501. struct rspamd_map_helper_value *val;
  502. khiter_t k;
  503. gconstpointer nk;
  504. gsize vlen;
  505. gint r;
  506. map = re_map->map;
  507. if (re_map->map_flags & RSPAMD_REGEXP_MAP_FLAG_GLOB) {
  508. escaped = rspamd_str_regexp_escape (key, strlen (key), &escaped_len,
  509. RSPAMD_REGEXP_ESCAPE_GLOB|RSPAMD_REGEXP_ESCAPE_UTF);
  510. re = rspamd_regexp_new (escaped, NULL, &err);
  511. g_free (escaped);
  512. }
  513. else {
  514. re = rspamd_regexp_new (key, NULL, &err);
  515. }
  516. if (re == NULL) {
  517. msg_err_map ("cannot parse regexp %s: %e", key, err);
  518. if (err) {
  519. g_error_free (err);
  520. }
  521. return;
  522. }
  523. vlen = strlen (value);
  524. val = rspamd_mempool_alloc0 (re_map->pool, sizeof (*val) +
  525. vlen + 1);
  526. memcpy (val->value, value, vlen);
  527. k = kh_get (rspamd_map_hash, re_map->htb, key);
  528. if (k == kh_end (re_map->htb)) {
  529. nk = rspamd_mempool_strdup (re_map->pool, key);
  530. k = kh_put (rspamd_map_hash, re_map->htb, nk, &r);
  531. }
  532. nk = kh_key (re_map->htb, k);
  533. val->key = nk;
  534. kh_value (re_map->htb, k) = val;
  535. rspamd_cryptobox_fast_hash_update (&re_map->hst, nk, strlen (nk));
  536. pcre_flags = rspamd_regexp_get_pcre_flags (re);
  537. #ifndef WITH_PCRE2
  538. if (pcre_flags & PCRE_FLAG(UTF8)) {
  539. re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
  540. }
  541. #else
  542. if (pcre_flags & PCRE_FLAG(UTF)) {
  543. re_map->map_flags |= RSPAMD_REGEXP_MAP_FLAG_UTF;
  544. }
  545. #endif
  546. g_ptr_array_add (re_map->regexps, re);
  547. g_ptr_array_add (re_map->values, val);
  548. }
  549. static void
  550. rspamd_map_helper_traverse_regexp (void *data,
  551. rspamd_map_traverse_cb cb,
  552. gpointer cbdata,
  553. gboolean reset_hits)
  554. {
  555. gconstpointer k;
  556. struct rspamd_map_helper_value *val;
  557. struct rspamd_regexp_map_helper *re_map = data;
  558. kh_foreach (re_map->htb, k, val, {
  559. if (!cb (k, val->value, val->hits, cbdata)) {
  560. break;
  561. }
  562. if (reset_hits) {
  563. val->hits = 0;
  564. }
  565. });
  566. }
  567. struct rspamd_hash_map_helper *
  568. rspamd_map_helper_new_hash (struct rspamd_map *map)
  569. {
  570. struct rspamd_hash_map_helper *htb;
  571. rspamd_mempool_t *pool;
  572. if (map) {
  573. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  574. map->tag, 0);
  575. }
  576. else {
  577. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  578. NULL, 0);
  579. }
  580. htb = rspamd_mempool_alloc0 (pool, sizeof (*htb));
  581. htb->htb = kh_init (rspamd_map_hash);
  582. htb->pool = pool;
  583. rspamd_cryptobox_fast_hash_init (&htb->hst, map_hash_seed);
  584. return htb;
  585. }
  586. void
  587. rspamd_map_helper_destroy_hash (struct rspamd_hash_map_helper *r)
  588. {
  589. if (r == NULL || r->pool == NULL) {
  590. return;
  591. }
  592. rspamd_mempool_t *pool = r->pool;
  593. kh_destroy (rspamd_map_hash, r->htb);
  594. memset (r, 0, sizeof (*r));
  595. rspamd_mempool_delete (pool);
  596. }
  597. static void
  598. rspamd_map_helper_traverse_hash (void *data,
  599. rspamd_map_traverse_cb cb,
  600. gpointer cbdata,
  601. gboolean reset_hits)
  602. {
  603. gconstpointer k;
  604. struct rspamd_map_helper_value *val;
  605. struct rspamd_hash_map_helper *ht = data;
  606. kh_foreach (ht->htb, k, val, {
  607. if (!cb (k, val->value, val->hits, cbdata)) {
  608. break;
  609. }
  610. if (reset_hits) {
  611. val->hits = 0;
  612. }
  613. });
  614. }
  615. struct rspamd_radix_map_helper *
  616. rspamd_map_helper_new_radix (struct rspamd_map *map)
  617. {
  618. struct rspamd_radix_map_helper *r;
  619. rspamd_mempool_t *pool;
  620. if (map) {
  621. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  622. map->tag, 0);
  623. }
  624. else {
  625. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  626. NULL, 0);
  627. }
  628. r = rspamd_mempool_alloc0 (pool, sizeof (*r));
  629. r->trie = radix_create_compressed_with_pool (pool);
  630. r->htb = kh_init (rspamd_map_hash);
  631. r->pool = pool;
  632. rspamd_cryptobox_fast_hash_init (&r->hst, map_hash_seed);
  633. return r;
  634. }
  635. void
  636. rspamd_map_helper_destroy_radix (struct rspamd_radix_map_helper *r)
  637. {
  638. if (r == NULL || !r->pool) {
  639. return;
  640. }
  641. kh_destroy (rspamd_map_hash, r->htb);
  642. rspamd_mempool_t *pool = r->pool;
  643. memset (r, 0, sizeof (*r));
  644. rspamd_mempool_delete (pool);
  645. }
  646. static void
  647. rspamd_map_helper_traverse_radix (void *data,
  648. rspamd_map_traverse_cb cb,
  649. gpointer cbdata,
  650. gboolean reset_hits)
  651. {
  652. gconstpointer k;
  653. struct rspamd_map_helper_value *val;
  654. struct rspamd_radix_map_helper *r = data;
  655. kh_foreach (r->htb, k, val, {
  656. if (!cb (k, val->value, val->hits, cbdata)) {
  657. break;
  658. }
  659. if (reset_hits) {
  660. val->hits = 0;
  661. }
  662. });
  663. }
  664. struct rspamd_regexp_map_helper *
  665. rspamd_map_helper_new_regexp (struct rspamd_map *map,
  666. enum rspamd_regexp_map_flags flags)
  667. {
  668. struct rspamd_regexp_map_helper *re_map;
  669. rspamd_mempool_t *pool;
  670. pool = rspamd_mempool_new (rspamd_mempool_suggest_size (),
  671. map->tag, 0);
  672. re_map = rspamd_mempool_alloc0 (pool, sizeof (*re_map));
  673. re_map->pool = pool;
  674. re_map->values = g_ptr_array_new ();
  675. re_map->regexps = g_ptr_array_new ();
  676. re_map->map = map;
  677. re_map->map_flags = flags;
  678. re_map->htb = kh_init (rspamd_map_hash);
  679. rspamd_cryptobox_fast_hash_init (&re_map->hst, map_hash_seed);
  680. return re_map;
  681. }
  682. void
  683. rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map)
  684. {
  685. rspamd_regexp_t *re;
  686. guint i;
  687. if (!re_map || !re_map->regexps) {
  688. return;
  689. }
  690. #ifdef WITH_HYPERSCAN
  691. if (re_map->hs_scratch) {
  692. hs_free_scratch (re_map->hs_scratch);
  693. }
  694. if (re_map->hs_db) {
  695. hs_free_database (re_map->hs_db);
  696. }
  697. if (re_map->patterns) {
  698. for (i = 0; i < re_map->regexps->len; i ++) {
  699. g_free (re_map->patterns[i]);
  700. }
  701. g_free (re_map->patterns);
  702. }
  703. if (re_map->flags) {
  704. g_free (re_map->flags);
  705. }
  706. if (re_map->ids) {
  707. g_free (re_map->ids);
  708. }
  709. #endif
  710. for (i = 0; i < re_map->regexps->len; i ++) {
  711. re = g_ptr_array_index (re_map->regexps, i);
  712. rspamd_regexp_unref (re);
  713. }
  714. g_ptr_array_free (re_map->regexps, TRUE);
  715. g_ptr_array_free (re_map->values, TRUE);
  716. kh_destroy (rspamd_map_hash, re_map->htb);
  717. rspamd_mempool_t *pool = re_map->pool;
  718. memset (re_map, 0, sizeof (*re_map));
  719. rspamd_mempool_delete (pool);
  720. }
  721. gchar *
  722. rspamd_kv_list_read (
  723. gchar * chunk,
  724. gint len,
  725. struct map_cb_data *data,
  726. gboolean final)
  727. {
  728. if (data->cur_data == NULL) {
  729. data->cur_data = rspamd_map_helper_new_hash (data->map);
  730. }
  731. return rspamd_parse_kv_list (
  732. chunk,
  733. len,
  734. data,
  735. rspamd_map_helper_insert_hash,
  736. "",
  737. final);
  738. }
  739. void
  740. rspamd_kv_list_fin (struct map_cb_data *data, void **target)
  741. {
  742. struct rspamd_map *map = data->map;
  743. struct rspamd_hash_map_helper *htb;
  744. if (data->cur_data) {
  745. htb = (struct rspamd_hash_map_helper *)data->cur_data;
  746. msg_info_map ("read hash of %d elements", kh_size (htb->htb));
  747. data->map->traverse_function = rspamd_map_helper_traverse_hash;
  748. data->map->nelts = kh_size (htb->htb);
  749. data->map->digest = rspamd_cryptobox_fast_hash_final (&htb->hst);
  750. }
  751. if (target) {
  752. *target = data->cur_data;
  753. }
  754. if (data->prev_data) {
  755. htb = (struct rspamd_hash_map_helper *)data->prev_data;
  756. rspamd_map_helper_destroy_hash (htb);
  757. }
  758. }
  759. void
  760. rspamd_kv_list_dtor (struct map_cb_data *data)
  761. {
  762. struct rspamd_hash_map_helper *htb;
  763. if (data->cur_data) {
  764. htb = (struct rspamd_hash_map_helper *)data->cur_data;
  765. rspamd_map_helper_destroy_hash (htb);
  766. }
  767. }
  768. gchar *
  769. rspamd_radix_read (
  770. gchar * chunk,
  771. gint len,
  772. struct map_cb_data *data,
  773. gboolean final)
  774. {
  775. struct rspamd_radix_map_helper *r;
  776. struct rspamd_map *map = data->map;
  777. if (data->cur_data == NULL) {
  778. r = rspamd_map_helper_new_radix (map);
  779. data->cur_data = r;
  780. }
  781. return rspamd_parse_kv_list (
  782. chunk,
  783. len,
  784. data,
  785. rspamd_map_helper_insert_radix,
  786. hash_fill,
  787. final);
  788. }
  789. void
  790. rspamd_radix_fin (struct map_cb_data *data, void **target)
  791. {
  792. struct rspamd_map *map = data->map;
  793. struct rspamd_radix_map_helper *r;
  794. if (data->cur_data) {
  795. r = (struct rspamd_radix_map_helper *)data->cur_data;
  796. msg_info_map ("read radix trie of %z elements: %s",
  797. radix_get_size (r->trie), radix_get_info (r->trie));
  798. data->map->traverse_function = rspamd_map_helper_traverse_radix;
  799. data->map->nelts = kh_size (r->htb);
  800. data->map->digest = rspamd_cryptobox_fast_hash_final (&r->hst);
  801. }
  802. if (target) {
  803. *target = data->cur_data;
  804. }
  805. if (data->prev_data) {
  806. r = (struct rspamd_radix_map_helper *)data->prev_data;
  807. rspamd_map_helper_destroy_radix (r);
  808. }
  809. }
  810. void
  811. rspamd_radix_dtor (struct map_cb_data *data)
  812. {
  813. struct rspamd_radix_map_helper *r;
  814. if (data->cur_data) {
  815. r = (struct rspamd_radix_map_helper *)data->cur_data;
  816. rspamd_map_helper_destroy_radix (r);
  817. }
  818. }
  /**
   * Build the hyperscan database for a fully loaded regexp map.
   *
   * For every regexp an escaped pattern string, a hyperscan flag set and an id
   * (its index) are prepared; the patterns are then compiled together in block
   * mode and scratch space is allocated. On any failure the map is left
   * without a database (`hs_db == NULL`). The per-pattern arrays are released
   * by rspamd_map_helper_destroy_regexp.
   *
   * Does nothing when built without WITH_HYPERSCAN, and bails out early when
   * the CPU lacks SSSE3 or the hyperscan platform cannot be populated.
   */
  static void
  rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
  {
  #ifdef WITH_HYPERSCAN
      guint i;
      hs_platform_info_t plt;
      hs_compile_error_t *err;
      struct rspamd_map *map;
      rspamd_regexp_t *re;
      gint pcre_flags;

      map = re_map->map;

      if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
          msg_info_map ("disable hyperscan for map %s, ssse3 instructons are not supported by CPU",
                  map->name);
          return;
      }

      if (hs_populate_platform (&plt) != HS_SUCCESS) {
          msg_err_map ("cannot populate hyperscan platform");
          return;
      }

      re_map->patterns = g_new (gchar *, re_map->regexps->len);
      re_map->flags = g_new (gint, re_map->regexps->len);
      re_map->ids = g_new (gint, re_map->regexps->len);

      for (i = 0; i < re_map->regexps->len; i ++) {
          const gchar *pat;
          gchar *escaped;
          gint pat_flags;

          re = g_ptr_array_index (re_map->regexps, i);
          pcre_flags = rspamd_regexp_get_pcre_flags (re);
          pat = rspamd_regexp_get_pattern (re);
          pat_flags = rspamd_regexp_get_flags (re);

          /*
           * g_new does not zero the memory: set the base flags before any
           * `|=` so that flags added below are neither built on garbage nor
           * overwritten afterwards
           */
          re_map->flags[i] = HS_FLAG_SINGLEMATCH;

          if (pat_flags & RSPAMD_REGEXP_FLAG_UTF) {
              escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
                      RSPAMD_REGEXP_ESCAPE_RE|RSPAMD_REGEXP_ESCAPE_UTF);
              re_map->flags[i] |= HS_FLAG_UTF8;
          }
          else {
              escaped = rspamd_str_regexp_escape (pat, strlen (pat), NULL,
                      RSPAMD_REGEXP_ESCAPE_RE);
          }

          re_map->patterns[i] = escaped;

  #ifndef WITH_PCRE2
          if (pcre_flags & PCRE_FLAG(UTF8)) {
              re_map->flags[i] |= HS_FLAG_UTF8;
          }
  #else
          if (pcre_flags & PCRE_FLAG(UTF)) {
              re_map->flags[i] |= HS_FLAG_UTF8;
          }
  #endif
          if (pcre_flags & PCRE_FLAG(CASELESS)) {
              re_map->flags[i] |= HS_FLAG_CASELESS;
          }
          if (pcre_flags & PCRE_FLAG(MULTILINE)) {
              re_map->flags[i] |= HS_FLAG_MULTILINE;
          }
          if (pcre_flags & PCRE_FLAG(DOTALL)) {
              re_map->flags[i] |= HS_FLAG_DOTALL;
          }
          if (rspamd_regexp_get_maxhits (re) == 1) {
              re_map->flags[i] |= HS_FLAG_SINGLEMATCH;
          }

          /* Hyperscan reports matches by id; ids are indexes into regexps/values */
          re_map->ids[i] = i;
      }

      if (re_map->regexps->len > 0 && re_map->patterns) {
          if (hs_compile_multi ((const gchar **)re_map->patterns,
                  re_map->flags,
                  re_map->ids,
                  re_map->regexps->len,
                  HS_MODE_BLOCK,
                  &plt,
                  &re_map->hs_db,
                  &err) != HS_SUCCESS) {

              msg_err_map ("cannot create tree of regexp when processing '%s': %s",
                      err->expression >= 0 ?
                              re_map->patterns[err->expression] :
                              "unknown regexp", err->message);
              re_map->hs_db = NULL;
              hs_free_compile_error (err);

              return;
          }

          if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
              msg_err_map ("cannot allocate scratch space for hyperscan");
              hs_free_database (re_map->hs_db);
              re_map->hs_db = NULL;
          }
      }
      else {
          msg_err_map ("regexp map is empty");
      }
  #endif
  }
  912. gchar *
  913. rspamd_regexp_list_read_single (
  914. gchar *chunk,
  915. gint len,
  916. struct map_cb_data *data,
  917. gboolean final)
  918. {
  919. struct rspamd_regexp_map_helper *re_map;
  920. if (data->cur_data == NULL) {
  921. re_map = rspamd_map_helper_new_regexp (data->map, 0);
  922. data->cur_data = re_map;
  923. }
  924. return rspamd_parse_kv_list (
  925. chunk,
  926. len,
  927. data,
  928. rspamd_map_helper_insert_re,
  929. hash_fill,
  930. final);
  931. }
  932. gchar *
  933. rspamd_glob_list_read_single (
  934. gchar *chunk,
  935. gint len,
  936. struct map_cb_data *data,
  937. gboolean final)
  938. {
  939. struct rspamd_regexp_map_helper *re_map;
  940. if (data->cur_data == NULL) {
  941. re_map = rspamd_map_helper_new_regexp (data->map, RSPAMD_REGEXP_MAP_FLAG_GLOB);
  942. data->cur_data = re_map;
  943. }
  944. return rspamd_parse_kv_list (
  945. chunk,
  946. len,
  947. data,
  948. rspamd_map_helper_insert_re,
  949. hash_fill,
  950. final);
  951. }
  952. gchar *
  953. rspamd_regexp_list_read_multiple (
  954. gchar *chunk,
  955. gint len,
  956. struct map_cb_data *data,
  957. gboolean final)
  958. {
  959. struct rspamd_regexp_map_helper *re_map;
  960. if (data->cur_data == NULL) {
  961. re_map = rspamd_map_helper_new_regexp (data->map,
  962. RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
  963. data->cur_data = re_map;
  964. }
  965. return rspamd_parse_kv_list (
  966. chunk,
  967. len,
  968. data,
  969. rspamd_map_helper_insert_re,
  970. hash_fill,
  971. final);
  972. }
  973. gchar *
  974. rspamd_glob_list_read_multiple (
  975. gchar *chunk,
  976. gint len,
  977. struct map_cb_data *data,
  978. gboolean final)
  979. {
  980. struct rspamd_regexp_map_helper *re_map;
  981. if (data->cur_data == NULL) {
  982. re_map = rspamd_map_helper_new_regexp (data->map,
  983. RSPAMD_REGEXP_MAP_FLAG_GLOB|RSPAMD_REGEXP_MAP_FLAG_MULTIPLE);
  984. data->cur_data = re_map;
  985. }
  986. return rspamd_parse_kv_list (
  987. chunk,
  988. len,
  989. data,
  990. rspamd_map_helper_insert_re,
  991. hash_fill,
  992. final);
  993. }
  994. void
  995. rspamd_regexp_list_fin (struct map_cb_data *data, void **target)
  996. {
  997. struct rspamd_regexp_map_helper *re_map;
  998. struct rspamd_map *map = data->map;
  999. if (data->cur_data) {
  1000. re_map = data->cur_data;
  1001. rspamd_re_map_finalize (re_map);
  1002. msg_info_map ("read regexp list of %ud elements",
  1003. re_map->regexps->len);
  1004. data->map->traverse_function = rspamd_map_helper_traverse_regexp;
  1005. data->map->nelts = kh_size (re_map->htb);
  1006. data->map->digest = rspamd_cryptobox_fast_hash_final (&re_map->hst);
  1007. }
  1008. if (target) {
  1009. *target = data->cur_data;
  1010. }
  1011. if (data->prev_data) {
  1012. rspamd_map_helper_destroy_regexp (data->prev_data);
  1013. }
  1014. }
  1015. void
  1016. rspamd_regexp_list_dtor (struct map_cb_data *data)
  1017. {
  1018. if (data->cur_data) {
  1019. rspamd_map_helper_destroy_regexp (data->cur_data);
  1020. }
  1021. }
  #ifdef WITH_HYPERSCAN
  /**
   * Hyperscan match callback used for single-match lookups.
   *
   * Stores the id of the matched pattern into the guint that `context`
   * points to. The remaining arguments are ignored.
   *
   * @return always 1: only one match is wanted, so a non-zero value is
   * returned on the very first hit
   */
  static int
  rspamd_match_hs_single_handler (unsigned int id, unsigned long long from,
          unsigned long long to, unsigned int flags, void *context)
  {
      guint *matched_id = context;

      *matched_id = id;

      /* Non-zero on purpose: a single match is all the caller needs */
      return 1;
  }
  #endif
  1034. gconstpointer
  1035. rspamd_match_regexp_map_single (struct rspamd_regexp_map_helper *map,
  1036. const gchar *in, gsize len)
  1037. {
  1038. guint i;
  1039. rspamd_regexp_t *re;
  1040. gint res = 0;
  1041. gpointer ret = NULL;
  1042. struct rspamd_map_helper_value *val;
  1043. gboolean validated = FALSE;
  1044. g_assert (in != NULL);
  1045. if (map == NULL || len == 0 || map->regexps == NULL) {
  1046. return NULL;
  1047. }
  1048. if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
  1049. if (rspamd_fast_utf8_validate (in, len) == 0) {
  1050. validated = TRUE;
  1051. }
  1052. }
  1053. else {
  1054. validated = TRUE;
  1055. }
  1056. #ifdef WITH_HYPERSCAN
  1057. if (map->hs_db && map->hs_scratch) {
  1058. if (validated) {
  1059. res = hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
  1060. rspamd_match_hs_single_handler, (void *)&i);
  1061. if (res == HS_SCAN_TERMINATED) {
  1062. res = 1;
  1063. val = g_ptr_array_index (map->values, i);
  1064. ret = val->value;
  1065. val->hits ++;
  1066. }
  1067. return ret;
  1068. }
  1069. }
  1070. #endif
  1071. if (!res) {
  1072. /* PCRE version */
  1073. for (i = 0; i < map->regexps->len; i ++) {
  1074. re = g_ptr_array_index (map->regexps, i);
  1075. if (rspamd_regexp_search (re, in, len, NULL, NULL, !validated, NULL)) {
  1076. val = g_ptr_array_index (map->values, i);
  1077. ret = val->value;
  1078. val->hits ++;
  1079. break;
  1080. }
  1081. }
  1082. }
  1083. return ret;
  1084. }
  #ifdef WITH_HYPERSCAN
  /* Context passed to the hyperscan callback when every match is collected */
  struct rspamd_multiple_cbdata {
      GPtrArray *ar; /* receives the value of each matched entry */
      struct rspamd_regexp_map_helper *map; /* map whose values are indexed by pattern id */
  };

  /**
   * Hyperscan match callback used for collect-all lookups.
   *
   * For an id that falls inside the map's values array, bumps the entry's hit
   * counter and appends its value to the result array. Ids outside that range
   * are ignored.
   *
   * @return always 0, as all matches are needed
   */
  static int
  rspamd_match_hs_multiple_handler (unsigned int id, unsigned long long from,
          unsigned long long to, unsigned int flags, void *context)
  {
      struct rspamd_multiple_cbdata *cbd = context;
      struct rspamd_map_helper_value *entry;

      if (id >= cbd->map->values->len) {
          return 0;
      }

      entry = g_ptr_array_index (cbd->map->values, id);
      entry->hits ++;
      g_ptr_array_add (cbd->ar, entry->value);

      /* Zero on purpose: every match has to be reported */
      return 0;
  }
  #endif
  1106. GPtrArray*
  1107. rspamd_match_regexp_map_all (struct rspamd_regexp_map_helper *map,
  1108. const gchar *in, gsize len)
  1109. {
  1110. guint i;
  1111. rspamd_regexp_t *re;
  1112. GPtrArray *ret;
  1113. gint res = 0;
  1114. gboolean validated = FALSE;
  1115. struct rspamd_map_helper_value *val;
  1116. if (map == NULL || map->regexps == NULL || len == 0) {
  1117. return NULL;
  1118. }
  1119. g_assert (in != NULL);
  1120. if (map->map_flags & RSPAMD_REGEXP_MAP_FLAG_UTF) {
  1121. if (rspamd_fast_utf8_validate (in, len) == 0) {
  1122. validated = TRUE;
  1123. }
  1124. }
  1125. else {
  1126. validated = TRUE;
  1127. }
  1128. ret = g_ptr_array_new ();
  1129. #ifdef WITH_HYPERSCAN
  1130. if (map->hs_db && map->hs_scratch) {
  1131. if (validated) {
  1132. struct rspamd_multiple_cbdata cbd;
  1133. cbd.ar = ret;
  1134. cbd.map = map;
  1135. if (hs_scan (map->hs_db, in, len, 0, map->hs_scratch,
  1136. rspamd_match_hs_multiple_handler, &cbd) == HS_SUCCESS) {
  1137. res = 1;
  1138. }
  1139. }
  1140. }
  1141. #endif
  1142. if (!res) {
  1143. /* PCRE version */
  1144. for (i = 0; i < map->regexps->len; i ++) {
  1145. re = g_ptr_array_index (map->regexps, i);
  1146. if (rspamd_regexp_search (re, in, len, NULL, NULL,
  1147. !validated, NULL)) {
  1148. val = g_ptr_array_index (map->values, i);
  1149. val->hits ++;
  1150. g_ptr_array_add (ret, val->value);
  1151. }
  1152. }
  1153. }
  1154. if (ret->len > 0) {
  1155. return ret;
  1156. }
  1157. g_ptr_array_free (ret, TRUE);
  1158. return NULL;
  1159. }
  1160. gconstpointer
  1161. rspamd_match_hash_map (struct rspamd_hash_map_helper *map, const gchar *in)
  1162. {
  1163. khiter_t k;
  1164. struct rspamd_map_helper_value *val;
  1165. if (map == NULL || map->htb == NULL) {
  1166. return NULL;
  1167. }
  1168. k = kh_get (rspamd_map_hash, map->htb, in);
  1169. if (k != kh_end (map->htb)) {
  1170. val = kh_value (map->htb, k);
  1171. val->hits ++;
  1172. return val->value;
  1173. }
  1174. return NULL;
  1175. }
  1176. gconstpointer
  1177. rspamd_match_radix_map (struct rspamd_radix_map_helper *map,
  1178. const guchar *in, gsize inlen)
  1179. {
  1180. struct rspamd_map_helper_value *val;
  1181. if (map == NULL || map->trie == NULL) {
  1182. return NULL;
  1183. }
  1184. val = (struct rspamd_map_helper_value *)radix_find_compressed (map->trie,
  1185. in, inlen);
  1186. if (val != (gconstpointer)RADIX_NO_VALUE) {
  1187. val->hits ++;
  1188. return val->value;
  1189. }
  1190. return NULL;
  1191. }
  1192. gconstpointer
  1193. rspamd_match_radix_map_addr (struct rspamd_radix_map_helper *map,
  1194. const rspamd_inet_addr_t *addr)
  1195. {
  1196. struct rspamd_map_helper_value *val;
  1197. if (map == NULL || map->trie == NULL) {
  1198. return NULL;
  1199. }
  1200. val = (struct rspamd_map_helper_value *)radix_find_compressed_addr (map->trie, addr);
  1201. if (val != (gconstpointer)RADIX_NO_VALUE) {
  1202. val->hits ++;
  1203. return val->value;
  1204. }
  1205. return NULL;
  1206. }