You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

multipattern.c 17KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "libutil/multipattern.h"
  18. #include "libutil/str_util.h"
  19. #include "libcryptobox/cryptobox.h"
  20. #ifdef WITH_HYPERSCAN
  21. #include "logger.h"
  22. #include "unix-std.h"
  23. #include "hs.h"
  24. #endif
  25. #include "acism.h"
  26. #include "libutil/regexp.h"
  27. #include <stdalign.h>
  /* Number of hyperscan scratch spaces kept by each multipattern */
  #define MAX_SCRATCH 4

  /* Cached outcome of the hyperscan platform probe */
  enum rspamd_hs_check_state {
      RSPAMD_HS_UNCHECKED = 0,   /* platform has not been probed yet */
      RSPAMD_HS_SUPPORTED = 1,   /* hs_valid_platform () reported success */
      RSPAMD_HS_UNSUPPORTED = 2  /* hs_valid_platform () reported failure */
  };

  /* Directory holding serialized hyperscan databases; NULL disables caching */
  static const char *hs_cache_dir = NULL;
  /* Probe result, filled lazily by rspamd_hs_check () */
  static enum rspamd_hs_check_state hs_suitable_cpu = RSPAMD_HS_UNCHECKED;
  /*
   * State of a multipattern matcher. When rspamd_hs_check () succeeds the
   * hyperscan members are used; otherwise the trie/regexp members are used.
   * The structure is 64-byte aligned because of the embedded hash state.
   */
  struct RSPAMD_ALIGNED(64) rspamd_multipattern {
  #ifdef WITH_HYPERSCAN
      /* Running hash of filtered patterns (plus platform info at compile time) */
      rspamd_cryptobox_hash_state_t hash_state;
      /* Compiled or deserialized hyperscan database */
      hs_database_t *db;
      /* Scratch spaces allocated for db when the patterns are compiled */
      hs_scratch_t *scratch[MAX_SCRATCH];
      /* Filtered patterns (gchar *), owned by this structure */
      GArray *hs_pats;
      /* Pattern ids (gint), equal to the insertion index */
      GArray *hs_ids;
      /* HS_FLAG_* bitmask (gint) for each pattern */
      GArray *hs_flags;
      /* Bitmask of scratch[] slots currently taken by a lookup */
      guint scratch_used;
  #endif
      /* acism trie used for plain patterns without hyperscan */
      ac_trie_t *t;
      /* Filtered patterns (ac_trie_pat_t) in non-hyperscan mode */
      GArray *pats;
      /* Compiled regexps (rspamd_regexp_t *) for the GLOB/RE fallback */
      GArray *res;
      /* TRUE once rspamd_multipattern_compile () has succeeded */
      gboolean compiled;
      /* Number of patterns added so far */
      guint cnt;
      /* Flags given at creation time */
      enum rspamd_multipattern_flags flags;
  };
  53. static GQuark
  54. rspamd_multipattern_quark (void)
  55. {
  56. return g_quark_from_static_string ("multipattern");
  57. }
  58. static inline gboolean
  59. rspamd_hs_check (void)
  60. {
  61. #ifdef WITH_HYPERSCAN
  62. if (G_UNLIKELY (hs_suitable_cpu == RSPAMD_HS_UNCHECKED)) {
  63. if (hs_valid_platform () == HS_SUCCESS) {
  64. hs_suitable_cpu = RSPAMD_HS_SUPPORTED;
  65. }
  66. else {
  67. hs_suitable_cpu = RSPAMD_HS_UNSUPPORTED;
  68. }
  69. }
  70. #endif
  71. return hs_suitable_cpu == RSPAMD_HS_SUPPORTED;
  72. }
  73. void
  74. rspamd_multipattern_library_init (const gchar *cache_dir)
  75. {
  76. hs_cache_dir = cache_dir;
  77. #ifdef WITH_HYPERSCAN
  78. rspamd_hs_check ();
  79. #endif
  80. }
  81. #ifdef WITH_HYPERSCAN
  82. static gchar *
  83. rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern, gsize slen,
  84. gsize *dst_len)
  85. {
  86. gsize len;
  87. const gchar *p, *prefix, *suffix;
  88. gchar *res;
  89. /*
  90. * We understand the following cases
  91. * 1) blah -> .blah\b
  92. * 2) *.blah -> ..*\\.blah\b|$
  93. * 3) ???
  94. */
  95. if (pattern[0] == '*') {
  96. p = strchr (pattern, '.');
  97. if (p == NULL) {
  98. /* XXX: bad */
  99. p = pattern;
  100. }
  101. else {
  102. p ++;
  103. }
  104. prefix = "\\.";
  105. len = slen + strlen (prefix);
  106. }
  107. else {
  108. prefix = "\\.";
  109. p = pattern;
  110. len = slen + strlen (prefix);
  111. }
  112. suffix = "(:?\\b|$)";
  113. len += strlen (suffix);
  114. res = g_malloc (len + 1);
  115. slen = rspamd_strlcpy (res, prefix, len + 1);
  116. slen += rspamd_strlcpy (res + slen, p, len + 1 - slen);
  117. slen += rspamd_strlcpy (res + slen, suffix, len + 1 - slen);
  118. *dst_len = slen;
  119. return res;
  120. }
  121. #endif
  122. static gchar *
  123. rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,
  124. gsize *dst_len)
  125. {
  126. gsize dlen, slen;
  127. const gchar *p, *prefix;
  128. gchar *res;
  129. /*
  130. * We understand the following cases
  131. * 1) blah -> \\.blah
  132. * 2) *.blah -> \\..*\\.blah
  133. * 3) ???
  134. */
  135. slen = len;
  136. if (pattern[0] == '*') {
  137. dlen = slen;
  138. p = memchr (pattern, '.', len);
  139. if (p == NULL) {
  140. /* XXX: bad */
  141. p = pattern;
  142. }
  143. else {
  144. p ++;
  145. }
  146. dlen -= p - pattern;
  147. prefix = ".";
  148. dlen ++;
  149. }
  150. else {
  151. dlen = slen + 1;
  152. prefix = ".";
  153. p = pattern;
  154. }
  155. res = g_malloc (dlen + 1);
  156. slen = strlen (prefix);
  157. memcpy (res, prefix, slen);
  158. rspamd_strlcpy (res + slen, p, dlen - slen + 1);
  159. *dst_len = dlen;
  160. return res;
  161. }
  162. /*
  163. * Escapes special characters from specific pattern
  164. */
  165. static gchar *
  166. rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
  167. enum rspamd_multipattern_flags flags,
  168. gsize *dst_len)
  169. {
  170. gchar *ret = NULL;
  171. gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
  172. if (flags & RSPAMD_MULTIPATTERN_UTF8) {
  173. gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
  174. }
  175. #ifdef WITH_HYPERSCAN
  176. if (rspamd_hs_check ()) {
  177. if (flags & RSPAMD_MULTIPATTERN_TLD) {
  178. gchar *tmp;
  179. gsize tlen;
  180. tmp = rspamd_multipattern_escape_tld_hyperscan (pattern, len, &tlen);
  181. ret = rspamd_str_regexp_escape (tmp, tlen, dst_len,
  182. gl_flags|RSPAMD_REGEXP_ESCAPE_RE);
  183. g_free (tmp);
  184. }
  185. else if (flags & RSPAMD_MULTIPATTERN_RE) {
  186. ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags |
  187. RSPAMD_REGEXP_ESCAPE_RE);
  188. }
  189. else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
  190. ret = rspamd_str_regexp_escape (pattern, len, dst_len,
  191. gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
  192. }
  193. else {
  194. ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags);
  195. }
  196. return ret;
  197. }
  198. #endif
  199. if (flags & RSPAMD_MULTIPATTERN_TLD) {
  200. ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len);
  201. }
  202. else if (flags & RSPAMD_MULTIPATTERN_RE) {
  203. ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags |
  204. RSPAMD_REGEXP_ESCAPE_RE);
  205. }
  206. else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
  207. ret = rspamd_str_regexp_escape (pattern, len, dst_len,
  208. gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
  209. }
  210. else {
  211. ret = malloc (len + 1);
  212. *dst_len = rspamd_strlcpy (ret, pattern, len + 1);
  213. }
  214. return ret;
  215. }
  216. struct rspamd_multipattern *
  217. rspamd_multipattern_create (enum rspamd_multipattern_flags flags)
  218. {
  219. struct rspamd_multipattern *mp;
  220. /* Align due to blake2b state */
  221. (void) !posix_memalign((void **)&mp, RSPAMD_ALIGNOF(struct rspamd_multipattern),
  222. sizeof (*mp));
  223. g_assert (mp != NULL);
  224. memset (mp, 0, sizeof (*mp));
  225. mp->flags = flags;
  226. #ifdef WITH_HYPERSCAN
  227. if (rspamd_hs_check ()) {
  228. mp->hs_pats = g_array_new (FALSE, TRUE, sizeof (gchar *));
  229. mp->hs_flags = g_array_new (FALSE, TRUE, sizeof (gint));
  230. mp->hs_ids = g_array_new (FALSE, TRUE, sizeof (gint));
  231. rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
  232. return mp;
  233. }
  234. #endif
  235. mp->pats = g_array_new (FALSE, TRUE, sizeof (ac_trie_pat_t));
  236. return mp;
  237. }
  238. struct rspamd_multipattern *
  239. rspamd_multipattern_create_sized (guint npatterns,
  240. enum rspamd_multipattern_flags flags)
  241. {
  242. struct rspamd_multipattern *mp;
  243. /* Align due to blake2b state */
  244. (void) !posix_memalign((void **)&mp, RSPAMD_ALIGNOF(struct rspamd_multipattern), sizeof (*mp));
  245. g_assert (mp != NULL);
  246. memset (mp, 0, sizeof (*mp));
  247. mp->flags = flags;
  248. #ifdef WITH_HYPERSCAN
  249. if (rspamd_hs_check ()) {
  250. mp->hs_pats = g_array_sized_new (FALSE, TRUE, sizeof (gchar *), npatterns);
  251. mp->hs_flags = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
  252. mp->hs_ids = g_array_sized_new (FALSE, TRUE, sizeof (gint), npatterns);
  253. rspamd_cryptobox_hash_init (&mp->hash_state, NULL, 0);
  254. return mp;
  255. }
  256. #endif
  257. mp->pats = g_array_sized_new (FALSE, TRUE, sizeof (ac_trie_pat_t), npatterns);
  258. return mp;
  259. }
  260. void
  261. rspamd_multipattern_add_pattern (struct rspamd_multipattern *mp,
  262. const gchar *pattern, gint flags)
  263. {
  264. g_assert (pattern != NULL);
  265. rspamd_multipattern_add_pattern_len (mp, pattern, strlen (pattern), flags);
  266. }
  267. void
  268. rspamd_multipattern_add_pattern_len (struct rspamd_multipattern *mp,
  269. const gchar *pattern, gsize patlen, gint flags)
  270. {
  271. gsize dlen;
  272. g_assert (pattern != NULL);
  273. g_assert (mp != NULL);
  274. g_assert (!mp->compiled);
  275. #ifdef WITH_HYPERSCAN
  276. if (rspamd_hs_check ()) {
  277. gchar *np;
  278. gint fl = HS_FLAG_SOM_LEFTMOST;
  279. gint adjusted_flags = mp->flags | flags;
  280. if (adjusted_flags & RSPAMD_MULTIPATTERN_ICASE) {
  281. fl |= HS_FLAG_CASELESS;
  282. }
  283. if (adjusted_flags & RSPAMD_MULTIPATTERN_UTF8) {
  284. if (adjusted_flags & RSPAMD_MULTIPATTERN_TLD) {
  285. fl |= HS_FLAG_UTF8;
  286. }
  287. else {
  288. fl |= HS_FLAG_UTF8 | HS_FLAG_UCP;
  289. }
  290. }
  291. if (adjusted_flags & RSPAMD_MULTIPATTERN_DOTALL) {
  292. fl |= HS_FLAG_DOTALL;
  293. }
  294. if (adjusted_flags & RSPAMD_MULTIPATTERN_SINGLEMATCH) {
  295. fl |= HS_FLAG_SINGLEMATCH;
  296. fl &= ~HS_FLAG_SOM_LEFTMOST; /* According to hyperscan docs */
  297. }
  298. if (adjusted_flags & RSPAMD_MULTIPATTERN_NO_START) {
  299. fl &= ~HS_FLAG_SOM_LEFTMOST;
  300. }
  301. g_array_append_val (mp->hs_flags, fl);
  302. np = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
  303. g_array_append_val (mp->hs_pats, np);
  304. fl = mp->cnt;
  305. g_array_append_val (mp->hs_ids, fl);
  306. rspamd_cryptobox_hash_update (&mp->hash_state, np, dlen);
  307. mp->cnt ++;
  308. return;
  309. }
  310. #endif
  311. ac_trie_pat_t pat;
  312. pat.ptr = rspamd_multipattern_pattern_filter (pattern, patlen, flags, &dlen);
  313. pat.len = dlen;
  314. g_array_append_val (mp->pats, pat);
  315. mp->cnt ++;
  316. }
  317. struct rspamd_multipattern *
  318. rspamd_multipattern_create_full (const gchar **patterns,
  319. guint npatterns, enum rspamd_multipattern_flags flags)
  320. {
  321. struct rspamd_multipattern *mp;
  322. guint i;
  323. g_assert (npatterns > 0);
  324. g_assert (patterns != NULL);
  325. mp = rspamd_multipattern_create_sized (npatterns, flags);
  326. for (i = 0; i < npatterns; i++) {
  327. rspamd_multipattern_add_pattern (mp, patterns[i], flags);
  328. }
  329. return mp;
  330. }
  331. #ifdef WITH_HYPERSCAN
  332. static gboolean
  333. rspamd_multipattern_try_load_hs (struct rspamd_multipattern *mp,
  334. const guchar *hash)
  335. {
  336. gchar fp[PATH_MAX];
  337. gpointer map;
  338. gsize len;
  339. if (hs_cache_dir == NULL) {
  340. return FALSE;
  341. }
  342. rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp", hs_cache_dir,
  343. (gint)rspamd_cryptobox_HASHBYTES / 2, hash);
  344. if ((map = rspamd_file_xmap (fp, PROT_READ, &len, TRUE)) != NULL) {
  345. if (hs_deserialize_database (map, len, &mp->db) == HS_SUCCESS) {
  346. munmap (map, len);
  347. return TRUE;
  348. }
  349. munmap (map, len);
  350. /* Remove stale file */
  351. (void)unlink (fp);
  352. }
  353. return FALSE;
  354. }
  355. static void
  356. rspamd_multipattern_try_save_hs (struct rspamd_multipattern *mp,
  357. const guchar *hash)
  358. {
  359. gchar fp[PATH_MAX], np[PATH_MAX];
  360. char *bytes = NULL;
  361. gsize len;
  362. gint fd;
  363. if (hs_cache_dir == NULL) {
  364. return;
  365. }
  366. rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp.tmp", hs_cache_dir,
  367. (gint)rspamd_cryptobox_HASHBYTES / 2, hash);
  368. if ((fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0)) != -1) {
  369. if (hs_serialize_database (mp->db, &bytes, &len) == HS_SUCCESS) {
  370. if (write (fd, bytes, len) == -1) {
  371. msg_warn ("cannot write hyperscan cache to %s: %s",
  372. fp, strerror (errno));
  373. unlink (fp);
  374. free (bytes);
  375. }
  376. else {
  377. free (bytes);
  378. fsync (fd);
  379. rspamd_snprintf (np, sizeof (np), "%s/%*xs.hsmp", hs_cache_dir,
  380. (gint)rspamd_cryptobox_HASHBYTES / 2, hash);
  381. if (rename (fp, np) == -1) {
  382. msg_warn ("cannot rename hyperscan cache from %s to %s: %s",
  383. fp, np, strerror (errno));
  384. unlink (fp);
  385. }
  386. }
  387. }
  388. else {
  389. msg_warn ("cannot serialize hyperscan cache to %s: %s",
  390. fp, strerror (errno));
  391. unlink (fp);
  392. }
  393. close (fd);
  394. }
  395. }
  396. #endif
  397. gboolean
  398. rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
  399. {
  400. g_assert (mp != NULL);
  401. g_assert (!mp->compiled);
  402. #ifdef WITH_HYPERSCAN
  403. if (rspamd_hs_check ()) {
  404. guint i;
  405. hs_platform_info_t plt;
  406. hs_compile_error_t *hs_errors;
  407. guchar hash[rspamd_cryptobox_HASHBYTES];
  408. if (mp->cnt > 0) {
  409. g_assert (hs_populate_platform (&plt) == HS_SUCCESS);
  410. rspamd_cryptobox_hash_update (&mp->hash_state, (void *)&plt, sizeof (plt));
  411. rspamd_cryptobox_hash_final (&mp->hash_state, hash);
  412. if (!rspamd_multipattern_try_load_hs (mp, hash)) {
  413. if (hs_compile_multi ((const char *const *)mp->hs_pats->data,
  414. (const unsigned int *)mp->hs_flags->data,
  415. (const unsigned int *)mp->hs_ids->data,
  416. mp->cnt,
  417. HS_MODE_BLOCK,
  418. &plt,
  419. &mp->db,
  420. &hs_errors) != HS_SUCCESS) {
  421. g_set_error (err, rspamd_multipattern_quark (), EINVAL,
  422. "cannot create tree of regexp when processing '%s': %s",
  423. g_array_index (mp->hs_pats, char *, hs_errors->expression),
  424. hs_errors->message);
  425. hs_free_compile_error (hs_errors);
  426. return FALSE;
  427. }
  428. }
  429. rspamd_multipattern_try_save_hs (mp, hash);
  430. for (i = 0; i < MAX_SCRATCH; i ++) {
  431. g_assert (hs_alloc_scratch (mp->db, &mp->scratch[i]) == HS_SUCCESS);
  432. }
  433. }
  434. mp->compiled = TRUE;
  435. return TRUE;
  436. }
  437. #endif
  438. if (mp->cnt > 0) {
  439. if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
  440. /* Fallback to pcre... */
  441. rspamd_regexp_t *re;
  442. mp->res = g_array_sized_new (FALSE, TRUE,
  443. sizeof (rspamd_regexp_t *), mp->cnt);
  444. for (guint i = 0; i < mp->cnt; i ++) {
  445. const ac_trie_pat_t *pat;
  446. const gchar *pat_flags = NULL;
  447. if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) {
  448. pat_flags = "u";
  449. }
  450. pat = &g_array_index (mp->pats, ac_trie_pat_t, i);
  451. re = rspamd_regexp_new (pat->ptr, pat_flags, err);
  452. if (re == NULL) {
  453. return FALSE;
  454. }
  455. g_array_append_val (mp->res, re);
  456. }
  457. }
  458. else {
  459. mp->t = acism_create ((const ac_trie_pat_t *) mp->pats->data, mp->cnt);
  460. }
  461. }
  462. mp->compiled = TRUE;
  463. return TRUE;
  464. }
  465. struct rspamd_multipattern_cbdata {
  466. struct rspamd_multipattern *mp;
  467. const gchar *in;
  468. gsize len;
  469. rspamd_multipattern_cb_t cb;
  470. gpointer ud;
  471. guint nfound;
  472. gint ret;
  473. };
  474. #ifdef WITH_HYPERSCAN
  475. static gint
  476. rspamd_multipattern_hs_cb (unsigned int id,
  477. unsigned long long from,
  478. unsigned long long to,
  479. unsigned int flags,
  480. void *ud)
  481. {
  482. struct rspamd_multipattern_cbdata *cbd = ud;
  483. gint ret = 0;
  484. if (to > 0) {
  485. if (from == HS_OFFSET_PAST_HORIZON) {
  486. from = 0;
  487. }
  488. ret = cbd->cb (cbd->mp, id, from, to, cbd->in, cbd->len, cbd->ud);
  489. cbd->nfound ++;
  490. cbd->ret = ret;
  491. }
  492. return ret;
  493. }
  494. #endif
  495. static gint
  496. rspamd_multipattern_acism_cb (int strnum, int textpos, void *context)
  497. {
  498. struct rspamd_multipattern_cbdata *cbd = context;
  499. gint ret;
  500. ac_trie_pat_t pat;
  501. pat = g_array_index (cbd->mp->pats, ac_trie_pat_t, strnum);
  502. ret = cbd->cb (cbd->mp, strnum, textpos - pat.len,
  503. textpos, cbd->in, cbd->len, cbd->ud);
  504. cbd->nfound ++;
  505. cbd->ret = ret;
  506. return ret;
  507. }
  508. gint
  509. rspamd_multipattern_lookup (struct rspamd_multipattern *mp,
  510. const gchar *in, gsize len, rspamd_multipattern_cb_t cb,
  511. gpointer ud, guint *pnfound)
  512. {
  513. struct rspamd_multipattern_cbdata cbd;
  514. gint ret = 0;
  515. g_assert (mp != NULL);
  516. if (mp->cnt == 0 || !mp->compiled || len == 0) {
  517. return 0;
  518. }
  519. cbd.mp = mp;
  520. cbd.in = in;
  521. cbd.len = len;
  522. cbd.cb = cb;
  523. cbd.ud = ud;
  524. cbd.nfound = 0;
  525. cbd.ret = 0;
  526. #ifdef WITH_HYPERSCAN
  527. if (rspamd_hs_check ()) {
  528. hs_scratch_t *scr = NULL;
  529. guint i;
  530. for (i = 0; i < MAX_SCRATCH; i ++) {
  531. if (!(mp->scratch_used & (1 << i))) {
  532. mp->scratch_used |= (1 << i);
  533. scr = mp->scratch[i];
  534. break;
  535. }
  536. }
  537. g_assert (scr != NULL);
  538. ret = hs_scan (mp->db, in, len, 0, scr,
  539. rspamd_multipattern_hs_cb, &cbd);
  540. mp->scratch_used &= ~(1 << i);
  541. if (ret == HS_SUCCESS) {
  542. ret = 0;
  543. }
  544. else if (ret == HS_SCAN_TERMINATED) {
  545. ret = cbd.ret;
  546. }
  547. if (pnfound) {
  548. *pnfound = cbd.nfound;
  549. }
  550. return ret;
  551. }
  552. #endif
  553. gint state = 0;
  554. if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
  555. /* Terribly inefficient, but who cares - just use hyperscan */
  556. for (guint i = 0; i < mp->cnt; i ++) {
  557. rspamd_regexp_t *re = g_array_index (mp->res, rspamd_regexp_t *, i);
  558. const gchar *start = NULL, *end = NULL;
  559. while (rspamd_regexp_search (re,
  560. in,
  561. len,
  562. &start,
  563. &end,
  564. TRUE,
  565. NULL)) {
  566. if (rspamd_multipattern_acism_cb (i, end - in, &cbd)) {
  567. goto out;
  568. }
  569. }
  570. }
  571. out:
  572. ret = cbd.ret;
  573. if (pnfound) {
  574. *pnfound = cbd.nfound;
  575. }
  576. }
  577. else {
  578. /* Plain trie */
  579. ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
  580. &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
  581. if (pnfound) {
  582. *pnfound = cbd.nfound;
  583. }
  584. }
  585. return ret;
  586. }
  587. void
  588. rspamd_multipattern_destroy (struct rspamd_multipattern *mp)
  589. {
  590. guint i;
  591. if (mp) {
  592. #ifdef WITH_HYPERSCAN
  593. if (rspamd_hs_check ()) {
  594. gchar *p;
  595. if (mp->compiled && mp->cnt > 0) {
  596. for (i = 0; i < MAX_SCRATCH; i ++) {
  597. hs_free_scratch (mp->scratch[i]);
  598. }
  599. hs_free_database (mp->db);
  600. }
  601. for (i = 0; i < mp->cnt; i ++) {
  602. p = g_array_index (mp->hs_pats, gchar *, i);
  603. g_free (p);
  604. }
  605. g_array_free (mp->hs_pats, TRUE);
  606. g_array_free (mp->hs_ids, TRUE);
  607. g_array_free (mp->hs_flags, TRUE);
  608. free (mp); /* Due to posix_memalign */
  609. return;
  610. }
  611. #endif
  612. ac_trie_pat_t pat;
  613. if (mp->compiled && mp->cnt > 0) {
  614. acism_destroy (mp->t);
  615. }
  616. for (i = 0; i < mp->cnt; i ++) {
  617. pat = g_array_index (mp->pats, ac_trie_pat_t, i);
  618. g_free ((gchar *)pat.ptr);
  619. }
  620. g_array_free (mp->pats, TRUE);
  621. g_free (mp);
  622. }
  623. }
  624. const gchar*
  625. rspamd_multipattern_get_pattern (struct rspamd_multipattern *mp,
  626. guint index)
  627. {
  628. g_assert (mp != NULL);
  629. g_assert (index < mp->cnt);
  630. #ifdef WITH_HYPERSCAN
  631. if (rspamd_hs_check ()) {
  632. return g_array_index (mp->hs_pats, gchar *, index);
  633. }
  634. #endif
  635. ac_trie_pat_t pat;
  636. pat = g_array_index (mp->pats, ac_trie_pat_t, index);
  637. return pat.ptr;
  638. }
  639. guint
  640. rspamd_multipattern_get_npatterns (struct rspamd_multipattern *mp)
  641. {
  642. g_assert (mp != NULL);
  643. return mp->cnt;
  644. }
  645. gboolean
  646. rspamd_multipattern_has_hyperscan (void)
  647. {
  648. return rspamd_hs_check ();
  649. }