source.dussan.org Git - rspamd.git/commitdiff
[Project] Add `L` flag for regexps to save start of the match in Hyperscan
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 28 Jul 2020 14:47:51 +0000 (15:47 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 28 Jul 2020 14:47:51 +0000 (15:47 +0100)
src/libmime/mime_expressions.c
src/libserver/re_cache.c
src/libutil/regexp.c
src/libutil/regexp.h

index aaefd24b382d0ac1101557d348ac14572d996f5e..0caa324cca936026a9adfdf1cdd2b6b2ba61ec11 100644 (file)
@@ -372,6 +372,8 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
                case 'u':
                case 'O':
                case 'r':
+               case 'L':
+                       /* Handled by rspamd_regexp_t */
                        g_string_append_c (re_flags, *p);
                        p++;
                        break;
@@ -813,6 +815,10 @@ set:
                        goto err;
                }
                else {
+                       const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
+                                       "re_conditions");
+                       gint lua_cbref = -1;
+
                        /* Check regexp condition */
                        if (real_ud->conf_obj == NULL) {
                                g_set_error (err, rspamd_mime_expr_quark(), 300,
@@ -821,10 +827,6 @@ set:
                                goto err;
                        }
 
-                       const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
-                                       "re_conditions");
-                       gint lua_cbref = -1;
-
                        if (re_conditions != NULL) {
                                if (ucl_object_type (re_conditions) != UCL_OBJECT) {
                                        g_set_error (err, rspamd_mime_expr_quark(), 320,
@@ -851,6 +853,10 @@ set:
                                }
                        }
 
+                       if (lua_cbref != -1) {
+                               msg_info_config ("added condition for regexp %s", mime_atom->str);
+                       }
+
                        /* Register new item in the cache */
                        if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
                                        mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
index bd207573bd2438b871ff71c6486c77ca27a6f3e4..73082bb2d09680e9f85a70cfd583c31f447f5c6e 100644 (file)
@@ -1979,6 +1979,10 @@ rspamd_re_cache_compile_timer_cb (EV_P_ ev_timer *w, int revents )
                        hs_flags[i] |= HS_FLAG_SINGLEMATCH;
                }
 
+               if (re_flags & RSPAMD_REGEXP_FLAG_LEFTMOST) {
+                       hs_flags[i] |= HS_FLAG_SOM_LEFTMOST;
+               }
+
                gchar *pat = rspamd_re_cache_hs_pattern_from_pcre (re);
 
                if (hs_compile (pat,
index 396da80d1053fe8a5d3950ff49783f1d3271c00c..ce764a893861f370dc29be1d809a7373917da8ea 100644 (file)
@@ -406,6 +406,10 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
                                /* We optimize all regexps by default */
                                rspamd_flags |= RSPAMD_REGEXP_FLAG_NOOPT;
                                break;
+                       case 'L':
+                               /* SOM_LEFTMOST hyperscan flag */
+                               rspamd_flags |= RSPAMD_REGEXP_FLAG_LEFTMOST;
+                               break;
                        case 'r':
                                rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
                                rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
index 2e414892ae34ab09ed0d7dbee64b3cf7ba2309f4..128edd7612727d9024a968dc38bec7b498e5e1c8 100644 (file)
@@ -34,6 +34,7 @@
 #define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
 #define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
 #define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
+#define RSPAMD_REGEXP_FLAG_LEFTMOST (1 << 7)
 
 
 #ifdef  __cplusplus