diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-06-30 12:22:08 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-06-30 12:22:08 +0100 |
commit | 1b19510e1392caefd92d7d1ecb0542b7c5a8fb29 (patch) | |
tree | b1cbe161f70d2e89ab1f523be007fef02239c21e /src/libutil/regexp.c | |
parent | b45930d5c64030e815f046761541f61924e856b9 (diff) | |
download | rspamd-1b19510e1392caefd92d7d1ecb0542b7c5a8fb29.tar.gz rspamd-1b19510e1392caefd92d7d1ecb0542b7c5a8fb29.zip |
Improve regexp captures.
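It is now possible to store regexp captures if needed: `rspamd_regexp_search()` accepts an optional `GArray *captures` (elements of `struct rspamd_re_capture`) that is filled when a match yields captures; pass `NULL` to skip this.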
Diffstat (limited to 'src/libutil/regexp.c')
-rw-r--r-- | src/libutil/regexp.c | 44 |
1 file changed, 35 insertions, 9 deletions
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c index 59201eb36..529895b7b 100644 --- a/src/libutil/regexp.c +++ b/src/libutil/regexp.c @@ -52,6 +52,7 @@ struct rspamd_regexp_s { ref_entry_t ref; gpointer ud; gint flags; + gint ncaptures; }; struct rspamd_regexp_cache { @@ -128,7 +129,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, rspamd_regexp_t *res; pcre *r; gchar sep = 0, *real_pattern; - gint regexp_flags = 0, rspamd_flags = 0, err_off, study_flags = 0; + gint regexp_flags = 0, rspamd_flags = 0, err_off, study_flags = 0, ncaptures; gboolean strict_flags = FALSE; rspamd_regexp_library_init (); @@ -333,12 +334,19 @@ fin: rspamd_regexp_generate_id (pattern, flags, res->id); + /* Check number of captures */ + if (pcre_fullinfo (res->re, res->extra, PCRE_INFO_CAPTURECOUNT, + &ncaptures) == 0) { + res->ncaptures = ncaptures; + } + return res; } gboolean rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, - const gchar **start, const gchar **end, gboolean raw) + const gchar **start, const gchar **end, gboolean raw, + GArray *captures) { pcre *r; pcre_extra *ext; @@ -347,7 +355,7 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, #endif const gchar *mt; gsize remain = 0; - gint rc, match_flags = 0, ovec[10]; + gint rc, match_flags = 0, *ovec, ncaptures, i; g_assert (re != NULL); g_assert (text != NULL); @@ -392,6 +400,8 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, } g_assert (r != NULL); + ncaptures = (re->ncaptures + 1) * 3; + ovec = g_alloca (sizeof (gint) * ncaptures); if (!(re->flags & RSPAMD_REGEXP_FLAG_NOOPT)) { #ifdef HAVE_PCRE_JIT @@ -402,25 +412,26 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, if (st != NULL) { rc = pcre_jit_exec (r, ext, mt, remain, 0, 0, ovec, - G_N_ELEMENTS (ovec), st); + ncaptures, st); } else { rc = pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, - G_N_ELEMENTS (ovec)); + ncaptures); } # else rc = 
pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, - G_N_ELEMENTS (ovec)); + ncaptures); #endif #else rc = pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, - G_N_ELEMENTS (ovec)); + ncaptures); #endif } else { rc = pcre_exec (r, ext, mt, remain, 0, match_flags, ovec, - G_N_ELEMENTS (ovec)); + ncaptures); } + if (rc >= 0) { if (start) { *start = mt + ovec[0]; @@ -429,6 +440,21 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, *end = mt + ovec[1]; } + if (captures != NULL && rc > 1) { + struct rspamd_re_capture *elt; + + g_assert (g_array_get_element_size (captures) == + sizeof (struct rspamd_re_capture)); + g_array_set_size (captures, rc - 1); + + for (i = 0; i < rc - 1; i ++) { + elt = &g_array_index (captures, struct rspamd_re_capture, i); + elt->p = mt + ovec[i * 2]; + elt->len = (mt + ovec[i * 2 + 1]) - elt->p; + + } + } + if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) { /* We also ensure that the match is full */ if (ovec[0] != 0 || (guint)ovec[1] < len) { @@ -459,7 +485,7 @@ rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len, g_assert (re != NULL); g_assert (text != NULL); - if (rspamd_regexp_search (re, text, len, &start, &end, raw)) { + if (rspamd_regexp_search (re, text, len, &start, &end, raw, NULL)) { if (start == text && end == text + len) { return TRUE; } |