aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil/regexp.c
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-02-09 13:12:58 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-02-09 13:12:58 +0000
commit: 99c5df8b156c56b884b2c71a0a825825dd30f060 (patch)
tree: 1e8c20d3727ae79e0993b642e7e1de3468ecae52 /src/libutil/regexp.c
parent: 1b40acf053c02093b4fc650463c2ff4e4889e51b (diff)
download: rspamd-99c5df8b156c56b884b2c71a0a825825dd30f060.tar.gz
rspamd-99c5df8b156c56b884b2c71a0a825825dd30f060.zip
Implement searching using pcre 2
Diffstat (limited to 'src/libutil/regexp.c')
-rw-r--r-- src/libutil/regexp.c 112
1 file changed, 108 insertions, 4 deletions
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index c8dafeac8..e886810e8 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -154,7 +154,7 @@ static void
rspamd_regexp_post_process (rspamd_regexp_t *r)
{
#if defined(WITH_PCRE2)
- gint jsz;
+ gsize jsz;
/* Create match context */
r->mcontext = pcre2_match_context_create (NULL);
@@ -626,6 +626,101 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
return FALSE;
}
+#else
+/* PCRE 2 version */
+/**
+ * PCRE2 implementation of regexp search.
+ *
+ * Matches `re` (or its raw variant when `raw` is TRUE) against `text`.
+ * If `end` is non-NULL and `*end` is non-NULL the search is incremental:
+ * it resumes at `*end` and only the bytes between `*end` and `text + len`
+ * are scanned. Otherwise the whole buffer is scanned from `text`.
+ *
+ * @param re compiled expression, must not be NULL
+ * @param text subject buffer, must not be NULL
+ * @param len length of `text` in bytes; 0 means "use strlen (text)"
+ * @param start if non-NULL, receives a pointer to the first byte of the match
+ * @param end if non-NULL, supplies the resume position on input and receives
+ * a pointer one past the last byte of the match on output
+ * @param raw if TRUE use re->raw_re / re->raw_mcontext, otherwise re->re /
+ * re->mcontext
+ * @param captures optional GArray whose elements are struct rspamd_re_capture;
+ * when the match data holds more than one offset pair the array is resized to
+ * the pair count and filled (index 0 is the whole match). Groups that did not
+ * participate in the match get p == NULL and len == 0
+ * @return TRUE if the expression matched; when the expression has
+ * RSPAMD_REGEXP_FLAG_FULL_MATCH set, TRUE only if the match starts at offset 0
+ * and its end offset is not less than `len`. FALSE otherwise, including when
+ * there is nothing left to scan or match data cannot be allocated.
+ *
+ * Note that `*start`, `*end` and `captures` are written as soon as PCRE2
+ * reports a match, i.e. even if the full-match check then turns the result
+ * into FALSE.
+ */
+gboolean
+rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
+		const gchar **start, const gchar **end, gboolean raw,
+		GArray *captures)
+{
+	pcre2_match_data *match_data;
+	pcre2_match_context *mcontext;
+	PCRE_T *r;
+	const gchar *mt;
+	gsize remain = 0, *ovec;
+	gint rc, match_flags;
+	guint32 novec, i;
+	gboolean ret = FALSE;
+
+	g_assert (re != NULL);
+	g_assert (text != NULL);
+
+	if (len == 0) {
+		len = strlen (text);
+	}
+
+	if (end != NULL && *end != NULL) {
+		/* Incremental search */
+		mt = (*end);
+
+		/*
+		 * Compare in gsize: casting len to gint truncates large buffers,
+		 * and a resume pointer before `text` must not enlarge `remain`
+		 * beyond the real buffer.
+		 */
+		if (mt >= text && (gsize)(mt - text) < len) {
+			remain = len - (gsize)(mt - text);
+		}
+	}
+	else {
+		mt = text;
+		remain = len;
+	}
+
+	if (remain == 0) {
+		return FALSE;
+	}
+
+	/*
+	 * NOTE(review): in PCRE2 the newline convention is configured on the
+	 * compile context (pcre2_set_newline), it is not a pcre2_match option.
+	 * Confirm what PCRE_FLAG(NEWLINE_ANYCRLF) expands to in the PCRE2 build;
+	 * if it is PCRE2_NEWLINE_ANYCRLF, its value would be read as match
+	 * option bits here.
+	 */
+	match_flags = PCRE_FLAG(NEWLINE_ANYCRLF);
+
+	if (raw) {
+		r = re->raw_re;
+		mcontext = re->raw_mcontext;
+	}
+	else {
+		r = re->re;
+		mcontext = re->mcontext;
+	}
+
+	match_data = pcre2_match_data_create (re->ncaptures + 1, NULL);
+
+	if (match_data == NULL) {
+		/* Allocation failure: nothing to match against and nothing to free */
+		return FALSE;
+	}
+
+	rc = pcre2_match (r, (PCRE2_SPTR)mt, remain, 0, match_flags, match_data,
+			mcontext);
+
+	if (rc >= 0) {
+		novec = pcre2_get_ovector_count (match_data);
+		ovec = pcre2_get_ovector_pointer (match_data);
+
+		/* Offsets reported by PCRE2 are relative to `mt`, not to `text` */
+		if (start) {
+			*start = mt + ovec[0];
+		}
+		if (end) {
+			*end = mt + ovec[1];
+		}
+
+		if (captures != NULL && novec > 1) {
+			struct rspamd_re_capture *elt;
+
+			g_assert (g_array_get_element_size (captures) ==
+					sizeof (struct rspamd_re_capture));
+			g_array_set_size (captures, novec);
+
+			for (i = 0; i < novec; i ++) {
+				elt = &g_array_index (captures, struct rspamd_re_capture, i);
+
+				if (ovec[i * 2] == PCRE2_UNSET) {
+					/*
+					 * Group did not participate in the match: do not
+					 * build a pointer from the PCRE2_UNSET sentinel.
+					 */
+					elt->p = NULL;
+					elt->len = 0;
+				}
+				else {
+					elt->p = mt + ovec[i * 2];
+					elt->len = (mt + ovec[i * 2 + 1]) - elt->p;
+				}
+			}
+		}
+
+		ret = TRUE;
+
+		if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) {
+			/* We also ensure that the match is full */
+			if (ovec[0] != 0 || ovec[1] < len) {
+				ret = FALSE;
+			}
+		}
+	}
+
+	pcre2_match_data_free (match_data);
+
+	return ret;
+}
#endif
const char*
@@ -910,7 +1005,7 @@ rspamd_regexp_library_init (void)
global_re_cache = rspamd_regexp_cache_new ();
#ifdef HAVE_PCRE_JIT
gint jit, rc;
- const gchar *str;
+ gchar *str;
#ifndef WITH_PCRE2
rc = pcre_config (PCRE_CONFIG_JIT, &jit);
@@ -927,8 +1022,17 @@ rspamd_regexp_library_init (void)
msg_info ("pcre is compiled with JIT for unknown target");
#endif
#else
- pcre2_config (PCRE2_CONFIG_JITTARGET, &str);
- msg_info ("pcre2 is compiled with JIT for %s", str);
+ rc = pcre2_config (PCRE2_CONFIG_JITTARGET, NULL);
+
+ if (rc > 0) {
+ str = g_alloca (rc);
+ pcre2_config (PCRE2_CONFIG_JITTARGET, str);
+ msg_info ("pcre2 is compiled with JIT for %s", str);
+ }
+ else {
+ msg_info ("pcre2 is compiled with JIT for unknown");
+ }
+
#endif /* WITH_PCRE2 */
can_jit = TRUE;