From 3a201043a8eca6be7e7d9a8a06bf4ec18c4047ef Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 12 May 2015 12:46:10 +0100 Subject: [PATCH] Use raw regexp instead of utf ones by default. Now to enable utf8 matching, one should specify the 'u' modifier, for example, by adding '/u' to the slashed pattern. --- src/libutil/regexp.c | 6 ++++-- test/lua/unit/regxep.lua | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c index 6f7d07f6f..deaca80a5 100644 --- a/src/libutil/regexp.c +++ b/src/libutil/regexp.c @@ -176,7 +176,8 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, flags_str = flags; } - regexp_flags |= PCRE_UTF8 ; + rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW; + regexp_flags &= ~PCRE_UTF8; if (flags_str != NULL) { while (*flags_str) { @@ -194,7 +195,8 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, regexp_flags |= PCRE_EXTENDED; break; case 'u': - regexp_flags |= PCRE_UNGREEDY; + rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW; + regexp_flags |= PCRE_UTF8; break; case 'O': /* We optimize all regexps by default */ diff --git a/test/lua/unit/regxep.lua b/test/lua/unit/regxep.lua index 04db54aa9..8cc2db2bd 100644 --- a/test/lua/unit/regxep.lua +++ b/test/lua/unit/regxep.lua @@ -15,7 +15,7 @@ context("Regexp unit tests", function() {'m,test,', 'test123', false}, {'m{https?://[^/?\\s]+?:\\d+(?/r', 'some', true}, -- Cyrillic utf8 letter -- 2.39.5