diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-05-12 12:46:10 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-05-12 12:46:10 +0100 |
commit | 3a201043a8eca6be7e7d9a8a06bf4ec18c4047ef (patch) | |
tree | ea64785468cbe42232a522245b22bd21fd39f720 | |
parent | 5912f1002bfb167232ce7ab2b23973644616ed1c (diff) | |
download | rspamd-3a201043a8eca6be7e7d9a8a06bf4ec18c4047ef.tar.gz rspamd-3a201043a8eca6be7e7d9a8a06bf4ec18c4047ef.zip |
Use raw regexp instead of utf ones by default.
Now, to enable UTF-8 matching, one should specify the 'u' modifier, for example,
by adding '/u' to the slashed pattern.
-rw-r--r-- | src/libutil/regexp.c | 6 | ||||
-rw-r--r-- | test/lua/unit/regxep.lua | 2 |
2 files changed, 5 insertions, 3 deletions
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c index 6f7d07f6f..deaca80a5 100644 --- a/src/libutil/regexp.c +++ b/src/libutil/regexp.c @@ -176,7 +176,8 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, flags_str = flags; } - regexp_flags |= PCRE_UTF8 ; + rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW; + regexp_flags &= ~PCRE_UTF8; if (flags_str != NULL) { while (*flags_str) { @@ -194,7 +195,8 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, regexp_flags |= PCRE_EXTENDED; break; case 'u': - regexp_flags |= PCRE_UNGREEDY; + rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW; + regexp_flags |= PCRE_UTF8; break; case 'O': /* We optimize all regexps by default */ diff --git a/test/lua/unit/regxep.lua b/test/lua/unit/regxep.lua index 04db54aa9..8cc2db2bd 100644 --- a/test/lua/unit/regxep.lua +++ b/test/lua/unit/regxep.lua @@ -15,7 +15,7 @@ context("Regexp unit tests", function() {'m,test,', 'test123', false}, {'m{https?://[^/?\\s]+?:\\d+(?<!:80)(?<!:443)(?<!:8080)(?:/|\\s|$)}', '', false}, {'/test/i', 'TeSt123', true}, - {'/тест/i', 'ТесТ', true}, + {'/ТесТ/iu', 'тест', true}, -- Raw regexp {'/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/r', 'some<example@example.com>', true}, -- Cyrillic utf8 letter |