]> source.dussan.org Git - rspamd.git/commitdiff
Use raw regexps instead of UTF-8 ones by default.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 12 May 2015 11:46:10 +0000 (12:46 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 12 May 2015 11:46:10 +0000 (12:46 +0100)
To enable UTF-8 matching, one must now specify the 'u' modifier, for example,
by adding '/u' to the end of a slashed pattern.

src/libutil/regexp.c
test/lua/unit/regxep.lua

index 6f7d07f6fc4f90d22a605ebcb227a6728f7e1707..deaca80a5ba7a948561b9190e5709a3e14c9d92d 100644 (file)
@@ -176,7 +176,8 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
                flags_str = flags;
        }
 
-       regexp_flags |= PCRE_UTF8 ;
+       rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
+       regexp_flags &= ~PCRE_UTF8;
 
        if (flags_str != NULL) {
                while (*flags_str) {
@@ -194,7 +195,8 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
                                regexp_flags |= PCRE_EXTENDED;
                                break;
                        case 'u':
-                               regexp_flags |= PCRE_UNGREEDY;
+                               rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
+                               regexp_flags |= PCRE_UTF8;
                                break;
                        case 'O':
                                /* We optimize all regexps by default */
index 04db54aa9b269bd89a85f2ae777aa062957c0273..8cc2db2bdcd523552cfd3efa5c86275fa08a2ba3 100644 (file)
@@ -15,7 +15,7 @@ context("Regexp unit tests", function()
       {'m,test,', 'test123', false},
       {'m{https?://[^/?\\s]+?:\\d+(?<!:80)(?<!:443)(?<!:8080)(?:/|\\s|$)}', '', false},
       {'/test/i', 'TeSt123', true},
-      {'/тест/i', 'ТесТ', true},
+      {'/ТесТ/iu', 'тест', true},
       -- Raw regexp
       {'/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/r', 'some<example@example.com>', true},
       -- Cyrillic utf8 letter