]> source.dussan.org Git - rspamd.git/commitdiff
Adopt lua regexps for rspamd_regexp.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 5 Mar 2015 23:32:14 +0000 (23:32 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 5 Mar 2015 23:32:14 +0000 (23:32 +0000)
src/libutil/regexp.c
src/libutil/regexp.h
src/lua/lua_regexp.c
test/lua/unit/regxep.lua

index 8d023569cf07f27d562ab465fc0dd65b7b29f863..6c2046dd45085ac30f2ea9d0996f31b883bb2f5f 100644 (file)
@@ -155,7 +155,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
                end = pattern + strlen (pattern);
        }
 
-       regexp_flags |= PCRE_UTF8;
+       regexp_flags |= PCRE_UTF8 | PCRE_NO_AUTO_CAPTURE;
 
        if (flags != NULL) {
                flags_str = flags;
@@ -356,6 +356,14 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
        return FALSE;
 }
 
+const char*
+rspamd_regexp_get_pattern (rspamd_regexp_t *re)
+{
+       g_assert (re != NULL);
+
+       return re->pattern;
+}
+
 gboolean
 rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len,
                gboolean raw)
index fdbe44404e699a9801c462864d88b9af804b745e..6e8aa7295957c63f227ac7a3c103b5eba6b1fd0b 100644 (file)
@@ -75,6 +75,13 @@ rspamd_regexp_t* rspamd_regexp_ref (rspamd_regexp_t *re);
  */
 void rspamd_regexp_unref (rspamd_regexp_t *re);
 
+/**
+ * Get pattern for the specified regexp object
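+ * @param re regexp object, must not be NULL (checked with g_assert)
+ * @return pointer to the pattern string stored inside the regexp object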
+ */
+const char* rspamd_regexp_get_pattern (rspamd_regexp_t *re);
+
 /**
  * Create new regexp cache
  * @return
index a22eaf93add50213ea95976c5cd1e8a1eca802ee..64312be7000cfc7ecd702467cef74e0958209101 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "lua_common.h"
 #include "expressions.h"
+#include "regexp.h"
 
 /***
  * Rspamd regexp is an utility module that handles rspamd perl compatible
@@ -70,7 +71,7 @@ static const struct luaL_reg regexplib_f[] = {
 rspamd_mempool_t *regexp_static_pool = NULL;
 
 struct rspamd_lua_regexp {
-       GRegex *re;
+       rspamd_regexp_t *re;
        gchar *re_pattern;
        gint re_flags;
 };
@@ -98,11 +99,9 @@ lua_check_regexp (lua_State * L)
 static int
 lua_regexp_create (lua_State *L)
 {
-       gint regexp_flags = 0;
-       GRegex *re;
+       rspamd_regexp_t *re;
        struct rspamd_lua_regexp *new, **pnew;
-       const gchar *string, *flags_str = NULL, *slash;
-       gchar *pattern, sep;
+       const gchar *string, *flags_str = NULL;
        GError *err = NULL;
 
        string = luaL_checkstring (L, 1);
@@ -110,93 +109,20 @@ lua_regexp_create (lua_State *L)
                flags_str = luaL_checkstring (L, 2);
        }
 
-       if (string[0] == '/') {
-               /* We have likely slashed regexp */
-               slash = strrchr (string, '/');
-               if (slash != NULL && slash != string) {
-                       flags_str = slash + 1;
-                       pattern = g_malloc (slash - string);
-                       rspamd_strlcpy (pattern, string + 1, slash - string);
-               }
-               else {
-                       pattern = g_strdup (string);
-               }
-       }
-       else if (string[0] == 'm') {
-               /* Special case for m */
-               slash = &string[1];
-               sep = *slash;
-
-               if (sep == '\0' || g_ascii_isalnum (sep)) {
-                       /* Not a special case */
-                       pattern = g_strdup (string);
-               }
-               else {
-                       slash = strrchr (string, sep);
-                       if (slash != NULL && slash > &string[1]) {
-                               flags_str = slash + 1;
-                               pattern = g_malloc (slash - string + 1);
-                               pattern[0] = '^';
-                               rspamd_strlcpy (pattern + 1, string + 2, slash - string - 1);
-                               pattern[slash - string - 1] = '$';
-                               pattern[slash - string] = '\0';
-                       }
-               }
-       }
-       else {
-               pattern = g_strdup (string);
-       }
-
-       if (flags_str && flags_str != '\0') {
-               while (*flags_str) {
-                       switch (*flags_str) {
-                       case 'i':
-                               regexp_flags |= G_REGEX_CASELESS;
-                               break;
-                       case 'm':
-                               regexp_flags |= G_REGEX_MULTILINE;
-                               break;
-                       case 's':
-                               regexp_flags |= G_REGEX_DOTALL;
-                               break;
-                       case 'x':
-                               regexp_flags |= G_REGEX_EXTENDED;
-                               break;
-                       case 'u':
-                               regexp_flags |= G_REGEX_UNGREEDY;
-                               break;
-                       case 'o':
-                               regexp_flags |= G_REGEX_OPTIMIZE;
-                               break;
-                       case 'r':
-                               regexp_flags |= G_REGEX_RAW;
-                               break;
-                       default:
-                               msg_info ("invalid regexp flag: %c", *flags_str);
-                               goto fin;
-                               break;
-                       }
-                       flags_str++;
-               }
-       }
-fin:
-       re = g_regex_new (pattern, regexp_flags, 0, &err);
+       re = rspamd_regexp_cache_create (NULL, string, flags_str, &err);
        if (re == NULL) {
-               g_free (pattern);
                lua_pushnil (L);
                msg_info ("cannot parse regexp: %s, error: %s",
                        string,
                        err == NULL ? "undefined" : err->message);
+               g_error_free (err);
        }
        else {
                new = g_slice_alloc (sizeof (struct rspamd_lua_regexp));
                new->re = re;
-               new->re_flags = regexp_flags;
-               new->re_pattern = pattern;
                pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
                rspamd_lua_setclass (L, "rspamd{regexp}", -1);
                *pnew = new;
-               re_cache_add (new->re_pattern, new, regexp_static_pool);
        }
 
        return 1;
@@ -217,7 +143,7 @@ lua_regexp_get_cached (lua_State *L)
        const gchar *line;
 
        line = luaL_checkstring (L, 1);
-       new = re_cache_check (line, regexp_static_pool);
+       new = rspamd_regexp_cache_query (NULL, line, NULL);
        if (new) {
                pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
                rspamd_lua_setclass (L, "rspamd{regexp}", -1);
@@ -252,7 +178,7 @@ lua_regexp_create_cached (lua_State *L)
        struct rspamd_lua_regexp *new, **pnew;
 
        line = luaL_checkstring (L, 1);
-       new = re_cache_check (line, regexp_static_pool);
+       new = rspamd_regexp_cache_query (NULL, line, NULL);
        if (new) {
                pnew = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
                rspamd_lua_setclass (L, "rspamd{regexp}", -1);
@@ -275,8 +201,11 @@ lua_regexp_get_pattern (lua_State *L)
 {
        struct rspamd_lua_regexp *re = lua_check_regexp (L);
 
-       if (re) {
-               lua_pushstring (L, re->re_pattern);
+       if (re && re->re) {
+               lua_pushstring (L, rspamd_regexp_get_pattern (re->re));
+       }
+       else {
+               lua_pushnil (L);
        }
 
        return 1;
@@ -301,39 +230,32 @@ static int
 lua_regexp_match (lua_State *L)
 {
        struct rspamd_lua_regexp *re = lua_check_regexp (L);
-       GMatchInfo *mi;
        const gchar *data;
-       gchar **matches;
+       const gchar *start = NULL, *end = NULL;
        gint i;
+       gsize len;
+       gboolean matched = FALSE;
 
        if (re) {
-               data = luaL_checkstring (L, 2);
+               data = luaL_checklstring (L, 2, &len);
                if (data) {
-                       if ((re->re_flags & G_REGEX_RAW) == 0) {
-                               /* Validate input */
-                               if (!g_utf8_validate (data, -1, NULL)) {
-                                       lua_pushnil (L);
-                                       return 1;
-                               }
-                       }
-                       if (g_regex_match_full (re->re, data, -1, 0, 0, &mi, NULL)) {
-                               matches = g_match_info_fetch_all (mi);
-                               lua_newtable (L);
-                               for (i = 1; matches[i - 1] != NULL; i++) {
-                                       lua_pushstring (L, matches[i - 1]);
-                                       lua_rawseti (L, -2, i);
-                               }
-                               g_strfreev (matches);
+                       lua_newtable (L);
+                       i = 0;
+                       while (rspamd_regexp_search (re->re, data, len, &start, &end, FALSE)) {
+                               lua_pushlstring (L, start, end - start);
+                               lua_rawseti (L, -2, ++i);
+                               matched = TRUE;
                        }
-                       else {
+                       if (!matched) {
+                               lua_pop (L, 1);
                                lua_pushnil (L);
                        }
-                       g_match_info_free (mi);
                        return 1;
                }
        }
 
        lua_pushnil (L);
+
        return 1;
 }
 
@@ -394,8 +316,7 @@ lua_regexp_destroy (lua_State *L)
 
        if (to_del) {
                re_cache_del (to_del->re_pattern, regexp_static_pool);
-               g_regex_unref (to_del->re);
-               g_free (to_del->re_pattern);
+               rspamd_regexp_unref (to_del->re);
                g_slice_free1 (sizeof (struct rspamd_lua_regexp), to_del);
        }
 
index f063efedae9dd476ebe9897d6cb5800822ff8313..5329fece2f63ce356a171c6da062f093ddee448d 100644 (file)
@@ -24,7 +24,8 @@ context("Regexp unit tests", function()
       local m = false
       if res then m = true end
       
-      assert_equal(m, c[3])
+      assert_equal(m, c[3], string.format("'%s' doesn't match with '%s'",
+        c[2], c[1]))
     end
   end)
   end