Browse Source

[Minor] Move regexp escape function to the public space

tags/1.7.4
Vsevolod Stakhov 6 years ago
parent
commit
79f15b27c6
3 changed files with 131 additions and 113 deletions
  1. 2
    113
      src/libutil/multipattern.c
  2. 117
    0
      src/libutil/str_util.c
  3. 12
    0
      src/libutil/str_util.h

+ 2
- 113
src/libutil/multipattern.c View File

@@ -133,117 +133,6 @@ rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern, gsize slen,
return res;
}

/*
 * Escapes `pattern` so that it can be compiled as a literal regular expression.
 *
 * Exactly `slen` bytes of `pattern` are processed; the input does not have to
 * be zero terminated. The following transformations are applied:
 *  - regexp metacharacters ([ ] - \ { } ( ) . , ^ $ | #) and ASCII whitespace
 *    get a backslash prefix;
 *  - '*', '?' and '+' get a backslash prefix, or a '.' prefix when
 *    `allow_glob` is TRUE (so they become ".*", ".?" and ".+");
 *  - any other non-graphic byte is written as "\xHH" (lowercase hex).
 *
 * `dst_len`, when not NULL, receives the length of the result without the
 * trailing zero. The result is allocated with g_malloc. A NULL `pattern`
 * yields NULL and a zero length.
 */
static gchar *
rspamd_multipattern_escape_hyperscan (const gchar *pattern, gsize slen,
		gsize *dst_len, gboolean allow_glob)
{
	static const gchar hexdigits[] = "0123456789abcdef";
	const gchar *p, *end;
	gchar *res, *d, t;
	gsize len = slen;

	if (!pattern) {
		/* Nothing to escape; keeps the historical NULL-in/NULL-out result */
		if (dst_len) {
			*dst_len = 0;
		}

		return NULL;
	}

	end = pattern + slen;

	/* First pass: compute how many bytes the escaped form requires */
	for (p = pattern; p < end; p ++) {
		t = *p;

		switch (t) {
		case '[':
		case ']':
		case '-':
		case '\\':
		case '{':
		case '}':
		case '(':
		case ')':
		case '*':
		case '+':
		case '?':
		case '.':
		case ',':
		case '^':
		case '$':
		case '|':
		case '#':
			/* One prefix character is added */
			len ++;
			break;
		default:
			if (g_ascii_isspace (t)) {
				len ++;
			}
			else if (!g_ascii_isgraph (t)) {
				/* "\xHH" occupies 4 bytes instead of 1 */
				len += 3;
			}
			break;
		}
	}

	/*
	 * There is deliberately no g_strdup shortcut for the "nothing to escape"
	 * case: g_strdup ignores slen and copies up to the first zero byte, which
	 * over-reads unterminated input and disagrees with the reported length.
	 * In that case the loop below simply degenerates into a bounded copy.
	 */
	res = g_malloc (len + 1);
	d = res;

	/* Second pass: must classify bytes exactly like the first one */
	for (p = pattern; p < end; p ++) {
		t = *p;

		switch (t) {
		case '[':
		case ']':
		case '-':
		case '\\':
		case '{':
		case '}':
		case '(':
		case ')':
		case '.':
		case ',':
		case '^':
		case '$':
		case '|':
		case '#':
			*d++ = '\\';
			break;
		case '*':
		case '?':
		case '+':
			/* Glob mode: '*' -> ".*", '?' -> ".?", '+' -> ".+" */
			*d++ = allow_glob ? '.' : '\\';
			break;
		default:
			if (g_ascii_isspace (t)) {
				*d++ = '\\';
			}
			else if (!g_ascii_isgraph (t)) {
				/* Go through unsigned char so bytes >= 0x80 index correctly */
				*d++ = '\\';
				*d++ = 'x';
				*d++ = hexdigits[((unsigned char) t) >> 4];
				*d++ = hexdigits[((unsigned char) t) & 0xF];
				continue; /* The raw byte itself must not be emitted */
			}
			break;
		}

		*d++ = t;
	}

	*d = '\0';

	if (dst_len) {
		*dst_len = d - res;
	}

	return res;
}

#endif
static gchar *
rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,
@@ -312,10 +201,10 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
}
else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE);
ret = rspamd_str_regexp_escape (pattern, len, dst_len, TRUE);
}
else {
ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE);
ret = rspamd_str_regexp_escape (pattern, len, dst_len, FALSE);
}

return ret;

+ 117
- 0
src/libutil/str_util.c View File

@@ -2093,3 +2093,120 @@ rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, gchar *start,
return FALSE;
#endif
}

/*
 * Number of extra bytes needed to emit byte `t` in escaped form:
 *   0 - the byte is copied verbatim;
 *   1 - the byte gets a one character prefix;
 *   3 - the byte is replaced by a four character "\xHH" sequence.
 * Both passes of rspamd_str_regexp_escape rely on this single classifier, so
 * the buffer size computed by the first pass always matches what is written.
 */
static gsize
rspamd_str_regexp_escape_extra (gchar t)
{
	switch (t) {
	case '[':
	case ']':
	case '-':
	case '\\':
	case '{':
	case '}':
	case '(':
	case ')':
	case '*':
	case '+':
	case '?':
	case '.':
	case ',':
	case '^':
	case '$':
	case '|':
	case '#':
		return 1;
	default:
		break;
	}

	if (g_ascii_isspace (t)) {
		return 1;
	}

	if (!g_ascii_isgraph (t)) {
		return 3;
	}

	return 0;
}

/*
 * Escapes `pattern` so that it can be used as a literal regular expression.
 *
 * Exactly `slen` bytes of `pattern` are processed; the input does not have to
 * be zero terminated. Regexp metacharacters and ASCII whitespace get a
 * backslash prefix, other non-graphic bytes are written as "\xHH" (lowercase
 * hex). When `allow_glob` is TRUE, '*', '?' and '+' get a '.' prefix instead
 * of a backslash (giving ".*", ".?" and ".+").
 *
 * `dst_len`, when not NULL, receives the length of the result without the
 * trailing zero. The result is allocated with g_malloc. A NULL `pattern`
 * yields NULL and a zero length.
 */
gchar *
rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
		gsize *dst_len, gboolean allow_glob)
{
	static const gchar hexdigits[] = "0123456789abcdef";
	const gchar *p, *end;
	gchar *res, *d;
	gsize len = slen;

	if (!pattern) {
		/* Nothing to escape; keeps the historical NULL-in/NULL-out result */
		if (dst_len) {
			*dst_len = 0;
		}

		return NULL;
	}

	end = pattern + slen;

	/* First pass: size of the escaped output */
	for (p = pattern; p < end; p ++) {
		len += rspamd_str_regexp_escape_extra (*p);
	}

	/*
	 * There is deliberately no g_strdup shortcut when nothing needs escaping:
	 * g_strdup ignores slen and copies up to the first zero byte, which
	 * over-reads unterminated input and disagrees with the reported length.
	 * The loop below performs a bounded copy in that case.
	 */
	res = g_malloc (len + 1);
	d = res;

	/* Second pass: emit every byte with the prefix or encoding it requires */
	for (p = pattern; p < end; p ++) {
		const gchar t = *p;
		const gsize extra = rspamd_str_regexp_escape_extra (t);

		if (extra == 3) {
			/* Go through unsigned char so bytes >= 0x80 index correctly */
			const unsigned char byte = (unsigned char) t;

			*d++ = '\\';
			*d++ = 'x';
			*d++ = hexdigits[byte >> 4];
			*d++ = hexdigits[byte & 0xF];
		}
		else {
			if (extra == 1) {
				if (allow_glob && (t == '*' || t == '?' || t == '+')) {
					/* Glob mode: '*' -> ".*", '?' -> ".?", '+' -> ".+" */
					*d++ = '.';
				}
				else {
					*d++ = '\\';
				}
			}

			*d++ = t;
		}
	}

	*d = '\0';

	if (dst_len) {
		*dst_len = d - res;
	}

	return res;
}

+ 12
- 0
src/libutil/str_util.h View File

@@ -375,4 +375,16 @@ rspamd_str_has_8bit (const guchar *beg, gsize len)
gboolean rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool,
gchar *start, guint *len);

/**
 * Escapes special characters when reading plain data to be processed in pcre.
 *
 * Regexp metacharacters and ASCII whitespace are prefixed with a backslash;
 * other non-graphic bytes are written as a `\xHH` sequence (lowercase hex).
 * The result is allocated with GLib allocation functions; the caller is
 * presumably expected to release it with g_free().
 *
 * @param pattern pattern to process
 * @param slen source length in bytes
 * @param dst_len destination length pointer (can be NULL); receives the
 * length of the result, not counting the trailing zero
 * @param allow_glob allow glob expressions to be translated into pcre: when
 * TRUE, `*`, `?` and `+` are prefixed with `.` instead of a backslash
 * @return newly allocated zero terminated escaped pattern
 */
gchar *
rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
gsize *dst_len, gboolean allow_glob);

#endif /* SRC_LIBUTIL_STR_UTIL_H_ */

Loading…
Cancel
Save