aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-04-23 10:32:48 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-04-23 10:42:05 +0100
commit 79f15b27c647f6a7028b6167f9c243ca3f6fa96a (patch)
tree 84dfdd5158604bdc3d988e7fafaed04ae6cd5a45
parent 278b44e90d80fca828c8595e42563f312c0f2a05 (diff)
download rspamd-79f15b27c647f6a7028b6167f9c243ca3f6fa96a.tar.gz
rspamd-79f15b27c647f6a7028b6167f9c243ca3f6fa96a.zip
[Minor] Move regexp escape function to the public space
-rw-r--r-- src/libutil/multipattern.c 115
-rw-r--r-- src/libutil/str_util.c 117
-rw-r--r-- src/libutil/str_util.h 12
3 files changed, 131 insertions, 113 deletions
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
index e55b5d0b5..94b5398b3 100644
--- a/src/libutil/multipattern.c
+++ b/src/libutil/multipattern.c
@@ -133,117 +133,6 @@ rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern, gsize slen,
return res;
}
-static gchar *
-rspamd_multipattern_escape_hyperscan (const gchar *pattern, gsize slen,
- gsize *dst_len, gboolean allow_glob)
-{
- const gchar *p, *end = pattern + slen;
- gchar *res, *d, t;
- gsize len;
- static const gchar hexdigests[16] = "0123456789abcdef";
-
- len = slen;
- p = pattern;
-
- /* [-[\]{}()*+?.,\\^$|#\s] need to be escaped */
- while (p < end) {
- t = *p ++;
-
- switch (t) {
- case '[':
- case ']':
- case '-':
- case '\\':
- case '{':
- case '}':
- case '(':
- case ')':
- case '*':
- case '+':
- case '?':
- case '.':
- case ',':
- case '^':
- case '$':
- case '|':
- case '#':
- len ++;
- break;
- default:
- if (g_ascii_isspace (t)) {
- len ++;
- }
- else if (!g_ascii_isprint (t)) {
- /* \\xHH -> 4 symbols */
- len += 3;
- }
- break;
- }
- }
-
- if (slen == len) {
- *dst_len = slen;
- return g_strdup (pattern);
- }
-
- res = g_malloc (len + 1);
- p = pattern;
- d = res;
-
- while (p < end) {
- t = *p ++;
-
- switch (t) {
- case '[':
- case ']':
- case '-':
- case '\\':
- case '{':
- case '}':
- case '(':
- case ')':
- case '.':
- case ',':
- case '^':
- case '$':
- case '|':
- case '#':
- *d++ = '\\';
- break;
- case '*':
- case '?':
- case '+':
- if (allow_glob) {
- /* Treat * as .* and ? as .? */
- *d++ = '.';
- }
- else {
- *d++ = '\\';
- }
- break;
- default:
- if (g_ascii_isspace (t)) {
- *d++ = '\\';
- }
- else if (!g_ascii_isgraph (t)) {
- *d++ = '\\';
- *d++ = 'x';
- *d++ = hexdigests[((t >> 4) & 0xF)];
- *d++ = hexdigests[((t) & 0xF)];
- continue; /* To avoid *d++ = t; */
- }
- break;
- }
-
- *d++ = t;
- }
-
- *d = '\0';
- *dst_len = d - res;
-
- return res;
-}
-
#endif
static gchar *
rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,
@@ -312,10 +201,10 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
}
else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
- ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE);
+ ret = rspamd_str_regexp_escape (pattern, len, dst_len, TRUE);
}
else {
- ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE);
+ ret = rspamd_str_regexp_escape (pattern, len, dst_len, FALSE);
}
return ret;
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 3b1f3c1e3..186ce5d38 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -2093,3 +2093,120 @@ rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, gchar *start,
return FALSE;
#endif
}
+
+/* Escape regexp specials in pattern[0..slen) for use in pcre. In glob mode
+ * `*`, `?`, `+` become `.*`, `.?`, `.+`. Returns a glib-allocated, zero
+ * terminated string; its length goes to *dst_len when dst_len is not NULL. */
+gchar *
+rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
+		gsize *dst_len, gboolean allow_glob)
+{
+	const gchar *p = pattern, *end = pattern + slen;
+	gchar *res, *d, t;
+	gsize len = slen;
+	static const gchar hexdigests[16] = "0123456789abcdef";
+
+	/* Pass 1: compute output size; [-[\]{}()*+?.,\\^$|#\s] need to be escaped */
+	while (p < end) {
+		t = *p ++;
+
+		switch (t) {
+		case '[':
+		case ']':
+		case '-':
+		case '\\':
+		case '{':
+		case '}':
+		case '(':
+		case ')':
+		case '*':
+		case '+':
+		case '?':
+		case '.':
+		case ',':
+		case '^':
+		case '$':
+		case '|':
+		case '#':
+			len ++;
+			break;
+		default:
+			if (g_ascii_isspace (t)) {
+				len ++;
+			}
+			else if (!g_ascii_isgraph (t)) {
+				/* \\xHH -> 4 symbols; must use the same test as pass 2 below */
+				len += 3;
+			}
+			break;
+		}
+	}
+
+	if (slen == len) {
+		if (dst_len) {
+			*dst_len = slen;
+		}
+		/* Nothing to escape; pattern may lack a final zero, so copy slen bytes */
+		return g_strndup (pattern, slen);
+	}
+
+	res = g_malloc (len + 1);
+	p = pattern;
+	d = res;
+	/* Pass 2: emit the prefix (if any) and then the character itself */
+	while (p < end) {
+		t = *p ++;
+
+		switch (t) {
+		case '[':
+		case ']':
+		case '-':
+		case '\\':
+		case '{':
+		case '}':
+		case '(':
+		case ')':
+		case '.':
+		case ',':
+		case '^':
+		case '$':
+		case '|':
+		case '#':
+			*d++ = '\\';
+			break;
+		case '*':
+		case '?':
+		case '+':
+			if (allow_glob) {
+				/* Glob: * -> .* and ? -> .? and + -> .+ */
+				*d++ = '.';
+			}
+			else {
+				*d++ = '\\';
+			}
+			break;
+		default:
+			if (g_ascii_isspace (t)) {
+				*d++ = '\\';
+			}
+			else if (!g_ascii_isgraph (t)) {
+				*d++ = '\\';
+				*d++ = 'x';
+				*d++ = hexdigests[(((guchar)t) >> 4) & 0xF]; /* unsigned: no sign bits */
+				*d++ = hexdigests[((guchar)t) & 0xF];
+				continue; /* To avoid *d++ = t; */
+			}
+			break;
+		}
+
+		*d++ = t;
+	}
+
+	*d = '\0';
+
+	if (dst_len) {
+		*dst_len = d - res;
+	}
+
+	return res;
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 5f0695c2a..45507e2be 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -375,4 +375,16 @@ rspamd_str_has_8bit (const guchar *beg, gsize len)
gboolean rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool,
gchar *start, guint *len);
+/**
+ * Escapes regexp specials (whitespace as `\ `, non-printables as `\xHH`) for pcre
+ * @param pattern pattern to process
+ * @param slen source length
+ * @param dst_len destination length pointer (can be NULL), excludes final zero
+ * @param allow_glob if TRUE, `*`, `?`, `+` become `.*`, `.?`, `.+`; else escaped
+ * @return newly allocated (glib allocator) zero terminated escaped pattern
+ */
+gchar *
+rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
+ gsize *dst_len, gboolean allow_glob);
+
#endif /* SRC_LIBUTIL_STR_UTIL_H_ */