diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-12-03 11:30:22 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-12-03 11:30:22 +0000 |
commit | b4442b41baab6160e87098f52b94def24b97e066 (patch) | |
tree | 5361f47875c1399c0c8a0836e9c74475af0ae459 /src | |
parent | fe940c7d3d9d72f0196b9cd847dd0160603dcbe9 (diff) | |
download | rspamd-b4442b41baab6160e87098f52b94def24b97e066.tar.gz rspamd-b4442b41baab6160e87098f52b94def24b97e066.zip |
[Minor] Add rspamd_str_make_utf_valid routine + unit tests
Diffstat (limited to 'src')
-rw-r--r-- | src/libutil/str_util.c | 60 | ||||
-rw-r--r-- | src/libutil/str_util.h | 10 |
2 files changed, 69 insertions, 1 deletions
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index d8b17e3c3..1e43e7726 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -2425,7 +2425,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen, if (flags & RSPAMD_REGEXP_ESCAPE_UTF) { if (!g_utf8_validate (pattern, slen, NULL)) { - tmp_utf = g_utf8_make_valid (pattern, slen); + tmp_utf = rspamd_str_make_utf_valid (pattern, slen, NULL); } } @@ -2517,3 +2517,61 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen, return res; } + + +gchar * +rspamd_str_make_utf_valid (const gchar *src, gsize slen, gsize *dstlen) +{ + GString *dst; + const gchar *last; + gchar *dchar; + gsize i, valid, prev; + UChar32 uc; + + if (src == NULL) { + return NULL; + } + + if (slen == 0) { + slen = strlen (src); + } + + dst = g_string_sized_new (slen); + i = 0; + last = src; + valid = 0; + prev = 0; + + while (i < slen) { + U8_NEXT (src, i, slen, uc); + + if (uc <= 0) { + if (valid > 0) { + g_string_append_len (dst, last, valid); + } + /* 0xFFFD in UTF8 */ + g_string_append_len (dst, "\357\277\275", 3); + valid = 0; + last = &src[i]; + } + else { + valid += i - prev; + } + + prev = i; + } + + if (valid > 0) { + g_string_append_len (dst, last, valid); + } + + dchar = dst->str; + + if (dstlen) { + *dstlen = dst->len; + } + + g_string_free (dst, FALSE); + + return dchar; +}
\ No newline at end of file diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 688034ec6..139a85416 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -419,4 +419,14 @@ gchar * rspamd_str_regexp_escape (const gchar *pattern, gsize slen, gsize *dst_len, enum rspamd_regexp_escape_flags flags); +/** + * Returns copy of src (zero terminated) where all unicode is made valid or replaced + * with FFFD characters. Caller must free string after usage + * @param src + * @param slen + * @param dstlen + * @return + */ +gchar * rspamd_str_make_utf_valid (const gchar *src, gsize slen, gsize *dstlen); + #endif /* SRC_LIBUTIL_STR_UTIL_H_ */ |