diff options
Diffstat (limited to 'src/libutil/str_util.h')
-rw-r--r-- | src/libutil/str_util.h | 126 |
1 files changed, 74 insertions, 52 deletions
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 6fbb11ccf..c820bd10c 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -20,6 +20,9 @@ #include "ucl.h" #include "fstring.h" +#ifdef __cplusplus +extern "C" { +#endif enum rspamd_newlines_type { RSPAMD_TASK_NEWLINES_CR, @@ -37,6 +40,7 @@ gint rspamd_lc_cmp (const gchar *s, const gchar *d, gsize l); * Convert string to lowercase in-place using ASCII conversion */ void rspamd_str_lc (gchar *str, guint size); + /** * Convert string to lowercase in-place using utf (limited) conversion */ @@ -46,13 +50,16 @@ void rspamd_str_lc_utf8 (gchar *str, guint size); * Hash table utility functions for case insensitive hashing */ guint64 rspamd_icase_hash (const gchar *in, gsize len, guint64 seed); + guint rspamd_strcase_hash (gconstpointer key); + gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); /* * Hash table utility functions for case sensitive hashing */ guint rspamd_str_hash (gconstpointer key); + gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2); @@ -60,10 +67,15 @@ gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2); * Hash table utility functions for hashing fixed strings */ guint rspamd_ftok_icase_hash (gconstpointer key); + gboolean rspamd_ftok_icase_equal (gconstpointer v, gconstpointer v2); + guint rspamd_ftok_hash (gconstpointer key); + gboolean rspamd_ftok_equal (gconstpointer v, gconstpointer v2); + guint rspamd_gstring_icase_hash (gconstpointer key); + gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2); /** @@ -77,6 +89,7 @@ gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2); * @return bytes copied */ gsize rspamd_strlcpy_fast (gchar *dst, const gchar *src, gsize siz); + gsize rspamd_strlcpy_safe (gchar *dst, const gchar *src, gsize siz); #if defined(__has_feature) @@ -133,7 +146,7 @@ gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud); * @param inlen input length * @return freshly allocated base32 encoding of a specified string */ -gchar * rspamd_encode_base32 (const guchar *in, gsize inlen); +gchar *rspamd_encode_base32 (const guchar *in, gsize inlen); /** * Decode string using base32 encoding @@ -141,7 +154,7 @@ gchar * rspamd_encode_base32 (const guchar *in, gsize inlen); * @param inlen input length * @return freshly allocated base32 decoded value or NULL if input is invalid */ -guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen); +guchar *rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen); /** * Encode string using hex encoding @@ -149,7 +162,7 @@ guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen); * @param inlen input length * @return freshly allocated base32 encoding of a specified string */ -gchar * rspamd_encode_hex (const guchar *in, gsize inlen); +gchar *rspamd_encode_hex (const guchar *in, gsize inlen); /** * Decode string using hex encoding @@ -157,7 +170,7 @@ gchar * rspamd_encode_hex (const guchar *in, gsize inlen); * @param inlen input length * @return freshly allocated base32 decoded value or NULL if input is invalid */ -guchar* rspamd_decode_hex (const gchar *in, gsize inlen); +guchar *rspamd_decode_hex (const gchar *in, gsize inlen); /** * Encode string using base32 encoding @@ -168,7 +181,7 @@ guchar* rspamd_decode_hex (const gchar *in, gsize inlen); * @return encoded len if `outlen` is enough to encode `inlen` */ gint rspamd_encode_base32_buf (const guchar *in, gsize inlen, gchar *out, - gsize outlen); + gsize outlen); /** * Decode string using base32 encoding @@ -179,7 +192,7 @@ gint rspamd_encode_base32_buf (const guchar *in, gsize inlen, gchar *out, * @return decoded len if in is valid base32 and `outlen` is enough to encode `inlen` */ gint rspamd_decode_base32_buf (const gchar *in, gsize inlen, - guchar *out, gsize outlen); + guchar *out, gsize outlen); /** * Encode string using hex encoding @@ -190,7 +203,7 @@ gint rspamd_decode_base32_buf (const gchar *in, gsize inlen, * @return encoded len if `outlen` is enough to encode `inlen` */ gint rspamd_encode_hex_buf (const guchar *in, gsize inlen, gchar *out, - gsize outlen); + gsize outlen); /** @@ -202,7 +215,7 @@ gint rspamd_encode_hex_buf (const guchar *in, gsize inlen, gchar *out, * @return decoded len if in is valid hex and `outlen` is enough to encode `inlen` */ gssize rspamd_decode_hex_buf (const gchar *in, gsize inlen, - guchar *out, gsize outlen); + guchar *out, gsize outlen); /** * Common version of base64 encoder @@ -221,6 +234,7 @@ rspamd_encode_base64_common (const guchar *in, gsize *outlen, gboolean fold, enum rspamd_newlines_type how); + /** * Encode string using base64 encoding * @param in input @@ -228,8 +242,8 @@ rspamd_encode_base64_common (const guchar *in, * @param str_len maximum string length (if <= 0 then no lines are split) * @return freshly allocated base64 encoded value or NULL if input is invalid */ -gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, - gsize *outlen); +gchar *rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, + gsize *outlen); /** * Encode and fold string using base64 encoding @@ -238,8 +252,8 @@ gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, * @param str_len maximum string length (if <= 0 then no lines are split) * @return freshly allocated base64 encoded value or NULL if input is invalid */ -gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, - gsize *outlen, enum rspamd_newlines_type how); +gchar *rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, + gsize *outlen, enum rspamd_newlines_type how); /** * Encode and fold string using quoted printable encoding @@ -248,8 +262,8 @@ gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len, * @param str_len maximum string length (if <= 0 then no lines are split) * @return freshly allocated base64 encoded value or NULL if input is invalid */ -gchar * rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len, - gsize *outlen, enum rspamd_newlines_type how); +gchar *rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len, + gsize *outlen, enum rspamd_newlines_type how); /** * Decode quoted-printable encoded buffer, input and output must not overlap @@ -260,7 +274,7 @@ gchar * rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len, * @return real size of decoded output or (-1) if outlen is not enough */ gssize rspamd_decode_qp_buf (const gchar *in, gsize inlen, - gchar *out, gsize outlen); + gchar *out, gsize outlen); /** * Decode quoted-printable encoded buffer using rfc2047 format, input and output must not overlap @@ -271,7 +285,7 @@ gssize rspamd_decode_qp_buf (const gchar *in, gsize inlen, * @return real size of decoded output or (-1) if outlen is not enough */ gssize rspamd_decode_qp2047_buf (const gchar *in, gsize inlen, - gchar *out, gsize outlen); + gchar *out, gsize outlen); /** * Encode quoted-printable buffer using rfc2047 format, input and output must not overlap @@ -282,7 +296,7 @@ gssize rspamd_decode_qp2047_buf (const gchar *in, gsize inlen, * @return */ gssize rspamd_encode_qp2047_buf (const gchar *in, gsize inlen, - gchar *out, gsize outlen); + gchar *out, gsize outlen); #ifndef g_tolower # define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x)) @@ -297,7 +311,7 @@ gssize rspamd_encode_qp2047_buf (const gchar *in, gsize inlen, * @return */ gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, - const gchar *s2, gsize s2len, guint replace_cost); + const gchar *s2, gsize s2len, guint replace_cost); /** * Fold header using rfc822 rules, return new GString from the previous one @@ -309,10 +323,10 @@ gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len, * @return new GString with the folded value */ GString *rspamd_header_value_fold (const gchar *name, - const gchar *value, - guint fold_max, - enum rspamd_newlines_type how, - const gchar *fold_on_chars); + const gchar *value, + guint fold_max, + enum rspamd_newlines_type how, + const gchar *fold_on_chars); /** * Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm @@ -324,7 +338,7 @@ GString *rspamd_header_value_fold (const gchar *name, * @return position of the first substring match or (-1) if not found */ goffset rspamd_substring_search (const gchar *in, gsize inlen, - const gchar *srch, gsize srchlen); + const gchar *srch, gsize srchlen); /** * Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm in caseless matter (ASCII only) @@ -336,7 +350,7 @@ goffset rspamd_substring_search (const gchar *in, gsize inlen, * @return position of the first substring match or (-1) if not found */ goffset rspamd_substring_search_caseless (const gchar *in, gsize inlen, - const gchar *srch, gsize srchlen); + const gchar *srch, gsize srchlen); /** * Search for end-of-headers mark in the input string. Returns position just after @@ -348,7 +362,8 @@ goffset rspamd_string_find_eoh (GString *input, goffset *body_start); #define rspamd_ucl_emit_gstring(o, t, target) \ - rspamd_ucl_emit_gstring_comments((o), (t), (target), NULL) + rspamd_ucl_emit_gstring_comments((o), (t), (target), NULL) + /** * Emit UCL object to gstring * @param obj object to emit @@ -357,12 +372,13 @@ goffset rspamd_string_find_eoh (GString *input, goffset *body_start); * @param target target string */ void rspamd_ucl_emit_gstring_comments (const ucl_object_t *obj, - enum ucl_emitter emit_type, - GString *target, - const ucl_object_t *comments); + enum ucl_emitter emit_type, + GString *target, + const ucl_object_t *comments); #define rspamd_ucl_emit_fstring(o, t, target) \ - rspamd_ucl_emit_fstring_comments((o), (t), (target), NULL) + rspamd_ucl_emit_fstring_comments((o), (t), (target), NULL) + /** * Emit UCL object to fstring * @param obj object to emit @@ -371,9 +387,9 @@ void rspamd_ucl_emit_gstring_comments (const ucl_object_t *obj, * @param target target string */ void rspamd_ucl_emit_fstring_comments (const ucl_object_t *obj, - enum ucl_emitter emit_type, - rspamd_fstring_t **target, - const ucl_object_t *comments); + enum ucl_emitter emit_type, + rspamd_fstring_t **target, + const ucl_object_t *comments); extern const guchar lc_map[256]; @@ -405,27 +421,26 @@ gsize rspamd_memcspn (const gchar *s, const gchar *e, gsize len); gsize rspamd_memspn (const gchar *s, const gchar *e, gsize len); /* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */ -#define rspamd_str_hasmore(x,n) ((((x)+~0UL/255*(127-(n)))|(x))&~0UL/255*128) +#define rspamd_str_hasmore(x, n) ((((x)+~0UL/255*(127-(n)))|(x))&~0UL/255*128) static inline gboolean -rspamd_str_has_8bit (const guchar *beg, gsize len) -{ +rspamd_str_has_8bit (const guchar *beg, gsize len) { unsigned long *w; gsize i, leftover = len % sizeof (*w); - w = (unsigned long *)beg; + w = (unsigned long *) beg; - for (i = 0; i < len / sizeof (*w); i ++) { + for (i = 0; i < len / sizeof (*w); i++) { if (rspamd_str_hasmore (*w, 127)) { return TRUE; } - w ++; + w++; } - beg = (const guchar *)w; + beg = (const guchar *) w; - for (i = 0; i < leftover; i ++) { + for (i = 0; i < leftover; i++) { if (beg[i] > 127) { return TRUE; } @@ -435,9 +450,11 @@ rspamd_str_has_8bit (const guchar *beg, gsize len) } struct UConverter; + struct UConverter *rspamd_get_utf8_converter (void); struct UNormalizer2; + const struct UNormalizer2 *rspamd_get_unicode_normalizer (void); enum rspamd_normalise_result { @@ -456,7 +473,7 @@ enum rspamd_normalise_result { * @return TRUE if a string has been normalised */ enum rspamd_normalise_result rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, - gchar *start, guint *len); + gchar *start, guint *len); enum rspamd_regexp_escape_flags { RSPAMD_REGEXP_ESCAPE_ASCII = 0, @@ -464,6 +481,7 @@ enum rspamd_regexp_escape_flags { RSPAMD_REGEXP_ESCAPE_GLOB = 1u << 1, RSPAMD_REGEXP_ESCAPE_RE = 1u << 2, }; + /** * Escapes special characters when reading plain data to be processed in pcre * @param pattern pattern to process @@ -474,7 +492,7 @@ enum rspamd_regexp_escape_flags { */ gchar * rspamd_str_regexp_escape (const gchar *pattern, gsize slen, - gsize *dst_len, enum rspamd_regexp_escape_flags flags); + gsize *dst_len, enum rspamd_regexp_escape_flags flags); /** * Returns copy of src (zero terminated) where all unicode is made valid or replaced @@ -484,7 +502,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen, * @param dstelen * @return */ -gchar * rspamd_str_make_utf_valid (const guchar *src, gsize slen, gsize *dstlen); +gchar *rspamd_str_make_utf_valid (const guchar *src, gsize slen, gsize *dstlen); /** * Strips characters in `strip_chars` from start and end of the GString @@ -498,19 +516,23 @@ gsize rspamd_gstring_strip (GString *s, const gchar *strip_chars); * @param s * @param strip_chars */ -const gchar* rspamd_string_len_strip (const gchar *in, - gsize *len, const gchar *strip_chars); +const gchar *rspamd_string_len_strip (const gchar *in, + gsize *len, const gchar *strip_chars); #define IS_ZERO_WIDTH_SPACE(uc) ((uc) == 0x200B || \ - (uc) == 0x200C || \ - (uc) == 0x200D || \ - (uc) == 0xFEFF) + (uc) == 0x200C || \ + (uc) == 0x200D || \ + (uc) == 0xFEFF) #define IS_OBSCURED_CHAR(uc) (((uc) >= 0x200B && (uc) <= 0x200F) || \ - ((uc) >= 0x2028 && (uc) <= 0x202F) || \ - ((uc) >= 0x205F && (uc) <= 0x206F) || \ - (uc) == 0xFEFF) + ((uc) >= 0x2028 && (uc) <= 0x202F) || \ + ((uc) >= 0x205F && (uc) <= 0x206F) || \ + (uc) == 0xFEFF) #define RSPAMD_LEN_CHECK_STARTS_WITH(s, len, lit) \ - ((len) >= sizeof(lit) - 1 && g_ascii_strncasecmp ((s), (lit), sizeof(lit) - 1) == 0) + ((len) >= sizeof(lit) - 1 && g_ascii_strncasecmp ((s), (lit), sizeof(lit) - 1) == 0) + +#ifdef __cplusplus +} +#endif #endif /* SRC_LIBUTIL_STR_UTIL_H_ */ |