summary | refs | log | tree | commit | diff | stats
path: root/src/libutil/str_util.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/libutil/str_util.h')
-rw-r--r-- src/libutil/str_util.h 126
1 files changed, 74 insertions, 52 deletions
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 6fbb11ccf..c820bd10c 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -20,6 +20,9 @@
#include "ucl.h"
#include "fstring.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
enum rspamd_newlines_type {
RSPAMD_TASK_NEWLINES_CR,
@@ -37,6 +40,7 @@ gint rspamd_lc_cmp (const gchar *s, const gchar *d, gsize l);
* Convert string to lowercase in-place using ASCII conversion
*/
void rspamd_str_lc (gchar *str, guint size);
+
/**
* Convert string to lowercase in-place using utf (limited) conversion
*/
@@ -46,13 +50,16 @@ void rspamd_str_lc_utf8 (gchar *str, guint size);
* Hash table utility functions for case insensitive hashing
*/
guint64 rspamd_icase_hash (const gchar *in, gsize len, guint64 seed);
+
guint rspamd_strcase_hash (gconstpointer key);
+
gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2);
/*
* Hash table utility functions for case sensitive hashing
*/
guint rspamd_str_hash (gconstpointer key);
+
gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2);
@@ -60,10 +67,15 @@ gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2);
* Hash table utility functions for hashing fixed strings
*/
guint rspamd_ftok_icase_hash (gconstpointer key);
+
gboolean rspamd_ftok_icase_equal (gconstpointer v, gconstpointer v2);
+
guint rspamd_ftok_hash (gconstpointer key);
+
gboolean rspamd_ftok_equal (gconstpointer v, gconstpointer v2);
+
guint rspamd_gstring_icase_hash (gconstpointer key);
+
gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2);
/**
@@ -77,6 +89,7 @@ gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2);
* @return bytes copied
*/
gsize rspamd_strlcpy_fast (gchar *dst, const gchar *src, gsize siz);
+
gsize rspamd_strlcpy_safe (gchar *dst, const gchar *src, gsize siz);
#if defined(__has_feature)
@@ -133,7 +146,7 @@ gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud);
* @param inlen input length
* @return freshly allocated base32 encoding of a specified string
*/
-gchar * rspamd_encode_base32 (const guchar *in, gsize inlen);
+gchar *rspamd_encode_base32 (const guchar *in, gsize inlen);
/**
* Decode string using base32 encoding
@@ -141,7 +154,7 @@ gchar * rspamd_encode_base32 (const guchar *in, gsize inlen);
* @param inlen input length
* @return freshly allocated base32 decoded value or NULL if input is invalid
*/
-guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
+guchar *rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
/**
* Encode string using hex encoding
@@ -149,7 +162,7 @@ guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
* @param inlen input length
* @return freshly allocated hex encoding of a specified string
*/
-gchar * rspamd_encode_hex (const guchar *in, gsize inlen);
+gchar *rspamd_encode_hex (const guchar *in, gsize inlen);
/**
* Decode string using hex encoding
@@ -157,7 +170,7 @@ gchar * rspamd_encode_hex (const guchar *in, gsize inlen);
* @param inlen input length
* @return freshly allocated hex decoded value or NULL if input is invalid
*/
-guchar* rspamd_decode_hex (const gchar *in, gsize inlen);
+guchar *rspamd_decode_hex (const gchar *in, gsize inlen);
/**
* Encode string using base32 encoding
@@ -168,7 +181,7 @@ guchar* rspamd_decode_hex (const gchar *in, gsize inlen);
* @return encoded len if `outlen` is enough to encode `inlen`
*/
gint rspamd_encode_base32_buf (const guchar *in, gsize inlen, gchar *out,
- gsize outlen);
+ gsize outlen);
/**
* Decode string using base32 encoding
@@ -179,7 +192,7 @@ gint rspamd_encode_base32_buf (const guchar *in, gsize inlen, gchar *out,
* @return decoded len if in is valid base32 and `outlen` is enough to encode `inlen`
*/
gint rspamd_decode_base32_buf (const gchar *in, gsize inlen,
- guchar *out, gsize outlen);
+ guchar *out, gsize outlen);
/**
* Encode string using hex encoding
@@ -190,7 +203,7 @@ gint rspamd_decode_base32_buf (const gchar *in, gsize inlen,
* @return encoded len if `outlen` is enough to encode `inlen`
*/
gint rspamd_encode_hex_buf (const guchar *in, gsize inlen, gchar *out,
- gsize outlen);
+ gsize outlen);
/**
@@ -202,7 +215,7 @@ gint rspamd_encode_hex_buf (const guchar *in, gsize inlen, gchar *out,
* @return decoded len if in is valid hex and `outlen` is enough to encode `inlen`
*/
gssize rspamd_decode_hex_buf (const gchar *in, gsize inlen,
- guchar *out, gsize outlen);
+ guchar *out, gsize outlen);
/**
* Common version of base64 encoder
@@ -221,6 +234,7 @@ rspamd_encode_base64_common (const guchar *in,
gsize *outlen,
gboolean fold,
enum rspamd_newlines_type how);
+
/**
* Encode string using base64 encoding
* @param in input
@@ -228,8 +242,8 @@ rspamd_encode_base64_common (const guchar *in,
* @param str_len maximum string length (if <= 0 then no lines are split)
* @return freshly allocated base64 encoded value or NULL if input is invalid
*/
-gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
- gsize *outlen);
+gchar *rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen);
/**
* Encode and fold string using base64 encoding
@@ -238,8 +252,8 @@ gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
* @param str_len maximum string length (if <= 0 then no lines are split)
* @return freshly allocated base64 encoded value or NULL if input is invalid
*/
-gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
- gsize *outlen, enum rspamd_newlines_type how);
+gchar *rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen, enum rspamd_newlines_type how);
/**
* Encode and fold string using quoted printable encoding
@@ -248,8 +262,8 @@ gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
* @param str_len maximum string length (if <= 0 then no lines are split)
* @return freshly allocated quoted-printable encoded value or NULL if input is invalid
*/
-gchar * rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len,
- gsize *outlen, enum rspamd_newlines_type how);
+gchar *rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen, enum rspamd_newlines_type how);
/**
* Decode quoted-printable encoded buffer, input and output must not overlap
@@ -260,7 +274,7 @@ gchar * rspamd_encode_qp_fold (const guchar *in, gsize inlen, gint str_len,
* @return real size of decoded output or (-1) if outlen is not enough
*/
gssize rspamd_decode_qp_buf (const gchar *in, gsize inlen,
- gchar *out, gsize outlen);
+ gchar *out, gsize outlen);
/**
* Decode quoted-printable encoded buffer using rfc2047 format, input and output must not overlap
@@ -271,7 +285,7 @@ gssize rspamd_decode_qp_buf (const gchar *in, gsize inlen,
* @return real size of decoded output or (-1) if outlen is not enough
*/
gssize rspamd_decode_qp2047_buf (const gchar *in, gsize inlen,
- gchar *out, gsize outlen);
+ gchar *out, gsize outlen);
/**
* Encode quoted-printable buffer using rfc2047 format, input and output must not overlap
@@ -282,7 +296,7 @@ gssize rspamd_decode_qp2047_buf (const gchar *in, gsize inlen,
* @return
*/
gssize rspamd_encode_qp2047_buf (const gchar *in, gsize inlen,
- gchar *out, gsize outlen);
+ gchar *out, gsize outlen);
#ifndef g_tolower
# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
@@ -297,7 +311,7 @@ gssize rspamd_encode_qp2047_buf (const gchar *in, gsize inlen,
* @return
*/
gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
- const gchar *s2, gsize s2len, guint replace_cost);
+ const gchar *s2, gsize s2len, guint replace_cost);
/**
* Fold header using rfc822 rules, return new GString from the previous one
@@ -309,10 +323,10 @@ gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
* @return new GString with the folded value
*/
GString *rspamd_header_value_fold (const gchar *name,
- const gchar *value,
- guint fold_max,
- enum rspamd_newlines_type how,
- const gchar *fold_on_chars);
+ const gchar *value,
+ guint fold_max,
+ enum rspamd_newlines_type how,
+ const gchar *fold_on_chars);
/**
* Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm
@@ -324,7 +338,7 @@ GString *rspamd_header_value_fold (const gchar *name,
* @return position of the first substring match or (-1) if not found
*/
goffset rspamd_substring_search (const gchar *in, gsize inlen,
- const gchar *srch, gsize srchlen);
+ const gchar *srch, gsize srchlen);
/**
* Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm in a caseless manner (ASCII only)
@@ -336,7 +350,7 @@ goffset rspamd_substring_search (const gchar *in, gsize inlen,
* @return position of the first substring match or (-1) if not found
*/
goffset rspamd_substring_search_caseless (const gchar *in, gsize inlen,
- const gchar *srch, gsize srchlen);
+ const gchar *srch, gsize srchlen);
/**
* Search for end-of-headers mark in the input string. Returns position just after
@@ -348,7 +362,8 @@ goffset rspamd_string_find_eoh (GString *input, goffset *body_start);
#define rspamd_ucl_emit_gstring(o, t, target) \
- rspamd_ucl_emit_gstring_comments((o), (t), (target), NULL)
+ rspamd_ucl_emit_gstring_comments((o), (t), (target), NULL)
+
/**
* Emit UCL object to gstring
* @param obj object to emit
@@ -357,12 +372,13 @@ goffset rspamd_string_find_eoh (GString *input, goffset *body_start);
* @param target target string
*/
void rspamd_ucl_emit_gstring_comments (const ucl_object_t *obj,
- enum ucl_emitter emit_type,
- GString *target,
- const ucl_object_t *comments);
+ enum ucl_emitter emit_type,
+ GString *target,
+ const ucl_object_t *comments);
#define rspamd_ucl_emit_fstring(o, t, target) \
- rspamd_ucl_emit_fstring_comments((o), (t), (target), NULL)
+ rspamd_ucl_emit_fstring_comments((o), (t), (target), NULL)
+
/**
* Emit UCL object to fstring
* @param obj object to emit
@@ -371,9 +387,9 @@ void rspamd_ucl_emit_gstring_comments (const ucl_object_t *obj,
* @param target target string
*/
void rspamd_ucl_emit_fstring_comments (const ucl_object_t *obj,
- enum ucl_emitter emit_type,
- rspamd_fstring_t **target,
- const ucl_object_t *comments);
+ enum ucl_emitter emit_type,
+ rspamd_fstring_t **target,
+ const ucl_object_t *comments);
extern const guchar lc_map[256];
@@ -405,27 +421,26 @@ gsize rspamd_memcspn (const gchar *s, const gchar *e, gsize len);
gsize rspamd_memspn (const gchar *s, const gchar *e, gsize len);
/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
-#define rspamd_str_hasmore(x,n) ((((x)+~0UL/255*(127-(n)))|(x))&~0UL/255*128)
+#define rspamd_str_hasmore(x, n) ((((x)+~0UL/255*(127-(n)))|(x))&~0UL/255*128)
static inline gboolean
-rspamd_str_has_8bit (const guchar *beg, gsize len)
-{
+rspamd_str_has_8bit (const guchar *beg, gsize len) {
unsigned long *w;
gsize i, leftover = len % sizeof (*w);
- w = (unsigned long *)beg;
+ w = (unsigned long *) beg;
- for (i = 0; i < len / sizeof (*w); i ++) {
+ for (i = 0; i < len / sizeof (*w); i++) {
if (rspamd_str_hasmore (*w, 127)) {
return TRUE;
}
- w ++;
+ w++;
}
- beg = (const guchar *)w;
+ beg = (const guchar *) w;
- for (i = 0; i < leftover; i ++) {
+ for (i = 0; i < leftover; i++) {
if (beg[i] > 127) {
return TRUE;
}
@@ -435,9 +450,11 @@ rspamd_str_has_8bit (const guchar *beg, gsize len)
}
struct UConverter;
+
struct UConverter *rspamd_get_utf8_converter (void);
struct UNormalizer2;
+
const struct UNormalizer2 *rspamd_get_unicode_normalizer (void);
enum rspamd_normalise_result {
@@ -456,7 +473,7 @@ enum rspamd_normalise_result {
* @return normalisation result (see enum rspamd_normalise_result)
*/
enum rspamd_normalise_result rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool,
- gchar *start, guint *len);
+ gchar *start, guint *len);
enum rspamd_regexp_escape_flags {
RSPAMD_REGEXP_ESCAPE_ASCII = 0,
@@ -464,6 +481,7 @@ enum rspamd_regexp_escape_flags {
RSPAMD_REGEXP_ESCAPE_GLOB = 1u << 1,
RSPAMD_REGEXP_ESCAPE_RE = 1u << 2,
};
+
/**
* Escapes special characters when reading plain data to be processed in pcre
* @param pattern pattern to process
@@ -474,7 +492,7 @@ enum rspamd_regexp_escape_flags {
*/
gchar *
rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
- gsize *dst_len, enum rspamd_regexp_escape_flags flags);
+ gsize *dst_len, enum rspamd_regexp_escape_flags flags);
/**
* Returns copy of src (zero terminated) where all unicode is made valid or replaced
@@ -484,7 +502,7 @@ rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
* @param dstlen
* @return
*/
-gchar * rspamd_str_make_utf_valid (const guchar *src, gsize slen, gsize *dstlen);
+gchar *rspamd_str_make_utf_valid (const guchar *src, gsize slen, gsize *dstlen);
/**
* Strips characters in `strip_chars` from start and end of the GString
@@ -498,19 +516,23 @@ gsize rspamd_gstring_strip (GString *s, const gchar *strip_chars);
* @param s
* @param strip_chars
*/
-const gchar* rspamd_string_len_strip (const gchar *in,
- gsize *len, const gchar *strip_chars);
+const gchar *rspamd_string_len_strip (const gchar *in,
+ gsize *len, const gchar *strip_chars);
#define IS_ZERO_WIDTH_SPACE(uc) ((uc) == 0x200B || \
- (uc) == 0x200C || \
- (uc) == 0x200D || \
- (uc) == 0xFEFF)
+ (uc) == 0x200C || \
+ (uc) == 0x200D || \
+ (uc) == 0xFEFF)
#define IS_OBSCURED_CHAR(uc) (((uc) >= 0x200B && (uc) <= 0x200F) || \
- ((uc) >= 0x2028 && (uc) <= 0x202F) || \
- ((uc) >= 0x205F && (uc) <= 0x206F) || \
- (uc) == 0xFEFF)
+ ((uc) >= 0x2028 && (uc) <= 0x202F) || \
+ ((uc) >= 0x205F && (uc) <= 0x206F) || \
+ (uc) == 0xFEFF)
#define RSPAMD_LEN_CHECK_STARTS_WITH(s, len, lit) \
- ((len) >= sizeof(lit) - 1 && g_ascii_strncasecmp ((s), (lit), sizeof(lit) - 1) == 0)
+ ((len) >= sizeof(lit) - 1 && g_ascii_strncasecmp ((s), (lit), sizeof(lit) - 1) == 0)
+
+#ifdef __cplusplus
+}
+#endif
#endif /* SRC_LIBUTIL_STR_UTIL_H_ */