about | summary | refs | log | tree | commit | diff | stats
path: root/src/libstat/tokenizers
diff options
context:
space:
mode:
Diffstat (limited to 'src/libstat/tokenizers')
-rw-r--r-- src/libstat/tokenizers/osb.c | 16
-rw-r--r-- src/libstat/tokenizers/tokenizers.c | 32
-rw-r--r-- src/libstat/tokenizers/tokenizers.h | 2
3 files changed, 25 insertions, 25 deletions
diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c
index e96748a93..3f770c69e 100644
--- a/src/libstat/tokenizers/osb.c
+++ b/src/libstat/tokenizers/osb.c
@@ -62,7 +62,7 @@ struct rspamd_osb_tokenizer_config {
gshort version;
gshort window_size;
enum rspamd_osb_hash_type ht;
- guint64 seed;
+ uint64_t seed;
rspamd_sipkey_t sk;
};
@@ -262,7 +262,7 @@ rspamd_tokenizer_osb_is_compat (struct rspamd_tokenizer_runtime *rt)
#endif
struct token_pipe_entry {
- guint64 h;
+ uint64_t h;
rspamd_stat_token_t *t;
};
@@ -276,9 +276,9 @@ gint rspamd_tokenizer_osb(struct rspamd_stat_ctx *ctx,
rspamd_token_t *new_tok = NULL;
rspamd_stat_token_t *token;
struct rspamd_osb_tokenizer_config *osb_cf;
- guint64 cur, seed;
+ uint64_t cur, seed;
struct token_pipe_entry *hashpipe;
- guint32 h1, h2;
+ uint32_t h1, h2;
gsize token_size;
guint processed = 0, i, w, window_size, token_flags = 0;
@@ -369,10 +369,10 @@ gint rspamd_tokenizer_osb(struct rspamd_stat_ctx *ctx,
new_tok->t1 = hashpipe[0].t; \
new_tok->t2 = hashpipe[i].t; \
if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { \
- h1 = ((guint32) hashpipe[0].h) * primes[0] + \
- ((guint32) hashpipe[i].h) * primes[i << 1]; \
- h2 = ((guint32) hashpipe[0].h) * primes[1] + \
- ((guint32) hashpipe[i].h) * primes[(i << 1) - 1]; \
+ h1 = ((uint32_t) hashpipe[0].h) * primes[0] + \
+ ((uint32_t) hashpipe[i].h) * primes[i << 1]; \
+ h2 = ((uint32_t) hashpipe[0].h) * primes[1] + \
+ ((uint32_t) hashpipe[i].h) * primes[(i << 1) - 1]; \
memcpy((guchar *) &new_tok->data, &h1, sizeof(h1)); \
memcpy(((guchar *) &new_tok->data) + sizeof(h1), &h2, sizeof(h2)); \
} \
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index ee7234df7..702668142 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2024 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -162,8 +162,8 @@ static inline gboolean
rspamd_tokenize_check_limit(gboolean decay,
guint word_decay,
guint nwords,
- guint64 *hv,
- guint64 *prob,
+ uint64_t *hv,
+ uint64_t *prob,
const rspamd_stat_token_t *token,
gssize remain,
gssize total)
@@ -171,8 +171,8 @@ rspamd_tokenize_check_limit(gboolean decay,
static const gdouble avg_word_len = 6.0;
if (!decay) {
- if (token->original.len >= sizeof(guint64)) {
- guint64 tmp;
+ if (token->original.len >= sizeof(uint64_t)) {
+ uint64_t tmp;
memcpy(&tmp, token->original.begin, sizeof(tmp));
*hv = mum_hash_step(*hv, tmp);
}
@@ -192,7 +192,7 @@ rspamd_tokenize_check_limit(gboolean decay,
*prob = G_MAXUINT64;
}
else {
- *prob = (guint64) (decay_prob * (double) G_MAXUINT64);
+ *prob = (uint64_t) (decay_prob * (double) G_MAXUINT64);
}
return TRUE;
@@ -213,7 +213,7 @@ rspamd_tokenize_check_limit(gboolean decay,
static inline gboolean
rspamd_utf_word_valid(const guchar *text, const guchar *end,
- gint32 start, gint32 finish)
+ int32_t start, int32_t finish)
{
const guchar *st = text + start, *fin = text + finish;
UChar32 c;
@@ -283,7 +283,7 @@ rspamd_tokenize_text(const gchar *text, gsize len,
enum rspamd_tokenize_type how,
struct rspamd_config *cfg,
GList *exceptions,
- guint64 *hash,
+ uint64_t *hash,
GArray *cur_words,
rspamd_mempool_t *pool)
{
@@ -293,9 +293,9 @@ rspamd_tokenize_text(const gchar *text, gsize len,
GArray *res;
GList *cur = exceptions;
guint min_len = 0, max_len = 0, word_decay = 0, initial_size = 128;
- guint64 hv = 0;
+ uint64_t hv = 0;
gboolean decay = FALSE, long_text_mode = FALSE;
- guint64 prob = 0;
+ uint64_t prob = 0;
static UBreakIterator *bi = NULL;
static const gsize long_text_limit = 1 * 1024 * 1024;
static const ev_tstamp max_exec_time = 0.2; /* 200 ms */
@@ -422,7 +422,7 @@ rspamd_tokenize_text(const gchar *text, gsize len,
if (last > p) {
/* Exception spread over the boundaries */
while (last > p && p != UBRK_DONE) {
- gint32 old_p = p;
+ int32_t old_p = p;
p = ubrk_next(bi);
if (p != UBRK_DONE && p <= old_p) {
@@ -462,7 +462,7 @@ rspamd_tokenize_text(const gchar *text, gsize len,
if (last > p) {
/* Exception spread over the boundaries */
while (last > p && p != UBRK_DONE) {
- gint32 old_p = p;
+ int32_t old_p = p;
p = ubrk_next(bi);
if (p != UBRK_DONE && p <= old_p) {
msg_warn_pool_check(
@@ -607,14 +607,14 @@ rspamd_add_metawords_from_str(const gchar *beg, gsize len,
while (i < len) {
U8_NEXT(beg, i, len, uc);
- if (((gint32) uc) < 0) {
+ if (((int32_t) uc) < 0) {
valid_utf = FALSE;
break;
}
#if U_ICU_VERSION_MAJOR_NUM < 50
if (u_isalpha(uc)) {
- gint32 sc = ublock_getCode(uc);
+ int32_t sc = ublock_getCode(uc);
if (sc == UBLOCK_THAI) {
valid_utf = FALSE;
@@ -697,7 +697,7 @@ rspamd_uchars_to_ucs32(const UChar *src, gsize srclen,
rspamd_mempool_t *pool)
{
UChar32 *dest, t, *d;
- gint32 i = 0;
+ int32_t i = 0;
dest = rspamd_mempool_alloc(pool, srclen * sizeof(UChar32));
d = dest;
@@ -787,7 +787,7 @@ void rspamd_normalize_single_word(rspamd_stat_token_t *tok, rspamd_mempool_t *po
else {
#if U_ICU_VERSION_MAJOR_NUM >= 44
const UNormalizer2 *norm = rspamd_get_unicode_normalizer();
- gint32 end;
+ int32_t end;
/* We can now check if we need to decompose */
end = unorm2_spanQuickCheckYes(norm, tmpbuf, ulen, &uc_err);
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
index d696364e2..ff5c530c5 100644
--- a/src/libstat/tokenizers/tokenizers.h
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -65,7 +65,7 @@ GArray *rspamd_tokenize_text(const gchar *text, gsize len,
enum rspamd_tokenize_type how,
struct rspamd_config *cfg,
GList *exceptions,
- guint64 *hash,
+ uint64_t *hash,
GArray *cur_words,
rspamd_mempool_t *pool);