diff options
Diffstat (limited to 'src/libstat')
-rw-r--r-- | src/libstat/backends/backends.h | 124 | ||||
-rw-r--r-- | src/libstat/backends/cdb_backend.cxx | 151 | ||||
-rw-r--r-- | src/libstat/backends/http_backend.cxx | 142 | ||||
-rw-r--r-- | src/libstat/backends/mmaped_file.c | 880 | ||||
-rw-r--r-- | src/libstat/backends/redis_backend.c | 1465 | ||||
-rw-r--r-- | src/libstat/backends/sqlite3_backend.c | 857 | ||||
-rw-r--r-- | src/libstat/classifiers/bayes.c | 341 | ||||
-rw-r--r-- | src/libstat/classifiers/classifiers.h | 88 | ||||
-rw-r--r-- | src/libstat/classifiers/lua_classifier.c | 216 | ||||
-rw-r--r-- | src/libstat/learn_cache/learn_cache.h | 58 | ||||
-rw-r--r-- | src/libstat/learn_cache/redis_cache.c | 326 | ||||
-rw-r--r-- | src/libstat/learn_cache/sqlite3_cache.c | 251 | ||||
-rw-r--r-- | src/libstat/stat_api.h | 42 | ||||
-rw-r--r-- | src/libstat/stat_config.c | 419 | ||||
-rw-r--r-- | src/libstat/stat_internal.h | 37 | ||||
-rw-r--r-- | src/libstat/stat_process.c | 812 | ||||
-rw-r--r-- | src/libstat/tokenizers/osb.c | 213 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.c | 476 | ||||
-rw-r--r-- | src/libstat/tokenizers/tokenizers.h | 68 |
19 files changed, 3404 insertions, 3562 deletions
diff --git a/src/libstat/backends/backends.h b/src/libstat/backends/backends.h index 67d612383..4b16950bb 100644 --- a/src/libstat/backends/backends.h +++ b/src/libstat/backends/backends.h @@ -21,7 +21,7 @@ #define RSPAMD_DEFAULT_BACKEND "mmap" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -38,81 +38,81 @@ struct rspamd_stat_backend { const char *name; bool read_only; - gpointer (*init) (struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg, - struct rspamd_statfile *st); + gpointer (*init)(struct rspamd_stat_ctx *ctx, struct rspamd_config *cfg, + struct rspamd_statfile *st); - gpointer (*runtime) (struct rspamd_task *task, - struct rspamd_statfile_config *stcf, - gboolean learn, gpointer ctx, - gint id); + gpointer (*runtime)(struct rspamd_task *task, + struct rspamd_statfile_config *stcf, + gboolean learn, gpointer ctx, + gint id); - gboolean (*process_tokens) (struct rspamd_task *task, GPtrArray *tokens, - gint id, - gpointer ctx); + gboolean (*process_tokens)(struct rspamd_task *task, GPtrArray *tokens, + gint id, + gpointer ctx); - gboolean (*finalize_process) (struct rspamd_task *task, - gpointer runtime, gpointer ctx); + gboolean (*finalize_process)(struct rspamd_task *task, + gpointer runtime, gpointer ctx); - gboolean (*learn_tokens) (struct rspamd_task *task, GPtrArray *tokens, - gint id, - gpointer ctx); + gboolean (*learn_tokens)(struct rspamd_task *task, GPtrArray *tokens, + gint id, + gpointer ctx); - gulong (*total_learns) (struct rspamd_task *task, - gpointer runtime, gpointer ctx); + gulong (*total_learns)(struct rspamd_task *task, + gpointer runtime, gpointer ctx); - gboolean (*finalize_learn) (struct rspamd_task *task, - gpointer runtime, gpointer ctx, GError **err); + gboolean (*finalize_learn)(struct rspamd_task *task, + gpointer runtime, gpointer ctx, GError **err); - gulong (*inc_learns) (struct rspamd_task *task, - gpointer runtime, gpointer ctx); + gulong (*inc_learns)(struct rspamd_task *task, + gpointer runtime, gpointer 
ctx); - gulong (*dec_learns) (struct rspamd_task *task, - gpointer runtime, gpointer ctx); + gulong (*dec_learns)(struct rspamd_task *task, + gpointer runtime, gpointer ctx); - ucl_object_t *(*get_stat) (gpointer runtime, gpointer ctx); + ucl_object_t *(*get_stat)(gpointer runtime, gpointer ctx); - void (*close) (gpointer ctx); + void (*close)(gpointer ctx); - gpointer (*load_tokenizer_config) (gpointer runtime, gsize *sz); + gpointer (*load_tokenizer_config)(gpointer runtime, gsize *sz); gpointer ctx; }; -#define RSPAMD_STAT_BACKEND_DEF(name) \ - gpointer rspamd_##name##_init (struct rspamd_stat_ctx *ctx, \ - struct rspamd_config *cfg, struct rspamd_statfile *st); \ - gpointer rspamd_##name##_runtime (struct rspamd_task *task, \ - struct rspamd_statfile_config *stcf, \ - gboolean learn, gpointer ctx, gint id); \ - gboolean rspamd_##name##_process_tokens (struct rspamd_task *task, \ - GPtrArray *tokens, gint id, \ - gpointer runtime); \ - gboolean rspamd_##name##_finalize_process (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gboolean rspamd_##name##_learn_tokens (struct rspamd_task *task, \ - GPtrArray *tokens, gint id, \ - gpointer runtime); \ - gboolean rspamd_##name##_finalize_learn (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx, GError **err); \ - gulong rspamd_##name##_total_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gulong rspamd_##name##_inc_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gulong rspamd_##name##_dec_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - gulong rspamd_##name##_learns (struct rspamd_task *task, \ - gpointer runtime, \ - gpointer ctx); \ - ucl_object_t * rspamd_##name##_get_stat (gpointer runtime, \ - gpointer ctx); \ - gpointer rspamd_##name##_load_tokenizer_config (gpointer runtime, \ - gsize *len); \ - void rspamd_##name##_close (gpointer ctx) +#define RSPAMD_STAT_BACKEND_DEF(name) \ + 
gpointer rspamd_##name##_init(struct rspamd_stat_ctx *ctx, \ + struct rspamd_config *cfg, struct rspamd_statfile *st); \ + gpointer rspamd_##name##_runtime(struct rspamd_task *task, \ + struct rspamd_statfile_config *stcf, \ + gboolean learn, gpointer ctx, gint id); \ + gboolean rspamd_##name##_process_tokens(struct rspamd_task *task, \ + GPtrArray *tokens, gint id, \ + gpointer runtime); \ + gboolean rspamd_##name##_finalize_process(struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gboolean rspamd_##name##_learn_tokens(struct rspamd_task *task, \ + GPtrArray *tokens, gint id, \ + gpointer runtime); \ + gboolean rspamd_##name##_finalize_learn(struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx, GError **err); \ + gulong rspamd_##name##_total_learns(struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gulong rspamd_##name##_inc_learns(struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gulong rspamd_##name##_dec_learns(struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + gulong rspamd_##name##_learns(struct rspamd_task *task, \ + gpointer runtime, \ + gpointer ctx); \ + ucl_object_t *rspamd_##name##_get_stat(gpointer runtime, \ + gpointer ctx); \ + gpointer rspamd_##name##_load_tokenizer_config(gpointer runtime, \ + gsize *len); \ + void rspamd_##name##_close(gpointer ctx) RSPAMD_STAT_BACKEND_DEF(mmaped_file); RSPAMD_STAT_BACKEND_DEF(sqlite3); @@ -120,7 +120,7 @@ RSPAMD_STAT_BACKEND_DEF(cdb); RSPAMD_STAT_BACKEND_DEF(redis); RSPAMD_STAT_BACKEND_DEF(http); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libstat/backends/cdb_backend.cxx b/src/libstat/backends/cdb_backend.cxx index 22a6c3dab..81d87f319 100644 --- a/src/libstat/backends/cdb_backend.cxx +++ b/src/libstat/backends/cdb_backend.cxx @@ -41,7 +41,8 @@ public: using cdb_element_t = std::shared_ptr<struct cdb>; cdb_shared_storage() = default; - auto get_cdb(const char *path) const -> 
std::optional<cdb_element_t> { + auto get_cdb(const char *path) const -> std::optional<cdb_element_t> + { auto found = elts.find(path); if (found != elts.end()) { @@ -53,13 +54,15 @@ public: return std::nullopt; } /* Create a new smart pointer over POD cdb structure */ - static auto new_cdb() -> cdb_element_t { + static auto new_cdb() -> cdb_element_t + { auto ret = cdb_element_t(new struct cdb, cdb_deleter()); memset(ret.get(), 0, sizeof(struct cdb)); return ret; } /* Enclose cdb into storage */ - auto push_cdb(const char *path, cdb_element_t cdbp) -> cdb_element_t { + auto push_cdb(const char *path, cdb_element_t cdbp) -> cdb_element_t + { auto found = elts.find(path); if (found != elts.end()) { @@ -82,6 +85,7 @@ public: return cdbp; } } + private: /* * We store weak pointers here to allow owning cdb statfiles to free @@ -90,7 +94,8 @@ private: ankerl::unordered_dense::map<std::string, std::weak_ptr<struct cdb>> elts; struct cdb_deleter { - void operator()(struct cdb *c) const { + void operator()(struct cdb *c) const + { cdb_free(c); delete c; } @@ -102,13 +107,16 @@ static cdb_shared_storage cdb_shared_storage; class ro_backend final { public: explicit ro_backend(struct rspamd_statfile *_st, cdb_shared_storage::cdb_element_t _db) - : st(_st), db(std::move(_db)) {} + : st(_st), db(std::move(_db)) + { + } ro_backend() = delete; ro_backend(const ro_backend &) = delete; - ro_backend(ro_backend &&other) noexcept { + ro_backend(ro_backend &&other) noexcept + { *this = std::move(other); } - ro_backend& operator=(ro_backend &&other) noexcept + ro_backend &operator=(ro_backend &&other) noexcept { std::swap(st, other.st); std::swap(db, other.db); @@ -118,14 +126,18 @@ public: return *this; } - ~ro_backend() {} + ~ro_backend() + { + } auto load_cdb() -> tl::expected<bool, std::string>; auto process_token(const rspamd_token_t *tok) const -> std::optional<float>; - constexpr auto is_spam() const -> bool { + constexpr auto is_spam() const -> bool + { return st->stcf->is_spam; 
} - auto get_learns() const -> std::uint64_t { + auto get_learns() const -> std::uint64_t + { if (is_spam()) { return learns_spam; } @@ -133,9 +145,11 @@ public: return learns_ham; } } - auto get_total_learns() const -> std::uint64_t { + auto get_total_learns() const -> std::uint64_t + { return learns_spam + learns_ham; } + private: struct rspamd_statfile *st; cdb_shared_storage::cdb_element_t db; @@ -148,7 +162,7 @@ template<typename T> static inline auto cdb_get_key_as_int64(struct cdb *cdb, T key) -> std::optional<std::int64_t> { - auto pos = cdb_find(cdb, (void *)&key, sizeof(key)); + auto pos = cdb_find(cdb, (void *) &key, sizeof(key)); if (pos > 0) { auto vpos = cdb_datapos(cdb); @@ -156,7 +170,7 @@ cdb_get_key_as_int64(struct cdb *cdb, T key) -> std::optional<std::int64_t> if (vlen == sizeof(std::int64_t)) { std::int64_t ret; - cdb_read(cdb, (void *)&ret, vlen, vpos); + cdb_read(cdb, (void *) &ret, vlen, vpos); return ret; } @@ -169,7 +183,7 @@ template<typename T> static inline auto cdb_get_key_as_float_pair(struct cdb *cdb, T key) -> std::optional<std::pair<float, float>> { - auto pos = cdb_find(cdb, (void *)&key, sizeof(key)); + auto pos = cdb_find(cdb, (void *) &key, sizeof(key)); if (pos > 0) { auto vpos = cdb_datapos(cdb); @@ -183,7 +197,7 @@ cdb_get_key_as_float_pair(struct cdb *cdb, T key) -> std::optional<std::pair<flo } d; char c[sizeof(float) * 2]; } u; - cdb_read(cdb, (void *)u.c, vlen, vpos); + cdb_read(cdb, (void *) u.c, vlen, vpos); return std::make_pair(u.d.v1, u.d.v2); } @@ -193,8 +207,7 @@ cdb_get_key_as_float_pair(struct cdb *cdb, T key) -> std::optional<std::pair<flo } -auto -ro_backend::load_cdb() -> tl::expected<bool, std::string> +auto ro_backend::load_cdb() -> tl::expected<bool, std::string> { if (!db) { return tl::make_unexpected("no database loaded"); @@ -205,7 +218,7 @@ ro_backend::load_cdb() -> tl::expected<bool, std::string> static const char learn_spam_key[9] = "_lrnspam", learn_ham_key[9] = "_lrnham_"; auto check_key = 
[&](const char *key, std::uint64_t &target) -> tl::expected<bool, std::string> { - memcpy((void *)&cdb_key, key, sizeof(cdb_key)); + memcpy((void *) &cdb_key, key, sizeof(cdb_key)); auto maybe_value = cdb_get_key_as_int64(db.get(), cdb_key); @@ -213,7 +226,7 @@ ro_backend::load_cdb() -> tl::expected<bool, std::string> return tl::make_unexpected(fmt::format("missing {} key", key)); } - target = (std::uint64_t)maybe_value.value(); + target = (std::uint64_t) maybe_value.value(); return true; }; @@ -232,11 +245,10 @@ ro_backend::load_cdb() -> tl::expected<bool, std::string> loaded = true; - return true; // expected + return true;// expected } -auto -ro_backend::process_token(const rspamd_token_t *tok) const -> std::optional<float> +auto ro_backend::process_token(const rspamd_token_t *tok) const -> std::optional<float> { if (!loaded) { return std::nullopt; @@ -258,15 +270,14 @@ ro_backend::process_token(const rspamd_token_t *tok) const -> std::optional<floa return std::nullopt; } -auto -open_cdb(struct rspamd_statfile *st) -> tl::expected<ro_backend, std::string> +auto open_cdb(struct rspamd_statfile *st) -> tl::expected<ro_backend, std::string> { const char *path = nullptr; const auto *stf = st->stcf; auto get_filename = [](const ucl_object_t *obj) -> const char * { const auto *filename = ucl_object_lookup_any(obj, - "filename", "path", "cdb", nullptr); + "filename", "path", "cdb", nullptr); if (filename && ucl_object_type(filename) == UCL_STRING) { return ucl_object_tostring(filename); @@ -276,8 +287,8 @@ open_cdb(struct rspamd_statfile *st) -> tl::expected<ro_backend, std::string> }; /* First search in backend configuration */ - const auto *obj = ucl_object_lookup (st->classifier->cfg->opts, "backend"); - if (obj != NULL && ucl_object_type (obj) == UCL_OBJECT) { + const auto *obj = ucl_object_lookup(st->classifier->cfg->opts, "backend"); + if (obj != NULL && ucl_object_type(obj) == UCL_OBJECT) { path = get_filename(obj); } @@ -304,7 +315,7 @@ open_cdb(struct 
rspamd_statfile *st) -> tl::expected<ro_backend, std::string> if (fd == -1) { return tl::make_unexpected(fmt::format("cannot open {}: {}", - path, strerror(errno))); + path, strerror(errno))); } cdbp = cdb_shared_storage::new_cdb(); @@ -313,7 +324,7 @@ open_cdb(struct rspamd_statfile *st) -> tl::expected<ro_backend, std::string> close(fd); return tl::make_unexpected(fmt::format("cannot init cdb in {}: {}", - path, strerror(errno))); + path, strerror(errno))); } cdbp = cdb_shared_storage.push_cdb(path, cdbp); @@ -326,7 +337,7 @@ open_cdb(struct rspamd_statfile *st) -> tl::expected<ro_backend, std::string> if (!cdbp) { return tl::make_unexpected(fmt::format("cannot init cdb in {}: internal error", - path)); + path)); } ro_backend bk{st, std::move(cdbp)}; @@ -340,15 +351,15 @@ open_cdb(struct rspamd_statfile *st) -> tl::expected<ro_backend, std::string> return bk; } -} +}// namespace rspamd::stat::cdb #define CDB_FROM_RAW(p) (reinterpret_cast<rspamd::stat::cdb::ro_backend *>(p)) /* C exports */ gpointer -rspamd_cdb_init(struct rspamd_stat_ctx* ctx, - struct rspamd_config* cfg, - struct rspamd_statfile* st) +rspamd_cdb_init(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st) { auto maybe_backend = rspamd::stat::cdb::open_cdb(st); @@ -365,21 +376,21 @@ rspamd_cdb_init(struct rspamd_stat_ctx* ctx, return nullptr; } gpointer -rspamd_cdb_runtime(struct rspamd_task* task, - struct rspamd_statfile_config* stcf, - gboolean learn, - gpointer ctx, - gint _id) +rspamd_cdb_runtime(struct rspamd_task *task, + struct rspamd_statfile_config *stcf, + gboolean learn, + gpointer ctx, + gint _id) { /* In CDB we don't have any dynamic stuff */ return ctx; } gboolean -rspamd_cdb_process_tokens(struct rspamd_task* task, - GPtrArray* tokens, - gint id, - gpointer runtime) +rspamd_cdb_process_tokens(struct rspamd_task *task, + GPtrArray *tokens, + gint id, + gpointer runtime) { auto *cdbp = CDB_FROM_RAW(runtime); bool seen_values = false; @@ -409,33 
+420,32 @@ rspamd_cdb_process_tokens(struct rspamd_task* task, } return true; - } gboolean -rspamd_cdb_finalize_process(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_cdb_finalize_process(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { return true; } gboolean -rspamd_cdb_learn_tokens(struct rspamd_task* task, - GPtrArray* tokens, - gint id, - gpointer ctx) +rspamd_cdb_learn_tokens(struct rspamd_task *task, + GPtrArray *tokens, + gint id, + gpointer ctx) { return false; } gboolean -rspamd_cdb_finalize_learn(struct rspamd_task* task, - gpointer runtime, - gpointer ctx, - GError** err) +rspamd_cdb_finalize_learn(struct rspamd_task *task, + gpointer runtime, + gpointer ctx, + GError **err) { return false; } -gulong rspamd_cdb_total_learns(struct rspamd_task* task, +gulong rspamd_cdb_total_learns(struct rspamd_task *task, gpointer runtime, gpointer ctx) { @@ -443,39 +453,38 @@ gulong rspamd_cdb_total_learns(struct rspamd_task* task, return cdbp->get_total_learns(); } gulong -rspamd_cdb_inc_learns(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_cdb_inc_learns(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { - return (gulong)-1; + return (gulong) -1; } gulong -rspamd_cdb_dec_learns(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_cdb_dec_learns(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { - return (gulong)-1; + return (gulong) -1; } gulong -rspamd_cdb_learns(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_cdb_learns(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { auto *cdbp = CDB_FROM_RAW(ctx); return cdbp->get_learns(); } -ucl_object_t* +ucl_object_t * rspamd_cdb_get_stat(gpointer runtime, gpointer ctx) { return nullptr; } gpointer -rspamd_cdb_load_tokenizer_config(gpointer runtime, gsize* len) +rspamd_cdb_load_tokenizer_config(gpointer runtime, gsize *len) { return nullptr; } -void -rspamd_cdb_close(gpointer ctx) +void 
rspamd_cdb_close(gpointer ctx) { auto *cdbp = CDB_FROM_RAW(ctx); delete cdbp; diff --git a/src/libstat/backends/http_backend.cxx b/src/libstat/backends/http_backend.cxx index 69c735a09..3b2e3efe0 100644 --- a/src/libstat/backends/http_backend.cxx +++ b/src/libstat/backends/http_backend.cxx @@ -24,10 +24,10 @@ namespace rspamd::stat::http { -#define msg_debug_stat_http(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_stat_http_log_id, "stat_http", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) +#define msg_debug_stat_http(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_stat_http_log_id, "stat_http", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(stat_http) @@ -37,8 +37,10 @@ class http_backends_collection { double timeout = 1.0; /* Default timeout */ struct upstream_list *read_servers = nullptr; struct upstream_list *write_servers = nullptr; + public: - static auto get() -> http_backends_collection& { + static auto get() -> http_backends_collection & + { static http_backends_collection *singleton = nullptr; if (singleton == nullptr) { @@ -81,26 +83,31 @@ class http_backend_runtime final { public: static auto create(struct rspamd_task *task, bool is_learn) -> http_backend_runtime *; /* Add a new statfile with a specific id to the list of statfiles */ - auto notice_statfile(int id, const struct rspamd_statfile_config *st) -> void { + auto notice_statfile(int id, const struct rspamd_statfile_config *st) -> void + { seen_statfiles[id] = st; } - auto process_tokens(struct rspamd_task* task, - GPtrArray* tokens, + auto process_tokens(struct rspamd_task *task, + GPtrArray *tokens, gint id, bool learn) -> bool; + private: http_backends_collection *all_backends; ankerl::unordered_dense::map<int, const struct rspamd_statfile_config *> seen_statfiles; struct upstream *selected; + private: - http_backend_runtime(struct rspamd_task *task, bool is_learn) : - all_backends(&http_backends_collection::get()) { + 
http_backend_runtime(struct rspamd_task *task, bool is_learn) + : all_backends(&http_backends_collection::get()) + { selected = all_backends->get_upstream(is_learn); } ~http_backend_runtime() = default; - static auto dtor(void *p) -> void { - ((http_backend_runtime *)p)->~http_backend_runtime(); + static auto dtor(void *p) -> void + { + ((http_backend_runtime *) p)->~http_backend_runtime(); } }; @@ -126,14 +133,15 @@ stat_tokens_to_msgpack(GPtrArray *tokens) -> std::vector<std::uint8_t> ret.resize(tokens->len * (sizeof(std::uint64_t) + 1) + 5); ret.push_back('\xdd'); std::uint32_t ulen = GUINT32_TO_BE(tokens->len); - std::copy((const std::uint8_t *)&ulen, - ((const std::uint8_t *)&ulen) + sizeof(ulen), std::back_inserter(ret)); + std::copy((const std::uint8_t *) &ulen, + ((const std::uint8_t *) &ulen) + sizeof(ulen), std::back_inserter(ret)); - PTR_ARRAY_FOREACH(tokens, i, cur) { + PTR_ARRAY_FOREACH(tokens, i, cur) + { ret.push_back('\xcf'); std::uint64_t val = GUINT64_TO_BE(cur->data); - std::copy((const std::uint8_t *)&val, - ((const std::uint8_t *)&val) + sizeof(val), std::back_inserter(ret)); + std::copy((const std::uint8_t *) &val, + ((const std::uint8_t *) &val) + sizeof(val), std::back_inserter(ret)); } return ret; @@ -149,8 +157,7 @@ auto http_backend_runtime::create(struct rspamd_task *task, bool is_learn) -> ht return new (allocated_runtime) http_backend_runtime{task, is_learn}; } -auto -http_backend_runtime::process_tokens(struct rspamd_task *task, GPtrArray *tokens, gint id, bool learn) -> bool +auto http_backend_runtime::process_tokens(struct rspamd_task *task, GPtrArray *tokens, gint id, bool learn) -> bool { if (!learn) { if (id == seen_statfiles.size() - 1) { @@ -171,10 +178,9 @@ http_backend_runtime::process_tokens(struct rspamd_task *task, GPtrArray *tokens return true; } -auto -http_backends_collection::add_backend(struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, - struct rspamd_statfile *st) -> bool +auto 
http_backends_collection::add_backend(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st) -> bool { /* On empty list of backends we know that we need to load backend data actually */ if (backends.empty()) { @@ -235,7 +241,7 @@ auto http_backends_collection::first_init(struct rspamd_stat_ctx *ctx, }; auto ret = false; - auto obj = ucl_object_lookup (st->classifier->cfg->opts, "backend"); + auto obj = ucl_object_lookup(st->classifier->cfg->opts, "backend"); if (obj != nullptr) { ret = try_load_backend_config(obj); } @@ -291,14 +297,14 @@ upstream *http_backends_collection::get_upstream(bool is_learn) return rspamd_upstream_get(ups_list, RSPAMD_UPSTREAM_ROUND_ROBIN, nullptr, 0); } -} +}// namespace rspamd::stat::http /* C API */ gpointer -rspamd_http_init(struct rspamd_stat_ctx* ctx, - struct rspamd_config* cfg, - struct rspamd_statfile* st) +rspamd_http_init(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st) { auto &collections = rspamd::stat::http::http_backends_collection::get(); @@ -308,11 +314,11 @@ rspamd_http_init(struct rspamd_stat_ctx* ctx, return nullptr; } - return (void *)&collections; + return (void *) &collections; } gpointer -rspamd_http_runtime(struct rspamd_task* task, - struct rspamd_statfile_config* stcf, +rspamd_http_runtime(struct rspamd_task *task, + struct rspamd_statfile_config *stcf, gboolean learn, gpointer ctx, gint id) @@ -320,7 +326,7 @@ rspamd_http_runtime(struct rspamd_task* task, auto maybe_existing = rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_HTTP_STAT_BACKEND_RUNTIME); if (maybe_existing != nullptr) { - auto real_runtime = (rspamd::stat::http::http_backend_runtime *)maybe_existing; + auto real_runtime = (rspamd::stat::http::http_backend_runtime *) maybe_existing; real_runtime->notice_statfile(id, stcf); return maybe_existing; @@ -331,19 +337,19 @@ rspamd_http_runtime(struct rspamd_task* task, if (runtime) { runtime->notice_statfile(id, stcf); 
rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_HTTP_STAT_BACKEND_RUNTIME, - (void *)runtime, nullptr); + (void *) runtime, nullptr); } - return (void *)runtime; + return (void *) runtime; } gboolean -rspamd_http_process_tokens(struct rspamd_task* task, - GPtrArray* tokens, - gint id, - gpointer runtime) +rspamd_http_process_tokens(struct rspamd_task *task, + GPtrArray *tokens, + gint id, + gpointer runtime) { - auto real_runtime = (rspamd::stat::http::http_backend_runtime *)runtime; + auto real_runtime = (rspamd::stat::http::http_backend_runtime *) runtime; if (real_runtime) { return real_runtime->process_tokens(task, tokens, id, false); @@ -351,24 +357,23 @@ rspamd_http_process_tokens(struct rspamd_task* task, return false; - } gboolean -rspamd_http_finalize_process(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_http_finalize_process(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { /* Not needed */ return true; } gboolean -rspamd_http_learn_tokens(struct rspamd_task* task, - GPtrArray* tokens, - gint id, - gpointer runtime) +rspamd_http_learn_tokens(struct rspamd_task *task, + GPtrArray *tokens, + gint id, + gpointer runtime) { - auto real_runtime = (rspamd::stat::http::http_backend_runtime *)runtime; + auto real_runtime = (rspamd::stat::http::http_backend_runtime *) runtime; if (real_runtime) { return real_runtime->process_tokens(task, tokens, id, true); @@ -378,58 +383,57 @@ rspamd_http_learn_tokens(struct rspamd_task* task, return false; } gboolean -rspamd_http_finalize_learn(struct rspamd_task* task, - gpointer runtime, - gpointer ctx, - GError** err) +rspamd_http_finalize_learn(struct rspamd_task *task, + gpointer runtime, + gpointer ctx, + GError **err) { return false; } -gulong rspamd_http_total_learns(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +gulong rspamd_http_total_learns(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { /* TODO */ return 0; } gulong 
-rspamd_http_inc_learns(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_http_inc_learns(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { /* TODO */ return 0; } gulong -rspamd_http_dec_learns(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_http_dec_learns(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { /* TODO */ - return (gulong)-1; + return (gulong) -1; } gulong -rspamd_http_learns(struct rspamd_task* task, - gpointer runtime, - gpointer ctx) +rspamd_http_learns(struct rspamd_task *task, + gpointer runtime, + gpointer ctx) { /* TODO */ return 0; } -ucl_object_t* +ucl_object_t * rspamd_http_get_stat(gpointer runtime, gpointer ctx) { /* TODO */ return nullptr; } gpointer -rspamd_http_load_tokenizer_config(gpointer runtime, gsize* len) +rspamd_http_load_tokenizer_config(gpointer runtime, gsize *len) { return nullptr; } -void -rspamd_http_close(gpointer ctx) +void rspamd_http_close(gpointer ctx) { /* TODO */ }
\ No newline at end of file diff --git a/src/libstat/backends/mmaped_file.c b/src/libstat/backends/mmaped_file.c index 046c024fb..5c2020787 100644 --- a/src/libstat/backends/mmaped_file.c +++ b/src/libstat/backends/mmaped_file.c @@ -26,42 +26,42 @@ * Common statfile header */ struct stat_file_header { - u_char magic[3]; /**< magic signature ('r' 's' 'd') */ - u_char version[2]; /**< version of statfile */ - u_char padding[3]; /**< padding */ - guint64 create_time; /**< create time (time_t->guint64) */ - guint64 revision; /**< revision number */ - guint64 rev_time; /**< revision time */ - guint64 used_blocks; /**< used blocks number */ - guint64 total_blocks; /**< total number of blocks */ - guint64 tokenizer_conf_len; /**< length of tokenizer configuration */ - u_char unused[231]; /**< some bytes that can be used in future */ + u_char magic[3]; /**< magic signature ('r' 's' 'd') */ + u_char version[2]; /**< version of statfile */ + u_char padding[3]; /**< padding */ + guint64 create_time; /**< create time (time_t->guint64) */ + guint64 revision; /**< revision number */ + guint64 rev_time; /**< revision time */ + guint64 used_blocks; /**< used blocks number */ + guint64 total_blocks; /**< total number of blocks */ + guint64 tokenizer_conf_len; /**< length of tokenizer configuration */ + u_char unused[231]; /**< some bytes that can be used in future */ }; /** * Section header */ struct stat_file_section { - guint64 code; /**< section's code */ - guint64 length; /**< section's length in blocks */ + guint64 code; /**< section's code */ + guint64 length; /**< section's length in blocks */ }; /** * Block of data in statfile */ struct stat_file_block { - guint32 hash1; /**< hash1 (also acts as index) */ - guint32 hash2; /**< hash2 */ - double value; /**< double value */ + guint32 hash1; /**< hash1 (also acts as index) */ + guint32 hash2; /**< hash2 */ + double value; /**< double value */ }; /** * Statistic file */ struct stat_file { - struct stat_file_header header; /**< 
header */ - struct stat_file_section section; /**< first section */ - struct stat_file_block blocks[1]; /**< first block of data */ + struct stat_file_header header; /**< header */ + struct stat_file_section section; /**< first section */ + struct stat_file_block blocks[1]; /**< first block of data */ }; /** @@ -69,40 +69,43 @@ struct stat_file { */ typedef struct { #ifdef HAVE_PATH_MAX - gchar filename[PATH_MAX]; /**< name of file */ + gchar filename[PATH_MAX]; /**< name of file */ #else - gchar filename[MAXPATHLEN]; /**< name of file */ + gchar filename[MAXPATHLEN]; /**< name of file */ #endif rspamd_mempool_t *pool; - gint fd; /**< descriptor */ - void *map; /**< mmaped area */ - off_t seek_pos; /**< current seek position */ - struct stat_file_section cur_section; /**< current section */ - size_t len; /**< length of file(in bytes) */ + gint fd; /**< descriptor */ + void *map; /**< mmaped area */ + off_t seek_pos; /**< current seek position */ + struct stat_file_section cur_section; /**< current section */ + size_t len; /**< length of file(in bytes) */ struct rspamd_statfile_config *cf; } rspamd_mmaped_file_t; -#define RSPAMD_STATFILE_VERSION {'1', '2'} +#define RSPAMD_STATFILE_VERSION \ + { \ + '1', '2' \ + } #define BACKUP_SUFFIX ".old" -static void rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool, - rspamd_mmaped_file_t *file, - guint32 h1, guint32 h2, double value); +static void rspamd_mmaped_file_set_block_common(rspamd_mempool_t *pool, + rspamd_mmaped_file_t *file, + guint32 h1, guint32 h2, double value); -rspamd_mmaped_file_t * rspamd_mmaped_file_open (rspamd_mempool_t *pool, - const gchar *filename, size_t size, - struct rspamd_statfile_config *stcf); -gint rspamd_mmaped_file_create (const gchar *filename, size_t size, - struct rspamd_statfile_config *stcf, - rspamd_mempool_t *pool); -gint rspamd_mmaped_file_close_file (rspamd_mempool_t *pool, - rspamd_mmaped_file_t * file); +rspamd_mmaped_file_t *rspamd_mmaped_file_open(rspamd_mempool_t *pool, 
+ const gchar *filename, size_t size, + struct rspamd_statfile_config *stcf); +gint rspamd_mmaped_file_create(const gchar *filename, size_t size, + struct rspamd_statfile_config *stcf, + rspamd_mempool_t *pool); +gint rspamd_mmaped_file_close_file(rspamd_mempool_t *pool, + rspamd_mmaped_file_t *file); double -rspamd_mmaped_file_get_block (rspamd_mmaped_file_t * file, - guint32 h1, - guint32 h2) +rspamd_mmaped_file_get_block(rspamd_mmaped_file_t *file, + guint32 h1, + guint32 h2) { struct stat_file_block *block; guint i, blocknum; @@ -113,9 +116,8 @@ rspamd_mmaped_file_get_block (rspamd_mmaped_file_t * file, } blocknum = h1 % file->cur_section.length; - c = (u_char *) file->map + file->seek_pos + blocknum * - sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof(struct stat_file_block); + block = (struct stat_file_block *) c; for (i = 0; i < CHAIN_LENGTH; i++) { if (i + blocknum >= file->cur_section.length) { @@ -124,8 +126,8 @@ rspamd_mmaped_file_get_block (rspamd_mmaped_file_t * file, if (block->hash1 == h1 && block->hash2 == h2) { return block->value; } - c += sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; + c += sizeof(struct stat_file_block); + block = (struct stat_file_block *) c; } @@ -133,9 +135,9 @@ rspamd_mmaped_file_get_block (rspamd_mmaped_file_t * file, } static void -rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool, - rspamd_mmaped_file_t *file, - guint32 h1, guint32 h2, double value) +rspamd_mmaped_file_set_block_common(rspamd_mempool_t *pool, + rspamd_mmaped_file_t *file, + guint32 h1, guint32 h2, double value) { struct stat_file_block *block, *to_expire = NULL; struct stat_file_header *header; @@ -148,38 +150,37 @@ rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool, } blocknum = h1 % file->cur_section.length; - header = (struct stat_file_header *)file->map; - c = (u_char *) file->map + file->seek_pos + blocknum * - sizeof 
(struct stat_file_block); - block = (struct stat_file_block *)c; + header = (struct stat_file_header *) file->map; + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof(struct stat_file_block); + block = (struct stat_file_block *) c; for (i = 0; i < CHAIN_LENGTH; i++) { if (i + blocknum >= file->cur_section.length) { /* Need to expire some block in chain */ - msg_info_pool ("chain %ud is full in statfile %s, starting expire", - blocknum, - file->filename); + msg_info_pool("chain %ud is full in statfile %s, starting expire", + blocknum, + file->filename); break; } /* First try to find block in chain */ if (block->hash1 == h1 && block->hash2 == h2) { - msg_debug_pool ("%s found existing block %ud in chain %ud, value %.2f", - file->filename, - i, - blocknum, - value); + msg_debug_pool("%s found existing block %ud in chain %ud, value %.2f", + file->filename, + i, + blocknum, + value); block->value = value; return; } /* Check whether we have a free block in chain */ if (block->hash1 == 0 && block->hash2 == 0) { /* Write new block here */ - msg_debug_pool ("%s found free block %ud in chain %ud, set h1=%ud, h2=%ud", - file->filename, - i, - blocknum, - h1, - h2); + msg_debug_pool("%s found free block %ud in chain %ud, set h1=%ud, h2=%ud", + file->filename, + i, + blocknum, + h1, + h2); block->hash1 = h1; block->hash2 = h2; block->value = value; @@ -193,8 +194,8 @@ rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool, to_expire = block; min = block->value; } - c += sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; + c += sizeof(struct stat_file_block); + block = (struct stat_file_block *) c; } /* Try expire some block */ @@ -203,9 +204,8 @@ rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool, } else { /* Expire first block in chain */ - c = (u_char *) file->map + file->seek_pos + blocknum * - sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; + c = (u_char *) file->map + file->seek_pos + blocknum * 
sizeof(struct stat_file_block); + block = (struct stat_file_block *) c; } block->hash1 = h1; @@ -213,18 +213,17 @@ rspamd_mmaped_file_set_block_common (rspamd_mempool_t *pool, block->value = value; } -void -rspamd_mmaped_file_set_block (rspamd_mempool_t *pool, - rspamd_mmaped_file_t * file, - guint32 h1, - guint32 h2, - double value) +void rspamd_mmaped_file_set_block(rspamd_mempool_t *pool, + rspamd_mmaped_file_t *file, + guint32 h1, + guint32 h2, + double value) { - rspamd_mmaped_file_set_block_common (pool, file, h1, h2, value); + rspamd_mmaped_file_set_block_common(pool, file, h1, h2, value); } gboolean -rspamd_mmaped_file_set_revision (rspamd_mmaped_file_t *file, guint64 rev, time_t time) +rspamd_mmaped_file_set_revision(rspamd_mmaped_file_t *file, guint64 rev, time_t time) { struct stat_file_header *header; @@ -232,7 +231,7 @@ rspamd_mmaped_file_set_revision (rspamd_mmaped_file_t *file, guint64 rev, time_t return FALSE; } - header = (struct stat_file_header *)file->map; + header = (struct stat_file_header *) file->map; header->revision = rev; header->rev_time = time; @@ -241,7 +240,7 @@ rspamd_mmaped_file_set_revision (rspamd_mmaped_file_t *file, guint64 rev, time_t } gboolean -rspamd_mmaped_file_inc_revision (rspamd_mmaped_file_t *file) +rspamd_mmaped_file_inc_revision(rspamd_mmaped_file_t *file) { struct stat_file_header *header; @@ -249,7 +248,7 @@ rspamd_mmaped_file_inc_revision (rspamd_mmaped_file_t *file) return FALSE; } - header = (struct stat_file_header *)file->map; + header = (struct stat_file_header *) file->map; header->revision++; @@ -257,7 +256,7 @@ rspamd_mmaped_file_inc_revision (rspamd_mmaped_file_t *file) } gboolean -rspamd_mmaped_file_dec_revision (rspamd_mmaped_file_t *file) +rspamd_mmaped_file_dec_revision(rspamd_mmaped_file_t *file) { struct stat_file_header *header; @@ -265,7 +264,7 @@ rspamd_mmaped_file_dec_revision (rspamd_mmaped_file_t *file) return FALSE; } - header = (struct stat_file_header *)file->map; + header = (struct 
stat_file_header *) file->map; header->revision--; @@ -274,7 +273,7 @@ rspamd_mmaped_file_dec_revision (rspamd_mmaped_file_t *file) gboolean -rspamd_mmaped_file_get_revision (rspamd_mmaped_file_t *file, guint64 *rev, time_t *time) +rspamd_mmaped_file_get_revision(rspamd_mmaped_file_t *file, guint64 *rev, time_t *time) { struct stat_file_header *header; @@ -282,7 +281,7 @@ rspamd_mmaped_file_get_revision (rspamd_mmaped_file_t *file, guint64 *rev, time_ return FALSE; } - header = (struct stat_file_header *)file->map; + header = (struct stat_file_header *) file->map; if (rev != NULL) { *rev = header->revision; @@ -295,29 +294,29 @@ rspamd_mmaped_file_get_revision (rspamd_mmaped_file_t *file, guint64 *rev, time_ } guint64 -rspamd_mmaped_file_get_used (rspamd_mmaped_file_t *file) +rspamd_mmaped_file_get_used(rspamd_mmaped_file_t *file) { struct stat_file_header *header; if (file == NULL || file->map == NULL) { - return (guint64) - 1; + return (guint64) -1; } - header = (struct stat_file_header *)file->map; + header = (struct stat_file_header *) file->map; return header->used_blocks; } guint64 -rspamd_mmaped_file_get_total (rspamd_mmaped_file_t *file) +rspamd_mmaped_file_get_total(rspamd_mmaped_file_t *file) { struct stat_file_header *header; if (file == NULL || file->map == NULL) { - return (guint64) - 1; + return (guint64) -1; } - header = (struct stat_file_header *)file->map; + header = (struct stat_file_header *) file->map; /* If total blocks is 0 we have old version of header, so set total blocks correctly */ if (header->total_blocks == 0) { @@ -329,7 +328,7 @@ rspamd_mmaped_file_get_total (rspamd_mmaped_file_t *file) /* Check whether specified file is statistic file and calculate its len in blocks */ static gint -rspamd_mmaped_file_check (rspamd_mempool_t *pool, rspamd_mmaped_file_t * file) +rspamd_mmaped_file_check(rspamd_mempool_t *pool, rspamd_mmaped_file_t *file) { struct stat_file *f; gchar *c; @@ -340,18 +339,18 @@ rspamd_mmaped_file_check (rspamd_mempool_t 
*pool, rspamd_mmaped_file_t * file) return -1; } - if (file->len < sizeof (struct stat_file)) { - msg_info_pool ("file %s is too short to be stat file: %z", - file->filename, - file->len); + if (file->len < sizeof(struct stat_file)) { + msg_info_pool("file %s is too short to be stat file: %z", + file->filename, + file->len); return -1; } - f = (struct stat_file *)file->map; + f = (struct stat_file *) file->map; c = &f->header.magic[0]; /* Check magic and version */ if (*c++ != 'r' || *c++ != 's' || *c++ != 'd') { - msg_info_pool ("file %s is invalid stat file", file->filename); + msg_info_pool("file %s is invalid stat file", file->filename); return -1; } @@ -360,39 +359,39 @@ rspamd_mmaped_file_check (rspamd_mempool_t *pool, rspamd_mmaped_file_t * file) if (*c == 1 && *(c + 1) == 0) { return -1; } - else if (memcmp (c, valid_version, sizeof (valid_version)) != 0) { + else if (memcmp(c, valid_version, sizeof(valid_version)) != 0) { /* Unknown version */ - msg_info_pool ("file %s has invalid version %c.%c", - file->filename, - '0' + *c, - '0' + *(c + 1)); + msg_info_pool("file %s has invalid version %c.%c", + file->filename, + '0' + *c, + '0' + *(c + 1)); return -1; } /* Check first section and set new offset */ file->cur_section.code = f->section.code; file->cur_section.length = f->section.length; - if (file->cur_section.length * sizeof (struct stat_file_block) > + if (file->cur_section.length * sizeof(struct stat_file_block) > file->len) { - msg_info_pool ("file %s is truncated: %z, must be %z", - file->filename, - file->len, - file->cur_section.length * sizeof (struct stat_file_block)); + msg_info_pool("file %s is truncated: %z, must be %z", + file->filename, + file->len, + file->cur_section.length * sizeof(struct stat_file_block)); return -1; } - file->seek_pos = sizeof (struct stat_file) - - sizeof (struct stat_file_block); + file->seek_pos = sizeof(struct stat_file) - + sizeof(struct stat_file_block); return 0; } static rspamd_mmaped_file_t * 
-rspamd_mmaped_file_reindex (rspamd_mempool_t *pool, - const gchar *filename, - size_t old_size, - size_t size, - struct rspamd_statfile_config *stcf) +rspamd_mmaped_file_reindex(rspamd_mempool_t *pool, + const gchar *filename, + size_t old_size, + size_t size, + struct rspamd_statfile_config *stcf) { gchar *backup, *lock; gint fd, lock_fd; @@ -401,285 +400,276 @@ rspamd_mmaped_file_reindex (rspamd_mempool_t *pool, struct stat_file_block *block; struct stat_file_header *header, *nh; struct timespec sleep_ts = { - .tv_sec = 0, - .tv_nsec = 1000000 - }; + .tv_sec = 0, + .tv_nsec = 1000000}; if (size < - sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + - sizeof (block)) { - msg_err_pool ("file %s is too small to carry any statistic: %z", - filename, - size); + sizeof(struct stat_file_header) + sizeof(struct stat_file_section) + + sizeof(block)) { + msg_err_pool("file %s is too small to carry any statistic: %z", + filename, + size); return NULL; } - lock = g_strconcat (filename, ".lock", NULL); - lock_fd = open (lock, O_WRONLY|O_CREAT|O_EXCL, 00600); + lock = g_strconcat(filename, ".lock", NULL); + lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600); while (lock_fd == -1) { /* Wait for lock */ - lock_fd = open (lock, O_WRONLY|O_CREAT|O_EXCL, 00600); + lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600); if (lock_fd != -1) { - unlink (lock); - close (lock_fd); - g_free (lock); + unlink(lock); + close(lock_fd); + g_free(lock); - return rspamd_mmaped_file_open (pool, filename, size, stcf); + return rspamd_mmaped_file_open(pool, filename, size, stcf); } else { - nanosleep (&sleep_ts, NULL); + nanosleep(&sleep_ts, NULL); } } - backup = g_strconcat (filename, ".old", NULL); - if (rename (filename, backup) == -1) { - msg_err_pool ("cannot rename %s to %s: %s", filename, backup, strerror ( - errno)); - g_free (backup); - unlink (lock); - g_free (lock); - close (lock_fd); + backup = g_strconcat(filename, ".old", NULL); + if (rename(filename, 
backup) == -1) { + msg_err_pool("cannot rename %s to %s: %s", filename, backup, strerror(errno)); + g_free(backup); + unlink(lock); + g_free(lock); + close(lock_fd); return NULL; } - old = rspamd_mmaped_file_open (pool, backup, old_size, stcf); + old = rspamd_mmaped_file_open(pool, backup, old_size, stcf); if (old == NULL) { - msg_warn_pool ("old file %s is invalid mmapped file, just move it", - backup); + msg_warn_pool("old file %s is invalid mmapped file, just move it", + backup); } /* We need to release our lock here */ - unlink (lock); - close (lock_fd); - g_free (lock); + unlink(lock); + close(lock_fd); + g_free(lock); /* Now create new file with required size */ - if (rspamd_mmaped_file_create (filename, size, stcf, pool) != 0) { - msg_err_pool ("cannot create new file"); - rspamd_mmaped_file_close (old); - g_free (backup); + if (rspamd_mmaped_file_create(filename, size, stcf, pool) != 0) { + msg_err_pool("cannot create new file"); + rspamd_mmaped_file_close(old); + g_free(backup); return NULL; } - new = rspamd_mmaped_file_open (pool, filename, size, stcf); + new = rspamd_mmaped_file_open(pool, filename, size, stcf); if (old) { /* Now open new file and start copying */ - fd = open (backup, O_RDONLY); + fd = open(backup, O_RDONLY); if (fd == -1 || new == NULL) { if (fd != -1) { - close (fd); + close(fd); } - msg_err_pool ("cannot open file: %s", strerror (errno)); - rspamd_mmaped_file_close (old); - g_free (backup); + msg_err_pool("cannot open file: %s", strerror(errno)); + rspamd_mmaped_file_close(old); + g_free(backup); return NULL; } - /* Now start reading blocks from old statfile */ if ((map = - mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { - msg_err_pool ("cannot mmap file: %s", strerror (errno)); - close (fd); - rspamd_mmaped_file_close (old); - g_free (backup); + mmap(NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err_pool("cannot mmap file: %s", strerror(errno)); + close(fd); + 
rspamd_mmaped_file_close(old); + g_free(backup); return NULL; } - pos = map + (sizeof (struct stat_file) - sizeof (struct stat_file_block)); + pos = map + (sizeof(struct stat_file) - sizeof(struct stat_file_block)); - if (pos - map < (gssize)old_size) { - while ((gssize)old_size - (pos - map) >= (gssize)sizeof (struct stat_file_block)) { - block = (struct stat_file_block *)pos; + if (pos - map < (gssize) old_size) { + while ((gssize) old_size - (pos - map) >= (gssize) sizeof(struct stat_file_block)) { + block = (struct stat_file_block *) pos; if (block->hash1 != 0 && block->value != 0) { - rspamd_mmaped_file_set_block_common (pool, - new, block->hash1, - block->hash2, block->value); + rspamd_mmaped_file_set_block_common(pool, + new, block->hash1, + block->hash2, block->value); } - pos += sizeof (block); + pos += sizeof(block); } } - header = (struct stat_file_header *)map; - rspamd_mmaped_file_set_revision (new, header->revision, header->rev_time); + header = (struct stat_file_header *) map; + rspamd_mmaped_file_set_revision(new, header->revision, header->rev_time); nh = new->map; /* Copy tokenizer configuration */ - memcpy (nh->unused, header->unused, sizeof (header->unused)); + memcpy(nh->unused, header->unused, sizeof(header->unused)); nh->tokenizer_conf_len = header->tokenizer_conf_len; - munmap (map, old_size); - close (fd); - rspamd_mmaped_file_close_file (pool, old); + munmap(map, old_size); + close(fd); + rspamd_mmaped_file_close_file(pool, old); } - unlink (backup); - g_free (backup); + unlink(backup); + g_free(backup); return new; - } /* * Pre-load mmaped file into memory */ static void -rspamd_mmaped_file_preload (rspamd_mmaped_file_t *file) +rspamd_mmaped_file_preload(rspamd_mmaped_file_t *file) { guint8 *pos, *end; volatile guint8 t; gsize size; - pos = (guint8 *)file->map; - end = (guint8 *)file->map + file->len; + pos = (guint8 *) file->map; + end = (guint8 *) file->map + file->len; - if (madvise (pos, end - pos, MADV_SEQUENTIAL) == -1) { - msg_info 
("madvise failed: %s", strerror (errno)); + if (madvise(pos, end - pos, MADV_SEQUENTIAL) == -1) { + msg_info("madvise failed: %s", strerror(errno)); } else { /* Load pages of file */ #ifdef HAVE_GETPAGESIZE - size = getpagesize (); + size = getpagesize(); #else - size = sysconf (_SC_PAGESIZE); + size = sysconf(_SC_PAGESIZE); #endif while (pos < end) { t = *pos; - (void)t; + (void) t; pos += size; } } } rspamd_mmaped_file_t * -rspamd_mmaped_file_open (rspamd_mempool_t *pool, - const gchar *filename, size_t size, - struct rspamd_statfile_config *stcf) +rspamd_mmaped_file_open(rspamd_mempool_t *pool, + const gchar *filename, size_t size, + struct rspamd_statfile_config *stcf) { struct stat st; rspamd_mmaped_file_t *new_file; gchar *lock; gint lock_fd; - lock = g_strconcat (filename, ".lock", NULL); - lock_fd = open (lock, O_WRONLY|O_CREAT|O_EXCL, 00600); + lock = g_strconcat(filename, ".lock", NULL); + lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600); if (lock_fd == -1) { - g_free (lock); - msg_info_pool ("cannot open file %s, it is locked by another process", - filename); + g_free(lock); + msg_info_pool("cannot open file %s, it is locked by another process", + filename); return NULL; } - close (lock_fd); - unlink (lock); - g_free (lock); + close(lock_fd); + unlink(lock); + g_free(lock); - if (stat (filename, &st) == -1) { - msg_info_pool ("cannot stat file %s, error %s, %d", filename, strerror ( - errno), errno); + if (stat(filename, &st) == -1) { + msg_info_pool("cannot stat file %s, error %s, %d", filename, strerror(errno), errno); return NULL; } - if (labs ((glong)size - st.st_size) > (long)sizeof (struct stat_file) * 2 - && size > sizeof (struct stat_file)) { - msg_warn_pool ("need to reindex statfile old size: %Hz, new size: %Hz", - (size_t)st.st_size, size); - return rspamd_mmaped_file_reindex (pool, filename, st.st_size, size, stcf); + if (labs((glong) size - st.st_size) > (long) sizeof(struct stat_file) * 2 && size > sizeof(struct stat_file)) { + 
msg_warn_pool("need to reindex statfile old size: %Hz, new size: %Hz", + (size_t) st.st_size, size); + return rspamd_mmaped_file_reindex(pool, filename, st.st_size, size, stcf); } - else if (size < sizeof (struct stat_file)) { - msg_err_pool ("requested to shrink statfile to %Hz but it is too small", - size); + else if (size < sizeof(struct stat_file)) { + msg_err_pool("requested to shrink statfile to %Hz but it is too small", + size); } - new_file = g_malloc0 (sizeof (rspamd_mmaped_file_t)); - if ((new_file->fd = open (filename, O_RDWR)) == -1) { - msg_info_pool ("cannot open file %s, error %d, %s", - filename, - errno, - strerror (errno)); - g_free (new_file); + new_file = g_malloc0(sizeof(rspamd_mmaped_file_t)); + if ((new_file->fd = open(filename, O_RDWR)) == -1) { + msg_info_pool("cannot open file %s, error %d, %s", + filename, + errno, + strerror(errno)); + g_free(new_file); return NULL; } if ((new_file->map = - mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, - new_file->fd, 0)) == MAP_FAILED) { - close (new_file->fd); - msg_info_pool ("cannot mmap file %s, error %d, %s", - filename, - errno, - strerror (errno)); - g_free (new_file); + mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, + new_file->fd, 0)) == MAP_FAILED) { + close(new_file->fd); + msg_info_pool("cannot mmap file %s, error %d, %s", + filename, + errno, + strerror(errno)); + g_free(new_file); return NULL; - } - rspamd_strlcpy (new_file->filename, filename, sizeof (new_file->filename)); + rspamd_strlcpy(new_file->filename, filename, sizeof(new_file->filename)); new_file->len = st.st_size; /* Try to lock pages in RAM */ /* Acquire lock for this operation */ - if (!rspamd_file_lock (new_file->fd, FALSE)) { - close (new_file->fd); - munmap (new_file->map, st.st_size); - msg_info_pool ("cannot lock file %s, error %d, %s", - filename, - errno, - strerror (errno)); - g_free (new_file); + if (!rspamd_file_lock(new_file->fd, FALSE)) { + close(new_file->fd); + munmap(new_file->map, 
st.st_size); + msg_info_pool("cannot lock file %s, error %d, %s", + filename, + errno, + strerror(errno)); + g_free(new_file); return NULL; } - if (rspamd_mmaped_file_check (pool, new_file) == -1) { - close (new_file->fd); - rspamd_file_unlock (new_file->fd, FALSE); - munmap (new_file->map, st.st_size); - g_free (new_file); + if (rspamd_mmaped_file_check(pool, new_file) == -1) { + close(new_file->fd); + rspamd_file_unlock(new_file->fd, FALSE); + munmap(new_file->map, st.st_size); + g_free(new_file); return NULL; } - rspamd_file_unlock (new_file->fd, FALSE); + rspamd_file_unlock(new_file->fd, FALSE); new_file->cf = stcf; new_file->pool = pool; - rspamd_mmaped_file_preload (new_file); + rspamd_mmaped_file_preload(new_file); - g_assert (stcf->clcf != NULL); + g_assert(stcf->clcf != NULL); - msg_debug_pool ("opened statfile %s of size %l", filename, (long)size); + msg_debug_pool("opened statfile %s of size %l", filename, (long) size); return new_file; } -gint -rspamd_mmaped_file_close_file (rspamd_mempool_t *pool, - rspamd_mmaped_file_t * file) +gint rspamd_mmaped_file_close_file(rspamd_mempool_t *pool, + rspamd_mmaped_file_t *file) { if (file->map) { - msg_info_pool ("syncing statfile %s", file->filename); - msync (file->map, file->len, MS_ASYNC); - munmap (file->map, file->len); + msg_info_pool("syncing statfile %s", file->filename); + msync(file->map, file->len, MS_ASYNC); + munmap(file->map, file->len); } if (file->fd != -1) { - close (file->fd); + close(file->fd); } - g_free (file); + g_free(file); return 0; } -gint -rspamd_mmaped_file_create (const gchar *filename, - size_t size, - struct rspamd_statfile_config *stcf, - rspamd_mempool_t *pool) +gint rspamd_mmaped_file_create(const gchar *filename, + size_t size, + struct rspamd_statfile_config *stcf, + rspamd_mempool_t *pool) { struct stat_file_header header = { .magic = {'r', 's', 'd'}, @@ -687,12 +677,11 @@ rspamd_mmaped_file_create (const gchar *filename, .padding = {0, 0, 0}, .revision = 0, .rev_time = 0, - 
.used_blocks = 0 - }; + .used_blocks = 0}; struct stat_file_section section = { .code = STATFILE_SECTION_COMMON, }; - struct stat_file_block block = { 0, 0, 0 }; + struct stat_file_block block = {0, 0, 0}; struct rspamd_stat_tokenizer *tokenizer; gint fd, lock_fd; guint buflen = 0, nblocks; @@ -701,31 +690,30 @@ rspamd_mmaped_file_create (const gchar *filename, gpointer tok_conf; gsize tok_conf_len; struct timespec sleep_ts = { - .tv_sec = 0, - .tv_nsec = 1000000 - }; + .tv_sec = 0, + .tv_nsec = 1000000}; if (size < - sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + - sizeof (block)) { - msg_err_pool ("file %s is too small to carry any statistic: %z", - filename, - size); + sizeof(struct stat_file_header) + sizeof(struct stat_file_section) + + sizeof(block)) { + msg_err_pool("file %s is too small to carry any statistic: %z", + filename, + size); return -1; } - lock = g_strconcat (filename, ".lock", NULL); - lock_fd = open (lock, O_WRONLY|O_CREAT|O_EXCL, 00600); + lock = g_strconcat(filename, ".lock", NULL); + lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600); while (lock_fd == -1) { /* Wait for lock */ - lock_fd = open (lock, O_WRONLY|O_CREAT|O_EXCL, 00600); + lock_fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600); if (lock_fd != -1) { - if (stat (filename, &sb) != -1) { + if (stat(filename, &sb) != -1) { /* File has been created by some other process */ - unlink (lock); - close (lock_fd); - g_free (lock); + unlink(lock); + close(lock_fd); + g_free(lock); return 0; } @@ -734,110 +722,111 @@ rspamd_mmaped_file_create (const gchar *filename, goto create; } else { - nanosleep (&sleep_ts, NULL); + nanosleep(&sleep_ts, NULL); } } create: - msg_debug_pool ("create statfile %s of size %l", filename, (long)size); + msg_debug_pool("create statfile %s of size %l", filename, (long) size); nblocks = - (size - sizeof (struct stat_file_header) - - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block); + (size - sizeof(struct 
stat_file_header) - + sizeof(struct stat_file_section)) / + sizeof(struct stat_file_block); header.total_blocks = nblocks; if ((fd = - open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info_pool ("cannot create file %s, error %d, %s", - filename, - errno, - strerror (errno)); - unlink (lock); - close (lock_fd); - g_free (lock); + open(filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info_pool("cannot create file %s, error %d, %s", + filename, + errno, + strerror(errno)); + unlink(lock); + close(lock_fd); + g_free(lock); return -1; } - rspamd_fallocate (fd, - 0, - sizeof (header) + sizeof (section) + sizeof (block) * nblocks); + rspamd_fallocate(fd, + 0, + sizeof(header) + sizeof(section) + sizeof(block) * nblocks); - header.create_time = (guint64) time (NULL); - g_assert (stcf->clcf != NULL); - g_assert (stcf->clcf->tokenizer != NULL); - tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name); - g_assert (tokenizer != NULL); - tok_conf = tokenizer->get_config (pool, stcf->clcf->tokenizer, &tok_conf_len); + header.create_time = (guint64) time(NULL); + g_assert(stcf->clcf != NULL); + g_assert(stcf->clcf->tokenizer != NULL); + tokenizer = rspamd_stat_get_tokenizer(stcf->clcf->tokenizer->name); + g_assert(tokenizer != NULL); + tok_conf = tokenizer->get_config(pool, stcf->clcf->tokenizer, &tok_conf_len); header.tokenizer_conf_len = tok_conf_len; - g_assert (tok_conf_len < sizeof (header.unused) - sizeof (guint64)); - memcpy (header.unused, tok_conf, tok_conf_len); - - if (write (fd, &header, sizeof (header)) == -1) { - msg_info_pool ("cannot write header to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); - unlink (lock); - close (lock_fd); - g_free (lock); + g_assert(tok_conf_len < sizeof(header.unused) - sizeof(guint64)); + memcpy(header.unused, tok_conf, tok_conf_len); + + if (write(fd, &header, sizeof(header)) == -1) { + msg_info_pool("cannot write header to file %s, 
error %d, %s", + filename, + errno, + strerror(errno)); + close(fd); + unlink(lock); + close(lock_fd); + g_free(lock); return -1; } section.length = (guint64) nblocks; - if (write (fd, &section, sizeof (section)) == -1) { - msg_info_pool ("cannot write section header to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); - unlink (lock); - close (lock_fd); - g_free (lock); + if (write(fd, &section, sizeof(section)) == -1) { + msg_info_pool("cannot write section header to file %s, error %d, %s", + filename, + errno, + strerror(errno)); + close(fd); + unlink(lock); + close(lock_fd); + g_free(lock); return -1; } /* Buffer for write 256 blocks at once */ if (nblocks > 256) { - buflen = sizeof (block) * 256; - buf = g_malloc0 (buflen); + buflen = sizeof(block) * 256; + buf = g_malloc0(buflen); } while (nblocks) { if (nblocks > 256) { /* Just write buffer */ - if (write (fd, buf, buflen) == -1) { - msg_info_pool ("cannot write blocks buffer to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); - g_free (buf); - unlink (lock); - close (lock_fd); - g_free (lock); + if (write(fd, buf, buflen) == -1) { + msg_info_pool("cannot write blocks buffer to file %s, error %d, %s", + filename, + errno, + strerror(errno)); + close(fd); + g_free(buf); + unlink(lock); + close(lock_fd); + g_free(lock); return -1; } nblocks -= 256; } else { - if (write (fd, &block, sizeof (block)) == -1) { - msg_info_pool ("cannot write block to file %s, error %d, %s", - filename, - errno, - strerror (errno)); - close (fd); + if (write(fd, &block, sizeof(block)) == -1) { + msg_info_pool("cannot write block to file %s, error %d, %s", + filename, + errno, + strerror(errno)); + close(fd); if (buf) { - g_free (buf); + g_free(buf); } - unlink (lock); - close (lock_fd); - g_free (lock); + unlink(lock); + close(lock_fd); + g_free(lock); return -1; } @@ -845,23 +834,23 @@ create: } } - close (fd); + close(fd); if (buf) { - g_free (buf); + g_free(buf); } - unlink
(lock); - close (lock_fd); - g_free (lock); - msg_debug_pool ("created statfile %s of size %l", filename, (long)size); + unlink(lock); + close(lock_fd); + g_free(lock); + msg_debug_pool("created statfile %s of size %l", filename, (long) size); return 0; } gpointer -rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, struct rspamd_statfile *st) +rspamd_mmaped_file_init(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, struct rspamd_statfile *st) { struct rspamd_statfile_config *stf = st->stcf; rspamd_mmaped_file_t *mf; @@ -869,105 +858,104 @@ rspamd_mmaped_file_init (struct rspamd_stat_ctx *ctx, const gchar *filename; gsize size; - filenameo = ucl_object_lookup (stf->opts, "filename"); + filenameo = ucl_object_lookup(stf->opts, "filename"); - if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { - filenameo = ucl_object_lookup (stf->opts, "path"); + if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) { + filenameo = ucl_object_lookup(stf->opts, "path"); - if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { - msg_err_config ("statfile %s has no filename defined", stf->symbol); + if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) { + msg_err_config("statfile %s has no filename defined", stf->symbol); return NULL; } } - filename = ucl_object_tostring (filenameo); + filename = ucl_object_tostring(filenameo); - sizeo = ucl_object_lookup (stf->opts, "size"); + sizeo = ucl_object_lookup(stf->opts, "size"); - if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) { - msg_err_config ("statfile %s has no size defined", stf->symbol); + if (sizeo == NULL || ucl_object_type(sizeo) != UCL_INT) { + msg_err_config("statfile %s has no size defined", stf->symbol); return NULL; } - size = ucl_object_toint (sizeo); - mf = rspamd_mmaped_file_open (cfg->cfg_pool, filename, size, stf); + size = ucl_object_toint(sizeo); + mf = rspamd_mmaped_file_open(cfg->cfg_pool, filename, size, 
stf); if (mf != NULL) { mf->pool = cfg->cfg_pool; - } else { + } + else { /* Create file here */ - filenameo = ucl_object_find_key (stf->opts, "filename"); - if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { - filenameo = ucl_object_find_key (stf->opts, "path"); - if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { - msg_err_config ("statfile %s has no filename defined", stf->symbol); + filenameo = ucl_object_find_key(stf->opts, "filename"); + if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) { + filenameo = ucl_object_find_key(stf->opts, "path"); + if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) { + msg_err_config("statfile %s has no filename defined", stf->symbol); return NULL; } } - filename = ucl_object_tostring (filenameo); + filename = ucl_object_tostring(filenameo); - sizeo = ucl_object_find_key (stf->opts, "size"); - if (sizeo == NULL || ucl_object_type (sizeo) != UCL_INT) { - msg_err_config ("statfile %s has no size defined", stf->symbol); + sizeo = ucl_object_find_key(stf->opts, "size"); + if (sizeo == NULL || ucl_object_type(sizeo) != UCL_INT) { + msg_err_config("statfile %s has no size defined", stf->symbol); return NULL; } - size = ucl_object_toint (sizeo); + size = ucl_object_toint(sizeo); - if (rspamd_mmaped_file_create (filename, size, stf, cfg->cfg_pool) != 0) { - msg_err_config ("cannot create new file"); + if (rspamd_mmaped_file_create(filename, size, stf, cfg->cfg_pool) != 0) { + msg_err_config("cannot create new file"); } - mf = rspamd_mmaped_file_open (cfg->cfg_pool, filename, size, stf); + mf = rspamd_mmaped_file_open(cfg->cfg_pool, filename, size, stf); } - return (gpointer)mf; + return (gpointer) mf; } -void -rspamd_mmaped_file_close (gpointer p) +void rspamd_mmaped_file_close(gpointer p) { rspamd_mmaped_file_t *mf = p; if (mf) { - rspamd_mmaped_file_close_file (mf->pool, mf); + rspamd_mmaped_file_close_file(mf->pool, mf); } - } gpointer -rspamd_mmaped_file_runtime 
(struct rspamd_task *task, - struct rspamd_statfile_config *stcf, - gboolean learn, - gpointer p, - gint _id) +rspamd_mmaped_file_runtime(struct rspamd_task *task, + struct rspamd_statfile_config *stcf, + gboolean learn, + gpointer p, + gint _id) { rspamd_mmaped_file_t *mf = p; - return (gpointer)mf; + return (gpointer) mf; } gboolean -rspamd_mmaped_file_process_tokens (struct rspamd_task *task, GPtrArray *tokens, - gint id, - gpointer p) +rspamd_mmaped_file_process_tokens(struct rspamd_task *task, GPtrArray *tokens, + gint id, + gpointer p) { rspamd_mmaped_file_t *mf = p; guint32 h1, h2; rspamd_token_t *tok; guint i; - g_assert (tokens != NULL); - g_assert (p != NULL); + g_assert(tokens != NULL); + g_assert(p != NULL); for (i = 0; i < tokens->len; i++) { - tok = g_ptr_array_index (tokens, i); - memcpy (&h1, (guchar *)&tok->data, sizeof (h1)); - memcpy (&h2, ((guchar *)&tok->data) + sizeof (h1), sizeof (h2)); - tok->values[id] = rspamd_mmaped_file_get_block (mf, h1, h2); + tok = g_ptr_array_index(tokens, i); + memcpy(&h1, (guchar *) &tok->data, sizeof(h1)); + memcpy(&h2, ((guchar *) &tok->data) + sizeof(h1), sizeof(h2)); + tok->values[id] = rspamd_mmaped_file_get_block(mf, h1, h2); } if (mf->cf->is_spam) { @@ -981,71 +969,71 @@ rspamd_mmaped_file_process_tokens (struct rspamd_task *task, GPtrArray *tokens, } gboolean -rspamd_mmaped_file_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, - gint id, - gpointer p) +rspamd_mmaped_file_learn_tokens(struct rspamd_task *task, GPtrArray *tokens, + gint id, + gpointer p) { rspamd_mmaped_file_t *mf = p; guint32 h1, h2; rspamd_token_t *tok; guint i; - g_assert (tokens != NULL); - g_assert (p != NULL); + g_assert(tokens != NULL); + g_assert(p != NULL); for (i = 0; i < tokens->len; i++) { - tok = g_ptr_array_index (tokens, i); - memcpy (&h1, (guchar *)&tok->data, sizeof (h1)); - memcpy (&h2, ((guchar *)&tok->data) + sizeof (h1), sizeof (h2)); - rspamd_mmaped_file_set_block (task->task_pool, mf, h1, h2, - 
tok->values[id]); + tok = g_ptr_array_index(tokens, i); + memcpy(&h1, (guchar *) &tok->data, sizeof(h1)); + memcpy(&h2, ((guchar *) &tok->data) + sizeof(h1), sizeof(h2)); + rspamd_mmaped_file_set_block(task->task_pool, mf, h1, h2, + tok->values[id]); } return TRUE; } gulong -rspamd_mmaped_file_total_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_mmaped_file_total_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *)runtime; + rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime; guint64 rev = 0; time_t t; if (mf != NULL) { - rspamd_mmaped_file_get_revision (mf, &rev, &t); + rspamd_mmaped_file_get_revision(mf, &rev, &t); } return rev; } gulong -rspamd_mmaped_file_inc_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_mmaped_file_inc_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *)runtime; + rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime; guint64 rev = 0; time_t t; if (mf != NULL) { - rspamd_mmaped_file_inc_revision (mf); - rspamd_mmaped_file_get_revision (mf, &rev, &t); + rspamd_mmaped_file_inc_revision(mf); + rspamd_mmaped_file_get_revision(mf, &rev, &t); } return rev; } gulong -rspamd_mmaped_file_dec_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_mmaped_file_dec_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *)runtime; + rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime; guint64 rev = 0; time_t t; if (mf != NULL) { - rspamd_mmaped_file_dec_revision (mf); - rspamd_mmaped_file_get_revision (mf, &rev, &t); + rspamd_mmaped_file_dec_revision(mf); + rspamd_mmaped_file_get_revision(mf, &rev, &t); } return rev; @@ -1053,36 +1041,34 @@ rspamd_mmaped_file_dec_learns (struct rspamd_task *task, gpointer runtime, ucl_object_t * 
-rspamd_mmaped_file_get_stat (gpointer runtime, - gpointer ctx) +rspamd_mmaped_file_get_stat(gpointer runtime, + gpointer ctx) { ucl_object_t *res = NULL; guint64 rev; - rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *)runtime; + rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime; if (mf != NULL) { - res = ucl_object_typed_new (UCL_OBJECT); - rspamd_mmaped_file_get_revision (mf, &rev, NULL); - ucl_object_insert_key (res, ucl_object_fromint (rev), "revision", - 0, false); - ucl_object_insert_key (res, ucl_object_fromint (mf->len), "size", - 0, false); - ucl_object_insert_key (res, ucl_object_fromint ( - rspamd_mmaped_file_get_total (mf)), "total", 0, false); - ucl_object_insert_key (res, ucl_object_fromint ( - rspamd_mmaped_file_get_used (mf)), "used", 0, false); - ucl_object_insert_key (res, ucl_object_fromstring (mf->cf->symbol), - "symbol", 0, false); - ucl_object_insert_key (res, ucl_object_fromstring ("mmap"), - "type", 0, false); - ucl_object_insert_key (res, ucl_object_fromint (0), - "languages", 0, false); - ucl_object_insert_key (res, ucl_object_fromint (0), - "users", 0, false); + res = ucl_object_typed_new(UCL_OBJECT); + rspamd_mmaped_file_get_revision(mf, &rev, NULL); + ucl_object_insert_key(res, ucl_object_fromint(rev), "revision", + 0, false); + ucl_object_insert_key(res, ucl_object_fromint(mf->len), "size", + 0, false); + ucl_object_insert_key(res, ucl_object_fromint(rspamd_mmaped_file_get_total(mf)), "total", 0, false); + ucl_object_insert_key(res, ucl_object_fromint(rspamd_mmaped_file_get_used(mf)), "used", 0, false); + ucl_object_insert_key(res, ucl_object_fromstring(mf->cf->symbol), + "symbol", 0, false); + ucl_object_insert_key(res, ucl_object_fromstring("mmap"), + "type", 0, false); + ucl_object_insert_key(res, ucl_object_fromint(0), + "languages", 0, false); + ucl_object_insert_key(res, ucl_object_fromint(0), + "users", 0, false); if (mf->cf->label) { - ucl_object_insert_key (res, ucl_object_fromstring (mf->cf->label), - "label", 0, 
false); + ucl_object_insert_key(res, ucl_object_fromstring(mf->cf->label), + "label", 0, false); } } @@ -1090,33 +1076,33 @@ rspamd_mmaped_file_get_stat (gpointer runtime, } gboolean -rspamd_mmaped_file_finalize_learn (struct rspamd_task *task, gpointer runtime, - gpointer ctx, GError **err) +rspamd_mmaped_file_finalize_learn(struct rspamd_task *task, gpointer runtime, + gpointer ctx, GError **err) { - rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *)runtime; + rspamd_mmaped_file_t *mf = (rspamd_mmaped_file_t *) runtime; if (mf != NULL) { - msync (mf->map, mf->len, MS_INVALIDATE | MS_ASYNC); + msync(mf->map, mf->len, MS_INVALIDATE | MS_ASYNC); } return TRUE; } gboolean -rspamd_mmaped_file_finalize_process (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_mmaped_file_finalize_process(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { return TRUE; } gpointer -rspamd_mmaped_file_load_tokenizer_config (gpointer runtime, - gsize *len) +rspamd_mmaped_file_load_tokenizer_config(gpointer runtime, + gsize *len) { rspamd_mmaped_file_t *mf = runtime; struct stat_file_header *header; - g_assert (mf != NULL); + g_assert(mf != NULL); header = mf->map; if (len) { diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c index 883d3d00c..2e4711ae9 100644 --- a/src/libstat/backends/redis_backend.c +++ b/src/libstat/backends/redis_backend.c @@ -23,15 +23,15 @@ #include "adapters/libev.h" #include "ref.h" -#define msg_debug_stat_redis(...) rspamd_conditional_debug_fast (NULL, NULL, \ - rspamd_stat_redis_log_id, "stat_redis", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) +#define msg_debug_stat_redis(...) 
rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_stat_redis_log_id, "stat_redis", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(stat_redis) -#define REDIS_CTX(p) (struct redis_stat_ctx *)(p) -#define REDIS_RUNTIME(p) (struct redis_stat_runtime *)(p) +#define REDIS_CTX(p) (struct redis_stat_ctx *) (p) +#define REDIS_RUNTIME(p) (struct redis_stat_runtime *) (p) #define REDIS_BACKEND_TYPE "redis" #define REDIS_DEFAULT_PORT 6379 #define REDIS_DEFAULT_OBJECT "%s%l" @@ -106,23 +106,23 @@ struct rspamd_redis_stat_cbdata { static const gchar *M = "redis statistics"; static GQuark -rspamd_redis_stat_quark (void) +rspamd_redis_stat_quark(void) { - return g_quark_from_static_string (M); + return g_quark_from_static_string(M); } static inline struct upstream_list * -rspamd_redis_get_servers (struct redis_stat_ctx *ctx, - const gchar *what) +rspamd_redis_get_servers(struct redis_stat_ctx *ctx, + const gchar *what) { lua_State *L = ctx->L; struct upstream_list *res; - lua_rawgeti (L, LUA_REGISTRYINDEX, ctx->conf_ref); - lua_pushstring (L, what); - lua_gettable (L, -2); - res = *((struct upstream_list**)lua_touserdata (L, -1)); - lua_settop (L, 0); + lua_rawgeti(L, LUA_REGISTRYINDEX, ctx->conf_ref); + lua_pushstring(L, what); + lua_gettable(L, -2); + res = *((struct upstream_list **) lua_touserdata(L, -1)); + lua_settop(L, 0); return res; } @@ -130,16 +130,15 @@ rspamd_redis_get_servers (struct redis_stat_ctx *ctx, /* * Non-static for lua unit testing */ -gsize -rspamd_redis_expand_object (const gchar *pattern, - struct redis_stat_ctx *ctx, - struct rspamd_task *task, - gchar **target) +gsize rspamd_redis_expand_object(const gchar *pattern, + struct redis_stat_ctx *ctx, + struct rspamd_task *task, + gchar **target) { gsize tlen = 0; const gchar *p = pattern, *elt; gchar *d, *end; - enum { + enum { just_char, percent_char, mod_char @@ -150,42 +149,42 @@ rspamd_redis_expand_object (const gchar *pattern, const gchar *rcpt = NULL; gint err_idx; - 
g_assert (ctx != NULL); - g_assert (task != NULL); + g_assert(ctx != NULL); + g_assert(task != NULL); stcf = ctx->stcf; L = task->cfg->lua_state; - g_assert (L != NULL); + g_assert(L != NULL); if (ctx->enable_users) { if (ctx->cbref_user == -1) { - rcpt = rspamd_task_get_principal_recipient (task); + rcpt = rspamd_task_get_principal_recipient(task); } else { /* Execute lua function to get userdata */ - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); - lua_rawgeti (L, LUA_REGISTRYINDEX, ctx->cbref_user); - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); + lua_rawgeti(L, LUA_REGISTRYINDEX, ctx->cbref_user); + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); + rspamd_lua_setclass(L, "rspamd{task}", -1); - if (lua_pcall (L, 1, 1, err_idx) != 0) { - msg_err_task ("call to user extraction script failed: %s", - lua_tostring (L, -1)); + if (lua_pcall(L, 1, 1, err_idx) != 0) { + msg_err_task("call to user extraction script failed: %s", + lua_tostring(L, -1)); } else { - rcpt = rspamd_mempool_strdup (task->task_pool, lua_tostring (L, -1)); + rcpt = rspamd_mempool_strdup(task->task_pool, lua_tostring(L, -1)); } /* Result + error function */ - lua_settop (L, err_idx - 1); + lua_settop(L, err_idx - 1); } if (rcpt) { - rspamd_mempool_set_variable (task->task_pool, "stat_user", - (gpointer)rcpt, NULL); + rspamd_mempool_set_variable(task->task_pool, "stat_user", + (gpointer) rcpt, NULL); } } @@ -197,67 +196,67 @@ rspamd_redis_expand_object (const gchar *pattern, state = percent_char; } else { - tlen ++; + tlen++; } - p ++; + p++; break; case percent_char: switch (*p) { case '%': - tlen ++; + tlen++; state = just_char; break; case 'u': - elt = GET_TASK_ELT (task, auth_user); + elt = GET_TASK_ELT(task, auth_user); if (elt) { - tlen += strlen (elt); + tlen += strlen(elt); } break; case 'r': if (rcpt == 
NULL) { - elt = rspamd_task_get_principal_recipient (task); + elt = rspamd_task_get_principal_recipient(task); } else { elt = rcpt; } if (elt) { - tlen += strlen (elt); + tlen += strlen(elt); } break; case 'l': if (stcf->label) { - tlen += strlen (stcf->label); + tlen += strlen(stcf->label); } /* Label miss is OK */ break; case 's': if (ctx->new_schema) { - tlen += sizeof ("RS") - 1; + tlen += sizeof("RS") - 1; } else { if (stcf->symbol) { - tlen += strlen (stcf->symbol); + tlen += strlen(stcf->symbol); } } break; default: state = just_char; - tlen ++; + tlen++; break; } if (state == percent_char) { state = mod_char; } - p ++; + p++; break; case mod_char: switch (*p) { case 'd': - p ++; + p++; state = just_char; break; default: @@ -273,7 +272,7 @@ rspamd_redis_expand_object (const gchar *pattern, return -1; } - *target = rspamd_mempool_alloc (task->task_pool, tlen + 1); + *target = rspamd_mempool_alloc(task->task_pool, tlen + 1); d = *target; end = d + tlen + 1; d[tlen] = '\0'; @@ -290,7 +289,7 @@ rspamd_redis_expand_object (const gchar *pattern, else { *d++ = *p; } - p ++; + p++; break; case percent_char: switch (*p) { @@ -299,35 +298,35 @@ rspamd_redis_expand_object (const gchar *pattern, state = just_char; break; case 'u': - elt = GET_TASK_ELT (task, auth_user); + elt = GET_TASK_ELT(task, auth_user); if (elt) { - d += rspamd_strlcpy (d, elt, end - d); + d += rspamd_strlcpy(d, elt, end - d); } break; case 'r': if (rcpt == NULL) { - elt = rspamd_task_get_principal_recipient (task); + elt = rspamd_task_get_principal_recipient(task); } else { elt = rcpt; } if (elt) { - d += rspamd_strlcpy (d, elt, end - d); + d += rspamd_strlcpy(d, elt, end - d); } break; case 'l': if (stcf->label) { - d += rspamd_strlcpy (d, stcf->label, end - d); + d += rspamd_strlcpy(d, stcf->label, end - d); } break; case 's': if (ctx->new_schema) { - d += rspamd_strlcpy (d, "RS", end - d); + d += rspamd_strlcpy(d, "RS", end - d); } else { if (stcf->symbol) { - d += rspamd_strlcpy (d, 
stcf->symbol, end - d); + d += rspamd_strlcpy(d, stcf->symbol, end - d); } } break; @@ -340,14 +339,14 @@ rspamd_redis_expand_object (const gchar *pattern, if (state == percent_char) { state = mod_char; } - p ++; + p++; break; case mod_char: switch (*p) { case 'd': /* TODO: not supported yet */ - p ++; + p++; state = just_char; break; default: @@ -362,13 +361,13 @@ rspamd_redis_expand_object (const gchar *pattern, } static void -rspamd_redis_maybe_auth (struct redis_stat_ctx *ctx, redisAsyncContext *redis) +rspamd_redis_maybe_auth(struct redis_stat_ctx *ctx, redisAsyncContext *redis) { if (ctx->password) { - redisAsyncCommand (redis, NULL, NULL, "AUTH %s", ctx->password); + redisAsyncCommand(redis, NULL, NULL, "AUTH %s", ctx->password); } if (ctx->dbname) { - redisAsyncCommand (redis, NULL, NULL, "SELECT %s", ctx->dbname); + redisAsyncCommand(redis, NULL, NULL, "SELECT %s", ctx->dbname); } } @@ -379,14 +378,14 @@ rspamd_redis_maybe_auth (struct redis_stat_ctx *ctx, redisAsyncContext *redis) #pragma GCC diagnostic ignored "-Wformat-extra-args" #endif static rspamd_fstring_t * -rspamd_redis_tokens_to_query (struct rspamd_task *task, - struct redis_stat_runtime *rt, - GPtrArray *tokens, - const gchar *command, - const gchar *prefix, - gboolean learn, - gint idx, - gboolean intvals) +rspamd_redis_tokens_to_query(struct rspamd_task *task, + struct redis_stat_runtime *rt, + GPtrArray *tokens, + const gchar *command, + const gchar *prefix, + gboolean learn, + gint idx, + gboolean intvals) { rspamd_fstring_t *out; rspamd_token_t *tok; @@ -394,21 +393,21 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, guint i, l0, l1, cmd_len, prefix_len; gint ret; - g_assert (tokens != NULL); + g_assert(tokens != NULL); - cmd_len = strlen (command); - prefix_len = strlen (prefix); - out = rspamd_fstring_sized_new (1024); + cmd_len = strlen(command); + prefix_len = strlen(prefix); + out = rspamd_fstring_sized_new(1024); if (learn) { - rspamd_printf_fstring (&out, 
"*1\r\n$5\r\nMULTI\r\n"); + rspamd_printf_fstring(&out, "*1\r\n$5\r\nMULTI\r\n"); - ret = redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); + ret = redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); if (ret != REDIS_OK) { - msg_err_task ("call to redis failed: %s", rt->redis->errstr); - rspamd_fstring_free (out); + msg_err_task("call to redis failed: %s", rt->redis->errstr); + rspamd_fstring_free(out); return NULL; } @@ -418,14 +417,14 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, else { if (rt->ctx->new_schema) { /* Multi + HGET */ - rspamd_printf_fstring (&out, "*1\r\n$5\r\nMULTI\r\n"); + rspamd_printf_fstring(&out, "*1\r\n$5\r\nMULTI\r\n"); - ret = redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); + ret = redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); if (ret != REDIS_OK) { - msg_err_task ("call to redis failed: %s", rt->redis->errstr); - rspamd_fstring_free (out); + msg_err_task("call to redis failed: %s", rt->redis->errstr); + rspamd_fstring_free(out); return NULL; } @@ -433,81 +432,82 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, out->len = 0; } else { - rspamd_printf_fstring (&out, "" - "*%d\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n", - (tokens->len + 2), - cmd_len, command, - prefix_len, prefix); + rspamd_printf_fstring(&out, "" + "*%d\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n", + (tokens->len + 2), + cmd_len, command, + prefix_len, prefix); } } - for (i = 0; i < tokens->len; i ++) { - tok = g_ptr_array_index (tokens, i); + for (i = 0; i < tokens->len; i++) { + tok = g_ptr_array_index(tokens, i); if (learn) { if (intvals) { - l1 = rspamd_snprintf (n1, sizeof (n1), "%L", - (gint64) tok->values[idx]); - } else { - l1 = rspamd_snprintf (n1, sizeof (n1), "%f", - tok->values[idx]); + l1 = rspamd_snprintf(n1, sizeof(n1), "%L", + (gint64) tok->values[idx]); + } + else { + l1 = rspamd_snprintf(n1, sizeof(n1), "%f", + 
tok->values[idx]); } if (rt->ctx->new_schema) { /* * HINCRBY <prefix_token> <0|1> <value> */ - l0 = rspamd_snprintf (n0, sizeof (n0), "%*s_%uL", - prefix_len, prefix, - tok->data); - - rspamd_printf_fstring (&out, "" - "*4\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n", - cmd_len, command, - l0, n0, - 1, rt->stcf->is_spam ? "S" : "H", - l1, n1); + l0 = rspamd_snprintf(n0, sizeof(n0), "%*s_%uL", + prefix_len, prefix, + tok->data); + + rspamd_printf_fstring(&out, "" + "*4\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n", + cmd_len, command, + l0, n0, + 1, rt->stcf->is_spam ? "S" : "H", + l1, n1); } else { - l0 = rspamd_snprintf (n0, sizeof (n0), "%uL", tok->data); + l0 = rspamd_snprintf(n0, sizeof(n0), "%uL", tok->data); /* * HINCRBY <prefix> <token> <value> */ - rspamd_printf_fstring (&out, "" - "*4\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n", - cmd_len, command, - prefix_len, prefix, - l0, n0, - l1, n1); + rspamd_printf_fstring(&out, "" + "*4\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n", + cmd_len, command, + prefix_len, prefix, + l0, n0, + l1, n1); } - ret = redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); + ret = redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); if (ret != REDIS_OK) { - msg_err_task ("call to redis failed: %s", rt->redis->errstr); - rspamd_fstring_free (out); + msg_err_task("call to redis failed: %s", rt->redis->errstr); + rspamd_fstring_free(out); return NULL; } @@ -521,19 +521,20 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, * ZINCRBY prefix_z 1.0 <token_id> */ if (tok->t1 && tok->t2) { - redisAsyncCommand (rt->redis, NULL, NULL, - "HSET %b_tokens %b %b:%b", - prefix, (size_t) prefix_len, - n0, (size_t) l0, - tok->t1->stemmed.begin, tok->t1->stemmed.len, - tok->t2->stemmed.begin, 
tok->t2->stemmed.len); - } else if (tok->t1) { - redisAsyncCommand (rt->redis, NULL, NULL, - "HSET %b_tokens %b %b", - prefix, (size_t) prefix_len, - n0, (size_t) l0, - tok->t1->stemmed.begin, - tok->t1->stemmed.len); + redisAsyncCommand(rt->redis, NULL, NULL, + "HSET %b_tokens %b %b:%b", + prefix, (size_t) prefix_len, + n0, (size_t) l0, + tok->t1->stemmed.begin, tok->t1->stemmed.len, + tok->t2->stemmed.begin, tok->t2->stemmed.len); + } + else if (tok->t1) { + redisAsyncCommand(rt->redis, NULL, NULL, + "HSET %b_tokens %b %b", + prefix, (size_t) prefix_len, + n0, (size_t) l0, + tok->t1->stemmed.begin, + tok->t1->stemmed.len); } } else { @@ -543,73 +544,74 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, * ZINCRBY prefix_z 1.0 <token_id> */ if (tok->t1 && tok->t2) { - redisAsyncCommand (rt->redis, NULL, NULL, - "HSET %b %s %b:%b", - n0, (size_t) l0, - "tokens", - tok->t1->stemmed.begin, tok->t1->stemmed.len, - tok->t2->stemmed.begin, tok->t2->stemmed.len); - } else if (tok->t1) { - redisAsyncCommand (rt->redis, NULL, NULL, - "HSET %b %s %b", - n0, (size_t) l0, - "tokens", - tok->t1->stemmed.begin, tok->t1->stemmed.len); + redisAsyncCommand(rt->redis, NULL, NULL, + "HSET %b %s %b:%b", + n0, (size_t) l0, + "tokens", + tok->t1->stemmed.begin, tok->t1->stemmed.len, + tok->t2->stemmed.begin, tok->t2->stemmed.len); + } + else if (tok->t1) { + redisAsyncCommand(rt->redis, NULL, NULL, + "HSET %b %s %b", + n0, (size_t) l0, + "tokens", + tok->t1->stemmed.begin, tok->t1->stemmed.len); } } - redisAsyncCommand (rt->redis, NULL, NULL, - "ZINCRBY %b_z %b %b", - prefix, (size_t)prefix_len, - n1, (size_t)l1, - n0, (size_t)l0); + redisAsyncCommand(rt->redis, NULL, NULL, + "ZINCRBY %b_z %b %b", + prefix, (size_t) prefix_len, + n1, (size_t) l1, + n0, (size_t) l0); } if (rt->ctx->new_schema && rt->ctx->expiry > 0) { out->len = 0; - l1 = rspamd_snprintf (n1, sizeof (n1), "%d", - rt->ctx->expiry); - - rspamd_printf_fstring (&out, "" - "*3\r\n" - "$6\r\n" - "EXPIRE\r\n" - 
"$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n", - l0, n0, - l1, n1); - redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); + l1 = rspamd_snprintf(n1, sizeof(n1), "%d", + rt->ctx->expiry); + + rspamd_printf_fstring(&out, "" + "*3\r\n" + "$6\r\n" + "EXPIRE\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n", + l0, n0, + l1, n1); + redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); } out->len = 0; } else { if (rt->ctx->new_schema) { - l0 = rspamd_snprintf (n0, sizeof (n0), "%*s_%uL", - prefix_len, prefix, - tok->data); - - rspamd_printf_fstring (&out, "" - "*3\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n", - cmd_len, command, - l0, n0, - 1, rt->stcf->is_spam ? "S" : "H"); - - ret = redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); + l0 = rspamd_snprintf(n0, sizeof(n0), "%*s_%uL", + prefix_len, prefix, + tok->data); + + rspamd_printf_fstring(&out, "" + "*3\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n", + cmd_len, command, + l0, n0, + 1, rt->stcf->is_spam ? 
"S" : "H"); + + ret = redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); if (ret != REDIS_OK) { - msg_err_task ("call to redis failed: %s", rt->redis->errstr); - rspamd_fstring_free (out); + msg_err_task("call to redis failed: %s", rt->redis->errstr); + rspamd_fstring_free(out); return NULL; } @@ -617,16 +619,17 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, out->len = 0; } else { - l0 = rspamd_snprintf (n0, sizeof (n0), "%uL", tok->data); - rspamd_printf_fstring (&out, "" - "$%d\r\n" - "%s\r\n", l0, n0); + l0 = rspamd_snprintf(n0, sizeof(n0), "%uL", tok->data); + rspamd_printf_fstring(&out, "" + "$%d\r\n" + "%s\r\n", + l0, n0); } } } if (!learn && rt->ctx->new_schema) { - rspamd_printf_fstring (&out, "*1\r\n$4\r\nEXEC\r\n"); + rspamd_printf_fstring(&out, "*1\r\n$4\r\nEXEC\r\n"); } return out; @@ -636,84 +639,86 @@ rspamd_redis_tokens_to_query (struct rspamd_task *task, #endif static void -rspamd_redis_store_stat_signature (struct rspamd_task *task, - struct redis_stat_runtime *rt, - GPtrArray *tokens, - const gchar *prefix) +rspamd_redis_store_stat_signature(struct rspamd_task *task, + struct redis_stat_runtime *rt, + GPtrArray *tokens, + const gchar *prefix) { gchar *sig, keybuf[512], nbuf[64]; rspamd_token_t *tok; guint i, blen, klen; rspamd_fstring_t *out; - sig = rspamd_mempool_get_variable (task->task_pool, - RSPAMD_MEMPOOL_STAT_SIGNATURE); + sig = rspamd_mempool_get_variable(task->task_pool, + RSPAMD_MEMPOOL_STAT_SIGNATURE); if (sig == NULL) { - msg_err_task ("cannot get bayes signature"); + msg_err_task("cannot get bayes signature"); return; } - out = rspamd_fstring_sized_new (1024); - klen = rspamd_snprintf (keybuf, sizeof (keybuf), "%s_%s_%s", - prefix, sig, rt->stcf->is_spam ? "S" : "H"); + out = rspamd_fstring_sized_new(1024); + klen = rspamd_snprintf(keybuf, sizeof(keybuf), "%s_%s_%s", + prefix, sig, rt->stcf->is_spam ? 
"S" : "H"); /* Cleanup key */ - rspamd_printf_fstring (&out, "" - "*2\r\n" - "$3\r\n" - "DEL\r\n" - "$%d\r\n" - "%s\r\n", - klen, keybuf); - redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); + rspamd_printf_fstring(&out, "" + "*2\r\n" + "$3\r\n" + "DEL\r\n" + "$%d\r\n" + "%s\r\n", + klen, keybuf); + redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); out->len = 0; - rspamd_printf_fstring (&out, "" - "*%d\r\n" - "$5\r\n" - "LPUSH\r\n" - "$%d\r\n" - "%s\r\n", - tokens->len + 2, - klen, keybuf); + rspamd_printf_fstring(&out, "" + "*%d\r\n" + "$5\r\n" + "LPUSH\r\n" + "$%d\r\n" + "%s\r\n", + tokens->len + 2, + klen, keybuf); - PTR_ARRAY_FOREACH (tokens, i, tok) { - blen = rspamd_snprintf (nbuf, sizeof (nbuf), "%uL", tok->data); - rspamd_printf_fstring (&out, "" - "$%d\r\n" - "%s\r\n", blen, nbuf); + PTR_ARRAY_FOREACH(tokens, i, tok) + { + blen = rspamd_snprintf(nbuf, sizeof(nbuf), "%uL", tok->data); + rspamd_printf_fstring(&out, "" + "$%d\r\n" + "%s\r\n", + blen, nbuf); } - redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); + redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); out->len = 0; if (rt->ctx->expiry > 0) { out->len = 0; - blen = rspamd_snprintf (nbuf, sizeof (nbuf), "%d", - rt->ctx->expiry); - - rspamd_printf_fstring (&out, "" - "*3\r\n" - "$6\r\n" - "EXPIRE\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n", - klen, keybuf, - blen, nbuf); - redisAsyncFormattedCommand (rt->redis, NULL, NULL, - out->str, out->len); - } - - rspamd_fstring_free (out); + blen = rspamd_snprintf(nbuf, sizeof(nbuf), "%d", + rt->ctx->expiry); + + rspamd_printf_fstring(&out, "" + "*3\r\n" + "$6\r\n" + "EXPIRE\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n", + klen, keybuf, + blen, nbuf); + redisAsyncFormattedCommand(rt->redis, NULL, NULL, + out->str, out->len); + } + + rspamd_fstring_free(out); } static void -rspamd_redis_async_cbdata_cleanup (struct rspamd_redis_stat_cbdata *cbdata) 
+rspamd_redis_async_cbdata_cleanup(struct rspamd_redis_stat_cbdata *cbdata) { guint i; gchar *k; @@ -721,14 +726,14 @@ rspamd_redis_async_cbdata_cleanup (struct rspamd_redis_stat_cbdata *cbdata) if (cbdata && !cbdata->wanna_die) { /* Avoid double frees */ cbdata->wanna_die = TRUE; - redisAsyncFree (cbdata->redis); + redisAsyncFree(cbdata->redis); - for (i = 0; i < cbdata->cur_keys->len; i ++) { - k = g_ptr_array_index (cbdata->cur_keys, i); - g_free (k); + for (i = 0; i < cbdata->cur_keys->len; i++) { + k = g_ptr_array_index(cbdata->cur_keys, i); + g_free(k); } - g_ptr_array_free (cbdata->cur_keys, TRUE); + g_ptr_array_free(cbdata->cur_keys, TRUE); if (cbdata->elt) { cbdata->elt->cbdata = NULL; @@ -738,7 +743,7 @@ rspamd_redis_async_cbdata_cleanup (struct rspamd_redis_stat_cbdata *cbdata) /* Replace ucl object */ if (cbdata->cur) { if (cbdata->elt->stat) { - ucl_object_unref (cbdata->elt->stat); + ucl_object_unref(cbdata->elt->stat); } cbdata->elt->stat = cbdata->cur; @@ -747,18 +752,18 @@ rspamd_redis_async_cbdata_cleanup (struct rspamd_redis_stat_cbdata *cbdata) } if (cbdata->cur) { - ucl_object_unref (cbdata->cur); + ucl_object_unref(cbdata->cur); } - g_free (cbdata); + g_free(cbdata); } } /* Called when we get number of learns for a specific key */ static void -rspamd_redis_stat_learns (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_redis_stat_learns(redisAsyncContext *c, gpointer r, gpointer priv) { - struct rspamd_redis_stat_elt *redis_elt = (struct rspamd_redis_stat_elt *)priv; + struct rspamd_redis_stat_elt *redis_elt = (struct rspamd_redis_stat_elt *) priv; struct rspamd_redis_stat_cbdata *cbdata; redisReply *reply = r; ucl_object_t *obj; @@ -770,33 +775,33 @@ rspamd_redis_stat_learns (redisAsyncContext *c, gpointer r, gpointer priv) return; } - cbdata->inflight --; + cbdata->inflight--; if (c->err == 0 && r != NULL) { - if (G_LIKELY (reply->type == REDIS_REPLY_INTEGER)) { + if (G_LIKELY(reply->type == REDIS_REPLY_INTEGER)) { num = 
reply->integer; } else if (reply->type == REDIS_REPLY_STRING) { - rspamd_strtoul (reply->str, reply->len, &num); + rspamd_strtoul(reply->str, reply->len, &num); } - obj = (ucl_object_t *) ucl_object_lookup (cbdata->cur, "revision"); + obj = (ucl_object_t *) ucl_object_lookup(cbdata->cur, "revision"); if (obj) { obj->value.iv += num; } } if (cbdata->inflight == 0) { - rspamd_redis_async_cbdata_cleanup (cbdata); + rspamd_redis_async_cbdata_cleanup(cbdata); redis_elt->cbdata = NULL; } } /* Called when we get number of elements for a specific key */ static void -rspamd_redis_stat_key (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_redis_stat_key(redisAsyncContext *c, gpointer r, gpointer priv) { - struct rspamd_redis_stat_elt *redis_elt = (struct rspamd_redis_stat_elt *)priv; + struct rspamd_redis_stat_elt *redis_elt = (struct rspamd_redis_stat_elt *) priv; struct rspamd_redis_stat_cbdata *cbdata; redisReply *reply = r; ucl_object_t *obj; @@ -808,50 +813,50 @@ rspamd_redis_stat_key (redisAsyncContext *c, gpointer r, gpointer priv) return; } - cbdata->inflight --; + cbdata->inflight--; if (c->err == 0 && r != NULL) { - if (G_LIKELY (reply->type == REDIS_REPLY_INTEGER)) { + if (G_LIKELY(reply->type == REDIS_REPLY_INTEGER)) { num = reply->integer; } else if (reply->type == REDIS_REPLY_STRING) { - rspamd_strtol (reply->str, reply->len, &num); + rspamd_strtol(reply->str, reply->len, &num); } if (num < 0) { - msg_err ("bad learns count: %L", (gint64)num); + msg_err("bad learns count: %L", (gint64) num); num = 0; } - obj = (ucl_object_t *)ucl_object_lookup (cbdata->cur, "used"); + obj = (ucl_object_t *) ucl_object_lookup(cbdata->cur, "used"); if (obj) { obj->value.iv += num; } - obj = (ucl_object_t *)ucl_object_lookup (cbdata->cur, "total"); + obj = (ucl_object_t *) ucl_object_lookup(cbdata->cur, "total"); if (obj) { obj->value.iv += num; } - obj = (ucl_object_t *)ucl_object_lookup (cbdata->cur, "size"); + obj = (ucl_object_t *) ucl_object_lookup(cbdata->cur, 
"size"); if (obj) { /* Size of key + size of int64_t */ - obj->value.iv += num * (sizeof (G_STRINGIFY (G_MAXINT64)) + - sizeof (guint64) + sizeof (gpointer)); + obj->value.iv += num * (sizeof(G_STRINGIFY(G_MAXINT64)) + + sizeof(guint64) + sizeof(gpointer)); } } if (cbdata->inflight == 0) { - rspamd_redis_async_cbdata_cleanup (cbdata); + rspamd_redis_async_cbdata_cleanup(cbdata); redis_elt->cbdata = NULL; } } /* Called when we have connected to the redis server and got keys to check */ static void -rspamd_redis_stat_keys (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_redis_stat_keys(redisAsyncContext *c, gpointer r, gpointer priv) { - struct rspamd_redis_stat_elt *redis_elt = (struct rspamd_redis_stat_elt *)priv; + struct rspamd_redis_stat_elt *redis_elt = (struct rspamd_redis_stat_elt *) priv; struct rspamd_redis_stat_cbdata *cbdata; redisReply *reply = r, *more_elt, *elts, *elt; gchar **pk, *k; @@ -864,43 +869,45 @@ rspamd_redis_stat_keys (redisAsyncContext *c, gpointer r, gpointer priv) return; } - cbdata->inflight --; + cbdata->inflight--; if (c->err == 0 && r != NULL) { if (reply->type == REDIS_REPLY_ARRAY) { more_elt = reply->element[0]; elts = reply->element[1]; - if (more_elt != NULL && more_elt->str != NULL && strcmp (more_elt->str, "0") != 0) { + if (more_elt != NULL && more_elt->str != NULL && strcmp(more_elt->str, "0") != 0) { more = true; } /* Clear the existing stuff */ - PTR_ARRAY_FOREACH (cbdata->cur_keys, i, k) { + PTR_ARRAY_FOREACH(cbdata->cur_keys, i, k) + { if (k) { - g_free (k); + g_free(k); } } - g_ptr_array_set_size (cbdata->cur_keys, elts->elements); + g_ptr_array_set_size(cbdata->cur_keys, elts->elements); - for (i = 0; i < elts->elements; i ++) { + for (i = 0; i < elts->elements; i++) { elt = elts->element[i]; if (elt->type == REDIS_REPLY_STRING) { - pk = (gchar **)&g_ptr_array_index (cbdata->cur_keys, i); - *pk = g_malloc (elt->len + 1); - rspamd_strlcpy (*pk, elt->str, elt->len + 1); - processed ++; + pk = (gchar **) 
&g_ptr_array_index(cbdata->cur_keys, i); + *pk = g_malloc(elt->len + 1); + rspamd_strlcpy(*pk, elt->str, elt->len + 1); + processed++; } else { - pk = (gchar **)&g_ptr_array_index (cbdata->cur_keys, i); + pk = (gchar **) &g_ptr_array_index(cbdata->cur_keys, i); *pk = NULL; } } if (processed) { - PTR_ARRAY_FOREACH (cbdata->cur_keys, i, k) { + PTR_ARRAY_FOREACH(cbdata->cur_keys, i, k) + { if (k) { const gchar *learned_key = "learns"; @@ -911,24 +918,24 @@ rspamd_redis_stat_keys (redisAsyncContext *c, gpointer r, gpointer priv) else { learned_key = "learns_ham"; } - redisAsyncCommand (cbdata->redis, - rspamd_redis_stat_learns, - redis_elt, - "HGET %s %s", - k, learned_key); + redisAsyncCommand(cbdata->redis, + rspamd_redis_stat_learns, + redis_elt, + "HGET %s %s", + k, learned_key); cbdata->inflight += 1; } else { - redisAsyncCommand (cbdata->redis, - rspamd_redis_stat_key, - redis_elt, - "HLEN %s", - k); - redisAsyncCommand (cbdata->redis, - rspamd_redis_stat_learns, - redis_elt, - "HGET %s %s", - k, learned_key); + redisAsyncCommand(cbdata->redis, + rspamd_redis_stat_key, + redis_elt, + "HLEN %s", + k); + redisAsyncCommand(cbdata->redis, + rspamd_redis_stat_learns, + redis_elt, + "HGET %s %s", + k, learned_key); cbdata->inflight += 2; } } @@ -938,56 +945,56 @@ rspamd_redis_stat_keys (redisAsyncContext *c, gpointer r, gpointer priv) if (more) { /* Get more stat keys */ - redisAsyncCommand (cbdata->redis, rspamd_redis_stat_keys, redis_elt, - "SSCAN %s_keys %s COUNT 1000", - cbdata->elt->ctx->stcf->symbol, more_elt->str); + redisAsyncCommand(cbdata->redis, rspamd_redis_stat_keys, redis_elt, + "SSCAN %s_keys %s COUNT 1000", + cbdata->elt->ctx->stcf->symbol, more_elt->str); cbdata->inflight += 1; } else { /* Set up the required keys */ - ucl_object_insert_key (cbdata->cur, - ucl_object_typed_new (UCL_INT), "revision", 0, false); - ucl_object_insert_key (cbdata->cur, - ucl_object_typed_new (UCL_INT), "used", 0, false); - ucl_object_insert_key (cbdata->cur, - 
ucl_object_typed_new (UCL_INT), "total", 0, false); - ucl_object_insert_key (cbdata->cur, - ucl_object_typed_new (UCL_INT), "size", 0, false); - ucl_object_insert_key (cbdata->cur, - ucl_object_fromstring (cbdata->elt->ctx->stcf->symbol), - "symbol", 0, false); - ucl_object_insert_key (cbdata->cur, ucl_object_fromstring ("redis"), - "type", 0, false); - ucl_object_insert_key (cbdata->cur, ucl_object_fromint (0), - "languages", 0, false); - ucl_object_insert_key (cbdata->cur, ucl_object_fromint (processed), - "users", 0, false); - - rspamd_upstream_ok (cbdata->selected); + ucl_object_insert_key(cbdata->cur, + ucl_object_typed_new(UCL_INT), "revision", 0, false); + ucl_object_insert_key(cbdata->cur, + ucl_object_typed_new(UCL_INT), "used", 0, false); + ucl_object_insert_key(cbdata->cur, + ucl_object_typed_new(UCL_INT), "total", 0, false); + ucl_object_insert_key(cbdata->cur, + ucl_object_typed_new(UCL_INT), "size", 0, false); + ucl_object_insert_key(cbdata->cur, + ucl_object_fromstring(cbdata->elt->ctx->stcf->symbol), + "symbol", 0, false); + ucl_object_insert_key(cbdata->cur, ucl_object_fromstring("redis"), + "type", 0, false); + ucl_object_insert_key(cbdata->cur, ucl_object_fromint(0), + "languages", 0, false); + ucl_object_insert_key(cbdata->cur, ucl_object_fromint(processed), + "users", 0, false); + + rspamd_upstream_ok(cbdata->selected); if (cbdata->inflight == 0) { - rspamd_redis_async_cbdata_cleanup (cbdata); + rspamd_redis_async_cbdata_cleanup(cbdata); redis_elt->cbdata = NULL; } } } else { if (c->errstr) { - msg_err ("cannot get keys to gather stat: %s", c->errstr); + msg_err("cannot get keys to gather stat: %s", c->errstr); } else { - msg_err ("cannot get keys to gather stat: unknown error"); + msg_err("cannot get keys to gather stat: unknown error"); } - rspamd_upstream_fail (cbdata->selected, FALSE, c->errstr); - rspamd_redis_async_cbdata_cleanup (cbdata); + rspamd_upstream_fail(cbdata->selected, FALSE, c->errstr); + 
rspamd_redis_async_cbdata_cleanup(cbdata); redis_elt->cbdata = NULL; } } static void -rspamd_redis_async_stat_cb (struct rspamd_stat_async_elt *elt, gpointer d) +rspamd_redis_async_stat_cb(struct rspamd_stat_async_elt *elt, gpointer d) { struct redis_stat_ctx *ctx; struct rspamd_redis_stat_elt *redis_elt = elt->ud; @@ -997,83 +1004,83 @@ rspamd_redis_async_stat_cb (struct rspamd_stat_async_elt *elt, gpointer d) redisAsyncContext *redis_ctx; struct upstream *selected; - g_assert (redis_elt != NULL); + g_assert(redis_elt != NULL); ctx = redis_elt->ctx; if (redis_elt->cbdata) { /* We have some other process pending */ - rspamd_redis_async_cbdata_cleanup (redis_elt->cbdata); + rspamd_redis_async_cbdata_cleanup(redis_elt->cbdata); redis_elt->cbdata = NULL; } /* Disable further events unless needed */ elt->enabled = FALSE; - ups = rspamd_redis_get_servers (ctx, "read_servers"); + ups = rspamd_redis_get_servers(ctx, "read_servers"); if (!ups) { return; } - selected = rspamd_upstream_get (ups, - RSPAMD_UPSTREAM_ROUND_ROBIN, - NULL, - 0); + selected = rspamd_upstream_get(ups, + RSPAMD_UPSTREAM_ROUND_ROBIN, + NULL, + 0); - g_assert (selected != NULL); - addr = rspamd_upstream_addr_next (selected); - g_assert (addr != NULL); + g_assert(selected != NULL); + addr = rspamd_upstream_addr_next(selected); + g_assert(addr != NULL); - if (rspamd_inet_address_get_af (addr) == AF_UNIX) { - redis_ctx = redisAsyncConnectUnix (rspamd_inet_address_to_string (addr)); + if (rspamd_inet_address_get_af(addr) == AF_UNIX) { + redis_ctx = redisAsyncConnectUnix(rspamd_inet_address_to_string(addr)); } else { - redis_ctx = redisAsyncConnect (rspamd_inet_address_to_string (addr), - rspamd_inet_address_get_port (addr)); + redis_ctx = redisAsyncConnect(rspamd_inet_address_to_string(addr), + rspamd_inet_address_get_port(addr)); } if (redis_ctx == NULL) { - msg_warn ("cannot connect to redis server %s: %s", - rspamd_inet_address_to_string_pretty (addr), - strerror (errno)); + msg_warn("cannot connect to 
redis server %s: %s", + rspamd_inet_address_to_string_pretty(addr), + strerror(errno)); return; } else if (redis_ctx->err != REDIS_OK) { - msg_warn ("cannot connect to redis server %s: %s", - rspamd_inet_address_to_string_pretty (addr), - redis_ctx->errstr); - redisAsyncFree (redis_ctx); + msg_warn("cannot connect to redis server %s: %s", + rspamd_inet_address_to_string_pretty(addr), + redis_ctx->errstr); + redisAsyncFree(redis_ctx); return; } - redisLibevAttach (redis_elt->event_loop, redis_ctx); - cbdata = g_malloc0 (sizeof (*cbdata)); + redisLibevAttach(redis_elt->event_loop, redis_ctx); + cbdata = g_malloc0(sizeof(*cbdata)); cbdata->redis = redis_ctx; cbdata->selected = selected; cbdata->inflight = 1; - cbdata->cur = ucl_object_typed_new (UCL_OBJECT); + cbdata->cur = ucl_object_typed_new(UCL_OBJECT); cbdata->elt = redis_elt; - cbdata->cur_keys = g_ptr_array_sized_new (1000); + cbdata->cur_keys = g_ptr_array_sized_new(1000); redis_elt->cbdata = cbdata; /* XXX: deal with timeouts maybe */ /* Get keys in redis that match our symbol */ - rspamd_redis_maybe_auth (ctx, cbdata->redis); - redisAsyncCommand (cbdata->redis, rspamd_redis_stat_keys, redis_elt, - "SSCAN %s_keys 0 COUNT 1000", - ctx->stcf->symbol); + rspamd_redis_maybe_auth(ctx, cbdata->redis); + redisAsyncCommand(cbdata->redis, rspamd_redis_stat_keys, redis_elt, + "SSCAN %s_keys 0 COUNT 1000", + ctx->stcf->symbol); } static void -rspamd_redis_async_stat_fin (struct rspamd_stat_async_elt *elt, gpointer d) +rspamd_redis_async_stat_fin(struct rspamd_stat_async_elt *elt, gpointer d) { struct rspamd_redis_stat_elt *redis_elt = elt->ud; if (redis_elt->cbdata != NULL) { - rspamd_redis_async_cbdata_cleanup (redis_elt->cbdata); + rspamd_redis_async_cbdata_cleanup(redis_elt->cbdata); redis_elt->cbdata = NULL; } @@ -1083,29 +1090,29 @@ rspamd_redis_async_stat_fin (struct rspamd_stat_async_elt *elt, gpointer d) redis_elt->stat = NULL; } - g_free (redis_elt); + g_free(redis_elt); } /* Called on connection termination */ 
static void -rspamd_redis_fin (gpointer data) +rspamd_redis_fin(gpointer data) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (data); + struct redis_stat_runtime *rt = REDIS_RUNTIME(data); redisAsyncContext *redis; if (rt->has_event) { /* Should not happen ! */ - msg_err ("FIXME: this code path should not be reached!"); - rspamd_session_remove_event (rt->task->s, NULL, rt); + msg_err("FIXME: this code path should not be reached!"); + rspamd_session_remove_event(rt->task->s, NULL, rt); rt->has_event = FALSE; } /* Stop timeout */ - if (ev_can_stop (&rt->timeout_event)) { - ev_timer_stop (rt->task->event_loop, &rt->timeout_event); + if (ev_can_stop(&rt->timeout_event)) { + ev_timer_stop(rt->task->event_loop, &rt->timeout_event); } if (rt->tokens) { - g_ptr_array_unref (rt->tokens); + g_ptr_array_unref(rt->tokens); rt->tokens = NULL; } @@ -1113,56 +1120,56 @@ rspamd_redis_fin (gpointer data) redis = rt->redis; rt->redis = NULL; /* This calls for all callbacks pending */ - redisAsyncFree (redis); + redisAsyncFree(redis); } if (rt->err) { - g_error_free (rt->err); + g_error_free(rt->err); } } static void -rspamd_redis_timeout (EV_P_ ev_timer *w, int revents) +rspamd_redis_timeout(EV_P_ ev_timer *w, int revents) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (w->data); + struct redis_stat_runtime *rt = REDIS_RUNTIME(w->data); struct rspamd_task *task; redisAsyncContext *redis; task = rt->task; - msg_err_task_check ("connection to redis server %s timed out", - rspamd_upstream_name (rt->selected)); + msg_err_task_check("connection to redis server %s timed out", + rspamd_upstream_name(rt->selected)); - rspamd_upstream_fail (rt->selected, FALSE, "timeout"); + rspamd_upstream_fail(rt->selected, FALSE, "timeout"); if (rt->redis) { redis = rt->redis; rt->redis = NULL; /* This calls for all callbacks pending */ - redisAsyncFree (redis); + redisAsyncFree(redis); } if (rt->tokens) { - g_ptr_array_unref (rt->tokens); + g_ptr_array_unref(rt->tokens); rt->tokens = NULL; } if 
(!rt->err) { - g_set_error (&rt->err, rspamd_redis_stat_quark (), ETIMEDOUT, - "error getting reply from redis server %s: timeout", - rspamd_upstream_name (rt->selected)); + g_set_error(&rt->err, rspamd_redis_stat_quark(), ETIMEDOUT, + "error getting reply from redis server %s: timeout", + rspamd_upstream_name(rt->selected)); } if (rt->has_event) { rt->has_event = FALSE; - rspamd_session_remove_event (task->s, NULL, rt); + rspamd_session_remove_event(task->s, NULL, rt); } } /* Called when we have received tokens values from redis */ static void -rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_redis_processed(redisAsyncContext *c, gpointer r, gpointer priv) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (priv); + struct redis_stat_runtime *rt = REDIS_RUNTIME(priv); redisReply *reply = r, *elt; struct rspamd_task *task; rspamd_token_t *tok; @@ -1177,32 +1184,32 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) if (reply->type == REDIS_REPLY_ARRAY) { if (reply->elements == task->tokens->len) { - for (i = 0; i < reply->elements; i ++) { - tok = g_ptr_array_index (task->tokens, i); + for (i = 0; i < reply->elements; i++) { + tok = g_ptr_array_index(task->tokens, i); elt = reply->element[i]; - if (G_UNLIKELY (elt->type == REDIS_REPLY_INTEGER)) { + if (G_UNLIKELY(elt->type == REDIS_REPLY_INTEGER)) { tok->values[rt->id] = elt->integer; - found ++; + found++; } else if (elt->type == REDIS_REPLY_STRING) { if (rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER) { - rspamd_strtoul (elt->str, elt->len, &val); + rspamd_strtoul(elt->str, elt->len, &val); tok->values[rt->id] = val; } else { - float_val = strtof (elt->str, NULL); + float_val = strtof(elt->str, NULL); tok->values[rt->id] = float_val; } - found ++; + found++; } else { tok->values[rt->id] = 0; } - processed ++; + processed++; } if (rt->stcf->is_spam) { @@ -1213,54 +1220,54 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) } } 
else { - msg_err_task_check ("got invalid length of reply vector from redis: " - "%d, expected: %d", - (gint)reply->elements, - (gint)task->tokens->len); + msg_err_task_check("got invalid length of reply vector from redis: " + "%d, expected: %d", + (gint) reply->elements, + (gint) task->tokens->len); } } else { if (reply->type == REDIS_REPLY_ERROR) { - msg_err_task_check ("cannot learn %s: redis error: \"%s\"", - rt->stcf->symbol, reply->str); + msg_err_task_check("cannot learn %s: redis error: \"%s\"", + rt->stcf->symbol, reply->str); } else { - msg_err_task_check ("got invalid reply from redis: %s, array expected", - rspamd_redis_type_to_string(reply->type)); + msg_err_task_check("got invalid reply from redis: %s, array expected", + rspamd_redis_type_to_string(reply->type)); } } - msg_debug_stat_redis ("received tokens for %s: %d processed, %d found", - rt->redis_object_expanded, processed, found); - rspamd_upstream_ok (rt->selected); + msg_debug_stat_redis("received tokens for %s: %d processed, %d found", + rt->redis_object_expanded, processed, found); + rspamd_upstream_ok(rt->selected); } } else { - msg_err_task ("error getting reply from redis server %s: %s", - rspamd_upstream_name (rt->selected), c->errstr); + msg_err_task("error getting reply from redis server %s: %s", + rspamd_upstream_name(rt->selected), c->errstr); if (rt->redis) { - rspamd_upstream_fail (rt->selected, FALSE, c->errstr); + rspamd_upstream_fail(rt->selected, FALSE, c->errstr); } if (!rt->err) { - g_set_error (&rt->err, rspamd_redis_stat_quark (), c->err, - "cannot get values: error getting reply from redis server %s: %s", - rspamd_upstream_name (rt->selected), c->errstr); + g_set_error(&rt->err, rspamd_redis_stat_quark(), c->err, + "cannot get values: error getting reply from redis server %s: %s", + rspamd_upstream_name(rt->selected), c->errstr); } } if (rt->has_event) { rt->has_event = FALSE; - rspamd_session_remove_event (task->s, NULL, rt); + rspamd_session_remove_event(task->s, NULL, 
rt); } } /* Called when we have connected to the redis server and got stats */ static void -rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_redis_connected(redisAsyncContext *c, gpointer r, gpointer priv) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (priv); + struct redis_stat_runtime *rt = REDIS_RUNTIME(priv); redisReply *reply = r; struct rspamd_task *task; glong val = 0; @@ -1270,22 +1277,22 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) if (c->err == 0 && rt->has_event) { if (r != NULL) { - if (G_UNLIKELY (reply->type == REDIS_REPLY_INTEGER)) { + if (G_UNLIKELY(reply->type == REDIS_REPLY_INTEGER)) { val = reply->integer; } else if (reply->type == REDIS_REPLY_STRING) { - rspamd_strtol (reply->str, reply->len, &val); + rspamd_strtol(reply->str, reply->len, &val); } else { if (reply->type != REDIS_REPLY_NIL) { if (reply->type == REDIS_REPLY_ERROR) { - msg_err_task ("cannot learn %s: redis error: \"%s\"", - rt->stcf->symbol, reply->str); + msg_err_task("cannot learn %s: redis error: \"%s\"", + rt->stcf->symbol, reply->str); } else { - msg_err_task ("bad learned type for %s: %s, nil expected", - rt->stcf->symbol, - rspamd_redis_type_to_string(reply->type)); + msg_err_task("bad learned type for %s: %s, nil expected", + rt->stcf->symbol, + rspamd_redis_type_to_string(reply->type)); } } @@ -1293,15 +1300,15 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) } if (val < 0) { - msg_warn_task ("invalid number of learns for %s: %L", - rt->stcf->symbol, val); + msg_warn_task("invalid number of learns for %s: %L", + rt->stcf->symbol, val); val = 0; } rt->learned = val; - msg_debug_stat_redis ("connected to redis server, tokens learned for %s: %uL", - rt->redis_object_expanded, rt->learned); - rspamd_upstream_ok (rt->selected); + msg_debug_stat_redis("connected to redis server, tokens learned for %s: %uL", + rt->redis_object_expanded, rt->learned); + rspamd_upstream_ok(rt->selected); 
/* Save learn count in mempool variable */ gint64 *learns_cnt; @@ -1314,148 +1321,149 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) var_name = RSPAMD_MEMPOOL_HAM_LEARNS; } - learns_cnt = rspamd_mempool_get_variable (task->task_pool, - var_name); + learns_cnt = rspamd_mempool_get_variable(task->task_pool, + var_name); if (learns_cnt) { (*learns_cnt) += rt->learned; } else { - learns_cnt = rspamd_mempool_alloc (task->task_pool, - sizeof (*learns_cnt)); + learns_cnt = rspamd_mempool_alloc(task->task_pool, + sizeof(*learns_cnt)); *learns_cnt = rt->learned; - rspamd_mempool_set_variable (task->task_pool, - var_name, - learns_cnt, NULL); + rspamd_mempool_set_variable(task->task_pool, + var_name, + learns_cnt, NULL); } if (rt->learned >= rt->stcf->clcf->min_learns && rt->learned > 0) { - rspamd_fstring_t *query = rspamd_redis_tokens_to_query ( - task, - rt, - rt->tokens, - rt->ctx->new_schema ? "HGET" : "HMGET", - rt->redis_object_expanded, FALSE, -1, - rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER); - g_assert (query != NULL); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, query); - - int ret = redisAsyncFormattedCommand (rt->redis, - rspamd_redis_processed, rt, - query->str, query->len); + rspamd_fstring_t *query = rspamd_redis_tokens_to_query( + task, + rt, + rt->tokens, + rt->ctx->new_schema ? 
"HGET" : "HMGET", + rt->redis_object_expanded, FALSE, -1, + rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER); + g_assert(query != NULL); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, query); + + int ret = redisAsyncFormattedCommand(rt->redis, + rspamd_redis_processed, rt, + query->str, query->len); if (ret != REDIS_OK) { - msg_err_task ("call to redis failed: %s", rt->redis->errstr); + msg_err_task("call to redis failed: %s", rt->redis->errstr); } else { /* Further is handled by rspamd_redis_processed */ final = FALSE; /* Restart timeout */ - if (ev_can_stop (&rt->timeout_event)) { + if (ev_can_stop(&rt->timeout_event)) { rt->timeout_event.repeat = rt->ctx->timeout; - ev_timer_again (task->event_loop, &rt->timeout_event); + ev_timer_again(task->event_loop, &rt->timeout_event); } else { rt->timeout_event.data = rt; - ev_timer_init (&rt->timeout_event, rspamd_redis_timeout, - rt->ctx->timeout, 0.); - ev_timer_start (task->event_loop, &rt->timeout_event); + ev_timer_init(&rt->timeout_event, rspamd_redis_timeout, + rt->ctx->timeout, 0.); + ev_timer_start(task->event_loop, &rt->timeout_event); } } } else { - msg_warn_task ("skip obtaining bayes tokens for %s of classifier " - "%s: not enough learns %d; %d required", - rt->stcf->symbol, rt->stcf->clcf->name, - (int)rt->learned, rt->stcf->clcf->min_learns); + msg_warn_task("skip obtaining bayes tokens for %s of classifier " + "%s: not enough learns %d; %d required", + rt->stcf->symbol, rt->stcf->clcf->name, + (int) rt->learned, rt->stcf->clcf->min_learns); } } } else if (rt->has_event) { - msg_err_task ("error getting reply from redis server %s: %s", - rspamd_upstream_name (rt->selected), c->errstr); - rspamd_upstream_fail (rt->selected, FALSE, c->errstr); + msg_err_task("error getting reply from redis server %s: %s", + rspamd_upstream_name(rt->selected), c->errstr); + rspamd_upstream_fail(rt->selected, FALSE, c->errstr); if (!rt->err) { - g_set_error (&rt->err, 
rspamd_redis_stat_quark (), c->err, - "error getting reply from redis server %s: %s", - rspamd_upstream_name (rt->selected), c->errstr); + g_set_error(&rt->err, rspamd_redis_stat_quark(), c->err, + "error getting reply from redis server %s: %s", + rspamd_upstream_name(rt->selected), c->errstr); } } if (final && rt->has_event) { rt->has_event = FALSE; - rspamd_session_remove_event (task->s, NULL, rt); + rspamd_session_remove_event(task->s, NULL, rt); } } /* Called when we have set tokens during learning */ static void -rspamd_redis_learned (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_redis_learned(redisAsyncContext *c, gpointer r, gpointer priv) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (priv); + struct redis_stat_runtime *rt = REDIS_RUNTIME(priv); struct rspamd_task *task; task = rt->task; if (c->err == 0) { - rspamd_upstream_ok (rt->selected); + rspamd_upstream_ok(rt->selected); } else { - msg_err_task_check ("error getting reply from redis server %s: %s", - rspamd_upstream_name (rt->selected), c->errstr); + msg_err_task_check("error getting reply from redis server %s: %s", + rspamd_upstream_name(rt->selected), c->errstr); if (rt->redis) { - rspamd_upstream_fail (rt->selected, FALSE, c->errstr); + rspamd_upstream_fail(rt->selected, FALSE, c->errstr); } if (!rt->err) { - g_set_error (&rt->err, rspamd_redis_stat_quark (), c->err, - "cannot get learned: error getting reply from redis server %s: %s", - rspamd_upstream_name (rt->selected), c->errstr); + g_set_error(&rt->err, rspamd_redis_stat_quark(), c->err, + "cannot get learned: error getting reply from redis server %s: %s", + rspamd_upstream_name(rt->selected), c->errstr); } } if (rt->has_event) { rt->has_event = FALSE; - rspamd_session_remove_event (task->s, NULL, rt); + rspamd_session_remove_event(task->s, NULL, rt); } } static void -rspamd_redis_parse_classifier_opts (struct redis_stat_ctx *backend, - const ucl_object_t *obj, - struct rspamd_config *cfg) 
+rspamd_redis_parse_classifier_opts(struct redis_stat_ctx *backend, + const ucl_object_t *obj, + struct rspamd_config *cfg) { const gchar *lua_script; const ucl_object_t *elt, *users_enabled; - users_enabled = ucl_object_lookup_any (obj, "per_user", - "users_enabled", NULL); + users_enabled = ucl_object_lookup_any(obj, "per_user", + "users_enabled", NULL); if (users_enabled != NULL) { - if (ucl_object_type (users_enabled) == UCL_BOOLEAN) { - backend->enable_users = ucl_object_toboolean (users_enabled); + if (ucl_object_type(users_enabled) == UCL_BOOLEAN) { + backend->enable_users = ucl_object_toboolean(users_enabled); backend->cbref_user = -1; } - else if (ucl_object_type (users_enabled) == UCL_STRING) { - lua_script = ucl_object_tostring (users_enabled); + else if (ucl_object_type(users_enabled) == UCL_STRING) { + lua_script = ucl_object_tostring(users_enabled); - if (luaL_dostring (cfg->lua_state, lua_script) != 0) { - msg_err_config ("cannot execute lua script for users " - "extraction: %s", lua_tostring (cfg->lua_state, -1)); + if (luaL_dostring(cfg->lua_state, lua_script) != 0) { + msg_err_config("cannot execute lua script for users " + "extraction: %s", + lua_tostring(cfg->lua_state, -1)); } else { - if (lua_type (cfg->lua_state, -1) == LUA_TFUNCTION) { + if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) { backend->enable_users = TRUE; - backend->cbref_user = luaL_ref (cfg->lua_state, - LUA_REGISTRYINDEX); + backend->cbref_user = luaL_ref(cfg->lua_state, + LUA_REGISTRYINDEX); } else { - msg_err_config ("lua script must return " - "function(task) and not %s", - lua_typename (cfg->lua_state, lua_type ( - cfg->lua_state, -1))); + msg_err_config("lua script must return " + "function(task) and not %s", + lua_typename(cfg->lua_state, lua_type( + cfg->lua_state, -1))); } } } @@ -1465,8 +1473,8 @@ rspamd_redis_parse_classifier_opts (struct redis_stat_ctx *backend, backend->cbref_user = -1; } - elt = ucl_object_lookup (obj, "prefix"); - if (elt == NULL || 
ucl_object_type (elt) != UCL_STRING) { + elt = ucl_object_lookup(obj, "prefix"); + if (elt == NULL || ucl_object_type(elt) != UCL_STRING) { /* Default non-users statistics */ if (backend->enable_users || backend->cbref_user != -1) { backend->redis_object = REDIS_DEFAULT_USERS_OBJECT; @@ -1477,40 +1485,40 @@ rspamd_redis_parse_classifier_opts (struct redis_stat_ctx *backend, } else { /* XXX: sanity check */ - backend->redis_object = ucl_object_tostring (elt); + backend->redis_object = ucl_object_tostring(elt); } - elt = ucl_object_lookup (obj, "store_tokens"); + elt = ucl_object_lookup(obj, "store_tokens"); if (elt) { - backend->store_tokens = ucl_object_toboolean (elt); + backend->store_tokens = ucl_object_toboolean(elt); } else { backend->store_tokens = FALSE; } - elt = ucl_object_lookup (obj, "new_schema"); + elt = ucl_object_lookup(obj, "new_schema"); if (elt) { - backend->new_schema = ucl_object_toboolean (elt); + backend->new_schema = ucl_object_toboolean(elt); } else { backend->new_schema = FALSE; - msg_warn_config ("you are using old bayes schema for redis statistics, " - "please consider converting it to a new one " - "by using 'rspamadm configwizard statistics'"); + msg_warn_config("you are using old bayes schema for redis statistics, " + "please consider converting it to a new one " + "by using 'rspamadm configwizard statistics'"); } - elt = ucl_object_lookup (obj, "signatures"); + elt = ucl_object_lookup(obj, "signatures"); if (elt) { - backend->enable_signatures = ucl_object_toboolean (elt); + backend->enable_signatures = ucl_object_toboolean(elt); } else { backend->enable_signatures = FALSE; } - elt = ucl_object_lookup_any (obj, "expiry", "expire", NULL); + elt = ucl_object_lookup_any(obj, "expiry", "expire", NULL); if (elt) { - backend->expiry = ucl_object_toint (elt); + backend->expiry = ucl_object_toint(elt); } else { backend->expiry = 0; @@ -1518,8 +1526,8 @@ rspamd_redis_parse_classifier_opts (struct redis_stat_ctx *backend, } gpointer 
-rspamd_redis_init (struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, struct rspamd_statfile *st) +rspamd_redis_init(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, struct rspamd_statfile *st) { struct redis_stat_ctx *backend; struct rspamd_statfile_config *stf = st->stcf; @@ -1527,99 +1535,99 @@ rspamd_redis_init (struct rspamd_stat_ctx *ctx, const ucl_object_t *obj; gboolean ret = FALSE; gint conf_ref = -1; - lua_State *L = (lua_State *)cfg->lua_state; + lua_State *L = (lua_State *) cfg->lua_state; - backend = g_malloc0 (sizeof (*backend)); + backend = g_malloc0(sizeof(*backend)); backend->L = L; backend->timeout = REDIS_DEFAULT_TIMEOUT; /* First search in backend configuration */ - obj = ucl_object_lookup (st->classifier->cfg->opts, "backend"); - if (obj != NULL && ucl_object_type (obj) == UCL_OBJECT) { - ret = rspamd_lua_try_load_redis (L, obj, cfg, &conf_ref); + obj = ucl_object_lookup(st->classifier->cfg->opts, "backend"); + if (obj != NULL && ucl_object_type(obj) == UCL_OBJECT) { + ret = rspamd_lua_try_load_redis(L, obj, cfg, &conf_ref); } /* Now try statfiles config */ if (!ret && stf->opts) { - ret = rspamd_lua_try_load_redis (L, stf->opts, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, stf->opts, cfg, &conf_ref); } /* Now try classifier config */ if (!ret && st->classifier->cfg->opts) { - ret = rspamd_lua_try_load_redis (L, st->classifier->cfg->opts, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, st->classifier->cfg->opts, cfg, &conf_ref); } /* Now try global redis settings */ if (!ret) { - obj = ucl_object_lookup (cfg->rcl_obj, "redis"); + obj = ucl_object_lookup(cfg->rcl_obj, "redis"); if (obj) { const ucl_object_t *specific_obj; - specific_obj = ucl_object_lookup (obj, "statistics"); + specific_obj = ucl_object_lookup(obj, "statistics"); if (specific_obj) { - ret = rspamd_lua_try_load_redis (L, - specific_obj, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, + specific_obj, cfg, &conf_ref); } else { - ret = 
rspamd_lua_try_load_redis (L, - obj, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, + obj, cfg, &conf_ref); } } } if (!ret) { - msg_err_config ("cannot init redis backend for %s", stf->symbol); - g_free (backend); + msg_err_config("cannot init redis backend for %s", stf->symbol); + g_free(backend); return NULL; } backend->conf_ref = conf_ref; /* Check some common table values */ - lua_rawgeti (L, LUA_REGISTRYINDEX, conf_ref); + lua_rawgeti(L, LUA_REGISTRYINDEX, conf_ref); - lua_pushstring (L, "timeout"); - lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TNUMBER) { - backend->timeout = lua_tonumber (L, -1); + lua_pushstring(L, "timeout"); + lua_gettable(L, -2); + if (lua_type(L, -1) == LUA_TNUMBER) { + backend->timeout = lua_tonumber(L, -1); } - lua_pop (L, 1); + lua_pop(L, 1); - lua_pushstring (L, "db"); - lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TSTRING) { - backend->dbname = rspamd_mempool_strdup (cfg->cfg_pool, - lua_tostring (L, -1)); + lua_pushstring(L, "db"); + lua_gettable(L, -2); + if (lua_type(L, -1) == LUA_TSTRING) { + backend->dbname = rspamd_mempool_strdup(cfg->cfg_pool, + lua_tostring(L, -1)); } - lua_pop (L, 1); + lua_pop(L, 1); - lua_pushstring (L, "password"); - lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TSTRING) { - backend->password = rspamd_mempool_strdup (cfg->cfg_pool, - lua_tostring (L, -1)); + lua_pushstring(L, "password"); + lua_gettable(L, -2); + if (lua_type(L, -1) == LUA_TSTRING) { + backend->password = rspamd_mempool_strdup(cfg->cfg_pool, + lua_tostring(L, -1)); } - lua_pop (L, 1); + lua_pop(L, 1); - lua_settop (L, 0); + lua_settop(L, 0); - rspamd_redis_parse_classifier_opts (backend, st->classifier->cfg->opts, cfg); + rspamd_redis_parse_classifier_opts(backend, st->classifier->cfg->opts, cfg); stf->clcf->flags |= RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND; backend->stcf = stf; - st_elt = g_malloc0 (sizeof (*st_elt)); + st_elt = g_malloc0(sizeof(*st_elt)); st_elt->event_loop = ctx->event_loop; st_elt->ctx 
= backend; - backend->stat_elt = rspamd_stat_ctx_register_async ( - rspamd_redis_async_stat_cb, - rspamd_redis_async_stat_fin, - st_elt, - REDIS_STAT_TIMEOUT); + backend->stat_elt = rspamd_stat_ctx_register_async( + rspamd_redis_async_stat_cb, + rspamd_redis_async_stat_fin, + st_elt, + REDIS_STAT_TIMEOUT); st_elt->async = backend->stat_elt; - return (gpointer)backend; + return (gpointer) backend; } /* @@ -1629,10 +1637,10 @@ rspamd_redis_init (struct rspamd_stat_ctx *ctx, static void rspamd_stat_redis_on_disconnect(const struct redisAsyncContext *ac, int status) { - struct redis_stat_runtime *rt = (struct redis_stat_runtime *)ac->data; + struct redis_stat_runtime *rt = (struct redis_stat_runtime *) ac->data; - if (ev_can_stop (&rt->timeout_event)) { - ev_timer_stop (rt->task->event_loop, &rt->timeout_event); + if (ev_can_stop(&rt->timeout_event)) { + ev_timer_stop(rt->task->event_loop, &rt->timeout_event); } rt->redis = NULL; } @@ -1640,7 +1648,7 @@ rspamd_stat_redis_on_disconnect(const struct redisAsyncContext *ac, int status) static void rspamd_stat_redis_on_connect(const struct redisAsyncContext *ac, int status) { - struct redis_stat_runtime *rt = (struct redis_stat_runtime *)ac->data; + struct redis_stat_runtime *rt = (struct redis_stat_runtime *) ac->data; if (status == REDIS_ERR) { @@ -1648,136 +1656,135 @@ rspamd_stat_redis_on_connect(const struct redisAsyncContext *ac, int status) * We also need to reset rt->redis as it will be subsequently freed without * calling for redis_on_disconnect callback... 
*/ - if (ev_can_stop (&rt->timeout_event)) { - ev_timer_stop (rt->task->event_loop, &rt->timeout_event); + if (ev_can_stop(&rt->timeout_event)) { + ev_timer_stop(rt->task->event_loop, &rt->timeout_event); } rt->redis = NULL; } } gpointer -rspamd_redis_runtime (struct rspamd_task *task, - struct rspamd_statfile_config *stcf, - gboolean learn, gpointer c, gint _id) +rspamd_redis_runtime(struct rspamd_task *task, + struct rspamd_statfile_config *stcf, + gboolean learn, gpointer c, gint _id) { - struct redis_stat_ctx *ctx = REDIS_CTX (c); + struct redis_stat_ctx *ctx = REDIS_CTX(c); struct redis_stat_runtime *rt; struct upstream *up; struct upstream_list *ups; char *object_expanded = NULL; rspamd_inet_addr_t *addr; - g_assert (ctx != NULL); - g_assert (stcf != NULL); + g_assert(ctx != NULL); + g_assert(stcf != NULL); if (learn) { - ups = rspamd_redis_get_servers (ctx, "write_servers"); + ups = rspamd_redis_get_servers(ctx, "write_servers"); if (!ups) { - msg_err_task ("no write servers defined for %s, cannot learn", - stcf->symbol); + msg_err_task("no write servers defined for %s, cannot learn", + stcf->symbol); return NULL; } - up = rspamd_upstream_get (ups, - RSPAMD_UPSTREAM_MASTER_SLAVE, - NULL, - 0); + up = rspamd_upstream_get(ups, + RSPAMD_UPSTREAM_MASTER_SLAVE, + NULL, + 0); } else { - ups = rspamd_redis_get_servers (ctx, "read_servers"); + ups = rspamd_redis_get_servers(ctx, "read_servers"); if (!ups) { - msg_err_task ("no read servers defined for %s, cannot stat", - stcf->symbol); + msg_err_task("no read servers defined for %s, cannot stat", + stcf->symbol); return NULL; } - up = rspamd_upstream_get (ups, - RSPAMD_UPSTREAM_ROUND_ROBIN, - NULL, - 0); + up = rspamd_upstream_get(ups, + RSPAMD_UPSTREAM_ROUND_ROBIN, + NULL, + 0); } if (up == NULL) { - msg_err_task ("no upstreams reachable"); + msg_err_task("no upstreams reachable"); return NULL; } - if (rspamd_redis_expand_object (ctx->redis_object, ctx, task, - &object_expanded) == 0) { - msg_err_task ("expansion 
for %s failed for symbol %s " - "(maybe learning per user classifier with no user or recipient)", - learn ? "learning" : "classifying", - stcf->symbol); + if (rspamd_redis_expand_object(ctx->redis_object, ctx, task, + &object_expanded) == 0) { + msg_err_task("expansion for %s failed for symbol %s " + "(maybe learning per user classifier with no user or recipient)", + learn ? "learning" : "classifying", + stcf->symbol); return NULL; } - rt = rspamd_mempool_alloc0 (task->task_pool, sizeof (*rt)); + rt = rspamd_mempool_alloc0(task->task_pool, sizeof(*rt)); rt->selected = up; rt->task = task; rt->ctx = ctx; rt->stcf = stcf; rt->redis_object_expanded = object_expanded; - addr = rspamd_upstream_addr_next (up); - g_assert (addr != NULL); + addr = rspamd_upstream_addr_next(up); + g_assert(addr != NULL); - if (rspamd_inet_address_get_af (addr) == AF_UNIX) { - rt->redis = redisAsyncConnectUnix (rspamd_inet_address_to_string (addr)); + if (rspamd_inet_address_get_af(addr) == AF_UNIX) { + rt->redis = redisAsyncConnectUnix(rspamd_inet_address_to_string(addr)); } else { - rt->redis = redisAsyncConnect (rspamd_inet_address_to_string (addr), - rspamd_inet_address_get_port (addr)); + rt->redis = redisAsyncConnect(rspamd_inet_address_to_string(addr), + rspamd_inet_address_get_port(addr)); } if (rt->redis == NULL) { - msg_warn_task ("cannot connect to redis server %s: %s", - rspamd_inet_address_to_string_pretty (addr), - strerror (errno)); + msg_warn_task("cannot connect to redis server %s: %s", + rspamd_inet_address_to_string_pretty(addr), + strerror(errno)); return NULL; } else if (rt->redis->err != REDIS_OK) { - msg_warn_task ("cannot connect to redis server %s: %s", - rspamd_inet_address_to_string_pretty (addr), - rt->redis->errstr); - redisAsyncFree (rt->redis); + msg_warn_task("cannot connect to redis server %s: %s", + rspamd_inet_address_to_string_pretty(addr), + rt->redis->errstr); + redisAsyncFree(rt->redis); rt->redis = NULL; return NULL; } - redisLibevAttach 
(task->event_loop, rt->redis); - rspamd_redis_maybe_auth (ctx, rt->redis); + redisLibevAttach(task->event_loop, rt->redis); + rspamd_redis_maybe_auth(ctx, rt->redis); rt->redis->data = rt; - redisAsyncSetDisconnectCallback (rt->redis, rspamd_stat_redis_on_disconnect); - redisAsyncSetConnectCallback (rt->redis, rspamd_stat_redis_on_connect); + redisAsyncSetDisconnectCallback(rt->redis, rspamd_stat_redis_on_disconnect); + redisAsyncSetConnectCallback(rt->redis, rspamd_stat_redis_on_connect); - rspamd_mempool_add_destructor (task->task_pool, rspamd_redis_fin, rt); + rspamd_mempool_add_destructor(task->task_pool, rspamd_redis_fin, rt); return rt; } -void -rspamd_redis_close (gpointer p) +void rspamd_redis_close(gpointer p) { - struct redis_stat_ctx *ctx = REDIS_CTX (p); + struct redis_stat_ctx *ctx = REDIS_CTX(p); lua_State *L = ctx->L; if (ctx->conf_ref) { - luaL_unref (L, LUA_REGISTRYINDEX, ctx->conf_ref); + luaL_unref(L, LUA_REGISTRYINDEX, ctx->conf_ref); } - g_free (ctx); + g_free(ctx); } gboolean -rspamd_redis_process_tokens (struct rspamd_task *task, - GPtrArray *tokens, - gint id, gpointer p) +rspamd_redis_process_tokens(struct rspamd_task *task, + GPtrArray *tokens, + gint id, gpointer p) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (p); + struct redis_stat_runtime *rt = REDIS_RUNTIME(p); const gchar *learned_key = "learns"; - if (rspamd_session_blocked (task->s)) { + if (rspamd_session_blocked(task->s)) { return FALSE; } @@ -1796,22 +1803,22 @@ rspamd_redis_process_tokens (struct rspamd_task *task, } } - if (redisAsyncCommand (rt->redis, rspamd_redis_connected, rt, "HGET %s %s", - rt->redis_object_expanded, learned_key) == REDIS_OK) { + if (redisAsyncCommand(rt->redis, rspamd_redis_connected, rt, "HGET %s %s", + rt->redis_object_expanded, learned_key) == REDIS_OK) { - rspamd_session_add_event (task->s, NULL, rt, M); + rspamd_session_add_event(task->s, NULL, rt, M); rt->has_event = TRUE; - rt->tokens = g_ptr_array_ref (tokens); + rt->tokens = 
g_ptr_array_ref(tokens); - if (ev_can_stop (&rt->timeout_event)) { + if (ev_can_stop(&rt->timeout_event)) { rt->timeout_event.repeat = rt->ctx->timeout; - ev_timer_again (task->event_loop, &rt->timeout_event); + ev_timer_again(task->event_loop, &rt->timeout_event); } else { rt->timeout_event.data = rt; - ev_timer_init (&rt->timeout_event, rspamd_redis_timeout, - rt->ctx->timeout, 0.); - ev_timer_start (task->event_loop, &rt->timeout_event); + ev_timer_init(&rt->timeout_event, rspamd_redis_timeout, + rt->ctx->timeout, 0.); + ev_timer_start(task->event_loop, &rt->timeout_event); } } @@ -1819,30 +1826,30 @@ rspamd_redis_process_tokens (struct rspamd_task *task, } gboolean -rspamd_redis_finalize_process (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_redis_finalize_process(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (runtime); + struct redis_stat_runtime *rt = REDIS_RUNTIME(runtime); if (rt->err) { - msg_info_task ("cannot retrieve stat tokens from Redis: %e", rt->err); - g_error_free (rt->err); + msg_info_task("cannot retrieve stat tokens from Redis: %e", rt->err); + g_error_free(rt->err); rt->err = NULL; - rspamd_redis_fin (rt); + rspamd_redis_fin(rt); return FALSE; } - rspamd_redis_fin (rt); + rspamd_redis_fin(rt); return TRUE; } gboolean -rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, - gint id, gpointer p) +rspamd_redis_learn_tokens(struct rspamd_task *task, GPtrArray *tokens, + gint id, gpointer p) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (p); + struct redis_stat_runtime *rt = REDIS_RUNTIME(p); rspamd_fstring_t *query; const gchar *redis_cmd; rspamd_token_t *tok; @@ -1850,7 +1857,7 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, goffset off; const gchar *learned_key = "learns"; - if (rspamd_session_blocked (task->s)) { + if (rspamd_session_blocked(task->s)) { return FALSE; } @@ -1866,12 +1873,12 @@ 
rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, /* * Add the current key to the set of learned keys */ - redisAsyncCommand (rt->redis, NULL, NULL, "SADD %s_keys %s", - rt->stcf->symbol, rt->redis_object_expanded); + redisAsyncCommand(rt->redis, NULL, NULL, "SADD %s_keys %s", + rt->stcf->symbol, rt->redis_object_expanded); if (rt->ctx->new_schema) { - redisAsyncCommand (rt->redis, NULL, NULL, "HSET %s version 2", - rt->redis_object_expanded); + redisAsyncCommand(rt->redis, NULL, NULL, "HSET %s version 2", + rt->redis_object_expanded); } if (rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER) { @@ -1882,10 +1889,10 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, } rt->id = id; - query = rspamd_redis_tokens_to_query (task, rt, tokens, - redis_cmd, rt->redis_object_expanded, TRUE, id, - rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER); - g_assert (query != NULL); + query = rspamd_redis_tokens_to_query(task, rt, tokens, + redis_cmd, rt->redis_object_expanded, TRUE, id, + rt->stcf->clcf->flags & RSPAMD_FLAG_CLASSIFIER_INTEGER); + g_assert(query != NULL); query->len = 0; /* @@ -1894,85 +1901,85 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, * we could understand that we are learning or unlearning */ - tok = g_ptr_array_index (task->tokens, 0); + tok = g_ptr_array_index(task->tokens, 0); if (tok->values[id] > 0) { - rspamd_printf_fstring (&query, "" - "*4\r\n" - "$7\r\n" - "HINCRBY\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n" /* Learned key */ - "$1\r\n" - "1\r\n", - (gint)strlen (rt->redis_object_expanded), - rt->redis_object_expanded, - (gint)strlen (learned_key), - learned_key); + rspamd_printf_fstring(&query, "" + "*4\r\n" + "$7\r\n" + "HINCRBY\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n" /* Learned key */ + "$1\r\n" + "1\r\n", + (gint) strlen(rt->redis_object_expanded), + rt->redis_object_expanded, + (gint) strlen(learned_key), + learned_key); } else { - 
rspamd_printf_fstring (&query, "" - "*4\r\n" - "$7\r\n" - "HINCRBY\r\n" - "$%d\r\n" - "%s\r\n" - "$%d\r\n" - "%s\r\n" /* Learned key */ - "$2\r\n" - "-1\r\n", - (gint)strlen (rt->redis_object_expanded), - rt->redis_object_expanded, - (gint)strlen (learned_key), - learned_key); - } - - ret = redisAsyncFormattedCommand (rt->redis, NULL, NULL, - query->str, query->len); + rspamd_printf_fstring(&query, "" + "*4\r\n" + "$7\r\n" + "HINCRBY\r\n" + "$%d\r\n" + "%s\r\n" + "$%d\r\n" + "%s\r\n" /* Learned key */ + "$2\r\n" + "-1\r\n", + (gint) strlen(rt->redis_object_expanded), + rt->redis_object_expanded, + (gint) strlen(learned_key), + learned_key); + } + + ret = redisAsyncFormattedCommand(rt->redis, NULL, NULL, + query->str, query->len); if (ret != REDIS_OK) { - msg_err_task ("call to redis failed: %s", rt->redis->errstr); - rspamd_fstring_free (query); + msg_err_task("call to redis failed: %s", rt->redis->errstr); + rspamd_fstring_free(query); return FALSE; } off = query->len; - ret = rspamd_printf_fstring (&query, "*1\r\n$4\r\nEXEC\r\n"); - ret = redisAsyncFormattedCommand (rt->redis, rspamd_redis_learned, rt, - query->str + off, ret); - rspamd_mempool_add_destructor (task->task_pool, - (rspamd_mempool_destruct_t)rspamd_fstring_free, query); + ret = rspamd_printf_fstring(&query, "*1\r\n$4\r\nEXEC\r\n"); + ret = redisAsyncFormattedCommand(rt->redis, rspamd_redis_learned, rt, + query->str + off, ret); + rspamd_mempool_add_destructor(task->task_pool, + (rspamd_mempool_destruct_t) rspamd_fstring_free, query); if (ret == REDIS_OK) { /* Add signature if needed */ if (rt->ctx->enable_signatures) { - rspamd_redis_store_stat_signature (task, rt, tokens, - "RSIG"); + rspamd_redis_store_stat_signature(task, rt, tokens, + "RSIG"); } - rspamd_session_add_event (task->s, NULL, rt, M); + rspamd_session_add_event(task->s, NULL, rt, M); rt->has_event = TRUE; /* Set timeout */ - if (ev_can_stop (&rt->timeout_event)) { + if (ev_can_stop(&rt->timeout_event)) { rt->timeout_event.repeat = 
rt->ctx->timeout; - ev_timer_again (task->event_loop, &rt->timeout_event); + ev_timer_again(task->event_loop, &rt->timeout_event); } else { rt->timeout_event.data = rt; - ev_timer_init (&rt->timeout_event, rspamd_redis_timeout, - rt->ctx->timeout, 0.); - ev_timer_start (task->event_loop, &rt->timeout_event); + ev_timer_init(&rt->timeout_event, rspamd_redis_timeout, + rt->ctx->timeout, 0.); + ev_timer_start(task->event_loop, &rt->timeout_event); } return TRUE; } else { - msg_err_task ("call to redis failed: %s", rt->redis->errstr); + msg_err_task("call to redis failed: %s", rt->redis->errstr); } return FALSE; @@ -1980,67 +1987,67 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, gboolean -rspamd_redis_finalize_learn (struct rspamd_task *task, gpointer runtime, - gpointer ctx, GError **err) +rspamd_redis_finalize_learn(struct rspamd_task *task, gpointer runtime, + gpointer ctx, GError **err) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (runtime); + struct redis_stat_runtime *rt = REDIS_RUNTIME(runtime); if (rt->err) { - g_propagate_error (err, rt->err); + g_propagate_error(err, rt->err); rt->err = NULL; - rspamd_redis_fin (rt); + rspamd_redis_fin(rt); return FALSE; } - rspamd_redis_fin (rt); + rspamd_redis_fin(rt); return TRUE; } gulong -rspamd_redis_total_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_redis_total_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (runtime); + struct redis_stat_runtime *rt = REDIS_RUNTIME(runtime); return rt->learned; } gulong -rspamd_redis_inc_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_redis_inc_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (runtime); + struct redis_stat_runtime *rt = REDIS_RUNTIME(runtime); /* XXX: may cause races */ return rt->learned + 1; } gulong -rspamd_redis_dec_learns (struct 
rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_redis_dec_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (runtime); + struct redis_stat_runtime *rt = REDIS_RUNTIME(runtime); /* XXX: may cause races */ return rt->learned + 1; } gulong -rspamd_redis_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_redis_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (runtime); + struct redis_stat_runtime *rt = REDIS_RUNTIME(runtime); return rt->learned; } ucl_object_t * -rspamd_redis_get_stat (gpointer runtime, - gpointer ctx) +rspamd_redis_get_stat(gpointer runtime, + gpointer ctx) { - struct redis_stat_runtime *rt = REDIS_RUNTIME (runtime); + struct redis_stat_runtime *rt = REDIS_RUNTIME(runtime); struct rspamd_redis_stat_elt *st; redisAsyncContext *redis; @@ -2050,11 +2057,11 @@ rspamd_redis_get_stat (gpointer runtime, if (rt->redis) { redis = rt->redis; rt->redis = NULL; - redisAsyncFree (redis); + redisAsyncFree(redis); } if (st->stat) { - return ucl_object_ref (st->stat); + return ucl_object_ref(st->stat); } } @@ -2062,8 +2069,8 @@ rspamd_redis_get_stat (gpointer runtime, } gpointer -rspamd_redis_load_tokenizer_config (gpointer runtime, - gsize *len) +rspamd_redis_load_tokenizer_config(gpointer runtime, + gsize *len) { return NULL; } diff --git a/src/libstat/backends/sqlite3_backend.c b/src/libstat/backends/sqlite3_backend.c index 45e51fa7b..2fd34d83f 100644 --- a/src/libstat/backends/sqlite3_backend.c +++ b/src/libstat/backends/sqlite3_backend.c @@ -48,33 +48,33 @@ struct rspamd_stat_sqlite3_rt { }; static const char *create_tables_sql = - "BEGIN IMMEDIATE;" - "CREATE TABLE tokenizer(data BLOB);" - "CREATE TABLE users(" - "id INTEGER PRIMARY KEY," - "name TEXT," - "learns INTEGER" - ");" - "CREATE TABLE languages(" - "id INTEGER PRIMARY KEY," - "name TEXT," - "learns INTEGER" - ");" - "CREATE TABLE 
tokens(" - "token INTEGER NOT NULL," - "user INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE," - "language INTEGER NOT NULL REFERENCES languages(id) ON DELETE CASCADE," - "value INTEGER," - "modified INTEGER," - "CONSTRAINT tid UNIQUE (token, user, language) ON CONFLICT REPLACE" - ");" - "CREATE UNIQUE INDEX IF NOT EXISTS un ON users(name);" - "CREATE INDEX IF NOT EXISTS tok ON tokens(token);" - "CREATE UNIQUE INDEX IF NOT EXISTS ln ON languages(name);" - "PRAGMA user_version=" SQLITE3_SCHEMA_VERSION ";" - "INSERT INTO users(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);" - "INSERT INTO languages(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);" - "COMMIT;"; + "BEGIN IMMEDIATE;" + "CREATE TABLE tokenizer(data BLOB);" + "CREATE TABLE users(" + "id INTEGER PRIMARY KEY," + "name TEXT," + "learns INTEGER" + ");" + "CREATE TABLE languages(" + "id INTEGER PRIMARY KEY," + "name TEXT," + "learns INTEGER" + ");" + "CREATE TABLE tokens(" + "token INTEGER NOT NULL," + "user INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE," + "language INTEGER NOT NULL REFERENCES languages(id) ON DELETE CASCADE," + "value INTEGER," + "modified INTEGER," + "CONSTRAINT tid UNIQUE (token, user, language) ON CONFLICT REPLACE" + ");" + "CREATE UNIQUE INDEX IF NOT EXISTS un ON users(name);" + "CREATE INDEX IF NOT EXISTS tok ON tokens(token);" + "CREATE UNIQUE INDEX IF NOT EXISTS ln ON languages(name);" + "PRAGMA user_version=" SQLITE3_SCHEMA_VERSION ";" + "INSERT INTO users(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);" + "INSERT INTO languages(id, name, learns) VALUES(0, '" SQLITE3_DEFAULT "',0);" + "COMMIT;"; enum rspamd_stat_sqlite3_stmt_idx { RSPAMD_STAT_BACKEND_TRANSACTION_START_IM = 0, @@ -103,221 +103,62 @@ enum rspamd_stat_sqlite3_stmt_idx { }; static struct rspamd_sqlite3_prstmt prepared_stmts[RSPAMD_STAT_BACKEND_MAX] = -{ - [RSPAMD_STAT_BACKEND_TRANSACTION_START_IM] = { - .idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_IM, - .sql = "BEGIN IMMEDIATE 
TRANSACTION;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .flags = 0, - .ret = "", - }, - [RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF] = { - .idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF, - .sql = "BEGIN DEFERRED TRANSACTION;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL] = { - .idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL, - .sql = "BEGIN EXCLUSIVE TRANSACTION;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT] = { - .idx = RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT, - .sql = "COMMIT;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK] = { - .idx = RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK, - .sql = "ROLLBACK;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_GET_TOKEN_FULL] = { - .idx = RSPAMD_STAT_BACKEND_GET_TOKEN_FULL, - .sql = "SELECT value FROM tokens " - "LEFT JOIN languages ON tokens.language=languages.id " - "LEFT JOIN users ON tokens.user=users.id " - "WHERE token=?1 AND (users.id=?2) " - "AND (languages.id=?3 OR languages.id=0);", - .stmt = NULL, - .args = "III", - .result = SQLITE_ROW, - .flags = 0, - .ret = "I" - }, - [RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE] = { - .idx = RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE, - .sql = "SELECT value FROM tokens WHERE token=?1", + { + [RSPAMD_STAT_BACKEND_TRANSACTION_START_IM] = { + .idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_IM, + .sql = "BEGIN IMMEDIATE TRANSACTION;", + .args = "", .stmt = NULL, - .args = "I", - .result = SQLITE_ROW, + .result = SQLITE_DONE, .flags = 0, - .ret = "I" - }, - [RSPAMD_STAT_BACKEND_SET_TOKEN] = { - .idx = RSPAMD_STAT_BACKEND_SET_TOKEN, - .sql = "INSERT OR REPLACE INTO tokens (token, user, language, value, modified) " - "VALUES (?1, ?2, ?3, ?4, strftime('%s','now'))", - 
.stmt = NULL, - .args = "IIII", - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_INC_LEARNS_LANG] = { - .idx = RSPAMD_STAT_BACKEND_INC_LEARNS_LANG, - .sql = "UPDATE languages SET learns=learns + 1 WHERE id=?1", - .stmt = NULL, - .args = "I", - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_INC_LEARNS_USER] = { - .idx = RSPAMD_STAT_BACKEND_INC_LEARNS_USER, - .sql = "UPDATE users SET learns=learns + 1 WHERE id=?1", - .stmt = NULL, - .args = "I", - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG] = { - .idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG, - .sql = "UPDATE languages SET learns=MAX(0, learns - 1) WHERE id=?1", - .stmt = NULL, - .args = "I", - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_DEC_LEARNS_USER] = { - .idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_USER, - .sql = "UPDATE users SET learns=MAX(0, learns - 1) WHERE id=?1", - .stmt = NULL, - .args = "I", - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_GET_LEARNS] = { - .idx = RSPAMD_STAT_BACKEND_GET_LEARNS, - .sql = "SELECT SUM(MAX(0, learns)) FROM languages", - .stmt = NULL, - .args = "", - .result = SQLITE_ROW, - .flags = 0, - .ret = "I" - }, - [RSPAMD_STAT_BACKEND_GET_LANGUAGE] = { - .idx = RSPAMD_STAT_BACKEND_GET_LANGUAGE, - .sql = "SELECT id FROM languages WHERE name=?1", - .stmt = NULL, - .args = "T", - .result = SQLITE_ROW, - .flags = 0, - .ret = "I" - }, - [RSPAMD_STAT_BACKEND_GET_USER] = { - .idx = RSPAMD_STAT_BACKEND_GET_USER, - .sql = "SELECT id FROM users WHERE name=?1", - .stmt = NULL, - .args = "T", - .result = SQLITE_ROW, - .flags = 0, - .ret = "I" - }, - [RSPAMD_STAT_BACKEND_INSERT_USER] = { - .idx = RSPAMD_STAT_BACKEND_INSERT_USER, - .sql = "INSERT INTO users (name, learns) VALUES (?1, 0)", - .stmt = NULL, - .args = "T", - .result = SQLITE_DONE, - .flags = 0, - .ret = "L" - }, - [RSPAMD_STAT_BACKEND_INSERT_LANGUAGE] = { - .idx = 
RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, - .sql = "INSERT INTO languages (name, learns) VALUES (?1, 0)", - .stmt = NULL, - .args = "T", - .result = SQLITE_DONE, - .flags = 0, - .ret = "L" - }, - [RSPAMD_STAT_BACKEND_SAVE_TOKENIZER] = { - .idx = RSPAMD_STAT_BACKEND_SAVE_TOKENIZER, - .sql = "INSERT INTO tokenizer(data) VALUES (?1)", - .stmt = NULL, - .args = "B", - .result = SQLITE_DONE, - .flags = 0, - .ret = "" - }, - [RSPAMD_STAT_BACKEND_LOAD_TOKENIZER] = { - .idx = RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, - .sql = "SELECT data FROM tokenizer", - .stmt = NULL, - .args = "", - .result = SQLITE_ROW, - .flags = 0, - .ret = "B" - }, - [RSPAMD_STAT_BACKEND_NTOKENS] = { - .idx = RSPAMD_STAT_BACKEND_NTOKENS, - .sql = "SELECT COUNT(*) FROM tokens", - .stmt = NULL, - .args = "", - .result = SQLITE_ROW, - .flags = 0, - .ret = "I" - }, - [RSPAMD_STAT_BACKEND_NLANGUAGES] = { - .idx = RSPAMD_STAT_BACKEND_NLANGUAGES, - .sql = "SELECT COUNT(*) FROM languages", - .stmt = NULL, - .args = "", - .result = SQLITE_ROW, - .flags = 0, - .ret = "I" - }, - [RSPAMD_STAT_BACKEND_NUSERS] = { - .idx = RSPAMD_STAT_BACKEND_NUSERS, - .sql = "SELECT COUNT(*) FROM users", - .stmt = NULL, - .args = "", - .result = SQLITE_ROW, - .flags = 0, - .ret = "I" - } -}; + .ret = "", + }, + [RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF, .sql = "BEGIN DEFERRED TRANSACTION;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL, .sql = "BEGIN EXCLUSIVE TRANSACTION;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT, .sql = "COMMIT;", .args = "", .stmt = NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK] = {.idx = RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK, .sql = "ROLLBACK;", .args = "", .stmt = 
NULL, .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_GET_TOKEN_FULL] = {.idx = RSPAMD_STAT_BACKEND_GET_TOKEN_FULL, .sql = "SELECT value FROM tokens " + "LEFT JOIN languages ON tokens.language=languages.id " + "LEFT JOIN users ON tokens.user=users.id " + "WHERE token=?1 AND (users.id=?2) " + "AND (languages.id=?3 OR languages.id=0);", + .stmt = NULL, + .args = "III", + .result = SQLITE_ROW, + .flags = 0, + .ret = "I"}, + [RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE] = {.idx = RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE, .sql = "SELECT value FROM tokens WHERE token=?1", .stmt = NULL, .args = "I", .result = SQLITE_ROW, .flags = 0, .ret = "I"}, + [RSPAMD_STAT_BACKEND_SET_TOKEN] = {.idx = RSPAMD_STAT_BACKEND_SET_TOKEN, .sql = "INSERT OR REPLACE INTO tokens (token, user, language, value, modified) " + "VALUES (?1, ?2, ?3, ?4, strftime('%s','now'))", + .stmt = NULL, + .args = "IIII", + .result = SQLITE_DONE, + .flags = 0, + .ret = ""}, + [RSPAMD_STAT_BACKEND_INC_LEARNS_LANG] = {.idx = RSPAMD_STAT_BACKEND_INC_LEARNS_LANG, .sql = "UPDATE languages SET learns=learns + 1 WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_INC_LEARNS_USER] = {.idx = RSPAMD_STAT_BACKEND_INC_LEARNS_USER, .sql = "UPDATE users SET learns=learns + 1 WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG] = {.idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG, .sql = "UPDATE languages SET learns=MAX(0, learns - 1) WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_DEC_LEARNS_USER] = {.idx = RSPAMD_STAT_BACKEND_DEC_LEARNS_USER, .sql = "UPDATE users SET learns=MAX(0, learns - 1) WHERE id=?1", .stmt = NULL, .args = "I", .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_GET_LEARNS] = {.idx = RSPAMD_STAT_BACKEND_GET_LEARNS, .sql = "SELECT SUM(MAX(0, learns)) FROM languages", .stmt = NULL, .args = 
"", .result = SQLITE_ROW, .flags = 0, .ret = "I"}, + [RSPAMD_STAT_BACKEND_GET_LANGUAGE] = {.idx = RSPAMD_STAT_BACKEND_GET_LANGUAGE, .sql = "SELECT id FROM languages WHERE name=?1", .stmt = NULL, .args = "T", .result = SQLITE_ROW, .flags = 0, .ret = "I"}, + [RSPAMD_STAT_BACKEND_GET_USER] = {.idx = RSPAMD_STAT_BACKEND_GET_USER, .sql = "SELECT id FROM users WHERE name=?1", .stmt = NULL, .args = "T", .result = SQLITE_ROW, .flags = 0, .ret = "I"}, + [RSPAMD_STAT_BACKEND_INSERT_USER] = {.idx = RSPAMD_STAT_BACKEND_INSERT_USER, .sql = "INSERT INTO users (name, learns) VALUES (?1, 0)", .stmt = NULL, .args = "T", .result = SQLITE_DONE, .flags = 0, .ret = "L"}, + [RSPAMD_STAT_BACKEND_INSERT_LANGUAGE] = {.idx = RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, .sql = "INSERT INTO languages (name, learns) VALUES (?1, 0)", .stmt = NULL, .args = "T", .result = SQLITE_DONE, .flags = 0, .ret = "L"}, + [RSPAMD_STAT_BACKEND_SAVE_TOKENIZER] = {.idx = RSPAMD_STAT_BACKEND_SAVE_TOKENIZER, .sql = "INSERT INTO tokenizer(data) VALUES (?1)", .stmt = NULL, .args = "B", .result = SQLITE_DONE, .flags = 0, .ret = ""}, + [RSPAMD_STAT_BACKEND_LOAD_TOKENIZER] = {.idx = RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, .sql = "SELECT data FROM tokenizer", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "B"}, + [RSPAMD_STAT_BACKEND_NTOKENS] = {.idx = RSPAMD_STAT_BACKEND_NTOKENS, .sql = "SELECT COUNT(*) FROM tokens", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"}, + [RSPAMD_STAT_BACKEND_NLANGUAGES] = {.idx = RSPAMD_STAT_BACKEND_NLANGUAGES, .sql = "SELECT COUNT(*) FROM languages", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"}, + [RSPAMD_STAT_BACKEND_NUSERS] = {.idx = RSPAMD_STAT_BACKEND_NUSERS, .sql = "SELECT COUNT(*) FROM users", .stmt = NULL, .args = "", .result = SQLITE_ROW, .flags = 0, .ret = "I"}}; static GQuark -rspamd_sqlite3_backend_quark (void) +rspamd_sqlite3_backend_quark(void) { - return g_quark_from_static_string ("sqlite3-stat-backend"); + return 
g_quark_from_static_string("sqlite3-stat-backend"); } static gint64 -rspamd_sqlite3_get_user (struct rspamd_stat_sqlite3_db *db, - struct rspamd_task *task, gboolean learn) +rspamd_sqlite3_get_user(struct rspamd_stat_sqlite3_db *db, + struct rspamd_task *task, gboolean learn) { gint64 id = 0; /* Default user is 0 */ gint rc, err_idx; @@ -326,48 +167,48 @@ rspamd_sqlite3_get_user (struct rspamd_stat_sqlite3_db *db, lua_State *L = db->L; if (db->cbref_user == -1) { - user = rspamd_task_get_principal_recipient (task); + user = rspamd_task_get_principal_recipient(task); } else { /* Execute lua function to get userdata */ - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); - lua_rawgeti (L, LUA_REGISTRYINDEX, db->cbref_user); - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); + lua_rawgeti(L, LUA_REGISTRYINDEX, db->cbref_user); + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); + rspamd_lua_setclass(L, "rspamd{task}", -1); - if (lua_pcall (L, 1, 1, err_idx) != 0) { - msg_err_task ("call to user extraction script failed: %s", - lua_tostring (L, -1)); + if (lua_pcall(L, 1, 1, err_idx) != 0) { + msg_err_task("call to user extraction script failed: %s", + lua_tostring(L, -1)); } else { - user = rspamd_mempool_strdup (task->task_pool, lua_tostring (L, -1)); + user = rspamd_mempool_strdup(task->task_pool, lua_tostring(L, -1)); } /* Result + error function */ - lua_settop (L, err_idx - 1); + lua_settop(L, err_idx - 1); } if (user != NULL) { - rspamd_mempool_set_variable (task->task_pool, "stat_user", - (gpointer)user, NULL); + rspamd_mempool_set_variable(task->task_pool, "stat_user", + (gpointer) user, NULL); - rc = rspamd_sqlite3_run_prstmt (task->task_pool, db->sqlite, db->prstmt, - RSPAMD_STAT_BACKEND_GET_USER, user, &id); + rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt, + 
RSPAMD_STAT_BACKEND_GET_USER, user, &id); if (rc != SQLITE_OK && learn) { /* We need to insert a new user */ if (!db->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, db->sqlite, db->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_START_IM); + rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_START_IM); db->in_transaction = TRUE; } - rc = rspamd_sqlite3_run_prstmt (task->task_pool, db->sqlite, db->prstmt, - RSPAMD_STAT_BACKEND_INSERT_USER, user, &id); + rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt, + RSPAMD_STAT_BACKEND_INSERT_USER, user, &id); } } @@ -375,8 +216,8 @@ rspamd_sqlite3_get_user (struct rspamd_stat_sqlite3_db *db, } static gint64 -rspamd_sqlite3_get_language (struct rspamd_stat_sqlite3_db *db, - struct rspamd_task *task, gboolean learn) +rspamd_sqlite3_get_language(struct rspamd_stat_sqlite3_db *db, + struct rspamd_task *task, gboolean learn) { gint64 id = 0; /* Default language is 0 */ gint rc, err_idx; @@ -387,10 +228,11 @@ rspamd_sqlite3_get_language (struct rspamd_stat_sqlite3_db *db, lua_State *L = db->L; if (db->cbref_language == -1) { - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, tp) + { if (tp->language != NULL && tp->language[0] != '\0' && - strcmp (tp->language, "en") != 0) { + strcmp(tp->language, "en") != 0) { language = tp->language; break; } @@ -398,43 +240,43 @@ rspamd_sqlite3_get_language (struct rspamd_stat_sqlite3_db *db, } else { /* Execute lua function to get userdata */ - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); - lua_rawgeti (L, LUA_REGISTRYINDEX, db->cbref_language); - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); + lua_rawgeti(L, LUA_REGISTRYINDEX, db->cbref_language); + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); *ptask = task; - 
rspamd_lua_setclass (L, "rspamd{task}", -1); + rspamd_lua_setclass(L, "rspamd{task}", -1); - if (lua_pcall (L, 1, 1, err_idx) != 0) { - msg_err_task ("call to language extraction script failed: %s", - lua_tostring (L, -1)); + if (lua_pcall(L, 1, 1, err_idx) != 0) { + msg_err_task("call to language extraction script failed: %s", + lua_tostring(L, -1)); } else { - language = rspamd_mempool_strdup (task->task_pool, - lua_tostring (L, -1)); + language = rspamd_mempool_strdup(task->task_pool, + lua_tostring(L, -1)); } /* Result + error function */ - lua_settop (L, err_idx - 1); + lua_settop(L, err_idx - 1); } /* XXX: We ignore multiple languages but default + extra */ if (language != NULL) { - rc = rspamd_sqlite3_run_prstmt (task->task_pool, db->sqlite, db->prstmt, - RSPAMD_STAT_BACKEND_GET_LANGUAGE, language, &id); + rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt, + RSPAMD_STAT_BACKEND_GET_LANGUAGE, language, &id); if (rc != SQLITE_OK && learn) { /* We need to insert a new language */ if (!db->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, db->sqlite, db->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_START_IM); + rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_START_IM); db->in_transaction = TRUE; } - rc = rspamd_sqlite3_run_prstmt (task->task_pool, db->sqlite, db->prstmt, - RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, language, &id); + rc = rspamd_sqlite3_run_prstmt(task->task_pool, db->sqlite, db->prstmt, + RSPAMD_STAT_BACKEND_INSERT_LANGUAGE, language, &id); } } @@ -442,10 +284,10 @@ rspamd_sqlite3_get_language (struct rspamd_stat_sqlite3_db *db, } static struct rspamd_stat_sqlite3_db * -rspamd_sqlite3_opendb (rspamd_mempool_t *pool, - struct rspamd_statfile_config *stcf, - const gchar *path, const ucl_object_t *opts, - gboolean create, GError **err) +rspamd_sqlite3_opendb(rspamd_mempool_t *pool, + struct rspamd_statfile_config *stcf, + const gchar *path, const ucl_object_t *opts, + 
gboolean create, GError **err) { struct rspamd_stat_sqlite3_db *bk; struct rspamd_stat_tokenizer *tokenizer; @@ -456,80 +298,79 @@ rspamd_sqlite3_opendb (rspamd_mempool_t *pool, gint ret, ntries = 0; const gint max_tries = 100; struct timespec sleep_ts = { - .tv_sec = 0, - .tv_nsec = 1000000 - }; + .tv_sec = 0, + .tv_nsec = 1000000}; - bk = g_malloc0 (sizeof (*bk)); - bk->sqlite = rspamd_sqlite3_open_or_create (pool, path, create_tables_sql, - 0, err); + bk = g_malloc0(sizeof(*bk)); + bk->sqlite = rspamd_sqlite3_open_or_create(pool, path, create_tables_sql, + 0, err); bk->pool = pool; if (bk->sqlite == NULL) { - g_free (bk); + g_free(bk); return NULL; } - bk->fname = g_strdup (path); + bk->fname = g_strdup(path); - bk->prstmt = rspamd_sqlite3_init_prstmt (bk->sqlite, prepared_stmts, - RSPAMD_STAT_BACKEND_MAX, err); + bk->prstmt = rspamd_sqlite3_init_prstmt(bk->sqlite, prepared_stmts, + RSPAMD_STAT_BACKEND_MAX, err); if (bk->prstmt == NULL) { - sqlite3_close (bk->sqlite); - g_free (bk); + sqlite3_close(bk->sqlite); + g_free(bk); return NULL; } /* Check tokenizer configuration */ - if (rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz64, &tk_conf) != SQLITE_OK || - sz64 == 0) { + if (rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz64, &tk_conf) != SQLITE_OK || + sz64 == 0) { - while ((ret = rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL)) == SQLITE_BUSY && + while ((ret = rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_START_EXCL)) == SQLITE_BUSY && ++ntries <= max_tries) { - nanosleep (&sleep_ts, NULL); + nanosleep(&sleep_ts, NULL); } - msg_info_pool ("absent tokenizer conf in %s, creating a new one", - bk->fname); - g_assert (stcf->clcf->tokenizer != NULL); - tokenizer = rspamd_stat_get_tokenizer (stcf->clcf->tokenizer->name); - g_assert (tokenizer != NULL); - tk_conf = 
tokenizer->get_config (pool, stcf->clcf->tokenizer, &sz); + msg_info_pool("absent tokenizer conf in %s, creating a new one", + bk->fname); + g_assert(stcf->clcf->tokenizer != NULL); + tokenizer = rspamd_stat_get_tokenizer(stcf->clcf->tokenizer->name); + g_assert(tokenizer != NULL); + tk_conf = tokenizer->get_config(pool, stcf->clcf->tokenizer, &sz); /* Encode to base32 */ - tok_conf_encoded = rspamd_encode_base32 (tk_conf, sz, RSPAMD_BASE32_DEFAULT); + tok_conf_encoded = rspamd_encode_base32(tk_conf, sz, RSPAMD_BASE32_DEFAULT); - if (rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_SAVE_TOKENIZER, - (gint64)strlen (tok_conf_encoded), - tok_conf_encoded) != SQLITE_OK) { - sqlite3_close (bk->sqlite); - g_free (bk); - g_free (tok_conf_encoded); + if (rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_SAVE_TOKENIZER, + (gint64) strlen(tok_conf_encoded), + tok_conf_encoded) != SQLITE_OK) { + sqlite3_close(bk->sqlite); + g_free(bk); + g_free(tok_conf_encoded); return NULL; } - rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); - g_free (tok_conf_encoded); + rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); + g_free(tok_conf_encoded); } else { - g_free (tk_conf); + g_free(tk_conf); } return bk; } gpointer -rspamd_sqlite3_init (struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, - struct rspamd_statfile *st) +rspamd_sqlite3_init(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st) { struct rspamd_classifier_config *clf = st->classifier->cfg; struct rspamd_statfile_config *stf = st->stcf; @@ -538,51 +379,52 @@ rspamd_sqlite3_init (struct rspamd_stat_ctx *ctx, struct rspamd_stat_sqlite3_db *bk; GError *err = NULL; - filenameo = ucl_object_lookup (stf->opts, "filename"); - if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { - filenameo = ucl_object_lookup 
(stf->opts, "path"); - if (filenameo == NULL || ucl_object_type (filenameo) != UCL_STRING) { - msg_err_config ("statfile %s has no filename defined", stf->symbol); + filenameo = ucl_object_lookup(stf->opts, "filename"); + if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) { + filenameo = ucl_object_lookup(stf->opts, "path"); + if (filenameo == NULL || ucl_object_type(filenameo) != UCL_STRING) { + msg_err_config("statfile %s has no filename defined", stf->symbol); return NULL; } } - filename = ucl_object_tostring (filenameo); + filename = ucl_object_tostring(filenameo); - if ((bk = rspamd_sqlite3_opendb (cfg->cfg_pool, stf, filename, - stf->opts, TRUE, &err)) == NULL) { - msg_err_config ("cannot open sqlite3 db %s: %e", filename, err); - g_error_free (err); + if ((bk = rspamd_sqlite3_opendb(cfg->cfg_pool, stf, filename, + stf->opts, TRUE, &err)) == NULL) { + msg_err_config("cannot open sqlite3 db %s: %e", filename, err); + g_error_free(err); return NULL; } bk->L = cfg->lua_state; - users_enabled = ucl_object_lookup_any (clf->opts, "per_user", - "users_enabled", NULL); + users_enabled = ucl_object_lookup_any(clf->opts, "per_user", + "users_enabled", NULL); if (users_enabled != NULL) { - if (ucl_object_type (users_enabled) == UCL_BOOLEAN) { - bk->enable_users = ucl_object_toboolean (users_enabled); + if (ucl_object_type(users_enabled) == UCL_BOOLEAN) { + bk->enable_users = ucl_object_toboolean(users_enabled); bk->cbref_user = -1; } - else if (ucl_object_type (users_enabled) == UCL_STRING) { - lua_script = ucl_object_tostring (users_enabled); + else if (ucl_object_type(users_enabled) == UCL_STRING) { + lua_script = ucl_object_tostring(users_enabled); - if (luaL_dostring (cfg->lua_state, lua_script) != 0) { - msg_err_config ("cannot execute lua script for users " - "extraction: %s", lua_tostring (cfg->lua_state, -1)); + if (luaL_dostring(cfg->lua_state, lua_script) != 0) { + msg_err_config("cannot execute lua script for users " + "extraction: %s", + 
lua_tostring(cfg->lua_state, -1)); } else { - if (lua_type (cfg->lua_state, -1) == LUA_TFUNCTION) { + if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) { bk->enable_users = TRUE; - bk->cbref_user = luaL_ref (cfg->lua_state, - LUA_REGISTRYINDEX); + bk->cbref_user = luaL_ref(cfg->lua_state, + LUA_REGISTRYINDEX); } else { - msg_err_config ("lua script must return " - "function(task) and not %s", - lua_typename (cfg->lua_state, lua_type ( - cfg->lua_state, -1))); + msg_err_config("lua script must return " + "function(task) and not %s", + lua_typename(cfg->lua_state, lua_type( + cfg->lua_state, -1))); } } } @@ -591,34 +433,34 @@ rspamd_sqlite3_init (struct rspamd_stat_ctx *ctx, bk->enable_users = FALSE; } - lang_enabled = ucl_object_lookup_any (clf->opts, - "per_language", "languages_enabled", NULL); + lang_enabled = ucl_object_lookup_any(clf->opts, + "per_language", "languages_enabled", NULL); if (lang_enabled != NULL) { - if (ucl_object_type (lang_enabled) == UCL_BOOLEAN) { - bk->enable_languages = ucl_object_toboolean (lang_enabled); + if (ucl_object_type(lang_enabled) == UCL_BOOLEAN) { + bk->enable_languages = ucl_object_toboolean(lang_enabled); bk->cbref_language = -1; } - else if (ucl_object_type (lang_enabled) == UCL_STRING) { - lua_script = ucl_object_tostring (lang_enabled); - - if (luaL_dostring (cfg->lua_state, lua_script) != 0) { - msg_err_config ( - "cannot execute lua script for languages " - "extraction: %s", - lua_tostring (cfg->lua_state, -1)); + else if (ucl_object_type(lang_enabled) == UCL_STRING) { + lua_script = ucl_object_tostring(lang_enabled); + + if (luaL_dostring(cfg->lua_state, lua_script) != 0) { + msg_err_config( + "cannot execute lua script for languages " + "extraction: %s", + lua_tostring(cfg->lua_state, -1)); } else { - if (lua_type (cfg->lua_state, -1) == LUA_TFUNCTION) { + if (lua_type(cfg->lua_state, -1) == LUA_TFUNCTION) { bk->enable_languages = TRUE; - bk->cbref_language = luaL_ref (cfg->lua_state, - LUA_REGISTRYINDEX); + 
bk->cbref_language = luaL_ref(cfg->lua_state, + LUA_REGISTRYINDEX); } else { - msg_err_config ("lua script must return " - "function(task) and not %s", - lua_typename (cfg->lua_state, - lua_type (cfg->lua_state, -1))); + msg_err_config("lua script must return " + "function(task) and not %s", + lua_typename(cfg->lua_state, + lua_type(cfg->lua_state, -1))); } } } @@ -628,46 +470,45 @@ rspamd_sqlite3_init (struct rspamd_stat_ctx *ctx, } if (bk->enable_languages) { - msg_info_config ("enable per language statistics for %s", - stf->symbol); + msg_info_config("enable per language statistics for %s", + stf->symbol); } if (bk->enable_users) { - msg_info_config ("enable per users statistics for %s", - stf->symbol); + msg_info_config("enable per users statistics for %s", + stf->symbol); } return (gpointer) bk; } -void -rspamd_sqlite3_close (gpointer p) +void rspamd_sqlite3_close(gpointer p) { struct rspamd_stat_sqlite3_db *bk = p; if (bk->sqlite) { if (bk->in_transaction) { - rspamd_sqlite3_run_prstmt (bk->pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(bk->pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); } - rspamd_sqlite3_close_prstmt (bk->sqlite, bk->prstmt); - sqlite3_close (bk->sqlite); - g_free (bk->fname); - g_free (bk); + rspamd_sqlite3_close_prstmt(bk->sqlite, bk->prstmt); + sqlite3_close(bk->sqlite); + g_free(bk->fname); + g_free(bk); } } gpointer -rspamd_sqlite3_runtime (struct rspamd_task *task, - struct rspamd_statfile_config *stcf, gboolean learn, gpointer p, gint _id) +rspamd_sqlite3_runtime(struct rspamd_task *task, + struct rspamd_statfile_config *stcf, gboolean learn, gpointer p, gint _id) { struct rspamd_stat_sqlite3_rt *rt = NULL; struct rspamd_stat_sqlite3_db *bk = p; if (bk) { - rt = rspamd_mempool_alloc (task->task_pool, sizeof (*rt)); + rt = rspamd_mempool_alloc(task->task_pool, sizeof(*rt)); rt->db = bk; rt->task = task; rt->user_id = -1; @@ -679,9 +520,9 @@ 
rspamd_sqlite3_runtime (struct rspamd_task *task, } gboolean -rspamd_sqlite3_process_tokens (struct rspamd_task *task, - GPtrArray *tokens, - gint id, gpointer p) +rspamd_sqlite3_process_tokens(struct rspamd_task *task, + GPtrArray *tokens, + gint id, gpointer p) { struct rspamd_stat_sqlite3_db *bk; struct rspamd_stat_sqlite3_rt *rt = p; @@ -689,13 +530,13 @@ rspamd_sqlite3_process_tokens (struct rspamd_task *task, guint i; rspamd_token_t *tok; - g_assert (p != NULL); - g_assert (tokens != NULL); + g_assert(p != NULL); + g_assert(tokens != NULL); bk = rt->db; - for (i = 0; i < tokens->len; i ++) { - tok = g_ptr_array_index (tokens, i); + for (i = 0; i < tokens->len; i++) { + tok = g_ptr_array_index(tokens, i); if (bk == NULL) { /* Statfile is does not exist, so all values are zero */ @@ -704,14 +545,14 @@ rspamd_sqlite3_process_tokens (struct rspamd_task *task, } if (!bk->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_START_DEF); bk->in_transaction = TRUE; } if (rt->user_id == -1) { if (bk->enable_users) { - rt->user_id = rspamd_sqlite3_get_user (bk, task, FALSE); + rt->user_id = rspamd_sqlite3_get_user(bk, task, FALSE); } else { rt->user_id = 0; @@ -720,7 +561,7 @@ rspamd_sqlite3_process_tokens (struct rspamd_task *task, if (rt->lang_id == -1) { if (bk->enable_languages) { - rt->lang_id = rspamd_sqlite3_get_language (bk, task, FALSE); + rt->lang_id = rspamd_sqlite3_get_language(bk, task, FALSE); } else { rt->lang_id = 0; @@ -728,9 +569,9 @@ rspamd_sqlite3_process_tokens (struct rspamd_task *task, } if (bk->enable_languages || bk->enable_users) { - if (rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_GET_TOKEN_FULL, - tok->data, rt->user_id, rt->lang_id, &iv) == SQLITE_OK) { + if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, 
bk->prstmt, + RSPAMD_STAT_BACKEND_GET_TOKEN_FULL, + tok->data, rt->user_id, rt->lang_id, &iv) == SQLITE_OK) { tok->values[id] = iv; } else { @@ -738,9 +579,9 @@ rspamd_sqlite3_process_tokens (struct rspamd_task *task, } } else { - if (rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE, - tok->data, &iv) == SQLITE_OK) { + if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_GET_TOKEN_SIMPLE, + tok->data, &iv) == SQLITE_OK) { tok->values[id] = iv; } else { @@ -761,18 +602,18 @@ rspamd_sqlite3_process_tokens (struct rspamd_task *task, } gboolean -rspamd_sqlite3_finalize_process (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_sqlite3_finalize_process(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { struct rspamd_stat_sqlite3_rt *rt = runtime; struct rspamd_stat_sqlite3_db *bk; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; if (bk->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); bk->in_transaction = FALSE; } @@ -783,8 +624,8 @@ rspamd_sqlite3_finalize_process (struct rspamd_task *task, gpointer runtime, } gboolean -rspamd_sqlite3_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, - gint id, gpointer p) +rspamd_sqlite3_learn_tokens(struct rspamd_task *task, GPtrArray *tokens, + gint id, gpointer p) { struct rspamd_stat_sqlite3_db *bk; struct rspamd_stat_sqlite3_rt *rt = p; @@ -792,27 +633,27 @@ rspamd_sqlite3_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, guint i; rspamd_token_t *tok; - g_assert (tokens != NULL); - g_assert (p != NULL); + g_assert(tokens != NULL); + g_assert(p != NULL); bk = rt->db; for (i = 0; i < tokens->len; i++) { - tok = g_ptr_array_index (tokens, i); + tok = g_ptr_array_index(tokens, i); if (bk 
== NULL) { /* Statfile is does not exist, so all values are zero */ return FALSE; } if (!bk->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_START_IM); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_START_IM); bk->in_transaction = TRUE; } if (rt->user_id == -1) { if (bk->enable_users) { - rt->user_id = rspamd_sqlite3_get_user (bk, task, TRUE); + rt->user_id = rspamd_sqlite3_get_user(bk, task, TRUE); } else { rt->user_id = 0; @@ -821,7 +662,7 @@ rspamd_sqlite3_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, if (rt->lang_id == -1) { if (bk->enable_languages) { - rt->lang_id = rspamd_sqlite3_get_language (bk, task, TRUE); + rt->lang_id = rspamd_sqlite3_get_language(bk, task, TRUE); } else { rt->lang_id = 0; @@ -830,11 +671,11 @@ rspamd_sqlite3_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, iv = tok->values[id]; - if (rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_SET_TOKEN, - tok->data, rt->user_id, rt->lang_id, iv) != SQLITE_OK) { - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK); + if (rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_SET_TOKEN, + tok->data, rt->user_id, rt->lang_id, iv) != SQLITE_OK) { + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_ROLLBACK); bk->in_transaction = FALSE; return FALSE; @@ -845,19 +686,19 @@ rspamd_sqlite3_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, } gboolean -rspamd_sqlite3_finalize_learn (struct rspamd_task *task, gpointer runtime, - gpointer ctx, GError **err) +rspamd_sqlite3_finalize_learn(struct rspamd_task *task, gpointer runtime, + gpointer ctx, GError **err) { struct rspamd_stat_sqlite3_rt *rt = runtime; struct rspamd_stat_sqlite3_db *bk; gint wal_frames, 
wal_checkpointed, mode; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; if (bk->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); bk->in_transaction = FALSE; } @@ -870,17 +711,17 @@ rspamd_sqlite3_finalize_learn (struct rspamd_task *task, gpointer runtime, mode = SQLITE_CHECKPOINT_FULL; #endif /* Perform wal checkpoint (might be long) */ - if (sqlite3_wal_checkpoint_v2 (bk->sqlite, - NULL, - mode, - &wal_frames, - &wal_checkpointed) != SQLITE_OK) { - msg_warn_task ("cannot commit checkpoint: %s", - sqlite3_errmsg (bk->sqlite)); - - g_set_error (err, rspamd_sqlite3_backend_quark (), 500, - "cannot commit checkpoint: %s", - sqlite3_errmsg (bk->sqlite)); + if (sqlite3_wal_checkpoint_v2(bk->sqlite, + NULL, + mode, + &wal_frames, + &wal_checkpointed) != SQLITE_OK) { + msg_warn_task("cannot commit checkpoint: %s", + sqlite3_errmsg(bk->sqlite)); + + g_set_error(err, rspamd_sqlite3_backend_quark(), 500, + "cannot commit checkpoint: %s", + sqlite3_errmsg(bk->sqlite)); return FALSE; } #endif @@ -889,98 +730,98 @@ rspamd_sqlite3_finalize_learn (struct rspamd_task *task, gpointer runtime, } gulong -rspamd_sqlite3_total_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_sqlite3_total_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { struct rspamd_stat_sqlite3_rt *rt = runtime; struct rspamd_stat_sqlite3_db *bk; guint64 res; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_GET_LEARNS, &res); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_GET_LEARNS, &res); return res; } gulong -rspamd_sqlite3_inc_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) 
+rspamd_sqlite3_inc_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { struct rspamd_stat_sqlite3_rt *rt = runtime; struct rspamd_stat_sqlite3_db *bk; guint64 res; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_INC_LEARNS_LANG, - rt->lang_id); - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_INC_LEARNS_USER, - rt->user_id); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_INC_LEARNS_LANG, + rt->lang_id); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_INC_LEARNS_USER, + rt->user_id); if (bk->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); bk->in_transaction = FALSE; } - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_GET_LEARNS, &res); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_GET_LEARNS, &res); return res; } gulong -rspamd_sqlite3_dec_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_sqlite3_dec_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { struct rspamd_stat_sqlite3_rt *rt = runtime; struct rspamd_stat_sqlite3_db *bk; guint64 res; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG, - rt->lang_id); - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_DEC_LEARNS_USER, - rt->user_id); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_DEC_LEARNS_LANG, + rt->lang_id); + 
rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_DEC_LEARNS_USER, + rt->user_id); if (bk->in_transaction) { - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_TRANSACTION_COMMIT); bk->in_transaction = FALSE; } - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_GET_LEARNS, &res); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_GET_LEARNS, &res); return res; } gulong -rspamd_sqlite3_learns (struct rspamd_task *task, gpointer runtime, - gpointer ctx) +rspamd_sqlite3_learns(struct rspamd_task *task, gpointer runtime, + gpointer ctx) { struct rspamd_stat_sqlite3_rt *rt = runtime; struct rspamd_stat_sqlite3_db *bk; guint64 res; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; - rspamd_sqlite3_run_prstmt (task->task_pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_GET_LEARNS, &res); + rspamd_sqlite3_run_prstmt(task->task_pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_GET_LEARNS, &res); return res; } ucl_object_t * -rspamd_sqlite3_get_stat (gpointer runtime, - gpointer ctx) +rspamd_sqlite3_get_stat(gpointer runtime, + gpointer ctx) { ucl_object_t *res = NULL; struct rspamd_stat_sqlite3_rt *rt = runtime; @@ -989,73 +830,73 @@ rspamd_sqlite3_get_stat (gpointer runtime, struct stat st; gint64 rev; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; pool = bk->pool; - (void)stat (bk->fname, &st); - rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_GET_LEARNS, &rev); - - res = ucl_object_typed_new (UCL_OBJECT); - ucl_object_insert_key (res, ucl_object_fromint (rev), "revision", - 0, false); - ucl_object_insert_key (res, ucl_object_fromint (st.st_size), "size", - 0, false); - rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - 
RSPAMD_STAT_BACKEND_NTOKENS, &rev); - ucl_object_insert_key (res, ucl_object_fromint (rev), "total", 0, false); - ucl_object_insert_key (res, ucl_object_fromint (rev), "used", 0, false); - ucl_object_insert_key (res, ucl_object_fromstring (rt->cf->symbol), - "symbol", 0, false); - ucl_object_insert_key (res, ucl_object_fromstring ("sqlite3"), - "type", 0, false); - rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_NLANGUAGES, &rev); - ucl_object_insert_key (res, ucl_object_fromint (rev), - "languages", 0, false); - rspamd_sqlite3_run_prstmt (pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_NUSERS, &rev); - ucl_object_insert_key (res, ucl_object_fromint (rev), - "users", 0, false); + (void) stat(bk->fname, &st); + rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_GET_LEARNS, &rev); + + res = ucl_object_typed_new(UCL_OBJECT); + ucl_object_insert_key(res, ucl_object_fromint(rev), "revision", + 0, false); + ucl_object_insert_key(res, ucl_object_fromint(st.st_size), "size", + 0, false); + rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_NTOKENS, &rev); + ucl_object_insert_key(res, ucl_object_fromint(rev), "total", 0, false); + ucl_object_insert_key(res, ucl_object_fromint(rev), "used", 0, false); + ucl_object_insert_key(res, ucl_object_fromstring(rt->cf->symbol), + "symbol", 0, false); + ucl_object_insert_key(res, ucl_object_fromstring("sqlite3"), + "type", 0, false); + rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_NLANGUAGES, &rev); + ucl_object_insert_key(res, ucl_object_fromint(rev), + "languages", 0, false); + rspamd_sqlite3_run_prstmt(pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_NUSERS, &rev); + ucl_object_insert_key(res, ucl_object_fromint(rev), + "users", 0, false); if (rt->cf->label) { - ucl_object_insert_key (res, ucl_object_fromstring (rt->cf->label), - "label", 0, false); + ucl_object_insert_key(res, 
ucl_object_fromstring(rt->cf->label), + "label", 0, false); } return res; } gpointer -rspamd_sqlite3_load_tokenizer_config (gpointer runtime, - gsize *len) +rspamd_sqlite3_load_tokenizer_config(gpointer runtime, + gsize *len) { gpointer tk_conf, copied_conf; guint64 sz; struct rspamd_stat_sqlite3_rt *rt = runtime; struct rspamd_stat_sqlite3_db *bk; - g_assert (rt != NULL); + g_assert(rt != NULL); bk = rt->db; - g_assert (rspamd_sqlite3_run_prstmt (rt->db->pool, bk->sqlite, bk->prstmt, - RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz, &tk_conf) == SQLITE_OK); - g_assert (sz > 0); + g_assert(rspamd_sqlite3_run_prstmt(rt->db->pool, bk->sqlite, bk->prstmt, + RSPAMD_STAT_BACKEND_LOAD_TOKENIZER, &sz, &tk_conf) == SQLITE_OK); + g_assert(sz > 0); /* * Here we can have either decoded or undecoded version of tokenizer config * XXX: dirty hack to check if we have osb magic here */ - if (sz > 7 && memcmp (tk_conf, "osbtokv", 7) == 0) { - copied_conf = rspamd_mempool_alloc (rt->task->task_pool, sz); - memcpy (copied_conf, tk_conf, sz); - g_free (tk_conf); + if (sz > 7 && memcmp(tk_conf, "osbtokv", 7) == 0) { + copied_conf = rspamd_mempool_alloc(rt->task->task_pool, sz); + memcpy(copied_conf, tk_conf, sz); + g_free(tk_conf); } else { /* Need to decode */ - copied_conf = rspamd_decode_base32 (tk_conf, sz, len, RSPAMD_BASE32_DEFAULT); - g_free (tk_conf); - rspamd_mempool_add_destructor (rt->task->task_pool, g_free, copied_conf); + copied_conf = rspamd_decode_base32(tk_conf, sz, len, RSPAMD_BASE32_DEFAULT); + g_free(tk_conf); + rspamd_mempool_add_destructor(rt->task->task_pool, g_free, copied_conf); } if (len) { diff --git a/src/libstat/classifiers/bayes.c b/src/libstat/classifiers/bayes.c index 6709bb75a..513db9af9 100644 --- a/src/libstat/classifiers/bayes.c +++ b/src/libstat/classifiers/bayes.c @@ -21,25 +21,25 @@ #include "stat_internal.h" #include "math.h" -#define msg_err_bayes(...) 
rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ - "bayes", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) -#define msg_warn_bayes(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ - "bayes", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) -#define msg_info_bayes(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ - "bayes", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) +#define msg_err_bayes(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "bayes", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_warn_bayes(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \ + "bayes", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_info_bayes(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \ + "bayes", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) INIT_LOG_MODULE_PUBLIC(bayes) static inline GQuark -bayes_error_quark (void) +bayes_error_quark(void) { - return g_quark_from_static_string ("bayes-error"); + return g_quark_from_static_string("bayes-error"); } /** @@ -50,21 +50,21 @@ bayes_error_quark (void) * @return */ static gdouble -inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg) +inv_chi_square(struct rspamd_task *task, gdouble value, gint freedom_deg) { double prob, sum, m; gint i; errno = 0; m = -value; - prob = exp (value); + prob = exp(value); if (errno == ERANGE) { /* * e^x where x is large *NEGATIVE* number is OK, so we have a very strong * confidence that inv-chi-square is close to zero */ - msg_debug_bayes ("exp overflow"); + msg_debug_bayes("exp overflow"); if (value < 0) { return 0; @@ -76,7 +76,7 @@ inv_chi_square (struct rspamd_task *task, gdouble value, gint freedom_deg) sum = prob; - msg_debug_bayes ("m: %f, probability: %g", m, prob); + msg_debug_bayes("m: %f, probability: %g", m, prob); /* * m is our confidence in class @@ -85,12 +85,12 @@ inv_chi_square (struct rspamd_task *task, 
gdouble value, gint freedom_deg) * from 1.0 (no confidence) to 0.0 (full confidence) */ for (i = 1; i < freedom_deg; i++) { - prob *= m / (gdouble)i; + prob *= m / (gdouble) i; sum += prob; - msg_debug_bayes ("i=%d, probability: %g, sum: %g", i, prob, sum); + msg_debug_bayes("i=%d, probability: %g, sum: %g", i, prob, sum); } - return MIN (1.0, sum); + return MIN(1.0, sum); } struct bayes_task_closure { @@ -107,15 +107,15 @@ struct bayes_task_closure { * Mathematically we use pow(complexity, complexity), where complexity is the * window index */ -static const double feature_weight[] = { 0, 3125, 256, 27, 1, 0, 0, 0 }; +static const double feature_weight[] = {0, 3125, 256, 27, 1, 0, 0, 0}; #define PROB_COMBINE(prob, cnt, weight, assumed) (((weight) * (assumed) + (cnt) * (prob)) / ((weight) + (cnt))) /* * In this callback we calculate local probabilities for tokens */ static void -bayes_classify_token (struct rspamd_classifier *ctx, - rspamd_token_t *tok, struct bayes_task_closure *cl) +bayes_classify_token(struct rspamd_classifier *ctx, + rspamd_token_t *tok, struct bayes_task_closure *cl) { guint i; gint id; @@ -136,15 +136,15 @@ bayes_classify_token (struct rspamd_classifier *ctx, #endif if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_META && cl->meta_skip_prob > 0) { - val = rspamd_random_double_fast (); + val = rspamd_random_double_fast(); if (val <= cl->meta_skip_prob) { if (tok->t1 && tok->t2) { - msg_debug_bayes ( - "token(meta) %uL <%*s:%*s> probabilistically skipped", - tok->data, - (int) tok->t1->original.len, tok->t1->original.begin, - (int) tok->t2->original.len, tok->t2->original.begin); + msg_debug_bayes( + "token(meta) %uL <%*s:%*s> probabilistically skipped", + tok->data, + (int) tok->t1->original.len, tok->t1->original.begin, + (int) tok->t2->original.len, tok->t2->original.begin); } return; @@ -152,9 +152,9 @@ bayes_classify_token (struct rspamd_classifier *ctx, } for (i = 0; i < ctx->statfiles_ids->len; i++) { - id = g_array_index (ctx->statfiles_ids, 
gint, i); - st = g_ptr_array_index (ctx->ctx->statfiles, id); - g_assert (st != NULL); + id = g_array_index(ctx->statfiles_ids, gint, i); + st = g_ptr_array_index(ctx->ctx->statfiles, id); + g_assert(st != NULL); val = tok->values[id]; if (val > 0) { @@ -172,8 +172,8 @@ bayes_classify_token (struct rspamd_classifier *ctx, /* Probability for this token */ if (total_count >= ctx->cfg->min_token_hits) { - spam_freq = ((double)spam_count / MAX (1., (double) ctx->spam_learns)); - ham_freq = ((double)ham_count / MAX (1., (double)ctx->ham_learns)); + spam_freq = ((double) spam_count / MAX(1., (double) ctx->spam_learns)); + ham_freq = ((double) ham_count / MAX(1., (double) ctx->ham_learns)); spam_prob = spam_freq / (spam_freq + ham_freq); ham_prob = ham_freq / (spam_freq + ham_freq); @@ -182,93 +182,91 @@ bayes_classify_token (struct rspamd_classifier *ctx, } else { fw = feature_weight[tok->window_idx % - G_N_ELEMENTS (feature_weight)]; + G_N_ELEMENTS(feature_weight)]; } w = (fw * total_count) / (1.0 + fw * total_count); - bayes_spam_prob = PROB_COMBINE (spam_prob, total_count, w, 0.5); + bayes_spam_prob = PROB_COMBINE(spam_prob, total_count, w, 0.5); if ((bayes_spam_prob > 0.5 && bayes_spam_prob < 0.5 + ctx->cfg->min_prob_strength) || (bayes_spam_prob < 0.5 && bayes_spam_prob > 0.5 - ctx->cfg->min_prob_strength)) { - msg_debug_bayes ( - "token %uL <%*s:%*s> skipped, probability not in range: %f", - tok->data, - (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, - (int) tok->t2->stemmed.len, tok->t2->stemmed.begin, - bayes_spam_prob); + msg_debug_bayes( + "token %uL <%*s:%*s> skipped, probability not in range: %f", + tok->data, + (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, + (int) tok->t2->stemmed.len, tok->t2->stemmed.begin, + bayes_spam_prob); return; } - bayes_ham_prob = PROB_COMBINE (ham_prob, total_count, w, 0.5); + bayes_ham_prob = PROB_COMBINE(ham_prob, total_count, w, 0.5); - cl->spam_prob += log (bayes_spam_prob); - cl->ham_prob += log (bayes_ham_prob); - 
cl->processed_tokens ++; + cl->spam_prob += log(bayes_spam_prob); + cl->ham_prob += log(bayes_ham_prob); + cl->processed_tokens++; if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) { - cl->text_tokens ++; + cl->text_tokens++; } else { token_type = "meta"; } if (tok->t1 && tok->t2) { - msg_debug_bayes ("token(%s) %uL <%*s:%*s>: weight: %f, cf: %f, " - "total_count: %ud, " - "spam_count: %ud, ham_count: %ud," - "spam_prob: %.3f, ham_prob: %.3f, " - "bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, " - "current spam probability: %.3f, current ham probability: %.3f", - token_type, - tok->data, - (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, - (int) tok->t2->stemmed.len, tok->t2->stemmed.begin, - fw, w, total_count, spam_count, ham_count, - spam_prob, ham_prob, - bayes_spam_prob, bayes_ham_prob, - cl->spam_prob, cl->ham_prob); + msg_debug_bayes("token(%s) %uL <%*s:%*s>: weight: %f, cf: %f, " + "total_count: %ud, " + "spam_count: %ud, ham_count: %ud," + "spam_prob: %.3f, ham_prob: %.3f, " + "bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, " + "current spam probability: %.3f, current ham probability: %.3f", + token_type, + tok->data, + (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, + (int) tok->t2->stemmed.len, tok->t2->stemmed.begin, + fw, w, total_count, spam_count, ham_count, + spam_prob, ham_prob, + bayes_spam_prob, bayes_ham_prob, + cl->spam_prob, cl->ham_prob); } else { - msg_debug_bayes ("token(%s) %uL <?:?>: weight: %f, cf: %f, " - "total_count: %ud, " - "spam_count: %ud, ham_count: %ud," - "spam_prob: %.3f, ham_prob: %.3f, " - "bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, " - "current spam probability: %.3f, current ham probability: %.3f", - token_type, - tok->data, - fw, w, total_count, spam_count, ham_count, - spam_prob, ham_prob, - bayes_spam_prob, bayes_ham_prob, - cl->spam_prob, cl->ham_prob); + msg_debug_bayes("token(%s) %uL <?:?>: weight: %f, cf: %f, " + "total_count: %ud, " + "spam_count: %ud, ham_count: %ud," + "spam_prob: %.3f, ham_prob: %.3f, " + 
"bayes_spam_prob: %.3f, bayes_ham_prob: %.3f, " + "current spam probability: %.3f, current ham probability: %.3f", + token_type, + tok->data, + fw, w, total_count, spam_count, ham_count, + spam_prob, ham_prob, + bayes_spam_prob, bayes_ham_prob, + cl->spam_prob, cl->ham_prob); } } } - gboolean -bayes_init (struct rspamd_config *cfg, - struct ev_loop *ev_base, - struct rspamd_classifier *cl) +bayes_init(struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct rspamd_classifier *cl) { cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_INTEGER; return TRUE; } -void -bayes_fin (struct rspamd_classifier *cl) +void bayes_fin(struct rspamd_classifier *cl) { } gboolean -bayes_classify (struct rspamd_classifier * ctx, - GPtrArray *tokens, - struct rspamd_task *task) +bayes_classify(struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task) { double final_prob, h, s, *pprob; gchar sumbuf[32]; @@ -278,41 +276,41 @@ bayes_classify (struct rspamd_classifier * ctx, guint i, text_tokens = 0; gint id; - g_assert (ctx != NULL); - g_assert (tokens != NULL); + g_assert(ctx != NULL); + g_assert(tokens != NULL); - memset (&cl, 0, sizeof (cl)); + memset(&cl, 0, sizeof(cl)); cl.task = task; /* Check min learns */ if (ctx->cfg->min_learns > 0) { if (ctx->ham_learns < ctx->cfg->min_learns) { - msg_info_task ("not classified as ham. The ham class needs more " - "training samples. Currently: %ul; minimum %ud required", - ctx->ham_learns, ctx->cfg->min_learns); + msg_info_task("not classified as ham. The ham class needs more " + "training samples. Currently: %ul; minimum %ud required", + ctx->ham_learns, ctx->cfg->min_learns); return TRUE; } if (ctx->spam_learns < ctx->cfg->min_learns) { - msg_info_task ("not classified as spam. The spam class needs more " - "training samples. Currently: %ul; minimum %ud required", - ctx->spam_learns, ctx->cfg->min_learns); + msg_info_task("not classified as spam. The spam class needs more " + "training samples. 
Currently: %ul; minimum %ud required", + ctx->spam_learns, ctx->cfg->min_learns); return TRUE; } } - for (i = 0; i < tokens->len; i ++) { - tok = g_ptr_array_index (tokens, i); + for (i = 0; i < tokens->len; i++) { + tok = g_ptr_array_index(tokens, i); if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) { - text_tokens ++; + text_tokens++; } } if (text_tokens == 0) { - msg_info_task ("skipped classification as there are no text tokens. " - "Total tokens: %ud", - tokens->len); + msg_info_task("skipped classification as there are no text tokens. " + "Total tokens: %ud", + tokens->len); return TRUE; } @@ -327,42 +325,42 @@ bayes_classify (struct rspamd_classifier * ctx, cl.meta_skip_prob = 1.0 - text_tokens / tokens->len; } - for (i = 0; i < tokens->len; i ++) { - tok = g_ptr_array_index (tokens, i); + for (i = 0; i < tokens->len; i++) { + tok = g_ptr_array_index(tokens, i); - bayes_classify_token (ctx, tok, &cl); + bayes_classify_token(ctx, tok, &cl); } if (cl.processed_tokens == 0) { - msg_info_bayes ("no tokens found in bayes database " - "(%ud total tokens, %ud text tokens), ignore stats", - tokens->len, text_tokens); + msg_info_bayes("no tokens found in bayes database " + "(%ud total tokens, %ud text tokens), ignore stats", + tokens->len, text_tokens); return TRUE; } if (ctx->cfg->min_tokens > 0 && - cl.text_tokens < (gint)(ctx->cfg->min_tokens * 0.1)) { - msg_info_bayes ("ignore bayes probability since we have " - "found too few text tokens: %uL (of %ud checked), " - "at least %d required", - cl.text_tokens, - text_tokens, - (gint)(ctx->cfg->min_tokens * 0.1)); + cl.text_tokens < (gint) (ctx->cfg->min_tokens * 0.1)) { + msg_info_bayes("ignore bayes probability since we have " + "found too few text tokens: %uL (of %ud checked), " + "at least %d required", + cl.text_tokens, + text_tokens, + (gint) (ctx->cfg->min_tokens * 0.1)); return TRUE; } if (cl.spam_prob > -300 && cl.ham_prob > -300) { /* Fisher value is low enough to apply inv_chi_square */ - h = 1 - 
inv_chi_square (task, cl.spam_prob, cl.processed_tokens); - s = 1 - inv_chi_square (task, cl.ham_prob, cl.processed_tokens); + h = 1 - inv_chi_square(task, cl.spam_prob, cl.processed_tokens); + s = 1 - inv_chi_square(task, cl.ham_prob, cl.processed_tokens); } else { /* Use naive method */ if (cl.spam_prob < cl.ham_prob) { h = (1.0 - exp(cl.spam_prob - cl.ham_prob)) / - (1.0 + exp(cl.spam_prob - cl.ham_prob)); + (1.0 + exp(cl.spam_prob - cl.ham_prob)); s = 1.0 - h; } else { @@ -372,51 +370,51 @@ bayes_classify (struct rspamd_classifier * ctx, } } - if (isfinite (s) && isfinite (h)) { + if (isfinite(s) && isfinite(h)) { final_prob = (s + 1.0 - h) / 2.; - msg_debug_bayes ( - "got ham probability %.2f -> %.2f and spam probability %.2f -> %.2f," - " %L tokens processed of %ud total tokens;" - " %uL text tokens found of %ud text tokens)", - cl.ham_prob, - h, - cl.spam_prob, - s, - cl.processed_tokens, - tokens->len, - cl.text_tokens, - text_tokens); + msg_debug_bayes( + "got ham probability %.2f -> %.2f and spam probability %.2f -> %.2f," + " %L tokens processed of %ud total tokens;" + " %uL text tokens found of %ud text tokens)", + cl.ham_prob, + h, + cl.spam_prob, + s, + cl.processed_tokens, + tokens->len, + cl.text_tokens, + text_tokens); } else { /* * We have some overflow, hence we need to check which class * is NaN */ - if (isfinite (h)) { + if (isfinite(h)) { final_prob = 1.0; - msg_debug_bayes ("spam class is full: no" - " ham samples"); + msg_debug_bayes("spam class is full: no" + " ham samples"); } - else if (isfinite (s)) { + else if (isfinite(s)) { final_prob = 0.0; - msg_debug_bayes ("ham class is full: no" - " spam samples"); + msg_debug_bayes("ham class is full: no" + " spam samples"); } else { final_prob = 0.5; - msg_warn_bayes ("spam and ham classes are both full"); + msg_warn_bayes("spam and ham classes are both full"); } } - pprob = rspamd_mempool_alloc (task->task_pool, sizeof (*pprob)); + pprob = rspamd_mempool_alloc(task->task_pool, sizeof(*pprob)); 
*pprob = final_prob; - rspamd_mempool_set_variable (task->task_pool, "bayes_prob", pprob, NULL); + rspamd_mempool_set_variable(task->task_pool, "bayes_prob", pprob, NULL); - if (cl.processed_tokens > 0 && fabs (final_prob - 0.5) > 0.05) { + if (cl.processed_tokens > 0 && fabs(final_prob - 0.5) > 0.05) { /* Now we can have exactly one HAM and exactly one SPAM statfiles per classifier */ for (i = 0; i < ctx->statfiles_ids->len; i++) { - id = g_array_index (ctx->statfiles_ids, gint, i); - st = g_ptr_array_index (ctx->ctx->statfiles, id); + id = g_array_index(ctx->statfiles_ids, gint, i); + st = g_ptr_array_index(ctx->ctx->statfiles, id); if (final_prob > 0.5 && st->stcf->is_spam) { break; @@ -435,14 +433,15 @@ bayes_classify (struct rspamd_classifier * ctx, * Bayes p is from 0.5 to 1.0, but confidence is from 0 to 1, so * we need to rescale it to display correctly */ - rspamd_snprintf (sumbuf, sizeof (sumbuf), "%.2f%%", - (final_prob - 0.5) * 200.); - final_prob = rspamd_normalize_probability (final_prob, 0.5); - g_assert (st != NULL); + rspamd_snprintf(sumbuf, sizeof(sumbuf), "%.2f%%", + (final_prob - 0.5) * 200.); + final_prob = rspamd_normalize_probability(final_prob, 0.5); + g_assert(st != NULL); if (final_prob > 1 || final_prob < 0) { - msg_err_bayes ("internal error: probability %f is outside of the " - "allowed range [0..1]", final_prob); + msg_err_bayes("internal error: probability %f is outside of the " + "allowed range [0..1]", + final_prob); if (final_prob > 1) { final_prob = 1.0; @@ -452,22 +451,22 @@ bayes_classify (struct rspamd_classifier * ctx, } } - rspamd_task_insert_result (task, - st->stcf->symbol, - final_prob, - sumbuf); + rspamd_task_insert_result(task, + st->stcf->symbol, + final_prob, + sumbuf); } return TRUE; } gboolean -bayes_learn_spam (struct rspamd_classifier * ctx, - GPtrArray *tokens, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err) +bayes_learn_spam(struct rspamd_classifier *ctx, + GPtrArray *tokens, 
+ struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err) { guint i, j, total_cnt, spam_cnt, ham_cnt; gint id; @@ -475,8 +474,8 @@ bayes_learn_spam (struct rspamd_classifier * ctx, rspamd_token_t *tok; gboolean incrementing; - g_assert (ctx != NULL); - g_assert (tokens != NULL); + g_assert(ctx != NULL); + g_assert(tokens != NULL); incrementing = ctx->cfg->flags & RSPAMD_FLAG_CLASSIFIER_INCREMENTING_BACKEND; @@ -484,12 +483,12 @@ bayes_learn_spam (struct rspamd_classifier * ctx, total_cnt = 0; spam_cnt = 0; ham_cnt = 0; - tok = g_ptr_array_index (tokens, i); + tok = g_ptr_array_index(tokens, i); for (j = 0; j < ctx->statfiles_ids->len; j++) { - id = g_array_index (ctx->statfiles_ids, gint, j); - st = g_ptr_array_index (ctx->ctx->statfiles, id); - g_assert (st != NULL); + id = g_array_index(ctx->statfiles_ids, gint, j); + st = g_ptr_array_index(ctx->ctx->statfiles, id); + g_assert(st != NULL); if (!!st->stcf->is_spam == !!is_spam) { if (incrementing) { @@ -533,18 +532,18 @@ bayes_learn_spam (struct rspamd_classifier * ctx, } if (tok->t1 && tok->t2) { - msg_debug_bayes ("token %uL <%*s:%*s>: window: %d, total_count: %d, " - "spam_count: %d, ham_count: %d", - tok->data, - (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, - (int) tok->t2->stemmed.len, tok->t2->stemmed.begin, - tok->window_idx, total_cnt, spam_cnt, ham_cnt); + msg_debug_bayes("token %uL <%*s:%*s>: window: %d, total_count: %d, " + "spam_count: %d, ham_count: %d", + tok->data, + (int) tok->t1->stemmed.len, tok->t1->stemmed.begin, + (int) tok->t2->stemmed.len, tok->t2->stemmed.begin, + tok->window_idx, total_cnt, spam_cnt, ham_cnt); } else { - msg_debug_bayes ("token %uL <?:?>: window: %d, total_count: %d, " - "spam_count: %d, ham_count: %d", - tok->data, - tok->window_idx, total_cnt, spam_cnt, ham_cnt); + msg_debug_bayes("token %uL <?:?>: window: %d, total_count: %d, " + "spam_count: %d, ham_count: %d", + tok->data, + tok->window_idx, total_cnt, spam_cnt, ham_cnt); } } diff 
--git a/src/libstat/classifiers/classifiers.h b/src/libstat/classifiers/classifiers.h index 32473cdd1..f6109c3e5 100644 --- a/src/libstat/classifiers/classifiers.h +++ b/src/libstat/classifiers/classifiers.h @@ -9,7 +9,7 @@ /* Consider this value as 0 */ #define ALPHA 0.0001 -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -23,66 +23,66 @@ struct token_node_s; struct rspamd_stat_classifier { char *name; - gboolean (*init_func) (struct rspamd_config *cfg, - struct ev_loop *ev_base, - struct rspamd_classifier *cl); + gboolean (*init_func)(struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct rspamd_classifier *cl); - gboolean (*classify_func) (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task); + gboolean (*classify_func)(struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task); - gboolean (*learn_spam_func) (struct rspamd_classifier *ctx, - GPtrArray *input, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err); + gboolean (*learn_spam_func)(struct rspamd_classifier *ctx, + GPtrArray *input, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); - void (*fin_func) (struct rspamd_classifier *cl); + void (*fin_func)(struct rspamd_classifier *cl); }; /* Bayes algorithm */ -gboolean bayes_init (struct rspamd_config *cfg, - struct ev_loop *ev_base, - struct rspamd_classifier *); +gboolean bayes_init(struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct rspamd_classifier *); -gboolean bayes_classify (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task); +gboolean bayes_classify(struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task); -gboolean bayes_learn_spam (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err); +gboolean bayes_learn_spam(struct rspamd_classifier *ctx, + GPtrArray *tokens, + 
struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); -void bayes_fin (struct rspamd_classifier *); +void bayes_fin(struct rspamd_classifier *); /* Generic lua classifier */ -gboolean lua_classifier_init (struct rspamd_config *cfg, - struct ev_loop *ev_base, - struct rspamd_classifier *); +gboolean lua_classifier_init(struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct rspamd_classifier *); -gboolean lua_classifier_classify (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task); +gboolean lua_classifier_classify(struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task); -gboolean lua_classifier_learn_spam (struct rspamd_classifier *ctx, - GPtrArray *tokens, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err); +gboolean lua_classifier_learn_spam(struct rspamd_classifier *ctx, + GPtrArray *tokens, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err); extern gint rspamd_bayes_log_id; -#define msg_debug_bayes(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ - rspamd_bayes_log_id, "bayes", task->task_pool->tag.uid, \ - G_STRFUNC, \ - __VA_ARGS__) +#define msg_debug_bayes(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \ + rspamd_bayes_log_id, "bayes", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libstat/classifiers/lua_classifier.c b/src/libstat/classifiers/lua_classifier.c index 41657abc0..b74330dca 100644 --- a/src/libstat/classifiers/lua_classifier.c +++ b/src/libstat/classifiers/lua_classifier.c @@ -27,108 +27,108 @@ struct rspamd_lua_classifier_ctx { static GHashTable *lua_classifiers = NULL; -#define msg_err_luacl(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ - "luacl", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) -#define msg_warn_luacl(...) 
rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ - "luacl", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) -#define msg_info_luacl(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ - "luacl", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) -#define msg_debug_luacl(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \ - rspamd_luacl_log_id, "luacl", task->task_pool->tag.uid, \ - RSPAMD_LOG_FUNC, \ - __VA_ARGS__) +#define msg_err_luacl(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "luacl", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_warn_luacl(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \ + "luacl", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_info_luacl(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \ + "luacl", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_debug_luacl(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \ + rspamd_luacl_log_id, "luacl", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) INIT_LOG_MODULE(luacl) gboolean -lua_classifier_init (struct rspamd_config *cfg, - struct ev_loop *ev_base, - struct rspamd_classifier *cl) +lua_classifier_init(struct rspamd_config *cfg, + struct ev_loop *ev_base, + struct rspamd_classifier *cl) { struct rspamd_lua_classifier_ctx *ctx; lua_State *L = cl->ctx->cfg->lua_state; gint cb_classify = -1, cb_learn = -1; if (lua_classifiers == NULL) { - lua_classifiers = g_hash_table_new_full (rspamd_strcase_hash, - rspamd_strcase_equal, g_free, g_free); + lua_classifiers = g_hash_table_new_full(rspamd_strcase_hash, + rspamd_strcase_equal, g_free, g_free); } - ctx = g_hash_table_lookup (lua_classifiers, cl->subrs->name); + ctx = g_hash_table_lookup(lua_classifiers, cl->subrs->name); if (ctx != NULL) { - msg_err_config ("duplicate lua classifier definition: %s", - cl->subrs->name); + msg_err_config("duplicate lua classifier definition: %s", 
+ cl->subrs->name); return FALSE; } - lua_getglobal (L, "rspamd_classifiers"); - if (lua_type (L, -1) != LUA_TTABLE) { - msg_err_config ("cannot register classifier %s: no rspamd_classifier global", - cl->subrs->name); - lua_pop (L, 1); + lua_getglobal(L, "rspamd_classifiers"); + if (lua_type(L, -1) != LUA_TTABLE) { + msg_err_config("cannot register classifier %s: no rspamd_classifier global", + cl->subrs->name); + lua_pop(L, 1); return FALSE; } - lua_pushstring (L, cl->subrs->name); - lua_gettable (L, -2); + lua_pushstring(L, cl->subrs->name); + lua_gettable(L, -2); - if (lua_type (L, -1) != LUA_TTABLE) { - msg_err_config ("cannot register classifier %s: bad lua type: %s", - cl->subrs->name, lua_typename (L, lua_type (L, -1))); - lua_pop (L, 2); + if (lua_type(L, -1) != LUA_TTABLE) { + msg_err_config("cannot register classifier %s: bad lua type: %s", + cl->subrs->name, lua_typename(L, lua_type(L, -1))); + lua_pop(L, 2); return FALSE; } - lua_pushstring (L, "classify"); - lua_gettable (L, -2); + lua_pushstring(L, "classify"); + lua_gettable(L, -2); - if (lua_type (L, -1) != LUA_TFUNCTION) { - msg_err_config ("cannot register classifier %s: bad lua type for classify: %s", - cl->subrs->name, lua_typename (L, lua_type (L, -1))); - lua_pop (L, 3); + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_err_config("cannot register classifier %s: bad lua type for classify: %s", + cl->subrs->name, lua_typename(L, lua_type(L, -1))); + lua_pop(L, 3); return FALSE; } - cb_classify = luaL_ref (L, LUA_REGISTRYINDEX); + cb_classify = luaL_ref(L, LUA_REGISTRYINDEX); - lua_pushstring (L, "learn"); - lua_gettable (L, -2); + lua_pushstring(L, "learn"); + lua_gettable(L, -2); - if (lua_type (L, -1) != LUA_TFUNCTION) { - msg_err_config ("cannot register classifier %s: bad lua type for learn: %s", - cl->subrs->name, lua_typename (L, lua_type (L, -1))); - lua_pop (L, 3); + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_err_config("cannot register classifier %s: bad lua type for learn: %s", + 
cl->subrs->name, lua_typename(L, lua_type(L, -1))); + lua_pop(L, 3); return FALSE; } - cb_learn = luaL_ref (L, LUA_REGISTRYINDEX); - lua_pop (L, 2); /* Table + global */ + cb_learn = luaL_ref(L, LUA_REGISTRYINDEX); + lua_pop(L, 2); /* Table + global */ - ctx = g_malloc0 (sizeof (*ctx)); - ctx->name = g_strdup (cl->subrs->name); + ctx = g_malloc0(sizeof(*ctx)); + ctx->name = g_strdup(cl->subrs->name); ctx->classify_ref = cb_classify; ctx->learn_ref = cb_learn; cl->cfg->flags |= RSPAMD_FLAG_CLASSIFIER_NO_BACKEND; - g_hash_table_insert (lua_classifiers, ctx->name, ctx); + g_hash_table_insert(lua_classifiers, ctx->name, ctx); return TRUE; } gboolean -lua_classifier_classify (struct rspamd_classifier *cl, - GPtrArray *tokens, - struct rspamd_task *task) +lua_classifier_classify(struct rspamd_classifier *cl, + GPtrArray *tokens, + struct rspamd_task *task) { struct rspamd_lua_classifier_ctx *ctx; struct rspamd_task **ptask; @@ -138,38 +138,38 @@ lua_classifier_classify (struct rspamd_classifier *cl, guint i; guint64 v; - ctx = g_hash_table_lookup (lua_classifiers, cl->subrs->name); - g_assert (ctx != NULL); + ctx = g_hash_table_lookup(lua_classifiers, cl->subrs->name); + g_assert(ctx != NULL); L = task->cfg->lua_state; - lua_rawgeti (L, LUA_REGISTRYINDEX, ctx->classify_ref); - ptask = lua_newuserdata (L, sizeof (*ptask)); + lua_rawgeti(L, LUA_REGISTRYINDEX, ctx->classify_ref); + ptask = lua_newuserdata(L, sizeof(*ptask)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); - pcfg = lua_newuserdata (L, sizeof (*pcfg)); + rspamd_lua_setclass(L, "rspamd{task}", -1); + pcfg = lua_newuserdata(L, sizeof(*pcfg)); *pcfg = cl->cfg; - rspamd_lua_setclass (L, "rspamd{classifier}", -1); + rspamd_lua_setclass(L, "rspamd{classifier}", -1); - lua_createtable (L, tokens->len, 0); + lua_createtable(L, tokens->len, 0); - for (i = 0; i < tokens->len; i ++) { - tok = g_ptr_array_index (tokens, i); + for (i = 0; i < tokens->len; i++) { + tok = g_ptr_array_index(tokens, i); v = 
tok->data; - lua_createtable (L, 3, 0); + lua_createtable(L, 3, 0); /* High word, low word, order */ - lua_pushinteger (L, (guint32)(v >> 32)); - lua_rawseti (L, -2, 1); - lua_pushinteger (L, (guint32)(v)); - lua_rawseti (L, -2, 2); - lua_pushinteger (L, tok->window_idx); - lua_rawseti (L, -2, 3); - lua_rawseti (L, -2, i + 1); + lua_pushinteger(L, (guint32) (v >> 32)); + lua_rawseti(L, -2, 1); + lua_pushinteger(L, (guint32) (v)); + lua_rawseti(L, -2, 2); + lua_pushinteger(L, tok->window_idx); + lua_rawseti(L, -2, 3); + lua_rawseti(L, -2, i + 1); } - if (lua_pcall (L, 3, 0, 0) != 0) { - msg_err_luacl ("error running classify function for %s: %s", ctx->name, - lua_tostring (L, -1)); - lua_pop (L, 1); + if (lua_pcall(L, 3, 0, 0) != 0) { + msg_err_luacl("error running classify function for %s: %s", ctx->name, + lua_tostring(L, -1)); + lua_pop(L, 1); return FALSE; } @@ -178,12 +178,12 @@ lua_classifier_classify (struct rspamd_classifier *cl, } gboolean -lua_classifier_learn_spam (struct rspamd_classifier *cl, - GPtrArray *tokens, - struct rspamd_task *task, - gboolean is_spam, - gboolean unlearn, - GError **err) +lua_classifier_learn_spam(struct rspamd_classifier *cl, + GPtrArray *tokens, + struct rspamd_task *task, + gboolean is_spam, + gboolean unlearn, + GError **err) { struct rspamd_lua_classifier_ctx *ctx; struct rspamd_task **ptask; @@ -193,42 +193,42 @@ lua_classifier_learn_spam (struct rspamd_classifier *cl, guint i; guint64 v; - ctx = g_hash_table_lookup (lua_classifiers, cl->subrs->name); - g_assert (ctx != NULL); + ctx = g_hash_table_lookup(lua_classifiers, cl->subrs->name); + g_assert(ctx != NULL); L = task->cfg->lua_state; - lua_rawgeti (L, LUA_REGISTRYINDEX, ctx->learn_ref); - ptask = lua_newuserdata (L, sizeof (*ptask)); + lua_rawgeti(L, LUA_REGISTRYINDEX, ctx->learn_ref); + ptask = lua_newuserdata(L, sizeof(*ptask)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); - pcfg = lua_newuserdata (L, sizeof (*pcfg)); + rspamd_lua_setclass(L, 
"rspamd{task}", -1); + pcfg = lua_newuserdata(L, sizeof(*pcfg)); *pcfg = cl->cfg; - rspamd_lua_setclass (L, "rspamd{classifier}", -1); + rspamd_lua_setclass(L, "rspamd{classifier}", -1); - lua_createtable (L, tokens->len, 0); + lua_createtable(L, tokens->len, 0); - for (i = 0; i < tokens->len; i ++) { - tok = g_ptr_array_index (tokens, i); + for (i = 0; i < tokens->len; i++) { + tok = g_ptr_array_index(tokens, i); v = 0; v = tok->data; - lua_createtable (L, 3, 0); + lua_createtable(L, 3, 0); /* High word, low word, order */ - lua_pushinteger (L, (guint32)(v >> 32)); - lua_rawseti (L, -2, 1); - lua_pushinteger (L, (guint32)(v)); - lua_rawseti (L, -2, 2); - lua_pushinteger (L, tok->window_idx); - lua_rawseti (L, -2, 3); - lua_rawseti (L, -2, i + 1); + lua_pushinteger(L, (guint32) (v >> 32)); + lua_rawseti(L, -2, 1); + lua_pushinteger(L, (guint32) (v)); + lua_rawseti(L, -2, 2); + lua_pushinteger(L, tok->window_idx); + lua_rawseti(L, -2, 3); + lua_rawseti(L, -2, i + 1); } - lua_pushboolean (L, is_spam); - lua_pushboolean (L, unlearn); + lua_pushboolean(L, is_spam); + lua_pushboolean(L, unlearn); - if (lua_pcall (L, 5, 0, 0) != 0) { - msg_err_luacl ("error running learn function for %s: %s", ctx->name, - lua_tostring (L, -1)); - lua_pop (L, 1); + if (lua_pcall(L, 5, 0, 0) != 0) { + msg_err_luacl("error running learn function for %s: %s", ctx->name, + lua_tostring(L, -1)); + lua_pop(L, 1); return FALSE; } diff --git a/src/libstat/learn_cache/learn_cache.h b/src/libstat/learn_cache/learn_cache.h index ad13ecf29..11a66fc09 100644 --- a/src/libstat/learn_cache/learn_cache.h +++ b/src/libstat/learn_cache/learn_cache.h @@ -19,7 +19,7 @@ #include "config.h" #include "ucl.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -33,46 +33,46 @@ struct rspamd_statfile; struct rspamd_stat_cache { const char *name; - gpointer (*init) (struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, - struct rspamd_statfile *st, - const ucl_object_t *cf); + gpointer 
(*init)(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st, + const ucl_object_t *cf); - gpointer (*runtime) (struct rspamd_task *task, - gpointer ctx, gboolean learn); + gpointer (*runtime)(struct rspamd_task *task, + gpointer ctx, gboolean learn); - gint (*check) (struct rspamd_task *task, - gboolean is_spam, - gpointer runtime); + gint (*check)(struct rspamd_task *task, + gboolean is_spam, + gpointer runtime); - gint (*learn) (struct rspamd_task *task, - gboolean is_spam, - gpointer runtime); + gint (*learn)(struct rspamd_task *task, + gboolean is_spam, + gpointer runtime); - void (*close) (gpointer ctx); + void (*close)(gpointer ctx); gpointer ctx; }; -#define RSPAMD_STAT_CACHE_DEF(name) \ - gpointer rspamd_stat_cache_##name##_init (struct rspamd_stat_ctx *ctx, \ - struct rspamd_config *cfg, \ - struct rspamd_statfile *st, \ - const ucl_object_t *cf); \ - gpointer rspamd_stat_cache_##name##_runtime (struct rspamd_task *task, \ - gpointer ctx, gboolean learn); \ - gint rspamd_stat_cache_##name##_check (struct rspamd_task *task, \ - gboolean is_spam, \ - gpointer runtime); \ - gint rspamd_stat_cache_##name##_learn (struct rspamd_task *task, \ - gboolean is_spam, \ - gpointer runtime); \ - void rspamd_stat_cache_##name##_close (gpointer ctx) +#define RSPAMD_STAT_CACHE_DEF(name) \ + gpointer rspamd_stat_cache_##name##_init(struct rspamd_stat_ctx *ctx, \ + struct rspamd_config *cfg, \ + struct rspamd_statfile *st, \ + const ucl_object_t *cf); \ + gpointer rspamd_stat_cache_##name##_runtime(struct rspamd_task *task, \ + gpointer ctx, gboolean learn); \ + gint rspamd_stat_cache_##name##_check(struct rspamd_task *task, \ + gboolean is_spam, \ + gpointer runtime); \ + gint rspamd_stat_cache_##name##_learn(struct rspamd_task *task, \ + gboolean is_spam, \ + gpointer runtime); \ + void rspamd_stat_cache_##name##_close(gpointer ctx) RSPAMD_STAT_CACHE_DEF(sqlite3); RSPAMD_STAT_CACHE_DEF(redis); -#ifdef __cplusplus +#ifdef __cplusplus } 
#endif diff --git a/src/libstat/learn_cache/redis_cache.c b/src/libstat/learn_cache/redis_cache.c index 479623942..d5fe4ad48 100644 --- a/src/libstat/learn_cache/redis_cache.c +++ b/src/libstat/learn_cache/redis_cache.c @@ -52,78 +52,78 @@ struct rspamd_redis_cache_runtime { }; static GQuark -rspamd_stat_cache_redis_quark (void) +rspamd_stat_cache_redis_quark(void) { - return g_quark_from_static_string (M); + return g_quark_from_static_string(M); } static inline struct upstream_list * -rspamd_redis_get_servers (struct rspamd_redis_cache_ctx *ctx, - const gchar *what) +rspamd_redis_get_servers(struct rspamd_redis_cache_ctx *ctx, + const gchar *what) { lua_State *L = ctx->L; struct upstream_list *res; - lua_rawgeti (L, LUA_REGISTRYINDEX, ctx->conf_ref); - lua_pushstring (L, what); - lua_gettable (L, -2); - res = *((struct upstream_list**)lua_touserdata (L, -1)); - lua_settop (L, 0); + lua_rawgeti(L, LUA_REGISTRYINDEX, ctx->conf_ref); + lua_pushstring(L, what); + lua_gettable(L, -2); + res = *((struct upstream_list **) lua_touserdata(L, -1)); + lua_settop(L, 0); return res; } static void -rspamd_redis_cache_maybe_auth (struct rspamd_redis_cache_ctx *ctx, - redisAsyncContext *redis) +rspamd_redis_cache_maybe_auth(struct rspamd_redis_cache_ctx *ctx, + redisAsyncContext *redis) { if (ctx->password) { - redisAsyncCommand (redis, NULL, NULL, "AUTH %s", ctx->password); + redisAsyncCommand(redis, NULL, NULL, "AUTH %s", ctx->password); } if (ctx->dbname) { - redisAsyncCommand (redis, NULL, NULL, "SELECT %s", ctx->dbname); + redisAsyncCommand(redis, NULL, NULL, "SELECT %s", ctx->dbname); } } /* Called on connection termination */ static void -rspamd_redis_cache_fin (gpointer data) +rspamd_redis_cache_fin(gpointer data) { struct rspamd_redis_cache_runtime *rt = data; redisAsyncContext *redis; rt->has_event = FALSE; - ev_timer_stop (rt->task->event_loop, &rt->timer_ev); + ev_timer_stop(rt->task->event_loop, &rt->timer_ev); if (rt->redis) { redis = rt->redis; rt->redis = NULL; /* 
This calls for all callbacks pending */ - redisAsyncFree (redis); + redisAsyncFree(redis); } } static void -rspamd_redis_cache_timeout (EV_P_ ev_timer *w, int revents) +rspamd_redis_cache_timeout(EV_P_ ev_timer *w, int revents) { struct rspamd_redis_cache_runtime *rt = - (struct rspamd_redis_cache_runtime *)w->data; + (struct rspamd_redis_cache_runtime *) w->data; struct rspamd_task *task; task = rt->task; - msg_err_task ("connection to redis server %s timed out", - rspamd_upstream_name (rt->selected)); - rspamd_upstream_fail (rt->selected, FALSE, "timeout"); + msg_err_task("connection to redis server %s timed out", + rspamd_upstream_name(rt->selected)); + rspamd_upstream_fail(rt->selected, FALSE, "timeout"); if (rt->has_event) { - rspamd_session_remove_event (task->s, rspamd_redis_cache_fin, rt); + rspamd_session_remove_event(task->s, rspamd_redis_cache_fin, rt); } } /* Called when we have checked the specified message id */ static void -rspamd_stat_cache_redis_get (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_stat_cache_redis_get(redisAsyncContext *c, gpointer r, gpointer priv) { struct rspamd_redis_cache_runtime *rt = priv; redisReply *reply = r; @@ -134,20 +134,20 @@ rspamd_stat_cache_redis_get (redisAsyncContext *c, gpointer r, gpointer priv) if (c->err == 0) { if (reply) { - if (G_LIKELY (reply->type == REDIS_REPLY_INTEGER)) { + if (G_LIKELY(reply->type == REDIS_REPLY_INTEGER)) { val = reply->integer; } else if (reply->type == REDIS_REPLY_STRING) { - rspamd_strtol (reply->str, reply->len, &val); + rspamd_strtol(reply->str, reply->len, &val); } else { if (reply->type == REDIS_REPLY_ERROR) { - msg_err_task ("cannot learn %s: redis error: \"%s\"", - rt->ctx->stcf->symbol, reply->str); + msg_err_task("cannot learn %s: redis error: \"%s\"", + rt->ctx->stcf->symbol, reply->str); } else if (reply->type != REDIS_REPLY_NIL) { - msg_err_task ("bad learned type for %s: %d", - rt->ctx->stcf->symbol, reply->type); + msg_err_task("bad learned type for %s: %d", 
+ rt->ctx->stcf->symbol, reply->type); } val = 0; @@ -155,11 +155,12 @@ rspamd_stat_cache_redis_get (redisAsyncContext *c, gpointer r, gpointer priv) } if ((val > 0 && (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM)) || - (val < 0 && (task->flags & RSPAMD_TASK_FLAG_LEARN_HAM))) { + (val < 0 && (task->flags & RSPAMD_TASK_FLAG_LEARN_HAM))) { /* Already learned */ - msg_info_task ("<%s> has been already " - "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), - (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM) ? "spam" : "ham"); + msg_info_task("<%s> has been already " + "learned as %s, ignore it", + MESSAGE_FIELD(task, message_id), + (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM) ? "spam" : "ham"); task->flags |= RSPAMD_TASK_FLAG_ALREADY_LEARNED; } else if (val != 0) { @@ -167,20 +168,20 @@ rspamd_stat_cache_redis_get (redisAsyncContext *c, gpointer r, gpointer priv) task->flags |= RSPAMD_TASK_FLAG_UNLEARN; } - rspamd_upstream_ok (rt->selected); + rspamd_upstream_ok(rt->selected); } else { - rspamd_upstream_fail (rt->selected, FALSE, c->errstr); + rspamd_upstream_fail(rt->selected, FALSE, c->errstr); } if (rt->has_event) { - rspamd_session_remove_event (task->s, rspamd_redis_cache_fin, rt); + rspamd_session_remove_event(task->s, rspamd_redis_cache_fin, rt); } } /* Called when we have learned the specified message id */ static void -rspamd_stat_cache_redis_set (redisAsyncContext *c, gpointer r, gpointer priv) +rspamd_stat_cache_redis_set(redisAsyncContext *c, gpointer r, gpointer priv) { struct rspamd_redis_cache_runtime *rt = priv; struct rspamd_task *task; @@ -189,19 +190,19 @@ rspamd_stat_cache_redis_set (redisAsyncContext *c, gpointer r, gpointer priv) if (c->err == 0) { /* XXX: we ignore results here */ - rspamd_upstream_ok (rt->selected); + rspamd_upstream_ok(rt->selected); } else { - rspamd_upstream_fail (rt->selected, FALSE, c->errstr); + rspamd_upstream_fail(rt->selected, FALSE, c->errstr); } if (rt->has_event) { - rspamd_session_remove_event (task->s, 
rspamd_redis_cache_fin, rt); + rspamd_session_remove_event(task->s, rspamd_redis_cache_fin, rt); } } static void -rspamd_stat_cache_redis_generate_id (struct rspamd_task *task) +rspamd_stat_cache_redis_generate_id(struct rspamd_task *task) { rspamd_cryptobox_hash_state_t st; rspamd_token_t *tok; @@ -210,98 +211,98 @@ rspamd_stat_cache_redis_generate_id (struct rspamd_task *task) gchar *b32out; gchar *user = NULL; - rspamd_cryptobox_hash_init (&st, NULL, 0); + rspamd_cryptobox_hash_init(&st, NULL, 0); - user = rspamd_mempool_get_variable (task->task_pool, "stat_user"); + user = rspamd_mempool_get_variable(task->task_pool, "stat_user"); /* Use dedicated hash space for per users cache */ if (user != NULL) { - rspamd_cryptobox_hash_update (&st, user, strlen (user)); + rspamd_cryptobox_hash_update(&st, user, strlen(user)); } - for (i = 0; i < task->tokens->len; i ++) { - tok = g_ptr_array_index (task->tokens, i); - rspamd_cryptobox_hash_update (&st, (guchar *)&tok->data, - sizeof (tok->data)); + for (i = 0; i < task->tokens->len; i++) { + tok = g_ptr_array_index(task->tokens, i); + rspamd_cryptobox_hash_update(&st, (guchar *) &tok->data, + sizeof(tok->data)); } - rspamd_cryptobox_hash_final (&st, out); + rspamd_cryptobox_hash_final(&st, out); - b32out = rspamd_mempool_alloc (task->task_pool, - sizeof (out) * 8 / 5 + 3); - i = rspamd_encode_base32_buf (out, sizeof (out), b32out, - sizeof (out) * 8 / 5 + 2, RSPAMD_BASE32_DEFAULT); + b32out = rspamd_mempool_alloc(task->task_pool, + sizeof(out) * 8 / 5 + 3); + i = rspamd_encode_base32_buf(out, sizeof(out), b32out, + sizeof(out) * 8 / 5 + 2, RSPAMD_BASE32_DEFAULT); if (i > 0) { /* Zero terminate */ b32out[i] = '\0'; } - rspamd_mempool_set_variable (task->task_pool, "words_hash", b32out, NULL); + rspamd_mempool_set_variable(task->task_pool, "words_hash", b32out, NULL); } gpointer -rspamd_stat_cache_redis_init (struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, - struct rspamd_statfile *st, - const ucl_object_t *cf) 
+rspamd_stat_cache_redis_init(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st, + const ucl_object_t *cf) { struct rspamd_redis_cache_ctx *cache_ctx; struct rspamd_statfile_config *stf = st->stcf; const ucl_object_t *obj; gboolean ret = FALSE; - lua_State *L = (lua_State *)cfg->lua_state; + lua_State *L = (lua_State *) cfg->lua_state; gint conf_ref = -1; - cache_ctx = g_malloc0 (sizeof (*cache_ctx)); + cache_ctx = g_malloc0(sizeof(*cache_ctx)); cache_ctx->timeout = REDIS_DEFAULT_TIMEOUT; cache_ctx->L = L; /* First search in backend configuration */ - obj = ucl_object_lookup (st->classifier->cfg->opts, "backend"); - if (obj != NULL && ucl_object_type (obj) == UCL_OBJECT) { - ret = rspamd_lua_try_load_redis (L, obj, cfg, &conf_ref); + obj = ucl_object_lookup(st->classifier->cfg->opts, "backend"); + if (obj != NULL && ucl_object_type(obj) == UCL_OBJECT) { + ret = rspamd_lua_try_load_redis(L, obj, cfg, &conf_ref); } /* Now try statfiles config */ if (!ret && stf->opts) { - ret = rspamd_lua_try_load_redis (L, stf->opts, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, stf->opts, cfg, &conf_ref); } /* Now try classifier config */ if (!ret && st->classifier->cfg->opts) { - ret = rspamd_lua_try_load_redis (L, st->classifier->cfg->opts, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, st->classifier->cfg->opts, cfg, &conf_ref); } /* Now try global redis settings */ if (!ret) { - obj = ucl_object_lookup (cfg->rcl_obj, "redis"); + obj = ucl_object_lookup(cfg->rcl_obj, "redis"); if (obj) { const ucl_object_t *specific_obj; - specific_obj = ucl_object_lookup (obj, "statistics"); + specific_obj = ucl_object_lookup(obj, "statistics"); if (specific_obj) { - ret = rspamd_lua_try_load_redis (L, - specific_obj, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, + specific_obj, cfg, &conf_ref); } else { - ret = rspamd_lua_try_load_redis (L, - obj, cfg, &conf_ref); + ret = rspamd_lua_try_load_redis(L, + obj, cfg, &conf_ref); } } } if 
(!ret) { - msg_err_config ("cannot init redis cache for %s", stf->symbol); - g_free (cache_ctx); + msg_err_config("cannot init redis cache for %s", stf->symbol); + g_free(cache_ctx); return NULL; } - obj = ucl_object_lookup (st->classifier->cfg->opts, "cache_key"); + obj = ucl_object_lookup(st->classifier->cfg->opts, "cache_key"); if (obj) { - cache_ctx->redis_object = ucl_object_tostring (obj); + cache_ctx->redis_object = ucl_object_tostring(obj); } else { cache_ctx->redis_object = DEFAULT_REDIS_KEY; @@ -310,41 +311,41 @@ rspamd_stat_cache_redis_init (struct rspamd_stat_ctx *ctx, cache_ctx->conf_ref = conf_ref; /* Check some common table values */ - lua_rawgeti (L, LUA_REGISTRYINDEX, conf_ref); + lua_rawgeti(L, LUA_REGISTRYINDEX, conf_ref); - lua_pushstring (L, "timeout"); - lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TNUMBER) { - cache_ctx->timeout = lua_tonumber (L, -1); + lua_pushstring(L, "timeout"); + lua_gettable(L, -2); + if (lua_type(L, -1) == LUA_TNUMBER) { + cache_ctx->timeout = lua_tonumber(L, -1); } - lua_pop (L, 1); + lua_pop(L, 1); - lua_pushstring (L, "db"); - lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TSTRING) { - cache_ctx->dbname = rspamd_mempool_strdup (cfg->cfg_pool, - lua_tostring (L, -1)); + lua_pushstring(L, "db"); + lua_gettable(L, -2); + if (lua_type(L, -1) == LUA_TSTRING) { + cache_ctx->dbname = rspamd_mempool_strdup(cfg->cfg_pool, + lua_tostring(L, -1)); } - lua_pop (L, 1); + lua_pop(L, 1); - lua_pushstring (L, "password"); - lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TSTRING) { - cache_ctx->password = rspamd_mempool_strdup (cfg->cfg_pool, - lua_tostring (L, -1)); + lua_pushstring(L, "password"); + lua_gettable(L, -2); + if (lua_type(L, -1) == LUA_TSTRING) { + cache_ctx->password = rspamd_mempool_strdup(cfg->cfg_pool, + lua_tostring(L, -1)); } - lua_pop (L, 1); + lua_pop(L, 1); - lua_settop (L, 0); + lua_settop(L, 0); cache_ctx->stcf = stf; - return (gpointer)cache_ctx; + return (gpointer) cache_ctx; } gpointer 
-rspamd_stat_cache_redis_runtime (struct rspamd_task *task, - gpointer c, gboolean learn) +rspamd_stat_cache_redis_runtime(struct rspamd_task *task, + gpointer c, gboolean learn) { struct rspamd_redis_cache_ctx *ctx = c; struct rspamd_redis_cache_runtime *rt; @@ -352,120 +353,119 @@ rspamd_stat_cache_redis_runtime (struct rspamd_task *task, struct upstream_list *ups; rspamd_inet_addr_t *addr; - g_assert (ctx != NULL); + g_assert(ctx != NULL); if (task->tokens == NULL || task->tokens->len == 0) { return NULL; } if (learn) { - ups = rspamd_redis_get_servers (ctx, "write_servers"); + ups = rspamd_redis_get_servers(ctx, "write_servers"); if (!ups) { - msg_err_task ("no write servers defined for %s, cannot learn", - ctx->stcf->symbol); + msg_err_task("no write servers defined for %s, cannot learn", + ctx->stcf->symbol); return NULL; } - up = rspamd_upstream_get (ups, - RSPAMD_UPSTREAM_MASTER_SLAVE, - NULL, - 0); + up = rspamd_upstream_get(ups, + RSPAMD_UPSTREAM_MASTER_SLAVE, + NULL, + 0); } else { - ups = rspamd_redis_get_servers (ctx, "read_servers"); + ups = rspamd_redis_get_servers(ctx, "read_servers"); if (!ups) { - msg_err_task ("no read servers defined for %s, cannot check", - ctx->stcf->symbol); + msg_err_task("no read servers defined for %s, cannot check", + ctx->stcf->symbol); return NULL; } - up = rspamd_upstream_get (ups, - RSPAMD_UPSTREAM_ROUND_ROBIN, - NULL, - 0); + up = rspamd_upstream_get(ups, + RSPAMD_UPSTREAM_ROUND_ROBIN, + NULL, + 0); } if (up == NULL) { - msg_err_task ("no upstreams reachable"); + msg_err_task("no upstreams reachable"); return NULL; } - rt = rspamd_mempool_alloc0 (task->task_pool, sizeof (*rt)); + rt = rspamd_mempool_alloc0(task->task_pool, sizeof(*rt)); rt->selected = up; rt->task = task; rt->ctx = ctx; - addr = rspamd_upstream_addr_next (up); - g_assert (addr != NULL); + addr = rspamd_upstream_addr_next(up); + g_assert(addr != NULL); - if (rspamd_inet_address_get_af (addr) == AF_UNIX) { - rt->redis = redisAsyncConnectUnix 
(rspamd_inet_address_to_string (addr)); + if (rspamd_inet_address_get_af(addr) == AF_UNIX) { + rt->redis = redisAsyncConnectUnix(rspamd_inet_address_to_string(addr)); } else { - rt->redis = redisAsyncConnect (rspamd_inet_address_to_string (addr), - rspamd_inet_address_get_port (addr)); + rt->redis = redisAsyncConnect(rspamd_inet_address_to_string(addr), + rspamd_inet_address_get_port(addr)); } if (rt->redis == NULL) { - msg_warn_task ("cannot connect to redis server %s: %s", - rspamd_inet_address_to_string_pretty (addr), - strerror (errno)); + msg_warn_task("cannot connect to redis server %s: %s", + rspamd_inet_address_to_string_pretty(addr), + strerror(errno)); return NULL; } else if (rt->redis->err != REDIS_OK) { - msg_warn_task ("cannot connect to redis server %s: %s", - rspamd_inet_address_to_string_pretty (addr), - rt->redis->errstr); - redisAsyncFree (rt->redis); + msg_warn_task("cannot connect to redis server %s: %s", + rspamd_inet_address_to_string_pretty(addr), + rt->redis->errstr); + redisAsyncFree(rt->redis); rt->redis = NULL; return NULL; } - redisLibevAttach (task->event_loop, rt->redis); + redisLibevAttach(task->event_loop, rt->redis); /* Now check stats */ rt->timer_ev.data = rt; - ev_timer_init (&rt->timer_ev, rspamd_redis_cache_timeout, - rt->ctx->timeout, 0.0); - rspamd_redis_cache_maybe_auth (ctx, rt->redis); + ev_timer_init(&rt->timer_ev, rspamd_redis_cache_timeout, + rt->ctx->timeout, 0.0); + rspamd_redis_cache_maybe_auth(ctx, rt->redis); if (!learn) { - rspamd_stat_cache_redis_generate_id (task); + rspamd_stat_cache_redis_generate_id(task); } return rt; } -gint -rspamd_stat_cache_redis_check (struct rspamd_task *task, - gboolean is_spam, - gpointer runtime) +gint rspamd_stat_cache_redis_check(struct rspamd_task *task, + gboolean is_spam, + gpointer runtime) { struct rspamd_redis_cache_runtime *rt = runtime; gchar *h; - if (rspamd_session_blocked (task->s)) { + if (rspamd_session_blocked(task->s)) { return RSPAMD_LEARN_IGNORE; } - h = 
rspamd_mempool_get_variable (task->task_pool, "words_hash"); + h = rspamd_mempool_get_variable(task->task_pool, "words_hash"); if (h == NULL) { return RSPAMD_LEARN_IGNORE; } - if (redisAsyncCommand (rt->redis, rspamd_stat_cache_redis_get, rt, - "HGET %s %s", - rt->ctx->redis_object, h) == REDIS_OK) { - rspamd_session_add_event (task->s, - rspamd_redis_cache_fin, - rt, - M); - ev_timer_start (rt->task->event_loop, &rt->timer_ev); + if (redisAsyncCommand(rt->redis, rspamd_stat_cache_redis_get, rt, + "HGET %s %s", + rt->ctx->redis_object, h) == REDIS_OK) { + rspamd_session_add_event(task->s, + rspamd_redis_cache_fin, + rt, + M); + ev_timer_start(rt->task->event_loop, &rt->timer_ev); rt->has_event = TRUE; } @@ -473,30 +473,29 @@ rspamd_stat_cache_redis_check (struct rspamd_task *task, return RSPAMD_LEARN_OK; } -gint -rspamd_stat_cache_redis_learn (struct rspamd_task *task, - gboolean is_spam, - gpointer runtime) +gint rspamd_stat_cache_redis_learn(struct rspamd_task *task, + gboolean is_spam, + gpointer runtime) { struct rspamd_redis_cache_runtime *rt = runtime; gchar *h; gint flag; - if (rt == NULL || rt->ctx == NULL || rspamd_session_blocked (task->s)) { + if (rt == NULL || rt->ctx == NULL || rspamd_session_blocked(task->s)) { return RSPAMD_LEARN_IGNORE; } - h = rspamd_mempool_get_variable (task->task_pool, "words_hash"); - g_assert (h != NULL); + h = rspamd_mempool_get_variable(task->task_pool, "words_hash"); + g_assert(h != NULL); flag = (task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM) ? 
1 : -1; - if (redisAsyncCommand (rt->redis, rspamd_stat_cache_redis_set, rt, - "HSET %s %s %d", - rt->ctx->redis_object, h, flag) == REDIS_OK) { - rspamd_session_add_event (task->s, - rspamd_redis_cache_fin, rt, M); - ev_timer_start (rt->task->event_loop, &rt->timer_ev); + if (redisAsyncCommand(rt->redis, rspamd_stat_cache_redis_set, rt, + "HSET %s %s %d", + rt->ctx->redis_object, h, flag) == REDIS_OK) { + rspamd_session_add_event(task->s, + rspamd_redis_cache_fin, rt, M); + ev_timer_start(rt->task->event_loop, &rt->timer_ev); rt->has_event = TRUE; } @@ -504,17 +503,16 @@ rspamd_stat_cache_redis_learn (struct rspamd_task *task, return RSPAMD_LEARN_OK; } -void -rspamd_stat_cache_redis_close (gpointer c) +void rspamd_stat_cache_redis_close(gpointer c) { - struct rspamd_redis_cache_ctx *ctx = (struct rspamd_redis_cache_ctx *)c; + struct rspamd_redis_cache_ctx *ctx = (struct rspamd_redis_cache_ctx *) c; lua_State *L; L = ctx->L; if (ctx->conf_ref) { - luaL_unref (L, LUA_REGISTRYINDEX, ctx->conf_ref); + luaL_unref(L, LUA_REGISTRYINDEX, ctx->conf_ref); } - g_free (ctx); + g_free(ctx); } diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index 755114367..d8ad20ad2 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -25,13 +25,13 @@ #include "libutil/sqlite_utils.h" static const char *create_tables_sql = - "" - "CREATE TABLE IF NOT EXISTS learns(" - "id INTEGER PRIMARY KEY," - "flag INTEGER NOT NULL," - "digest TEXT NOT NULL);" - "CREATE UNIQUE INDEX IF NOT EXISTS d ON learns(digest);" - ""; + "" + "CREATE TABLE IF NOT EXISTS learns(" + "id INTEGER PRIMARY KEY," + "flag INTEGER NOT NULL," + "digest TEXT NOT NULL);" + "CREATE UNIQUE INDEX IF NOT EXISTS d ON learns(digest);" + ""; #define SQLITE_CACHE_PATH RSPAMD_DBDIR "/learn_cache.sqlite" @@ -47,64 +47,49 @@ enum rspamd_stat_sqlite3_stmt_idx { }; static struct rspamd_sqlite3_prstmt prepared_stmts[RSPAMD_STAT_CACHE_MAX] = -{ - { - 
.idx = RSPAMD_STAT_CACHE_TRANSACTION_START_IM, - .sql = "BEGIN IMMEDIATE TRANSACTION;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .ret = "" - }, - { - .idx = RSPAMD_STAT_CACHE_TRANSACTION_START_DEF, - .sql = "BEGIN DEFERRED TRANSACTION;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .ret = "" - }, - { - .idx = RSPAMD_STAT_CACHE_TRANSACTION_COMMIT, - .sql = "COMMIT;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .ret = "" - }, - { - .idx = RSPAMD_STAT_CACHE_TRANSACTION_ROLLBACK, - .sql = "ROLLBACK;", - .args = "", - .stmt = NULL, - .result = SQLITE_DONE, - .ret = "" - }, { - .idx = RSPAMD_STAT_CACHE_GET_LEARN, - .sql = "SELECT flag FROM learns WHERE digest=?1", - .args = "V", - .stmt = NULL, - .result = SQLITE_ROW, - .ret = "I" - }, - { - .idx = RSPAMD_STAT_CACHE_ADD_LEARN, - .sql = "INSERT INTO learns(digest, flag) VALUES (?1, ?2);", - .args = "VI", - .stmt = NULL, - .result = SQLITE_DONE, - .ret = "" - }, - { - .idx = RSPAMD_STAT_CACHE_UPDATE_LEARN, - .sql = "UPDATE learns SET flag=?1 WHERE digest=?2;", - .args = "IV", - .stmt = NULL, - .result = SQLITE_DONE, - .ret = "" - } -}; + {.idx = RSPAMD_STAT_CACHE_TRANSACTION_START_IM, + .sql = "BEGIN IMMEDIATE TRANSACTION;", + .args = "", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = ""}, + {.idx = RSPAMD_STAT_CACHE_TRANSACTION_START_DEF, + .sql = "BEGIN DEFERRED TRANSACTION;", + .args = "", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = ""}, + {.idx = RSPAMD_STAT_CACHE_TRANSACTION_COMMIT, + .sql = "COMMIT;", + .args = "", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = ""}, + {.idx = RSPAMD_STAT_CACHE_TRANSACTION_ROLLBACK, + .sql = "ROLLBACK;", + .args = "", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = ""}, + {.idx = RSPAMD_STAT_CACHE_GET_LEARN, + .sql = "SELECT flag FROM learns WHERE digest=?1", + .args = "V", + .stmt = NULL, + .result = SQLITE_ROW, + .ret = "I"}, + {.idx = RSPAMD_STAT_CACHE_ADD_LEARN, + .sql = "INSERT INTO learns(digest, flag) VALUES (?1, ?2);", + 
.args = "VI", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = ""}, + {.idx = RSPAMD_STAT_CACHE_UPDATE_LEARN, + .sql = "UPDATE learns SET flag=?1 WHERE digest=?2;", + .args = "IV", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = ""}}; struct rspamd_stat_sqlite3_ctx { sqlite3 *db; @@ -112,10 +97,10 @@ struct rspamd_stat_sqlite3_ctx { }; gpointer -rspamd_stat_cache_sqlite3_init (struct rspamd_stat_ctx *ctx, - struct rspamd_config *cfg, - struct rspamd_statfile *st, - const ucl_object_t *cf) +rspamd_stat_cache_sqlite3_init(struct rspamd_stat_ctx *ctx, + struct rspamd_config *cfg, + struct rspamd_statfile *st, + const ucl_object_t *cf) { struct rspamd_stat_sqlite3_ctx *new = NULL; const ucl_object_t *elt; @@ -125,35 +110,35 @@ rspamd_stat_cache_sqlite3_init (struct rspamd_stat_ctx *ctx, GError *err = NULL; if (cf) { - elt = ucl_object_lookup_any (cf, "path", "file", NULL); + elt = ucl_object_lookup_any(cf, "path", "file", NULL); if (elt != NULL) { - path = ucl_object_tostring (elt); + path = ucl_object_tostring(elt); } } - rspamd_snprintf (dbpath, sizeof (dbpath), "%s", path); + rspamd_snprintf(dbpath, sizeof(dbpath), "%s", path); - sqlite = rspamd_sqlite3_open_or_create (cfg->cfg_pool, - dbpath, create_tables_sql, 0, &err); + sqlite = rspamd_sqlite3_open_or_create(cfg->cfg_pool, + dbpath, create_tables_sql, 0, &err); if (sqlite == NULL) { - msg_err ("cannot open sqlite3 cache: %e", err); - g_error_free (err); + msg_err("cannot open sqlite3 cache: %e", err); + g_error_free(err); err = NULL; } else { - new = g_malloc0 (sizeof (*new)); + new = g_malloc0(sizeof(*new)); new->db = sqlite; - new->prstmt = rspamd_sqlite3_init_prstmt (sqlite, prepared_stmts, - RSPAMD_STAT_CACHE_MAX, &err); + new->prstmt = rspamd_sqlite3_init_prstmt(sqlite, prepared_stmts, + RSPAMD_STAT_CACHE_MAX, &err); if (new->prstmt == NULL) { - msg_err ("cannot open sqlite3 cache: %e", err); - g_error_free (err); + msg_err("cannot open sqlite3 cache: %e", err); + g_error_free(err); err = NULL; - 
sqlite3_close (sqlite); - g_free (new); + sqlite3_close(sqlite); + g_free(new); new = NULL; } } @@ -162,17 +147,16 @@ rspamd_stat_cache_sqlite3_init (struct rspamd_stat_ctx *ctx, } gpointer -rspamd_stat_cache_sqlite3_runtime (struct rspamd_task *task, - gpointer ctx, gboolean learn) +rspamd_stat_cache_sqlite3_runtime(struct rspamd_task *task, + gpointer ctx, gboolean learn) { /* No need of runtime for this type of classifier */ return ctx; } -gint -rspamd_stat_cache_sqlite3_check (struct rspamd_task *task, - gboolean is_spam, - gpointer runtime) +gint rspamd_stat_cache_sqlite3_check(struct rspamd_task *task, + gboolean is_spam, + gpointer runtime) { struct rspamd_stat_sqlite3_ctx *ctx = runtime; rspamd_cryptobox_hash_state_t st; @@ -188,41 +172,41 @@ rspamd_stat_cache_sqlite3_check (struct rspamd_task *task, } if (ctx != NULL && ctx->db != NULL) { - out = rspamd_mempool_alloc (task->task_pool, rspamd_cryptobox_HASHBYTES); + out = rspamd_mempool_alloc(task->task_pool, rspamd_cryptobox_HASHBYTES); - rspamd_cryptobox_hash_init (&st, NULL, 0); + rspamd_cryptobox_hash_init(&st, NULL, 0); - user = rspamd_mempool_get_variable (task->task_pool, "stat_user"); + user = rspamd_mempool_get_variable(task->task_pool, "stat_user"); /* Use dedicated hash space for per users cache */ if (user != NULL) { - rspamd_cryptobox_hash_update (&st, user, strlen (user)); + rspamd_cryptobox_hash_update(&st, user, strlen(user)); } - for (i = 0; i < task->tokens->len; i ++) { - tok = g_ptr_array_index (task->tokens, i); - rspamd_cryptobox_hash_update (&st, (guchar *)&tok->data, - sizeof (tok->data)); + for (i = 0; i < task->tokens->len; i++) { + tok = g_ptr_array_index(task->tokens, i); + rspamd_cryptobox_hash_update(&st, (guchar *) &tok->data, + sizeof(tok->data)); } - rspamd_cryptobox_hash_final (&st, out); + rspamd_cryptobox_hash_final(&st, out); - rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_START_DEF); - rc = rspamd_sqlite3_run_prstmt 
(task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_GET_LEARN, (gint64)rspamd_cryptobox_HASHBYTES, - out, &flag); - rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_START_DEF); + rc = rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_GET_LEARN, (gint64) rspamd_cryptobox_HASHBYTES, + out, &flag); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); /* Save hash into variables */ - rspamd_mempool_set_variable (task->task_pool, "words_hash", out, NULL); + rspamd_mempool_set_variable(task->task_pool, "words_hash", out, NULL); if (rc == SQLITE_OK) { /* We have some existing record in the table */ if (!!flag == !!is_spam) { /* Already learned */ - msg_warn_task ("already seen stat hash: %*bs", - rspamd_cryptobox_HASHBYTES, out); + msg_warn_task("already seen stat hash: %*bs", + rspamd_cryptobox_HASHBYTES, out); return RSPAMD_LEARN_IGNORE; } else { @@ -235,17 +219,16 @@ rspamd_stat_cache_sqlite3_check (struct rspamd_task *task, return RSPAMD_LEARN_OK; } -gint -rspamd_stat_cache_sqlite3_learn (struct rspamd_task *task, - gboolean is_spam, - gpointer runtime) +gint rspamd_stat_cache_sqlite3_learn(struct rspamd_task *task, + gboolean is_spam, + gpointer runtime) { struct rspamd_stat_sqlite3_ctx *ctx = runtime; gboolean unlearn = !!(task->flags & RSPAMD_TASK_FLAG_UNLEARN); guchar *h; gint64 flag; - h = rspamd_mempool_get_variable (task->task_pool, "words_hash"); + h = rspamd_mempool_get_variable(task->task_pool, "words_hash"); if (h == NULL) { return RSPAMD_LEARN_IGNORE; @@ -255,39 +238,37 @@ rspamd_stat_cache_sqlite3_learn (struct rspamd_task *task, if (!unlearn) { /* Insert result new id */ - rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_START_IM); - 
rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_ADD_LEARN, - (gint64)rspamd_cryptobox_HASHBYTES, h, flag); - rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_START_IM); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_ADD_LEARN, + (gint64) rspamd_cryptobox_HASHBYTES, h, flag); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); } else { - rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_START_IM); - rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_UPDATE_LEARN, - flag, - (gint64)rspamd_cryptobox_HASHBYTES, h); - rspamd_sqlite3_run_prstmt (task->task_pool, ctx->db, ctx->prstmt, - RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_START_IM); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_UPDATE_LEARN, + flag, + (gint64) rspamd_cryptobox_HASHBYTES, h); + rspamd_sqlite3_run_prstmt(task->task_pool, ctx->db, ctx->prstmt, + RSPAMD_STAT_CACHE_TRANSACTION_COMMIT); } - rspamd_sqlite3_sync (ctx->db, NULL, NULL); + rspamd_sqlite3_sync(ctx->db, NULL, NULL); return RSPAMD_LEARN_OK; } -void -rspamd_stat_cache_sqlite3_close (gpointer c) +void rspamd_stat_cache_sqlite3_close(gpointer c) { - struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *)c; + struct rspamd_stat_sqlite3_ctx *ctx = (struct rspamd_stat_sqlite3_ctx *) c; if (ctx != NULL) { - rspamd_sqlite3_close_prstmt (ctx->db, ctx->prstmt); - sqlite3_close (ctx->db); - g_free (ctx); + rspamd_sqlite3_close_prstmt(ctx->db, ctx->prstmt); + sqlite3_close(ctx->db); + g_free(ctx); } - } diff --git a/src/libstat/stat_api.h 
b/src/libstat/stat_api.h index cc34c7a0b..1badb2001 100644 --- a/src/libstat/stat_api.h +++ b/src/libstat/stat_api.h @@ -21,7 +21,7 @@ #include "lua/lua_common.h" #include "contrib/libev/ev.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -46,10 +46,10 @@ extern "C" { #define RSPAMD_STAT_TOKEN_FLAG_EMOJI (1u << 13) typedef struct rspamd_stat_token_s { - rspamd_ftok_t original; /* utf8 raw */ + rspamd_ftok_t original; /* utf8 raw */ rspamd_ftok_unicode_t unicode; /* array of unicode characters, normalized, lowercased */ - rspamd_ftok_t normalized; /* normalized and lowercased utf8 */ - rspamd_ftok_t stemmed; /* stemmed utf8 */ + rspamd_ftok_t normalized; /* normalized and lowercased utf8 */ + rspamd_ftok_t stemmed; /* stemmed utf8 */ guint flags; } rspamd_stat_token_t; @@ -80,20 +80,20 @@ typedef enum rspamd_stat_result_e { * Initialise statistics modules * @param cfg */ -void rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base); +void rspamd_stat_init(struct rspamd_config *cfg, struct ev_loop *ev_base); /** * Finalize statistics */ -void rspamd_stat_close (void); +void rspamd_stat_close(void); /** * Tokenize task * @param st_ctx * @param task */ -void rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task); +void rspamd_stat_process_tokenize(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task); /** * Classify the task specified and insert symbols if needed @@ -102,8 +102,8 @@ void rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, * @param err error returned * @return TRUE if task has been classified */ -rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task, - lua_State *L, guint stage, GError **err); +rspamd_stat_result_t rspamd_stat_classify(struct rspamd_task *task, + lua_State *L, guint stage, GError **err); /** @@ -111,7 +111,7 @@ rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task, * @param task * @return */ -gboolean rspamd_stat_check_autolearn 
(struct rspamd_task *task); +gboolean rspamd_stat_check_autolearn(struct rspamd_task *task); /** * Learn task as spam or ham, task must be processed prior to this call @@ -122,10 +122,10 @@ gboolean rspamd_stat_check_autolearn (struct rspamd_task *task); * @param err error returned * @return TRUE if task has been learned */ -rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task, - gboolean spam, lua_State *L, const gchar *classifier, - guint stage, - GError **err); +rspamd_stat_result_t rspamd_stat_learn(struct rspamd_task *task, + gboolean spam, lua_State *L, const gchar *classifier, + guint stage, + GError **err); /** * Get the overall statistics for all statfile backends @@ -133,14 +133,14 @@ rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task, * @param total_learns the total number of learns is stored here * @return array of statistical information */ -rspamd_stat_result_t rspamd_stat_statistics (struct rspamd_task *task, - struct rspamd_config *cfg, - guint64 *total_learns, - ucl_object_t **res); +rspamd_stat_result_t rspamd_stat_statistics(struct rspamd_task *task, + struct rspamd_config *cfg, + guint64 *total_learns, + ucl_object_t **res); -void rspamd_stat_unload (void); +void rspamd_stat_unload(void); -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libstat/stat_config.c b/src/libstat/stat_config.c index 1f11a5607..274804461 100644 --- a/src/libstat/stat_config.c +++ b/src/libstat/stat_config.c @@ -38,8 +38,7 @@ static struct rspamd_stat_classifier stat_classifiers[] = { .classify_func = bayes_classify, .learn_spam_func = bayes_learn_spam, .fin_func = bayes_fin, - } -}; + }}; static struct rspamd_stat_tokenizer stat_tokenizers[] = { { @@ -54,62 +53,63 @@ static struct rspamd_stat_tokenizer stat_tokenizers[] = { }, }; -#define RSPAMD_STAT_BACKEND_ELT(nam, eltn) { \ - .name = #nam, \ - .read_only = false, \ - .init = rspamd_##eltn##_init, \ - .runtime = rspamd_##eltn##_runtime, \ - .process_tokens = 
rspamd_##eltn##_process_tokens, \ - .finalize_process = rspamd_##eltn##_finalize_process, \ - .learn_tokens = rspamd_##eltn##_learn_tokens, \ - .finalize_learn = rspamd_##eltn##_finalize_learn, \ - .total_learns = rspamd_##eltn##_total_learns, \ - .inc_learns = rspamd_##eltn##_inc_learns, \ - .dec_learns = rspamd_##eltn##_dec_learns, \ - .get_stat = rspamd_##eltn##_get_stat, \ +#define RSPAMD_STAT_BACKEND_ELT(nam, eltn) \ + { \ + .name = #nam, \ + .read_only = false, \ + .init = rspamd_##eltn##_init, \ + .runtime = rspamd_##eltn##_runtime, \ + .process_tokens = rspamd_##eltn##_process_tokens, \ + .finalize_process = rspamd_##eltn##_finalize_process, \ + .learn_tokens = rspamd_##eltn##_learn_tokens, \ + .finalize_learn = rspamd_##eltn##_finalize_learn, \ + .total_learns = rspamd_##eltn##_total_learns, \ + .inc_learns = rspamd_##eltn##_inc_learns, \ + .dec_learns = rspamd_##eltn##_dec_learns, \ + .get_stat = rspamd_##eltn##_get_stat, \ .load_tokenizer_config = rspamd_##eltn##_load_tokenizer_config, \ - .close = rspamd_##eltn##_close \ + .close = rspamd_##eltn##_close \ } -#define RSPAMD_STAT_BACKEND_ELT_READONLY(nam, eltn) { \ - .name = #nam, \ - .read_only = true, \ - .init = rspamd_##eltn##_init, \ - .runtime = rspamd_##eltn##_runtime, \ - .process_tokens = rspamd_##eltn##_process_tokens, \ - .finalize_process = rspamd_##eltn##_finalize_process, \ - .learn_tokens = NULL, \ - .finalize_learn = NULL, \ - .total_learns = rspamd_##eltn##_total_learns, \ - .inc_learns = NULL, \ - .dec_learns = NULL, \ - .get_stat = rspamd_##eltn##_get_stat, \ +#define RSPAMD_STAT_BACKEND_ELT_READONLY(nam, eltn) \ + { \ + .name = #nam, \ + .read_only = true, \ + .init = rspamd_##eltn##_init, \ + .runtime = rspamd_##eltn##_runtime, \ + .process_tokens = rspamd_##eltn##_process_tokens, \ + .finalize_process = rspamd_##eltn##_finalize_process, \ + .learn_tokens = NULL, \ + .finalize_learn = NULL, \ + .total_learns = rspamd_##eltn##_total_learns, \ + .inc_learns = NULL, \ + .dec_learns = 
NULL, \ + .get_stat = rspamd_##eltn##_get_stat, \ .load_tokenizer_config = rspamd_##eltn##_load_tokenizer_config, \ - .close = rspamd_##eltn##_close \ + .close = rspamd_##eltn##_close \ } static struct rspamd_stat_backend stat_backends[] = { - RSPAMD_STAT_BACKEND_ELT(mmap, mmaped_file), - RSPAMD_STAT_BACKEND_ELT(sqlite3, sqlite3), - RSPAMD_STAT_BACKEND_ELT_READONLY(cdb, cdb), - RSPAMD_STAT_BACKEND_ELT(redis, redis) -}; - -#define RSPAMD_STAT_CACHE_ELT(nam, eltn) { \ - .name = #nam, \ - .init = rspamd_stat_cache_##eltn##_init, \ + RSPAMD_STAT_BACKEND_ELT(mmap, mmaped_file), + RSPAMD_STAT_BACKEND_ELT(sqlite3, sqlite3), + RSPAMD_STAT_BACKEND_ELT_READONLY(cdb, cdb), + RSPAMD_STAT_BACKEND_ELT(redis, redis)}; + +#define RSPAMD_STAT_CACHE_ELT(nam, eltn) \ + { \ + .name = #nam, \ + .init = rspamd_stat_cache_##eltn##_init, \ .runtime = rspamd_stat_cache_##eltn##_runtime, \ - .check = rspamd_stat_cache_##eltn##_check, \ - .learn = rspamd_stat_cache_##eltn##_learn, \ - .close = rspamd_stat_cache_##eltn##_close \ + .check = rspamd_stat_cache_##eltn##_check, \ + .learn = rspamd_stat_cache_##eltn##_learn, \ + .close = rspamd_stat_cache_##eltn##_close \ } static struct rspamd_stat_cache stat_caches[] = { - RSPAMD_STAT_CACHE_ELT(sqlite3, sqlite3), - RSPAMD_STAT_CACHE_ELT(redis, redis), + RSPAMD_STAT_CACHE_ELT(sqlite3, sqlite3), + RSPAMD_STAT_CACHE_ELT(redis, redis), }; -void -rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base) +void rspamd_stat_init(struct rspamd_config *cfg, struct ev_loop *ev_base) { GList *cur, *curst; struct rspamd_classifier_config *clf; @@ -124,111 +124,112 @@ rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base) gboolean skip_cache = FALSE; if (stat_ctx == NULL) { - stat_ctx = g_malloc0 (sizeof (*stat_ctx)); + stat_ctx = g_malloc0(sizeof(*stat_ctx)); } - lua_getglobal (L, "rspamd_classifiers"); + lua_getglobal(L, "rspamd_classifiers"); - if (lua_type (L, -1) == LUA_TTABLE) { - lua_pushnil (L); + if (lua_type(L, -1) == 
LUA_TTABLE) { + lua_pushnil(L); - while (lua_next (L, -2) != 0) { - lua_classifiers_cnt ++; - lua_pop (L, 1); + while (lua_next(L, -2) != 0) { + lua_classifiers_cnt++; + lua_pop(L, 1); } } - lua_pop (L, 1); + lua_pop(L, 1); - stat_ctx->classifiers_count = G_N_ELEMENTS (stat_classifiers) + - lua_classifiers_cnt; - stat_ctx->classifiers_subrs = g_new0 (struct rspamd_stat_classifier, - stat_ctx->classifiers_count); + stat_ctx->classifiers_count = G_N_ELEMENTS(stat_classifiers) + + lua_classifiers_cnt; + stat_ctx->classifiers_subrs = g_new0(struct rspamd_stat_classifier, + stat_ctx->classifiers_count); - for (i = 0; i < G_N_ELEMENTS (stat_classifiers); i ++) { - memcpy (&stat_ctx->classifiers_subrs[i], &stat_classifiers[i], - sizeof (struct rspamd_stat_classifier)); + for (i = 0; i < G_N_ELEMENTS(stat_classifiers); i++) { + memcpy(&stat_ctx->classifiers_subrs[i], &stat_classifiers[i], + sizeof(struct rspamd_stat_classifier)); } - lua_getglobal (L, "rspamd_classifiers"); + lua_getglobal(L, "rspamd_classifiers"); - if (lua_type (L, -1) == LUA_TTABLE) { - lua_pushnil (L); + if (lua_type(L, -1) == LUA_TTABLE) { + lua_pushnil(L); - while (lua_next (L, -2) != 0) { - lua_pushvalue (L, -2); - memcpy (&stat_ctx->classifiers_subrs[i], &lua_classifier, - sizeof (struct rspamd_stat_classifier)); - stat_ctx->classifiers_subrs[i].name = g_strdup (lua_tostring (L, -1)); - i ++; - lua_pop (L, 2); + while (lua_next(L, -2) != 0) { + lua_pushvalue(L, -2); + memcpy(&stat_ctx->classifiers_subrs[i], &lua_classifier, + sizeof(struct rspamd_stat_classifier)); + stat_ctx->classifiers_subrs[i].name = g_strdup(lua_tostring(L, -1)); + i++; + lua_pop(L, 2); } } - lua_pop (L, 1); + lua_pop(L, 1); stat_ctx->backends_subrs = stat_backends; - stat_ctx->backends_count = G_N_ELEMENTS (stat_backends); + stat_ctx->backends_count = G_N_ELEMENTS(stat_backends); stat_ctx->tokenizers_subrs = stat_tokenizers; - stat_ctx->tokenizers_count = G_N_ELEMENTS (stat_tokenizers); + stat_ctx->tokenizers_count = 
G_N_ELEMENTS(stat_tokenizers); stat_ctx->caches_subrs = stat_caches; - stat_ctx->caches_count = G_N_ELEMENTS (stat_caches); + stat_ctx->caches_count = G_N_ELEMENTS(stat_caches); stat_ctx->cfg = cfg; - stat_ctx->statfiles = g_ptr_array_new (); - stat_ctx->classifiers = g_ptr_array_new (); - stat_ctx->async_elts = g_queue_new (); + stat_ctx->statfiles = g_ptr_array_new(); + stat_ctx->classifiers = g_ptr_array_new(); + stat_ctx->async_elts = g_queue_new(); stat_ctx->event_loop = ev_base; stat_ctx->lua_stat_tokens_ref = -1; /* Interact with lua_stat */ - if (luaL_dostring (L, "return require \"lua_stat\"") != 0) { - msg_err_config ("cannot require lua_stat: %s", - lua_tostring (L, -1)); + if (luaL_dostring(L, "return require \"lua_stat\"") != 0) { + msg_err_config("cannot require lua_stat: %s", + lua_tostring(L, -1)); } else { #if LUA_VERSION_NUM >= 504 lua_settop(L, -2); #endif - if (lua_type (L, -1) != LUA_TTABLE) { - msg_err_config ("lua stat must return " - "table and not %s", - lua_typename (L, lua_type (L, -1))); + if (lua_type(L, -1) != LUA_TTABLE) { + msg_err_config("lua stat must return " + "table and not %s", + lua_typename(L, lua_type(L, -1))); } else { - lua_pushstring (L, "gen_stat_tokens"); - lua_gettable (L, -2); + lua_pushstring(L, "gen_stat_tokens"); + lua_gettable(L, -2); - if (lua_type (L, -1) != LUA_TFUNCTION) { - msg_err_config ("gen_stat_tokens must return " - "function and not %s", - lua_typename (L, lua_type (L, -1))); + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_err_config("gen_stat_tokens must return " + "function and not %s", + lua_typename(L, lua_type(L, -1))); } else { /* Call this function to obtain closure */ gint err_idx, ret; struct rspamd_config **pcfg; - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); - lua_pushvalue (L, err_idx - 1); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + lua_pushvalue(L, err_idx - 1); - pcfg = lua_newuserdata (L, sizeof (*pcfg)); + pcfg = 
lua_newuserdata(L, sizeof(*pcfg)); *pcfg = cfg; - rspamd_lua_setclass (L, "rspamd{config}", -1); + rspamd_lua_setclass(L, "rspamd{config}", -1); - if ((ret = lua_pcall (L, 1, 1, err_idx)) != 0) { - msg_err_config ("call to gen_stat_tokens lua " - "script failed (%d): %s", ret, - lua_tostring (L, -1)); + if ((ret = lua_pcall(L, 1, 1, err_idx)) != 0) { + msg_err_config("call to gen_stat_tokens lua " + "script failed (%d): %s", + ret, + lua_tostring(L, -1)); } else { - if (lua_type (L, -1) != LUA_TFUNCTION) { - msg_err_config ("gen_stat_tokens invocation must return " - "function and not %s", - lua_typename (L, lua_type (L, -1))); + if (lua_type(L, -1) != LUA_TFUNCTION) { + msg_err_config("gen_stat_tokens invocation must return " + "function and not %s", + lua_typename(L, lua_type(L, -1))); } else { - stat_ctx->lua_stat_tokens_ref = luaL_ref (L, LUA_REGISTRYINDEX); + stat_ctx->lua_stat_tokens_ref = luaL_ref(L, LUA_REGISTRYINDEX); } } } @@ -236,7 +237,7 @@ rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base) } /* Cleanup mess */ - lua_settop (L, 0); + lua_settop(L, 0); /* Create statfiles from the classifiers */ cur = cfg->classifiers; @@ -244,40 +245,41 @@ rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base) while (cur) { bk = NULL; clf = cur->data; - cl = g_malloc0 (sizeof (*cl)); + cl = g_malloc0(sizeof(*cl)); cl->cfg = clf; cl->ctx = stat_ctx; - cl->statfiles_ids = g_array_new (FALSE, FALSE, sizeof (gint)); - cl->subrs = rspamd_stat_get_classifier (clf->classifier); + cl->statfiles_ids = g_array_new(FALSE, FALSE, sizeof(gint)); + cl->subrs = rspamd_stat_get_classifier(clf->classifier); if (cl->subrs == NULL) { - g_free (cl); - msg_err_config ("cannot init classifier type %s", clf->name); - cur = g_list_next (cur); + g_free(cl); + msg_err_config("cannot init classifier type %s", clf->name); + cur = g_list_next(cur); continue; } - if (!cl->subrs->init_func (cfg, ev_base, cl)) { - g_free (cl); - msg_err_config ("cannot init classifier 
type %s", clf->name); - cur = g_list_next (cur); + if (!cl->subrs->init_func(cfg, ev_base, cl)) { + g_free(cl); + msg_err_config("cannot init classifier type %s", clf->name); + cur = g_list_next(cur); continue; } if (!(clf->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND)) { - bk = rspamd_stat_get_backend (clf->backend); + bk = rspamd_stat_get_backend(clf->backend); if (bk == NULL) { - msg_err_config ("cannot get backend of type %s, so disable classifier" - " %s completely", clf->backend, clf->name); - cur = g_list_next (cur); + msg_err_config("cannot get backend of type %s, so disable classifier" + " %s completely", + clf->backend, clf->name); + cur = g_list_next(cur); continue; } } else { /* This actually is not implemented so it should never happen */ - g_free (cl); - cur = g_list_next (cur); + g_free(cl); + cur = g_list_next(cur); continue; } @@ -286,10 +288,10 @@ rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base) * We NO LONGER support multiple tokenizers per rspamd instance */ if (stat_ctx->tkcf == NULL) { - stat_ctx->tokenizer = rspamd_stat_get_tokenizer (clf->tokenizer->name); - g_assert (stat_ctx->tokenizer != NULL); - stat_ctx->tkcf = stat_ctx->tokenizer->get_config (cfg->cfg_pool, - clf->tokenizer, NULL); + stat_ctx->tokenizer = rspamd_stat_get_tokenizer(clf->tokenizer->name); + g_assert(stat_ctx->tokenizer != NULL); + stat_ctx->tkcf = stat_ctx->tokenizer->get_config(cfg->cfg_pool, + clf->tokenizer, NULL); } /* Init classifier cache */ @@ -306,7 +308,7 @@ rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base) else { if (cache_obj) { cache_name_obj = ucl_object_lookup_any(cache_obj, - "name", "type", NULL); + "name", "type", NULL); } if (cache_name_obj) { @@ -328,62 +330,61 @@ rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base) while (curst) { stf = curst->data; - st = g_malloc0 (sizeof (*st)); + st = g_malloc0(sizeof(*st)); st->classifier = cl; st->stcf = stf; if (!(cl->cfg->flags & 
RSPAMD_FLAG_CLASSIFIER_NO_BACKEND)) { st->backend = bk; - st->bkcf = bk->init (stat_ctx, cfg, st); - msg_info_config ("added backend %s for symbol %s", - bk->name, stf->symbol); + st->bkcf = bk->init(stat_ctx, cfg, st); + msg_info_config("added backend %s for symbol %s", + bk->name, stf->symbol); } else { - msg_debug_config ("added backend-less statfile for symbol %s", - stf->symbol); + msg_debug_config("added backend-less statfile for symbol %s", + stf->symbol); } /* XXX: bad hack to pass statfiles configuration to cache */ if (cl->cache == NULL && !skip_cache) { - cl->cache = rspamd_stat_get_cache (cache_name); - g_assert (cl->cache != NULL); - cl->cachecf = cl->cache->init (stat_ctx, cfg, st, cache_obj); + cl->cache = rspamd_stat_get_cache(cache_name); + g_assert(cl->cache != NULL); + cl->cachecf = cl->cache->init(stat_ctx, cfg, st, cache_obj); if (cl->cachecf == NULL) { - msg_err_config ("error adding cache %s for symbol %s", - cl->cache->name, stf->symbol); + msg_err_config("error adding cache %s for symbol %s", + cl->cache->name, stf->symbol); cl->cache = NULL; } else { - msg_debug_config ("added cache %s for symbol %s", - cl->cache->name, stf->symbol); + msg_debug_config("added cache %s for symbol %s", + cl->cache->name, stf->symbol); } } if (st->bkcf == NULL && - !(cl->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND)) { - msg_err_config ("cannot init backend %s for statfile %s", - clf->backend, stf->symbol); + !(cl->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND)) { + msg_err_config("cannot init backend %s for statfile %s", + clf->backend, stf->symbol); - g_free (st); + g_free(st); } else { st->id = stat_ctx->statfiles->len; - g_ptr_array_add (stat_ctx->statfiles, st); - g_array_append_val (cl->statfiles_ids, st->id); + g_ptr_array_add(stat_ctx->statfiles, st); + g_array_append_val(cl->statfiles_ids, st->id); } curst = curst->next; } - g_ptr_array_add (stat_ctx->classifiers, cl); + g_ptr_array_add(stat_ctx->classifiers, cl); cur = cur->next; } } -void 
-rspamd_stat_close (void) +void rspamd_stat_close(void) { struct rspamd_classifier *cl; struct rspamd_statfile *st; @@ -393,67 +394,67 @@ rspamd_stat_close (void) guint i, j; gint id; - st_ctx = rspamd_stat_get_ctx (); - g_assert (st_ctx != NULL); + st_ctx = rspamd_stat_get_ctx(); + g_assert(st_ctx != NULL); - for (i = 0; i < st_ctx->classifiers->len; i ++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + for (i = 0; i < st_ctx->classifiers->len; i++) { + cl = g_ptr_array_index(st_ctx->classifiers, i); - for (j = 0; j < cl->statfiles_ids->len; j ++) { - id = g_array_index (cl->statfiles_ids, gint, j); - st = g_ptr_array_index (st_ctx->statfiles, id); + for (j = 0; j < cl->statfiles_ids->len; j++) { + id = g_array_index(cl->statfiles_ids, gint, j); + st = g_ptr_array_index(st_ctx->statfiles, id); if (!(st->classifier->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND)) { - st->backend->close (st->bkcf); + st->backend->close(st->bkcf); } - g_free (st); + g_free(st); } if (cl->cache && cl->cachecf) { - cl->cache->close (cl->cachecf); + cl->cache->close(cl->cachecf); } - g_array_free (cl->statfiles_ids, TRUE); + g_array_free(cl->statfiles_ids, TRUE); if (cl->subrs->fin_func) { - cl->subrs->fin_func (cl); + cl->subrs->fin_func(cl); } - g_free (cl); + g_free(cl); } cur = st_ctx->async_elts->head; while (cur) { aelt = cur->data; - REF_RELEASE (aelt); - cur = g_list_next (cur); + REF_RELEASE(aelt); + cur = g_list_next(cur); } - g_queue_free (stat_ctx->async_elts); - g_ptr_array_free (st_ctx->statfiles, TRUE); - g_ptr_array_free (st_ctx->classifiers, TRUE); + g_queue_free(stat_ctx->async_elts); + g_ptr_array_free(st_ctx->statfiles, TRUE); + g_ptr_array_free(st_ctx->classifiers, TRUE); if (st_ctx->lua_stat_tokens_ref != -1) { - luaL_unref (st_ctx->cfg->lua_state, LUA_REGISTRYINDEX, - st_ctx->lua_stat_tokens_ref); + luaL_unref(st_ctx->cfg->lua_state, LUA_REGISTRYINDEX, + st_ctx->lua_stat_tokens_ref); } - g_free (st_ctx->classifiers_subrs); - g_free (st_ctx); + 
g_free(st_ctx->classifiers_subrs); + g_free(st_ctx); /* Set global var to NULL */ stat_ctx = NULL; } struct rspamd_stat_ctx * -rspamd_stat_get_ctx (void) +rspamd_stat_get_ctx(void) { return stat_ctx; } struct rspamd_stat_classifier * -rspamd_stat_get_classifier (const gchar *name) +rspamd_stat_get_classifier(const gchar *name) { guint i; @@ -461,19 +462,19 @@ rspamd_stat_get_classifier (const gchar *name) name = RSPAMD_DEFAULT_CLASSIFIER; } - for (i = 0; i < stat_ctx->classifiers_count; i ++) { - if (strcmp (name, stat_ctx->classifiers_subrs[i].name) == 0) { + for (i = 0; i < stat_ctx->classifiers_count; i++) { + if (strcmp(name, stat_ctx->classifiers_subrs[i].name) == 0) { return &stat_ctx->classifiers_subrs[i]; } } - msg_err ("cannot find classifier named %s", name); + msg_err("cannot find classifier named %s", name); return NULL; } struct rspamd_stat_backend * -rspamd_stat_get_backend (const gchar *name) +rspamd_stat_get_backend(const gchar *name) { guint i; @@ -481,19 +482,19 @@ rspamd_stat_get_backend (const gchar *name) name = RSPAMD_DEFAULT_BACKEND; } - for (i = 0; i < stat_ctx->backends_count; i ++) { - if (strcmp (name, stat_ctx->backends_subrs[i].name) == 0) { + for (i = 0; i < stat_ctx->backends_count; i++) { + if (strcmp(name, stat_ctx->backends_subrs[i].name) == 0) { return &stat_ctx->backends_subrs[i]; } } - msg_err ("cannot find backend named %s", name); + msg_err("cannot find backend named %s", name); return NULL; } struct rspamd_stat_tokenizer * -rspamd_stat_get_tokenizer (const gchar *name) +rspamd_stat_get_tokenizer(const gchar *name) { guint i; @@ -501,19 +502,19 @@ rspamd_stat_get_tokenizer (const gchar *name) name = RSPAMD_DEFAULT_TOKENIZER; } - for (i = 0; i < stat_ctx->tokenizers_count; i ++) { - if (strcmp (name, stat_ctx->tokenizers_subrs[i].name) == 0) { + for (i = 0; i < stat_ctx->tokenizers_count; i++) { + if (strcmp(name, stat_ctx->tokenizers_subrs[i].name) == 0) { return &stat_ctx->tokenizers_subrs[i]; } } - msg_err ("cannot find 
tokenizer named %s", name); + msg_err("cannot find tokenizer named %s", name); return NULL; } struct rspamd_stat_cache * -rspamd_stat_get_cache (const gchar *name) +rspamd_stat_get_cache(const gchar *name) { guint i; @@ -522,62 +523,62 @@ rspamd_stat_get_cache (const gchar *name) } for (i = 0; i < stat_ctx->caches_count; i++) { - if (strcmp (name, stat_ctx->caches_subrs[i].name) == 0) { + if (strcmp(name, stat_ctx->caches_subrs[i].name) == 0) { return &stat_ctx->caches_subrs[i]; } } - msg_err ("cannot find cache named %s", name); + msg_err("cannot find cache named %s", name); return NULL; } static void -rspamd_async_elt_dtor (struct rspamd_stat_async_elt *elt) +rspamd_async_elt_dtor(struct rspamd_stat_async_elt *elt) { if (elt->cleanup) { - elt->cleanup (elt, elt->ud); + elt->cleanup(elt, elt->ud); } - ev_timer_stop (elt->event_loop, &elt->timer_ev); - g_free (elt); + ev_timer_stop(elt->event_loop, &elt->timer_ev); + g_free(elt); } static void -rspamd_async_elt_on_timer (EV_P_ ev_timer *w, int revents) +rspamd_async_elt_on_timer(EV_P_ ev_timer *w, int revents) { - struct rspamd_stat_async_elt *elt = (struct rspamd_stat_async_elt *)w->data; + struct rspamd_stat_async_elt *elt = (struct rspamd_stat_async_elt *) w->data; gdouble jittered_time; if (elt->enabled) { - elt->handler (elt, elt->ud); + elt->handler(elt, elt->ud); } - jittered_time = rspamd_time_jitter (elt->timeout, 0); + jittered_time = rspamd_time_jitter(elt->timeout, 0); elt->timer_ev.repeat = jittered_time; - ev_timer_again (EV_A_ w); + ev_timer_again(EV_A_ w); } -struct rspamd_stat_async_elt* -rspamd_stat_ctx_register_async (rspamd_stat_async_handler handler, - rspamd_stat_async_cleanup cleanup, - gpointer d, - gdouble timeout) +struct rspamd_stat_async_elt * +rspamd_stat_ctx_register_async(rspamd_stat_async_handler handler, + rspamd_stat_async_cleanup cleanup, + gpointer d, + gdouble timeout) { struct rspamd_stat_async_elt *elt; struct rspamd_stat_ctx *st_ctx; - st_ctx = rspamd_stat_get_ctx (); - 
g_assert (st_ctx != NULL); + st_ctx = rspamd_stat_get_ctx(); + g_assert(st_ctx != NULL); - elt = g_malloc0 (sizeof (*elt)); + elt = g_malloc0(sizeof(*elt)); elt->handler = handler; elt->cleanup = cleanup; elt->ud = d; elt->timeout = timeout; elt->event_loop = st_ctx->event_loop; - REF_INIT_RETAIN (elt, rspamd_async_elt_dtor); + REF_INIT_RETAIN(elt, rspamd_async_elt_dtor); /* Enabled by default */ @@ -588,15 +589,15 @@ rspamd_stat_ctx_register_async (rspamd_stat_async_handler handler, * fast as possible */ elt->timer_ev.data = elt; - ev_timer_init (&elt->timer_ev, rspamd_async_elt_on_timer, - 0.1, 0.0); - ev_timer_start (st_ctx->event_loop, &elt->timer_ev); + ev_timer_init(&elt->timer_ev, rspamd_async_elt_on_timer, + 0.1, 0.0); + ev_timer_start(st_ctx->event_loop, &elt->timer_ev); } else { elt->enabled = FALSE; } - g_queue_push_tail (st_ctx->async_elts, elt); + g_queue_push_tail(st_ctx->async_elts, elt); return elt; } diff --git a/src/libstat/stat_internal.h b/src/libstat/stat_internal.h index e9fd3d52d..8d0ebd4fa 100644 --- a/src/libstat/stat_internal.h +++ b/src/libstat/stat_internal.h @@ -24,7 +24,7 @@ #include "backends/backends.h" #include "learn_cache/learn_cache.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -59,11 +59,11 @@ struct rspamd_statfile { struct rspamd_stat_async_elt; -typedef void (*rspamd_stat_async_handler) (struct rspamd_stat_async_elt *elt, - gpointer ud); +typedef void (*rspamd_stat_async_handler)(struct rspamd_stat_async_elt *elt, + gpointer ud); -typedef void (*rspamd_stat_async_cleanup) (struct rspamd_stat_async_elt *elt, - gpointer ud); +typedef void (*rspamd_stat_async_cleanup)(struct rspamd_stat_async_elt *elt, + gpointer ud); struct rspamd_stat_async_elt { rspamd_stat_async_handler handler; @@ -88,9 +88,9 @@ struct rspamd_stat_ctx { guint caches_count; /* Runtime configuration */ - GPtrArray *statfiles; /* struct rspamd_statfile */ + GPtrArray *statfiles; /* struct rspamd_statfile */ GPtrArray *classifiers; /* struct 
rspamd_classifier */ - GQueue *async_elts; /* struct rspamd_stat_async_elt */ + GQueue *async_elts; /* struct rspamd_stat_async_elt */ struct rspamd_config *cfg; gint lua_stat_tokens_ref; @@ -108,25 +108,26 @@ typedef enum rspamd_learn_cache_result { RSPAMD_LEARN_IGNORE } rspamd_learn_t; -struct rspamd_stat_ctx *rspamd_stat_get_ctx (void); +struct rspamd_stat_ctx *rspamd_stat_get_ctx(void); -struct rspamd_stat_classifier *rspamd_stat_get_classifier (const gchar *name); +struct rspamd_stat_classifier *rspamd_stat_get_classifier(const gchar *name); -struct rspamd_stat_backend *rspamd_stat_get_backend (const gchar *name); +struct rspamd_stat_backend *rspamd_stat_get_backend(const gchar *name); -struct rspamd_stat_tokenizer *rspamd_stat_get_tokenizer (const gchar *name); +struct rspamd_stat_tokenizer *rspamd_stat_get_tokenizer(const gchar *name); -struct rspamd_stat_cache *rspamd_stat_get_cache (const gchar *name); +struct rspamd_stat_cache *rspamd_stat_get_cache(const gchar *name); -struct rspamd_stat_async_elt *rspamd_stat_ctx_register_async ( - rspamd_stat_async_handler handler, rspamd_stat_async_cleanup cleanup, - gpointer d, gdouble timeout); +struct rspamd_stat_async_elt *rspamd_stat_ctx_register_async( + rspamd_stat_async_handler handler, rspamd_stat_async_cleanup cleanup, + gpointer d, gdouble timeout); -static GQuark rspamd_stat_quark (void) { - return g_quark_from_static_string ("rspamd-statistics"); +static GQuark rspamd_stat_quark(void) +{ + return g_quark_from_static_string("rspamd-statistics"); } -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c index 9e2875297..8c1d8ff19 100644 --- a/src/libstat/stat_process.c +++ b/src/libstat/stat_process.c @@ -32,90 +32,90 @@ static const gdouble similarity_threshold = 80.0; static void -rspamd_stat_tokenize_parts_metadata (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task) +rspamd_stat_tokenize_parts_metadata(struct rspamd_stat_ctx *st_ctx, 
+ struct rspamd_task *task) { GArray *ar; rspamd_stat_token_t elt; guint i; lua_State *L = task->cfg->lua_state; - ar = g_array_sized_new (FALSE, FALSE, sizeof (elt), 16); - memset (&elt, 0, sizeof (elt)); + ar = g_array_sized_new(FALSE, FALSE, sizeof(elt), 16); + memset(&elt, 0, sizeof(elt)); elt.flags = RSPAMD_STAT_TOKEN_FLAG_META; if (st_ctx->lua_stat_tokens_ref != -1) { gint err_idx, ret; struct rspamd_task **ptask; - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); - lua_rawgeti (L, LUA_REGISTRYINDEX, st_ctx->lua_stat_tokens_ref); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + lua_rawgeti(L, LUA_REGISTRYINDEX, st_ctx->lua_stat_tokens_ref); - ptask = lua_newuserdata (L, sizeof (*ptask)); + ptask = lua_newuserdata(L, sizeof(*ptask)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); + rspamd_lua_setclass(L, "rspamd{task}", -1); - if ((ret = lua_pcall (L, 1, 1, err_idx)) != 0) { - msg_err_task ("call to stat_tokens lua " - "script failed (%d): %s", ret, lua_tostring (L, -1)); + if ((ret = lua_pcall(L, 1, 1, err_idx)) != 0) { + msg_err_task("call to stat_tokens lua " + "script failed (%d): %s", + ret, lua_tostring(L, -1)); } else { - if (lua_type (L, -1) != LUA_TTABLE) { - msg_err_task ("stat_tokens invocation must return " - "table and not %s", - lua_typename (L, lua_type (L, -1))); + if (lua_type(L, -1) != LUA_TTABLE) { + msg_err_task("stat_tokens invocation must return " + "table and not %s", + lua_typename(L, lua_type(L, -1))); } else { guint vlen; rspamd_ftok_t tok; - vlen = rspamd_lua_table_size (L, -1); + vlen = rspamd_lua_table_size(L, -1); - for (i = 0; i < vlen; i ++) { - lua_rawgeti (L, -1, i + 1); - tok.begin = lua_tolstring (L, -1, &tok.len); + for (i = 0; i < vlen; i++) { + lua_rawgeti(L, -1, i + 1); + tok.begin = lua_tolstring(L, -1, &tok.len); if (tok.begin && tok.len > 0) { elt.original.begin = - rspamd_mempool_ftokdup (task->task_pool, &tok); + 
rspamd_mempool_ftokdup(task->task_pool, &tok); elt.original.len = tok.len; elt.stemmed.begin = elt.original.begin; elt.stemmed.len = elt.original.len; elt.normalized.begin = elt.original.begin; elt.normalized.len = elt.original.len; - g_array_append_val (ar, elt); + g_array_append_val(ar, elt); } - lua_pop (L, 1); + lua_pop(L, 1); } } } - lua_settop (L, 0); + lua_settop(L, 0); } if (ar->len > 0) { - st_ctx->tokenizer->tokenize_func (st_ctx, - task, - ar, - TRUE, - "M", - task->tokens); + st_ctx->tokenizer->tokenize_func(st_ctx, + task, + ar, + TRUE, + "M", + task->tokens); } - rspamd_mempool_add_destructor (task->task_pool, - rspamd_array_free_hard, ar); + rspamd_mempool_add_destructor(task->task_pool, + rspamd_array_free_hard, ar); } /* * Tokenize task using the tokenizer specified */ -void -rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task) +void rspamd_stat_process_tokenize(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task) { struct rspamd_mime_text_part *part; rspamd_cryptobox_hash_state_t hst; @@ -126,172 +126,175 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, gchar *b32_hout; if (st_ctx == NULL) { - st_ctx = rspamd_stat_get_ctx (); + st_ctx = rspamd_stat_get_ctx(); } - g_assert (st_ctx != NULL); + g_assert(st_ctx != NULL); - PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) { - if (!IS_TEXT_PART_EMPTY (part) && part->utf_words != NULL) { + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, part) + { + if (!IS_TEXT_PART_EMPTY(part) && part->utf_words != NULL) { reserved_len += part->utf_words->len; } /* XXX: normal window size */ reserved_len += 5; } - task->tokens = g_ptr_array_sized_new (reserved_len); - rspamd_mempool_add_destructor (task->task_pool, - rspamd_ptr_array_free_hard, task->tokens); - rspamd_mempool_notify_alloc (task->task_pool, reserved_len * sizeof (gpointer)); - pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance"); - - PTR_ARRAY_FOREACH 
(MESSAGE_FIELD (task, text_parts), i, part) { - if (!IS_TEXT_PART_EMPTY (part) && part->utf_words != NULL) { - st_ctx->tokenizer->tokenize_func (st_ctx, task, - part->utf_words, IS_TEXT_PART_UTF (part), - NULL, task->tokens); + task->tokens = g_ptr_array_sized_new(reserved_len); + rspamd_mempool_add_destructor(task->task_pool, + rspamd_ptr_array_free_hard, task->tokens); + rspamd_mempool_notify_alloc(task->task_pool, reserved_len * sizeof(gpointer)); + pdiff = rspamd_mempool_get_variable(task->task_pool, "parts_distance"); + + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, part) + { + if (!IS_TEXT_PART_EMPTY(part) && part->utf_words != NULL) { + st_ctx->tokenizer->tokenize_func(st_ctx, task, + part->utf_words, IS_TEXT_PART_UTF(part), + NULL, task->tokens); } if (pdiff != NULL && (1.0 - *pdiff) * 100.0 > similarity_threshold) { - msg_debug_bayes ("message has two common parts (%.2f), so skip the last one", - *pdiff); + msg_debug_bayes("message has two common parts (%.2f), so skip the last one", + *pdiff); break; } } if (task->meta_words != NULL) { - st_ctx->tokenizer->tokenize_func (st_ctx, - task, - task->meta_words, - TRUE, - "SUBJECT", - task->tokens); + st_ctx->tokenizer->tokenize_func(st_ctx, + task, + task->meta_words, + TRUE, + "SUBJECT", + task->tokens); } - rspamd_stat_tokenize_parts_metadata (st_ctx, task); + rspamd_stat_tokenize_parts_metadata(st_ctx, task); /* Produce signature */ - rspamd_cryptobox_hash_init (&hst, NULL, 0); + rspamd_cryptobox_hash_init(&hst, NULL, 0); - PTR_ARRAY_FOREACH (task->tokens, i, st_tok) { - rspamd_cryptobox_hash_update (&hst, (guchar *)&st_tok->data, - sizeof (st_tok->data)); + PTR_ARRAY_FOREACH(task->tokens, i, st_tok) + { + rspamd_cryptobox_hash_update(&hst, (guchar *) &st_tok->data, + sizeof(st_tok->data)); } - rspamd_cryptobox_hash_final (&hst, hout); - b32_hout = rspamd_encode_base32 (hout, sizeof (hout), RSPAMD_BASE32_DEFAULT); + rspamd_cryptobox_hash_final(&hst, hout); + b32_hout = rspamd_encode_base32(hout, 
sizeof(hout), RSPAMD_BASE32_DEFAULT); /* * We need to strip it to 32 characters providing ~160 bits of * hash distribution */ b32_hout[32] = '\0'; - rspamd_mempool_set_variable (task->task_pool, RSPAMD_MEMPOOL_STAT_SIGNATURE, - b32_hout, g_free); + rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_STAT_SIGNATURE, + b32_hout, g_free); } static gboolean -rspamd_stat_classifier_is_skipped (struct rspamd_task *task, - struct rspamd_classifier *cl, gboolean is_learn, gboolean is_spam) +rspamd_stat_classifier_is_skipped(struct rspamd_task *task, + struct rspamd_classifier *cl, gboolean is_learn, gboolean is_spam) { GList *cur = is_learn ? cl->cfg->learn_conditions : cl->cfg->classify_conditions; lua_State *L = task->cfg->lua_state; gboolean ret = FALSE; while (cur) { - gint cb_ref = GPOINTER_TO_INT (cur->data); - gint old_top = lua_gettop (L); + gint cb_ref = GPOINTER_TO_INT(cur->data); + gint old_top = lua_gettop(L); gint nargs; - lua_rawgeti (L, LUA_REGISTRYINDEX, cb_ref); + lua_rawgeti(L, LUA_REGISTRYINDEX, cb_ref); /* Push task and two booleans: is_spam and is_unlearn */ - struct rspamd_task **ptask = lua_newuserdata (L, sizeof (*ptask)); + struct rspamd_task **ptask = lua_newuserdata(L, sizeof(*ptask)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); + rspamd_lua_setclass(L, "rspamd{task}", -1); if (is_learn) { lua_pushboolean(L, is_spam); lua_pushboolean(L, - task->flags & RSPAMD_TASK_FLAG_UNLEARN ? true : false); + task->flags & RSPAMD_TASK_FLAG_UNLEARN ? 
true : false); nargs = 3; } else { nargs = 1; } - if (lua_pcall (L, nargs, LUA_MULTRET, 0) != 0) { - msg_err_task ("call to %s failed: %s", - "condition callback", - lua_tostring (L, -1)); + if (lua_pcall(L, nargs, LUA_MULTRET, 0) != 0) { + msg_err_task("call to %s failed: %s", + "condition callback", + lua_tostring(L, -1)); } else { - if (lua_isboolean (L, 1)) { - if (!lua_toboolean (L, 1)) { + if (lua_isboolean(L, 1)) { + if (!lua_toboolean(L, 1)) { ret = TRUE; } } - if (lua_isstring (L, 2)) { + if (lua_isstring(L, 2)) { if (ret) { - msg_notice_task ("%s condition for classifier %s returned: %s; skip classifier", - is_learn ? "learn" : "classify", cl->cfg->name, - lua_tostring(L, 2)); + msg_notice_task("%s condition for classifier %s returned: %s; skip classifier", + is_learn ? "learn" : "classify", cl->cfg->name, + lua_tostring(L, 2)); } else { - msg_info_task ("%s condition for classifier %s returned: %s", - is_learn ? "learn" : "classify", cl->cfg->name, - lua_tostring(L, 2)); + msg_info_task("%s condition for classifier %s returned: %s", + is_learn ? "learn" : "classify", cl->cfg->name, + lua_tostring(L, 2)); } } else if (ret) { msg_notice_task("%s condition for classifier %s returned false; skip classifier", - is_learn ? "learn" : "classify", cl->cfg->name); + is_learn ? 
"learn" : "classify", cl->cfg->name); } if (ret) { - lua_settop (L, old_top); + lua_settop(L, old_top); break; } } - lua_settop (L, old_top); - cur = g_list_next (cur); + lua_settop(L, old_top); + cur = g_list_next(cur); } return ret; } static void -rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task, gboolean is_learn, gboolean is_spam) +rspamd_stat_preprocess(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task, gboolean is_learn, gboolean is_spam) { guint i; struct rspamd_statfile *st; gpointer bk_run; if (task->tokens == NULL) { - rspamd_stat_process_tokenize (st_ctx, task); + rspamd_stat_process_tokenize(st_ctx, task); } - task->stat_runtimes = g_ptr_array_sized_new (st_ctx->statfiles->len); - g_ptr_array_set_size (task->stat_runtimes, st_ctx->statfiles->len); - rspamd_mempool_add_destructor (task->task_pool, - rspamd_ptr_array_free_hard, task->stat_runtimes); + task->stat_runtimes = g_ptr_array_sized_new(st_ctx->statfiles->len); + g_ptr_array_set_size(task->stat_runtimes, st_ctx->statfiles->len); + rspamd_mempool_add_destructor(task->task_pool, + rspamd_ptr_array_free_hard, task->stat_runtimes); /* Temporary set all stat_runtimes to some max size to distinguish from NULL */ - for (i = 0; i < st_ctx->statfiles->len; i ++) { - g_ptr_array_index (task->stat_runtimes, i) = GSIZE_TO_POINTER(G_MAXSIZE); + for (i = 0; i < st_ctx->statfiles->len; i++) { + g_ptr_array_index(task->stat_runtimes, i) = GSIZE_TO_POINTER(G_MAXSIZE); } for (i = 0; i < st_ctx->classifiers->len; i++) { - struct rspamd_classifier *cl = g_ptr_array_index (st_ctx->classifiers, i); + struct rspamd_classifier *cl = g_ptr_array_index(st_ctx->classifiers, i); gboolean skip_classifier = FALSE; if (cl->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND) { skip_classifier = TRUE; } else { - if (rspamd_stat_classifier_is_skipped (task, cl, is_learn , is_spam)) { + if (rspamd_stat_classifier_is_skipped(task, cl, is_learn, is_spam)) { skip_classifier = TRUE; } } @@ -299,69 
+302,69 @@ rspamd_stat_preprocess (struct rspamd_stat_ctx *st_ctx, if (skip_classifier) { /* Set NULL for all statfiles indexed by id */ for (int j = 0; j < cl->statfiles_ids->len; j++) { - int id = g_array_index (cl->statfiles_ids, gint, j); - g_ptr_array_index (task->stat_runtimes, id) = NULL; + int id = g_array_index(cl->statfiles_ids, gint, j); + g_ptr_array_index(task->stat_runtimes, id) = NULL; } } } - for (i = 0; i < st_ctx->statfiles->len; i ++) { - st = g_ptr_array_index (st_ctx->statfiles, i); - g_assert (st != NULL); + for (i = 0; i < st_ctx->statfiles->len; i++) { + st = g_ptr_array_index(st_ctx->statfiles, i); + g_assert(st != NULL); - if (g_ptr_array_index (task->stat_runtimes, i) == NULL) { + if (g_ptr_array_index(task->stat_runtimes, i) == NULL) { /* The whole classifier is skipped */ continue; } if (is_learn && st->backend->read_only) { /* Read only backend, skip it */ - g_ptr_array_index (task->stat_runtimes, i) = NULL; + g_ptr_array_index(task->stat_runtimes, i) = NULL; continue; } - if (!is_learn && !rspamd_symcache_is_symbol_enabled (task, task->cfg->cache, - st->stcf->symbol)) { - g_ptr_array_index (task->stat_runtimes, i) = NULL; - msg_debug_bayes ("symbol %s is disabled, skip classification", - st->stcf->symbol); + if (!is_learn && !rspamd_symcache_is_symbol_enabled(task, task->cfg->cache, + st->stcf->symbol)) { + g_ptr_array_index(task->stat_runtimes, i) = NULL; + msg_debug_bayes("symbol %s is disabled, skip classification", + st->stcf->symbol); continue; } - bk_run = st->backend->runtime (task, st->stcf, is_learn, st->bkcf, i); + bk_run = st->backend->runtime(task, st->stcf, is_learn, st->bkcf, i); if (bk_run == NULL) { - msg_err_task ("cannot init backend %s for statfile %s", - st->backend->name, st->stcf->symbol); + msg_err_task("cannot init backend %s for statfile %s", + st->backend->name, st->stcf->symbol); } - g_ptr_array_index (task->stat_runtimes, i) = bk_run; + g_ptr_array_index(task->stat_runtimes, i) = bk_run; } } static void 
-rspamd_stat_backends_process (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task) +rspamd_stat_backends_process(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task) { guint i; struct rspamd_statfile *st; gpointer bk_run; - g_assert (task->stat_runtimes != NULL); + g_assert(task->stat_runtimes != NULL); for (i = 0; i < st_ctx->statfiles->len; i++) { - st = g_ptr_array_index (st_ctx->statfiles, i); - bk_run = g_ptr_array_index (task->stat_runtimes, i); + st = g_ptr_array_index(st_ctx->statfiles, i); + bk_run = g_ptr_array_index(task->stat_runtimes, i); if (bk_run != NULL) { - st->backend->process_tokens (task, task->tokens, i, bk_run); + st->backend->process_tokens(task, task->tokens, i, bk_run); } } } static void -rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task) +rspamd_stat_classifiers_process(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task) { guint i, j, id; struct rspamd_classifier *cl; @@ -377,60 +380,60 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, * Do not classify a message if some class is missing */ if (!(task->flags & RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS)) { - msg_info_task ("skip statistics as SPAM class is missing"); + msg_info_task("skip statistics as SPAM class is missing"); return; } if (!(task->flags & RSPAMD_TASK_FLAG_HAS_HAM_TOKENS)) { - msg_info_task ("skip statistics as HAM class is missing"); + msg_info_task("skip statistics as HAM class is missing"); return; } for (i = 0; i < st_ctx->classifiers->len; i++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + cl = g_ptr_array_index(st_ctx->classifiers, i); cl->spam_learns = 0; cl->ham_learns = 0; } - g_assert (task->stat_runtimes != NULL); + g_assert(task->stat_runtimes != NULL); for (i = 0; i < st_ctx->statfiles->len; i++) { - st = g_ptr_array_index (st_ctx->statfiles, i); + st = g_ptr_array_index(st_ctx->statfiles, i); cl = st->classifier; - bk_run = g_ptr_array_index (task->stat_runtimes, i); - g_assert 
(st != NULL); + bk_run = g_ptr_array_index(task->stat_runtimes, i); + g_assert(st != NULL); if (bk_run != NULL) { if (st->stcf->is_spam) { - cl->spam_learns += st->backend->total_learns (task, - bk_run, - st_ctx); + cl->spam_learns += st->backend->total_learns(task, + bk_run, + st_ctx); } else { - cl->ham_learns += st->backend->total_learns (task, - bk_run, - st_ctx); + cl->ham_learns += st->backend->total_learns(task, + bk_run, + st_ctx); } } } for (i = 0; i < st_ctx->classifiers->len; i++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + cl = g_ptr_array_index(st_ctx->classifiers, i); - g_assert (cl != NULL); + g_assert(cl != NULL); skip = FALSE; /* Do not process classifiers on backend failures */ for (j = 0; j < cl->statfiles_ids->len; j++) { - id = g_array_index (cl->statfiles_ids, gint, j); - bk_run = g_ptr_array_index (task->stat_runtimes, id); - st = g_ptr_array_index (st_ctx->statfiles, id); + id = g_array_index(cl->statfiles_ids, gint, j); + bk_run = g_ptr_array_index(task->stat_runtimes, id); + st = g_ptr_array_index(st_ctx->statfiles, id); if (bk_run != NULL) { - if (!st->backend->finalize_process (task, bk_run, st_ctx)) { + if (!st->backend->finalize_process(task, bk_run, st_ctx)) { skip = TRUE; break; } @@ -440,14 +443,14 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, /* Ensure that all symbols enabled */ if (!skip && !(cl->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND)) { for (j = 0; j < cl->statfiles_ids->len; j++) { - id = g_array_index (cl->statfiles_ids, gint, j); - bk_run = g_ptr_array_index (task->stat_runtimes, id); - st = g_ptr_array_index (st_ctx->statfiles, id); + id = g_array_index(cl->statfiles_ids, gint, j); + bk_run = g_ptr_array_index(task->stat_runtimes, id); + st = g_ptr_array_index(st_ctx->statfiles, id); if (bk_run == NULL) { skip = TRUE; - msg_debug_bayes ("disable classifier %s as statfile symbol %s is disabled", - cl->cfg->name, st->stcf->symbol); + msg_debug_bayes("disable classifier %s as statfile 
symbol %s is disabled", + cl->cfg->name, st->stcf->symbol); break; } } @@ -455,38 +458,38 @@ rspamd_stat_classifiers_process (struct rspamd_stat_ctx *st_ctx, if (!skip) { if (cl->cfg->min_tokens > 0 && task->tokens->len < cl->cfg->min_tokens) { - msg_debug_bayes ( - "contains less tokens than required for %s classifier: " - "%ud < %ud", - cl->cfg->name, - task->tokens->len, - cl->cfg->min_tokens); + msg_debug_bayes( + "contains less tokens than required for %s classifier: " + "%ud < %ud", + cl->cfg->name, + task->tokens->len, + cl->cfg->min_tokens); continue; } else if (cl->cfg->max_tokens > 0 && task->tokens->len > cl->cfg->max_tokens) { - msg_debug_bayes ( - "contains more tokens than allowed for %s classifier: " - "%ud > %ud", - cl->cfg->name, - task->tokens->len, - cl->cfg->max_tokens); + msg_debug_bayes( + "contains more tokens than allowed for %s classifier: " + "%ud > %ud", + cl->cfg->name, + task->tokens->len, + cl->cfg->max_tokens); continue; } - cl->subrs->classify_func (cl, task->tokens, task); + cl->subrs->classify_func(cl, task->tokens, task); } } } rspamd_stat_result_t -rspamd_stat_classify (struct rspamd_task *task, lua_State *L, guint stage, - GError **err) +rspamd_stat_classify(struct rspamd_task *task, lua_State *L, guint stage, + GError **err) { struct rspamd_stat_ctx *st_ctx; rspamd_stat_result_t ret = RSPAMD_STAT_PROCESS_OK; - st_ctx = rspamd_stat_get_ctx (); - g_assert (st_ctx != NULL); + st_ctx = rspamd_stat_get_ctx(); + g_assert(st_ctx != NULL); if (st_ctx->classifiers->len == 0) { task->processed_stages |= stage; @@ -495,15 +498,15 @@ rspamd_stat_classify (struct rspamd_task *task, lua_State *L, guint stage, if (stage == RSPAMD_TASK_STAGE_CLASSIFIERS_PRE) { /* Preprocess tokens */ - rspamd_stat_preprocess (st_ctx, task, FALSE, FALSE); + rspamd_stat_preprocess(st_ctx, task, FALSE, FALSE); } else if (stage == RSPAMD_TASK_STAGE_CLASSIFIERS) { /* Process backends */ - rspamd_stat_backends_process (st_ctx, task); + 
rspamd_stat_backends_process(st_ctx, task); } else if (stage == RSPAMD_TASK_STAGE_CLASSIFIERS_POST) { /* Process classifiers */ - rspamd_stat_classifiers_process (st_ctx, task); + rspamd_stat_classifiers_process(st_ctx, task); } task->processed_stages |= stage; @@ -512,11 +515,11 @@ rspamd_stat_classify (struct rspamd_task *task, lua_State *L, guint stage, } static gboolean -rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task, - const gchar *classifier, - gboolean spam, - GError **err) +rspamd_stat_cache_check(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task, + const gchar *classifier, + gboolean spam, + GError **err) { rspamd_learn_t learn_res = RSPAMD_LEARN_OK; struct rspamd_classifier *cl, *sel = NULL; @@ -524,27 +527,28 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, guint i; /* Check whether we have learned that file */ - for (i = 0; i < st_ctx->classifiers->len; i ++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + for (i = 0; i < st_ctx->classifiers->len; i++) { + cl = g_ptr_array_index(st_ctx->classifiers, i); /* Skip other classifiers if they are not needed */ if (classifier != NULL && (cl->cfg->name == NULL || - g_ascii_strcasecmp (classifier, cl->cfg->name) != 0)) { + g_ascii_strcasecmp(classifier, cl->cfg->name) != 0)) { continue; } sel = cl; if (sel->cache && sel->cachecf) { - rt = cl->cache->runtime (task, sel->cachecf, FALSE); - learn_res = cl->cache->check (task, spam, rt); + rt = cl->cache->runtime(task, sel->cachecf, FALSE); + learn_res = cl->cache->check(task, spam, rt); } if (learn_res == RSPAMD_LEARN_IGNORE) { /* Do not learn twice */ - g_set_error (err, rspamd_stat_quark (), 404, "<%s> has been already " - "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), - spam ? "spam" : "ham"); + g_set_error(err, rspamd_stat_quark(), 404, "<%s> has been already " + "learned as %s, ignore it", + MESSAGE_FIELD(task, message_id), + spam ? 
"spam" : "ham"); task->flags |= RSPAMD_TASK_FLAG_ALREADY_LEARNED; return FALSE; @@ -557,11 +561,12 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, if (sel == NULL) { if (classifier) { - g_set_error (err, rspamd_stat_quark (), 404, "cannot find classifier " - "with name %s", classifier); + g_set_error(err, rspamd_stat_quark(), 404, "cannot find classifier " + "with name %s", + classifier); } else { - g_set_error (err, rspamd_stat_quark (), 404, "no classifiers defined"); + g_set_error(err, rspamd_stat_quark(), 404, "no classifiers defined"); } return FALSE; @@ -571,33 +576,34 @@ rspamd_stat_cache_check (struct rspamd_stat_ctx *st_ctx, } static gboolean -rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task, - const gchar *classifier, - gboolean spam, - GError **err) +rspamd_stat_classifiers_learn(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task, + const gchar *classifier, + gboolean spam, + GError **err) { struct rspamd_classifier *cl, *sel = NULL; guint i; gboolean learned = FALSE, too_small = FALSE, too_large = FALSE; if ((task->flags & RSPAMD_TASK_FLAG_ALREADY_LEARNED) && err != NULL && - *err == NULL) { + *err == NULL) { /* Do not learn twice */ - g_set_error (err, rspamd_stat_quark (), 208, "<%s> has been already " - "learned as %s, ignore it", MESSAGE_FIELD (task, message_id), - spam ? "spam" : "ham"); + g_set_error(err, rspamd_stat_quark(), 208, "<%s> has been already " + "learned as %s, ignore it", + MESSAGE_FIELD(task, message_id), + spam ? 
"spam" : "ham"); return FALSE; } /* Check whether we have learned that file */ - for (i = 0; i < st_ctx->classifiers->len; i ++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + for (i = 0; i < st_ctx->classifiers->len; i++) { + cl = g_ptr_array_index(st_ctx->classifiers, i); /* Skip other classifiers if they are not needed */ if (classifier != NULL && (cl->cfg->name == NULL || - g_ascii_strcasecmp (classifier, cl->cfg->name) != 0)) { + g_ascii_strcasecmp(classifier, cl->cfg->name) != 0)) { continue; } @@ -605,41 +611,42 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, /* Now check max and min tokens */ if (cl->cfg->min_tokens > 0 && task->tokens->len < cl->cfg->min_tokens) { - msg_info_task ( + msg_info_task( "<%s> contains less tokens than required for %s classifier: " - "%ud < %ud", - MESSAGE_FIELD (task, message_id), - cl->cfg->name, - task->tokens->len, - cl->cfg->min_tokens); + "%ud < %ud", + MESSAGE_FIELD(task, message_id), + cl->cfg->name, + task->tokens->len, + cl->cfg->min_tokens); too_small = TRUE; continue; } else if (cl->cfg->max_tokens > 0 && task->tokens->len > cl->cfg->max_tokens) { - msg_info_task ( + msg_info_task( "<%s> contains more tokens than allowed for %s classifier: " - "%ud > %ud", - MESSAGE_FIELD (task, message_id), - cl->cfg->name, - task->tokens->len, - cl->cfg->max_tokens); + "%ud > %ud", + MESSAGE_FIELD(task, message_id), + cl->cfg->name, + task->tokens->len, + cl->cfg->max_tokens); too_large = TRUE; continue; } - if (cl->subrs->learn_spam_func (cl, task->tokens, task, spam, - task->flags & RSPAMD_TASK_FLAG_UNLEARN, err)) { + if (cl->subrs->learn_spam_func(cl, task->tokens, task, spam, + task->flags & RSPAMD_TASK_FLAG_UNLEARN, err)) { learned = TRUE; } } if (sel == NULL) { if (classifier) { - g_set_error (err, rspamd_stat_quark (), 404, "cannot find classifier " - "with name %s", classifier); + g_set_error(err, rspamd_stat_quark(), 404, "cannot find classifier " + "with name %s", + classifier); } else { - 
g_set_error (err, rspamd_stat_quark (), 404, "no classifiers defined"); + g_set_error(err, rspamd_stat_quark(), 404, "no classifiers defined"); } return FALSE; @@ -647,22 +654,22 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, if (!learned && err && *err == NULL) { if (too_large) { - g_set_error (err, rspamd_stat_quark (), 204, - "<%s> contains more tokens than allowed for %s classifier: " - "%d > %d", - MESSAGE_FIELD (task, message_id), - sel->cfg->name, - task->tokens->len, - sel->cfg->max_tokens); + g_set_error(err, rspamd_stat_quark(), 204, + "<%s> contains more tokens than allowed for %s classifier: " + "%d > %d", + MESSAGE_FIELD(task, message_id), + sel->cfg->name, + task->tokens->len, + sel->cfg->max_tokens); } else if (too_small) { - g_set_error (err, rspamd_stat_quark (), 204, - "<%s> contains less tokens than required for %s classifier: " - "%d < %d", - MESSAGE_FIELD (task, message_id), - sel->cfg->name, - task->tokens->len, - sel->cfg->min_tokens); + g_set_error(err, rspamd_stat_quark(), 204, + "<%s> contains less tokens than required for %s classifier: " + "%d < %d", + MESSAGE_FIELD(task, message_id), + sel->cfg->name, + task->tokens->len, + sel->cfg->min_tokens); } } @@ -670,11 +677,11 @@ rspamd_stat_classifiers_learn (struct rspamd_stat_ctx *st_ctx, } static gboolean -rspamd_stat_backends_learn (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task, - const gchar *classifier, - gboolean spam, - GError **err) +rspamd_stat_backends_learn(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task, + const gchar *classifier, + gboolean spam, + GError **err) { struct rspamd_classifier *cl, *sel = NULL; struct rspamd_statfile *st; @@ -683,12 +690,12 @@ rspamd_stat_backends_learn (struct rspamd_stat_ctx *st_ctx, gint id; gboolean res = FALSE, backend_found = FALSE; - for (i = 0; i < st_ctx->classifiers->len; i ++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + for (i = 0; i < st_ctx->classifiers->len; i++) { + cl = 
g_ptr_array_index(st_ctx->classifiers, i); /* Skip other classifiers if they are not needed */ if (classifier != NULL && (cl->cfg->name == NULL || - g_ascii_strcasecmp (classifier, cl->cfg->name) != 0)) { + g_ascii_strcasecmp(classifier, cl->cfg->name) != 0)) { continue; } @@ -699,27 +706,27 @@ rspamd_stat_backends_learn (struct rspamd_stat_ctx *st_ctx, sel = cl; - for (j = 0; j < cl->statfiles_ids->len; j ++) { - id = g_array_index (cl->statfiles_ids, gint, j); - st = g_ptr_array_index (st_ctx->statfiles, id); - bk_run = g_ptr_array_index (task->stat_runtimes, id); + for (j = 0; j < cl->statfiles_ids->len; j++) { + id = g_array_index(cl->statfiles_ids, gint, j); + st = g_ptr_array_index(st_ctx->statfiles, id); + bk_run = g_ptr_array_index(task->stat_runtimes, id); - g_assert (st != NULL); + g_assert(st != NULL); if (bk_run == NULL) { /* XXX: must be error */ if (task->result->passthrough_result) { /* Passthrough email, cannot learn */ - g_set_error (err, rspamd_stat_quark (), 204, - "Cannot learn statistics when passthrough " - "result has been set; not classified"); + g_set_error(err, rspamd_stat_quark(), 204, + "Cannot learn statistics when passthrough " + "result has been set; not classified"); res = FALSE; goto end; } - msg_debug_task ("no runtime for backend %s; classifier %s; symbol %s", - st->backend->name, cl->cfg->name, st->stcf->symbol); + msg_debug_task("no runtime for backend %s; classifier %s; symbol %s", + st->backend->name, cl->cfg->name, st->stcf->symbol); continue; } @@ -733,20 +740,20 @@ rspamd_stat_backends_learn (struct rspamd_stat_ctx *st_ctx, } } - if (!st->backend->learn_tokens (task, task->tokens, id, bk_run)) { - g_set_error (err, rspamd_stat_quark (), 500, - "Cannot push " - "learned results to the backend"); + if (!st->backend->learn_tokens(task, task->tokens, id, bk_run)) { + g_set_error(err, rspamd_stat_quark(), 500, + "Cannot push " + "learned results to the backend"); res = FALSE; goto end; } else { if (!!spam == !!st->stcf->is_spam) 
{ - st->backend->inc_learns (task, bk_run, st_ctx); + st->backend->inc_learns(task, bk_run, st_ctx); } else if (task->flags & RSPAMD_TASK_FLAG_UNLEARN) { - st->backend->dec_learns (task, bk_run, st_ctx); + st->backend->dec_learns(task, bk_run, st_ctx); } res = TRUE; @@ -765,7 +772,8 @@ end: if (sel == NULL) { if (classifier) { g_set_error(err, rspamd_stat_quark(), 404, "cannot find classifier " - "with name %s", classifier); + "with name %s", + classifier); } else { g_set_error(err, rspamd_stat_quark(), 404, "no classifiers defined"); @@ -776,14 +784,14 @@ end: else if (!backend_found) { g_set_error(err, rspamd_stat_quark(), 204, "all learn conditions " "denied learning %s in %s", - spam ? "spam" : "ham", - classifier ? classifier : "default classifier"); + spam ? "spam" : "ham", + classifier ? classifier : "default classifier"); } else { g_set_error(err, rspamd_stat_quark(), 404, "cannot find statfile " "backend to learn %s in %s", - spam ? "spam" : "ham", - classifier ? classifier : "default classifier"); + spam ? "spam" : "ham", + classifier ? 
classifier : "default classifier"); } } @@ -791,11 +799,11 @@ end: } static gboolean -rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx, - struct rspamd_task *task, - const gchar *classifier, - gboolean spam, - GError **err) +rspamd_stat_backends_post_learn(struct rspamd_stat_ctx *st_ctx, + struct rspamd_task *task, + const gchar *classifier, + gboolean spam, + GError **err) { struct rspamd_classifier *cl; struct rspamd_statfile *st; @@ -804,12 +812,12 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx, gint id; gboolean res = TRUE; - for (i = 0; i < st_ctx->classifiers->len; i ++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + for (i = 0; i < st_ctx->classifiers->len; i++) { + cl = g_ptr_array_index(st_ctx->classifiers, i); /* Skip other classifiers if they are not needed */ if (classifier != NULL && (cl->cfg->name == NULL || - g_ascii_strcasecmp (classifier, cl->cfg->name) != 0)) { + g_ascii_strcasecmp(classifier, cl->cfg->name) != 0)) { continue; } @@ -818,38 +826,38 @@ rspamd_stat_backends_post_learn (struct rspamd_stat_ctx *st_ctx, continue; } - for (j = 0; j < cl->statfiles_ids->len; j ++) { - id = g_array_index (cl->statfiles_ids, gint, j); - st = g_ptr_array_index (st_ctx->statfiles, id); - bk_run = g_ptr_array_index (task->stat_runtimes, id); + for (j = 0; j < cl->statfiles_ids->len; j++) { + id = g_array_index(cl->statfiles_ids, gint, j); + st = g_ptr_array_index(st_ctx->statfiles, id); + bk_run = g_ptr_array_index(task->stat_runtimes, id); - g_assert (st != NULL); + g_assert(st != NULL); if (bk_run == NULL) { /* XXX: must be error */ continue; } - if (!st->backend->finalize_learn (task, bk_run, st_ctx, err)) { + if (!st->backend->finalize_learn(task, bk_run, st_ctx, err)) { return RSPAMD_STAT_PROCESS_ERROR; } } if (cl->cache) { - cache_run = cl->cache->runtime (task, cl->cachecf, TRUE); - cl->cache->learn (task, spam, cache_run); + cache_run = cl->cache->runtime(task, cl->cachecf, TRUE); + cl->cache->learn(task, 
spam, cache_run); } } - g_atomic_int_add (&task->worker->srv->stat->messages_learned, 1); + g_atomic_int_add(&task->worker->srv->stat->messages_learned, 1); return res; } rspamd_stat_result_t -rspamd_stat_learn (struct rspamd_task *task, - gboolean spam, lua_State *L, const gchar *classifier, guint stage, - GError **err) +rspamd_stat_learn(struct rspamd_task *task, + gboolean spam, lua_State *L, const gchar *classifier, guint stage, + GError **err) { struct rspamd_stat_ctx *st_ctx; rspamd_stat_result_t ret = RSPAMD_STAT_PROCESS_OK; @@ -858,10 +866,10 @@ rspamd_stat_learn (struct rspamd_task *task, * We assume now that a task has been already classified before * coming to learn */ - g_assert (RSPAMD_TASK_IS_CLASSIFIED (task)); + g_assert(RSPAMD_TASK_IS_CLASSIFIED(task)); - st_ctx = rspamd_stat_get_ctx (); - g_assert (st_ctx != NULL); + st_ctx = rspamd_stat_get_ctx(); + g_assert(st_ctx != NULL); if (st_ctx->classifiers->len == 0) { task->processed_stages |= stage; @@ -870,38 +878,38 @@ rspamd_stat_learn (struct rspamd_task *task, if (stage == RSPAMD_TASK_STAGE_LEARN_PRE) { /* Process classifiers */ - rspamd_stat_preprocess (st_ctx, task, TRUE, spam); + rspamd_stat_preprocess(st_ctx, task, TRUE, spam); - if (!rspamd_stat_cache_check (st_ctx, task, classifier, spam, err)) { + if (!rspamd_stat_cache_check(st_ctx, task, classifier, spam, err)) { return RSPAMD_STAT_PROCESS_ERROR; } } else if (stage == RSPAMD_TASK_STAGE_LEARN) { /* Process classifiers */ - if (!rspamd_stat_classifiers_learn (st_ctx, task, classifier, - spam, err)) { + if (!rspamd_stat_classifiers_learn(st_ctx, task, classifier, + spam, err)) { if (err && *err == NULL) { - g_set_error (err, rspamd_stat_quark (), 500, - "Unknown statistics error, found when learning classifiers;" - " classifier: %s", - task->classifier); + g_set_error(err, rspamd_stat_quark(), 500, + "Unknown statistics error, found when learning classifiers;" + " classifier: %s", + task->classifier); } return RSPAMD_STAT_PROCESS_ERROR; } /* 
Process backends */ - if (!rspamd_stat_backends_learn (st_ctx, task, classifier, spam, err)) { + if (!rspamd_stat_backends_learn(st_ctx, task, classifier, spam, err)) { if (err && *err == NULL) { - g_set_error (err, rspamd_stat_quark (), 500, - "Unknown statistics error, found when storing data on backend;" - " classifier: %s", - task->classifier); + g_set_error(err, rspamd_stat_quark(), 500, + "Unknown statistics error, found when storing data on backend;" + " classifier: %s", + task->classifier); } return RSPAMD_STAT_PROCESS_ERROR; } } else if (stage == RSPAMD_TASK_STAGE_LEARN_POST) { - if (!rspamd_stat_backends_post_learn (st_ctx, task, classifier, spam, err)) { + if (!rspamd_stat_backends_post_learn(st_ctx, task, classifier, spam, err)) { return RSPAMD_STAT_PROCESS_ERROR; } } @@ -912,9 +920,9 @@ rspamd_stat_learn (struct rspamd_task *task, } static gboolean -rspamd_stat_has_classifier_symbols (struct rspamd_task *task, - struct rspamd_scan_result *mres, - struct rspamd_classifier *cl) +rspamd_stat_has_classifier_symbols(struct rspamd_task *task, + struct rspamd_scan_result *mres, + struct rspamd_classifier *cl) { guint i; gint id; @@ -926,17 +934,18 @@ rspamd_stat_has_classifier_symbols (struct rspamd_task *task, return FALSE; } - st_ctx = rspamd_stat_get_ctx (); + st_ctx = rspamd_stat_get_ctx(); is_spam = !!(task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM); - for (i = 0; i < cl->statfiles_ids->len; i ++) { - id = g_array_index (cl->statfiles_ids, gint, i); - st = g_ptr_array_index (st_ctx->statfiles, id); + for (i = 0; i < cl->statfiles_ids->len; i++) { + id = g_array_index(cl->statfiles_ids, gint, i); + st = g_ptr_array_index(st_ctx->statfiles, id); - if (rspamd_task_find_symbol_result (task, st->stcf->symbol, NULL)) { + if (rspamd_task_find_symbol_result(task, st->stcf->symbol, NULL)) { if (is_spam == !!st->stcf->is_spam) { - msg_debug_bayes ("do not autolearn %s as symbol %s is already " - "added", is_spam ? 
"spam" : "ham", st->stcf->symbol); + msg_debug_bayes("do not autolearn %s as symbol %s is already " + "added", + is_spam ? "spam" : "ham", st->stcf->symbol); return TRUE; } @@ -947,7 +956,7 @@ rspamd_stat_has_classifier_symbols (struct rspamd_task *task, } gboolean -rspamd_stat_check_autolearn (struct rspamd_task *task) +rspamd_stat_check_autolearn(struct rspamd_task *task) { struct rspamd_stat_ctx *st_ctx; struct rspamd_classifier *cl; @@ -961,22 +970,22 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) gdouble ham_score, spam_score; const gchar *lua_script, *lua_ret; - g_assert (RSPAMD_TASK_IS_CLASSIFIED (task)); - st_ctx = rspamd_stat_get_ctx (); - g_assert (st_ctx != NULL); + g_assert(RSPAMD_TASK_IS_CLASSIFIED(task)); + st_ctx = rspamd_stat_get_ctx(); + g_assert(st_ctx != NULL); L = task->cfg->lua_state; - for (i = 0; i < st_ctx->classifiers->len; i ++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + for (i = 0; i < st_ctx->classifiers->len; i++) { + cl = g_ptr_array_index(st_ctx->classifiers, i); ret = FALSE; if (cl->cfg->opts) { - obj = ucl_object_lookup (cl->cfg->opts, "autolearn"); + obj = ucl_object_lookup(cl->cfg->opts, "autolearn"); - if (ucl_object_type (obj) == UCL_BOOLEAN) { + if (ucl_object_type(obj) == UCL_BOOLEAN) { /* Legacy true/false */ - if (ucl_object_toboolean (obj)) { + if (ucl_object_toboolean(obj)) { /* * Default learning algorithm: * @@ -986,7 +995,7 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) mres = task->result; if (mres) { - if (mres->score > rspamd_task_get_required_score (task, mres)) { + if (mres->score > rspamd_task_get_required_score(task, mres)) { task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM; ret = TRUE; @@ -998,21 +1007,21 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) } } } - else if (ucl_object_type (obj) == UCL_ARRAY && obj->len == 2) { + else if (ucl_object_type(obj) == UCL_ARRAY && obj->len == 2) { /* Legacy thresholds */ /* * We have an array of 2 elements, treat it as a * ham_score, 
spam_score */ - elt1 = ucl_array_find_index (obj, 0); - elt2 = ucl_array_find_index (obj, 1); + elt1 = ucl_array_find_index(obj, 0); + elt2 = ucl_array_find_index(obj, 1); - if ((ucl_object_type (elt1) == UCL_FLOAT || - ucl_object_type (elt1) == UCL_INT) && - (ucl_object_type (elt2) == UCL_FLOAT || - ucl_object_type (elt2) == UCL_INT)) { - ham_score = ucl_object_todouble (elt1); - spam_score = ucl_object_todouble (elt2); + if ((ucl_object_type(elt1) == UCL_FLOAT || + ucl_object_type(elt1) == UCL_INT) && + (ucl_object_type(elt2) == UCL_FLOAT || + ucl_object_type(elt2) == UCL_INT)) { + ham_score = ucl_object_todouble(elt1); + spam_score = ucl_object_todouble(elt2); if (ham_score > spam_score) { gdouble t; @@ -1037,38 +1046,40 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) } } } - else if (ucl_object_type (obj) == UCL_STRING) { + else if (ucl_object_type(obj) == UCL_STRING) { /* Legacy script */ - lua_script = ucl_object_tostring (obj); + lua_script = ucl_object_tostring(obj); - if (luaL_dostring (L, lua_script) != 0) { - msg_err_task ("cannot execute lua script for autolearn " - "extraction: %s", lua_tostring (L, -1)); + if (luaL_dostring(L, lua_script) != 0) { + msg_err_task("cannot execute lua script for autolearn " + "extraction: %s", + lua_tostring(L, -1)); } else { - if (lua_type (L, -1) == LUA_TFUNCTION) { - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); - lua_pushvalue (L, -2); /* Function itself */ + if (lua_type(L, -1) == LUA_TFUNCTION) { + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + lua_pushvalue(L, -2); /* Function itself */ - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); + rspamd_lua_setclass(L, "rspamd{task}", -1); - if (lua_pcall (L, 1, 1, err_idx) != 0) { - msg_err_task ("call to autolearn script failed: " - "%s", lua_tostring (L, -1)); + if 
(lua_pcall(L, 1, 1, err_idx) != 0) { + msg_err_task("call to autolearn script failed: " + "%s", + lua_tostring(L, -1)); } else { - lua_ret = lua_tostring (L, -1); + lua_ret = lua_tostring(L, -1); /* We can have immediate results */ if (lua_ret) { - if (strcmp (lua_ret, "ham") == 0) { + if (strcmp(lua_ret, "ham") == 0) { task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM; ret = TRUE; } - else if (strcmp (lua_ret, "spam") == 0) { + else if (strcmp(lua_ret, "spam") == 0) { task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM; ret = TRUE; } @@ -1076,85 +1087,86 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) } /* Result + error function + original function */ - lua_pop (L, 3); + lua_pop(L, 3); } else { - msg_err_task ("lua script must return " - "function(task) and not %s", - lua_typename (L, lua_type ( - L, -1))); + msg_err_task("lua script must return " + "function(task) and not %s", + lua_typename(L, lua_type( + L, -1))); } } } - else if (ucl_object_type (obj) == UCL_OBJECT) { + else if (ucl_object_type(obj) == UCL_OBJECT) { /* Try to find autolearn callback */ if (cl->autolearn_cbref == 0) { /* We don't have preprocessed cb id, so try to get it */ - if (!rspamd_lua_require_function (L, "lua_bayes_learn", - "autolearn")) { - msg_err_task ("cannot get autolearn library from " - "`lua_bayes_learn`"); + if (!rspamd_lua_require_function(L, "lua_bayes_learn", + "autolearn")) { + msg_err_task("cannot get autolearn library from " + "`lua_bayes_learn`"); } else { - cl->autolearn_cbref = luaL_ref (L, LUA_REGISTRYINDEX); + cl->autolearn_cbref = luaL_ref(L, LUA_REGISTRYINDEX); } } if (cl->autolearn_cbref != -1) { - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); - lua_rawgeti (L, LUA_REGISTRYINDEX, cl->autolearn_cbref); + lua_pushcfunction(L, &rspamd_lua_traceback); + err_idx = lua_gettop(L); + lua_rawgeti(L, LUA_REGISTRYINDEX, cl->autolearn_cbref); - ptask = lua_newuserdata (L, sizeof (struct rspamd_task *)); + ptask = lua_newuserdata(L, sizeof(struct 
rspamd_task *)); *ptask = task; - rspamd_lua_setclass (L, "rspamd{task}", -1); + rspamd_lua_setclass(L, "rspamd{task}", -1); /* Push the whole object as well */ - ucl_object_push_lua (L, obj, true); + ucl_object_push_lua(L, obj, true); - if (lua_pcall (L, 2, 1, err_idx) != 0) { - msg_err_task ("call to autolearn script failed: " - "%s", lua_tostring (L, -1)); + if (lua_pcall(L, 2, 1, err_idx) != 0) { + msg_err_task("call to autolearn script failed: " + "%s", + lua_tostring(L, -1)); } else { - lua_ret = lua_tostring (L, -1); + lua_ret = lua_tostring(L, -1); if (lua_ret) { - if (strcmp (lua_ret, "ham") == 0) { + if (strcmp(lua_ret, "ham") == 0) { task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM; ret = TRUE; } - else if (strcmp (lua_ret, "spam") == 0) { + else if (strcmp(lua_ret, "spam") == 0) { task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM; ret = TRUE; } } } - lua_settop (L, err_idx - 1); + lua_settop(L, err_idx - 1); } } if (ret) { /* Do not autolearn if we have this symbol already */ - if (rspamd_stat_has_classifier_symbols (task, mres, cl)) { + if (rspamd_stat_has_classifier_symbols(task, mres, cl)) { ret = FALSE; task->flags &= ~(RSPAMD_TASK_FLAG_LEARN_HAM | - RSPAMD_TASK_FLAG_LEARN_SPAM); + RSPAMD_TASK_FLAG_LEARN_SPAM); } else if (mres != NULL) { if (task->flags & RSPAMD_TASK_FLAG_LEARN_HAM) { - msg_info_task ("<%s>: autolearn ham for classifier " - "'%s' as message's " - "score is negative: %.2f", - MESSAGE_FIELD (task, message_id), cl->cfg->name, - mres->score); + msg_info_task("<%s>: autolearn ham for classifier " + "'%s' as message's " + "score is negative: %.2f", + MESSAGE_FIELD(task, message_id), cl->cfg->name, + mres->score); } else { - msg_info_task ("<%s>: autolearn spam for classifier " - "'%s' as message's " - "action is reject, score: %.2f", - MESSAGE_FIELD (task, message_id), cl->cfg->name, - mres->score); + msg_info_task("<%s>: autolearn spam for classifier " + "'%s' as message's " + "action is reject, score: %.2f", + MESSAGE_FIELD(task, message_id), 
cl->cfg->name, + mres->score); } task->classifier = cl->cfg->name; @@ -1174,10 +1186,10 @@ rspamd_stat_check_autolearn (struct rspamd_task *task) * @return array of statistical information */ rspamd_stat_result_t -rspamd_stat_statistics (struct rspamd_task *task, - struct rspamd_config *cfg, - guint64 *total_learns, - ucl_object_t **target) +rspamd_stat_statistics(struct rspamd_task *task, + struct rspamd_config *cfg, + guint64 *total_learns, + ucl_object_t **target) { struct rspamd_stat_ctx *st_ctx; struct rspamd_classifier *cl; @@ -1188,37 +1200,37 @@ rspamd_stat_statistics (struct rspamd_task *task, guint i, j; gint id; - st_ctx = rspamd_stat_get_ctx (); - g_assert (st_ctx != NULL); + st_ctx = rspamd_stat_get_ctx(); + g_assert(st_ctx != NULL); - res = ucl_object_typed_new (UCL_ARRAY); + res = ucl_object_typed_new(UCL_ARRAY); - for (i = 0; i < st_ctx->classifiers->len; i ++) { - cl = g_ptr_array_index (st_ctx->classifiers, i); + for (i = 0; i < st_ctx->classifiers->len; i++) { + cl = g_ptr_array_index(st_ctx->classifiers, i); if (cl->cfg->flags & RSPAMD_FLAG_CLASSIFIER_NO_BACKEND) { continue; } - for (j = 0; j < cl->statfiles_ids->len; j ++) { - id = g_array_index (cl->statfiles_ids, gint, j); - st = g_ptr_array_index (st_ctx->statfiles, id); - backend_runtime = st->backend->runtime (task, st->stcf, FALSE, - st->bkcf, id); - elt = st->backend->get_stat (backend_runtime, st->bkcf); + for (j = 0; j < cl->statfiles_ids->len; j++) { + id = g_array_index(cl->statfiles_ids, gint, j); + st = g_ptr_array_index(st_ctx->statfiles, id); + backend_runtime = st->backend->runtime(task, st->stcf, FALSE, + st->bkcf, id); + elt = st->backend->get_stat(backend_runtime, st->bkcf); - if (elt && ucl_object_type (elt) == UCL_OBJECT) { - const ucl_object_t *rev = ucl_object_lookup (elt, "revision"); + if (elt && ucl_object_type(elt) == UCL_OBJECT) { + const ucl_object_t *rev = ucl_object_lookup(elt, "revision"); - learns += ucl_object_toint (rev); + learns += ucl_object_toint(rev); } 
else { - learns += st->backend->total_learns (task, backend_runtime, - st->bkcf); + learns += st->backend->total_learns(task, backend_runtime, + st->bkcf); } if (elt != NULL) { - ucl_array_append (res, elt); + ucl_array_append(res, elt); } } } @@ -1231,7 +1243,7 @@ rspamd_stat_statistics (struct rspamd_task *task, *target = res; } else { - ucl_object_unref (res); + ucl_object_unref(res); } return RSPAMD_STAT_PROCESS_OK; diff --git a/src/libstat/tokenizers/osb.c b/src/libstat/tokenizers/osb.c index a8007ec0f..d871c7a4e 100644 --- a/src/libstat/tokenizers/osb.c +++ b/src/libstat/tokenizers/osb.c @@ -27,16 +27,26 @@ #define DEFAULT_OSB_VERSION 2 static const int primes[] = { - 1, 7, - 3, 13, - 5, 29, - 11, 51, - 23, 101, - 47, 203, - 97, 407, - 197, 817, - 397, 1637, - 797, 3277, + 1, + 7, + 3, + 13, + 5, + 29, + 11, + 51, + 23, + 101, + 47, + 203, + 97, + 407, + 197, + 817, + 397, + 1637, + 797, + 3277, }; static const guchar osb_tokenizer_magic[] = {'o', 's', 'b', 't', 'o', 'k', 'v', '2'}; @@ -60,13 +70,13 @@ struct rspamd_osb_tokenizer_config { * Return default config */ static struct rspamd_osb_tokenizer_config * -rspamd_tokenizer_osb_default_config (void) +rspamd_tokenizer_osb_default_config(void) { static struct rspamd_osb_tokenizer_config def; - if (memcmp (def.magic, osb_tokenizer_magic, sizeof (osb_tokenizer_magic)) != 0) { - memset (&def, 0, sizeof (def)); - memcpy (def.magic, osb_tokenizer_magic, sizeof (osb_tokenizer_magic)); + if (memcmp(def.magic, osb_tokenizer_magic, sizeof(osb_tokenizer_magic)) != 0) { + memset(&def, 0, sizeof(def)); + memcpy(def.magic, osb_tokenizer_magic, sizeof(osb_tokenizer_magic)); def.version = DEFAULT_OSB_VERSION; def.window_size = DEFAULT_FEATURE_WINDOW_SIZE; def.ht = RSPAMD_OSB_HASH_XXHASH; @@ -77,8 +87,8 @@ rspamd_tokenizer_osb_default_config (void) } static struct rspamd_osb_tokenizer_config * -rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool, - const ucl_object_t *obj) 
+rspamd_tokenizer_osb_config_from_ucl(rspamd_mempool_t *pool, + const ucl_object_t *obj) { const ucl_object_t *elt; struct rspamd_osb_tokenizer_config *cf, *def; @@ -87,61 +97,58 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool, if (pool != NULL) { - cf = rspamd_mempool_alloc0 (pool, sizeof (*cf)); + cf = rspamd_mempool_alloc0(pool, sizeof(*cf)); } else { - cf = g_malloc0 (sizeof (*cf)); + cf = g_malloc0(sizeof(*cf)); } /* Use default config */ - def = rspamd_tokenizer_osb_default_config (); - memcpy (cf, def, sizeof (*cf)); + def = rspamd_tokenizer_osb_default_config(); + memcpy(cf, def, sizeof(*cf)); - elt = ucl_object_lookup (obj, "hash"); - if (elt != NULL && ucl_object_type (elt) == UCL_STRING) { - if (g_ascii_strncasecmp (ucl_object_tostring (elt), "xxh", 3) - == 0) { + elt = ucl_object_lookup(obj, "hash"); + if (elt != NULL && ucl_object_type(elt) == UCL_STRING) { + if (g_ascii_strncasecmp(ucl_object_tostring(elt), "xxh", 3) == 0) { cf->ht = RSPAMD_OSB_HASH_XXHASH; - elt = ucl_object_lookup (obj, "seed"); - if (elt != NULL && ucl_object_type (elt) == UCL_INT) { - cf->seed = ucl_object_toint (elt); + elt = ucl_object_lookup(obj, "seed"); + if (elt != NULL && ucl_object_type(elt) == UCL_INT) { + cf->seed = ucl_object_toint(elt); } } - else if (g_ascii_strncasecmp (ucl_object_tostring (elt), "sip", 3) - == 0) { + else if (g_ascii_strncasecmp(ucl_object_tostring(elt), "sip", 3) == 0) { cf->ht = RSPAMD_OSB_HASH_SIPHASH; - elt = ucl_object_lookup (obj, "key"); - - if (elt != NULL && ucl_object_type (elt) == UCL_STRING) { - key = rspamd_decode_base32 (ucl_object_tostring (elt), - 0, &keylen, RSPAMD_BASE32_DEFAULT); - if (keylen < sizeof (rspamd_sipkey_t)) { - msg_warn ("siphash key is too short: %z", keylen); - g_free (key); + elt = ucl_object_lookup(obj, "key"); + + if (elt != NULL && ucl_object_type(elt) == UCL_STRING) { + key = rspamd_decode_base32(ucl_object_tostring(elt), + 0, &keylen, RSPAMD_BASE32_DEFAULT); + if (keylen < 
sizeof(rspamd_sipkey_t)) { + msg_warn("siphash key is too short: %z", keylen); + g_free(key); } else { - memcpy (cf->sk, key, sizeof (cf->sk)); - g_free (key); + memcpy(cf->sk, key, sizeof(cf->sk)); + g_free(key); } } else { - msg_warn_pool ("siphash cannot be used without key"); + msg_warn_pool("siphash cannot be used without key"); } - } } else { - elt = ucl_object_lookup (obj, "compat"); - if (elt != NULL && ucl_object_toboolean (elt)) { + elt = ucl_object_lookup(obj, "compat"); + if (elt != NULL && ucl_object_toboolean(elt)) { cf->ht = RSPAMD_OSB_HASH_COMPAT; } } - elt = ucl_object_lookup (obj, "window"); - if (elt != NULL && ucl_object_type (elt) == UCL_INT) { - cf->window_size = ucl_object_toint (elt); + elt = ucl_object_lookup(obj, "window"); + if (elt != NULL && ucl_object_type(elt) == UCL_INT) { + cf->window_size = ucl_object_toint(elt); if (cf->window_size > DEFAULT_FEATURE_WINDOW_SIZE * 4) { - msg_err_pool ("too large window size: %d", cf->window_size); + msg_err_pool("too large window size: %d", cf->window_size); cf->window_size = DEFAULT_FEATURE_WINDOW_SIZE; } } @@ -150,31 +157,31 @@ rspamd_tokenizer_osb_config_from_ucl (rspamd_mempool_t * pool, } gpointer -rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool, - struct rspamd_tokenizer_config *cf, - gsize *len) +rspamd_tokenizer_osb_get_config(rspamd_mempool_t *pool, + struct rspamd_tokenizer_config *cf, + gsize *len) { struct rspamd_osb_tokenizer_config *osb_cf, *def; if (cf != NULL && cf->opts != NULL) { - osb_cf = rspamd_tokenizer_osb_config_from_ucl (pool, cf->opts); + osb_cf = rspamd_tokenizer_osb_config_from_ucl(pool, cf->opts); } else { - def = rspamd_tokenizer_osb_default_config (); - osb_cf = rspamd_mempool_alloc (pool, sizeof (*osb_cf)); - memcpy (osb_cf, def, sizeof (*osb_cf)); + def = rspamd_tokenizer_osb_default_config(); + osb_cf = rspamd_mempool_alloc(pool, sizeof(*osb_cf)); + memcpy(osb_cf, def, sizeof(*osb_cf)); /* Do not write sipkey to statfile */ } if (osb_cf->ht == 
RSPAMD_OSB_HASH_SIPHASH) { - msg_info_pool ("siphash key is not stored into statfiles, so you'd " - "need to keep it inside the configuration"); + msg_info_pool("siphash key is not stored into statfiles, so you'd " + "need to keep it inside the configuration"); } - memset (osb_cf->sk, 0, sizeof (osb_cf->sk)); + memset(osb_cf->sk, 0, sizeof(osb_cf->sk)); if (len != NULL) { - *len = sizeof (*osb_cf); + *len = sizeof(*osb_cf); } return osb_cf; @@ -259,13 +266,12 @@ struct token_pipe_entry { rspamd_stat_token_t *t; }; -gint -rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, - struct rspamd_task *task, - GArray *words, - gboolean is_utf, - const gchar *prefix, - GPtrArray *result) +gint rspamd_tokenizer_osb(struct rspamd_stat_ctx *ctx, + struct rspamd_task *task, + GArray *words, + gboolean is_utf, + const gchar *prefix, + GPtrArray *result) { rspamd_token_t *new_tok = NULL; rspamd_stat_token_t *token; @@ -284,31 +290,31 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, window_size = osb_cf->window_size; if (prefix) { - seed = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, - prefix, strlen (prefix), osb_cf->seed); + seed = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64, + prefix, strlen(prefix), osb_cf->seed); } else { seed = osb_cf->seed; } - hashpipe = g_alloca (window_size * sizeof (hashpipe[0])); + hashpipe = g_alloca(window_size * sizeof(hashpipe[0])); for (i = 0; i < window_size; i++) { hashpipe[i].h = 0xfe; hashpipe[i].t = NULL; } - token_size = sizeof (rspamd_token_t) + - sizeof (gdouble) * ctx->statfiles->len; - g_assert (token_size > 0); + token_size = sizeof(rspamd_token_t) + + sizeof(gdouble) * ctx->statfiles->len; + g_assert(token_size > 0); - for (w = 0; w < words->len; w ++) { - token = &g_array_index (words, rspamd_stat_token_t, w); + for (w = 0; w < words->len; w++) { + token = &g_array_index(words, rspamd_stat_token_t, w); token_flags = token->flags; const gchar *begin; gsize len; if (token->flags & - 
(RSPAMD_STAT_TOKEN_FLAG_STOP_WORD|RSPAMD_STAT_TOKEN_FLAG_SKIPPED)) { + (RSPAMD_STAT_TOKEN_FLAG_STOP_WORD | RSPAMD_STAT_TOKEN_FLAG_SKIPPED)) { /* Skip stop/skipped words */ continue; } @@ -327,17 +333,17 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, ftok.begin = begin; ftok.len = len; - cur = rspamd_fstrhash_lc (&ftok, is_utf); + cur = rspamd_fstrhash_lc(&ftok, is_utf); } else { /* We know that the words are normalized */ if (osb_cf->ht == RSPAMD_OSB_HASH_XXHASH) { - cur = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, - begin, len, osb_cf->seed); + cur = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64, + begin, len, osb_cf->seed); } else { - rspamd_cryptobox_siphash ((guchar *)&cur, begin, - len, osb_cf->sk); + rspamd_cryptobox_siphash((guchar *) &cur, begin, + len, osb_cf->sk); if (prefix) { cur ^= seed; @@ -346,36 +352,37 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, } if (token_flags & RSPAMD_STAT_TOKEN_FLAG_UNIGRAM) { - new_tok = rspamd_mempool_alloc0 (task->task_pool, token_size); + new_tok = rspamd_mempool_alloc0(task->task_pool, token_size); new_tok->flags = token_flags; new_tok->t1 = token; new_tok->t2 = token; new_tok->data = cur; new_tok->window_idx = 0; - g_ptr_array_add (result, new_tok); + g_ptr_array_add(result, new_tok); continue; } -#define ADD_TOKEN do {\ - new_tok = rspamd_mempool_alloc0 (task->task_pool, token_size); \ - new_tok->flags = token_flags; \ - new_tok->t1 = hashpipe[0].t; \ - new_tok->t2 = hashpipe[i].t; \ - if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { \ - h1 = ((guint32)hashpipe[0].h) * primes[0] + \ - ((guint32)hashpipe[i].h) * primes[i << 1]; \ - h2 = ((guint32)hashpipe[0].h) * primes[1] + \ - ((guint32)hashpipe[i].h) * primes[(i << 1) - 1]; \ - memcpy((guchar *)&new_tok->data, &h1, sizeof (h1)); \ - memcpy(((guchar *)&new_tok->data) + sizeof (h1), &h2, sizeof (h2)); \ - } \ - else { \ - new_tok->data = hashpipe[0].h * primes[0] + hashpipe[i].h * primes[i << 1]; \ - } \ - 
new_tok->window_idx = i; \ - g_ptr_array_add (result, new_tok); \ - } while(0) +#define ADD_TOKEN \ + do { \ + new_tok = rspamd_mempool_alloc0(task->task_pool, token_size); \ + new_tok->flags = token_flags; \ + new_tok->t1 = hashpipe[0].t; \ + new_tok->t2 = hashpipe[i].t; \ + if (osb_cf->ht == RSPAMD_OSB_HASH_COMPAT) { \ + h1 = ((guint32) hashpipe[0].h) * primes[0] + \ + ((guint32) hashpipe[i].h) * primes[i << 1]; \ + h2 = ((guint32) hashpipe[0].h) * primes[1] + \ + ((guint32) hashpipe[i].h) * primes[(i << 1) - 1]; \ + memcpy((guchar *) &new_tok->data, &h1, sizeof(h1)); \ + memcpy(((guchar *) &new_tok->data) + sizeof(h1), &h2, sizeof(h2)); \ + } \ + else { \ + new_tok->data = hashpipe[0].h * primes[0] + hashpipe[i].h * primes[i << 1]; \ + } \ + new_tok->window_idx = i; \ + g_ptr_array_add(result, new_tok); \ + } while (0) if (processed < window_size) { /* Just fill a hashpipe */ @@ -402,9 +409,9 @@ rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, } if (processed > 1 && processed <= window_size) { - processed --; - memmove (hashpipe, &hashpipe[window_size - processed], - processed * sizeof (hashpipe[0])); + processed--; + memmove(hashpipe, &hashpipe[window_size - processed], + processed * sizeof(hashpipe[0])); for (i = 1; i < processed; i++) { ADD_TOKEN; diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c index 55ee62f85..6e55a33a6 100644 --- a/src/libstat/tokenizers/tokenizers.c +++ b/src/libstat/tokenizers/tokenizers.c @@ -35,9 +35,9 @@ #include <math.h> -typedef gboolean (*token_get_function) (rspamd_stat_token_t * buf, gchar const **pos, - rspamd_stat_token_t * token, - GList **exceptions, gsize *rl, gboolean check_signature); +typedef gboolean (*token_get_function)(rspamd_stat_token_t *buf, gchar const **pos, + rspamd_stat_token_t *token, + GList **exceptions, gsize *rl, gboolean check_signature); const gchar t_delimiters[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, @@ -65,14 +65,13 @@ const gchar t_delimiters[256] = { 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 -}; + 0, 0, 0, 0, 0, 0}; /* Get next word from specified f_str_t buf */ static gboolean -rspamd_tokenizer_get_word_raw (rspamd_stat_token_t * buf, - gchar const **cur, rspamd_stat_token_t * token, - GList **exceptions, gsize *rl, gboolean unused) +rspamd_tokenizer_get_word_raw(rspamd_stat_token_t *buf, + gchar const **cur, rspamd_stat_token_t *token, + GList **exceptions, gsize *rl, gboolean unused) { gsize remain, pos; const gchar *p; @@ -82,7 +81,7 @@ rspamd_tokenizer_get_word_raw (rspamd_stat_token_t * buf, return FALSE; } - g_assert (cur != NULL); + g_assert(cur != NULL); if (exceptions != NULL && *exceptions != NULL) { ex = (*exceptions)->data; @@ -121,20 +120,20 @@ rspamd_tokenizer_get_word_raw (rspamd_stat_token_t * buf, do { if (ex != NULL && ex->pos == pos) { /* Go to the next exception */ - *exceptions = g_list_next (*exceptions); + *exceptions = g_list_next(*exceptions); *cur = p + ex->len; return TRUE; } pos++; p++; remain--; - } while (remain > 0 && t_delimiters[(guchar)*p]); + } while (remain > 0 && t_delimiters[(guchar) *p]); token->original.begin = p; - while (remain > 0 && !t_delimiters[(guchar)*p]) { + while (remain > 0 && !t_delimiters[(guchar) *p]) { if (ex != NULL && ex->pos == pos) { - *exceptions = g_list_next (*exceptions); + *exceptions = g_list_next(*exceptions); *cur = p + ex->len; return TRUE; } @@ -160,40 +159,40 @@ rspamd_tokenizer_get_word_raw (rspamd_stat_token_t * buf, } static inline gboolean -rspamd_tokenize_check_limit (gboolean decay, - guint word_decay, - guint nwords, - guint64 *hv, - guint64 *prob, - const rspamd_stat_token_t *token, - gssize remain, - gssize total) +rspamd_tokenize_check_limit(gboolean decay, + guint word_decay, + guint nwords, + guint64 *hv, + guint64 *prob, + const rspamd_stat_token_t *token, + gssize remain, + gssize total) { static const gdouble avg_word_len = 6.0; if (!decay) { - if (token->original.len >= 
sizeof (guint64)) { + if (token->original.len >= sizeof(guint64)) { guint64 tmp; - memcpy (&tmp, token->original.begin, sizeof (tmp)); - *hv = mum_hash_step (*hv, tmp); + memcpy(&tmp, token->original.begin, sizeof(tmp)); + *hv = mum_hash_step(*hv, tmp); } /* Check for decay */ - if (word_decay > 0 && nwords > word_decay && remain < (gssize)total) { + if (word_decay > 0 && nwords > word_decay && remain < (gssize) total) { /* Start decay */ gdouble decay_prob; - *hv = mum_hash_finish (*hv); + *hv = mum_hash_finish(*hv); /* We assume that word is 6 symbols length in average */ - decay_prob = (gdouble)word_decay / ((total - (remain)) / avg_word_len) * 10; - decay_prob = floor (decay_prob) / 10.0; + decay_prob = (gdouble) word_decay / ((total - (remain)) / avg_word_len) * 10; + decay_prob = floor(decay_prob) / 10.0; if (decay_prob >= 1.0) { *prob = G_MAXUINT64; } else { - *prob = (guint64)(decay_prob * (double)G_MAXUINT64); + *prob = (guint64) (decay_prob * (double) G_MAXUINT64); } return TRUE; @@ -213,8 +212,8 @@ rspamd_tokenize_check_limit (gboolean decay, } static inline gboolean -rspamd_utf_word_valid (const guchar *text, const guchar *end, - gint32 start, gint32 finish) +rspamd_utf_word_valid(const guchar *text, const guchar *end, + gint32 start, gint32 finish) { const guchar *st = text + start, *fin = text + finish; UChar32 c; @@ -223,37 +222,38 @@ rspamd_utf_word_valid (const guchar *text, const guchar *end, return FALSE; } - U8_NEXT (text, start, finish, c); + U8_NEXT(text, start, finish, c); - if (u_isJavaIDPart (c)) { + if (u_isJavaIDPart(c)) { return TRUE; } return FALSE; } -#define SHIFT_EX do { \ - cur = g_list_next (cur); \ - if (cur) { \ - ex = (struct rspamd_process_exception *) cur->data; \ - } \ - else { \ - ex = NULL; \ - } \ -} while(0) +#define SHIFT_EX \ + do { \ + cur = g_list_next(cur); \ + if (cur) { \ + ex = (struct rspamd_process_exception *) cur->data; \ + } \ + else { \ + ex = NULL; \ + } \ + } while (0) static inline void 
-rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res) +rspamd_tokenize_exception(struct rspamd_process_exception *ex, GArray *res) { rspamd_stat_token_t token; - memset (&token, 0, sizeof (token)); + memset(&token, 0, sizeof(token)); if (ex->type == RSPAMD_EXCEPTION_GENERIC) { token.original.begin = "!!EX!!"; - token.original.len = sizeof ("!!EX!!") - 1; + token.original.len = sizeof("!!EX!!") - 1; token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION; - g_array_append_val (res, token); + g_array_append_val(res, token); token.flags = 0; } else if (ex->type == RSPAMD_EXCEPTION_URL) { @@ -262,31 +262,30 @@ rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res) uri = ex->ptr; if (uri && uri->tldlen > 0) { - token.original.begin = rspamd_url_tld_unsafe (uri); + token.original.begin = rspamd_url_tld_unsafe(uri); token.original.len = uri->tldlen; - } else { token.original.begin = "!!EX!!"; - token.original.len = sizeof ("!!EX!!") - 1; + token.original.len = sizeof("!!EX!!") - 1; } token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION; - g_array_append_val (res, token); + g_array_append_val(res, token); token.flags = 0; } } GArray * -rspamd_tokenize_text (const gchar *text, gsize len, - const UText *utxt, - enum rspamd_tokenize_type how, - struct rspamd_config *cfg, - GList *exceptions, - guint64 *hash, - GArray *cur_words, - rspamd_mempool_t *pool) +rspamd_tokenize_text(const gchar *text, gsize len, + const UText *utxt, + enum rspamd_tokenize_type how, + struct rspamd_config *cfg, + GList *exceptions, + guint64 *hash, + GArray *cur_words, + rspamd_mempool_t *pool) { rspamd_stat_token_t token, buf; const gchar *pos = NULL; @@ -297,7 +296,7 @@ rspamd_tokenize_text (const gchar *text, gsize len, guint64 hv = 0; gboolean decay = FALSE, long_text_mode = FALSE; guint64 prob = 0; - static UBreakIterator* bi = NULL; + static UBreakIterator *bi = NULL; static const gsize long_text_limit = 1 * 1024 * 1024; static const ev_tstamp max_exec_time = 0.2; 
/* 200 ms */ ev_tstamp start; @@ -311,14 +310,14 @@ rspamd_tokenize_text (const gchar *text, gsize len, * In this mode we do additional checks to avoid performance issues */ long_text_mode = TRUE; - start = ev_time (); + start = ev_time(); } buf.original.begin = text; buf.original.len = len; buf.flags = 0; - memset (&token, 0, sizeof (token)); + memset(&token, 0, sizeof(token)); if (cfg != NULL) { min_len = cfg->min_word_len; @@ -328,15 +327,15 @@ rspamd_tokenize_text (const gchar *text, gsize len, } if (!cur_words) { - res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_stat_token_t), - initial_size); + res = g_array_sized_new(FALSE, FALSE, sizeof(rspamd_stat_token_t), + initial_size); } else { res = cur_words; } - if (G_UNLIKELY (how == RSPAMD_TOKENIZE_RAW || utxt == NULL)) { - while (rspamd_tokenizer_get_word_raw (&buf, &pos, &token, &cur, &l, FALSE)) { + if (G_UNLIKELY(how == RSPAMD_TOKENIZE_RAW || utxt == NULL)) { + while (rspamd_tokenizer_get_word_raw(&buf, &pos, &token, &cur, &l, FALSE)) { if (l == 0 || (min_len > 0 && l < min_len) || (max_len > 0 && l > max_len)) { token.original.begin = pos; @@ -344,8 +343,8 @@ rspamd_tokenize_text (const gchar *text, gsize len, } if (token.original.len > 0 && - rspamd_tokenize_check_limit (decay, word_decay, res->len, - &hv, &prob, &token, pos - text, len)) { + rspamd_tokenize_check_limit(decay, word_decay, res->len, + &hv, &prob, &token, pos - text, len)) { if (!decay) { decay = TRUE; } @@ -357,27 +356,27 @@ rspamd_tokenize_text (const gchar *text, gsize len, if (long_text_mode) { if ((res->len + 1) % 16 == 0) { - ev_tstamp now = ev_time (); + ev_tstamp now = ev_time(); if (now - start > max_exec_time) { - msg_warn_pool_check ( - "too long time has been spent on tokenization:" - " %.1f ms, limit is %.1f ms; %d words added so far", - (now - start) * 1e3, max_exec_time * 1e3, - res->len); + msg_warn_pool_check( + "too long time has been spent on tokenization:" + " %.1f ms, limit is %.1f ms; %d words added so far", + (now 
- start) * 1e3, max_exec_time * 1e3, + res->len); goto end; } } } - g_array_append_val (res, token); + g_array_append_val(res, token); - if (((gsize)res->len) * sizeof (token) > (0x1ull << 30u)) { + if (((gsize) res->len) * sizeof(token) > (0x1ull << 30u)) { /* Due to bug in glib ! */ - msg_err_pool_check ( - "too many words found: %d, stop tokenization to avoid DoS", - res->len); + msg_err_pool_check( + "too many words found: %d, stop tokenization to avoid DoS", + res->len); goto end; } @@ -392,21 +391,21 @@ rspamd_tokenize_text (const gchar *text, gsize len, struct rspamd_process_exception *ex = NULL; if (bi == NULL) { - bi = ubrk_open (UBRK_WORD, NULL, NULL, 0, &uc_err); + bi = ubrk_open(UBRK_WORD, NULL, NULL, 0, &uc_err); - g_assert (U_SUCCESS (uc_err)); + g_assert(U_SUCCESS(uc_err)); } - ubrk_setUText (bi, (UText*)utxt, &uc_err); - last = ubrk_first (bi); + ubrk_setUText(bi, (UText *) utxt, &uc_err); + last = ubrk_first(bi); p = last; if (cur) { - ex = (struct rspamd_process_exception *)cur->data; + ex = (struct rspamd_process_exception *) cur->data; } while (p != UBRK_DONE) { -start_over: + start_over: token.original.len = 0; if (p > last) { @@ -418,19 +417,19 @@ start_over: while (cur && ex->pos <= last) { /* We have an exception at the beginning, skip those */ last += ex->len; - rspamd_tokenize_exception (ex, res); + rspamd_tokenize_exception(ex, res); if (last > p) { /* Exception spread over the boundaries */ while (last > p && p != UBRK_DONE) { gint32 old_p = p; - p = ubrk_next (bi); + p = ubrk_next(bi); if (p != UBRK_DONE && p <= old_p) { - msg_warn_pool_check ( - "tokenization reversed back on position %d," - "%d new position (%d backward), likely libicu bug!", - (gint)(p), (gint)(old_p), old_p - p); + msg_warn_pool_check( + "tokenization reversed back on position %d," + "%d new position (%d backward), likely libicu bug!", + (gint) (p), (gint) (old_p), old_p - p); goto end; } @@ -447,8 +446,8 @@ start_over: /* Now, we can have an exception within 
boundary again */ if (cur && ex->pos >= last && ex->pos <= p) { /* Append the first part */ - if (rspamd_utf_word_valid (text, text + len, last, - ex->pos)) { + if (rspamd_utf_word_valid(text, text + len, last, + ex->pos)) { token.original.begin = text + last; token.original.len = ex->pos - last; token.flags = RSPAMD_STAT_TOKEN_FLAG_TEXT | @@ -458,18 +457,18 @@ start_over: /* Process the current exception */ last += ex->len + (ex->pos - last); - rspamd_tokenize_exception (ex, res); + rspamd_tokenize_exception(ex, res); if (last > p) { /* Exception spread over the boundaries */ while (last > p && p != UBRK_DONE) { gint32 old_p = p; - p = ubrk_next (bi); + p = ubrk_next(bi); if (p != UBRK_DONE && p <= old_p) { - msg_warn_pool_check ( - "tokenization reversed back on position %d," - "%d new position (%d backward), likely libicu bug!", - (gint)(p), (gint)(old_p), old_p - p); + msg_warn_pool_check( + "tokenization reversed back on position %d," + "%d new position (%d backward), likely libicu bug!", + (gint) (p), (gint) (old_p), old_p - p); goto end; } @@ -482,7 +481,7 @@ start_over: SHIFT_EX; } else if (p > last) { - if (rspamd_utf_word_valid (text, text + len, last, p)) { + if (rspamd_utf_word_valid(text, text + len, last, p)) { token.original.begin = text + last; token.original.len = p - last; token.flags = RSPAMD_STAT_TOKEN_FLAG_TEXT | @@ -497,7 +496,7 @@ start_over: SHIFT_EX; } - if (rspamd_utf_word_valid (text, text + len, last, p)) { + if (rspamd_utf_word_valid(text, text + len, last, p)) { token.original.begin = text + last; token.original.len = p - last; token.flags = RSPAMD_STAT_TOKEN_FLAG_TEXT | @@ -506,7 +505,7 @@ start_over: } else { /* No exceptions within boundary */ - if (rspamd_utf_word_valid (text, text + len, last, p)) { + if (rspamd_utf_word_valid(text, text + len, last, p)) { token.original.begin = text + last; token.original.len = p - last; token.flags = RSPAMD_STAT_TOKEN_FLAG_TEXT | @@ -515,7 +514,7 @@ start_over: } } else { - if 
(rspamd_utf_word_valid (text, text + len, last, p)) { + if (rspamd_utf_word_valid(text, text + len, last, p)) { token.original.begin = text + last; token.original.len = p - last; token.flags = RSPAMD_STAT_TOKEN_FLAG_TEXT | @@ -524,11 +523,12 @@ start_over: } if (token.original.len > 0 && - rspamd_tokenize_check_limit (decay, word_decay, res->len, - &hv, &prob, &token, p, len)) { + rspamd_tokenize_check_limit(decay, word_decay, res->len, + &hv, &prob, &token, p, len)) { if (!decay) { decay = TRUE; - } else { + } + else { token.flags |= RSPAMD_STAT_TOKEN_FLAG_SKIPPED; } } @@ -536,15 +536,15 @@ start_over: if (token.original.len > 0) { /* Additional check for number of words */ - if (((gsize)res->len) * sizeof (token) > (0x1ull << 30u)) { + if (((gsize) res->len) * sizeof(token) > (0x1ull << 30u)) { /* Due to bug in glib ! */ - msg_err ("too many words found: %d, stop tokenization to avoid DoS", + msg_err("too many words found: %d, stop tokenization to avoid DoS", res->len); goto end; } - g_array_append_val (res, token); + g_array_append_val(res, token); } /* Also check for long text mode */ @@ -553,14 +553,14 @@ start_over: const int words_check_mask = 0x7F; if ((res->len & words_check_mask) == words_check_mask) { - ev_tstamp now = ev_time (); + ev_tstamp now = ev_time(); if (now - start > max_exec_time) { - msg_warn_pool_check ( - "too long time has been spent on tokenization:" - " %.1f ms, limit is %.1f ms; %d words added so far", - (now - start) * 1e3, max_exec_time * 1e3, - res->len); + msg_warn_pool_check( + "too long time has been spent on tokenization:" + " %.1f ms, limit is %.1f ms; %d words added so far", + (now - start) * 1e3, max_exec_time * 1e3, + res->len); goto end; } @@ -568,12 +568,12 @@ start_over: } last = p; - p = ubrk_next (bi); + p = ubrk_next(bi); if (p != UBRK_DONE && p <= last) { - msg_warn_pool_check ("tokenization reversed back on position %d," - "%d new position (%d backward), likely libicu bug!", - (gint)(p), (gint)(last), last - p); + 
msg_warn_pool_check("tokenization reversed back on position %d," + "%d new position (%d backward), likely libicu bug!", + (gint) (p), (gint) (last), last - p); goto end; } @@ -582,7 +582,7 @@ start_over: end: if (!decay) { - hv = mum_hash_finish (hv); + hv = mum_hash_finish(hv); } if (hash) { @@ -595,8 +595,8 @@ end: #undef SHIFT_EX static void -rspamd_add_metawords_from_str (const gchar *beg, gsize len, - struct rspamd_task *task) +rspamd_add_metawords_from_str(const gchar *beg, gsize len, + struct rspamd_task *task) { UText utxt = UTEXT_INITIALIZER; UErrorCode uc_err = U_ZERO_ERROR; @@ -605,7 +605,7 @@ rspamd_add_metawords_from_str (const gchar *beg, gsize len, gboolean valid_utf = TRUE; while (i < len) { - U8_NEXT (beg, i, len, uc); + U8_NEXT(beg, i, len, uc); if (((gint32) uc) < 0) { valid_utf = FALSE; @@ -613,12 +613,12 @@ rspamd_add_metawords_from_str (const gchar *beg, gsize len, } #if U_ICU_VERSION_MAJOR_NUM < 50 - if (u_isalpha (uc)) { - gint32 sc = ublock_getCode (uc); + if (u_isalpha(uc)) { + gint32 sc = ublock_getCode(uc); if (sc == UBLOCK_THAI) { valid_utf = FALSE; - msg_info_task ("enable workaround for Thai characters for old libicu"); + msg_info_task("enable workaround for Thai characters for old libicu"); break; } } @@ -626,101 +626,100 @@ rspamd_add_metawords_from_str (const gchar *beg, gsize len, } if (valid_utf) { - utext_openUTF8 (&utxt, - beg, - len, - &uc_err); + utext_openUTF8(&utxt, + beg, + len, + &uc_err); - task->meta_words = rspamd_tokenize_text (beg, len, - &utxt, RSPAMD_TOKENIZE_UTF, - task->cfg, NULL, NULL, - task->meta_words, - task->task_pool); + task->meta_words = rspamd_tokenize_text(beg, len, + &utxt, RSPAMD_TOKENIZE_UTF, + task->cfg, NULL, NULL, + task->meta_words, + task->task_pool); - utext_close (&utxt); + utext_close(&utxt); } else { - task->meta_words = rspamd_tokenize_text (beg, len, - NULL, RSPAMD_TOKENIZE_RAW, - task->cfg, NULL, NULL, task->meta_words, - task->task_pool); + task->meta_words = rspamd_tokenize_text(beg, 
len, + NULL, RSPAMD_TOKENIZE_RAW, + task->cfg, NULL, NULL, task->meta_words, + task->task_pool); } } -void -rspamd_tokenize_meta_words (struct rspamd_task *task) +void rspamd_tokenize_meta_words(struct rspamd_task *task) { guint i = 0; rspamd_stat_token_t *tok; - if (MESSAGE_FIELD (task, subject)) { - rspamd_add_metawords_from_str (MESSAGE_FIELD (task, subject), - strlen (MESSAGE_FIELD (task, subject)), task); + if (MESSAGE_FIELD(task, subject)) { + rspamd_add_metawords_from_str(MESSAGE_FIELD(task, subject), + strlen(MESSAGE_FIELD(task, subject)), task); } - if (MESSAGE_FIELD (task, from_mime) && MESSAGE_FIELD (task, from_mime)->len > 0) { + if (MESSAGE_FIELD(task, from_mime) && MESSAGE_FIELD(task, from_mime)->len > 0) { struct rspamd_email_address *addr; - addr = g_ptr_array_index (MESSAGE_FIELD (task, from_mime), 0); + addr = g_ptr_array_index(MESSAGE_FIELD(task, from_mime), 0); if (addr->name) { - rspamd_add_metawords_from_str (addr->name, strlen (addr->name), task); + rspamd_add_metawords_from_str(addr->name, strlen(addr->name), task); } } if (task->meta_words != NULL) { const gchar *language = NULL; - if (MESSAGE_FIELD (task, text_parts) && - MESSAGE_FIELD (task, text_parts)->len > 0) { - struct rspamd_mime_text_part *tp = g_ptr_array_index ( - MESSAGE_FIELD (task, text_parts), 0); + if (MESSAGE_FIELD(task, text_parts) && + MESSAGE_FIELD(task, text_parts)->len > 0) { + struct rspamd_mime_text_part *tp = g_ptr_array_index( + MESSAGE_FIELD(task, text_parts), 0); if (tp->language) { language = tp->language; } } - rspamd_normalize_words (task->meta_words, task->task_pool); - rspamd_stem_words (task->meta_words, task->task_pool, language, - task->lang_det); + rspamd_normalize_words(task->meta_words, task->task_pool); + rspamd_stem_words(task->meta_words, task->task_pool, language, + task->lang_det); for (i = 0; i < task->meta_words->len; i++) { - tok = &g_array_index (task->meta_words, rspamd_stat_token_t, i); + tok = &g_array_index(task->meta_words, 
rspamd_stat_token_t, i); tok->flags |= RSPAMD_STAT_TOKEN_FLAG_HEADER; } } } static inline void -rspamd_uchars_to_ucs32 (const UChar *src, gsize srclen, - rspamd_stat_token_t *tok, - rspamd_mempool_t *pool) +rspamd_uchars_to_ucs32(const UChar *src, gsize srclen, + rspamd_stat_token_t *tok, + rspamd_mempool_t *pool) { UChar32 *dest, t, *d; gint32 i = 0; - dest = rspamd_mempool_alloc (pool, srclen * sizeof (UChar32)); + dest = rspamd_mempool_alloc(pool, srclen * sizeof(UChar32)); d = dest; while (i < srclen) { - U16_NEXT_UNSAFE (src, i, t); + U16_NEXT_UNSAFE(src, i, t); - if (u_isgraph (t)) { + if (u_isgraph(t)) { UCharCategory cat; - cat = u_charType (t); + cat = u_charType(t); #if U_ICU_VERSION_MAJOR_NUM >= 57 - if (u_hasBinaryProperty (t, UCHAR_EMOJI)) { + if (u_hasBinaryProperty(t, UCHAR_EMOJI)) { tok->flags |= RSPAMD_STAT_TOKEN_FLAG_EMOJI; } #endif if ((cat >= U_UPPERCASE_LETTER && cat <= U_OTHER_NUMBER) || - cat == U_CONNECTOR_PUNCTUATION || - cat == U_MATH_SYMBOL || - cat == U_CURRENCY_SYMBOL) { - *d++ = u_tolower (t); + cat == U_CONNECTOR_PUNCTUATION || + cat == U_MATH_SYMBOL || + cat == U_CURRENCY_SYMBOL) { + *d++ = u_tolower(t); } } else { @@ -734,52 +733,51 @@ rspamd_uchars_to_ucs32 (const UChar *src, gsize srclen, } static inline void -rspamd_ucs32_to_normalised (rspamd_stat_token_t *tok, - rspamd_mempool_t *pool) +rspamd_ucs32_to_normalised(rspamd_stat_token_t *tok, + rspamd_mempool_t *pool) { guint i, doff = 0; gsize utflen = 0; gchar *dest; UChar32 t; - for (i = 0; i < tok->unicode.len; i ++) { - utflen += U8_LENGTH (tok->unicode.begin[i]); + for (i = 0; i < tok->unicode.len; i++) { + utflen += U8_LENGTH(tok->unicode.begin[i]); } - dest = rspamd_mempool_alloc (pool, utflen + 1); + dest = rspamd_mempool_alloc(pool, utflen + 1); - for (i = 0; i < tok->unicode.len; i ++) { + for (i = 0; i < tok->unicode.len; i++) { t = tok->unicode.begin[i]; - U8_APPEND_UNSAFE (dest, doff, t); + U8_APPEND_UNSAFE(dest, doff, t); } - g_assert (doff <= utflen); + 
g_assert(doff <= utflen); dest[doff] = '\0'; tok->normalized.len = doff; tok->normalized.begin = dest; } -void -rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool) +void rspamd_normalize_single_word(rspamd_stat_token_t *tok, rspamd_mempool_t *pool) { UErrorCode uc_err = U_ZERO_ERROR; UConverter *utf8_converter; UChar tmpbuf[1024]; /* Assume that we have no longer words... */ gsize ulen; - utf8_converter = rspamd_get_utf8_converter (); + utf8_converter = rspamd_get_utf8_converter(); if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) { - ulen = ucnv_toUChars (utf8_converter, - tmpbuf, - G_N_ELEMENTS (tmpbuf), - tok->original.begin, - tok->original.len, - &uc_err); + ulen = ucnv_toUChars(utf8_converter, + tmpbuf, + G_N_ELEMENTS(tmpbuf), + tok->original.begin, + tok->original.len, + &uc_err); /* Now, we need to understand if we need to normalise the word */ - if (!U_SUCCESS (uc_err)) { + if (!U_SUCCESS(uc_err)) { tok->flags |= RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE; tok->unicode.begin = NULL; tok->unicode.len = 0; @@ -788,14 +786,14 @@ rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool) } else { #if U_ICU_VERSION_MAJOR_NUM >= 44 - const UNormalizer2 *norm = rspamd_get_unicode_normalizer (); + const UNormalizer2 *norm = rspamd_get_unicode_normalizer(); gint32 end; /* We can now check if we need to decompose */ - end = unorm2_spanQuickCheckYes (norm, tmpbuf, ulen, &uc_err); + end = unorm2_spanQuickCheckYes(norm, tmpbuf, ulen, &uc_err); - if (!U_SUCCESS (uc_err)) { - rspamd_uchars_to_ucs32 (tmpbuf, ulen, tok, pool); + if (!U_SUCCESS(uc_err)) { + rspamd_uchars_to_ucs32(tmpbuf, ulen, tok, pool); tok->normalized.begin = NULL; tok->normalized.len = 0; tok->flags |= RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE; @@ -803,46 +801,46 @@ rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool) else { if (end == ulen) { /* Already normalised, just lowercase */ - rspamd_uchars_to_ucs32 (tmpbuf, ulen, tok, pool); - 
rspamd_ucs32_to_normalised (tok, pool); + rspamd_uchars_to_ucs32(tmpbuf, ulen, tok, pool); + rspamd_ucs32_to_normalised(tok, pool); } else { /* Perform normalization */ UChar normbuf[1024]; - g_assert (end < G_N_ELEMENTS (normbuf)); + g_assert(end < G_N_ELEMENTS(normbuf)); /* First part */ - memcpy (normbuf, tmpbuf, end * sizeof (UChar)); + memcpy(normbuf, tmpbuf, end * sizeof(UChar)); /* Second part */ - ulen = unorm2_normalizeSecondAndAppend (norm, - normbuf, end, - G_N_ELEMENTS (normbuf), - tmpbuf + end, - ulen - end, - &uc_err); - - if (!U_SUCCESS (uc_err)) { + ulen = unorm2_normalizeSecondAndAppend(norm, + normbuf, end, + G_N_ELEMENTS(normbuf), + tmpbuf + end, + ulen - end, + &uc_err); + + if (!U_SUCCESS(uc_err)) { if (uc_err != U_BUFFER_OVERFLOW_ERROR) { - msg_warn_pool_check ("cannot normalise text '%*s': %s", - (gint)tok->original.len, tok->original.begin, - u_errorName (uc_err)); - rspamd_uchars_to_ucs32 (tmpbuf, ulen, tok, pool); - rspamd_ucs32_to_normalised (tok, pool); + msg_warn_pool_check("cannot normalise text '%*s': %s", + (gint) tok->original.len, tok->original.begin, + u_errorName(uc_err)); + rspamd_uchars_to_ucs32(tmpbuf, ulen, tok, pool); + rspamd_ucs32_to_normalised(tok, pool); tok->flags |= RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE; } } else { /* Copy normalised back */ - rspamd_uchars_to_ucs32 (normbuf, ulen, tok, pool); + rspamd_uchars_to_ucs32(normbuf, ulen, tok, pool); tok->flags |= RSPAMD_STAT_TOKEN_FLAG_NORMALISED; - rspamd_ucs32_to_normalised (tok, pool); + rspamd_ucs32_to_normalised(tok, pool); } } } #else /* Legacy version with no unorm2 interface */ - rspamd_uchars_to_ucs32 (tmpbuf, ulen, tok, pool); - rspamd_ucs32_to_normalised (tok, pool); + rspamd_uchars_to_ucs32(tmpbuf, ulen, tok, pool); + rspamd_ucs32_to_normalised(tok, pool); #endif } } @@ -851,31 +849,29 @@ rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool) /* Simple lowercase */ gchar *dest; - dest = rspamd_mempool_alloc (pool, tok->original.len + 1); 
- rspamd_strlcpy (dest, tok->original.begin, tok->original.len + 1); - rspamd_str_lc (dest, tok->original.len); + dest = rspamd_mempool_alloc(pool, tok->original.len + 1); + rspamd_strlcpy(dest, tok->original.begin, tok->original.len + 1); + rspamd_str_lc(dest, tok->original.len); tok->normalized.len = tok->original.len; tok->normalized.begin = dest; } } } -void -rspamd_normalize_words (GArray *words, rspamd_mempool_t *pool) +void rspamd_normalize_words(GArray *words, rspamd_mempool_t *pool) { rspamd_stat_token_t *tok; guint i; for (i = 0; i < words->len; i++) { - tok = &g_array_index (words, rspamd_stat_token_t, i); - rspamd_normalize_single_word (tok, pool); + tok = &g_array_index(words, rspamd_stat_token_t, i); + rspamd_normalize_single_word(tok, pool); } } -void -rspamd_stem_words (GArray *words, rspamd_mempool_t *pool, - const gchar *language, - struct rspamd_lang_detector *d) +void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool, + const gchar *language, + struct rspamd_lang_detector *d) { static GHashTable *stemmers = NULL; struct sb_stemmer *stem = NULL; @@ -885,49 +881,49 @@ rspamd_stem_words (GArray *words, rspamd_mempool_t *pool, gsize dlen; if (!stemmers) { - stemmers = g_hash_table_new (rspamd_strcase_hash, - rspamd_strcase_equal); + stemmers = g_hash_table_new(rspamd_strcase_hash, + rspamd_strcase_equal); } if (language && language[0] != '\0') { - stem = g_hash_table_lookup (stemmers, language); + stem = g_hash_table_lookup(stemmers, language); if (stem == NULL) { - stem = sb_stemmer_new (language, "UTF_8"); + stem = sb_stemmer_new(language, "UTF_8"); if (stem == NULL) { - msg_debug_pool ( - "<%s> cannot create lemmatizer for %s language", - language); - g_hash_table_insert (stemmers, g_strdup (language), - GINT_TO_POINTER (-1)); + msg_debug_pool( + "<%s> cannot create lemmatizer for %s language", + language); + g_hash_table_insert(stemmers, g_strdup(language), + GINT_TO_POINTER(-1)); } else { - g_hash_table_insert (stemmers, g_strdup 
(language), - stem); + g_hash_table_insert(stemmers, g_strdup(language), + stem); } } - else if (stem == GINT_TO_POINTER (-1)) { + else if (stem == GINT_TO_POINTER(-1)) { /* Negative cache */ stem = NULL; } } for (i = 0; i < words->len; i++) { - tok = &g_array_index (words, rspamd_stat_token_t, i); + tok = &g_array_index(words, rspamd_stat_token_t, i); if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) { if (stem) { const gchar *stemmed = NULL; - stemmed = sb_stemmer_stem (stem, - tok->normalized.begin, tok->normalized.len); + stemmed = sb_stemmer_stem(stem, + tok->normalized.begin, tok->normalized.len); - dlen = stemmed ? strlen (stemmed) : 0; + dlen = stemmed ? strlen(stemmed) : 0; if (dlen > 0) { - dest = rspamd_mempool_alloc (pool, dlen + 1); - memcpy (dest, stemmed, dlen); + dest = rspamd_mempool_alloc(pool, dlen + 1); + memcpy(dest, stemmed, dlen); dest[dlen] = '\0'; tok->stemmed.len = dlen; tok->stemmed.begin = dest; @@ -945,7 +941,7 @@ rspamd_stem_words (GArray *words, rspamd_mempool_t *pool, } if (tok->stemmed.len > 0 && d != NULL && - rspamd_language_detector_is_stop_word (d, tok->stemmed.begin, tok->stemmed.len)) { + rspamd_language_detector_is_stop_word(d, tok->stemmed.begin, tok->stemmed.len)) { tok->flags |= RSPAMD_STAT_TOKEN_FLAG_STOP_WORD; } } diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h index ca7261802..e908c359d 100644 --- a/src/libstat/tokenizers/tokenizers.h +++ b/src/libstat/tokenizers/tokenizers.h @@ -11,7 +11,7 @@ #define RSPAMD_DEFAULT_TOKENIZER "osb" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -22,15 +22,15 @@ struct rspamd_stat_ctx; struct rspamd_stat_tokenizer { gchar *name; - gpointer (*get_config) (rspamd_mempool_t *pool, - struct rspamd_tokenizer_config *cf, gsize *len); + gpointer (*get_config)(rspamd_mempool_t *pool, + struct rspamd_tokenizer_config *cf, gsize *len); - gint (*tokenize_func) (struct rspamd_stat_ctx *ctx, - struct rspamd_task *task, - GArray *words, - gboolean 
is_utf, - const gchar *prefix, - GPtrArray *result); + gint (*tokenize_func)(struct rspamd_stat_ctx *ctx, + struct rspamd_task *task, + GArray *words, + gboolean is_utf, + const gchar *prefix, + GPtrArray *result); }; enum rspamd_tokenize_type { @@ -40,44 +40,44 @@ enum rspamd_tokenize_type { }; /* Compare two token nodes */ -gint token_node_compare_func (gconstpointer a, gconstpointer b); +gint token_node_compare_func(gconstpointer a, gconstpointer b); /* Tokenize text into array of words (rspamd_stat_token_t type) */ -GArray *rspamd_tokenize_text (const gchar *text, gsize len, - const UText *utxt, - enum rspamd_tokenize_type how, - struct rspamd_config *cfg, - GList *exceptions, - guint64 *hash, - GArray *cur_words, - rspamd_mempool_t *pool); +GArray *rspamd_tokenize_text(const gchar *text, gsize len, + const UText *utxt, + enum rspamd_tokenize_type how, + struct rspamd_config *cfg, + GList *exceptions, + guint64 *hash, + GArray *cur_words, + rspamd_mempool_t *pool); /* OSB tokenize function */ -gint rspamd_tokenizer_osb (struct rspamd_stat_ctx *ctx, - struct rspamd_task *task, - GArray *words, - gboolean is_utf, - const gchar *prefix, - GPtrArray *result); +gint rspamd_tokenizer_osb(struct rspamd_stat_ctx *ctx, + struct rspamd_task *task, + GArray *words, + gboolean is_utf, + const gchar *prefix, + GPtrArray *result); -gpointer rspamd_tokenizer_osb_get_config (rspamd_mempool_t *pool, - struct rspamd_tokenizer_config *cf, - gsize *len); +gpointer rspamd_tokenizer_osb_get_config(rspamd_mempool_t *pool, + struct rspamd_tokenizer_config *cf, + gsize *len); struct rspamd_lang_detector; -void rspamd_normalize_single_word (rspamd_stat_token_t *tok, rspamd_mempool_t *pool); +void rspamd_normalize_single_word(rspamd_stat_token_t *tok, rspamd_mempool_t *pool); -void rspamd_normalize_words (GArray *words, rspamd_mempool_t *pool); +void rspamd_normalize_words(GArray *words, rspamd_mempool_t *pool); -void rspamd_stem_words (GArray *words, rspamd_mempool_t *pool, - const 
gchar *language, - struct rspamd_lang_detector *d); +void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool, + const gchar *language, + struct rspamd_lang_detector *d); -void rspamd_tokenize_meta_words (struct rspamd_task *task); +void rspamd_tokenize_meta_words(struct rspamd_task *task); -#ifdef __cplusplus +#ifdef __cplusplus } #endif |