From 1f58c5fa9805a404c9a942f110589fce464825df Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 1 Jan 2018 18:58:01 +0000 Subject: [PATCH] [Project] Initialize language detector --- src/controller.c | 22 +++++++++++++++------- src/libserver/cfg_file.h | 4 ++++ src/libserver/cfg_rcl.c | 3 +++ src/libserver/task.c | 5 ++++- src/libserver/task.h | 5 ++++- src/lua/lua_util.c | 2 +- src/plugins/fuzzy_check.c | 2 +- src/rspamadm/lua_repl.c | 2 +- src/rspamd_proxy.c | 2 +- src/worker.c | 6 +++--- 10 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/controller.c b/src/controller.c index 8e22850e6..1367c819b 100644 --- a/src/controller.c +++ b/src/controller.c @@ -28,6 +28,7 @@ #include "fuzzy_wire.h" #include "unix-std.h" #include "utlist.h" +#include "libmime/lang_detection.h" #include /* 60 seconds for worker's IO */ @@ -179,6 +180,7 @@ struct rspamd_controller_worker_ctx { struct event *rrd_event; struct rspamd_rrd_file *rrd; struct event save_stats_event; + struct rspamd_lang_detector *lang_det; }; struct rspamd_controller_plugin_cbdata { @@ -1482,7 +1484,7 @@ rspamd_controller_handle_lua_history (lua_State *L, if (lua_isfunction (L, -1)) { task = rspamd_task_new (session->ctx->worker, session->cfg, - session->pool); + session->pool, ctx->lang_det); task->resolver = ctx->resolver; task->ev_base = ctx->ev_base; @@ -1780,7 +1782,8 @@ rspamd_controller_handle_lua (struct rspamd_http_connection_entry *conn_ent, return 0; } - task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool); + task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool, + ctx->lang_det); task->resolver = ctx->resolver; task->ev_base = ctx->ev_base; @@ -1963,7 +1966,8 @@ rspamd_controller_handle_learn_common ( return 0; } - task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool); + task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool, + session->ctx->lang_det); task->resolver = ctx->resolver; 
task->ev_base = ctx->ev_base; @@ -2063,7 +2067,8 @@ rspamd_controller_handle_scan (struct rspamd_http_connection_entry *conn_ent, return 0; } - task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool); + task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool, + ctx->lang_det); task->ev_base = session->ctx->ev_base; task->resolver = ctx->resolver; @@ -2541,9 +2546,10 @@ rspamd_controller_handle_stat_common ( rspamd_mempool_stat (&mem_st); memcpy (&stat_copy, session->ctx->worker->srv->stat, sizeof (stat_copy)); stat = &stat_copy; - task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool); - ctx = session->ctx; + + task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool, + ctx->lang_det); task->resolver = ctx->resolver; task->ev_base = ctx->ev_base; cbdata = rspamd_mempool_alloc0 (session->pool, sizeof (*cbdata)); @@ -2905,7 +2911,8 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e return 0; } - task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool); + task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool, + ctx->lang_det); task->resolver = ctx->resolver; task->ev_base = ctx->ev_base; @@ -3622,6 +3629,7 @@ start_controller_worker (struct rspamd_worker *worker) g_ptr_array_add (worker->finish_actions, (gpointer)rspamd_controller_on_terminate); rspamd_controller_load_saved_stats (ctx); + ctx->lang_det = ctx->cfg->lang_det; /* RRD collector */ if (ctx->cfg->rrd_file && worker->index == 0) { diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index f724a82c8..5f04a39a7 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -264,6 +264,8 @@ struct rspamd_config_post_load_script { struct rspamd_config_post_load_script *prev, *next; }; +struct rspamd_lang_detector; + /** * Structure that stores all config data */ @@ -428,6 +430,8 @@ struct rspamd_config { gchar *zstd_output_dictionary; /**< path to 
zstd output dictionary */ ucl_object_t *neighbours; /**< other servers in the cluster */ + struct rspamd_lang_detector *lang_det; /**< language detector */ + ref_entry_t ref; /**< reference counter */ }; diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index f2256f55c..7fbe7abd4 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -28,6 +28,7 @@ #include "cryptobox.h" #include "libutil/multipattern.h" #include "libmime/email_addr.h" +#include "libmime/lang_detection.h" #ifdef HAVE_SYSLOG_H #include @@ -3688,6 +3689,8 @@ rspamd_config_read (struct rspamd_config *cfg, const gchar *filename, return FALSE; } + cfg->lang_det = rspamd_language_detector_init (cfg); + return TRUE; } diff --git a/src/libserver/task.c b/src/libserver/task.c index bf66ec030..2c014a7d1 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -26,6 +26,7 @@ #include "utlist.h" #include "contrib/zstd/zstd.h" #include "libserver/mempool_vars_internal.h" +#include "libmime/lang_detection.h" #include /* @@ -61,7 +62,8 @@ rspamd_request_header_dtor (gpointer p) */ struct rspamd_task * rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg, - rspamd_mempool_t *pool) + rspamd_mempool_t *pool, + struct rspamd_lang_detector *lang_det) { struct rspamd_task *new_task; @@ -82,6 +84,7 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg, gettimeofday (&new_task->tv, NULL); new_task->time_real = rspamd_get_ticks (FALSE); new_task->time_virtual = rspamd_get_virtual_ticks (); + new_task->lang_det = lang_det; if (pool == NULL) { new_task->task_pool = diff --git a/src/libserver/task.h b/src/libserver/task.h index 8ab9514ce..3055c5654 100644 --- a/src/libserver/task.h +++ b/src/libserver/task.h @@ -122,6 +122,7 @@ enum rspamd_task_stage { #define RSPAMD_TASK_IS_PROFILING(task) (((task)->flags & RSPAMD_TASK_FLAG_PROFILE)) struct rspamd_email_address; +struct rspamd_lang_detector; enum rspamd_newlines_type; /** @@ -205,6 +206,7 @@ struct 
rspamd_task { ucl_object_t *settings; /**< Settings applied to task */ const gchar *classifier; /**< Classifier to learn (if needed) */ + struct rspamd_lang_detector *lang_det; /**< Languages detector */ guchar digest[16]; }; @@ -213,7 +215,8 @@ struct rspamd_task { */ struct rspamd_task *rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg, - rspamd_mempool_t *pool); + rspamd_mempool_t *pool, + struct rspamd_lang_detector *lang_det); /** * Destroy task object and remove its IO dispatcher if it exists */ diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 480269b73..cb8d17849 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -700,7 +700,7 @@ lua_util_process_message (lua_State *L) if (cfg != NULL && message != NULL) { base = event_init (); rspamd_init_filters (cfg, FALSE); - task = rspamd_task_new (NULL, cfg, NULL); + task = rspamd_task_new (NULL, cfg, NULL, NULL); task->ev_base = base; task->msg.begin = rspamd_mempool_alloc (task->task_pool, mlen); rspamd_strlcpy ((gpointer)task->msg.begin, message, mlen); diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index a0706830d..a5e6487c1 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -2959,7 +2959,7 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent, gint r, *saved, rules = 0, err_idx; /* Prepare task */ - task = rspamd_task_new (session->wrk, session->cfg, NULL); + task = rspamd_task_new (session->wrk, session->cfg, NULL, NULL); task->cfg = ctx->cfg; task->ev_base = conn_ent->rt->ev_base; saved = rspamd_mempool_alloc0 (session->pool, sizeof (gint)); diff --git a/src/rspamadm/lua_repl.c b/src/rspamadm/lua_repl.c index 0edfeb05e..e0ee7d9f1 100644 --- a/src/rspamadm/lua_repl.c +++ b/src/rspamadm/lua_repl.c @@ -372,7 +372,7 @@ rspamadm_lua_message_handler (lua_State *L, gint argc, gchar **argv) rspamd_printf ("cannot open %s: %s\n", argv[i], strerror (errno)); } else { - task = rspamd_task_new (NULL, NULL, NULL); + task = 
rspamd_task_new (NULL, NULL, NULL, NULL); if (!rspamd_task_load_message (task, NULL, map, len)) { rspamd_printf ("cannot load %s\n", argv[i]); diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c index cf6112b75..2a451ff3e 100644 --- a/src/rspamd_proxy.c +++ b/src/rspamd_proxy.c @@ -1650,7 +1650,7 @@ rspamd_proxy_self_scan (struct rspamd_proxy_session *session) msg = session->client_message; task = rspamd_task_new (session->worker, session->ctx->cfg, - session->pool); + session->pool, session->ctx->lang_det); task->flags |= RSPAMD_TASK_FLAG_MIME; task->sock = -1; diff --git a/src/worker.c b/src/worker.c index 9dc5fec0f..e0d2b4a0b 100644 --- a/src/worker.c +++ b/src/worker.c @@ -101,7 +101,7 @@ rspamd_worker_call_finish_handlers (struct rspamd_worker *worker) if (cfg->finish_callbacks) { ctx = worker->ctx; /* Create a fake task object for async events */ - task = rspamd_task_new (worker, cfg, NULL); + task = rspamd_task_new (worker, cfg, NULL, NULL); task->resolver = ctx->resolver; task->ev_base = ctx->ev_base; task->flags |= RSPAMD_TASK_FLAG_PROCESSING; @@ -368,7 +368,7 @@ accept_socket (gint fd, short what, void *arg) return; } - task = rspamd_task_new (worker, ctx->cfg, NULL); + task = rspamd_task_new (worker, ctx->cfg, NULL, ctx->lang_det); msg_info_task ("accepted connection from %s port %d, task ptr: %p", rspamd_inet_address_to_string (addr), @@ -660,7 +660,7 @@ rspamd_worker_init_scanner (struct rspamd_worker *worker, rspamd_worker_monitored_handler, worker->srv->cfg); - *plang_det = rspamd_language_detector_init (worker->srv->cfg); + *plang_det = worker->srv->cfg->lang_det; } /* -- 2.39.5