source.dussan.org Git - rspamd.git/commitdiff
[Project] Initialize language detector
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 1 Jan 2018 18:58:01 +0000 (18:58 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 1 Jan 2018 18:58:01 +0000 (18:58 +0000)
src/controller.c
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libserver/task.c
src/libserver/task.h
src/lua/lua_util.c
src/plugins/fuzzy_check.c
src/rspamadm/lua_repl.c
src/rspamd_proxy.c
src/worker.c

index 8e22850e614cb045bb21bba6c156e4f7a64dd023..1367c819b5fc035d5e2b2c250fa2e329f28abdd3 100644 (file)
@@ -28,6 +28,7 @@
 #include "fuzzy_wire.h"
 #include "unix-std.h"
 #include "utlist.h"
+#include "libmime/lang_detection.h"
 #include <math.h>
 
 /* 60 seconds for worker's IO */
@@ -179,6 +180,7 @@ struct rspamd_controller_worker_ctx {
        struct event *rrd_event;
        struct rspamd_rrd_file *rrd;
        struct event save_stats_event;
+       struct rspamd_lang_detector *lang_det;
 };
 
 struct rspamd_controller_plugin_cbdata {
@@ -1482,7 +1484,7 @@ rspamd_controller_handle_lua_history (lua_State *L,
 
                        if (lua_isfunction (L, -1)) {
                                task = rspamd_task_new (session->ctx->worker, session->cfg,
-                                               session->pool);
+                                               session->pool, ctx->lang_det);
 
                                task->resolver = ctx->resolver;
                                task->ev_base = ctx->ev_base;
@@ -1780,7 +1782,8 @@ rspamd_controller_handle_lua (struct rspamd_http_connection_entry *conn_ent,
                return 0;
        }
 
-       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool);
+       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool,
+                       ctx->lang_det);
 
        task->resolver = ctx->resolver;
        task->ev_base = ctx->ev_base;
@@ -1963,7 +1966,8 @@ rspamd_controller_handle_learn_common (
                return 0;
        }
 
-       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool);
+       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool,
+                       session->ctx->lang_det);
 
        task->resolver = ctx->resolver;
        task->ev_base = ctx->ev_base;
@@ -2063,7 +2067,8 @@ rspamd_controller_handle_scan (struct rspamd_http_connection_entry *conn_ent,
                return 0;
        }
 
-       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool);
+       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool,
+                       ctx->lang_det);
        task->ev_base = session->ctx->ev_base;
 
        task->resolver = ctx->resolver;
@@ -2541,9 +2546,10 @@ rspamd_controller_handle_stat_common (
        rspamd_mempool_stat (&mem_st);
        memcpy (&stat_copy, session->ctx->worker->srv->stat, sizeof (stat_copy));
        stat = &stat_copy;
-       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool);
-
        ctx = session->ctx;
+
+       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool,
+                       ctx->lang_det);
        task->resolver = ctx->resolver;
        task->ev_base = ctx->ev_base;
        cbdata = rspamd_mempool_alloc0 (session->pool, sizeof (*cbdata));
@@ -2905,7 +2911,8 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e
                return 0;
        }
 
-       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool);
+       task = rspamd_task_new (session->ctx->worker, session->cfg, session->pool,
+                       ctx->lang_det);
 
        task->resolver = ctx->resolver;
        task->ev_base = ctx->ev_base;
@@ -3622,6 +3629,7 @@ start_controller_worker (struct rspamd_worker *worker)
        g_ptr_array_add (worker->finish_actions,
                        (gpointer)rspamd_controller_on_terminate);
        rspamd_controller_load_saved_stats (ctx);
+       ctx->lang_det = ctx->cfg->lang_det;
 
        /* RRD collector */
        if (ctx->cfg->rrd_file && worker->index == 0) {
index f724a82c8c00333c7e18645ecbc38d1658f529fe..5f04a39a784a3b327bbe8288bfa9eec7053f4108 100644 (file)
@@ -264,6 +264,8 @@ struct rspamd_config_post_load_script {
        struct rspamd_config_post_load_script *prev, *next;
 };
 
+struct rspamd_lang_detector;
+
 /**
  * Structure that stores all config data
  */
@@ -428,6 +430,8 @@ struct rspamd_config {
        gchar *zstd_output_dictionary;                                  /**< path to zstd output dictionary                                             */
        ucl_object_t *neighbours;                                               /**< other servers in the cluster                                               */
 
+       struct rspamd_lang_detector *lang_det;                  /**< language detector                                                                  */
+
        ref_entry_t ref;                                                                /**< reference counter                                                                  */
 };
 
index f2256f55cb80b56a199782f51d21bbef4be57cd7..7fbe7abd423cd38f84ef20eb8aead4dd4f7235ca 100644 (file)
@@ -28,6 +28,7 @@
 #include "cryptobox.h"
 #include "libutil/multipattern.h"
 #include "libmime/email_addr.h"
+#include "libmime/lang_detection.h"
 
 #ifdef HAVE_SYSLOG_H
 #include <syslog.h>
@@ -3688,6 +3689,8 @@ rspamd_config_read (struct rspamd_config *cfg, const gchar *filename,
                return FALSE;
        }
 
+       cfg->lang_det = rspamd_language_detector_init (cfg);
+
        return TRUE;
 }
 
index bf66ec030dc80338f0e60262d4aff3fae5ed4425..2c014a7d1b81249d8e66d426bf2de440655da567 100644 (file)
@@ -26,6 +26,7 @@
 #include "utlist.h"
 #include "contrib/zstd/zstd.h"
 #include "libserver/mempool_vars_internal.h"
+#include "libmime/lang_detection.h"
 #include <math.h>
 
 /*
@@ -61,7 +62,8 @@ rspamd_request_header_dtor (gpointer p)
  */
 struct rspamd_task *
 rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
-               rspamd_mempool_t *pool)
+               rspamd_mempool_t *pool,
+               struct rspamd_lang_detector *lang_det)
 {
        struct rspamd_task *new_task;
 
@@ -82,6 +84,7 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
        gettimeofday (&new_task->tv, NULL);
        new_task->time_real = rspamd_get_ticks (FALSE);
        new_task->time_virtual = rspamd_get_virtual_ticks ();
+       new_task->lang_det = lang_det;
 
        if (pool == NULL) {
                new_task->task_pool =
index 8ab9514ce585e22cdd00769cc9276ac239c1d202..3055c5654046eac79d0a8d1ec55117d247c3420b 100644 (file)
@@ -122,6 +122,7 @@ enum rspamd_task_stage {
 #define RSPAMD_TASK_IS_PROFILING(task) (((task)->flags & RSPAMD_TASK_FLAG_PROFILE))
 
 struct rspamd_email_address;
+struct rspamd_lang_detector;
 enum rspamd_newlines_type;
 
 /**
@@ -205,6 +206,7 @@ struct rspamd_task {
        ucl_object_t *settings;                                                 /**< Settings applied to task                                           */
 
        const gchar *classifier;                                                /**< Classifier to learn (if needed)                            */
+       struct rspamd_lang_detector *lang_det;                  /**< Languages detector                                                         */
        guchar digest[16];
 };
 
@@ -213,7 +215,8 @@ struct rspamd_task {
  */
 struct rspamd_task *rspamd_task_new (struct rspamd_worker *worker,
                struct rspamd_config *cfg,
-               rspamd_mempool_t *pool);
+               rspamd_mempool_t *pool,
+               struct rspamd_lang_detector *lang_det);
 /**
  * Destroy task object and remove its IO dispatcher if it exists
  */
index 480269b733293f8685fc86874d53d31a71518d3c..cb8d178495ea056b0a1062d0ec5121f6f417a484 100644 (file)
@@ -700,7 +700,7 @@ lua_util_process_message (lua_State *L)
        if (cfg != NULL && message != NULL) {
                base = event_init ();
                rspamd_init_filters (cfg, FALSE);
-               task = rspamd_task_new (NULL, cfg, NULL);
+               task = rspamd_task_new (NULL, cfg, NULL, NULL);
                task->ev_base = base;
                task->msg.begin = rspamd_mempool_alloc (task->task_pool, mlen);
                rspamd_strlcpy ((gpointer)task->msg.begin, message, mlen);
index a0706830d1c8cd34904210a601e591aa60933c35..a5e6487c1691bf3d6484edd03ca409b8bb4c9a3b 100644 (file)
@@ -2959,7 +2959,7 @@ fuzzy_process_handler (struct rspamd_http_connection_entry *conn_ent,
        gint r, *saved, rules = 0, err_idx;
 
        /* Prepare task */
-       task = rspamd_task_new (session->wrk, session->cfg, NULL);
+       task = rspamd_task_new (session->wrk, session->cfg, NULL, NULL);
        task->cfg = ctx->cfg;
        task->ev_base = conn_ent->rt->ev_base;
        saved = rspamd_mempool_alloc0 (session->pool, sizeof (gint));
index 0edfeb05e023bef45cb7a7936f61a1a6039d5da3..e0ee7d9f178b03c60fc85cc8811d4f0789d66ae2 100644 (file)
@@ -372,7 +372,7 @@ rspamadm_lua_message_handler (lua_State *L, gint argc, gchar **argv)
                        rspamd_printf ("cannot open %s: %s\n", argv[i], strerror (errno));
                }
                else {
-                       task = rspamd_task_new (NULL, NULL, NULL);
+                       task = rspamd_task_new (NULL, NULL, NULL, NULL);
 
                        if (!rspamd_task_load_message (task, NULL, map, len)) {
                                rspamd_printf ("cannot load %s\n", argv[i]);
index cf6112b7588bb27e90b199a831ddfb6da728c596..2a451ff3e18d98c2c2c0d499acb441145a2c2188 100644 (file)
@@ -1650,7 +1650,7 @@ rspamd_proxy_self_scan (struct rspamd_proxy_session *session)
 
        msg = session->client_message;
        task = rspamd_task_new (session->worker, session->ctx->cfg,
-                       session->pool);
+                       session->pool, session->ctx->lang_det);
        task->flags |= RSPAMD_TASK_FLAG_MIME;
        task->sock = -1;
 
index 9dc5fec0fb76ee735cdec6363825b7cca08d2359..e0d2b4a0bf4b284b2365a96318007dadca14bcf4 100644 (file)
@@ -101,7 +101,7 @@ rspamd_worker_call_finish_handlers (struct rspamd_worker *worker)
        if (cfg->finish_callbacks) {
                ctx = worker->ctx;
                /* Create a fake task object for async events */
-               task = rspamd_task_new (worker, cfg, NULL);
+               task = rspamd_task_new (worker, cfg, NULL, NULL);
                task->resolver = ctx->resolver;
                task->ev_base = ctx->ev_base;
                task->flags |= RSPAMD_TASK_FLAG_PROCESSING;
@@ -368,7 +368,7 @@ accept_socket (gint fd, short what, void *arg)
                return;
        }
 
-       task = rspamd_task_new (worker, ctx->cfg, NULL);
+       task = rspamd_task_new (worker, ctx->cfg, NULL, ctx->lang_det);
 
        msg_info_task ("accepted connection from %s port %d, task ptr: %p",
                rspamd_inet_address_to_string (addr),
@@ -660,7 +660,7 @@ rspamd_worker_init_scanner (struct rspamd_worker *worker,
                        rspamd_worker_monitored_handler,
                        worker->srv->cfg);
 
-       *plang_det = rspamd_language_detector_init (worker->srv->cfg);
+       *plang_det = worker->srv->cfg->lang_det;
 }
 
 /*