[Project] Try to do string concatenation in C

author Vsevolod Stakhov <vsevolod@rspamd.com>

Thu, 14 Dec 2023 16:34:35 +0000 (16:34 +0000)

committer Vsevolod Stakhov <vsevolod@rspamd.com>

Thu, 14 Dec 2023 16:34:35 +0000 (16:34 +0000)
author Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 14 Dec 2023 16:34:35 +0000 (16:34 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 14 Dec 2023 16:34:35 +0000 (16:34 +0000)
diff --git a/lualib/redis_scripts/bayes_classify.lua b/lualib/redis_scripts/bayes_classify.lua

index 1a4734cf5d21c0991e4c5ef42e982152b921a6f7..e94f645fdf8e7a934af1e95b1e8e46e6d3fd00e3 100644 (file)
--- a/lualib/redis_scripts/bayes_classify.lua
+++ b/lualib/redis_scripts/bayes_classify.lua
@@ -9,7 +9,6 @@ local output_ham = {}
  
  local learned_ham = tonumber(redis.call('HGET', prefix, 'learns_ham')) or 0
  local learned_spam = tonumber(redis.call('HGET', prefix, 'learns_spam')) or 0
-local prefix_underscore = prefix .. '_'
  
  -- Output is a set of pairs (token_index, token_count), tokens that are not
  -- found are not filled.
@@ -18,11 +17,11 @@ local prefix_underscore = prefix .. '_'
  if learned_ham > 0 and learned_spam > 0 then
    local input_tokens = cmsgpack.unpack(KEYS[2])
    for i, token in ipairs(input_tokens) do
-    local token_data = redis.call('HMGET', prefix_underscore .. token, 'H', 'S')
+    local token_data = redis.call('HMGET', token, 'H', 'S')
  
      if token_data then
        local ham_count = token_data[1]
-      local spam_count = tonumber(token_data[2]) or 0
+      local spam_count = token_data[2]
  
        if ham_count then
          table.insert(output_ham, { i, tonumber(ham_count) })
diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua

index 7536f680852a397e016ea68bff15419b4a68a0ac..244be43f66bc1115b64dcea28803cf186b80776c 100644 (file)
--- a/lualib/redis_scripts/bayes_learn.lua
+++ b/lualib/redis_scripts/bayes_learn.lua
@@ -12,7 +12,6 @@ local symbol = KEYS[3]
  local is_unlearn = KEYS[4] == 'true' and true or false
  local input_tokens = cmsgpack.unpack(KEYS[5])
  
-local prefix_underscore = prefix .. '_'
  local hash_key = is_spam and 'S' or 'H'
  local learned_key = is_spam and 'learns_spam' or 'learns_ham'
  
@@ -21,5 +20,5 @@ redis.call('HSET', prefix, 'version', '2') -- new schema
  redis.call('HINCRBY', prefix, learned_key, is_unlearn and -1 or 1) -- increase or decrease learned count
  
  for _, token in ipairs(input_tokens) do
-  redis.call('HINCRBY', prefix_underscore .. token, hash_key, 1)
+  redis.call('HINCRBY', token, hash_key, 1)
  end
 \ No newline at end of file
diff --git a/src/libstat/backends/redis_backend.cxx b/src/libstat/backends/redis_backend.cxx

index 5e222c6cb0e3cf89bef2bd9b3db77e4df7b7f388..7bebeb1ed39fbc13820a4c14ed818fd11746197d 100644 (file)
--- a/src/libstat/backends/redis_backend.cxx
+++ b/src/libstat/backends/redis_backend.cxx
@@ -646,11 +646,12 @@ void rspamd_redis_close(gpointer p)
   * Serialise stat tokens to message pack
   */
  static char *
-rspamd_redis_serialize_tokens(struct rspamd_task *task, GPtrArray *tokens, gsize *ser_len)
+rspamd_redis_serialize_tokens(struct rspamd_task *task, const gchar *prefix, GPtrArray *tokens, gsize *ser_len)
  {
         /* Each token is int64_t that requires 10 bytes (2 int32_t) + 4 bytes array len + 1 byte array magic */
         char max_int64_str[] = "18446744073709551615";
-       auto req_len = tokens->len * sizeof(max_int64_str) + 5;
+       auto prefix_len = strlen(prefix);
+       auto req_len = tokens->len * (sizeof(max_int64_str) + prefix_len + 1) + 5;
         rspamd_token_t *tok;
  
         auto *buf = (gchar *) rspamd_mempool_alloc(task->task_pool, req_len);
@@ -667,8 +668,8 @@ rspamd_redis_serialize_tokens(struct rspamd_task *task, GPtrArray *tokens, gsize
         int i;
         PTR_ARRAY_FOREACH(tokens, i, tok)
         {
-               char numbuf[sizeof(max_int64_str)];
-               auto r = rspamd_snprintf(numbuf, sizeof(numbuf), "%uL", tok->data);
+               char *numbuf = (char *) g_alloca(sizeof(max_int64_str) + prefix_len + 1);
+               auto r = rspamd_snprintf(numbuf, sizeof(numbuf), "%s_%uL", tok->data);
                 *p++ = (gchar) ((r & 0xff) | 0xa0);
  
                 memcpy(p, &numbuf, r);
author	Vsevolod Stakhov <vsevolod@rspamd.com>
	Thu, 14 Dec 2023 16:34:35 +0000 (16:34 +0000)
committer	Vsevolod Stakhov <vsevolod@rspamd.com>
	Thu, 14 Dec 2023 16:34:35 +0000 (16:34 +0000)
lualib/redis_scripts/bayes_classify.lua		patch \| blob \| history
lualib/redis_scripts/bayes_learn.lua		patch \| blob \| history
src/libstat/backends/redis_backend.cxx		patch \| blob \| history