From 75d38d207863132e963137578681904f308a2cb1 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 24 Jan 2014 16:44:47 +0000
Subject: [PATCH] Remove HFILTER_URL_ONELINE rule.

- Added functions:
  `rspamd_textpart:get_length` - get a full length of text part
  `rspamd_url:get_length` - get a full length of an URL object
- Removed `HFILTER_URL_ONELINE` as it is broken by nature
- Reworked hfilter to improve performance
---
 conf/lua/hfilter.lua | 42 +++++++++++++++++++-----------------------
 conf/metrics.conf    |  1 -
 src/lua/lua_task.c   | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/conf/lua/hfilter.lua b/conf/lua/hfilter.lua
index f0c4748a8..0ec54f1f4 100644
--- a/conf/lua/hfilter.lua
+++ b/conf/lua/hfilter.lua
@@ -250,28 +250,24 @@ local function hfilter(task)
 	local parts = task:get_text_parts()
 	if parts then
 	--One text part--
-		if table.maxn(parts) > 0 and parts[1]:get_content() then
-			local part_text = trim1(parts[1]:get_content())
-			local total_part_len = string.len(part_text)
-			if total_part_len > 0 then
-				local urls = task:get_urls()
-				if urls then
-					local total_url_len = 0
-					for _,url in ipairs(urls) do
-						total_url_len = total_url_len + string.len(url:get_text())
-					end
-					if total_url_len > 0 then
-						if total_url_len + 7 > total_part_len then
-							task:insert_result('HFILTER_URL_ONLY', 1.00)
-						else
-							if not string.find(part_text, "\n") then
-								task:insert_result('HFILTER_URL_ONELINE', 1.00)
-							end
-						end
-					end
-				end
-			end
-		end
+		local total_parts_len = 0 -- combined length of all text parts (kept local: no global leak between tasks)
+		for _,p in ipairs(parts) do
+			total_parts_len = total_parts_len + p:get_length()
+		end
+		if total_parts_len > 0 then
+			local urls = task:get_urls()
+			if urls then
+				local total_url_len = 0
+				for _,url in ipairs(urls) do
+					total_url_len = total_url_len + url:get_length()
+				end
+				if total_url_len > 0 then
+					if total_url_len + 7 > total_parts_len then -- URL text accounts for (almost) all of the body text
+						task:insert_result('HFILTER_URL_ONLY', 1.00)
+					end
+				end
+			end
+		end
 	end
 
 	return false
@@ -284,4 +280,4 @@ rspamd_config:register_symbols(hfilter, 1.0,
 "HFILTER_FROMHOST_NORESOLVE_MX", "HFILTER_FROMHOST_NORES_A_OR_MX", "HFILTER_FROMHOST_NOT_FQDN",
 "HFILTER_MID_NOT_FQDN",
 "HFILTER_HOSTNAME_NOPTR",
-"HFILTER_URL_ONLY", "HFILTER_URL_ONELINE");
+"HFILTER_URL_ONLY");
diff --git a/conf/metrics.conf b/conf/metrics.conf
index 13b9796af..cba0ce18c 100644
--- a/conf/metrics.conf
+++ b/conf/metrics.conf
@@ -714,5 +714,4 @@ metric {
     symbol { weight = 0.50; name = "HFILTER_MID_NOT_FQDN"; description = "Message-id host not FQDN"; }
     symbol { weight = 4.00; name = "HFILTER_HOSTNAME_NOPTR"; description = "No PTR for IP"; }
     symbol { weight = 3.50; name = "HFILTER_URL_ONLY"; description = "URL only in body"; }
-    symbol { weight = 2.00; name = "HFILTER_URL_ONELINE"; description = "One line URL and text in body"; }
 }
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 6b74bade0..dfb983d4d 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -146,6 +146,7 @@ static const struct luaL_reg tasklib_m[] = {
 
 /* Textpart methods */
 LUA_FUNCTION_DEF (textpart, get_content);
+LUA_FUNCTION_DEF (textpart, get_length);
 LUA_FUNCTION_DEF (textpart, is_empty);
 LUA_FUNCTION_DEF (textpart, is_html);
 LUA_FUNCTION_DEF (textpart, get_fuzzy);
@@ -154,6 +155,7 @@ LUA_FUNCTION_DEF (textpart, compare_distance);
 
 static const struct luaL_reg textpartlib_m[] = {
 	LUA_INTERFACE_DEF (textpart, get_content),
+	LUA_INTERFACE_DEF (textpart, get_length),
 	LUA_INTERFACE_DEF (textpart, is_empty),
 	LUA_INTERFACE_DEF (textpart, is_html),
 	LUA_INTERFACE_DEF (textpart, get_fuzzy),
@@ -197,6 +199,7 @@ static const struct luaL_reg imagelib_m[] = {
 };
 
 /* URL methods */
+LUA_FUNCTION_DEF (url, get_length);
 LUA_FUNCTION_DEF (url, get_host);
 LUA_FUNCTION_DEF (url, get_user);
 LUA_FUNCTION_DEF (url, get_path);
@@ -205,6 +208,7 @@ LUA_FUNCTION_DEF (url, is_phished);
 LUA_FUNCTION_DEF (url, get_phished);
 
 static const struct luaL_reg urllib_m[] = {
+	LUA_INTERFACE_DEF (url, get_length),
 	LUA_INTERFACE_DEF (url, get_host),
 	LUA_INTERFACE_DEF (url, get_user),
 	LUA_INTERFACE_DEF (url, get_path),
@@ -1388,6 +1392,26 @@ lua_textpart_get_content (lua_State * L)
 	return 1;
 }
 
+static gint
+lua_textpart_get_length (lua_State * L)
+{
+	struct mime_text_part *part = lua_check_textpart (L);
+
+	if (part == NULL) {
+		lua_pushnil (L);
+		return 1;
+	}
+
+	if (part->is_empty) {
+		lua_pushnumber (L, 0);
+	}
+	else {
+		lua_pushnumber (L, part->content->len);
+	}
+
+	return 1;
+}
+
 static gint
 lua_textpart_is_empty (lua_State * L)
 {
@@ -1724,6 +1748,20 @@ lua_image_get_filename (lua_State *L)
 }
 
 /* URL part */
+static gint
+lua_url_get_length (lua_State *L)
+{
+	struct uri *url = lua_check_url (L);
+
+	if (url != NULL) {
+		lua_pushinteger (L, strlen (struri (url)));
+	}
+	else {
+		lua_pushnil (L);
+	}
+	return 1;
+}
+
 static gint
 lua_url_get_host (lua_State *L)
 {
-- 
2.39.5