diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-09-07 16:48:54 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-09-07 16:48:54 +0100 |
commit | 9c48415dcb28adef6bea4784a021085aa6111402 (patch) | |
tree | 97203164b0008c3003c2d431c070e16d9b639528 | |
parent | a94c61767322db73dbedb98f58347a3c7f6fa982 (diff) | |
parent | 2830cd13e47cf2fa6505cf52464f46b01c3237b0 (diff) | |
download | rspamd-9c48415dcb28adef6bea4784a021085aa6111402.tar.gz rspamd-9c48415dcb28adef6bea4784a021085aa6111402.zip |
Merge branch 'master' of github.com:vstakhov/rspamd
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | conf/lua/regexp/headers.lua | 26 | ||||
-rw-r--r-- | doc/Makefile | 6 | ||||
m--------- | doc/doxydown | 0 | ||||
-rw-r--r-- | src/lua/lua_task.c | 171 |
5 files changed, 191 insertions, 15 deletions
diff --git a/.gitmodules b/.gitmodules index 9926aac01..5a6389bcd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "src/ucl"] path = src/ucl url = git://github.com/vstakhov/libucl +[submodule "doc/doxydown"] + path = doc/doxydown + url = https://github.com/vstakhov/doxydown.git diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua index e2b547e6d..af7eb5fa7 100644 --- a/conf/lua/regexp/headers.lua +++ b/conf/lua/regexp/headers.lua @@ -153,8 +153,8 @@ reconf['CC_EXCESS_QP'] = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime) -- OE X-Mailer header local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H' -- OE Message ID format -local oe_msgid_1 = 'Message-Id=/^[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com$/mH' -local oe_msgid_2 = 'Message-Id=/^(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+$/mH' +local oe_msgid_1 = 'Message-Id=/^<?[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com>?$/mH' +local oe_msgid_2 = 'Message-Id=/^<?(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+>?$/mH' -- EZLM remail of message local lyris_ezml_remailer = 'List-Unsubscribe=/<mailto:(?:leave-\\S+|\\S+-unsubscribe)\\@\\S+>$/H' -- Header of wacky sendmail @@ -162,9 +162,9 @@ local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H' -- Iplanet received header local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H' -- Hotmail message id -local hotmail_baydav_msgid = 'Message-Id=/^BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl$/mH' +local hotmail_baydav_msgid = 'Message-Id=/^<?BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl?>$/mH' -- Sympatico message id -local sympatico_msgid = 'Message-Id=/^BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE$/mH' +local sympatico_msgid = 'Message-Id=/^<?BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE>?$/mH' -- Mailman message id local mailman_msgid = 'Message-ID=/^<mailman\\.\\d+\\.\\d+\\.\\d+\\..+\\@\\S+>$/XS' -- Message id seems to be forged @@ -174,9 +174,9 @@ local unusable_msgid = 
string.format('(%s | %s | %s | %s | %s | %s)', local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid) -- Outlook specific headers local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H' -local outlook_dollars_other = 'Message-Id=/^\\!\\~\\!/mH' -local vista_msgid = 'Message-Id=/^[A-F\\d]{32}\\@\\S+$/mH' -local ims_msgid = 'Message-Id=/^[A-F\\d]{36,40}\\@\\S+$/mH' +local outlook_dollars_other = 'Message-Id=/^<?\\!\\~\\!>?/mH' +local vista_msgid = 'Message-Id=/^<?[A-F\\d]{32}\\@\\S+>?$/mH' +local ims_msgid = 'Message-Id=/^<?[A-F\\d]{36,40}\\@\\S+>?$/mH' -- Forged outlook headers local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s', outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid) @@ -212,9 +212,9 @@ reconf['SUSPICIOUS_BOUNDARY4'] = string.format('(%s) & (%s)', suspicious_boundar -- The Bat! X-Mailer header local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H' -- The Bat! common Message-ID template -local thebat_msgid_common = 'Message-ID=/^\\d+\\.\\d+\\@\\S+$/mH' +local thebat_msgid_common = 'Message-ID=/^<?\\d+\\.\\d+\\@\\S+>?$/mH' -- Correct The Bat! Message-ID template -local thebat_msgid = 'Message-ID=/^\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+/mH' +local thebat_msgid = 'Message-ID=/^<?\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+>?/mH' -- Summary rule for forged The Bat! Message-ID header reconf['FORGED_MUA_THEBAT_MSGID'] = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid) -- Summary rule for forged The Bat! 
Message-ID header with unknown template @@ -225,7 +225,7 @@ reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s) -- KMail User-Agent header local kmail_mua = 'User-Agent=/^\\s*KMail\\/1\\.\\d+\\.\\d+/H' -- KMail common Message-ID template -local kmail_msgid_common = 'Message-Id=/^\\s*\\d+\\.\\d+\\.\\S+\\@\\S+$/mH' +local kmail_msgid_common = 'Message-Id=/^<?\\s*\\d+\\.\\d+\\.\\S+\\@\\S+>?$/mH' function kmail_msgid (task) local regexp_text = '<(\\S+)>\\|(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)\\.\\d+\\.\\1$' local re = rspamd_regexp.create_cached(regexp_text) @@ -245,11 +245,11 @@ reconf['FORGED_MUA_KMAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s)', -- Opera Mail User-Agent header local opera1x_mua = 'User-Agent=/^\\s*Opera Mail\\/1[01]\\.\\d+ /H' -- Opera Mail Message-ID template -local opera1x_msgid = 'Message-ID=/^op\\.[a-z\\d]{14}\\@\\S+$/mHS' +local opera1x_msgid = 'Message-ID=/^<?op\\.[a-z\\d]{14}\\@\\S+>?$/mHS' -- Suspicious Opera Mail User-Agent header local suspicious_opera10w_mua = 'User-Agent=/^\\s*Opera Mail\\/10\\.\\d+ \\(Windows\\)$/H' -- Suspicious Opera Mail Message-ID, apparently from KMail -local suspicious_opera10w_msgid = 'Message-Id=/^2009\\d{8}\\.\\d+\\.\\S+\\@\\S+$/mHS' +local suspicious_opera10w_msgid = 'Message-Id=/^<?2009\\d{8}\\.\\d+\\.\\S+\\@\\S+?>$/mHS' -- Summary rule for forged Opera Mail User-Agent header and Message-ID header from KMail reconf['SUSPICIOUS_OPERA_10W_MSGID'] = string.format('(%s) & (%s)', suspicious_opera10w_mua, suspicious_opera10w_msgid) -- Summary rule for forged Opera Mail Message-ID header @@ -277,7 +277,7 @@ reconf['FORGED_MUA_SEAMONKEY_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(% -- Message id validity -local sane_msgid = 'Message-Id=/^[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\s*$/mH' +local sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ 
\\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/mH' local msgid_comment = 'Message-Id=/\\(.*\\)/mH' reconf['INVALID_MSGID'] = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment) diff --git a/doc/Makefile b/doc/Makefile index 55947cb19..342b8ae52 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,7 +1,7 @@ # A simple makefile to generate documentation from .md using pandoc PANDOC ?= pandoc -LUADOC ?= ./lua_api.pl +LUADOC ?= doxydown/doxydown.pl all: man @@ -12,7 +12,7 @@ rspamd.8: rspamd.8.md rspamc.1: rspamc.1.md $(PANDOC) -s -f markdown -t man -o rspamc.1 rspamc.1.md -lua-doc: lua_regexp lua_ip lua_config +lua-doc: lua_regexp lua_ip lua_config lua_task lua_regexp: ../src/lua/lua_regexp.c $(LUADOC) < ../src/lua/lua_regexp.c > markdown/lua/regexp.md @@ -20,3 +20,5 @@ lua_ip: ../src/lua/lua_ip.c $(LUADOC) < ../src/lua/lua_ip.c > markdown/lua/ip.md lua_config: ../src/lua/lua_config.c $(LUADOC) < ../src/lua/lua_config.c > markdown/lua/config.md +lua_task: ../src/lua/lua_task.c + $(LUADOC) < ../src/lua/lua_task.c > markdown/lua/task.md diff --git a/doc/doxydown b/doc/doxydown new file mode 160000 +Subproject 0284fa5cb263b787b50cb75997617acdf29ba3e diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index d5cf31c75..05c4730e4 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -39,27 +39,198 @@ #include "statfile_sync.h" #include "diff.h" +/*** + * @module rspamd_task + * This module provides routines for tasks manipulation in rspamd. Tasks usually + * represent messages being scanned, and this API provides access to such elements + * as headers, symbols, metrics and so on and so forth. Normally, task objects + * are passed to the lua callbacks allowing to check specific properties of messages + * and add the corresponding symbols to the scan's results. 
+@example +rspamd_config.DATE_IN_PAST = function(task) + if rspamd_config:get_api_version() >= 5 then + local dm = task:get_date{format = 'message', gmt = true} + local dt = task:get_date{format = 'connect', gmt = true} + -- A day + if dt - dm > 86400 then + return true + end + end + + return false +end + */ + /* Task creation */ +/*** + * @function rspamd_task.create_empty() + * Creates new empty task object. + * @return {rspamd_task} task object + */ LUA_FUNCTION_DEF (task, create_empty); +/*** + * @function rspamd_task.create_from_buffer(input) + * Creates new task object and loads its content from the string provided. + * @param {string} input string that contains MIME message + * @return {rspamd_task} task object + */ LUA_FUNCTION_DEF (task, create_from_buffer); /* Task methods */ LUA_FUNCTION_DEF (task, get_message); LUA_FUNCTION_DEF (task, process_message); +/*** + * @method task:get_cfg() + * Get configuration object for a task. + * @return {rspamd_config} [configuration object](config.md) for the task + */ LUA_FUNCTION_DEF (task, get_cfg); LUA_FUNCTION_DEF (task, set_cfg); LUA_FUNCTION_DEF (task, destroy); +/*** + * @method task:get_mempool() + * Returns memory pool valid for a lifetime of task. It is used internally by + * many rspamd routines. + * @return {rspamd_mempool} memory pool object + */ LUA_FUNCTION_DEF (task, get_mempool); +/*** + * @method task:get_session() + * Returns asynchronous session object that is used by many rspamd asynchronous + * utilities internally. + * @return {rspamd_session} session object + */ LUA_FUNCTION_DEF (task, get_session); +/*** + * @method task:get_ev_base() + * Return asynchronous event base for using in callbacks and resolver. + * @return {rspamd_ev_base} event base + */ LUA_FUNCTION_DEF (task, get_ev_base); +/*** + * @method task:insert_result(symbol, weight[, option1, ...]) + * Insert specific symbol to the tasks scanning results assigning the initial + * weight to it.
+ * @param {string} symbol symbol to insert + * @param {number} weight initial weight (this weight is multiplied by the metric weight) + * @param {string} options list of optional options attached to a symbol inserted +@example +local function cb(task) + if task:get_header('Some header') then + task:insert_result('SOME_HEADER', 1.0, 'Got some header') + end +end + */ LUA_FUNCTION_DEF (task, insert_result); +/*** + * @method task:set_pre_result(action, description) + * Sets pre-result for a task. It is used in pre-filters to specify early results + * of the task scanned. If a pre-filter sets some result, then further processing + * may be skipped. For selecting action it is possible to use global table + * `rspamd_actions`. + * @param {rspamd_action} action a numeric action value + * @param {string} description string description +@example +local function cb(task) + local gr = task:get_header('Greylist') + if gr and gr == 'greylist' then + task:set_pre_result(rspamd_actions['greylist'], 'Greylisting required') + end +end + */ LUA_FUNCTION_DEF (task, set_pre_result); +/*** + * @method task:get_urls() + * Get all URLs found in a message. + * @return {table rspamd_url} list of all urls found +@example +local function phishing_cb(task) + local urls = task:get_urls(); + + if urls then + for _,url in ipairs(urls) do + if url:is_phished() then + return true + end + end + end + return false +end + */ LUA_FUNCTION_DEF (task, get_urls); +/*** + * @method task:get_emails() + * Get all email addresses found in a message.
+ * @return {table rspamd_url} list of all email addresses found + */ LUA_FUNCTION_DEF (task, get_emails); +/*** + * @method task:get_text_parts() + * Get all text (and HTML) parts found in a message + * @return {table rspamd_text_part} list of text parts + */ LUA_FUNCTION_DEF (task, get_text_parts); +/*** + * @method task:get_parts() + * Get all mime parts found in a message + * @return {table rspamd_mime_part} list of mime parts + */ LUA_FUNCTION_DEF (task, get_parts); +/*** + * @method task:get_header(name[, case_sensitive]) + * Get decoded value of a header specified with optional case_sensitive flag. + * By default headers are searched in caseless manner. + * @param {string} name name of header to get + * @param {boolean} case_sensitive case sensitiveness flag to search for a header + * @return {string} decoded value of a header + */ LUA_FUNCTION_DEF (task, get_header); +/*** + * @method task:get_header_raw(name[, case_sensitive]) + * Get raw value of a header specified with optional case_sensitive flag. + * By default headers are searched in caseless manner. + * @param {string} name name of header to get + * @param {boolean} case_sensitive case sensitiveness flag to search for a header + * @return {string} raw value of a header + */ LUA_FUNCTION_DEF (task, get_header_raw); +/*** + * @method task:get_header_full(name[, case_sensitive]) + * Get raw value of a header specified with optional case_sensitive flag. + * By default headers are searched in caseless manner.
This method returns more + * information about the header as a list of tables with the following structure: + * + * - `name` - name of a header + * - `value` - raw value of a header + * - `decoded` - decoded value of a header + * - `tab_separated` - `true` if a header and a value are separated by `tab` character + * - `empty_separator` - `true` if there is no separator between a header and a value + * @param {string} name name of header to get + * @param {boolean} case_sensitive case sensitiveness flag to search for a header + * @return {list of tables} all values of a header as specified above +@example +function check_header_delimiter_tab(task, header_name) + for _,rh in ipairs(task:get_header_full(header_name)) do + if rh['tab_separated'] then return true end + end + return false +end + */ LUA_FUNCTION_DEF (task, get_header_full); +/*** + * @method task:get_received_headers() + * Returns a list of tables of parsed received headers. The tables returned have + * the following structure: + * + * - `from_hostname` - string that represents hostname provided by a peer + * - `from_ip` - string representation of IP address as provided by a peer + * - `real_hostname` - hostname as resolved by MTA + * - `real_ip` - string representation of IP as resolved by PTR request of MTA + * - `by_hostname` - MTA hostname + * + * Please note that in some situations rspamd cannot parse all the fields of received headers. + * In that case you should check all strings for validity. + * @return {table of tables} list of received headers described above + */ LUA_FUNCTION_DEF (task, get_received_headers); LUA_FUNCTION_DEF (task, get_resolver); LUA_FUNCTION_DEF (task, inc_dns_req); |