summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-09-07 16:48:54 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-09-07 16:48:54 +0100
commit9c48415dcb28adef6bea4784a021085aa6111402 (patch)
tree97203164b0008c3003c2d431c070e16d9b639528
parenta94c61767322db73dbedb98f58347a3c7f6fa982 (diff)
parent2830cd13e47cf2fa6505cf52464f46b01c3237b0 (diff)
downloadrspamd-9c48415dcb28adef6bea4784a021085aa6111402.tar.gz
rspamd-9c48415dcb28adef6bea4784a021085aa6111402.zip
Merge branch 'master' of github.com:vstakhov/rspamd
-rw-r--r--.gitmodules3
-rw-r--r--conf/lua/regexp/headers.lua26
-rw-r--r--doc/Makefile6
m---------doc/doxydown0
-rw-r--r--src/lua/lua_task.c171
5 files changed, 191 insertions, 15 deletions
diff --git a/.gitmodules b/.gitmodules
index 9926aac01..5a6389bcd 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
[submodule "src/ucl"]
path = src/ucl
url = git://github.com/vstakhov/libucl
+[submodule "doc/doxydown"]
+ path = doc/doxydown
+ url = https://github.com/vstakhov/doxydown.git
diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua
index e2b547e6d..af7eb5fa7 100644
--- a/conf/lua/regexp/headers.lua
+++ b/conf/lua/regexp/headers.lua
@@ -153,8 +153,8 @@ reconf['CC_EXCESS_QP'] = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime)
-- OE X-Mailer header
local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H'
-- OE Message ID format
-local oe_msgid_1 = 'Message-Id=/^[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com$/mH'
-local oe_msgid_2 = 'Message-Id=/^(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+$/mH'
+local oe_msgid_1 = 'Message-Id=/^<?[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com>?$/mH'
+local oe_msgid_2 = 'Message-Id=/^<?(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+>?$/mH'
-- EZLM remail of message
local lyris_ezml_remailer = 'List-Unsubscribe=/<mailto:(?:leave-\\S+|\\S+-unsubscribe)\\@\\S+>$/H'
-- Header of wacky sendmail
@@ -162,9 +162,9 @@ local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H'
-- Iplanet received header
local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H'
-- Hotmail message id
-local hotmail_baydav_msgid = 'Message-Id=/^BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl$/mH'
+local hotmail_baydav_msgid = 'Message-Id=/^<?BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl?>$/mH'
-- Sympatico message id
-local sympatico_msgid = 'Message-Id=/^BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE$/mH'
+local sympatico_msgid = 'Message-Id=/^<?BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE>?$/mH'
-- Mailman message id
local mailman_msgid = 'Message-ID=/^<mailman\\.\\d+\\.\\d+\\.\\d+\\..+\\@\\S+>$/XS'
-- Message id seems to be forged
@@ -174,9 +174,9 @@ local unusable_msgid = string.format('(%s | %s | %s | %s | %s | %s)',
local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid)
-- Outlook specific headers
local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H'
-local outlook_dollars_other = 'Message-Id=/^\\!\\~\\!/mH'
-local vista_msgid = 'Message-Id=/^[A-F\\d]{32}\\@\\S+$/mH'
-local ims_msgid = 'Message-Id=/^[A-F\\d]{36,40}\\@\\S+$/mH'
+local outlook_dollars_other = 'Message-Id=/^<?\\!\\~\\!>?/mH'
+local vista_msgid = 'Message-Id=/^<?[A-F\\d]{32}\\@\\S+>?$/mH'
+local ims_msgid = 'Message-Id=/^<?[A-F\\d]{36,40}\\@\\S+>?$/mH'
-- Forged outlook headers
local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s',
outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid)
@@ -212,9 +212,9 @@ reconf['SUSPICIOUS_BOUNDARY4'] = string.format('(%s) & (%s)', suspicious_boundar
-- The Bat! X-Mailer header
local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H'
-- The Bat! common Message-ID template
-local thebat_msgid_common = 'Message-ID=/^\\d+\\.\\d+\\@\\S+$/mH'
+local thebat_msgid_common = 'Message-ID=/^<?\\d+\\.\\d+\\@\\S+>?$/mH'
-- Correct The Bat! Message-ID template
-local thebat_msgid = 'Message-ID=/^\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+/mH'
+local thebat_msgid = 'Message-ID=/^<?\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+>?/mH'
-- Summary rule for forged The Bat! Message-ID header
reconf['FORGED_MUA_THEBAT_MSGID'] = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid)
-- Summary rule for forged The Bat! Message-ID header with unknown template
@@ -225,7 +225,7 @@ reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s)
-- KMail User-Agent header
local kmail_mua = 'User-Agent=/^\\s*KMail\\/1\\.\\d+\\.\\d+/H'
-- KMail common Message-ID template
-local kmail_msgid_common = 'Message-Id=/^\\s*\\d+\\.\\d+\\.\\S+\\@\\S+$/mH'
+local kmail_msgid_common = 'Message-Id=/^<?\\s*\\d+\\.\\d+\\.\\S+\\@\\S+>?$/mH'
function kmail_msgid (task)
local regexp_text = '<(\\S+)>\\|(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)\\.\\d+\\.\\1$'
local re = rspamd_regexp.create_cached(regexp_text)
@@ -245,11 +245,11 @@ reconf['FORGED_MUA_KMAIL_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%s)',
-- Opera Mail User-Agent header
local opera1x_mua = 'User-Agent=/^\\s*Opera Mail\\/1[01]\\.\\d+ /H'
-- Opera Mail Message-ID template
-local opera1x_msgid = 'Message-ID=/^op\\.[a-z\\d]{14}\\@\\S+$/mHS'
+local opera1x_msgid = 'Message-ID=/^<?op\\.[a-z\\d]{14}\\@\\S+>?$/mHS'
-- Suspicious Opera Mail User-Agent header
local suspicious_opera10w_mua = 'User-Agent=/^\\s*Opera Mail\\/10\\.\\d+ \\(Windows\\)$/H'
-- Suspicious Opera Mail Message-ID, apparently from KMail
-local suspicious_opera10w_msgid = 'Message-Id=/^2009\\d{8}\\.\\d+\\.\\S+\\@\\S+$/mHS'
+local suspicious_opera10w_msgid = 'Message-Id=/^<?2009\\d{8}\\.\\d+\\.\\S+\\@\\S+?>$/mHS'
-- Summary rule for forged Opera Mail User-Agent header and Message-ID header from KMail
reconf['SUSPICIOUS_OPERA_10W_MSGID'] = string.format('(%s) & (%s)', suspicious_opera10w_mua, suspicious_opera10w_msgid)
-- Summary rule for forged Opera Mail Message-ID header
@@ -277,7 +277,7 @@ reconf['FORGED_MUA_SEAMONKEY_MSGID_UNKNOWN'] = string.format('(%s) & !(%s) & !(%
-- Message id validity
-local sane_msgid = 'Message-Id=/^[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\s*$/mH'
+local sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/mH'
local msgid_comment = 'Message-Id=/\\(.*\\)/mH'
reconf['INVALID_MSGID'] = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment)
diff --git a/doc/Makefile b/doc/Makefile
index 55947cb19..342b8ae52 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -1,7 +1,7 @@
# A simple makefile to generate documentation from .md using pandoc
PANDOC ?= pandoc
-LUADOC ?= ./lua_api.pl
+LUADOC ?= doxydown/doxydown.pl
all: man
@@ -12,7 +12,7 @@ rspamd.8: rspamd.8.md
rspamc.1: rspamc.1.md
$(PANDOC) -s -f markdown -t man -o rspamc.1 rspamc.1.md
-lua-doc: lua_regexp lua_ip lua_config
+lua-doc: lua_regexp lua_ip lua_config lua_task
lua_regexp: ../src/lua/lua_regexp.c
$(LUADOC) < ../src/lua/lua_regexp.c > markdown/lua/regexp.md
@@ -20,3 +20,5 @@ lua_ip: ../src/lua/lua_ip.c
$(LUADOC) < ../src/lua/lua_ip.c > markdown/lua/ip.md
lua_config: ../src/lua/lua_config.c
$(LUADOC) < ../src/lua/lua_config.c > markdown/lua/config.md
+lua_task: ../src/lua/lua_task.c
+ $(LUADOC) < ../src/lua/lua_task.c > markdown/lua/task.md
diff --git a/doc/doxydown b/doc/doxydown
new file mode 160000
+Subproject 0284fa5cb263b787b50cb75997617acdf29ba3e
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index d5cf31c75..05c4730e4 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -39,27 +39,198 @@
#include "statfile_sync.h"
#include "diff.h"
+/***
+ * @module rspamd_task
+ * This module provides routines for task manipulation in rspamd. Tasks usually
+ * represent messages being scanned, and this API provides access to such elements
+ * as headers, symbols, metrics and so on. Normally, task objects
+ * are passed to the lua callbacks, allowing them to check specific properties of
+ * messages and add the corresponding symbols to the scan's results.
+@example
+rspamd_config.DATE_IN_PAST = function(task)
+ if rspamd_config:get_api_version() >= 5 then
+ local dm = task:get_date{format = 'message', gmt = true}
+ local dt = task:get_date{format = 'connect', gmt = true}
+ -- A day
+ if dt - dm > 86400 then
+ return true
+ end
+ end
+
+ return false
+end
+ */
+
/* Task creation */
+/***
+ * @function rspamd_task.create_empty()
+ * Creates new empty task object.
+ * @return {rspamd_task} task object
+ */
LUA_FUNCTION_DEF (task, create_empty);
+/***
+ * @function rspamd_task.create_from_buffer(input)
+ * Creates a new task object and loads its content from the string provided.
+ * @param {string} input string that contains MIME message
+ * @return {rspamd_task} task object
+ */
LUA_FUNCTION_DEF (task, create_from_buffer);
/* Task methods */
LUA_FUNCTION_DEF (task, get_message);
LUA_FUNCTION_DEF (task, process_message);
+/***
+ * @method task:get_cfg()
+ * Get configuration object for a task.
+ * @return {rspamd_config} [configuration object](config.md) for the task
+ */
LUA_FUNCTION_DEF (task, get_cfg);
LUA_FUNCTION_DEF (task, set_cfg);
LUA_FUNCTION_DEF (task, destroy);
+/***
+ * @method task:get_mempool()
+ * Returns memory pool valid for a lifetime of task. It is used internally by
+ * many rspamd routines.
+ * @return {rspamd_mempool} memory pool object
+ */
LUA_FUNCTION_DEF (task, get_mempool);
+/***
+ * @method task:get_session()
+ * Returns asynchronous session object that is used by many rspamd asynchronous
+ * utilities internally.
+ * @return {rspamd_session} session object
+ */
LUA_FUNCTION_DEF (task, get_session);
+/***
+ * @method task:get_ev_base()
+ * Return asynchronous event base for using in callbacks and resolver.
+ * @return {rspamd_ev_base} event base
+ */
LUA_FUNCTION_DEF (task, get_ev_base);
+/***
+ * @method task:insert_result(symbol, weight[, option1, ...])
+ * Insert specific symbol to the task's scanning results assigning the initial
+ * weight to it.
+ * @param {string} symbol symbol to insert
+ * @param {number} weight initial weight (this weight is multiplied by the metric weight)
+ * @param {string} options list of optional options attached to a symbol inserted
+@example
+local function cb(task)
+ if task:get_header('Some header') then
+ task:insert_result('SOME_HEADER', 1.0, 'Got some header')
+ end
+end
+ */
LUA_FUNCTION_DEF (task, insert_result);
+/***
+ * @method task:set_pre_result(action, description)
+ * Sets pre-result for a task. It is used in pre-filters to specify early results
+ * of the task scanned. If a pre-filter sets some result, then further processing
+ * may be skipped. For selecting action it is possible to use global table
+ * `rspamd_actions`.
+ * @param {rspamd_action} action a numeric action value
+ * @param {string} description string description
+@example
+local function cb(task)
+ local gr = task:get_header('Greylist')
+ if gr and gr == 'greylist' then
+ task:set_pre_result(rspamd_actions['greylist'], 'Greylisting required')
+ end
+end
+ */
LUA_FUNCTION_DEF (task, set_pre_result);
+/***
+ * @method task:get_urls()
+ * Get all URLs found in a message.
+ * @return {table rspamd_url} list of all urls found
+@example
+local function phishing_cb(task)
+ local urls = task:get_urls();
+
+ if urls then
+ for _,url in ipairs(urls) do
+ if url:is_phished() then
+ return true
+ end
+ end
+ end
+ return false
+end
+ */
LUA_FUNCTION_DEF (task, get_urls);
+/***
+ * @method task:get_emails()
+ * Get all email addresses found in a message.
+ * @return {table rspamd_url} list of all email addresses found
+ */
LUA_FUNCTION_DEF (task, get_emails);
+/***
+ * @method task:get_text_parts()
+ * Get all text (and HTML) parts found in a message
+ * @return {table rspamd_text_part} list of text parts
+ */
LUA_FUNCTION_DEF (task, get_text_parts);
+/***
+ * @method task:get_parts()
+ * Get all mime parts found in a message
+ * @return {table rspamd_mime_part} list of mime parts
+ */
LUA_FUNCTION_DEF (task, get_parts);
+/***
+ * @method task:get_header(name[, case_sensitive])
+ * Get decoded value of a header specified with optional case_sensitive flag.
+ * By default headers are searched in a case-insensitive manner.
+ * @param {string} name name of header to get
+ * @param {boolean} case_sensitive case sensitiveness flag to search for a header
+ * @return {string} decoded value of a header
+ */
LUA_FUNCTION_DEF (task, get_header);
+/***
+ * @method task:get_header_raw(name[, case_sensitive])
+ * Get raw value of a header specified with optional case_sensitive flag.
+ * By default headers are searched in a case-insensitive manner.
+ * @param {string} name name of header to get
+ * @param {boolean} case_sensitive case sensitiveness flag to search for a header
+ * @return {string} raw value of a header
+ */
LUA_FUNCTION_DEF (task, get_header_raw);
+/***
+ * @method task:get_header_full(name[, case_sensitive])
+ * Get raw value of a header specified with optional case_sensitive flag.
+ * By default headers are searched in a case-insensitive manner. This method returns more
+ * information about the header as a list of tables with the following structure:
+ *
+ * - `name` - name of a header
+ * - `value` - raw value of a header
+ * - `decoded` - decoded value of a header
+ * - `tab_separated` - `true` if a header and a value are separated by `tab` character
+ * - `empty_separator` - `true` if there is no separator between a header and a value
+ * @param {string} name name of header to get
+ * @param {boolean} case_sensitive case sensitiveness flag to search for a header
+ * @return {list of tables} all values of a header as specified above
+@example
+function check_header_delimiter_tab(task, header_name)
+ for _,rh in ipairs(task:get_header_full(header_name)) do
+ if rh['tab_separated'] then return true end
+ end
+ return false
+end
+ */
LUA_FUNCTION_DEF (task, get_header_full);
+/***
+ * @method task:get_received_headers()
+ * Returns a list of tables of parsed received headers. The tables returned have
+ * the following structure:
+ *
+ * - `from_hostname` - string that represents hostname provided by a peer
+ * - `from_ip` - string representation of IP address as provided by a peer
+ * - `real_hostname` - hostname as resolved by MTA
+ * - `real_ip` - string representation of IP as resolved by PTR request of MTA
+ * - `by_hostname` - MTA hostname
+ *
+ * Please note that in some situations rspamd cannot parse all the fields of received headers.
+ * In that case you should check all strings for validity.
+ * @return {table of tables} list of received headers described above
+ */
LUA_FUNCTION_DEF (task, get_received_headers);
LUA_FUNCTION_DEF (task, get_resolver);
LUA_FUNCTION_DEF (task, inc_dns_req);