summary refs log tree commit diff stats
path: root/src/plugins/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/regexp.c')
-rw-r--r-- src/plugins/regexp.c 311
1 files changed, 295 insertions, 16 deletions
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 33aa60f29..636b26c03 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -34,10 +34,12 @@
#include "../message.h"
#include "../modules.h"
#include "../cfg_file.h"
+#include "../map.h"
#include "../util.h"
#include "../expressions.h"
#include "../view.h"
#include "../lua/lua_common.h"
+#include "../json/jansson.h"
#define DEFAULT_STATFILE_PREFIX "./"
@@ -61,6 +63,14 @@ struct regexp_ctx {
char *statfile_prefix;
memory_pool_t *regexp_pool;
+ memory_pool_t *dynamic_pool;
+};
+
+/*
+ * Growable byte buffer in which json_regexp_read_cb collects the chunks of
+ * a dynamic rules map before json_regexp_fin_cb parses them as JSON.
+ */
+struct regexp_json_buf {
+ /* Start of the g_malloc'ed accumulation buffer; NULL until the first chunk is stored */
+ u_char *buf;
+ /* Write cursor: address inside buf where the next chunk is copied */
+ u_char *pos;
+ /* Number of bytes currently allocated for buf */
+ size_t buflen;
+ /* Configuration used to look up the metric and the raw_mode flag when rules are loaded */
+ struct config_file *cfg;
};
static struct regexp_ctx *regexp_module_ctx = NULL;
@@ -72,29 +82,96 @@ static gboolean rspamd_check_smtp_data (struct worker_task *task
static void process_regexp_item (struct worker_task *task, void *user_data);
-int
-regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
+/*
+ * Symbol callback used for dynamic rules that have no "rule" expression.
+ * It unconditionally inserts the symbol into the module's metric, passing
+ * 1 and NULL as the remaining insert_result arguments.
+ *
+ * task: task being processed
+ * user_data: symbol name (char *) given when the dynamic symbol was registered
+ */
+static void
+regexp_dynamic_insert_result (struct worker_task *task, void *user_data)
{
- regexp_module_ctx = g_malloc (sizeof (struct regexp_ctx));
+	insert_result (task, regexp_module_ctx->metric, (char *)user_data, 1, NULL);
+}
- regexp_module_ctx->filter = regexp_common_filter;
- regexp_module_ctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
- regexp_module_ctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
+/*
+ * Parse an IPv4 address with an optional prefix length.
+ *
+ * Accepted forms are "a.b.c.d" and "a.b.c.d/N", where N is one or two
+ * decimal digits not greater than 32. Without "/N" the mask defaults to 32.
+ *
+ * begin: NUL-terminated input string
+ * addr: output item; addr->addr receives the address (via inet_aton) and
+ *       addr->mask the prefix length
+ *
+ * Returns TRUE on success, FALSE on any syntax error, on an over-long
+ * address or mask, or when inet_aton rejects the address.
+ */
+static gboolean
+parse_regexp_ipmask (const char *begin, struct dynamic_map_item *addr)
+{
+	const char *pos = begin;
+	char ip_buf[sizeof ("255.255.255.255")], mask_buf[3], *p;
+	gboolean in_mask = FALSE;
+	int dots = 0, mask = 0;
+
+	bzero (ip_buf, sizeof (ip_buf));
+	bzero (mask_buf, sizeof (mask_buf));
+	p = ip_buf;
+
+	while (*pos) {
+		if (!in_mask) {
+			if (*pos == '/') {
+				/* Separator: everything that follows is the prefix length */
+				in_mask = TRUE;
+				p = mask_buf;
+			}
+			else if (g_ascii_isdigit (*pos) || *pos == '.') {
+				/* Keep the last byte of ip_buf free for the terminating NUL */
+				if ((size_t)(p - ip_buf) >= sizeof (ip_buf) - 1) {
+					return FALSE;
+				}
+				if (*pos == '.' && ++dots > 3) {
+					return FALSE;
+				}
+				*p ++ = *pos;
+			}
+			else {
+				/* Invalid character */
+				return FALSE;
+			}
+		}
+		else {
+			/* Only digits, and at most two of them so that mask_buf stays NUL-terminated */
+			if (!g_ascii_isdigit (*pos) || (size_t)(p - mask_buf) >= sizeof (mask_buf) - 1) {
+				return FALSE;
+			}
+			*p ++ = *pos;
+		}
+		pos ++;
+	}
- *ctx = (struct module_ctx *)regexp_module_ctx;
- register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
- register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
- register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
+
+	if (!inet_aton (ip_buf, &addr->addr)) {
+		return FALSE;
+	}
+
+	if (!in_mask) {
+		/* No "/N" part: treat the value as a single host */
+		addr->mask = 32;
+		return TRUE;
+	}
+
+	if (mask_buf[0] == '\0') {
+		/* A '/' was present but no digits followed it */
+		msg_info ("bad ipmask value: '%s'", begin);
+		return FALSE;
+	}
+	/* Accumulate digit by digit so that one-digit masks ("/8") are parsed correctly */
+	for (p = mask_buf; *p != '\0'; p ++) {
+		mask = mask * 10 + (*p - '0');
+	}
+	if (mask > 32) {
+		msg_info ("bad ipmask value: '%s'", begin);
+		return FALSE;
+	}
+	addr->mask = mask;
+
+	return TRUE;
- return 0;
}
+/* Process regexp expression */
static gboolean
-read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, struct config_file *cfg)
+read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain, char *symbol, char *line, gboolean raw_mode)
{
struct expression *e, *cur;
- e = parse_expression (regexp_module_ctx->regexp_pool, line);
+ e = parse_expression (pool, line);
if (e == NULL) {
msg_warn ("%s = \"%s\" is invalid regexp expression", symbol, line);
return FALSE;
@@ -103,7 +180,7 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain,
cur = e;
while (cur) {
if (cur->type == EXPR_REGEXP) {
- cur->content.operand = parse_regexp (pool, cur->content.operand, cfg->raw_mode);
+ cur->content.operand = parse_regexp (pool, cur->content.operand, raw_mode);
if (cur->content.operand == NULL) {
msg_warn ("cannot parse regexp, skip expression %s = \"%s\"", symbol, line);
return FALSE;
@@ -116,6 +193,197 @@ read_regexp_expression (memory_pool_t * pool, struct regexp_module_item *chain,
return TRUE;
}
+
+/* Callbacks for reading json dynamic rules */
+/*
+ * Map read callback: append one chunk of map data to the JSON buffer.
+ *
+ * On the first chunk of a load (data->cur_data == NULL) a new
+ * regexp_json_buf is created and inherits the config pointer from
+ * data->prev_data. The buffer always keeps at least one spare byte after
+ * the stored data so that json_regexp_fin_cb can NUL-terminate it in place.
+ *
+ * pool: memory pool supplied by the caller (unused here)
+ * chunk: data just read
+ * len: number of bytes in chunk
+ * data: map callback state (prev_data / cur_data)
+ *
+ * Always returns NULL, which tells the caller not to keep any part of chunk.
+ */
+u_char *
+json_regexp_read_cb (memory_pool_t * pool, u_char * chunk, size_t len, struct map_cb_data *data)
+{
+	struct regexp_json_buf *jb, *prev;
+	size_t avail, off;
+
+	if (data->cur_data == NULL) {
+		prev = (struct regexp_json_buf *)data->prev_data;
+		if (prev == NULL) {
+			/* Without previous data there is no config to inherit */
+			msg_err ("no previous map data, cannot obtain config");
+			return NULL;
+		}
+		jb = g_malloc (sizeof (struct regexp_json_buf));
+		jb->cfg = prev->cfg;
+		jb->buf = NULL;
+		jb->pos = NULL;
+		jb->buflen = 0;
+		data->cur_data = jb;
+	}
+	else {
+		jb = data->cur_data;
+	}
+
+	if (jb->buf == NULL) {
+		/* Allocate memory for buffer, plus one byte for the final NUL */
+		jb->buflen = len * 2 + 1;
+		jb->buf = g_malloc (jb->buflen);
+		jb->pos = jb->buf;
+	}
+
+	off = jb->pos - jb->buf;
+	avail = jb->buflen - off;
+
+	/* "<=" rather than "<": one byte must remain free after the copy */
+	if (avail <= len) {
+		jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2);
+		jb->buf = g_realloc (jb->buf, jb->buflen);
+		jb->pos = jb->buf + off;
+	}
+
+	memcpy (jb->pos, chunk, len);
+	jb->pos += len;
+
+	/* Say not to copy any part of this buffer */
+	return NULL;
+}
+
+/*
+ * Convert the "networks" member of a dynamic rule into a list of
+ * dynamic_map_item entries allocated from pool.
+ *
+ * networks: JSON value of the "networks" key; may be NULL or a non-array,
+ *           in which case no networks are produced
+ *
+ * Entries that are not strings or that parse_regexp_ipmask rejects are
+ * skipped. Returns a GList the caller must release with g_list_free, or
+ * NULL when there are no valid entries.
+ */
+static GList *
+json_regexp_parse_networks (memory_pool_t *pool, json_t *networks)
+{
+	GList *res = NULL;
+	struct dynamic_map_item *item;
+	json_t *val;
+	int idx, nelts;
+
+	if (networks == NULL || !json_is_array (networks)) {
+		return NULL;
+	}
+
+	nelts = json_array_size (networks);
+	for (idx = 0; idx < nelts; idx ++) {
+		/* Index with the loop variable of this array, not the rule index */
+		val = json_array_get (networks, idx);
+		if (val && json_is_string (val)) {
+			item = memory_pool_alloc (pool, sizeof (struct dynamic_map_item));
+			if (parse_regexp_ipmask (json_string_value (val), item)) {
+				res = g_list_prepend (res, item);
+			}
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Map finish callback: parse the accumulated buffer as JSON and replace the
+ * set of dynamic rules with its contents.
+ *
+ * The document must be an array of objects. Each object requires a numeric
+ * "factor" and a string "symbol"; an optional string "rule" holds a regexp
+ * expression and an optional array "networks" holds "ip[/mask]" strings.
+ * Objects without "rule" register a symbol that is always inserted.
+ *
+ * The previous buffer (data->prev_data) is freed first. Existing dynamic
+ * rules and the old dynamic pool are dropped only after the JSON has been
+ * parsed and the metric has been found, so a bad document leaves the
+ * current rules in place.
+ *
+ * pool: memory pool supplied by the caller (unused here)
+ * data: map callback state (prev_data / cur_data)
+ */
+void
+json_regexp_fin_cb (memory_pool_t * pool, struct map_cb_data *data)
+{
+	struct regexp_json_buf *jb;
+	int nelts, i;
+	size_t used;
+	json_t *js, *cur_elt, *cur_nm;
+	json_error_t je;
+	char *cur_rule, *cur_symbol;
+	double score;
+	struct regexp_module_item *cur_item;
+	GList *cur_networks;
+	memory_pool_t *new_pool;
+	struct metric *metric;
+
+	if (data->prev_data) {
+		/* Clean prev data; g_free accepts a NULL buf */
+		jb = data->prev_data;
+		g_free (jb->buf);
+		g_free (jb);
+	}
+
+	/* Now parse json */
+	jb = data->cur_data;
+	if (jb == NULL || jb->buf == NULL) {
+		msg_err ("no data read");
+		return;
+	}
+
+	/* NULL terminate current buf, growing it by one byte if it is completely full */
+	used = jb->pos - jb->buf;
+	if (used >= jb->buflen) {
+		jb->buflen = used + 1;
+		jb->buf = g_realloc (jb->buf, jb->buflen);
+		jb->pos = jb->buf + used;
+	}
+	*jb->pos = '\0';
+
+	js = json_loads ((const char *)jb->buf, &je);
+	if (!js) {
+		msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
+		return;
+	}
+
+	if (!json_is_array (js)) {
+		json_decref (js);
+		msg_err ("loaded json is not an array");
+		return;
+	}
+
+	/* Look the metric up before allocating anything, so a failure leaks nothing */
+	metric = g_hash_table_lookup (jb->cfg->metrics, regexp_module_ctx->metric);
+	if (metric == NULL) {
+		msg_err ("cannot find metric definition %s", regexp_module_ctx->metric);
+		json_decref (js);
+		return;
+	}
+
+	new_pool = memory_pool_new (memory_pool_get_size ());
+
+	/* Unregister old rules before releasing the pool that held their data */
+	remove_dynamic_rules (metric->cache);
+	if (regexp_module_ctx->dynamic_pool != NULL) {
+		memory_pool_delete (regexp_module_ctx->dynamic_pool);
+	}
+	regexp_module_ctx->dynamic_pool = new_pool;
+
+	nelts = json_array_size (js);
+	for (i = 0; i < nelts; i++) {
+		cur_rule = NULL;
+
+		cur_elt = json_array_get (js, i);
+		if (!cur_elt || !json_is_object (cur_elt)) {
+			msg_err ("loaded json is not an object");
+			continue;
+		}
+		/* Factor param */
+		cur_nm = json_object_get (cur_elt, "factor");
+		if (cur_nm == NULL || !json_is_number (cur_nm)) {
+			msg_err ("factor is not a number or not exists, but is required");
+			continue;
+		}
+		score = json_number_value (cur_nm);
+		/* Symbol param */
+		cur_nm = json_object_get (cur_elt, "symbol");
+		if (cur_nm == NULL || !json_is_string (cur_nm)) {
+			msg_err ("symbol is not a string or not exists, but is required");
+			continue;
+		}
+		cur_symbol = memory_pool_strdup (new_pool, json_string_value (cur_nm));
+		/* Now check other settings */
+		/* Rule */
+		cur_nm = json_object_get (cur_elt, "rule");
+		if (cur_nm != NULL && json_is_string (cur_nm)) {
+			cur_rule = memory_pool_strdup (new_pool, json_string_value (cur_nm));
+		}
+		/* Networks array */
+		cur_networks = json_regexp_parse_networks (new_pool, json_object_get (cur_elt, "networks"));
+
+		if (cur_rule) {
+			/* Dynamic rule has rule option */
+			cur_item = memory_pool_alloc0 (new_pool, sizeof (struct regexp_module_item));
+			cur_item->symbol = cur_symbol;
+			if (read_regexp_expression (new_pool, cur_item, cur_symbol, cur_rule, jb->cfg->raw_mode)) {
+				register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, process_regexp_item, cur_item, cur_networks);
+			}
+			else {
+				msg_warn ("cannot parse dynamic rule");
+			}
+		}
+		else {
+			/* Just rule that is always true (for whitelisting for example) */
+			register_dynamic_symbol (new_pool, &metric->cache, cur_symbol, score, regexp_dynamic_insert_result, cur_symbol, cur_networks);
+		}
+		if (cur_networks) {
+			g_list_free (cur_networks);
+		}
+	}
+	json_decref (js);
+}
+
+/*
+ * Module init function.
+ *
+ * Allocates the global regexp module context, sets its filter callback,
+ * creates the regexp memory pool and the autolearn symbols table, leaves
+ * the dynamic pool unset, and registers the module's expression functions.
+ *
+ * cfg: configuration (not used here)
+ * ctx: receives the newly created context cast to struct module_ctx
+ *
+ * Always returns 0.
+ */
+int
+regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+	struct regexp_ctx *mctx;
+
+	mctx = g_malloc (sizeof (*mctx));
+	regexp_module_ctx = mctx;
+
+	mctx->filter = regexp_common_filter;
+	mctx->regexp_pool = memory_pool_new (memory_pool_get_size ());
+	/* No dynamic rules are loaded yet */
+	mctx->dynamic_pool = NULL;
+	mctx->autolearn_symbols = g_hash_table_new (g_str_hash, g_str_equal);
+
+	*ctx = (struct module_ctx *)mctx;
+
+	register_expression_function ("regexp_match_number", rspamd_regexp_match_number, NULL);
+	register_expression_function ("raw_header_exists", rspamd_raw_header_exists, NULL);
+	register_expression_function ("check_smtp_data", rspamd_check_smtp_data, NULL);
+
+	return 0;
+}
+
+
/*
* Parse string in format:
* SYMBOL:statfile:weight
@@ -157,6 +425,7 @@ regexp_module_config (struct config_file *cfg)
char *value;
int res = TRUE;
double *w;
+ struct regexp_json_buf *jb, **pjb;
if ((value = get_module_opt (cfg, "regexp", "metric")) != NULL) {
regexp_module_ctx->metric = memory_pool_strdup (regexp_module_ctx->regexp_pool, value);
@@ -170,6 +439,16 @@ regexp_module_config (struct config_file *cfg)
else {
regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
}
+ if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) {
+ jb = g_malloc (sizeof (struct regexp_json_buf));
+ pjb = g_malloc (sizeof (struct regexp_json_buf *));
+ jb->buf = NULL;
+ jb->cfg = cfg;
+ *pjb = jb;
+ if (!add_map (value, json_regexp_read_cb, json_regexp_fin_cb, (void **)pjb)) {
+ msg_err ("cannot add map %s", value);
+ }
+ }
metric = g_hash_table_lookup (cfg->metrics, regexp_module_ctx->metric);
if (metric == NULL) {
@@ -190,7 +469,7 @@ regexp_module_config (struct config_file *cfg)
cur_item = memory_pool_alloc0 (regexp_module_ctx->regexp_pool, sizeof (struct regexp_module_item));
cur_item->symbol = cur->param;
if (cur->is_lua && cur->lua_type == LUA_VAR_STRING) {
- if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg)) {
+ if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->actual_data, cfg->raw_mode)) {
res = FALSE;
}
}
@@ -198,7 +477,7 @@ regexp_module_config (struct config_file *cfg)
cur_item->lua_function = cur->actual_data;
}
else if (! cur->is_lua) {
- if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg)) {
+ if (!read_regexp_expression (regexp_module_ctx->regexp_pool, cur_item, cur->param, cur->value, cfg->raw_mode)) {
res = FALSE;
}
}