diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-03-25 19:19:47 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2010-03-25 19:19:47 +0300 |
commit | 2f2642851746b0985f67e8dde58e2458eae07cca (patch) | |
tree | c379cf43cf7a26be6e70111e8e79815258a1c0ee | |
parent | 9e16e433e1386b3613ea5667b12ee14c3ef0588c (diff) | |
download | rspamd-2f2642851746b0985f67e8dde58e2458eae07cca.tar.gz rspamd-2f2642851746b0985f67e8dde58e2458eae07cca.zip |
* Initial add of xml parser for rspamd configuration
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/cfg_file.h | 1 | ||||
-rw-r--r-- | src/cfg_file.l | 8 | ||||
-rw-r--r-- | src/cfg_utils.c | 51 | ||||
-rw-r--r-- | src/cfg_xml.c | 379 | ||||
-rw-r--r-- | src/cfg_xml.h | 66 |
6 files changed, 505 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52b91581f..e7e713e1c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -395,6 +395,7 @@ SET(RSPAMDSRC src/modules.c
 	src/bloom.c
 	src/buffer.c
 	src/cfg_utils.c
+	src/cfg_xml.c
 	src/controller.c
 	src/events.c
 	src/expressions.c
diff --git a/src/cfg_file.h b/src/cfg_file.h
index 53fd7e6fc..74ce91911 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -372,6 +372,7 @@ GList* parse_comma_list (memory_pool_t *pool, char *line);
 struct classifier_config* check_classifier_cfg (struct config_file *cfg, struct classifier_config *c);
 struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c);
 gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const char *line);
+gboolean read_xml_config (struct config_file *cfg, const char *filename);
 
 int yylex (void);
 int yyparse (void);
diff --git a/src/cfg_file.l b/src/cfg_file.l
index 48cf18c44..fd83be66c 100644
--- a/src/cfg_file.l
+++ b/src/cfg_file.l
@@ -130,6 +130,14 @@ yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG;
 <incl>[ \t]*      /* eat the whitespace */
 <incl>[^ \t\n]+   { /* got the include file name */
 		/* got the include file name */
+		/* Handle XML case; NOTE(review): control still reaches the include code below - confirm */
+		size_t len = strlen (yytext);
+		if (len >= 4 && strcmp (yytext + len - 4, ".xml") == 0) {
+			if (!read_xml_config (cfg, yytext)) {
+				yyerror ("invalid xml detected");
+			}
+			BEGIN(INITIAL);
+		}
 		if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) {
 			yyerror ("yylex: includes nested too deeply" );
 			return -1;
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index 023d97315..534d44eae 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -24,13 +24,13 @@
 
 #include "config.h"
 
-#include <math.h>
 #include "cfg_file.h"
 #include "main.h"
 #include "filter.h"
 #include "settings.h"
 #include "classifiers/classifiers.h"
+#include "cfg_xml.h"
 #ifdef WITH_LUA
 #include "lua/lua_common.h"
 #endif
@@ -760,6 +760,55 @@ parse_normalizer (struct config_file *cfg, struct statfile *st, const char *line
 	return FALSE;
 }
 
+/*
+ * GMarkup callback table used by read_xml_config (); the handlers are
+ * declared in cfg_xml.h
+ */
+static GMarkupParser xml_parser = {
+	.start_element = rspamd_xml_start_element,
+	.end_element = rspamd_xml_end_element,
+	.passthrough = NULL,
+	.text = rspamd_xml_text,
+	.error = rspamd_xml_error,
+};
+
+/*
+ * Read configuration from the xml file `filename' into `cfg'.
+ * The file is mapped read-only and fed to the GMarkup parser in one piece.
+ * Returns TRUE if the file was read and parsed without errors and FALSE
+ * otherwise; I/O failures are logged here and parse errors are logged by
+ * the parser's error callback (rspamd_xml_error).
+ */
+gboolean
+read_xml_config (struct config_file *cfg, const char *filename)
+{
+	struct stat st;
+	int fd;
+	gchar *data;
+	gboolean res;
+	GMarkupParseContext *ctx;
+	GError *err = NULL;
+
+	struct rspamd_xml_userdata ud;
+
+	if (stat (filename, &st) == -1) {
+		msg_err ("cannot stat %s: %s", filename, strerror (errno));
+		return FALSE;
+	}
+	if ((fd = open (filename, O_RDONLY)) == -1) {
+		msg_err ("cannot open %s: %s", filename, strerror (errno));
+		return FALSE;
+
+	}
+	/* Now mmap this file to simplify reading process */
+	if ((data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		msg_err ("cannot mmap %s: %s", filename, strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	close (fd);
+
+	/*
+	 * Prepare xml parser: the callbacks test and free section_name and
+	 * other_data, so they must not start out as stack garbage
+	 */
+	ud.cfg = cfg;
+	ud.state = XML_READ_START;
+	ud.section_name = NULL;
+	ud.other_data = NULL;
+
+	ctx = g_markup_parse_context_new (&xml_parser, G_MARKUP_TREAT_CDATA_AS_TEXT, &ud, NULL);
+	res = g_markup_parse_context_parse (ctx, data, st.st_size, &err);
+	if (res) {
+		/* Signal end of input so that a truncated document is reported */
+		res = g_markup_parse_context_end_parse (ctx, &err);
+	}
+
+	/* The error, if any, has already been passed to the error callback */
+	g_clear_error (&err);
+	g_markup_parse_context_free (ctx);
+	munmap (data, st.st_size);
+
+	return res;
+}
+
 /*
  * vi:ts=4
  */
diff --git a/src/cfg_xml.c b/src/cfg_xml.c
new file mode 100644
index 000000000..2308fdba4
--- /dev/null
+++ b/src/cfg_xml.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Read and write rspamd dynamic parameters from xml files
+ */
+
+#include "config.h"
+#include "cfg_xml.h"
+#include "logger.h"
+
+/* Error domain used for every GError created by the xml callbacks */
+GQuark
+xml_error_quark (void)
+{
+	return g_quark_from_static_string ("xml-error-quark");
+}
+
+
+/*
+ * Look up attribute `attr' (ASCII case-insensitive) in the NULL-terminated
+ * parallel arrays passed to the start-element callback.
+ * On success store a pointer to its value in *res and return TRUE; the
+ * pointer aliases the caller's array and is not copied.
+ * Return FALSE and leave *res untouched when the attribute is absent.
+ */
+static inline gboolean
+extract_attr (const gchar *attr, const gchar **attribute_names, const gchar **attribute_values, gchar **res)
+{
+	const gchar **cur_attr, **cur_value;
+
+	cur_attr = attribute_names;
+	cur_value = attribute_values;
+
+	while (*cur_attr && *cur_value) {
+		/* g_ascii_strcasecmp () returns 0 when the names are equal */
+		if (g_ascii_strcasecmp (*cur_attr, attr) == 0) {
+			*res = (gchar *) *cur_value;
+			return TRUE;
+		}
+		cur_attr ++;
+		cur_value ++;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Copy `len' bytes of `text' into a NUL-terminated string allocated from
+ * `pool'. The source is not NUL-terminated, so it must never be handed to
+ * a function that scans for the terminator.
+ */
+static inline char*
+xml_asciiz_string (memory_pool_t *pool, const gchar *text, gsize len)
+{
+	char *val;
+
+	val = memory_pool_alloc (pool, len + 1);
+	memcpy (val, text, len);
+	val[len] = '\0';
+
+	return val;
+}
+
+/*
+ * Store `text' as the value of option ud->other_data of module
+ * ud->section_name in cfg->modules_opts, replacing the value of an
+ * already known option or adding a new one.
+ */
+static void
+xml_parse_module_opt (struct rspamd_xml_userdata *ud, const gchar *text, gsize len)
+{
+	char *val;
+	GList *cur_opt, *head;
+	struct module_opt *cur;
+
+	val = xml_asciiz_string (ud->cfg->cfg_pool, text, len);
+	head = g_hash_table_lookup (ud->cfg->modules_opts, ud->section_name);
+	if (head == NULL) {
+		/* Insert new option structure */
+		cur = memory_pool_alloc (ud->cfg->cfg_pool, sizeof (struct module_opt));
+		cur->param = memory_pool_strdup (ud->cfg->cfg_pool, ud->other_data);
+		cur->value = val;
+		head = g_list_prepend (NULL, cur);
+		g_hash_table_insert (ud->cfg->modules_opts, memory_pool_strdup (ud->cfg->cfg_pool, ud->section_name), head);
+	}
+	else {
+		/* First try to find option with this name */
+		for (cur_opt = head; cur_opt != NULL; cur_opt = g_list_next (cur_opt)) {
+			cur = cur_opt->data;
+			if (strcmp (cur->param, ud->other_data) == 0) {
+				/* cur->value is in pool */
+				cur->value = val;
+				return;
+			}
+		}
+		/*
+		 * Not found: append, so that the list head stored in the hash
+		 * table stays valid and the new option is reachable from it
+		 */
+		cur = memory_pool_alloc (ud->cfg->cfg_pool, sizeof (struct module_opt));
+		cur->param = memory_pool_strdup (ud->cfg->cfg_pool, ud->other_data);
+		cur->value = val;
+		head = g_list_append (head, cur);
+	}
+
+}
+
+/*
+ * GMarkup start-element callback: drives the state machine kept in
+ * struct rspamd_xml_userdata. Section names are copied to ud->section_name
+ * and names of nested <param>/<factor> tags to ud->other_data.
+ * On invalid input *error is set and the state becomes XML_ERROR.
+ */
+void
+rspamd_xml_start_element (GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names,
+								const gchar **attribute_values, gpointer user_data, GError **error)
+{
+	struct rspamd_xml_userdata *ud = user_data;
+	gchar *res;
+
+	switch (ud->state) {
+		case XML_READ_START:
+			if (g_ascii_strcasecmp (element_name, "rspamd") != 0) {
+				/* Invalid XML, it must contain root element <rspamd></rspamd> */
+				*error = g_error_new (xml_error_quark (), XML_START_MISSING, "start element is missing");
+				ud->state = XML_ERROR;
+			}
+			else {
+				/*
+				 * Root element seen: start from a clean slate so that
+				 * later g_free () calls never see stale pointers
+				 */
+				ud->section_name = NULL;
+				ud->other_data = NULL;
+				ud->state = XML_READ_PARAM;
+			}
+			break;
+		case XML_READ_PARAM:
+			/* Read parameter name and try to find among list of known parameters */
+			if (g_ascii_strcasecmp (element_name, "module") == 0) {
+				/* Read module data */
+				if (extract_attr ("name", attribute_names, attribute_values, &res)) {
+					ud->section_name = g_strdup (res);
+					ud->state = XML_READ_MODULE;
+				}
+				else {
+					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'name' is required for tag 'module'");
+					ud->state = XML_ERROR;
+				}
+			}
+			else if (g_ascii_strcasecmp (element_name, "factors") == 0) {
+				ud->state = XML_READ_FACTORS;
+			}
+			else if (g_ascii_strcasecmp (element_name, "logging") == 0) {
+				ud->state = XML_READ_LOGGING;
+			}
+			else if (g_ascii_strcasecmp (element_name, "metric") == 0) {
+				if (extract_attr ("name", attribute_names, attribute_values, &res)) {
+					ud->section_name = g_strdup (res);
+					ud->state = XML_READ_METRIC;
+				}
+				else {
+					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'name' is required for tag 'metric'");
+					ud->state = XML_ERROR;
+				}
+			}
+			else if (g_ascii_strcasecmp (element_name, "classifier") == 0) {
+				if (extract_attr ("type", attribute_names, attribute_values, &res)) {
+					ud->section_name = g_strdup (res);
+					ud->state = XML_READ_CLASSIFIER;
+				}
+				else {
+					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'type' is required for tag 'classifier'");
+					ud->state = XML_ERROR;
+				}
+			}
+			else if (g_ascii_strcasecmp (element_name, "worker") == 0) {
+				if (extract_attr ("type", attribute_names, attribute_values, &res)) {
+					ud->section_name = g_strdup (res);
+					ud->state = XML_READ_WORKER;
+				}
+				else {
+					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'type' is required for tag 'worker'");
+					ud->state = XML_ERROR;
+				}
+			}
+			else {
+				/* Other params */
+				if (g_ascii_strcasecmp (element_name, "variable") == 0) {
+					if (extract_attr ("name", attribute_names, attribute_values, &res)) {
+						ud->section_name = g_strdup (res);
+						ud->state = XML_READ_VARIABLE;
+					}
+					else {
+						*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'name' is required for tag 'variable'");
+						ud->state = XML_ERROR;
+					}
+
+				}
+				else if (g_ascii_strcasecmp (element_name, "pidfile") == 0) {
+					ud->state = XML_READ_PIDFILE;
+				}
+				else if (g_ascii_strcasecmp (element_name, "statfile_pool_size") == 0) {
+					/* The text and end-element handlers already expect this state */
+					ud->state = XML_READ_STATFILE_POOL;
+				}
+				else if (g_ascii_strcasecmp (element_name, "filters") == 0) {
+					ud->state = XML_READ_FILTERS;
+				}
+			}
+			break;
+		case XML_READ_MODULE:
+			if (g_ascii_strcasecmp (element_name, "param") == 0) {
+				if (extract_attr ("name", attribute_names, attribute_values, &res)) {
+					/* Drop the name of a previous <param> that carried no text */
+					g_free (ud->other_data);
+					ud->other_data = g_strdup (res);
+				}
+				else {
+					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'name' is required for tag 'param'");
+					ud->state = XML_ERROR;
+				}
+			}
+			break;
+		case XML_READ_CLASSIFIER:
+			break;
+		case XML_READ_STATFILE:
+			break;
+		case XML_READ_FACTORS:
+			if (g_ascii_strcasecmp (element_name, "factor") == 0) {
+				if (extract_attr ("name", attribute_names, attribute_values, &res)) {
+					/* Drop the name of a previous <factor> that carried no text */
+					g_free (ud->other_data);
+					ud->other_data = g_strdup (res);
+				}
+				else {
+					*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param 'name' is required for tag 'factor'");
+					ud->state = XML_ERROR;
+				}
+			}
+			break;
+		case XML_READ_WORKER:
+			break;
+		case XML_READ_LOGGING:
+			break;
+		default:
+			*error = g_error_new (xml_error_quark (), XML_EXTRA_ELEMENT, "element %s is unexpected in this state", element_name);
+			ud->state = XML_ERROR;
+			break;
+	}
+}
+
+/*
+ * Handle a closing tag for the current section. If the tag is `x' the
+ * section is finished: return to XML_READ_PARAM and release the names
+ * collected for it. Otherwise, when `required' is TRUE the mismatch is an
+ * error; when it is FALSE the tag is taken to close a nested element and
+ * is ignored.
+ */
+#define CHECK_TAG(x, required)																												\
+do {																																		\
+if (g_ascii_strcasecmp (element_name, (x)) == 0) {																							\
+	ud->state = XML_READ_PARAM;																												\
+	res = TRUE;																																\
+	g_free (ud->section_name);																												\
+	ud->section_name = NULL;																												\
+	g_free (ud->other_data);																												\
+	ud->other_data = NULL;																													\
+}																																			\
+else {																																		\
+	res = FALSE;																															\
+	if ((required) == TRUE) {																												\
+		*error = g_error_new (xml_error_quark (), XML_UNMATCHED_TAG, "element %s is unexpected in this state, expected %s", element_name, (x));	\
+		ud->state = XML_ERROR;																												\
+	}																																		\
+}																																			\
+} while (0)
+
+/*
+ * GMarkup end-element callback: closes the section selected by the current
+ * state, or finishes the document when </rspamd> is seen in XML_READ_PARAM.
+ */
+void
+rspamd_xml_end_element (GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error)
+{
+	struct rspamd_xml_userdata *ud = user_data;
+	gboolean res;
+
+	switch (ud->state) {
+		case XML_READ_MODULE:
+			CHECK_TAG ("module", FALSE);
+			break;
+		case XML_READ_CLASSIFIER:
+			CHECK_TAG ("classifier", FALSE);
+			break;
+		case XML_READ_STATFILE:
+			CHECK_TAG ("statfile", FALSE);
+			break;
+		case XML_READ_FACTORS:
+			CHECK_TAG ("factors", FALSE);
+			break;
+		case XML_READ_METRIC:
+			CHECK_TAG ("metric", FALSE);
+			break;
+		case XML_READ_WORKER:
+			CHECK_TAG ("worker", FALSE);
+			break;
+		case XML_READ_VARIABLE:
+			CHECK_TAG ("variable", TRUE);
+			break;
+		case XML_READ_PIDFILE:
+			CHECK_TAG ("pidfile", TRUE);
+			break;
+		case XML_READ_STATFILE_POOL:
+			CHECK_TAG ("statfile_pool_size", TRUE);
+			break;
+		case XML_READ_FILTERS:
+			CHECK_TAG ("filters", TRUE);
+			break;
+		case XML_READ_LOGGING:
+			CHECK_TAG ("logging", FALSE);
+			break;
+		case XML_READ_PARAM:
+			if (g_ascii_strcasecmp (element_name, "rspamd") == 0) {
+				/* End of document */
+				ud->state = XML_END;
+			}
+			else {
+				*error = g_error_new (xml_error_quark (), XML_EXTRA_ELEMENT, "element %s is umatched", element_name);
+				ud->state = XML_ERROR;
+			}
+			break;
+		default:
+			ud->state = XML_ERROR;
+			break;
+	}
+
+}
+#undef CHECK_TAG
+
+/*
+ * GMarkup text callback: stores the text of the current element in the
+ * configuration according to the parser state. `text' is not
+ * NUL-terminated, so it is always copied through xml_asciiz_string ().
+ */
+void
+rspamd_xml_text (GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
+{
+	struct rspamd_xml_userdata *ud = user_data;
+	char *val;
+	double *tmp;
+
+	switch (ud->state) {
+		case XML_READ_MODULE:
+			if (ud->other_data) {
+				/* Insert or replace module's option */
+				xml_parse_module_opt (ud, text, text_len);
+				g_free (ud->other_data);
+				/* Forget the name so that later text is not taken as its value */
+				ud->other_data = NULL;
+			}
+			break;
+		case XML_READ_CLASSIFIER:
+			break;
+		case XML_READ_STATFILE:
+			break;
+		case XML_READ_FACTORS:
+			if (ud->other_data) {
+				/* Assume that we have factor name in other_data */
+				val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+				tmp = memory_pool_alloc (ud->cfg->cfg_pool, sizeof (double));
+				*tmp = strtod (val, NULL);
+				/* The table keeps the key pointer, so give it a pool copy */
+				g_hash_table_insert (ud->cfg->factors, memory_pool_strdup (ud->cfg->cfg_pool, ud->other_data), tmp);
+				g_free (ud->other_data);
+				ud->other_data = NULL;
+			}
+			break;
+		case XML_READ_METRIC:
+			break;
+		case XML_READ_WORKER:
+			break;
+		case XML_READ_VARIABLE:
+			if (ud->section_name) {
+				/* The start handler stores the variable name in section_name */
+				val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+				g_hash_table_insert (ud->cfg->variables, memory_pool_strdup (ud->cfg->cfg_pool, ud->section_name), val);
+			}
+			break;
+		case XML_READ_PIDFILE:
+			val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+			ud->cfg->pid_file = val;
+			break;
+		case XML_READ_STATFILE_POOL:
+			val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+			ud->cfg->max_statfile_size = strtoull (val, NULL, 10);
+			break;
+		case XML_READ_FILTERS:
+			val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+			ud->cfg->filters_str = val;
+			break;
+		case XML_READ_LOGGING:
+			break;
+		case XML_READ_PARAM:
+			break;
+		default:
+			ud->state = XML_ERROR;
+			break;
+	}
+
+}
+
+/* GMarkup error callback: log the message together with the parser state */
+void
+rspamd_xml_error (GMarkupParseContext *context, GError *error, gpointer user_data)
+{
+	struct rspamd_xml_userdata *ud = user_data;
+
+	msg_err ("xml parser error: %s, at state %d", error->message, ud->state);
+}
+
diff --git a/src/cfg_xml.h b/src/cfg_xml.h
new file mode 100644
index 000000000..3e1a4bcd4
--- /dev/null
+++ b/src/cfg_xml.h
@@ -0,0 +1,66 @@
+#ifndef RSPAMD_CFG_XML_H
+#define RSPAMD_CFG_XML_H
+
+#include "config.h"
+#include "cfg_file.h"
+
+/* Error codes used with the xml error domain */
+#define XML_START_MISSING 1
+#define XML_PARAM_MISSING 2
+#define XML_EXTRA_ELEMENT 3
+#define XML_UNMATCHED_TAG 4
+
+/* States of the configuration parser */
+enum xml_read_state {
+	XML_READ_START,
+	XML_READ_PARAM,
+	XML_READ_MODULE,
+	XML_READ_CLASSIFIER,
+	XML_READ_STATFILE,
+	XML_READ_FACTORS,
+	XML_READ_METRIC,
+	XML_READ_WORKER,
+	XML_READ_VARIABLE,
+	XML_READ_PIDFILE,
+	XML_READ_STATFILE_POOL,
+	XML_READ_FILTERS,
+	XML_READ_LOGGING,
+	XML_ERROR,
+	XML_END
+};
+
+/* Parser context handed to every callback as user_data */
+struct rspamd_xml_userdata {
+	enum xml_read_state state;		/* current parser state */
+	struct config_file *cfg;		/* configuration being filled */
+	gchar *section_name;			/* name or type attribute of the open section */
+	gpointer other_data;			/* name attribute of the open <param> or <factor> */
+};
+
+/* Called for open tags <foo bar="baz"> */
+void rspamd_xml_start_element (GMarkupParseContext *context,
+								const gchar *element_name,
+								const gchar **attribute_names,
+								const gchar **attribute_values,
+								gpointer user_data,
+								GError **error);
+
+/* Called for close tags </foo> */
+void rspamd_xml_end_element (GMarkupParseContext *context,
+								const gchar *element_name,
+								gpointer user_data,
+								GError **error);
+
+/* text is not nul-terminated */
+void rspamd_xml_text (GMarkupParseContext *context,
+								const gchar *text,
+								gsize text_len,
+								gpointer user_data,
+								GError **error);
+
+/* Called on error, including one set by other
+* methods in the vtable. The GError should not be freed.
+*/
+void rspamd_xml_error (GMarkupParseContext *context,
+								GError *error,
+								gpointer user_data);
+
+
+#endif |