aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-03-25 19:19:47 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-03-25 19:19:47 +0300
commit 2f2642851746b0985f67e8dde58e2458eae07cca (patch)
tree c379cf43cf7a26be6e70111e8e79815258a1c0ee
parent 9e16e433e1386b3613ea5667b12ee14c3ef0588c (diff)
download rspamd-2f2642851746b0985f67e8dde58e2458eae07cca.tar.gz
rspamd-2f2642851746b0985f67e8dde58e2458eae07cca.zip
* Initial add of xml parser for rspamd configuration
-rw-r--r-- CMakeLists.txt 1
-rw-r--r-- src/cfg_file.h 1
-rw-r--r-- src/cfg_file.l 8
-rw-r--r-- src/cfg_utils.c 51
-rw-r--r-- src/cfg_xml.c 379
-rw-r--r-- src/cfg_xml.h 66
6 files changed, 505 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52b91581f..e7e713e1c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -395,6 +395,7 @@ SET(RSPAMDSRC src/modules.c
src/bloom.c
src/buffer.c
src/cfg_utils.c
+ src/cfg_xml.c
src/controller.c
src/events.c
src/expressions.c
diff --git a/src/cfg_file.h b/src/cfg_file.h
index 53fd7e6fc..74ce91911 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -372,6 +372,7 @@ GList* parse_comma_list (memory_pool_t *pool, char *line);
struct classifier_config* check_classifier_cfg (struct config_file *cfg, struct classifier_config *c);
struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c);
gboolean parse_normalizer (struct config_file *cfg, struct statfile *st, const char *line);
+gboolean read_xml_config (struct config_file *cfg, const char *filename);
int yylex (void);
int yyparse (void);
diff --git a/src/cfg_file.l b/src/cfg_file.l
index 48cf18c44..fd83be66c 100644
--- a/src/cfg_file.l
+++ b/src/cfg_file.l
@@ -130,6 +130,14 @@ yes|YES|no|NO|[yY]|[nN] yylval.flag=parse_flag(yytext); return FLAG;
<incl>[ \t]* /* eat the whitespace */
<incl>[^ \t\n]+ { /* got the include file name */
/* got the include file name */
+ /* Handle XML case */
+ int len = strlen (yytext);
+ if (strcmp (yytext + len - 4, ".xml") == 0) {
+ if (!read_xml_config (cfg, yytext)) {
+ yyerror ("invalid xml detected");
+ }
+ BEGIN(INITIAL);
+ }
if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) {
yyerror ("yylex: includes nested too deeply" );
return -1;
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index 023d97315..534d44eae 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -24,13 +24,13 @@
#include "config.h"
-#include <math.h>
#include "cfg_file.h"
#include "main.h"
#include "filter.h"
#include "settings.h"
#include "classifiers/classifiers.h"
+#include "cfg_xml.h"
#ifdef WITH_LUA
#include "lua/lua_common.h"
#endif
@@ -760,6 +760,55 @@ parse_normalizer (struct config_file *cfg, struct statfile *st, const char *line
return FALSE;
}
+/*
+ * GMarkup callback table handed to the parse context in read_xml_config().
+ * No passthrough handler is installed.
+ */
+static GMarkupParser xml_parser = {
+	.start_element = rspamd_xml_start_element,
+	.end_element = rspamd_xml_end_element,
+	.text = rspamd_xml_text,
+	.passthrough = NULL,
+	.error = rspamd_xml_error,
+};
+
+/*
+ * Parse the XML configuration file `filename` and apply it to `cfg`.
+ *
+ * The file is mapped read-only and fed to a GMarkup parse context driven by
+ * the rspamd_xml_* callbacks. Parser errors are logged by the rspamd_xml_error
+ * callback; stat/open/mmap failures are logged here.
+ *
+ * Returns TRUE when the whole document was parsed and is complete, FALSE on
+ * any I/O or parse error.
+ */
+gboolean
+read_xml_config (struct config_file *cfg, const char *filename)
+{
+	struct stat st;
+	int fd;
+	gchar *data;
+	gboolean res;
+	GMarkupParseContext *ctx;
+	GError *err = NULL;
+	struct rspamd_xml_userdata ud;
+
+	if (stat (filename, &st) == -1) {
+		msg_err ("cannot stat %s: %s", filename, strerror (errno));
+		return FALSE;
+	}
+	if ((fd = open (filename, O_RDONLY)) == -1) {
+		msg_err ("cannot open %s: %s", filename, strerror (errno));
+		return FALSE;
+	}
+	/* Now mmap this file to simplify reading process */
+	if ((data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		msg_err ("cannot mmap %s: %s", filename, strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	/* The mapping stays valid after the descriptor is closed */
+	close (fd);
+
+	/*
+	 * Prepare xml parser. Every field must be initialised: the callbacks
+	 * test and g_free() section_name/other_data before anything assigns them.
+	 */
+	ud.cfg = cfg;
+	ud.state = XML_READ_START;
+	ud.section_name = NULL;
+	ud.other_data = NULL;
+
+	ctx = g_markup_parse_context_new (&xml_parser, G_MARKUP_TREAT_CDATA_AS_TEXT, &ud, NULL);
+	res = g_markup_parse_context_parse (ctx, data, st.st_size, &err);
+	if (res) {
+		/* Tell the parser that input is finished so a truncated document is rejected */
+		res = g_markup_parse_context_end_parse (ctx, &err);
+	}
+	if (err != NULL) {
+		/* Already reported through the error callback, only release it here */
+		g_error_free (err);
+	}
+	g_markup_parse_context_free (ctx);
+
+	munmap (data, st.st_size);
+
+	return res;
+}
+
/*
* vi:ts=4
*/
diff --git a/src/cfg_xml.c b/src/cfg_xml.c
new file mode 100644
index 000000000..2308fdba4
--- /dev/null
+++ b/src/cfg_xml.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Read and write rspamd dynamic parameters from xml files
+ */
+
+#include "config.h"
+#include "cfg_xml.h"
+#include "logger.h"
+
+/* Error domain attached to every GError created by the XML configuration callbacks. */
+GQuark
+xml_error_quark (void)
+{
+	GQuark domain = g_quark_from_static_string ("xml-error-quark");
+
+	return domain;
+}
+
+
+/*
+ * Look up attribute `attr` (ASCII case-insensitive) in the parallel,
+ * NULL-terminated arrays `attribute_names` / `attribute_values`.
+ *
+ * On success stores the matching value in *res and returns TRUE. The stored
+ * pointer refers to the caller's array and is not copied. Returns FALSE and
+ * leaves *res untouched when the attribute is absent.
+ */
+static inline gboolean
+extract_attr (const gchar *attr, const gchar **attribute_names, const gchar **attribute_values, gchar **res)
+{
+	const gchar **cur_attr = attribute_names;
+	const gchar **cur_value = attribute_values;
+
+	while (*cur_attr && *cur_value) {
+		/* g_ascii_strcasecmp returns 0 on equality */
+		if (g_ascii_strcasecmp (*cur_attr, attr) == 0) {
+			*res = (gchar *) *cur_value;
+			return TRUE;
+		}
+		cur_attr ++;
+		cur_value ++;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Copy `len` bytes of `text` into a NUL-terminated string allocated from `pool`.
+ *
+ * `text` comes from the GMarkup text callback and is not NUL-terminated, so
+ * the copy must be bounded by `len` only; g_strlcpy is unsuitable because it
+ * requires a NUL-terminated source.
+ */
+static inline char*
+xml_asciiz_string (memory_pool_t *pool, const gchar *text, gsize len)
+{
+	char *val;
+
+	val = memory_pool_alloc (pool, len + 1);
+	memcpy (val, text, len);
+	val[len] = '\0';
+
+	return val;
+}
+
+/*
+ * Set option `ud->other_data` of module `ud->section_name` to the given text.
+ *
+ * The value is copied into the config pool. If the module already has an
+ * option with that name its value is replaced; otherwise a new module_opt is
+ * added to the module's list in cfg->modules_opts, creating the list when
+ * the module has no options yet.
+ */
+static void
+xml_parse_module_opt (struct rspamd_xml_userdata *ud, const gchar *text, gsize len)
+{
+	char *val;
+	GList *opts, *it;
+	struct module_opt *cur;
+
+	val = xml_asciiz_string (ud->cfg->cfg_pool, text, len);
+	opts = g_hash_table_lookup (ud->cfg->modules_opts, ud->section_name);
+
+	/* First try to find option with this name */
+	for (it = opts; it != NULL; it = g_list_next (it)) {
+		cur = it->data;
+		if (strcmp (cur->param, ud->other_data) == 0) {
+			/* cur->value is in pool, so the old value needs no explicit free */
+			cur->value = val;
+			return;
+		}
+	}
+
+	/* Not found, build a new option structure */
+	cur = memory_pool_alloc (ud->cfg->cfg_pool, sizeof (struct module_opt));
+	cur->param = memory_pool_strdup (ud->cfg->cfg_pool, ud->other_data);
+	cur->value = val;
+
+	if (opts == NULL) {
+		/* First option of this module: register a new list under the module name */
+		opts = g_list_prepend (NULL, cur);
+		g_hash_table_insert (ud->cfg->modules_opts, memory_pool_strdup (ud->cfg->cfg_pool, ud->section_name), opts);
+	}
+	else {
+		/*
+		 * Append instead of prepend: the list head stored in the hash table
+		 * stays the same, so the new element is reachable without
+		 * re-inserting the key.
+		 */
+		opts = g_list_append (opts, cur);
+	}
+}
+
+/*
+ * Elements accepted directly inside <rspamd>: tag name, the attribute that
+ * must be present (NULL when none is needed) and the state entered on open.
+ */
+static const struct xml_section_def {
+	const gchar *tag;
+	const gchar *attr;
+	enum xml_read_state state;
+} xml_sections[] = {
+	{"module", "name", XML_READ_MODULE},
+	{"factors", NULL, XML_READ_FACTORS},
+	{"logging", NULL, XML_READ_LOGGING},
+	{"metric", "name", XML_READ_METRIC},
+	{"classifier", "type", XML_READ_CLASSIFIER},
+	{"worker", "type", XML_READ_WORKER},
+	{"variable", "name", XML_READ_VARIABLE},
+	{"pidfile", NULL, XML_READ_PIDFILE},
+	{"statfile_pool_size", NULL, XML_READ_STATFILE_POOL},
+	{"filters", NULL, XML_READ_FILTERS},
+};
+
+/*
+ * Return a g_strdup'ed copy of attribute `attr` of element `tag`, or NULL
+ * after setting *error (XML_PARAM_MISSING) when the attribute is absent.
+ */
+static gchar *
+xml_dup_required_attr (const gchar *tag, const gchar *attr, const gchar **attribute_names,
+		const gchar **attribute_values, GError **error)
+{
+	gchar *value;
+
+	if (extract_attr (attr, attribute_names, attribute_values, &value)) {
+		return g_strdup (value);
+	}
+
+	*error = g_error_new (xml_error_quark (), XML_PARAM_MISSING, "param '%s' is required for tag '%s'", attr, tag);
+	return NULL;
+}
+
+/*
+ * GMarkup start-element callback: advances the state machine in `user_data`
+ * (struct rspamd_xml_userdata) for every opening tag.
+ *
+ * - XML_READ_START: only the root <rspamd> element is accepted.
+ * - XML_READ_PARAM: known section tags (see xml_sections) switch to their
+ *   state and, where required, store the name/type attribute in
+ *   ud->section_name; unknown tags leave the state unchanged.
+ * - XML_READ_MODULE / XML_READ_FACTORS: <param> / <factor> store their
+ *   'name' attribute in ud->other_data for the text callback.
+ * - Any state without a case here reports XML_EXTRA_ELEMENT.
+ *
+ * Errors are returned through *error; a missing attribute or root element
+ * also moves the state to XML_ERROR.
+ */
+void
+rspamd_xml_start_element (GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names,
+		const gchar **attribute_values, gpointer user_data, GError **error)
+{
+	struct rspamd_xml_userdata *ud = user_data;
+	const struct xml_section_def *def;
+	const gchar *item_tag;
+	gchar *name;
+	guint i;
+
+	switch (ud->state) {
+	case XML_READ_START:
+		if (g_ascii_strcasecmp (element_name, "rspamd") != 0) {
+			/* Invalid XML, it must contain root element <rspamd></rspamd> */
+			*error = g_error_new (xml_error_quark (), XML_START_MISSING, "start element is missing");
+			ud->state = XML_ERROR;
+		}
+		else {
+			ud->state = XML_READ_PARAM;
+		}
+		break;
+	case XML_READ_PARAM:
+		/* Read parameter name and try to find it among the known sections */
+		for (i = 0; i < G_N_ELEMENTS (xml_sections); i ++) {
+			def = &xml_sections[i];
+			if (g_ascii_strcasecmp (element_name, def->tag) != 0) {
+				continue;
+			}
+			if (def->attr == NULL) {
+				ud->state = def->state;
+			}
+			else {
+				name = xml_dup_required_attr (def->tag, def->attr, attribute_names, attribute_values, error);
+				if (name != NULL) {
+					ud->section_name = name;
+					ud->state = def->state;
+				}
+				else {
+					ud->state = XML_ERROR;
+				}
+			}
+			break;
+		}
+		break;
+	case XML_READ_MODULE:
+	case XML_READ_FACTORS:
+		/* Both sections hold named items whose text carries the value */
+		item_tag = (ud->state == XML_READ_MODULE) ? "param" : "factor";
+		if (g_ascii_strcasecmp (element_name, item_tag) == 0) {
+			name = xml_dup_required_attr (item_tag, "name", attribute_names, attribute_values, error);
+			if (name != NULL) {
+				ud->other_data = name;
+			}
+			else {
+				ud->state = XML_ERROR;
+			}
+		}
+		break;
+	case XML_READ_CLASSIFIER:
+	case XML_READ_STATFILE:
+	case XML_READ_WORKER:
+	case XML_READ_LOGGING:
+		/* Nested elements of these sections are not interpreted yet */
+		break;
+	default:
+		*error = g_error_new (xml_error_quark (), XML_EXTRA_ELEMENT, "element %s is unexpected in this state", element_name);
+		break;
+	}
+}
+
+/*
+ * Compare the closing tag with the expected name `x`.
+ *
+ * On a match the state returns to XML_READ_PARAM and `res` becomes TRUE; for
+ * sections that may contain nested elements (required == FALSE) the stored
+ * section name is released and reset, so a later section that never
+ * assigns section_name cannot free the same pointer again.
+ * On a mismatch `res` becomes FALSE; it is an error only when `required` is
+ * TRUE, otherwise the tag is taken to close a nested element and is ignored.
+ */
+#define CHECK_TAG(x, required) \
+do { \
+if (g_ascii_strcasecmp (element_name, (x)) == 0) { \
+	ud->state = XML_READ_PARAM; \
+	res = TRUE; \
+	if (!(required)) { \
+		g_free (ud->section_name); \
+		ud->section_name = NULL; \
+	} \
+} \
+else { \
+	res = FALSE; \
+	if ((required) == TRUE) { \
+		*error = g_error_new (xml_error_quark (), XML_UNMATCHED_TAG, "element %s is unexpected in this state, expected %s", element_name, (x)); \
+		ud->state = XML_ERROR; \
+	} \
+} \
+} while (0)
+
+/*
+ * GMarkup end-element callback: leaves the current section when its closing
+ * tag is seen and finishes the document on </rspamd>.
+ *
+ * Unexpected closing tags are reported through *error and move the state to
+ * XML_ERROR; states without a case here only switch to XML_ERROR.
+ */
+void
+rspamd_xml_end_element (GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error)
+{
+	struct rspamd_xml_userdata *ud = user_data;
+	gboolean res;
+
+	switch (ud->state) {
+	case XML_READ_MODULE:
+		CHECK_TAG ("module", FALSE);
+		break;
+	case XML_READ_CLASSIFIER:
+		CHECK_TAG ("classifier", FALSE);
+		break;
+	case XML_READ_STATFILE:
+		CHECK_TAG ("statfile", FALSE);
+		break;
+	case XML_READ_FACTORS:
+		CHECK_TAG ("factors", FALSE);
+		break;
+	case XML_READ_METRIC:
+		CHECK_TAG ("metric", FALSE);
+		break;
+	case XML_READ_WORKER:
+		CHECK_TAG ("worker", FALSE);
+		break;
+	case XML_READ_VARIABLE:
+		CHECK_TAG ("variable", TRUE);
+		if (res) {
+			/* The variable name was g_strdup'ed on the opening tag; release it here */
+			g_free (ud->section_name);
+			ud->section_name = NULL;
+		}
+		break;
+	case XML_READ_PIDFILE:
+		CHECK_TAG ("pidfile", TRUE);
+		break;
+	case XML_READ_STATFILE_POOL:
+		CHECK_TAG ("statfile_pool_size", TRUE);
+		break;
+	case XML_READ_FILTERS:
+		CHECK_TAG ("filters", TRUE);
+		break;
+	case XML_READ_LOGGING:
+		CHECK_TAG ("logging", FALSE);
+		break;
+	case XML_READ_PARAM:
+		if (g_ascii_strcasecmp (element_name, "rspamd") == 0) {
+			/* End of document */
+			ud->state = XML_END;
+		}
+		else {
+			*error = g_error_new (xml_error_quark (), XML_EXTRA_ELEMENT, "element %s is umatched", element_name);
+			ud->state = XML_ERROR;
+		}
+		break;
+	default:
+		ud->state = XML_ERROR;
+		break;
+	}
+
+}
+#undef CHECK_TAG
+
+/*
+ * GMarkup text callback: stores the character data of the current element
+ * into the configuration according to the parser state.
+ *
+ * `text` is not NUL-terminated; it is always copied into the config pool
+ * before use. Names taken from ud->other_data / ud->section_name are
+ * g_strdup'ed by the start-element callback, so a pool copy is used as the
+ * hash key and the temporary name is released and reset afterwards; this
+ * keeps later text chunks (e.g. whitespace between elements) from touching
+ * a freed pointer.
+ * NOTE(review): assumes cfg->factors and cfg->variables accept pool-owned
+ * keys, as cfg->modules_opts does in xml_parse_module_opt - confirm.
+ */
+void
+rspamd_xml_text (GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
+{
+	struct rspamd_xml_userdata *ud = user_data;
+	char *val;
+	double *tmp;
+
+	switch (ud->state) {
+	case XML_READ_MODULE:
+		if (ud->other_data) {
+			/* Insert or replace module's option */
+			xml_parse_module_opt (ud, text, text_len);
+			g_free (ud->other_data);
+			ud->other_data = NULL;
+		}
+		break;
+	case XML_READ_CLASSIFIER:
+		break;
+	case XML_READ_STATFILE:
+		break;
+	case XML_READ_FACTORS:
+		if (ud->other_data) {
+			/* Factor name is in other_data, its numeric value is the text */
+			val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+			tmp = memory_pool_alloc (ud->cfg->cfg_pool, sizeof (double));
+			*tmp = strtod (val, NULL);
+			g_hash_table_insert (ud->cfg->factors, memory_pool_strdup (ud->cfg->cfg_pool, ud->other_data), tmp);
+			g_free (ud->other_data);
+			ud->other_data = NULL;
+		}
+		break;
+	case XML_READ_METRIC:
+		break;
+	case XML_READ_WORKER:
+		break;
+	case XML_READ_VARIABLE:
+		/* The opening <variable> tag stores its name in section_name, not other_data */
+		if (ud->section_name) {
+			val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+			g_hash_table_insert (ud->cfg->variables, memory_pool_strdup (ud->cfg->cfg_pool, ud->section_name), val);
+		}
+		break;
+	case XML_READ_PIDFILE:
+		val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+		ud->cfg->pid_file = val;
+		break;
+	case XML_READ_STATFILE_POOL:
+		val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+		ud->cfg->max_statfile_size = strtoull (val, NULL, 10);
+		break;
+	case XML_READ_FILTERS:
+		val = xml_asciiz_string (ud->cfg->cfg_pool, text, text_len);
+		ud->cfg->filters_str = val;
+		break;
+	case XML_READ_LOGGING:
+		break;
+	case XML_READ_PARAM:
+		break;
+	default:
+		ud->state = XML_ERROR;
+		break;
+	}
+
+}
+
+/*
+ * GMarkup error callback: logs the parser error message together with the
+ * numeric state the state machine was in.
+ */
+void
+rspamd_xml_error (GMarkupParseContext *context, GError *error, gpointer user_data)
+{
+	const struct rspamd_xml_userdata *parser_data = user_data;
+
+	msg_err ("xml parser error: %s, at state %d", error->message, parser_data->state);
+}
+
diff --git a/src/cfg_xml.h b/src/cfg_xml.h
new file mode 100644
index 000000000..3e1a4bcd4
--- /dev/null
+++ b/src/cfg_xml.h
@@ -0,0 +1,66 @@
+#ifndef RSPAMD_CFG_XML_H
+#define RSPAMD_CFG_XML_H
+
+#include "config.h"
+#include "cfg_file.h"
+
+/* Error codes passed to g_error_new() together with the xml_error_quark() domain */
+/* Root element <rspamd> is missing */
+#define XML_START_MISSING 1
+/* A required attribute is missing on an element */
+#define XML_PARAM_MISSING 2
+/* An element appeared where none is expected */
+#define XML_EXTRA_ELEMENT 3
+/* A closing tag does not match the element being read */
+#define XML_UNMATCHED_TAG 4
+
+/*
+ * States of the XML configuration parser. Apart from the first two and the
+ * last two, each state means "inside the element of the same name".
+ */
+enum xml_read_state {
+ /* Nothing read yet, the root <rspamd> element is expected */
+ XML_READ_START,
+ /* Inside <rspamd>, waiting for the next section or parameter element */
+ XML_READ_PARAM,
+ XML_READ_MODULE,
+ XML_READ_CLASSIFIER,
+ XML_READ_STATFILE,
+ XML_READ_FACTORS,
+ XML_READ_METRIC,
+ XML_READ_WORKER,
+ XML_READ_VARIABLE,
+ XML_READ_PIDFILE,
+ XML_READ_STATFILE_POOL,
+ XML_READ_FILTERS,
+ XML_READ_LOGGING,
+ /* A callback detected invalid input */
+ XML_ERROR,
+ /* The closing </rspamd> tag has been read */
+ XML_END
+};
+
+/* Parser context handed to every GMarkup callback as user_data */
+struct rspamd_xml_userdata {
+ /* Current position in the state machine */
+ enum xml_read_state state;
+ /* Configuration that receives the parsed values */
+ struct config_file *cfg;
+ /* g_strdup'ed 'name'/'type' attribute of the section being read */
+ gchar *section_name;
+ /* g_strdup'ed 'name' attribute of the current <param>/<factor> element */
+ gpointer other_data;
+};
+
+/*
+ * Called for open tags <foo bar="baz">; user_data is a
+ * struct rspamd_xml_userdata and problems are reported through error.
+ */
+void rspamd_xml_start_element (GMarkupParseContext *context,
+ const gchar *element_name,
+ const gchar **attribute_names,
+ const gchar **attribute_values,
+ gpointer user_data,
+ GError **error);
+
+/* Called for close tags </foo>; user_data is a struct rspamd_xml_userdata */
+void rspamd_xml_end_element (GMarkupParseContext *context,
+ const gchar *element_name,
+ gpointer user_data,
+ GError **error);
+
+/*
+ * Called for character data between tags.
+ * text is not nul-terminated, only text_len bytes may be read.
+ */
+void rspamd_xml_text (GMarkupParseContext *context,
+ const gchar *text,
+ gsize text_len,
+ gpointer user_data,
+ GError **error);
+
+/*
+ * Called on error, including one set by other
+ * methods in the vtable. The GError should not be freed.
+ */
+void rspamd_xml_error (GMarkupParseContext *context,
+ GError *error,
+ gpointer user_data);
+
+
+#endif