aboutsummaryrefslogtreecommitdiffstats
path: root/src/rcl/rcl_parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/rcl/rcl_parser.c')
-rw-r--r--src/rcl/rcl_parser.c298
1 files changed, 298 insertions, 0 deletions
diff --git a/src/rcl/rcl_parser.c b/src/rcl/rcl_parser.c
index 574edf8c0..8ace95cf0 100644
--- a/src/rcl/rcl_parser.c
+++ b/src/rcl/rcl_parser.c
@@ -29,3 +29,301 @@
* @file rcl_parser.c
* The implementation of rcl parser
*/
+
+/**
+ * Create a new object
+ * @return new object
+ */
+static inline rspamd_cl_object_t *
+rspamd_cl_object_new (void)
+{
+ return g_slice_alloc0 (sizeof (rspamd_cl_object_t));
+}
+
+/**
+ * Move up to len characters
+ * @param parser
+ * @param begin
+ * @param len
+ * @return new position in chunk
+ */
+static inline const guchar *
+rspamd_cl_chunk_getc (struct rspamd_cl_parser *parser, const guchar *begin, gsize len)
+{
+ while (len > 0) {
+ len --;
+ if (*begin == '\n') {
+ parser->line ++;
+ parser->column = 0;
+ }
+ else {
+ parser->column ++;
+ }
+ begin ++;
+ }
+ return begin;
+}
+
+static gboolean
+rspamd_cl_check_open_comment (struct rspamd_cl_parser *parser, const guchar **begin, gsize *len)
+{
+ const guchar *p = *begin;
+
+ if (*p == '#') {
+ if (parser->state != RSPAMD_RCL_STATE_SCOMMENT &&
+ parser->state != RSPAMD_RCL_STATE_MCOMMENT) {
+ parser->prev_state = parser->state;
+ parser->state = RSPAMD_RCL_STATE_SCOMMENT;
+ *begin = rspamd_cl_chunk_getc (parser, *begin, 1);
+ (*len) --;
+ return TRUE;
+ }
+ }
+ else if (*p == '/' && *len >= 2) {
+ if (*p == '/' && parser->state != RSPAMD_RCL_STATE_SCOMMENT &&
+ parser->state != RSPAMD_RCL_STATE_MCOMMENT) {
+ parser->prev_state = parser->state;
+ parser->state = RSPAMD_RCL_STATE_SCOMMENT;
+ *begin = rspamd_cl_chunk_getc (parser, *begin, 2);
+ (*len) -= 2;
+ return TRUE;
+ }
+ else if (*p == '*') {
+ /* Multiline comment */
+ if (parser->state == RSPAMD_RCL_STATE_SCOMMENT) {
+ /* Immediately finish single line comment and start multiline one */
+ parser->state = RSPAMD_RCL_STATE_MCOMMENT;
+ parser->comments_nested ++;
+ }
+ else if (parser->state == RSPAMD_RCL_STATE_MCOMMENT) {
+ parser->comments_nested ++;
+ }
+ else {
+ parser->prev_state = parser->state;
+ parser->state = RSPAMD_RCL_STATE_SCOMMENT;
+ }
+ *begin = rspamd_cl_chunk_getc (parser, *begin, 2);
+ (*len) -= 2;
+ }
+ }
+
+ return FALSE;
+}
+
+/**
+ * Handle include macro
+ * @param data include data
+ * @param len length of data
+ * @param ud user data
+ * @param err error ptr
+ * @return
+ */
+static gboolean
+rspamd_cl_include_handler (const guchar *data, gsize len, gpointer ud, GError **err)
+{
+ return TRUE;
+}
+
+/**
+ * Handle includes macro
+ * @param data include data
+ * @param len length of data
+ * @param ud user data
+ * @param err error ptr
+ * @return
+ */
+static gboolean
+rspamd_cl_includes_handler (const guchar *data, gsize len, gpointer ud, GError **err)
+{
+ return TRUE;
+}
+
+static const guchar *
+rspamd_cl_skip_spaces (struct rspamd_cl_parser *parser, const guchar *data, gsize *len)
+{
+ const guchar *p, *end;
+
+ p = data;
+ end = data + *len;
+
+ if (parser->state == RSPAMD_RCL_STATE_KEY) {
+ /* Skip any space character */
+ while (p < end) {
+ if (!g_ascii_isspace (*p)) {
+ break;
+ }
+ p = rspamd_cl_chunk_getc (parser, p, 1);
+ (*len) --;
+ }
+ }
+ else {
+ while (p < end) {
+ if (!g_ascii_isspace (*p) || *p == '\n' || *p == '\r') {
+ break;
+ }
+ p = rspamd_cl_chunk_getc (parser, p, 1);
+ (*len) --;
+ }
+ }
+
+ return p;
+}
+
+static gboolean
+rspamd_cl_parse_key (struct rspamd_cl_parser *parser, const guchar **data,
+ gsize *len, GError **err)
+{
+ const guchar *p, *c = NULL, *end;
+
+
+ p = *data;
+ end = p + *len;
+
+ while (p < end) {
+ /*
+ * A key must start with alpha and end with space character
+ */
+ if (*p == '.') {
+ /* It is macro actually */
+ p = rspamd_cl_chunk_getc (parser, p, 1);
+ len --;
+ parser->state = RSPAMD_RCL_STATE_MACRO_NAME;
+ *data = p;
+ return TRUE;
+ }
+ else if (c == NULL) {
+ if (g_ascii_isalpha (*p)) {
+ /* The first symbol */
+ c = p;
+ p = rspamd_cl_chunk_getc (parser, p, 1);
+ (*len) --;
+ }
+ else if (*p == '"') {
+ /* JSON style key */
+ c = p + 1;
+ p = rspamd_cl_chunk_getc (parser, p, 2);
+ (*len) -= 2;
+ }
+ else {
+ /* Invalid identifier */
+ parser->state = RSPAMD_RCL_STATE_ERROR;
+ g_set_error (err, RCL_ERROR, RSPAMD_CL_ESYNTAX, "key must start with a letter, "
+ "line %d, pos: %d", parser->line, parser->column);
+ return FALSE;
+ }
+ }
+ else {
+ if (g_ascii_isalnum (*p)) {
+ p = rspamd_cl_chunk_getc (parser, p, 1);
+ (*len) --;
+ }
+ else if (*p == ' ' || *p == '\t') {
+ p = rspamd_cl_skip_spaces (parser, p, len);
+ }
+ }
+ }
+ *data = p;
+
+ return TRUE;
+}
+
+/**
+ * Handle the main states of rcl parser
+ * @param parser parser structure
+ * @param data the pointer to the beginning of a chunk
+ * @param len the length of a chunk
+ * @param err if *err is NULL it is set to parser error
+ * @return TRUE if chunk has been parsed and FALSE in case of error
+ */
+static gboolean
+rspamd_cl_state_machine (struct rspamd_cl_parser *parser, const guchar *data,
+ gsize len, GError **err)
+{
+ const guchar *p, *end;
+ rspamd_cl_object_t *obj;
+
+ p = data;
+ end = p + len;
+ while (p < end) {
+ switch (parser->state) {
+ case RSPAMD_RCL_STATE_INIT:
+ /*
+ * At the init state we can either go to the parse array or object
+ * if we got [ or { correspondingly or can just treat new data as
+ * a key of newly created object
+ */
+ if (!rspamd_cl_check_open_comment (parser, &p, &len)) {
+ obj = rspamd_cl_object_new ();
+ if (*p == '[') {
+ parser->state = RSPAMD_RCL_STATE_ARRAY;
+ obj->type = RSPAMD_CL_ARRAY;
+ p = rspamd_cl_chunk_getc (parser, p, 1);
+ len --;
+ }
+ else {
+ parser->state = RSPAMD_RCL_STATE_KEY;
+ obj->type = RSPAMD_CL_OBJECT;
+ if (*p == '{') {
+ p = rspamd_cl_chunk_getc (parser, p, 1);
+ len --;
+ }
+ }
+ parser->cur_obj = obj;
+ parser->top_obj = obj;
+ p = rspamd_cl_skip_spaces (parser, p, &len);
+ }
+ break;
+ case RSPAMD_RCL_STATE_KEY:
+ if (!rspamd_cl_parse_key (parser, &p, &len, err)) {
+ return FALSE;
+ }
+ break;
+ default:
+ /* TODO: add all states */
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+struct rspamd_cl_parser*
+rspamd_cl_parser_new (void)
+{
+ struct rspamd_cl_parser *new;
+
+ new = g_slice_alloc0 (sizeof (struct rspamd_cl_parser));
+
+ new->line = 1;
+ rspamd_cl_parser_register_macro (new, "include", rspamd_cl_include_handler, new);
+ rspamd_cl_parser_register_macro (new, "includes", rspamd_cl_includes_handler, new);
+
+ return new;
+}
+
+
+void
+rspamd_cl_parser_register_macro (struct rspamd_cl_parser *parser, const gchar *macro,
+ rspamd_cl_macro_handler handler, gpointer ud)
+{
+ struct rspamd_cl_macro *new;
+
+ new = g_slice_alloc0 (sizeof (struct rspamd_cl_macro));
+ new->handler = handler;
+ new->name = g_strdup (macro);
+ new->ud = ud;
+ HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
+}
+
+gboolean
+rspamd_cl_parser_add_chunk (struct rspamd_cl_parser *parser, const guchar *data,
+ gsize len, GError **err)
+{
+ if (parser->state != RSPAMD_RCL_STATE_ERROR) {
+ return rspamd_cl_state_machine (parser, data, len, err);
+ }
+
+ g_set_error (err, RCL_ERROR, RSPAMD_CL_ESTATE, "a parser is in an invalid state");
+
+ return FALSE;
+}