]> source.dussan.org Git - rspamd.git/commitdiff
* Implement expression parser (convertor to inverse polish record)
authorcebka@cebka-laptop <cebka@cebka-laptop>
Mon, 13 Oct 2008 15:06:07 +0000 (19:06 +0400)
committercebka@cebka-laptop <cebka@cebka-laptop>
Mon, 13 Oct 2008 15:06:07 +0000 (19:06 +0400)
* Improve configure system by implementing dependencies
* Add .depends files for subdirs that require them
* Write simple test for expressions parser

configure
main.h
test/.depends [new file with mode: 0644]
test/Makefile.in
test/rspamd_expression_test.c [new file with mode: 0644]
test/rspamd_memcached_test.c
test/rspamd_test_suite.c
test/tests.h
util.c
utils/.depends [new file with mode: 0644]
utils/Makefile.in

index 20a413419fd62e08d9a13692157b1a7e0c2a341d..73eab3b06cd8b5f72adf47f92794c816f5952eaf 100755 (executable)
--- a/configure
+++ b/configure
@@ -45,6 +45,9 @@ PERL_MAKEFILE="perl/Makefile.PL.in"
 PERLCFLAGS=""
 PERLLDFLAGS=""
 
+DEPENDS_FILE=".depends"
+OPTS=""
+
 TARGETS="${EXEC}"
 
 prepare_config()
@@ -69,6 +72,7 @@ cleanup()
 have_opt()
 {
        echo "#define HAVE_$1" >> $CONFIG
+       OPTS="$OPTS HAVE_$1"
 }
 
 check_compiler()
@@ -404,16 +408,49 @@ check_group()
        fi
 }
 
-write_result()
+check_depends()
+{
+       ifdep=""
+       depends=""
+       if [ ! -f $DEPENDS_FILE ] ; then
+               return 0
+       fi
+
+       while read line ; do
+               echo $line | grep '#if' > /dev/null
+               if [ $? -eq 0 ] ; then
+                       ifdep=`echo $line | sed -e 's/^#if \([A-Za-z0-9_]*\)$/\1/'`
+               elif [ "F$ifdep" != "F" ] ; then
+                       echo $line | grep "#endif" > /dev/null
+                       if [ $? -eq 0 ] ; then
+                               ifdep=""
+                       else
+                               echo $OPTS | grep $ifdep > /dev/null
+                               if [ $? -eq 0 ] ; then
+                                       depends="$depends $line"
+                               fi
+                       fi
+               else
+                       depends="$depends $line"
+               fi
+       done < $DEPENDS_FILE
+       
+       obj_depends=`echo $depends | sed -e 's/\([^ ]\{1,\}\)\/\([^/]\{1,\}\).c/\2.o/g'`
+       echo "DEPENDS=$depends" >> $MAKEFILE
+       echo "OBJ_DEPENDS=$obj_depends" >> $MAKEFILE
+       for _dep in $depends ; do
+               _obj_dep=`echo $_dep | sed -e 's/\([^ ]\{1,\}\)\/\([^/]\{1,\}\).c/\2.o/g'`
+               cat >> $MAKEFILE.dep << END
+${_obj_dep}: ${_dep}
+       \$(CC) \$(OPT_FLAGS) \$(CFLAGS) \$(PTHREAD_CFLAGS) -o ${_obj_dep} -c ${_dep}
+
+END
+
+       done
+}
+
+write_modules()
 {
-       echo "Compiler: $GCC" >> config.log 
-       echo "Make: $MAKE" >> config.log 
-       echo "Sources: $SOURCES" >> config.log
-       echo "Cflags: $CFLAGS" >> config.log
-       echo "Ldflags: $LDFLAGS" >> config.log
-       echo "Libs: $LIBS" >> config.log
-       echo "#define RVERSION \"${VERSION}\"" >> $CONFIG
-       echo "#define HASH_COMPAT" >> $CONFIG
        # Write modules init function
        echo "#ifndef MODULES_H" > modules.h
        echo "#include \"config.h\"" >> modules.h
@@ -430,6 +467,11 @@ write_result()
        echo "#define MODULES_NUM $modules_num" >> $CONFIG
        SOURCES="$SOURCES modules.c"
        OBJECTS=`echo $SOURCES | sed -e 's/\.c/\.o/g'`
+
+}
+
+write_perl()
+{
        # Write to perl Makefile
        sed -e "s|%%libs%%|${LDFLAGS} ${LIBS}|" < $PERL_MAKEFILE > .pl_tmp
        sed -e "s|%%include%%|${CFLAGS}|" < .pl_tmp > .pl2_tmp
@@ -440,6 +482,74 @@ write_result()
        cd `dirname ${PERL_MAKEFILE}`
        $PERL Makefile.PL
        cd $CURDIR
+}
+
+write_subdirs()
+{
+       # Write subdirs makefiles
+       clean_target="clean-subdirs: "
+       dist_clean_target="dist-clean-subdirs: "
+       for sub in $SUBDIRS ; do
+               cp $MAKEFILE $sub/$MAKEFILE
+               saved_pwd=`pwd`
+               old_objs=`echo $OBJECTS | sed -e 's/\([^. ]*\.o\)/..\/\1/g'`
+               old_srcs=`echo $SOURCES | sed -e 's/\([^. ]*\.c\)/..\/\1/g'`
+               cd $sub
+               sub_src="`echo *.c`"
+               sub_obj="`echo $sub_src | sed -e 's/\.c/\.o/g'`"
+               echo "SOURCES=$sub_src" >> $MAKEFILE
+               echo "OBJECTS=$sub_obj" >> $MAKEFILE
+               check_depends
+               cat Makefile.in >> $MAKEFILE
+               if [ -f $MAKEFILE.dep ] ; then
+                       cat $MAKEFILE.dep >> $MAKEFILE
+                       rm -f $MAKEFILE.dep
+               fi
+               for _sub_src in $sub_src ; do
+                       _sub_obj=`echo $_sub_src | sed -e 's/\.c/\.o/g'`
+                       cat >> $MAKEFILE << END
+${_sub_obj}: ${_sub_src}
+       \$(CC) \$(OPT_FLAGS) \$(CFLAGS) \$(PTHREAD_CFLAGS) -o ${_sub_obj} -c ${_sub_src}
+
+END
+               done
+               cd $saved_pwd
+               dist_clean_target="$dist_clean_target ${sub}-dist-clean"
+               clean_target="$clean_target ${sub}-clean"
+       done
+
+       # Write IN file
+       cat $MAKEFILE_IN >> $MAKEFILE
+
+       # Process clean targets for all subdirs
+       echo $dist_clean_target >> $MAKEFILE
+       echo $clean_target >> $MAKEFILE
+       for sub in $SUBDIRS ; do
+               cat >> $MAKEFILE << END
+${sub}-clean:
+       cd ${sub} && make clean && cd ..
+${sub}-dist-clean:
+       cd ${sub} && make dist-clean && cd ..
+${sub}:
+       cd ${sub} && make && cd ..
+
+END
+       done
+}
+
+write_result()
+{
+       echo "Compiler: $GCC" >> config.log 
+       echo "Make: $MAKE" >> config.log 
+       echo "Sources: $SOURCES" >> config.log
+       echo "Cflags: $CFLAGS" >> config.log
+       echo "Ldflags: $LDFLAGS" >> config.log
+       echo "Libs: $LIBS" >> config.log
+       echo "#define RVERSION \"${VERSION}\"" >> $CONFIG
+       echo "#define HASH_COMPAT" >> $CONFIG
+
+       write_modules
+       write_perl
        # Make CFLAGS more readable
        CFLAGS="$CFLAGS $PERLCFLAGS"
        LIBS="$LIBS $PERLLDFLAGS"
@@ -495,50 +605,8 @@ MANPATH=$MANPATH
 SUBDIRS=$SUBDIRS
 
 END
-       # Write subdirs makefiles
-       clean_target="clean-subdirs: "
-       dist_clean_target="dist-clean-subdirs: "
-       for sub in $SUBDIRS ; do
-               cp $MAKEFILE $sub/$MAKEFILE
-               saved_pwd=`pwd`
-               old_objs=`echo $OBJECTS | sed -e 's/\([^. ]*\.o\)/..\/\1/g'`
-               old_srcs=`echo $SOURCES | sed -e 's/\([^. ]*\.c\)/..\/\1/g'`
-               cd $sub
-               sub_src="`echo *.c`"
-               sub_obj="`echo $sub_src | sed -e 's/\.c/\.o/g'`"
-               echo "SOURCES=$sub_src" >> $MAKEFILE
-               echo "OBJECTS=$sub_obj" >> $MAKEFILE
-               cat Makefile.in >> $MAKEFILE
-               for _sub_src in $sub_src ; do
-                       _sub_obj=`echo $_sub_src | sed -e 's/\.c/\.o/g'`
-                       cat >> $MAKEFILE << END
-${_sub_obj}: ${_sub_src}
-       \$(CC) \$(OPT_FLAGS) \$(CFLAGS) \$(PTHREAD_CFLAGS) -o ${_sub_obj} -c ${_sub_src}
-
-END
-               done
-               cd $saved_pwd
-               dist_clean_target="$dist_clean_target ${sub}-dist-clean"
-               clean_target="$clean_target ${sub}-clean"
-       done
-
-       # Write IN file
-       cat $MAKEFILE_IN >> $MAKEFILE
-
-       # Process clean targets for all subdirs
-       echo $dist_clean_target >> $MAKEFILE
-       echo $clean_target >> $MAKEFILE
-       for sub in $SUBDIRS ; do
-               cat >> $MAKEFILE << END
-${sub}-clean:
-       cd ${sub} && make clean && cd ..
-${sub}-dist-clean:
-       cd ${sub} && make dist-clean && cd ..
-${sub}:
-       cd ${sub} && make && cd ..
 
-END
-       done
+       write_subdirs
 
        # Write build targets to makefile
        cat >> $MAKEFILE << END
diff --git a/main.h b/main.h
index adefa576fc2b901281a67cd81b05e6bd1d9455fd..f111ba1b4ddbd28ba98686649b7a08b6256acc43 100644 (file)
--- a/main.h
+++ b/main.h
@@ -56,6 +56,16 @@ enum script_type {
        SCRIPT_MESSAGE,
 };
 
+/* Logic expression */
+struct expression {
+       enum { EXPR_OPERAND, EXPR_OPERATION } type;
+       union {
+               void *operand;
+               char operation;
+       } content;
+       struct expression *next;
+};
+
 /* Worker process structure */
 struct rspamd_worker {
        pid_t pid;
@@ -153,6 +163,7 @@ struct c_module {
 };
 
 void start_worker (struct rspamd_worker *worker, int listen_sock);
+struct expression* parse_expression (memory_pool_t *pool, char *line);
 
 #endif
 
diff --git a/test/.depends b/test/.depends
new file mode 100644 (file)
index 0000000..52848a0
--- /dev/null
@@ -0,0 +1,7 @@
+#if HAVE_STRLCPY_H
+../strlcpy.c
+#endif
+../mem_pool.c
+../url.c
+../util.c
+../memcached.c
index 14a2646b9de422bd2f218db18b1c245a5b589a9b..08f041e4eb101fed89d96a99e9b50c34aa4b0277 100644 (file)
@@ -2,8 +2,8 @@
 
 all: rspamd_test_suite
 
-rspamd_test_suite: $(OBJECTS) ../url.o ../util.o ../memcached.o
-       $(CC) $(PTHREAD_LDFLAGS) $(LDFLAGS) $(OBJECTS) ../url.o ../util.o ../memcached.o ../mem_pool.o $(LIBS) -o rspamd_test_suite
+rspamd_test_suite: $(OBJECTS) $(OBJ_DEPENDS)
+       $(CC) $(PTHREAD_LDFLAGS) $(LDFLAGS) $(OBJECTS) $(OBJ_DEPENDS) $(LIBS) -o rspamd_test_suite
 
 run_test: rspamd_test_suite
        gtester --verbose -k -o=rspamd_test.xml ./rspamd_test_suite
diff --git a/test/rspamd_expression_test.c b/test/rspamd_expression_test.c
new file mode 100644 (file)
index 0000000..5d8e2a6
--- /dev/null
@@ -0,0 +1,57 @@
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "../config.h"
+#include "../main.h"
+#include "../cfg_file.h"
+#include "tests.h"
+
+/* Vector of test expressions */
+char *test_expressions[] = {
+       "(A&B|!C)&!(D|E)",
+       "/test&!/&!/\\/|/",
+       NULL
+}; 
+
+void 
+rspamd_expression_test_func ()
+{
+       memory_pool_t *pool;
+       struct expression *cur;
+       char **line, *outstr;
+       int r, s;
+
+       pool = memory_pool_new (1024);
+       
+       line = test_expressions;
+       while (*line) {
+               r = 0;
+               cur = parse_expression (pool, *line);
+               s = strlen (*line) + 1;
+               outstr = memory_pool_alloc (pool, s);
+               while (cur) {
+                       if (cur->type == EXPR_OPERAND) {
+                               r += snprintf (outstr + r, s - r, "%s", (char *)cur->content.operand);
+                       }
+                       else {
+                               r += snprintf (outstr + r, s - r, "%c", cur->content.operation);
+                       }
+                       cur = cur->next;
+               }
+               msg_debug ("Parsed expression: '%s' -> '%s'", *line, outstr);
+               line ++;
+       }
+
+       memory_pool_delete (pool);
+}
index 1993c756e290eb9d4f7a75952b33190fe574f342..60ae1d3eb2219469c4625f744f9613745a2884ee 100644 (file)
@@ -33,7 +33,7 @@ memcached_callback (memcached_ctx_t *ctx, memc_error_t error, void *data)
                        break;
                case CMD_READ:
                        g_assert (error == OK);
-                       g_assert (!strcmp(ctx->param->buf, buf));
+                       g_assert (!memcmp(ctx->param->buf, buf, ctx->param->bufsize));
                        msg_debug ("Read ok");
                        memc_close_ctx (ctx);
                        tv.tv_sec = 0;
index 90e64b1129ae39635fc39daeb5dd6e2068188c29..dba448a2c50594dd5526f9f4838a87354cb9e088 100644 (file)
 #include "../cfg_file.h"
 #include "tests.h"
 
-#ifdef HAVE_STRLCPY_H
-#include "../strlcpy.c"
-#endif
-
 int
 main (int argc, char **argv)
 {
@@ -27,6 +23,7 @@ main (int argc, char **argv)
        g_test_add_func ("/rspamd/memcached", rspamd_memcached_test_func);
        g_test_add_func ("/rspamd/mem_pool", rspamd_mem_pool_test_func);
        g_test_add_func ("/rspamd/url", rspamd_url_test_func);
+       g_test_add_func ("/rspamd/expression", rspamd_expression_test_func);
 
        g_test_run ();
 }
index 3eddf92fc063d84210bbbd63f809508de5ed4e98..7ff7636236b4764f4e66873bad5741010f60c03a 100644 (file)
@@ -14,4 +14,7 @@ void rspamd_memcached_test_func ();
 /* Memory pools */
 void rspamd_mem_pool_test_func ();
 
+/* Expressions */
+void rspamd_expression_test_func ();
+
 #endif
diff --git a/util.c b/util.c
index 78ca58e5a4c42dd8db4f1af199290cedd2467acd..0bd7f91c8edebd465af7b2fa09f30dc4b816283e 100644 (file)
--- a/util.c
+++ b/util.c
@@ -542,3 +542,199 @@ pidfile_remove(struct pidfh *pfh)
        return (_pidfile_remove(pfh, 1));
 }
 #endif
+
+/*
+ * Functions for parsing expressions
+ */
+
+struct expression_stack {
+       char op;
+       struct expression_stack *next;
+};
+
+/*
+ * Push operation symbol to stack, return new head of stack
+ */
+static struct expression_stack*
+push_expression_stack (memory_pool_t *pool, struct expression_stack *head, char op)
+{
+       struct expression_stack *new;
+       new = memory_pool_alloc (pool, sizeof (struct expression_stack));
+       new->op = op;
+       new->next = head;
+       return new;                               
+}
+
+/*
+ * Pop symbol from stack and return it, return 0 if stack is empty
+ */
+static char
+delete_expression_stack (struct expression_stack **head)
+{
+       struct expression_stack *cur;
+       char res;
+
+       if(*head == NULL) return 0;
+
+       cur = *head;
+       res = cur->op;
+       
+       *head = cur->next;
+       return res;
+}
+
+/*
+ * Return operation priority
+ */
+static int
+logic_priority (char a)
+{
+       switch (a) {
+               case '!':
+                       return 3;
+               case '|':
+               case '&':
+                       return 2;
+               case '(':
+                       return 1;
+               default:
+                       return 0;
+       }
+}
+
+/*
+ * Return 0 if symbol is not operation symbol (operand)
+ * Return 1 if symbol is operation symbol
+ */
+static int
+is_operation_symbol (char a)
+{
+       switch (a) {
+               case '!':
+               case '&':
+               case '|':
+               case '(':
+               case ')':
+                       return 1;
+               default:
+                       return 0;
+       }
+}
+
+static void
+insert_expression (memory_pool_t *pool, struct expression **head, int type, char op, void *operand)
+{
+       struct expression *new, *cur;
+       
+       new = memory_pool_alloc (pool, sizeof (struct expression));
+       new->type = type;
+       if (new->type == EXPR_OPERAND) {
+               new->content.operand = operand;
+       }
+       else {
+               new->content.operation = op;
+       }
+       new->next = NULL;
+
+       if (!*head) {
+               *head = new;
+       }
+       else {
+               cur = *head;
+               while (cur->next) {
+                       cur = cur->next;
+               }
+               cur->next = new;
+       }
+}
+
+/*
+ * Convert a logic expression into inverse polish (postfix) record.
+ * '!', '&', '|', '(' and ')' are operation symbols unless they occur inside a
+ * /regexp/ (delimited by unescaped '/'); all other text is copied as operands.
+ * Returns head of the expression list, or NULL if line or pool is NULL or if
+ * a ')' has no matching '('. Memory is allocated from given pool.
+ */
+struct expression*
+parse_expression (memory_pool_t *pool, char *line)
+{
+       struct expression *expr = NULL;
+       struct expression_stack *stack = NULL;
+       char *p, *c, *str, op, in_regexp = 0;
+
+       if (line == NULL || pool == NULL) {
+               return NULL;
+       }
+
+       p = line;
+       c = p;
+       while (*p) {
+               if (is_operation_symbol (*p) && !in_regexp) {
+                       if (c != p) {
+                               /* Copy operand */
+                               str = memory_pool_alloc (pool, p - c + 1);
+                               strlcpy (str, c, (p - c + 1));
+                               insert_expression (pool, &expr, EXPR_OPERAND, 0, str);
+                       }
+                       if (*p == ')') {
+                               /* Pop all operators from stack to nearest '(' */
+                               while (stack != NULL && stack->op != '(') {
+                                       op = delete_expression_stack (&stack);
+                                       insert_expression (pool, &expr, EXPR_OPERATION, op, NULL);
+                               }
+                               if (stack == NULL) {
+                                       /* Unbalanced ')': no matching '(' on stack */
+                                       return NULL;
+                               }
+                               /*
+                                * Drop matching '(' as well: if it stays on stack, its low
+                                * priority hides operators pushed before the group, e.g.
+                                * "A&(B|C)|D" would become "ABC|D|&" instead of "ABC|&D|"
+                                */
+                               delete_expression_stack (&stack);
+                       }
+                       else if (*p == '(') {
+                               /* Push it to stack */
+                               stack = push_expression_stack (pool, stack, *p);
+                       }
+                       else {
+                               /*
+                                * Pop operations whose priority is not lower than this one's;
+                                * '(' has the lowest priority and always stops the loop.
+                                * Prefix '!' is right-associative with the highest priority and
+                                * never pops, so "!!A" gives "A!!" rather than "!A!"
+                                */
+                               while (*p != '!' && stack != NULL &&
+                                               logic_priority (stack->op) >= logic_priority (*p)) {
+                                       op = delete_expression_stack (&stack);
+                                       insert_expression (pool, &expr, EXPR_OPERATION, op, NULL);
+                               }
+                               stack = push_expression_stack (pool, stack, *p);
+                       }
+                       c = p + 1;
+               }
+               if (*p == '/' && (p == line || *(p - 1) != '\\')) {
+                       in_regexp = !in_regexp;
+               }
+               p++;
+       }
+       /* Write last operand if it exists */
+       if (c != p) {
+               str = memory_pool_alloc (pool, p - c + 1);
+               strlcpy (str, c, (p - c + 1));
+               insert_expression (pool, &expr, EXPR_OPERAND, 0, str);
+       }
+       /* Pop everything from stack, silently dropping any unclosed '(' */
+       while(stack != NULL) {
+               op = delete_expression_stack (&stack);
+               if (op != '(') {
+                       insert_expression (pool, &expr, EXPR_OPERATION, op, NULL);
+               }
+       }
+
+       return expr;
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/utils/.depends b/utils/.depends
new file mode 100644 (file)
index 0000000..52848a0
--- /dev/null
@@ -0,0 +1,7 @@
+#if HAVE_STRLCPY_H
+../strlcpy.c
+#endif
+../mem_pool.c
+../url.c
+../util.c
+../memcached.c
index 4ceb18ae195ecdb41f80f455dd307f3170968b79..3b9aa8e074102745302526453825c108542407e3 100644 (file)
@@ -2,8 +2,8 @@
 
 all: url_extracter
 
-url_extracter: $(OBJECTS) ../url.o ../util.o
-       $(CC) $(PTHREAD_LDFLAGS) $(LDFLAGS) $(OBJECTS) ../url.o ../util.o ../mem_pool.o $(LIBS) -o url_extracter
+url_extracter: $(OBJECTS) $(OBJ_DEPENDS)
+       $(CC) $(PTHREAD_LDFLAGS) $(LDFLAGS) $(OBJECTS) $(OBJ_DEPENDS) $(LIBS) -o url_extracter
 
 clean:
        rm -f *.o url_extracter *.core