* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <expression.h>
#include "config.h"
+#include "expression.h"
+#include "regexp.h"
enum rspamd_expression_op {
+ OP_INVALID = 0, /* not an operator; the parser also reads a popped OP_INVALID as "operator stack is empty" */
OP_PLUS, /* || or + */
OP_MULT, /* && or * */
OP_NOT, /* ! */
OP_LT, /* < */
OP_GT, /* > */
OP_LE, /* <= */
- OP_GE /* >= */
+ OP_GE, /* >= */
+ OP_OBRACE, /* ( - lives on the operator stack only, never copied to the output */
+ OP_CBRACE /* ) - never stored; makes the parser unwind the stack down to the matching ( */
};
struct rspamd_expression_elt {
struct rspamd_expression {
struct rspamd_atom_subr *subr;
GArray *expressions;
- GPtrArray *expression_stack;
+ GArray *expression_stack; /* parser's operator stack: gpointer-sized slots that hold
+                            * enum rspamd_expression_op values packed with GINT_TO_POINTER */
};
+/*
+ * Error domain attached to every GError that the expression parser raises.
+ */
+static GQuark
+rspamd_expr_quark (void)
+{
+	GQuark domain;
+
+	domain = g_quark_from_static_string ("rspamd-expression");
+
+	return domain;
+}
+
static void
rspamd_expr_stack_push (struct rspamd_expression *expr,
- struct rspamd_expression_elt *elt)
+ gpointer elt) /* opaque value to store; the parser passes operators packed with GINT_TO_POINTER */
{
- g_ptr_array_add (expr->expression_stack, elt);
+ g_array_append_val (expr->expression_stack, elt); /* appended at the tail, which is the top of the stack */
}
-static struct rspamd_expression_elt *
+static gpointer
rspamd_expr_stack_pop (struct rspamd_expression *expr)
{
- struct rspamd_expression_elt *e;
+ gpointer e; /* value taken from the top of the stack */
gint idx;
if (expr->expression_stack->len == 0) {
}
idx = expr->expression_stack->len - 1;
- e = g_ptr_array_index (expr->expression_stack, idx);
- g_ptr_array_remove_index_fast (expr->expression_stack, idx);
+ e = g_array_index (expr->expression_stack, gpointer, idx); /* top of stack is the last slot;
+                            * NOTE(review): the body of the empty-stack guard above is not
+                            * visible in this view - confirm it returns before idx becomes -1 */
+ g_array_remove_index_fast (expr->expression_stack, idx); /* idx is the last slot, so this only drops the tail element */
return e;
}
+/*
+ * Binding strength of an operator: the larger the number, the tighter the
+ * operator binds. Unary NOT is the strongest, followed by MULT, PLUS and the
+ * comparison operators; braces are the weakest and OP_INVALID maps to -1.
+ * A value outside the enumeration yields 0.
+ */
+static gint
+rspamd_expr_logic_priority (enum rspamd_expression_op op)
+{
+	switch (op) {
+	case OP_INVALID:
+		return -1;
+	case OP_OBRACE:
+	case OP_CBRACE:
+		return 1;
+	case OP_LT:
+	case OP_LE:
+	case OP_GT:
+	case OP_GE:
+		return 2;
+	case OP_PLUS:
+		return 3;
+	case OP_MULT:
+		return 4;
+	case OP_NOT:
+		return 5;
+	}
+
+	/* Not a member of the enumeration */
+	return 0;
+}
+
+/*
+ * Tell whether a character can start a symbolic operator.
+ *
+ * Return TRUE for '!', '&', '|', '+', '*', '(', ')', '>' and '<' - the same
+ * punctuation that rspamd_expr_str_to_op is able to decode - and FALSE for
+ * anything else (such a character starts an operand or a keyword).
+ *
+ * The keyword operators ("and", "or", "not") are deliberately not covered
+ * here: a single letter is not enough to tell them apart from an atom.
+ */
+static gboolean
+rspamd_expr_is_operation_symbol (gchar a)
+{
+	switch (a) {
+	case '!':
+	case '&':
+	case '|':
+	case '+':
+	case '*':
+	case '(':
+	case ')':
+	case '>':
+	case '<':
+		return TRUE;
+	default:
+		break;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Decode the operator that starts at `a`.
+ *
+ * a    - first character of the candidate operator; must be below `end`
+ * end  - one past the last readable character
+ * next - out parameter, must not be NULL; receives the position right after
+ *        the operator. It is written only when an operator is recognised and
+ *        is left untouched when OP_INVALID is returned.
+ *
+ * Recognised spellings: '!', '&', '&&', '*', '|', '||', '+', '(', ')',
+ * '>', '>=', '<', '<=' and the case-insensitive keywords "or", "and", "not".
+ *
+ * Returns the decoded operator or OP_INVALID when nothing matches.
+ */
+static enum rspamd_expression_op
+rspamd_expr_str_to_op (const gchar *a, const gchar *end, const gchar **next)
+{
+	enum rspamd_expression_op op = OP_INVALID;
+	gulong remain;
+
+	g_assert (a < end);
+
+	/* Number of readable characters starting at `a`, always >= 1 here */
+	remain = (gulong)(end - a);
+
+	switch (*a) {
+	case '!':
+		*next = a + 1;
+		op = OP_NOT;
+		break;
+	case '&':
+		/* '&' and '&&' mean the same; only peek at a[1] if it is readable */
+		*next = (remain > 1 && a[1] == '&') ? a + 2 : a + 1;
+		op = OP_MULT;
+		break;
+	case '*':
+		*next = a + 1;
+		op = OP_MULT;
+		break;
+	case '|':
+		/* '|' and '||' mean the same; only peek at a[1] if it is readable */
+		*next = (remain > 1 && a[1] == '|') ? a + 2 : a + 1;
+		op = OP_PLUS;
+		break;
+	case '+':
+		*next = a + 1;
+		op = OP_PLUS;
+		break;
+	case '(':
+		*next = a + 1;
+		op = OP_OBRACE;
+		break;
+	case ')':
+		*next = a + 1;
+		op = OP_CBRACE;
+		break;
+	case 'O':
+	case 'o':
+		/*
+		 * sizeof counts the terminating NUL, so the keyword is accepted only
+		 * when at least one more character follows it in the input.
+		 */
+		if (remain >= sizeof ("or") &&
+				g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0) {
+			*next = a + sizeof ("or") - 1;
+			op = OP_PLUS;
+		}
+		break;
+	case 'A':
+	case 'a':
+		if (remain >= sizeof ("and") &&
+				g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) {
+			*next = a + sizeof ("and") - 1;
+			op = OP_MULT;
+		}
+		break;
+	case 'N':
+	case 'n':
+		if (remain >= sizeof ("not") &&
+				g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) {
+			*next = a + sizeof ("not") - 1;
+			op = OP_NOT;
+		}
+		break;
+	case '>':
+		if (remain > 1 && a[1] == '=') {
+			*next = a + 2;
+			op = OP_GE;
+		}
+		else {
+			*next = a + 1;
+			op = OP_GT;
+		}
+		break;
+	case '<':
+		if (remain > 1 && a[1] == '=') {
+			*next = a + 2;
+			op = OP_LE;
+		}
+		else {
+			*next = a + 1;
+			op = OP_LT;
+		}
+		break;
+	default:
+		op = OP_INVALID;
+		break;
+	}
+
+	return op;
+}
+
+/*
+ * Parse `line` into a postfix expression with the shunting-yard algorithm.
+ *
+ * Operands are parsed by subr->parse, operators are decoded by
+ * rspamd_expr_str_to_op and a run of digits followed by '<' or '>' is stored
+ * as a numeric limit. Output elements are appended to e->expressions in
+ * postfix order; operators wait on e->expression_stack until they can be
+ * emitted.
+ *
+ * Returns TRUE on success; the new expression is stored in *target when
+ * `target` is not NULL and released otherwise. Returns FALSE on failure,
+ * after releasing the partially built expression. This function sets `err`
+ * for an empty number (code 400), a bad operator (500) and mismatched braces
+ * (600); for an unparsable atom `err` is only passed through to subr->parse.
+ */
gboolean
rspamd_parse_expression (const gchar *line, gsize len,
struct rspamd_atom_subr *subr, gpointer subr_data,
rspamd_mempool_t *pool, GError **err,
struct rspamd_expression **target)
{
+	struct rspamd_expression *e;
+	struct rspamd_expression_elt elt;
+	rspamd_expression_atom_t *atom;
+	rspamd_regexp_t *num_re;
+	enum rspamd_expression_op op, op_stack;
+	const gchar *p, *c, *end, *op_start;
+
+	enum {
+		PARSE_ATOM = 0,
+		PARSE_OP,
+		PARSE_LIM,
+		SKIP_SPACES
+	} state = PARSE_ATOM;
+
g_assert (line != NULL);
g_assert (subr != NULL && subr->parse != NULL);
len = strlen (line);
}
+	/* Look-ahead for "<digits><optional spaces><comparison>", i.e. a limit */
+	num_re = rspamd_regexp_cache_create (NULL, "/^\\d+\\s*[><]/", NULL, NULL);
+
+	p = line;
+	c = line;
+	end = line + len;
+	e = g_slice_alloc (sizeof (*e));
+	/* g_slice_alloc does not zero the block, so every field is set here */
+	e->subr = subr;
+	e->expressions = g_array_new (FALSE, FALSE,
+			sizeof (struct rspamd_expression_elt));
+	e->expression_stack = g_array_sized_new (FALSE, FALSE, sizeof (gpointer), 32);
+
+	/* Shunting-yard algorithm */
+	while (p < end) {
+		switch (state) {
+		case PARSE_ATOM:
+			if (g_ascii_isspace (*p)) {
+				state = SKIP_SPACES;
+			}
+			else if (rspamd_expr_is_operation_symbol (*p)) {
+				/* Operators are consumed in PARSE_OP; nothing here advances p */
+				state = PARSE_OP;
+			}
+			else {
+				/*
+				 * First of all, we check some pre-conditions:
+				 * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op
+				 * 2) if the text starts with digits followed by a comparison,
+				 *    i.e. it matches ^\d+\s*[><], it is a numeric limit
+				 */
+				if ((gulong)(end - p) > sizeof ("and ") &&
+					(g_ascii_strncasecmp (p, "and ", sizeof ("and ") - 1) == 0 ||
+					g_ascii_strncasecmp (p, "not ", sizeof ("not ") - 1) == 0 )) {
+					state = PARSE_OP;
+				}
+				else if ((gulong)(end - p) > sizeof ("or ") &&
+					g_ascii_strncasecmp (p, "or ", sizeof ("or ") - 1) == 0) {
+					state = PARSE_OP;
+				}
+				else if (rspamd_regexp_search (num_re, p, end - p, NULL, NULL,
+						FALSE)) {
+					/* Remember where the digits start */
+					c = p;
+					state = PARSE_LIM;
+				}
+				else {
+					/* Try to parse atom */
+					atom = subr->parse (p, end - p, pool, subr_data, err);
+					if (atom == NULL) {
+						/* We couldn't parse the atom, so go out */
+						goto err;
+					}
+					/* A zero-length atom would stall the loop */
+					g_assert (atom->len != 0);
+					p = p + atom->len;
+
+					/* Push to output */
+					elt.type = ELT_ATOM;
+					elt.p.atom = atom;
+					g_array_append_val (e->expressions, elt);
+				}
+			}
+			break;
+		case PARSE_LIM:
+			if (g_ascii_isdigit (*p)) {
+				p ++;
+			}
+			else {
+				if (p - c > 0) {
+					/* [c, p) holds the digits; strtoul stops at *p */
+					elt.type = ELT_LIMIT;
+					elt.p.lim.val = strtoul (c, NULL, 10);
+					g_array_append_val (e->expressions, elt);
+					c = p;
+					state = SKIP_SPACES;
+				}
+				else {
+					g_set_error (err, rspamd_expr_quark(), 400, "Empty number");
+					goto err;
+				}
+			}
+			break;
+		case PARSE_OP:
+			op_start = p;
+			op = rspamd_expr_str_to_op (p, end, &p);
+			if (op == OP_INVALID) {
+				g_set_error (err, rspamd_expr_quark(), 500, "Bad operator %c",
+						*p);
+				goto err;
+			}
+
+			if (p == op_start) {
+				/*
+				 * Guarantee forward progress: a recognised operator is at
+				 * least one character long, so step over it if the callee
+				 * did not move the cursor.
+				 */
+				p ++;
+			}
+
+			if (op == OP_OBRACE) {
+				/*
+				 * If the token is a left parenthesis, then push it onto
+				 * the stack.
+				 */
+				rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
+			}
+			else if (op == OP_CBRACE) {
+				/*
+				 * Until the token at the top of the stack is a left
+				 * parenthesis, pop operators off the stack onto the
+				 * output queue.
+				 *
+				 * Pop the left parenthesis from the stack,
+				 * but not onto the output queue.
+				 *
+				 * If the stack runs out without finding a left parenthesis,
+				 * then there are mismatched parentheses.
+				 */
+				do {
+					op = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
+
+					if (op == OP_INVALID) {
+						g_set_error (err, rspamd_expr_quark(), 600,
+								"Braces mismatch");
+						goto err;
+					}
+
+					if (op != OP_OBRACE) {
+						elt.type = ELT_OP;
+						elt.p.op = op;
+						g_array_append_val (e->expressions, elt);
+					}
+
+				} while (op != OP_OBRACE);
+			}
+			else {
+				/*
+				 * While there is an operator token, o2, at the top of
+				 * the operator stack, and either:
+				 *
+				 * - o1 is left-associative and its precedence is less than
+				 * or equal to that of o2, or
+				 * - o1 is right associative, and has precedence less than
+				 * that of o2,
+				 *
+				 * then pop o2 off the operator stack, onto the output queue;
+				 *
+				 * push o1 onto the operator stack.
+				 */
+
+				for (;;) {
+					op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e));
+
+					if (op_stack == OP_INVALID) {
+						/* Stack is empty */
+						break;
+					}
+
+					/* We ignore associativity for now */
+					if (op_stack != OP_OBRACE &&
+							rspamd_expr_logic_priority (op) <=
+							rspamd_expr_logic_priority(op_stack)) {
+						elt.type = ELT_OP;
+						elt.p.op = op_stack;
+						g_array_append_val (e->expressions, elt);
+					}
+					else {
+						/* Push op_stack back */
+						rspamd_expr_stack_push (e, GINT_TO_POINTER (op_stack));
+						break;
+					}
+				}
+
+				/* Push new operator itself */
+				rspamd_expr_stack_push (e, GINT_TO_POINTER (op));
+			}
+
+			/* The operator is consumed; decide what follows from the next char */
+			state = SKIP_SPACES;
+			break;
+		case SKIP_SPACES:
+			if (g_ascii_isspace (*p)) {
+				p ++;
+			}
+			else if (rspamd_expr_is_operation_symbol (*p)) {
+				state = PARSE_OP;
+			}
+			else {
+				state = PARSE_ATOM;
+			}
+			break;
+		}
+	}
+
+	/* Now we process the stack and push operators to the output */
+	while ((op_stack = GPOINTER_TO_INT (rspamd_expr_stack_pop (e)))
+			!= OP_INVALID) {
+		if (op_stack != OP_OBRACE) {
+			elt.type = ELT_OP;
+			elt.p.op = op_stack;
+			g_array_append_val (e->expressions, elt);
+		}
+		else {
+			/* An opening brace that was never closed */
+			g_set_error (err, rspamd_expr_quark(), 600,
+					"Braces mismatch");
+			goto err;
+		}
+	}
+
+	if (target) {
+		*target = e;
+	}
+	else {
+		/* Syntax check only: nobody takes ownership of the result */
+		g_array_free (e->expression_stack, TRUE);
+		g_array_free (e->expressions, TRUE);
+		g_slice_free1 (sizeof (*e), e);
+	}
+
+	return TRUE;
+
+err:
+	/*
+	 * Every jump here happens after `e` and both arrays were created, so the
+	 * partially built expression can be released unconditionally. The arrays
+	 * only store atom pointers; the atoms themselves are not freed here.
+	 */
+	g_array_free (e->expression_stack, TRUE);
+	g_array_free (e->expressions, TRUE);
+	g_slice_free1 (sizeof (*e), e);
return FALSE;
}