Use binary flags for part flags.

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Wed, 29 Apr 2015 14:17:57 +0000 (15:17 +0100)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Wed, 29 Apr 2015 14:17:57 +0000 (15:17 +0100)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 29 Apr 2015 14:17:57 +0000 (15:17 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 29 Apr 2015 14:17:57 +0000 (15:17 +0100)
diff --git a/src/libmime/message.c b/src/libmime/message.c

index 95a9bea95df323e839f48bae281a5090d933bdf5..d29aeb2cd477eb28a61d0e45415d9fbd4f0ae9a1 100644 (file)
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -38,6 +38,9 @@
  #define RECURSION_LIMIT 30
  #define UTF8_CHARSET "UTF-8"
  
+#define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_PART_FLAG_UTF) /* mark part as raw: clears the UTF flag */
+#define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_PART_FLAG_UTF) /* mark part as UTF-8: sets the UTF flag */
+
  GByteArray *
  strip_html_tags (struct rspamd_task *task,
         rspamd_mempool_t * pool,
@@ -280,7 +283,10 @@ reg_char:
  
         /* Check tag balancing */
         if (level_ptr && level_ptr->data != NULL) {
-               part->is_balanced = FALSE;
+               part->flags &= ~RSPAMD_MIME_PART_FLAG_BALANCED;
+       }
+       else {
+               part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED;
         }
  
         if (stateptr) {
@@ -957,6 +963,7 @@ rspamd_text_to_utf8 (struct rspamd_task *task,
         return res;
  }
  
+
  static GByteArray *
  convert_text_to_utf (struct rspamd_task *task,
         GByteArray * part_content,
@@ -970,35 +977,34 @@ convert_text_to_utf (struct rspamd_task *task,
         GByteArray *result_array;
  
         if (task->cfg->raw_mode) {
-               text_part->is_raw = TRUE;
+               SET_PART_RAW (text_part);
                 return part_content;
         }
  
         if ((charset =
                 g_mime_content_type_get_parameter (type, "charset")) == NULL) {
-               text_part->is_raw = TRUE;
+               SET_PART_RAW (text_part);
                 return part_content;
         }
         if (!charset_validate (task->task_pool, charset, &ocharset)) {
                 msg_info (
                         "<%s>: has invalid charset",
                         task->message_id);
-               text_part->is_raw = TRUE;
+               SET_PART_RAW (text_part);
                 return part_content;
         }
  
         if (g_ascii_strcasecmp (ocharset,
                 "utf-8") == 0 || g_ascii_strcasecmp (ocharset, "utf8") == 0) {
                 if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
-                       text_part->is_raw = FALSE;
-                       text_part->is_utf = TRUE;
+                       SET_PART_UTF (text_part);
                         return part_content;
                 }
                 else {
                         msg_info (
                                 "<%s>: contains invalid utf8 characters, assume it as raw",
                                 task->message_id);
-                       text_part->is_raw = TRUE;
+                       SET_PART_RAW (text_part);
                         return part_content;
                 }
         }
@@ -1013,8 +1019,7 @@ convert_text_to_utf (struct rspamd_task *task,
                                         task->message_id,
                                         ocharset,
                                         err ? err->message : "unknown problem");
-                       text_part->is_raw = TRUE;
-                       text_part->is_utf = FALSE;
+                       SET_PART_RAW (text_part);
                         g_error_free (err);
                         return part_content;
                 }
@@ -1023,8 +1028,7 @@ convert_text_to_utf (struct rspamd_task *task,
         result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
         result_array->data = res_str;
         result_array->len = write_bytes;
-       text_part->is_raw = FALSE;
-       text_part->is_utf = TRUE;
+       SET_PART_UTF (text_part);
  
         return result_array;
  }
@@ -1128,7 +1132,7 @@ detect_text_language (struct mime_text_part *part)
         const int max_chars = 32;
  
         if (part != NULL) {
-               if (part->is_utf) {
+               if (IS_PART_UTF (part)) {
                         /* Try to detect encoding by several symbols */
                         const gchar *p, *pp;
                         gunichar c;
@@ -1183,7 +1187,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
         guint i, nlen;
         GArray *tmp;
  
-       if (part->language && part->language[0] != '\0' && part->is_utf) {
+       if (part->language && part->language[0] != '\0' && IS_PART_UTF (part)) {
                 stem = sb_stemmer_new (part->language, "UTF_8");
                 if (stem == NULL) {
                         msg_info ("<%s> cannot create lemmatizer for %s language",
@@ -1193,7 +1197,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
  
         /* Ugly workaround */
         tmp = rspamd_tokenize_text (part->content->data,
-                       part->content->len, part->is_utf, task->cfg->min_word_len,
+                       part->content->len, IS_PART_UTF (part), task->cfg->min_word_len,
                         part->urls_offset, FALSE);
  
         if (tmp) {
@@ -1210,7 +1214,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
                                 w->len = nlen;
                         }
                         else {
-                               if (part->is_utf) {
+                               if (IS_PART_UTF (part)) {
                                         rspamd_str_lc_utf8 (w->begin, w->len);
                                 }
                                 else {
@@ -1263,9 +1267,9 @@ process_text_part (struct rspamd_task *task,
                 text_part =
                         rspamd_mempool_alloc0 (task->task_pool,
                                 sizeof (struct mime_text_part));
-               text_part->is_html = TRUE;
+               text_part->flags |= RSPAMD_MIME_PART_FLAG_HTML;
                 if (is_empty) {
-                       text_part->is_empty = TRUE;
+                       text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY;
                         text_part->orig = NULL;
                         text_part->content = NULL;
                         task->text_parts = g_list_prepend (task->text_parts, text_part);
@@ -1276,10 +1280,10 @@ process_text_part (struct rspamd_task *task,
                                 text_part->orig,
                                 type,
                                 text_part);
-               text_part->is_balanced = TRUE;
                 text_part->html_nodes = NULL;
                 text_part->parent = parent;
  
+               text_part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED;
                 text_part->content = strip_html_tags (task,
                                 task->task_pool,
                                 text_part,
@@ -1303,10 +1307,9 @@ process_text_part (struct rspamd_task *task,
                 text_part =
                         rspamd_mempool_alloc0 (task->task_pool,
                                 sizeof (struct mime_text_part));
-               text_part->is_html = FALSE;
                 text_part->parent = parent;
                 if (is_empty) {
-                       text_part->is_empty = TRUE;
+                       text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY;
                         text_part->orig = NULL;
                         text_part->content = NULL;
                         task->text_parts = g_list_prepend (task->text_parts, text_part);
@@ -1328,7 +1331,7 @@ process_text_part (struct rspamd_task *task,
         /* Post process part */
         detect_text_language (text_part);
         text_part->words = rspamd_tokenize_text (text_part->content->data,
-                       text_part->content->len, text_part->is_utf, task->cfg->min_word_len,
+                       text_part->content->len, IS_PART_UTF (text_part), task->cfg->min_word_len,
                         text_part->urls_offset, TRUE);
         rspamd_normalize_text_part (task, text_part);
  }
diff --git a/src/libmime/message.h b/src/libmime/message.h

index ef881ebd10cead911a91c587b81f17b5696b2548..0f3a0ccb47202ce66d58d7db39d4f48b7f3f7e89 100644 (file)
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -21,12 +21,18 @@ struct mime_part {
         const gchar *filename;
  };
  
+#define RSPAMD_MIME_PART_FLAG_UTF (1 << 0) /* content is UTF-8 (validated or converted); when clear the part is raw */
+#define RSPAMD_MIME_PART_FLAG_BALANCED (1 << 1) /* HTML tag balancing check found no unpaired tags */
+#define RSPAMD_MIME_PART_FLAG_EMPTY (1 << 2) /* part is empty: orig and content are NULL */
+#define RSPAMD_MIME_PART_FLAG_HTML (1 << 3) /* part was processed as HTML */
+
+#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_EMPTY) /* non-zero (the mask bit, not necessarily TRUE) when empty */
+#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_UTF) /* non-zero when the UTF flag is set */
+#define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_PART_FLAG_UTF)) /* exact inverse of IS_PART_UTF; yields 0 or 1 */
+#define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_HTML) /* non-zero (the mask bit, not necessarily TRUE) when HTML */
+
  struct mime_text_part {
-       gboolean is_html;
-       gboolean is_raw;
-       gboolean is_balanced;
-       gboolean is_empty;
-       gboolean is_utf;
+       guint flags;
         GUnicodeScript script;
         const gchar *lang_code;
         const gchar *language;
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c

index b4271f1dc84a926c981f39981ee060b06deb3a18..aaeb19619925b1574673400a232533607d3809b0 100644 (file)
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -867,13 +867,13 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
                 while (cur) {
                         part = (struct mime_text_part *)cur->data;
                         /* Skip empty parts */
-                       if (part->is_empty) {
+                       if (IS_PART_EMPTY (part)) {
                                 cur = g_list_next (cur);
                                 continue;
                         }
  
                         /* Check raw flags */
-                       if (part->is_raw) {
+                       if (!IS_PART_UTF (part)) {
                                 raw = TRUE;
                         }
                         /* Select data for regexp */
@@ -1248,7 +1248,7 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
                                 NULL);
                         return FALSE;
                 }
-               if (!p1->is_empty && !p2->is_empty) {
+               if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2)) {
                         if (p1->diff_str != NULL && p2->diff_str != NULL) {
                                 diff = rspamd_diff_distance_normalized (p1->diff_str,
                                                 p2->diff_str);
@@ -1278,8 +1278,8 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
                                 }
                         }
                 }
-               else if ((p1->is_empty &&
-                       !p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+               else if ((IS_PART_EMPTY (p1) &&
+                       !IS_PART_EMPTY (p2)) || (!IS_PART_EMPTY (p1)&& IS_PART_EMPTY (p2))) {
                         /* Empty and non empty parts are different */
                         *pdiff = 0;
                         rspamd_mempool_set_variable (task->task_pool,
@@ -1430,7 +1430,7 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
         cur = g_list_first (task->text_parts);
         while (cur) {
                 p = cur->data;
-               if (p->is_html) {
+               if (IS_PART_HTML (p)) {
                         res = TRUE;
                 }
                 else {
@@ -1601,8 +1601,8 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
         cur = g_list_first (task->text_parts);
         while (cur) {
                 p = cur->data;
-               if (!p->is_empty && p->is_html) {
-                       if (p->is_balanced) {
+               if (!IS_PART_EMPTY (p) && IS_PART_HTML (p)) {
+                       if (p->flags & RSPAMD_MIME_PART_FLAG_BALANCED) {
                                 res = TRUE;
                         }
                         else {
@@ -1673,7 +1673,7 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
  
         while (cur && res == FALSE) {
                 p = cur->data;
-               if (!p->is_empty && p->is_html && p->html_nodes) {
+               if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes) {
                         g_node_traverse (p->html_nodes,
                                 G_PRE_ORDER,
                                 G_TRAVERSE_ALL,
@@ -1699,7 +1699,7 @@ rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
  
         while (cur && res == FALSE) {
                 p = cur->data;
-               if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
+               if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes == NULL) {
                         res = TRUE;
                 }
                 cur = g_list_next (cur);
diff --git a/src/libserver/html.c b/src/libserver/html.c

index f978ff1c7fc7153d2588fb388b1d9b431ec2416c..2470310b4089b4549808973538edc3efcd308f73 100644 (file)
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -941,7 +941,7 @@ add_html_node (struct rspamd_task *task,
                         if (!check_balance (new, cur_level)) {
                                 debug_task (
                                         "mark part as unbalanced as it has not pairable closing tags");
-                               part->is_balanced = FALSE;
+                               part->flags &= ~RSPAMD_MIME_PART_FLAG_BALANCED;
                         }
                 }
                 else if ((data->flags & (FL_XML|FL_SGML)) == 0) {
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c

index 8bea1baea3f47baac94bac8dd4a0acf9a6ec2c86..e4e4c692bdf7b473aadf6fcd736821f959dedf4d 100644 (file)
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -296,14 +296,14 @@ rspamd_stat_process_tokenize (struct rspamd_tokenizer_config *cf,
         while (cur != NULL) {
                 part = (struct mime_text_part *)cur->data;
  
-               if (!part->is_empty && part->words != NULL) {
+               if (!IS_PART_EMPTY (part) && part->words != NULL) {
                         if (compat) {
                                 tok->tokenizer->tokenize_func (cf, task->task_pool,
-                                       part->words, tok->tokens, part->is_utf);
+                                       part->words, tok->tokens, IS_PART_UTF (part));
                         }
                         else {
                                 tok->tokenizer->tokenize_func (cf, task->task_pool,
-                                       part->normalized_words, tok->tokens, part->is_utf);
+                                       part->normalized_words, tok->tokens, IS_PART_UTF (part));
                         }
                 }
  
diff --git a/src/libutil/fuzzy.c b/src/libutil/fuzzy.c

index 83cb9cd29d4789bb220863b7459539d37dc4abd3..a15be12e2d064dec556674bc1d89c0d48cde6400 100644 (file)
--- a/src/libutil/fuzzy.c
+++ b/src/libutil/fuzzy.c
@@ -340,7 +340,7 @@ rspamd_fuzzy_from_text_part (struct mime_text_part *part,
         bzero (&rs, sizeof (rs));
         end = c + len;
  
-       if (part->is_utf) {
+       if (IS_PART_UTF (part)) {
                 while (c < end) {
                         if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) {
                                 c += cur_ex->len + 1;
@@ -400,7 +400,7 @@ rspamd_fuzzy_from_text_part (struct mime_text_part *part,
         begin = (gchar *)part->content->data;
         c = begin;
         end = c + len;
-       if (part->is_utf) {
+       if (IS_PART_UTF (part)) {
  
                 while (c < end) {
                         if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) {
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c

index a19b7a8f4d599fb5bcd7cc02646e212a3320cf9a..e83ba1770537b1246880e972b3e5880877aee373 100644 (file)
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -238,12 +238,12 @@ lua_textpart_is_utf (lua_State * L)
  {
         struct mime_text_part *part = lua_check_textpart (L);
  
-       if (part == NULL || part->is_empty) {
+       if (part == NULL || IS_PART_EMPTY (part)) {
                 lua_pushboolean (L, FALSE);
                 return 1;
         }
  
-       lua_pushboolean (L, part->is_utf);
+       lua_pushboolean (L, IS_PART_UTF (part));
  
         return 1;
  }
@@ -255,7 +255,7 @@ lua_textpart_get_content (lua_State * L)
         struct mime_text_part *part = lua_check_textpart (L);
         struct rspamd_lua_text *t;
  
-       if (part == NULL || part->is_empty) {
+       if (part == NULL || IS_PART_EMPTY (part)) {
                 lua_pushnil (L);
                 return 1;
         }
@@ -278,7 +278,7 @@ lua_textpart_get_length (lua_State * L)
                 return 1;
         }
  
-       if (part->is_empty) {
+       if (IS_PART_EMPTY (part)) {
                 lua_pushnumber (L, 0);
         }
         else {
@@ -298,7 +298,7 @@ lua_textpart_is_empty (lua_State * L)
                 return 1;
         }
  
-       lua_pushboolean (L, part->is_empty);
+       lua_pushboolean (L, IS_PART_EMPTY (part));
  
         return 1;
  }
@@ -313,7 +313,7 @@ lua_textpart_is_html (lua_State * L)
                 return 1;
         }
  
-       lua_pushboolean (L, part->is_html);
+       lua_pushboolean (L, IS_PART_HTML (part));
  
         return 1;
  }
@@ -324,7 +324,7 @@ lua_textpart_get_fuzzy (lua_State * L)
         struct mime_text_part *part = lua_check_textpart (L);
         gchar *out;
  
-       if (part == NULL || part->is_empty) {
+       if (part == NULL || IS_PART_EMPTY (part)) {
                 lua_pushnil (L);
                 return 1;
         }
@@ -380,7 +380,7 @@ lua_textpart_compare_distance (lua_State * L)
  
                 }
                 else {
-                       if (!part->is_empty && !other->is_empty) {
+                       if (!IS_PART_EMPTY (part) && !IS_PART_EMPTY (other)) {
                                 if (part->diff_str != NULL && other->diff_str != NULL) {
                                         diff = rspamd_diff_distance (part->diff_str,
                                                         other->diff_str);
@@ -389,8 +389,9 @@ lua_textpart_compare_distance (lua_State * L)
                                         diff = rspamd_fuzzy_compare_parts (part, other);
                                 }
                         }
-                       else if ((part->is_empty &&
-                               !other->is_empty) || (!part->is_empty && other->is_empty)) {
+                       else if ((IS_PART_EMPTY (part) &&
+                               !IS_PART_EMPTY (other)) || (!IS_PART_EMPTY (part) &&
+                                               IS_PART_EMPTY (other))) {
                                 /* Empty and non empty parts are different */
                                 diff = 0;
                         }
diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c

index 63b7167566574f09a72c78fd56adcdede8a76294..f1b9088db322492e52eeb8df82e9687d15c144d2 100644 (file)
--- a/src/lua/lua_trie.c
+++ b/src/lua/lua_trie.c
@@ -272,7 +272,7 @@ lua_trie_search_mime (lua_State *L)
                 while (cur) {
                         part = cur->data;
  
-                       if (!part->is_empty && part->content != NULL) {
+                       if (!IS_PART_EMPTY (part) && part->content != NULL) {
                                 text = part->content->data;
                                 len = part->content->len;
  
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c

index f8ad15be9b9ca6f217994d966fd932a691ef158f..3efec040b59ec3cc72d3d330eef82d97b071b8c5 100644 (file)
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -133,7 +133,7 @@ check_part (struct mime_text_part *part, gboolean raw_mode)
  
         p = part->content->data;
  
-       if (part->is_raw || raw_mode) {
+       if (IS_PART_RAW (part) || raw_mode) { /* byte-wise check is for raw (non-UTF) parts, as with the old is_raw test */
                 while (remain > 1) {
                         if ((g_ascii_isalpha (*p) &&
                                 (*(p + 1) & 0x80)) ||
@@ -213,7 +213,7 @@ chartable_symbol_callback (struct rspamd_task *task, void *unused)
         cur = g_list_first (task->text_parts);
         while (cur) {
                 part = cur->data;
-               if (!part->is_empty && check_part (part, task->cfg->raw_mode)) {
+               if (!IS_PART_EMPTY (part) && check_part (part, task->cfg->raw_mode)) {
                         rspamd_task_insert_result (task, chartable_module_ctx->symbol, 1, NULL);
                 }
                 cur = g_list_next (cur);
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c

index b5410e6cf5f13ef2f771102fc3c60c03131eeec7..af0aab420a0cdba2851f447fd6c6fc4a8fa06781 100644 (file)
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -524,7 +524,7 @@ fuzzy_preprocess_words (struct mime_text_part *part, rspamd_mempool_t *pool)
  {
         GArray *res;
  
-       if (!part->is_utf || !part->language || part->language[0] == '\0' ||
+       if (!IS_PART_UTF (part) || !part->language || part->language[0] == '\0' ||
                         part->normalized_words == NULL) {
                 res = part->words;
         }
@@ -961,7 +961,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
  
         while (cur) {
                 part = cur->data;
-               if (part->is_empty) {
+               if (IS_PART_EMPTY (part)) {
                         cur = g_list_next (cur);
                         continue;
                 }
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Wed, 29 Apr 2015 14:17:57 +0000 (15:17 +0100)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Wed, 29 Apr 2015 14:17:57 +0000 (15:17 +0100)
src/libmime/message.c		patch \| blob \| history
src/libmime/message.h		patch \| blob \| history
src/libmime/mime_expressions.c		patch \| blob \| history
src/libserver/html.c		patch \| blob \| history
src/libstat/stat_process.c		patch \| blob \| history
src/libutil/fuzzy.c		patch \| blob \| history
src/lua/lua_mimepart.c		patch \| blob \| history
src/lua/lua_trie.c		patch \| blob \| history
src/plugins/chartable.c		patch \| blob \| history
src/plugins/fuzzy_check.c		patch \| blob \| history