Use bit flags instead of separate gboolean fields for MIME text part state.

author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-29 15:17:57 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-29 15:17:57 +0100
commit: d08dd10cf250bb86ee0bfe6e4c5d9414cd98711f (patch)
tree: b0d3fc1c4962495a3b070a9161b475688e1e2b60 /src/libmime
parent: 8befc1e63c658c0d8eed73f382e0a3160ad18a4a (diff)
download: rspamd-d08dd10cf250bb86ee0bfe6e4c5d9414cd98711f.tar.gz rspamd-d08dd10cf250bb86ee0bfe6e4c5d9414cd98711f.zip
3 files changed, 45 insertions, 36 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 95a9bea95..d29aeb2cd 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -38,6 +38,9 @@
 #define RECURSION_LIMIT 30
 #define UTF8_CHARSET "UTF-8"
 
+#define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_PART_FLAG_UTF)
+#define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_PART_FLAG_UTF)
+
 GByteArray *
 strip_html_tags (struct rspamd_task *task,
 	rspamd_mempool_t * pool,
@@ -280,7 +283,10 @@ reg_char:
 
 	/* Check tag balancing */
 	if (level_ptr && level_ptr->data != NULL) {
-		part->is_balanced = FALSE;
+		part->flags &= ~RSPAMD_MIME_PART_FLAG_BALANCED;
+	}
+	else {
+		part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED;
 	}
 
 	if (stateptr) {
@@ -957,6 +963,7 @@ rspamd_text_to_utf8 (struct rspamd_task *task,
 	return res;
 }
 
+
 static GByteArray *
 convert_text_to_utf (struct rspamd_task *task,
 	GByteArray * part_content,
@@ -970,35 +977,34 @@ convert_text_to_utf (struct rspamd_task *task,
 	GByteArray *result_array;
 
 	if (task->cfg->raw_mode) {
-		text_part->is_raw = TRUE;
+		SET_PART_RAW (text_part);
 		return part_content;
 	}
 
 	if ((charset =
 		g_mime_content_type_get_parameter (type, "charset")) == NULL) {
-		text_part->is_raw = TRUE;
+		SET_PART_RAW (text_part);
 		return part_content;
 	}
 	if (!charset_validate (task->task_pool, charset, &ocharset)) {
 		msg_info (
 			"<%s>: has invalid charset",
 			task->message_id);
-		text_part->is_raw = TRUE;
+		SET_PART_RAW (text_part);
 		return part_content;
 	}
 
 	if (g_ascii_strcasecmp (ocharset,
 		"utf-8") == 0 || g_ascii_strcasecmp (ocharset, "utf8") == 0) {
 		if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
-			text_part->is_raw = FALSE;
-			text_part->is_utf = TRUE;
+			SET_PART_UTF (text_part);
 			return part_content;
 		}
 		else {
 			msg_info (
 				"<%s>: contains invalid utf8 characters, assume it as raw",
 				task->message_id);
-			text_part->is_raw = TRUE;
+			SET_PART_RAW (text_part);
 			return part_content;
 		}
 	}
@@ -1013,8 +1019,7 @@ convert_text_to_utf (struct rspamd_task *task,
 					task->message_id,
 					ocharset,
 					err ? err->message : "unknown problem");
-			text_part->is_raw = TRUE;
-			text_part->is_utf = FALSE;
+			SET_PART_RAW (text_part);
 			g_error_free (err);
 			return part_content;
 		}
@@ -1023,8 +1028,7 @@ convert_text_to_utf (struct rspamd_task *task,
 	result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
 	result_array->data = res_str;
 	result_array->len = write_bytes;
-	text_part->is_raw = FALSE;
-	text_part->is_utf = TRUE;
+	SET_PART_UTF (text_part);
 
 	return result_array;
 }
@@ -1128,7 +1132,7 @@ detect_text_language (struct mime_text_part *part)
 	const int max_chars = 32;
 
 	if (part != NULL) {
-		if (part->is_utf) {
+		if (IS_PART_UTF (part)) {
 			/* Try to detect encoding by several symbols */
 			const gchar *p, *pp;
 			gunichar c;
@@ -1183,7 +1187,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 	guint i, nlen;
 	GArray *tmp;
 
-	if (part->language && part->language[0] != '\0' && part->is_utf) {
+	if (part->language && part->language[0] != '\0' && IS_PART_UTF (part)) {
 		stem = sb_stemmer_new (part->language, "UTF_8");
 		if (stem == NULL) {
 			msg_info ("<%s> cannot create lemmatizer for %s language",
@@ -1193,7 +1197,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 
 	/* Ugly workaround */
 	tmp = rspamd_tokenize_text (part->content->data,
-			part->content->len, part->is_utf, task->cfg->min_word_len,
+			part->content->len, IS_PART_UTF (part), task->cfg->min_word_len,
 			part->urls_offset, FALSE);
 
 	if (tmp) {
@@ -1210,7 +1214,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 				w->len = nlen;
 			}
 			else {
-				if (part->is_utf) {
+				if (IS_PART_UTF (part)) {
 					rspamd_str_lc_utf8 (w->begin, w->len);
 				}
 				else {
@@ -1263,9 +1267,9 @@ process_text_part (struct rspamd_task *task,
 		text_part =
 			rspamd_mempool_alloc0 (task->task_pool,
 				sizeof (struct mime_text_part));
-		text_part->is_html = TRUE;
+		text_part->flags |= RSPAMD_MIME_PART_FLAG_HTML;
 		if (is_empty) {
-			text_part->is_empty = TRUE;
+			text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY;
 			text_part->orig = NULL;
 			text_part->content = NULL;
 			task->text_parts = g_list_prepend (task->text_parts, text_part);
@@ -1276,10 +1280,10 @@ process_text_part (struct rspamd_task *task,
 				text_part->orig,
 				type,
 				text_part);
-		text_part->is_balanced = TRUE;
 		text_part->html_nodes = NULL;
 		text_part->parent = parent;
 
+		text_part->flags |= RSPAMD_MIME_PART_FLAG_BALANCED;
 		text_part->content = strip_html_tags (task,
 				task->task_pool,
 				text_part,
@@ -1303,10 +1307,9 @@ process_text_part (struct rspamd_task *task,
 		text_part =
 			rspamd_mempool_alloc0 (task->task_pool,
 				sizeof (struct mime_text_part));
-		text_part->is_html = FALSE;
 		text_part->parent = parent;
 		if (is_empty) {
-			text_part->is_empty = TRUE;
+			text_part->flags |= RSPAMD_MIME_PART_FLAG_EMPTY;
 			text_part->orig = NULL;
 			text_part->content = NULL;
 			task->text_parts = g_list_prepend (task->text_parts, text_part);
@@ -1328,7 +1331,7 @@ process_text_part (struct rspamd_task *task,
 	/* Post process part */
 	detect_text_language (text_part);
 	text_part->words = rspamd_tokenize_text (text_part->content->data,
-			text_part->content->len, text_part->is_utf, task->cfg->min_word_len,
+			text_part->content->len, IS_PART_UTF (text_part), task->cfg->min_word_len,
 			text_part->urls_offset, TRUE);
 	rspamd_normalize_text_part (task, text_part);
 }
diff --git a/src/libmime/message.h b/src/libmime/message.h
index ef881ebd1..0f3a0ccb4 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -21,12 +21,18 @@ struct mime_part {
 	const gchar *filename;
 };
 
+#define RSPAMD_MIME_PART_FLAG_UTF (1 << 0)
+#define RSPAMD_MIME_PART_FLAG_BALANCED (1 << 1)
+#define RSPAMD_MIME_PART_FLAG_EMPTY (1 << 2)
+#define RSPAMD_MIME_PART_FLAG_HTML (1 << 3)
+
+#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_EMPTY)
+#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_UTF)
+#define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_PART_FLAG_UTF))
+#define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_PART_FLAG_HTML)
+
 struct mime_text_part {
-	gboolean is_html;
-	gboolean is_raw;
-	gboolean is_balanced;
-	gboolean is_empty;
-	gboolean is_utf;
+	guint flags;
 	GUnicodeScript script;
 	const gchar *lang_code;
 	const gchar *language;
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index b4271f1dc..aaeb19619 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -867,13 +867,13 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
 		while (cur) {
 			part = (struct mime_text_part *)cur->data;
 			/* Skip empty parts */
-			if (part->is_empty) {
+			if (IS_PART_EMPTY (part)) {
 				cur = g_list_next (cur);
 				continue;
 			}
 
 			/* Check raw flags */
-			if (part->is_raw) {
+			if (!IS_PART_UTF (part)) {
 				raw = TRUE;
 			}
 			/* Select data for regexp */
@@ -1248,7 +1248,7 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
 				NULL);
 			return FALSE;
 		}
-		if (!p1->is_empty && !p2->is_empty) {
+		if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2)) {
 			if (p1->diff_str != NULL && p2->diff_str != NULL) {
 				diff = rspamd_diff_distance_normalized (p1->diff_str,
 						p2->diff_str);
@@ -1278,8 +1278,8 @@ rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
 				}
 			}
 		}
-		else if ((p1->is_empty &&
-			!p2->is_empty) || (!p1->is_empty && p2->is_empty)) {
+		else if ((IS_PART_EMPTY (p1) &&
+			!IS_PART_EMPTY (p2)) || (!IS_PART_EMPTY (p1)&& IS_PART_EMPTY (p2))) {
 			/* Empty and non empty parts are different */
 			*pdiff = 0;
 			rspamd_mempool_set_variable (task->task_pool,
@@ -1430,7 +1430,7 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
 	cur = g_list_first (task->text_parts);
 	while (cur) {
 		p = cur->data;
-		if (p->is_html) {
+		if (IS_PART_HTML (p)) {
 			res = TRUE;
 		}
 		else {
@@ -1601,8 +1601,8 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
 	cur = g_list_first (task->text_parts);
 	while (cur) {
 		p = cur->data;
-		if (!p->is_empty && p->is_html) {
-			if (p->is_balanced) {
+		if (!IS_PART_EMPTY (p) && IS_PART_HTML (p)) {
+			if (p->flags & RSPAMD_MIME_PART_FLAG_BALANCED) {
 				res = TRUE;
 			}
 			else {
@@ -1673,7 +1673,7 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
 
 	while (cur && res == FALSE) {
 		p = cur->data;
-		if (!p->is_empty && p->is_html && p->html_nodes) {
+		if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes) {
 			g_node_traverse (p->html_nodes,
 				G_PRE_ORDER,
 				G_TRAVERSE_ALL,
@@ -1699,7 +1699,7 @@ rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
 
 	while (cur && res == FALSE) {
 		p = cur->data;
-		if (!p->is_empty && p->is_html && p->html_nodes == NULL) {
+		if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html_nodes == NULL) {
 			res = TRUE;
 		}
 		cur = g_list_next (cur);
author	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-04-29 15:17:57 +0100
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>	2015-04-29 15:17:57 +0100
commit	d08dd10cf250bb86ee0bfe6e4c5d9414cd98711f (patch)
tree	b0d3fc1c4962495a3b070a9161b475688e1e2b60 /src/libmime
parent	8befc1e63c658c0d8eed73f382e0a3160ad18a4a (diff)
download	rspamd-d08dd10cf250bb86ee0bfe6e4c5d9414cd98711f.tar.gz rspamd-d08dd10cf250bb86ee0bfe6e4c5d9414cd98711f.zip