summaryrefslogtreecommitdiffstats
path: root/src/libmime
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-09-28 14:30:13 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-09-28 14:30:13 +0100
commit388c08c712c8279a52ad82caa213191352639ca9 (patch)
tree9f12bba4be9b9772044928949c920175fa89d072 /src/libmime
parent61dbbe0eca5291b1329393a5fe1903fde415a243 (diff)
downloadrspamd-388c08c712c8279a52ad82caa213191352639ca9.tar.gz
rspamd-388c08c712c8279a52ad82caa213191352639ca9.zip
[Feature] Stop using GLists for headers, improve performance
Diffstat (limited to 'src/libmime')
-rw-r--r--src/libmime/message.c303
-rw-r--r--src/libmime/message.h35
-rw-r--r--src/libmime/mime_expressions.c6
3 files changed, 71 insertions, 273 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 346105438..e4ad8b954 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -56,17 +56,17 @@ static void
append_raw_header (struct rspamd_task *task,
GHashTable *target, struct raw_header *rh)
{
- struct raw_header *lp;
+ GPtrArray *ar;
- rh->next = NULL;
- rh->prev = rh;
- if ((lp =
- g_hash_table_lookup (target, rh->name)) != NULL) {
- DL_APPEND (lp, rh);
+ if ((ar = g_hash_table_lookup (target, rh->name)) != NULL) {
+ g_ptr_array_add (ar, rh);
}
else {
- g_hash_table_insert (target, rh->name, rh);
+ ar = g_ptr_array_sized_new (2);
+ g_ptr_array_add (ar, rh);
+ g_hash_table_insert (target, rh->name, ar);
}
+
msg_debug_task ("add raw header %s: %s", rh->name, rh->value);
}
@@ -108,7 +108,6 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
new =
rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct raw_header));
- new->prev = new;
l = p - c;
tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
rspamd_strlcpy (tmp, c, l + 1);
@@ -1173,8 +1172,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
sizeof (struct rspamd_mime_part));
hdrs = g_mime_object_get_headers (GMIME_OBJECT (part));
- mime_part->raw_headers = g_hash_table_new (rspamd_strcase_hash,
- rspamd_strcase_equal);
+ mime_part->raw_headers = g_hash_table_new_full (rspamd_strcase_hash,
+ rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
if (hdrs != NULL) {
process_raw_headers (task, mime_part->raw_headers,
@@ -1243,8 +1242,8 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data)
sizeof (struct rspamd_mime_part));
hdrs = g_mime_object_get_headers (GMIME_OBJECT (part));
- mime_part->raw_headers = g_hash_table_new (rspamd_strcase_hash,
- rspamd_strcase_equal);
+ mime_part->raw_headers = g_hash_table_new_full (rspamd_strcase_hash,
+ rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
if (hdrs != NULL) {
process_raw_headers (task, mime_part->raw_headers,
@@ -1402,7 +1401,7 @@ rspamd_message_parse (struct rspamd_task *task)
GMimeParser *parser;
GMimeStream *stream;
GByteArray *tmp;
- GList *first, *cur;
+ GPtrArray *hdrs;
GMimeObject *parent;
const GMimeContentType *ct;
struct raw_header *rh;
@@ -1493,7 +1492,6 @@ rspamd_message_parse (struct rspamd_task *task)
RSPAMD_FILTER_ERROR, \
"cannot parse MIME in the message");
- /* TODO: backport to 0.9 */
g_object_unref (parser);
return FALSE;
}
@@ -1566,12 +1564,11 @@ rspamd_message_parse (struct rspamd_task *task)
rspamd_archives_process (task);
/* Parse received headers */
- first = rspamd_message_get_header (task, "Received", FALSE);
+ hdrs = rspamd_message_get_header_array (task, "Received", FALSE);
- for (cur = first, i = 0; cur != NULL; cur = g_list_next (cur), i ++) {
+ PTR_ARRAY_FOREACH (hdrs, i, rh) {
recv = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct received_header));
- rh = cur->data;
rspamd_smtp_recieved_parse (task, rh->decoded, strlen (rh->decoded), recv);
/*
* For the first header we must ensure that
@@ -1640,20 +1637,20 @@ rspamd_message_parse (struct rspamd_task *task)
}
if (task->from_envelope == NULL) {
- first = rspamd_message_get_header (task, "Return-Path", FALSE);
+ hdrs = rspamd_message_get_header_array (task, "Return-Path", FALSE);
- if (first) {
- rh = first->data;
+ if (hdrs && hdrs->len > 0) {
+ rh = g_ptr_array_index (hdrs, 0);
task->from_envelope = rspamd_email_address_from_smtp (rh->decoded,
strlen (rh->decoded));
}
}
if (task->deliver_to == NULL) {
- first = rspamd_message_get_header (task, "Delivered-To", FALSE);
+ hdrs = rspamd_message_get_header_array (task, "Delivered-To", FALSE);
- if (first) {
- rh = first->data;
+ if (hdrs && hdrs->len > 0) {
+ rh = g_ptr_array_index (hdrs, 0);
task->deliver_to = rspamd_mempool_strdup (task->task_pool, rh->decoded);
}
}
@@ -1671,10 +1668,12 @@ rspamd_message_parse (struct rspamd_task *task)
task->rcpt_mime);
#endif
}
- first = rspamd_message_get_header (task, "From", FALSE);
- if (first) {
- rh = first->data;
+
+ hdrs = rspamd_message_get_header_array (task, "From", FALSE);
+
+ if (hdrs && hdrs->len > 0) {
+ rh = g_ptr_array_index (hdrs, 0);
task->from_mime = internet_address_list_parse_string (rh->value);
if (task->from_mime) {
#ifdef GMIME24
@@ -1690,10 +1689,9 @@ rspamd_message_parse (struct rspamd_task *task)
}
/* Parse urls inside Subject header */
- cur = rspamd_message_get_header (task, "Subject", FALSE);
+ hdrs = rspamd_message_get_header_array (task, "Subject", FALSE);
- for (; cur != NULL; cur = g_list_next (cur)) {
- rh = cur->data;
+ PTR_ARRAY_FOREACH (hdrs, i, rh) {
p = rh->decoded;
len = strlen (p);
rspamd_url_find_multiple (task->task_pool, p, len, FALSE, NULL,
@@ -1783,99 +1781,73 @@ rspamd_message_parse (struct rspamd_task *task)
return TRUE;
}
-GList *
-rspamd_message_get_header (struct rspamd_task *task,
- const gchar *field,
- gboolean strong)
-{
- GList *gret = NULL;
- struct raw_header *rh;
-
- rh = g_hash_table_lookup (task->raw_headers, field);
-
- if (rh == NULL) {
- return NULL;
- }
-
- while (rh) {
- if (strong) {
- if (strcmp (rh->name, field) == 0) {
- gret = g_list_prepend (gret, rh);
- }
- }
- else {
- gret = g_list_prepend (gret, rh);
- }
- rh = rh->next;
- }
-
- if (gret != NULL) {
- gret = g_list_reverse (gret);
- rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t)g_list_free, gret);
- }
-
- return gret;
-}
GPtrArray *
-rspamd_message_get_header_array (struct rspamd_task *task,
+rspamd_message_get_header_from_hash (GHashTable *htb,
+ rspamd_mempool_t *pool,
const gchar *field,
gboolean strong)
{
- GPtrArray *ret;
- struct raw_header *rh, *cur;
- guint nelems = 0;
+ GPtrArray *ret, *ar;
+ struct raw_header *cur;
+ guint i;
- rh = g_hash_table_lookup (task->raw_headers, field);
+ ar = g_hash_table_lookup (htb, field);
- if (rh == NULL) {
+ if (ar == NULL) {
return NULL;
}
- LL_FOREACH (rh, cur) {
- nelems ++;
- }
-
- ret = g_ptr_array_sized_new (nelems);
+ if (strong && pool != NULL) {
+ /* Need to filter what we have */
+ ret = g_ptr_array_sized_new (ar->len);
- LL_FOREACH (rh, cur) {
- if (strong) {
- if (strcmp (rh->name, field) != 0) {
+ PTR_ARRAY_FOREACH (ar, i, cur) {
+ if (strcmp (cur->name, field) != 0) {
continue;
}
- }
- g_ptr_array_add (ret, cur);
- }
+ g_ptr_array_add (ret, cur);
+ }
- rspamd_mempool_add_destructor (task->task_pool,
+ rspamd_mempool_add_destructor (pool,
(rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard, ret);
+ }
+ else {
+ ret = ar;
+ }
return ret;
}
GPtrArray *
+rspamd_message_get_header_array (struct rspamd_task *task,
+ const gchar *field,
+ gboolean strong)
+{
+ return rspamd_message_get_header_from_hash (task->raw_headers,
+ task->task_pool, field, strong);
+}
+
+GPtrArray *
rspamd_message_get_mime_header_array (struct rspamd_task *task,
const gchar *field,
gboolean strong)
{
- GPtrArray *ret;
- struct raw_header *rh, *cur;
+ GPtrArray *ret, *ar;
+ struct raw_header *cur;
guint nelems = 0, i;
struct rspamd_mime_part *mp;
for (i = 0; i < task->parts->len; i ++) {
mp = g_ptr_array_index (task->parts, i);
- rh = g_hash_table_lookup (mp->raw_headers, field);
+ ar = g_hash_table_lookup (mp->raw_headers, field);
- if (rh == NULL) {
+ if (ar == NULL) {
continue;
}
- LL_FOREACH (rh, cur) {
- nelems ++;
- }
+ nelems += ar->len;
}
if (nelems == 0) {
@@ -1886,11 +1858,11 @@ rspamd_message_get_mime_header_array (struct rspamd_task *task,
for (i = 0; i < task->parts->len; i ++) {
mp = g_ptr_array_index (task->parts, i);
- rh = g_hash_table_lookup (mp->raw_headers, field);
+ ar = g_hash_table_lookup (mp->raw_headers, field);
- LL_FOREACH (rh, cur) {
+ PTR_ARRAY_FOREACH (ar, i, cur) {
if (strong) {
- if (strcmp (rh->name, field) != 0) {
+ if (strcmp (cur->name, field) != 0) {
continue;
}
}
@@ -1904,152 +1876,3 @@ rspamd_message_get_mime_header_array (struct rspamd_task *task,
return ret;
}
-
-GPtrArray *
-rspamd_message_get_headers_array (struct rspamd_task *task, ...)
-{
- va_list ap;
- GPtrArray *ret;
- struct raw_header *rh, *cur;
- guint nelems = 0;
- const gchar *hname;
-
- va_start (ap, task);
-
- for (hname = va_arg (ap, const char *); hname != NULL;
- hname = va_arg (ap, const char *)) {
- rh = g_hash_table_lookup (task->raw_headers, hname);
-
- if (rh == NULL) {
- continue;
- }
- LL_FOREACH (rh, cur) {
- nelems ++;
- }
- }
-
- va_end (ap);
-
- if (nelems == 0) {
- return NULL;
- }
-
- ret = g_ptr_array_sized_new (nelems);
-
- /* Restart varargs processing */
- va_start (ap, task);
-
- for (hname = va_arg (ap, const char *); hname != NULL;
- hname = va_arg (ap, const char *)) {
- rh = g_hash_table_lookup (task->raw_headers, hname);
-
- if (rh == NULL) {
- continue;
- }
- LL_FOREACH (rh, cur) {
- g_ptr_array_add (ret, cur);
- }
- }
-
- va_end (ap);
-
- rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard, ret);
-
- return ret;
-}
-
-GPtrArray *
-rspamd_message_get_header_array_str (struct rspamd_task *task,
- const gchar *field,
- gboolean strong)
-{
- GPtrArray *ret;
- struct raw_header *rh, *cur;
- guint nelems = 0;
-
- rh = g_hash_table_lookup (task->raw_headers, field);
-
- if (rh == NULL) {
- return NULL;
- }
-
- LL_FOREACH (rh, cur) {
- nelems ++;
- }
-
- ret = g_ptr_array_sized_new (nelems);
-
- LL_FOREACH (rh, cur) {
- if (strong) {
- if (strcmp (rh->name, field) != 0) {
- continue;
- }
- }
-
- if (cur->decoded) {
- g_ptr_array_add (ret, cur->decoded);
- }
- }
-
- rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard, ret);
-
- return ret;
-}
-
-GPtrArray *
-rspamd_message_get_headers_array_str (struct rspamd_task *task, ...)
-{
- va_list ap;
- GPtrArray *ret;
- struct raw_header *rh, *cur;
- guint nelems = 0;
- const gchar *hname;
-
- va_start (ap, task);
-
- for (hname = va_arg (ap, const char *); hname != NULL;
- hname = va_arg (ap, const char *)) {
- rh = g_hash_table_lookup (task->raw_headers, hname);
-
- if (rh == NULL) {
- continue;
- }
- LL_FOREACH (rh, cur) {
- nelems ++;
- }
- }
-
- va_end (ap);
-
- if (nelems == 0) {
- return NULL;
- }
-
- ret = g_ptr_array_sized_new (nelems);
-
- /* Restart varargs processing */
- va_start (ap, task);
-
- for (hname = va_arg (ap, const char *); hname != NULL;
- hname = va_arg (ap, const char *)) {
- rh = g_hash_table_lookup (task->raw_headers, hname);
-
- if (rh == NULL) {
- continue;
- }
- LL_FOREACH (rh, cur) {
- if (cur->decoded) {
- g_ptr_array_add (ret, cur->decoded);
- }
- }
- }
-
- va_end (ap);
-
- rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t)rspamd_ptr_array_free_hard, ret);
-
- return ret;
-}
diff --git a/src/libmime/message.h b/src/libmime/message.h
index b6f7062dc..2a9201be8 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -98,7 +98,6 @@ struct raw_header {
gboolean empty_separator;
gchar *separator;
gchar *decoded;
- struct raw_header *prev, *next;
};
/**
@@ -109,17 +108,6 @@ struct raw_header {
gboolean rspamd_message_parse (struct rspamd_task *task);
/**
- * Get a list of header's values with specified header's name using raw headers
- * @param task worker task structure
- * @param field header's name
- * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
- * @return A list of header's values or NULL. Unlike previous function it is NOT required to free list or values. I should rework one of these functions some time.
- */
-GList * rspamd_message_get_header (struct rspamd_task *task,
- const gchar *field,
- gboolean strong);
-
-/**
* Get an array of header's values with specified header's name using raw headers
* @param task worker task structure
* @param field header's name
@@ -141,30 +129,15 @@ GPtrArray *rspamd_message_get_mime_header_array (struct rspamd_task *task,
gboolean strong);
/**
- * Get array of all headers from the list specified
- * @param task
- * @param h1
- * @return An array of headers (should not be freed as well)
- */
-GPtrArray *rspamd_message_get_headers_array (struct rspamd_task *task, ...);
-
-/**
- * Get an array of header's values with specified header's name returning decoded strings as values
- * @param task worker task structure
+ * Get an array of raw headers with the specified name from a hash table (a filtered copy owned by `pool` when `strong` is set and `pool` is not NULL)
+ * @param htb hash table indexed by header name (caseless) with ptr arrays as elements
* @param field header's name
* @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not
* @return An array of header's values or NULL. It is NOT permitted to free array or values.
*/
-GPtrArray *rspamd_message_get_header_array_str (struct rspamd_task *task,
+GPtrArray *rspamd_message_get_header_from_hash (GHashTable *htb,
+ rspamd_mempool_t *pool,
const gchar *field,
gboolean strong);
-/**
- * Get array of all headers from the list specified returning decoded strings as values
- * @param task
- * @param h1
- * @return An array of headers (should not be freed as well)
- */
-GPtrArray *rspamd_message_get_headers_array_str (struct rspamd_task *task, ...);
-
#endif
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index bdf5644fd..4fd1a893c 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -1012,7 +1012,7 @@ gboolean
rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
{
struct expression_argument *arg;
- GList *headerlist;
+ GPtrArray *headerlist;
if (args == NULL || task == NULL) {
return FALSE;
@@ -1025,12 +1025,14 @@ rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
}
debug_task ("try to get header %s", (gchar *)arg->data);
- headerlist = rspamd_message_get_header (task,
+ headerlist = rspamd_message_get_header_array (task,
(gchar *)arg->data,
FALSE);
+
if (headerlist) {
return TRUE;
}
+
return FALSE;
}