From: Vsevolod Stakhov Date: Thu, 30 Jun 2016 14:22:36 +0000 (+0100) Subject: [Feature] Implement zero-copy mode for HTTP reading X-Git-Tag: 1.3.0~207 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=6d82717e74e4ef03739268ed6f260587be7af66b;p=rspamd.git [Feature] Implement zero-copy mode for HTTP reading --- diff --git a/conf/modules.d/phishing.conf b/conf/modules.d/phishing.conf index 861aee7ae..8875a3898 100644 --- a/conf/modules.d/phishing.conf +++ b/conf/modules.d/phishing.conf @@ -18,6 +18,9 @@ phishing { .include(try=true,priority=1) "$LOCAL_CONFDIR/local.d/phishing.conf" .include(try=true,priority=10) "$LOCAL_CONFDIR/override.d/phishing.conf" symbol = "PHISHING"; + # Disabled by default + openphish_enabled = false; + openphish_premium = false; openphish_map = "https://www.openphish.com/feed.txt"; # Disabled by default phishtank_enabled = false; diff --git a/src/libutil/http.c b/src/libutil/http.c index cdd44abb8..0ba8c8d2b 100644 --- a/src/libutil/http.c +++ b/src/libutil/http.c @@ -31,6 +31,8 @@ struct _rspamd_http_privbuf { rspamd_fstring_t *data; + const gchar *zc_buf; + gsize zc_remain; ref_entry_t ref; }; @@ -101,6 +103,8 @@ static const rspamd_ftok_t last_modified_header = { }; static void rspamd_http_message_storage_cleanup (struct rspamd_http_message *msg); +static gboolean rspamd_http_message_grow_body (struct rspamd_http_message *msg, + gsize len); #define HTTP_ERROR http_error_quark () GQuark @@ -626,6 +630,14 @@ rspamd_http_on_headers_complete (http_parser * parser) return 0; } +static void +rspamd_http_switch_zc (struct _rspamd_http_privbuf *pbuf, + struct rspamd_http_message *msg) +{ + pbuf->zc_buf = msg->body_buf.begin + msg->body_buf.len; + pbuf->zc_remain = msg->body_buf.allocated_len - msg->body_buf.len; +} + static int rspamd_http_on_body (http_parser * parser, const gchar *at, size_t length) { @@ -633,17 +645,41 @@ rspamd_http_on_body (http_parser * parser, const gchar *at, size_t length) (struct rspamd_http_connection *)parser->data; struct rspamd_http_connection_private *priv; struct rspamd_http_message *msg; + struct _rspamd_http_privbuf *pbuf; + const gchar *p; priv = conn->priv; msg = priv->msg; + pbuf = priv->buf; + p = at; - if (!rspamd_http_message_append_body (msg, at, length)) { - return -1; + if (!pbuf->zc_buf) { + if (!rspamd_http_message_append_body (msg, at, length)) { + return -1; + } + + /* We might have some leftover in our private buffer */ + if (pbuf->data->len == length) { + /* Switch to zero-copy mode */ + rspamd_http_switch_zc (pbuf, msg); + } + } + else { + if (msg->body_buf.begin + msg->body_buf.len != at) { + /* Likely chunked encoding */ + memmove ((gchar *)msg->body_buf.begin + msg->body_buf.len, at, length); + p = msg->body_buf.begin + msg->body_buf.len; + } + + /* Adjust zero-copy buf */ + msg->body_buf.len += length; + pbuf->zc_buf = msg->body_buf.begin + msg->body_buf.len; + pbuf->zc_remain = msg->body_buf.allocated_len - msg->body_buf.len; } if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) && !IS_CONN_ENCRYPTED (priv)) { /* Incremental update is impossible for encrypted requests so far */ - return (conn->body_handler (conn, msg, at, length)); + return (conn->body_handler (conn, msg, p, length)); } return 0; @@ -957,25 +993,54 @@ static gssize rspamd_http_try_read (gint fd, struct rspamd_http_connection *conn, struct rspamd_http_connection_private *priv, - struct _rspamd_http_privbuf *pbuf) + struct _rspamd_http_privbuf *pbuf, + const gchar **buf_ptr) { gssize r; - rspamd_fstring_t *buf; + gchar *data; + gsize len; + struct rspamd_http_message *msg; - buf = priv->buf->data; + msg = priv->msg; + + if (pbuf->zc_buf == NULL) { + data = priv->buf->data->str; + len = priv->buf->data->allocated; + } + else { + data = (gchar *)pbuf->zc_buf; + len = pbuf->zc_remain; + + if (len == 0) { + rspamd_http_message_grow_body (priv->msg, priv->buf->data->allocated); + rspamd_http_switch_zc (pbuf, msg); + data = (gchar *)pbuf->zc_buf; + len = pbuf->zc_remain; + } + } if (priv->ssl) { - r = rspamd_ssl_read (priv->ssl, buf->str, buf->allocated); + r = rspamd_ssl_read (priv->ssl, data, len); } else { - r = read (fd, buf->str, buf->allocated); + r = read (fd, data, len); } if (r <= 0) { return r; } else { - buf->len = r; + if (pbuf->zc_buf == NULL) { + priv->buf->data->len = r; + } + else { + pbuf->zc_remain -= r; + pbuf->zc_buf += r; + } + } + + if (buf_ptr) { + *buf_ptr = data; } return r; @@ -997,7 +1062,7 @@ rspamd_http_event_handler (int fd, short what, gpointer ud) struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; struct rspamd_http_connection_private *priv; struct _rspamd_http_privbuf *pbuf; - rspamd_fstring_t *buf; + const gchar *d; gssize r; GError *err; @@ -1005,14 +1070,13 @@ rspamd_http_event_handler (int fd, short what, gpointer ud) pbuf = priv->buf; REF_RETAIN (pbuf); rspamd_http_connection_ref (conn); - buf = priv->buf->data; if (what == EV_READ) { - r = rspamd_http_try_read (fd, conn, priv, pbuf); + r = rspamd_http_try_read (fd, conn, priv, pbuf, &d); if (r > 0) { if (http_parser_execute (&priv->parser, &priv->parser_cb, - buf->str, r) != (size_t)r || priv->parser.http_errno != 0) { + d, r) != (size_t)r || priv->parser.http_errno != 0) { err = g_error_new (HTTP_ERROR, priv->parser.http_errno, "HTTP parser error: %s", http_errno_description (priv->parser.http_errno)); @@ -1058,11 +1122,11 @@ rspamd_http_event_handler (int fd, short what, gpointer ud) } else if (what == EV_TIMEOUT) { /* Let's try to read from the socket first */ - r = rspamd_http_try_read (fd, conn, priv, pbuf); + r = rspamd_http_try_read (fd, conn, priv, pbuf, &d); if (r > 0) { if (http_parser_execute (&priv->parser, &priv->parser_cb, - buf->str, r) != (size_t)r || priv->parser.http_errno != 0) { + d, r) != (size_t)r || priv->parser.http_errno != 0) { err = g_error_new (HTTP_ERROR, priv->parser.http_errno, "HTTP parser error: %s", http_errno_description (priv->parser.http_errno)); @@ -2202,6 +2266,7 @@ rspamd_http_message_set_body (struct rspamd_http_message *msg, } msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.allocated_len = len; if (data != NULL) { memcpy (msg->body_buf.str, data, len); @@ -2212,6 +2277,7 @@ rspamd_http_message_set_body (struct rspamd_http_message *msg, msg->body_buf.len = 0; msg->body_buf.begin = NULL; msg->body_buf.str = NULL; + msg->body_buf.allocated_len = 0; } } else { @@ -2231,6 +2297,7 @@ rspamd_http_message_set_body (struct rspamd_http_message *msg, msg->body_buf.begin = storage->normal->str; msg->body_buf.str = storage->normal->str; + msg->body_buf.allocated_len = storage->normal->allocated; } return TRUE; @@ -2269,6 +2336,7 @@ rspamd_http_message_set_body_from_fd (struct rspamd_http_message *msg, msg->body_buf.begin = msg->body_buf.str; msg->body_buf.len = st.st_size; + msg->body_buf.allocated_len = st.st_size; return TRUE; } @@ -2288,6 +2356,7 @@ rspamd_http_message_set_body_from_fstring_steal (struct rspamd_http_message *msg msg->body_buf.str = fstr->str; msg->body_buf.begin = msg->body_buf.str; msg->body_buf.len = fstr->len; + msg->body_buf.allocated_len = fstr->allocated; return TRUE; } @@ -2307,13 +2376,14 @@ rspamd_http_message_set_body_from_fstring_copy (struct rspamd_http_message *msg, msg->body_buf.str = storage->normal->str; msg->body_buf.begin = msg->body_buf.str; msg->body_buf.len = storage->normal->len; + msg->body_buf.allocated_len = storage->normal->allocated; return TRUE; } -gboolean -rspamd_http_message_append_body (struct rspamd_http_message *msg, - const gchar *data, gsize len) + +static gboolean +rspamd_http_message_grow_body (struct rspamd_http_message *msg, gsize len) { struct stat st; union _rspamd_storage_u *storage; @@ -2350,11 +2420,39 @@ rspamd_http_message_append_body (struct rspamd_http_message *msg, if (msg->body_buf.str == MAP_FAILED) { return FALSE; } + + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.allocated_len = newlen; + } + } + else { + storage->normal = rspamd_fstring_grow (storage->normal, len); + + /* Append might cause realloc */ + msg->body_buf.begin = storage->normal->str; + msg->body_buf.len = storage->normal->len; + msg->body_buf.str = storage->normal->str; + msg->body_buf.allocated_len = storage->normal->allocated; + } + + return TRUE; +} + +gboolean +rspamd_http_message_append_body (struct rspamd_http_message *msg, + const gchar *data, gsize len) +{ + union _rspamd_storage_u *storage; + + storage = &msg->body_buf.c; + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + if (!rspamd_http_message_grow_body (msg, len)) { + return FALSE; } memcpy (msg->body_buf.str + msg->body_buf.len, data, len); msg->body_buf.len += len; - msg->body_buf.begin = msg->body_buf.str; } else { storage->normal = rspamd_fstring_append (storage->normal, data, len); @@ -2363,6 +2461,7 @@ rspamd_http_message_append_body (struct rspamd_http_message *msg, msg->body_buf.begin = storage->normal->str; msg->body_buf.len = storage->normal->len; msg->body_buf.str = storage->normal->str; + msg->body_buf.allocated_len = storage->normal->allocated; } return TRUE; diff --git a/src/libutil/http_private.h b/src/libutil/http_private.h index 7131e0bd1..4cb2851d7 100644 --- a/src/libutil/http_private.h +++ b/src/libutil/http_private.h @@ -46,6 +46,8 @@ struct rspamd_http_message { const gchar *begin; /* Data len */ gsize len; + /* Allocated len */ + gsize allocated_len; /* Data buffer (used to write data inside) */ gchar *str;