From 331f6807e9ef813755f8ec197cc24915c458a684 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 23 Aug 2010 19:07:56 +0400 Subject: * Move images library to core rspamd * Add lua api to access images properties --- CMakeLists.txt | 1 + src/images.c | 254 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/images.h | 25 +++++ src/lua/lua_common.c | 1 + src/lua/lua_common.h | 1 + src/lua/lua_task.c | 104 +++++++++++++++++++++ src/main.h | 1 + src/message.c | 8 +- src/worker.c | 3 + 9 files changed, 397 insertions(+), 1 deletion(-) create mode 100644 src/images.c create mode 100644 src/images.h diff --git a/CMakeLists.txt b/CMakeLists.txt index af324b67b..30bf27387 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -463,6 +463,7 @@ SET(RSPAMDSRC src/modules.c src/greylist_storage.c src/hash.c src/html.c + src/images.c src/lmtp.c src/lmtp_proto.c src/logger.c diff --git a/src/images.c b/src/images.c new file mode 100644 index 000000000..00c9599c0 --- /dev/null +++ b/src/images.c @@ -0,0 +1,254 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "images.h" +#include "main.h" +#include "message.h" + +static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10}; +static const guint8 jpg_sig1[] = {0xff, 0xd8}; +static const guint8 jpg_sig2[] = {'J', 'F', 'I', 'F'}; +static const guint8 gif_signature[] = {'G', 'I', 'F', '8'}; +static const guint8 bmp_signature[] = {'B', 'M'}; + +static void process_image (struct worker_task *task, struct mime_part *part); + + +void +process_images (struct worker_task *task) +{ + GList *cur; + struct mime_part *part; + + cur = task->parts; + while (cur) { + part = cur->data; + if (g_mime_content_type_is_type (part->type, "image", "*") && part->content->len > 0) { + process_image (task, part); + } + cur = g_list_next (cur); + } + +} + +static enum known_image_types +detect_image_type (GByteArray *data) +{ + if (data->len > sizeof (png_signature) / sizeof (png_signature[0])) { + if (memcmp (data->data, png_signature, sizeof (png_signature)) == 0) { + return IMAGE_TYPE_PNG; + } + } + if (data->len > 10) { + if (memcmp (data->data, jpg_sig1, sizeof (jpg_sig1)) == 0) { + if (memcmp (data->data + 6, jpg_sig2, sizeof (jpg_sig2)) == 0) { + return IMAGE_TYPE_JPG; + } + } + } + if (data->len > sizeof (gif_signature) / sizeof (gif_signature[0])) { + if (memcmp (data->data, gif_signature, sizeof (gif_signature)) == 0) { + return IMAGE_TYPE_GIF; + } + } + if (data->len > sizeof (bmp_signature) / sizeof (bmp_signature[0])) { + if (memcmp (data->data, bmp_signature, sizeof (bmp_signature)) == 0) { + return IMAGE_TYPE_BMP; + } + } + + return IMAGE_TYPE_UNKNOWN; +} + + +static struct rspamd_image * +process_png_image (struct worker_task *task, GByteArray *data) +{ + struct rspamd_image *img; + guint32 t; + guint8 *p; + + if (data->len < 24) { + msg_info ("bad png detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + /* In png we should find iHDR section and get data from it */ + /* Skip signature and read header section */ + p = data->data + 12; + if (memcmp (p, "IHDR", 4) != 0) { + msg_info ("png doesn't begins with IHDR section", task->message_id); + return NULL; + } + + img = memory_pool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_PNG; + img->data = data; + + p += 4; + memcpy (&t, p, sizeof (guint32)); + img->width = ntohl (t); + p += 4; + memcpy (&t, p, sizeof (guint32)); + img->height = ntohl (t); + + return img; +} + +static struct rspamd_image * +process_jpg_image (struct worker_task *task, GByteArray *data) +{ + guint8 *p; + guint16 t; + gsize remain; + struct rspamd_image *img; + + img = memory_pool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_JPG; + img->data = data; + + p = data->data; + remain = data->len; + /* In jpeg we should find any data stream (ff c0 .. ff c3) and extract its height and width */ + while (remain --) { + if (*p == 0xFF && remain > 8 && (*(p + 1) >= 0xC0 && *(p + 1) <= 0xC3)) { + memcpy (&t, p + 5, sizeof (guint16)); + img->height = ntohs (t); + memcpy (&t, p + 7, sizeof (guint16)); + img->width = ntohs (t); + return img; + } + p ++; + } + + return NULL; +} + +static struct rspamd_image * +process_gif_image (struct worker_task *task, GByteArray *data) +{ + struct rspamd_image *img; + guint8 *p; + guint16 t; + + if (data->len < 10) { + msg_info ("bad gif detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + img = memory_pool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_GIF; + img->data = data; + + p = data->data + 6; + memcpy (&t, p, sizeof (guint16)); + img->width = GUINT16_FROM_LE (t); + memcpy (&t, p + 2, sizeof (guint16)); + img->height = GUINT16_FROM_LE (t); + + return img; +} + +static struct rspamd_image * +process_bmp_image (struct worker_task *task, GByteArray *data) +{ + struct rspamd_image *img; + gint32 t; + guint8 *p; + + + + if (data->len < 28) { + msg_info ("bad bmp detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + img = memory_pool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_BMP; + img->data = data; + p = data->data + 18; + memcpy (&t, p, sizeof (gint32)); + img->width = abs (GINT32_FROM_LE (t)); + memcpy (&t, p + 4, sizeof (gint32)); + img->height = abs (GINT32_FROM_LE (t)); + + return img; +} + +static void +process_image (struct worker_task *task, struct mime_part *part) +{ + enum known_image_types type; + struct rspamd_image *img = NULL; + if ((type = detect_image_type (part->content)) != IMAGE_TYPE_UNKNOWN) { + switch (type) { + case IMAGE_TYPE_PNG: + img = process_png_image (task, part->content); + break; + case IMAGE_TYPE_JPG: + img = process_jpg_image (task, part->content); + break; + case IMAGE_TYPE_GIF: + img = process_gif_image (task, part->content); + break; + case IMAGE_TYPE_BMP: + img = process_bmp_image (task, part->content); + break; + default: + img = NULL; + break; + } + } + + if (img != NULL) { + msg_info ("detected %s image of size %ud x %ud in message <%s>", + image_type_str (img->type), + img->width, img->height, + task->message_id); + task->images = g_list_prepend (task->images, img); + } +} + +const char * +image_type_str (enum known_image_types type) +{ + switch (type) { + case IMAGE_TYPE_PNG: + return "PNG"; + break; + case IMAGE_TYPE_JPG: + return "JPEG"; + break; + case IMAGE_TYPE_GIF: + return "GIF"; + break; + case IMAGE_TYPE_BMP: + return "BMP"; + break; + default: + return "unknown"; + } + + return "unknown"; +} diff --git a/src/images.h b/src/images.h new file mode 100644 index 000000000..2a79e4edd --- /dev/null +++ b/src/images.h @@ -0,0 +1,25 @@ +#ifndef IMAGES_H_ +#define IMAGES_H_ + +#include "config.h" +#include "main.h" + +enum known_image_types { + IMAGE_TYPE_PNG, + IMAGE_TYPE_JPG, + IMAGE_TYPE_GIF, + IMAGE_TYPE_BMP, + IMAGE_TYPE_UNKNOWN = 9000 +}; + +struct rspamd_image { + enum known_image_types type; + GByteArray *data; + guint32 width; + guint32 height; +}; + +void process_images (struct worker_task *task); +const char *image_type_str (enum known_image_types type); + +#endif /* IMAGES_H_ */ diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index a880f50e3..c37588bb9 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -226,6 +226,7 @@ init_lua (struct config_file *cfg) (void)luaopen_hash_table (L); (void)luaopen_task (L); (void)luaopen_textpart (L); + (void)luaopen_image (L); (void)luaopen_message (L); (void)luaopen_classifier (L); (void)luaopen_statfile (L); diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index 7bbf67533..d1f5f9eb4 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -28,6 +28,7 @@ int luaopen_metric (lua_State *L); int luaopen_radix (lua_State *L); int luaopen_hash_table (lua_State *L); int luaopen_textpart (lua_State *L); +int luaopen_image (lua_State *L); int luaopen_classifier (lua_State *L); int luaopen_statfile (lua_State * L); void init_lua (struct config_file *cfg); diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 3630462f9..32c7b41ef 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -27,6 +27,7 @@ #include "../message.h" #include "../expressions.h" #include "../dns.h" +#include "../images.h" /* Task methods */ LUA_FUNCTION_DEF (task, get_message); @@ -45,6 +46,7 @@ LUA_FUNCTION_DEF (task, get_from_ip); LUA_FUNCTION_DEF (task, get_from_ip_num); LUA_FUNCTION_DEF (task, get_client_ip_num); LUA_FUNCTION_DEF (task, get_helo); +LUA_FUNCTION_DEF (task, get_images); static const struct luaL_reg tasklib_m[] = { LUA_INTERFACE_DEF (task, get_message), @@ -63,6 +65,7 @@ static const struct luaL_reg tasklib_m[] = { LUA_INTERFACE_DEF (task, get_from_ip_num), LUA_INTERFACE_DEF (task, get_client_ip_num), LUA_INTERFACE_DEF (task, get_helo), + LUA_INTERFACE_DEF (task, get_images), {"__tostring", lua_class_tostring}, {NULL, NULL} }; @@ -82,6 +85,19 @@ static const struct luaL_reg textpartlib_m[] = { {NULL, NULL} }; +/* Image methods */ +LUA_FUNCTION_DEF (image, get_width); +LUA_FUNCTION_DEF (image, get_height); +LUA_FUNCTION_DEF (image, get_type); + +static const struct luaL_reg imagelib_m[] = { + LUA_INTERFACE_DEF (image, get_width), + LUA_INTERFACE_DEF (image, get_height), + LUA_INTERFACE_DEF (image, get_type), + {"__tostring", lua_class_tostring}, + {NULL, NULL} +}; + /* Utility functions */ static struct worker_task * lua_check_task (lua_State * L) @@ -99,6 +115,14 @@ lua_check_textpart (lua_State * L) return *((struct mime_text_part **)ud); } +static struct rspamd_image * +lua_check_image (lua_State * L) +{ + void *ud = luaL_checkudata (L, 1, "rspamd{image}"); + luaL_argcheck (L, ud != NULL, 1, "'image' expected"); + return *((struct rspamd_image **)ud); +} + /*** Task interface ***/ static int lua_task_get_message (lua_State * L) @@ -529,6 +553,33 @@ lua_task_get_helo (lua_State *L) return 1; } +static int +lua_task_get_images (lua_State *L) +{ + struct worker_task *task = lua_check_task (L); + int i = 1; + GList *cur; + struct rspamd_image **pimg; + + if (task) { + cur = task->images; + if (cur != NULL) { + lua_newtable (L); + while (cur) { + pimg = lua_newuserdata (L, sizeof (struct rspamd_image *)); + lua_setclass (L, "rspamd{image}", -1); + *pimg = cur->data; + lua_rawseti (L, -2, i++); + cur = g_list_next (cur); + } + return 1; + } + } + + lua_pushnil (L); + return 1; +} + /**** Textpart implementation *****/ @@ -591,6 +642,50 @@ lua_textpart_get_fuzzy (lua_State * L) return 1; } +/* Image functions */ +static int +lua_image_get_width (lua_State *L) +{ + struct rspamd_image *img = lua_check_image (L); + + if (img != NULL) { + lua_pushnumber (L, img->width); + } + else { + lua_pushnumber (L, 0); + } + return 1; +} + +static int +lua_image_get_height (lua_State *L) +{ + struct rspamd_image *img = lua_check_image (L); + + if (img != NULL) { + lua_pushnumber (L, img->height); + } + else { + lua_pushnumber (L, 0); + } + + return 1; +} + +static int +lua_image_get_type (lua_State *L) +{ + struct rspamd_image *img = lua_check_image (L); + + if (img != NULL) { + lua_pushstring (L, image_type_str (img->type)); + } + else { + lua_pushnil (L); + } + + return 1; +} /* Init part */ int @@ -610,3 +705,12 @@ luaopen_textpart (lua_State * L) return 1; } + +int +luaopen_image (lua_State * L) +{ + lua_newclass (L, "rspamd{image}", imagelib_m); + luaL_openlib (L, "rspamd_image", null_reg, 0); + + return 1; +} diff --git a/src/main.h b/src/main.h index 091daf168..b54d3ed8a 100644 --- a/src/main.h +++ b/src/main.h @@ -202,6 +202,7 @@ struct worker_task { char *raw_headers; /**< list of raw headers */ GList *received; /**< list of received headers */ GList *urls; /**< list of parsed urls */ + GList *images; /**< list of images */ GHashTable *results; /**< hash table of metric_result indexed by * metric's name */ GList *messages; /**< list of messages that would be reported */ diff --git a/src/message.c b/src/message.c index 49c48807b..ad730d360 100644 --- a/src/message.c +++ b/src/message.c @@ -29,6 +29,7 @@ #include "cfg_file.h" #include "html.h" #include "modules.h" +#include "images.h" #define RECURSION_LIMIT 30 #define UTF8_CHARSET "UTF-8" @@ -709,7 +710,7 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) mime_part->content = part_content; mime_part->parent = task->parser_parent_part; /* Extract checksums for some types */ - if (g_ascii_strcasecmp (type->type, "image") == 0 && part_content->len > 0) { + if (g_mime_content_type_is_type (type, "image", "*") && part_content->len > 0) { mime_part->checksum = g_compute_checksum_for_data (G_CHECKSUM_MD5, part_content->data, part_content->len); memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, mime_part->checksum); } @@ -814,6 +815,10 @@ process_message (struct worker_task *task) task->raw_headers = g_mime_message_get_headers (task->message); #endif +#ifdef RSPAMD_MAIN + process_images (task); +#endif + /* Parse received headers */ first = message_get_header (task->task_pool, message, "Received"); cur = first; @@ -881,6 +886,7 @@ process_message (struct worker_task *task) if (task->subject) { g_mime_message_set_subject (task->message, task->subject); } + /* Add recipients */ #ifndef GMIME24 if (task->rcpt) { diff --git a/src/worker.c b/src/worker.c index d249843b8..cbd234492 100644 --- a/src/worker.c +++ b/src/worker.c @@ -257,6 +257,9 @@ free_task (struct worker_task *task, gboolean is_soft) if (task->urls) { g_list_free (task->urls); } + if (task->images) { + g_list_free (task->images); + } if (task->messages) { g_list_free (task->messages); } -- cgit v1.2.3