diff options
author | heraklit256 <37872459+heraklit256@users.noreply.github.com> | 2018-12-19 19:26:34 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-19 19:26:34 +0000 |
commit | e36509986bcc059a99ca5c08d9a7d90c1f1217f5 (patch) | |
tree | a715e2e95154a1f6aaf62bf7f73adb002c823fa7 /src/lua/lua_mimepart.c | |
parent | 5f4617948c64483dfb648b5bfe784f8c84dd87ea (diff) | |
parent | 714faa2b804507f622d4780be646eaf8146a166d (diff) | |
download | rspamd-e36509986bcc059a99ca5c08d9a7d90c1f1217f5.tar.gz rspamd-e36509986bcc059a99ca5c08d9a7d90c1f1217f5.zip |
Merge pull request #8 from rspamd/master
merge upstream into local master
Diffstat (limited to 'src/lua/lua_mimepart.c')
-rw-r--r-- | src/lua/lua_mimepart.c | 358 |
1 files changed, 324 insertions, 34 deletions
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index 78c3e05b9..50cdaa7b7 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -132,8 +132,16 @@ LUA_FUNCTION_DEF (textpart, get_stats); LUA_FUNCTION_DEF (textpart, get_words_count); /*** - * @method mime_part:get_words() - * Get words in the part + * @method mime_part:get_words([how]) + * Get words in the part. Optional `how` argument defines type of words returned: + * - `stem`: stemmed words (default) + * - `norm`: normalised words (utf normalised + lowercased) + * - `raw`: raw words in utf (if possible) + * - `full`: list of tables, each table has the following fields: + * - [1] - stemmed word + * - [2] - normalised word + * - [3] - raw word + * - [4] - flags (table of strings) * @return {table/strings} words in the part */ LUA_FUNCTION_DEF (textpart, get_words); @@ -162,6 +170,13 @@ LUA_FUNCTION_DEF (textpart, get_html); * @return {string} short abbreviation (such as `ru`) for the script's language */ LUA_FUNCTION_DEF (textpart, get_language); + +/*** + * @method text_part:get_charset() + * Returns part real charset + * @return {string} charset of the part + */ +LUA_FUNCTION_DEF (textpart, get_charset); /*** * @method text_part:get_languages() * Returns array of tables of all languages detected for a part: @@ -205,6 +220,7 @@ static const struct luaL_reg textpartlib_m[] = { LUA_INTERFACE_DEF (textpart, is_html), LUA_INTERFACE_DEF (textpart, get_html), LUA_INTERFACE_DEF (textpart, get_language), + LUA_INTERFACE_DEF (textpart, get_charset), LUA_INTERFACE_DEF (textpart, get_languages), LUA_INTERFACE_DEF (textpart, get_mimepart), LUA_INTERFACE_DEF (textpart, get_stats), @@ -285,6 +301,15 @@ LUA_FUNCTION_DEF (mimepart, get_header_full); * @return {number} number of header's occurrencies or 0 if not found */ LUA_FUNCTION_DEF (mimepart, get_header_count); + +/*** + * @method mimepart:get_raw_headers() + * Get all undecoded headers of a mime part as a string + * @return {rspamd_text} all raw headers for a message as opaque text + */ +LUA_FUNCTION_DEF (mimepart, get_raw_headers); + + /*** * @method mime_part:get_content() * Get the parsed content of part @@ -318,6 +343,20 @@ LUA_FUNCTION_DEF (mimepart, get_type); LUA_FUNCTION_DEF (mimepart, get_type_full); /*** + * @method mime_part:get_detected_type() + * Extract content-type string of the mime part. Use libmagic detection + * @return {string,string} content type in form 'type','subtype' + */ +LUA_FUNCTION_DEF (mimepart, get_detected_type); + +/*** + * @method mime_part:get_detected_type_full() + * Extract content-type string of the mime part with all attributes. Use libmagic detection + * @return {string,string,table} content type in form 'type','subtype', {attrs} + */ +LUA_FUNCTION_DEF (mimepart, get_detected_type_full); + +/*** * @method mime_part:get_cte() * Extract content-transfer-encoding for a part * @return {string} content transfer encoding (e.g. `base64` or `7bit`) @@ -355,6 +394,12 @@ LUA_FUNCTION_DEF (mimepart, get_image); * @return {bool} true if a part is an archive */ LUA_FUNCTION_DEF (mimepart, is_archive); +/*** + * @method mime_part:is_attachment() + * Returns true if mime part looks like an attachment + * @return {bool} true if a part looks like an attachment + */ +LUA_FUNCTION_DEF (mimepart, is_attachment); /*** * @method mime_part:get_archive() @@ -377,6 +422,20 @@ LUA_FUNCTION_DEF (mimepart, get_archive); */ LUA_FUNCTION_DEF (mimepart, is_multipart); /*** + * @method mime_part:is_message() + * Returns true if mime part is a message part (message/rfc822) + * @return {bool} true if a part is is a message part + */ +LUA_FUNCTION_DEF (mimepart, is_message); +/*** + * @method mime_part:get_boundary() + * Returns boundary for a part (extracted from parent multipart for normal parts and + * from the part itself for multipart) + * @return {string} boundary value or nil + */ +LUA_FUNCTION_DEF (mimepart, get_boundary); + +/*** * @method mime_part:get_children() * Returns rspamd_mimepart table of part's childer. Returns nil if mime part is not multipart * or a message part. @@ -402,6 +461,13 @@ LUA_FUNCTION_DEF (mimepart, get_text); * @return {string} 128 characters hex string with digest of the part */ LUA_FUNCTION_DEF (mimepart, get_digest); + +/*** + * @method mime_part:get_id() + * Returns the order of the part in parts list + * @return {number} index of the part (starting from 1 as it is Lua API) + */ +LUA_FUNCTION_DEF (mimepart, get_id); /*** * @method mime_part:is_broken() * Returns true if mime part has incorrectly specified content type @@ -421,6 +487,13 @@ LUA_FUNCTION_DEF (mimepart, is_broken); * @param {table} params optional parameters */ LUA_FUNCTION_DEF (mimepart, headers_foreach); +/*** + * @method mime_part:get_parent() + * Returns parent part for this part + * @return {rspamd_mimepart} parent part or nil + */ +LUA_FUNCTION_DEF (mimepart, get_parent); + static const struct luaL_reg mimepartlib_m[] = { LUA_INTERFACE_DEF (mimepart, get_content), @@ -428,22 +501,30 @@ static const struct luaL_reg mimepartlib_m[] = { LUA_INTERFACE_DEF (mimepart, get_length), LUA_INTERFACE_DEF (mimepart, get_type), LUA_INTERFACE_DEF (mimepart, get_type_full), + LUA_INTERFACE_DEF (mimepart, get_detected_type), + LUA_INTERFACE_DEF (mimepart, get_detected_type_full), LUA_INTERFACE_DEF (mimepart, get_cte), LUA_INTERFACE_DEF (mimepart, get_filename), + LUA_INTERFACE_DEF (mimepart, get_boundary), LUA_INTERFACE_DEF (mimepart, get_header), LUA_INTERFACE_DEF (mimepart, get_header_raw), LUA_INTERFACE_DEF (mimepart, get_header_full), LUA_INTERFACE_DEF (mimepart, get_header_count), + LUA_INTERFACE_DEF (mimepart, get_raw_headers), LUA_INTERFACE_DEF (mimepart, is_image), LUA_INTERFACE_DEF (mimepart, get_image), LUA_INTERFACE_DEF (mimepart, is_archive), LUA_INTERFACE_DEF (mimepart, get_archive), LUA_INTERFACE_DEF (mimepart, is_multipart), + LUA_INTERFACE_DEF (mimepart, is_message), LUA_INTERFACE_DEF (mimepart, get_children), + LUA_INTERFACE_DEF (mimepart, get_parent), LUA_INTERFACE_DEF (mimepart, is_text), LUA_INTERFACE_DEF (mimepart, is_broken), + LUA_INTERFACE_DEF (mimepart, is_attachment), LUA_INTERFACE_DEF (mimepart, get_text), LUA_INTERFACE_DEF (mimepart, get_digest), + LUA_INTERFACE_DEF (mimepart, get_id), LUA_INTERFACE_DEF (mimepart, headers_foreach), {"__tostring", rspamd_lua_class_tostring}, {NULL, NULL} @@ -725,7 +806,7 @@ lua_textpart_get_words_count (lua_State *L) lua_pushinteger (L, 0); } else { - lua_pushinteger (L, part->utf_words->len); + lua_pushinteger (L, part->nwords); } return 1; @@ -736,8 +817,7 @@ lua_textpart_get_words (lua_State *L) { LUA_TRACE_POINT; struct rspamd_mime_text_part *part = lua_check_textpart (L); - rspamd_stat_token_t *w; - guint i; + enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM; if (part == NULL) { return luaL_error (L, "invalid arguments"); @@ -747,14 +827,27 @@ lua_textpart_get_words (lua_State *L) lua_createtable (L, 0, 0); } else { - lua_createtable (L, part->utf_words->len, 0); - - for (i = 0; i < part->utf_words->len; i ++) { - w = &g_array_index (part->utf_words, rspamd_stat_token_t, i); + if (lua_type (L, 2) == LUA_TSTRING) { + const gchar *how_str = lua_tostring (L, 2); - lua_pushlstring (L, w->begin, w->len); - lua_rawseti (L, -2, i + 1); + if (strcmp (how_str, "stem") == 0) { + how = RSPAMD_LUA_WORDS_STEM; + } + else if (strcmp (how_str, "norm") == 0) { + how = RSPAMD_LUA_WORDS_NORM; + } + else if (strcmp (how_str, "raw") == 0) { + how = RSPAMD_LUA_WORDS_RAW; + } + else if (strcmp (how_str, "full") == 0) { + how = RSPAMD_LUA_WORDS_FULL; + } + else { + return luaL_error (L, "unknown words type: %s", how_str); + } } + + return rspamd_lua_push_words (L, part->utf_words, how); } return 1; @@ -834,6 +927,28 @@ lua_textpart_get_language (lua_State * L) } static gint +lua_textpart_get_charset (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_text_part *part = lua_check_textpart (L); + + if (part != NULL) { + if (part->real_charset != NULL) { + lua_pushstring (L, part->real_charset); + return 1; + } + else { + lua_pushnil (L); + } + } + else { + return luaL_error (L, "invalid arguments"); + } + + return 1; +} + +static gint lua_textpart_get_languages (lua_State * L) { LUA_TRACE_POINT; @@ -878,8 +993,8 @@ struct lua_shingle_data { #define STORE_TOKEN(i, t) do { \ if ((i) < part->utf_words->len) { \ word = &g_array_index (part->utf_words, rspamd_stat_token_t, (i)); \ - sd->t.begin = word->begin; \ - sd->t.len = word->len; \ + sd->t.begin = word->stemmed.begin; \ + sd->t.len = word->stemmed.len; \ } \ }while (0) @@ -938,7 +1053,8 @@ lua_textpart_get_fuzzy_hashes (lua_State * L) for (i = 0; i < part->utf_words->len; i ++) { word = &g_array_index (part->utf_words, rspamd_stat_token_t, i); - rspamd_cryptobox_hash_update (&st, word->begin, word->len); + rspamd_cryptobox_hash_update (&st, + word->stemmed.begin, word->stemmed.len); } rspamd_cryptobox_hash_final (&st, digest); @@ -1123,48 +1239,49 @@ lua_mimepart_get_length (lua_State * L) } static gint -lua_mimepart_get_type_common (lua_State * L, gboolean full) +lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct, + gboolean full) { - struct rspamd_mime_part *part = lua_check_mimepart (L); + GHashTableIter it; gpointer k, v; struct rspamd_content_type_param *param; - if (part == NULL) { + if (ct == NULL) { lua_pushnil (L); lua_pushnil (L); return 2; } - lua_pushlstring (L, part->ct->type.begin, part->ct->type.len); - lua_pushlstring (L, part->ct->subtype.begin, part->ct->subtype.len); + lua_pushlstring (L, ct->type.begin, ct->type.len); + lua_pushlstring (L, ct->subtype.begin, ct->subtype.len); if (!full) { return 2; } - lua_createtable (L, 0, 2 + (part->ct->attrs ? - g_hash_table_size (part->ct->attrs) : 0)); + lua_createtable (L, 0, 2 + (ct->attrs ? + g_hash_table_size (ct->attrs) : 0)); - if (part->ct->charset.len > 0) { + if (ct->charset.len > 0) { lua_pushstring (L, "charset"); - lua_pushlstring (L, part->ct->charset.begin, part->ct->charset.len); + lua_pushlstring (L, ct->charset.begin, ct->charset.len); lua_settable (L, -3); } - if (part->ct->boundary.len > 0) { - lua_pushstring (L, "charset"); - lua_pushlstring (L, part->ct->boundary.begin, part->ct->boundary.len); + if (ct->boundary.len > 0) { + lua_pushstring (L, "boundary"); + lua_pushlstring (L, ct->boundary.begin, ct->boundary.len); lua_settable (L, -3); } - if (part->ct->attrs) { - g_hash_table_iter_init (&it, part->ct->attrs); + if (ct->attrs) { + g_hash_table_iter_init (&it, ct->attrs); while (g_hash_table_iter_next (&it, &k, &v)) { param = v; - if (param->name.len > 0 && param->name.len > 0) { + if (param->name.len > 0 && param->value.len > 0) { /* TODO: think about multiple values here */ lua_pushlstring (L, param->name.begin, param->name.len); lua_pushlstring (L, param->value.begin, param->value.len); @@ -1180,14 +1297,52 @@ static gint lua_mimepart_get_type (lua_State * L) { LUA_TRACE_POINT; - return lua_mimepart_get_type_common (L, FALSE); + struct rspamd_mime_part *part = lua_check_mimepart (L); + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + return lua_mimepart_get_type_common (L, part->ct, FALSE); } static gint lua_mimepart_get_type_full (lua_State * L) { LUA_TRACE_POINT; - return lua_mimepart_get_type_common (L, TRUE); + struct rspamd_mime_part *part = lua_check_mimepart (L); + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + return lua_mimepart_get_type_common (L, part->ct, TRUE); +} + +static gint +lua_mimepart_get_detected_type (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L); + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + return lua_mimepart_get_type_common (L, part->detected_ct, FALSE); +} + +static gint +lua_mimepart_get_detected_type_full (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L); + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + return lua_mimepart_get_type_common (L, part->detected_ct, TRUE); } static gint @@ -1223,6 +1378,35 @@ lua_mimepart_get_filename (lua_State * L) } static gint +lua_mimepart_get_boundary (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L), *parent; + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + if (IS_CT_MULTIPART (part->ct)) { + lua_pushlstring (L, part->specific.mp->boundary.begin, + part->specific.mp->boundary.len); + } + else { + parent = part->parent_part; + + if (!parent || !IS_CT_MULTIPART (parent->ct)) { + lua_pushnil (L); + } + else { + lua_pushlstring (L, parent->specific.mp->boundary.begin, + parent->specific.mp->boundary.len); + } + } + + return 1; +} + +static gint lua_mimepart_get_header_common (lua_State *L, enum rspamd_lua_task_header_type how) { struct rspamd_mime_part *part = lua_check_mimepart (L); @@ -1273,6 +1457,28 @@ lua_mimepart_get_header_count (lua_State * L) } static gint +lua_mimepart_get_raw_headers (lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L); + struct rspamd_lua_text *t; + + if (part) { + t = lua_newuserdata (L, sizeof (*t)); + rspamd_lua_setclass (L, "rspamd{text}", -1); + t->start = part->raw_headers_str; + t->len = part->raw_headers_len; + t->flags = 0; + } + else { + return luaL_error (L, "invalid arguments"); + } + + + return 1; +} + +static gint lua_mimepart_is_image (lua_State * L) { LUA_TRACE_POINT; @@ -1318,6 +1524,52 @@ lua_mimepart_is_multipart (lua_State * L) } static gint +lua_mimepart_is_message (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L); + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + lua_pushboolean (L, IS_CT_MESSAGE (part->ct) ? true : false); + + return 1; +} + +static gint +lua_mimepart_is_attachment (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L); + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + if (!(part->flags & (RSPAMD_MIME_PART_IMAGE|RSPAMD_MIME_PART_TEXT))) { + if (part->cd && part->cd->type == RSPAMD_CT_ATTACHMENT) { + lua_pushboolean (L, true); + } + else { + if (part->cd && part->cd->filename.len > 0) { + /* We still have filename and it is not an image */ + lua_pushboolean (L, true); + } + else { + lua_pushboolean (L, false); + } + } + } + else { + lua_pushboolean (L, false); + } + + return 1; +} + +static gint lua_mimepart_is_text (lua_State * L) { LUA_TRACE_POINT; @@ -1411,13 +1663,13 @@ lua_mimepart_get_children (lua_State * L) return luaL_error (L, "invalid arguments"); } - if (!IS_CT_MULTIPART (part->ct) || part->specific.mp.children == NULL) { + if (!IS_CT_MULTIPART (part->ct) || part->specific.mp->children == NULL) { lua_pushnil (L); } else { - lua_createtable (L, part->specific.mp.children->len, 0); + lua_createtable (L, part->specific.mp->children->len, 0); - PTR_ARRAY_FOREACH (part->specific.mp.children, i, cur) { + PTR_ARRAY_FOREACH (part->specific.mp->children, i, cur) { pcur = lua_newuserdata (L, sizeof (*pcur)); *pcur = cur; rspamd_lua_setclass (L, "rspamd{mimepart}", -1); @@ -1428,6 +1680,29 @@ lua_mimepart_get_children (lua_State * L) return 1; } +static gint +lua_mimepart_get_parent (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L); + struct rspamd_mime_part **pparent; + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + if (part->parent_part) { + pparent = lua_newuserdata (L, sizeof (*pparent)); + *pparent = part->parent_part; + rspamd_lua_setclass (L, "rspamd{mimepart}", -1); + } + else { + lua_pushnil (L); + } + + return 1; +} + static gint lua_mimepart_get_text (lua_State * L) @@ -1472,6 +1747,21 @@ lua_mimepart_get_digest (lua_State * L) } static gint +lua_mimepart_get_id (lua_State * L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart (L); + + if (part == NULL) { + return luaL_error (L, "invalid arguments"); + } + + lua_pushnumber (L, part->id); + + return 1; +} + +static gint lua_mimepart_headers_foreach (lua_State *L) { LUA_TRACE_POINT; |