aboutsummaryrefslogtreecommitdiffstats
path: root/src/lua/lua_mimepart.c
diff options
context:
space:
mode:
authorheraklit256 <37872459+heraklit256@users.noreply.github.com>2018-12-19 19:26:34 +0000
committerGitHub <noreply@github.com>2018-12-19 19:26:34 +0000
commite36509986bcc059a99ca5c08d9a7d90c1f1217f5 (patch)
treea715e2e95154a1f6aaf62bf7f73adb002c823fa7 /src/lua/lua_mimepart.c
parent5f4617948c64483dfb648b5bfe784f8c84dd87ea (diff)
parent714faa2b804507f622d4780be646eaf8146a166d (diff)
downloadrspamd-e36509986bcc059a99ca5c08d9a7d90c1f1217f5.tar.gz
rspamd-e36509986bcc059a99ca5c08d9a7d90c1f1217f5.zip
Merge pull request #8 from rspamd/master
merge upstream into local master
Diffstat (limited to 'src/lua/lua_mimepart.c')
-rw-r--r--src/lua/lua_mimepart.c358
1 files changed, 324 insertions, 34 deletions
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 78c3e05b9..50cdaa7b7 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -132,8 +132,16 @@ LUA_FUNCTION_DEF (textpart, get_stats);
LUA_FUNCTION_DEF (textpart, get_words_count);
/***
- * @method mime_part:get_words()
- * Get words in the part
+ * @method text_part:get_words([how])
+ * Get words in the part. Optional `how` argument defines type of words returned:
+ * - `stem`: stemmed words (default)
+ * - `norm`: normalised words (utf normalised + lowercased)
+ * - `raw`: raw words in utf (if possible)
+ * - `full`: list of tables, each table has the following fields:
+ * - [1] - stemmed word
+ * - [2] - normalised word
+ * - [3] - raw word
+ * - [4] - flags (table of strings)
* @return {table/strings} words in the part
*/
LUA_FUNCTION_DEF (textpart, get_words);
@@ -162,6 +170,13 @@ LUA_FUNCTION_DEF (textpart, get_html);
* @return {string} short abbreviation (such as `ru`) for the script's language
*/
LUA_FUNCTION_DEF (textpart, get_language);
+
+/***
+ * @method text_part:get_charset()
+ * Returns the real charset of the part
+ * @return {string} charset of the part
+ */
+LUA_FUNCTION_DEF (textpart, get_charset);
/***
* @method text_part:get_languages()
* Returns array of tables of all languages detected for a part:
@@ -205,6 +220,7 @@ static const struct luaL_reg textpartlib_m[] = {
LUA_INTERFACE_DEF (textpart, is_html),
LUA_INTERFACE_DEF (textpart, get_html),
LUA_INTERFACE_DEF (textpart, get_language),
+ LUA_INTERFACE_DEF (textpart, get_charset),
LUA_INTERFACE_DEF (textpart, get_languages),
LUA_INTERFACE_DEF (textpart, get_mimepart),
LUA_INTERFACE_DEF (textpart, get_stats),
@@ -285,6 +301,15 @@ LUA_FUNCTION_DEF (mimepart, get_header_full);
* @return {number} number of header's occurrencies or 0 if not found
*/
LUA_FUNCTION_DEF (mimepart, get_header_count);
+
+/***
+ * @method mime_part:get_raw_headers()
+ * Get all undecoded headers of a mime part as a string
+ * @return {rspamd_text} all raw headers of the mime part as opaque text
+ */
+LUA_FUNCTION_DEF (mimepart, get_raw_headers);
+
+
/***
* @method mime_part:get_content()
* Get the parsed content of part
@@ -318,6 +343,20 @@ LUA_FUNCTION_DEF (mimepart, get_type);
LUA_FUNCTION_DEF (mimepart, get_type_full);
/***
+ * @method mime_part:get_detected_type()
+ * Extract content-type string of the mime part. Use libmagic detection
+ * @return {string,string} content type in form 'type','subtype'
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type);
+
+/***
+ * @method mime_part:get_detected_type_full()
+ * Extract content-type string of the mime part with all attributes. Use libmagic detection
+ * @return {string,string,table} content type in form 'type','subtype', {attrs}
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
+
+/***
* @method mime_part:get_cte()
* Extract content-transfer-encoding for a part
* @return {string} content transfer encoding (e.g. `base64` or `7bit`)
@@ -355,6 +394,12 @@ LUA_FUNCTION_DEF (mimepart, get_image);
* @return {bool} true if a part is an archive
*/
LUA_FUNCTION_DEF (mimepart, is_archive);
+/***
+ * @method mime_part:is_attachment()
+ * Returns true if mime part looks like an attachment
+ * @return {bool} true if a part looks like an attachment
+ */
+LUA_FUNCTION_DEF (mimepart, is_attachment);
/***
* @method mime_part:get_archive()
@@ -377,6 +422,20 @@ LUA_FUNCTION_DEF (mimepart, get_archive);
*/
LUA_FUNCTION_DEF (mimepart, is_multipart);
/***
+ * @method mime_part:is_message()
+ * Returns true if mime part is a message part (message/rfc822)
+ * @return {bool} true if a part is a message part
+ */
+LUA_FUNCTION_DEF (mimepart, is_message);
+/***
+ * @method mime_part:get_boundary()
+ * Returns boundary for a part (extracted from parent multipart for normal parts and
+ * from the part itself for multipart)
+ * @return {string} boundary value or nil
+ */
+LUA_FUNCTION_DEF (mimepart, get_boundary);
+
+/***
* @method mime_part:get_children()
* Returns rspamd_mimepart table of part's childer. Returns nil if mime part is not multipart
* or a message part.
@@ -402,6 +461,13 @@ LUA_FUNCTION_DEF (mimepart, get_text);
* @return {string} 128 characters hex string with digest of the part
*/
LUA_FUNCTION_DEF (mimepart, get_digest);
+
+/***
+ * @method mime_part:get_id()
+ * Returns the order of the part in parts list
+ * @return {number} index of the part (starting from 1 as it is Lua API)
+ */
+LUA_FUNCTION_DEF (mimepart, get_id);
/***
* @method mime_part:is_broken()
* Returns true if mime part has incorrectly specified content type
@@ -421,6 +487,13 @@ LUA_FUNCTION_DEF (mimepart, is_broken);
* @param {table} params optional parameters
*/
LUA_FUNCTION_DEF (mimepart, headers_foreach);
+/***
+ * @method mime_part:get_parent()
+ * Returns parent part for this part
+ * @return {rspamd_mimepart} parent part or nil
+ */
+LUA_FUNCTION_DEF (mimepart, get_parent);
+
static const struct luaL_reg mimepartlib_m[] = {
LUA_INTERFACE_DEF (mimepart, get_content),
@@ -428,22 +501,30 @@ static const struct luaL_reg mimepartlib_m[] = {
LUA_INTERFACE_DEF (mimepart, get_length),
LUA_INTERFACE_DEF (mimepart, get_type),
LUA_INTERFACE_DEF (mimepart, get_type_full),
+ LUA_INTERFACE_DEF (mimepart, get_detected_type),
+ LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
LUA_INTERFACE_DEF (mimepart, get_cte),
LUA_INTERFACE_DEF (mimepart, get_filename),
+ LUA_INTERFACE_DEF (mimepart, get_boundary),
LUA_INTERFACE_DEF (mimepart, get_header),
LUA_INTERFACE_DEF (mimepart, get_header_raw),
LUA_INTERFACE_DEF (mimepart, get_header_full),
LUA_INTERFACE_DEF (mimepart, get_header_count),
+ LUA_INTERFACE_DEF (mimepart, get_raw_headers),
LUA_INTERFACE_DEF (mimepart, is_image),
LUA_INTERFACE_DEF (mimepart, get_image),
LUA_INTERFACE_DEF (mimepart, is_archive),
LUA_INTERFACE_DEF (mimepart, get_archive),
LUA_INTERFACE_DEF (mimepart, is_multipart),
+ LUA_INTERFACE_DEF (mimepart, is_message),
LUA_INTERFACE_DEF (mimepart, get_children),
+ LUA_INTERFACE_DEF (mimepart, get_parent),
LUA_INTERFACE_DEF (mimepart, is_text),
LUA_INTERFACE_DEF (mimepart, is_broken),
+ LUA_INTERFACE_DEF (mimepart, is_attachment),
LUA_INTERFACE_DEF (mimepart, get_text),
LUA_INTERFACE_DEF (mimepart, get_digest),
+ LUA_INTERFACE_DEF (mimepart, get_id),
LUA_INTERFACE_DEF (mimepart, headers_foreach),
{"__tostring", rspamd_lua_class_tostring},
{NULL, NULL}
@@ -725,7 +806,7 @@ lua_textpart_get_words_count (lua_State *L)
lua_pushinteger (L, 0);
}
else {
- lua_pushinteger (L, part->utf_words->len);
+ lua_pushinteger (L, part->nwords);
}
return 1;
@@ -736,8 +817,7 @@ lua_textpart_get_words (lua_State *L)
{
LUA_TRACE_POINT;
struct rspamd_mime_text_part *part = lua_check_textpart (L);
- rspamd_stat_token_t *w;
- guint i;
+ enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
if (part == NULL) {
return luaL_error (L, "invalid arguments");
@@ -747,14 +827,27 @@ lua_textpart_get_words (lua_State *L)
lua_createtable (L, 0, 0);
}
else {
- lua_createtable (L, part->utf_words->len, 0);
-
- for (i = 0; i < part->utf_words->len; i ++) {
- w = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
+ if (lua_type (L, 2) == LUA_TSTRING) {
+ const gchar *how_str = lua_tostring (L, 2);
- lua_pushlstring (L, w->begin, w->len);
- lua_rawseti (L, -2, i + 1);
+ if (strcmp (how_str, "stem") == 0) {
+ how = RSPAMD_LUA_WORDS_STEM;
+ }
+ else if (strcmp (how_str, "norm") == 0) {
+ how = RSPAMD_LUA_WORDS_NORM;
+ }
+ else if (strcmp (how_str, "raw") == 0) {
+ how = RSPAMD_LUA_WORDS_RAW;
+ }
+ else if (strcmp (how_str, "full") == 0) {
+ how = RSPAMD_LUA_WORDS_FULL;
+ }
+ else {
+ return luaL_error (L, "unknown words type: %s", how_str);
+ }
}
+
+ return rspamd_lua_push_words (L, part->utf_words, how);
}
return 1;
@@ -834,6 +927,28 @@ lua_textpart_get_language (lua_State * L)
}
static gint
+lua_textpart_get_charset (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_text_part *part = lua_check_textpart (L);
+
+ if (part != NULL) {
+ if (part->real_charset != NULL) {
+ lua_pushstring (L, part->real_charset);
+ return 1;
+ }
+ else {
+ lua_pushnil (L);
+ }
+ }
+ else {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
lua_textpart_get_languages (lua_State * L)
{
LUA_TRACE_POINT;
@@ -878,8 +993,8 @@ struct lua_shingle_data {
#define STORE_TOKEN(i, t) do { \
if ((i) < part->utf_words->len) { \
word = &g_array_index (part->utf_words, rspamd_stat_token_t, (i)); \
- sd->t.begin = word->begin; \
- sd->t.len = word->len; \
+ sd->t.begin = word->stemmed.begin; \
+ sd->t.len = word->stemmed.len; \
} \
}while (0)
@@ -938,7 +1053,8 @@ lua_textpart_get_fuzzy_hashes (lua_State * L)
for (i = 0; i < part->utf_words->len; i ++) {
word = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
- rspamd_cryptobox_hash_update (&st, word->begin, word->len);
+ rspamd_cryptobox_hash_update (&st,
+ word->stemmed.begin, word->stemmed.len);
}
rspamd_cryptobox_hash_final (&st, digest);
@@ -1123,48 +1239,49 @@ lua_mimepart_get_length (lua_State * L)
}
static gint
-lua_mimepart_get_type_common (lua_State * L, gboolean full)
+lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
+ gboolean full)
{
- struct rspamd_mime_part *part = lua_check_mimepart (L);
+
GHashTableIter it;
gpointer k, v;
struct rspamd_content_type_param *param;
- if (part == NULL) {
+ if (ct == NULL) {
lua_pushnil (L);
lua_pushnil (L);
return 2;
}
- lua_pushlstring (L, part->ct->type.begin, part->ct->type.len);
- lua_pushlstring (L, part->ct->subtype.begin, part->ct->subtype.len);
+ lua_pushlstring (L, ct->type.begin, ct->type.len);
+ lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);
if (!full) {
return 2;
}
- lua_createtable (L, 0, 2 + (part->ct->attrs ?
- g_hash_table_size (part->ct->attrs) : 0));
+ lua_createtable (L, 0, 2 + (ct->attrs ?
+ g_hash_table_size (ct->attrs) : 0));
- if (part->ct->charset.len > 0) {
+ if (ct->charset.len > 0) {
lua_pushstring (L, "charset");
- lua_pushlstring (L, part->ct->charset.begin, part->ct->charset.len);
+ lua_pushlstring (L, ct->charset.begin, ct->charset.len);
lua_settable (L, -3);
}
- if (part->ct->boundary.len > 0) {
- lua_pushstring (L, "charset");
- lua_pushlstring (L, part->ct->boundary.begin, part->ct->boundary.len);
+ if (ct->boundary.len > 0) {
+ lua_pushstring (L, "boundary");
+ lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
lua_settable (L, -3);
}
- if (part->ct->attrs) {
- g_hash_table_iter_init (&it, part->ct->attrs);
+ if (ct->attrs) {
+ g_hash_table_iter_init (&it, ct->attrs);
while (g_hash_table_iter_next (&it, &k, &v)) {
param = v;
- if (param->name.len > 0 && param->name.len > 0) {
+ if (param->name.len > 0 && param->value.len > 0) {
/* TODO: think about multiple values here */
lua_pushlstring (L, param->name.begin, param->name.len);
lua_pushlstring (L, param->value.begin, param->value.len);
@@ -1180,14 +1297,52 @@ static gint
lua_mimepart_get_type (lua_State * L)
{
LUA_TRACE_POINT;
- return lua_mimepart_get_type_common (L, FALSE);
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->ct, FALSE);
}
static gint
lua_mimepart_get_type_full (lua_State * L)
{
LUA_TRACE_POINT;
- return lua_mimepart_get_type_common (L, TRUE);
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->ct, TRUE);
+}
+
+static gint
+lua_mimepart_get_detected_type (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->detected_ct, FALSE);
+}
+
+static gint
+lua_mimepart_get_detected_type_full (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
}
static gint
@@ -1223,6 +1378,35 @@ lua_mimepart_get_filename (lua_State * L)
}
static gint
+lua_mimepart_get_boundary (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L), *parent;
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ if (IS_CT_MULTIPART (part->ct)) {
+ lua_pushlstring (L, part->specific.mp->boundary.begin,
+ part->specific.mp->boundary.len);
+ }
+ else {
+ parent = part->parent_part;
+
+ if (!parent || !IS_CT_MULTIPART (parent->ct)) {
+ lua_pushnil (L);
+ }
+ else {
+ lua_pushlstring (L, parent->specific.mp->boundary.begin,
+ parent->specific.mp->boundary.len);
+ }
+ }
+
+ return 1;
+}
+
+static gint
lua_mimepart_get_header_common (lua_State *L, enum rspamd_lua_task_header_type how)
{
struct rspamd_mime_part *part = lua_check_mimepart (L);
@@ -1273,6 +1457,28 @@ lua_mimepart_get_header_count (lua_State * L)
}
static gint
+lua_mimepart_get_raw_headers (lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+ struct rspamd_lua_text *t;
+
+ if (part) {
+ t = lua_newuserdata (L, sizeof (*t));
+ rspamd_lua_setclass (L, "rspamd{text}", -1);
+ t->start = part->raw_headers_str;
+ t->len = part->raw_headers_len;
+ t->flags = 0;
+ }
+ else {
+ return luaL_error (L, "invalid arguments");
+ }
+
+
+ return 1;
+}
+
+static gint
lua_mimepart_is_image (lua_State * L)
{
LUA_TRACE_POINT;
@@ -1318,6 +1524,52 @@ lua_mimepart_is_multipart (lua_State * L)
}
static gint
+lua_mimepart_is_message (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ lua_pushboolean (L, IS_CT_MESSAGE (part->ct) ? true : false);
+
+ return 1;
+}
+
+static gint
+lua_mimepart_is_attachment (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ if (!(part->flags & (RSPAMD_MIME_PART_IMAGE|RSPAMD_MIME_PART_TEXT))) {
+ if (part->cd && part->cd->type == RSPAMD_CT_ATTACHMENT) {
+ lua_pushboolean (L, true);
+ }
+ else {
+ if (part->cd && part->cd->filename.len > 0) {
+ /* We still have filename and it is not an image */
+ lua_pushboolean (L, true);
+ }
+ else {
+ lua_pushboolean (L, false);
+ }
+ }
+ }
+ else {
+ lua_pushboolean (L, false);
+ }
+
+ return 1;
+}
+
+static gint
lua_mimepart_is_text (lua_State * L)
{
LUA_TRACE_POINT;
@@ -1411,13 +1663,13 @@ lua_mimepart_get_children (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- if (!IS_CT_MULTIPART (part->ct) || part->specific.mp.children == NULL) {
+ if (!IS_CT_MULTIPART (part->ct) || part->specific.mp->children == NULL) {
lua_pushnil (L);
}
else {
- lua_createtable (L, part->specific.mp.children->len, 0);
+ lua_createtable (L, part->specific.mp->children->len, 0);
- PTR_ARRAY_FOREACH (part->specific.mp.children, i, cur) {
+ PTR_ARRAY_FOREACH (part->specific.mp->children, i, cur) {
pcur = lua_newuserdata (L, sizeof (*pcur));
*pcur = cur;
rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
@@ -1428,6 +1680,29 @@ lua_mimepart_get_children (lua_State * L)
return 1;
}
+static gint
+lua_mimepart_get_parent (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+ struct rspamd_mime_part **pparent;
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ if (part->parent_part) {
+ pparent = lua_newuserdata (L, sizeof (*pparent));
+ *pparent = part->parent_part;
+ rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
+ }
+ else {
+ lua_pushnil (L);
+ }
+
+ return 1;
+}
+
static gint
lua_mimepart_get_text (lua_State * L)
@@ -1472,6 +1747,21 @@ lua_mimepart_get_digest (lua_State * L)
}
static gint
+lua_mimepart_get_id (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+ if (part == NULL) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ lua_pushnumber (L, part->id);
+
+ return 1;
+}
+
+static gint
lua_mimepart_headers_foreach (lua_State *L)
{
LUA_TRACE_POINT;