if (re) {
data = luaL_checkstring (L, 2);
if (data) {
+ if ((g_regex_get_compile_flags (re) & G_REGEX_RAW) == 0) {
+ /* Validate input */
+ if (!g_utf8_validate (data, -1, NULL)) {
+ lua_pushnil (L);
+ return 1;
+ }
+ }
if (g_regex_match_full (re, data, -1, 0, 0, &mi, NULL)) {
matches = g_match_info_fetch_all (mi);
lua_newtable (L);
}
if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
- text_part->is_raw = FALSE;
- text_part->is_utf = TRUE;
- return part_content;
+ if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+ text_part->is_raw = FALSE;
+ text_part->is_utf = TRUE;
+ return part_content;
+ }
+ else {
+ msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id);
+ text_part->is_raw = TRUE;
+ return part_content;
+ }
}
res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
if (res_str == NULL) {
- msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
+ msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem");
text_part->is_raw = TRUE;
return part_content;
}
task->message = message;
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) destroy_message, task->message);
+ /* Save message id for future use */
+ task->message_id = g_mime_message_get_message_id (task->message);
+ if (task->message_id == NULL) {
+ task->message_id = "undef";
+ }
+
task->parser_recursion = 0;
#ifdef GMIME24
g_mime_message_foreach (message, mime_foreach_callback, task);
if (task->queue_id == NULL) {
task->queue_id = "undef";
}
- task->message_id = g_mime_message_get_message_id (task->message);
- if (task->message_id == NULL) {
- task->message_id = "undef";
- }
#ifdef GMIME24
task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message));
while (cur) {
debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
/* Try to match regexp */
+ /*
+  * Non-raw regexps require valid UTF-8 input, so skip headers that fail
+  * validation. The NULL test must come first: g_utf8_validate () must not
+  * be handed a NULL string, and NULL values are rejected by the match
+  * condition that follows.
+  */
+ if (cur->data && !re->is_raw) {
+ /* Validate input */
+ if (!g_utf8_validate (cur->data, -1, NULL)) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ }
if (cur->data && g_regex_match_full (re->regexp, cur->data, -1, 0, 0, NULL, &err) == TRUE) {
if (G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
regexp = re->raw_regexp;
}
else {
+ /* This time there is no need to validate anything, as the conversion succeeds only for valid characters */
regexp = re->regexp;
}
/* Select data for regexp */
debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
rh = cur->data;
/* Try to match regexp */
+ /*
+  * Non-raw regexps require valid UTF-8 input, so skip headers whose value
+  * fails validation. The NULL test must come first: g_utf8_validate () must
+  * not be handed a NULL string, and NULL values are rejected by the match
+  * condition that follows.
+  */
+ if (rh->value && !re->is_raw) {
+ /* Validate input */
+ if (!g_utf8_validate (rh->value, -1, NULL)) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ }
if (rh->value && g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) {
if (G_UNLIKELY (re->is_test)) {
msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);