aboutsummaryrefslogtreecommitdiffstats
path: root/src/libmime/mime_encoding.h
blob: 1a61339ca22328567148f2ba7980e7cc661323d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*-
 * Copyright 2016 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef SRC_LIBMIME_MIME_ENCODING_H_
#define SRC_LIBMIME_MIME_ENCODING_H_

#include "config.h"
#include "mem_pool.h"
#include "fstring.h"
#include <unicode/uchar.h>

/*
 * Forward declarations of types that this header only refers to through
 * pointers, so their full definitions do not have to be included here.
 * Note: rspamd_mime_part is declared but not referenced by any prototype
 * in this header.
 */
struct rspamd_task;
struct rspamd_mime_part;
struct rspamd_mime_text_part;
struct rspamd_charset_converter;

/**
 * Convert a charset name to a valid (canonical) charset name usable by the
 * conversion routines.
 * NOTE(review): the original wording says "valid iconv charset", while the
 * converter API declared in this header uses ICU types (UErrorCode, UChar) --
 * confirm which naming scheme the returned string follows.
 * @param in token holding the charset name to normalise
 * @param pool memory pool used to store temporary data
 * @return the normalised charset name; presumably NULL when the name cannot
 * be recognised -- TODO confirm against the implementation
 */
const gchar * rspamd_mime_detect_charset (const rspamd_ftok_t *in,
		rspamd_mempool_t *pool);

/**
 * Convert a text chunk to utf-8. The input encoding name is substituted using
 * `rspamd_mime_detect_charset` before the conversion.
 * If the input encoding is already utf, this function returns the input
 * pointer itself (no copy is made). Otherwise memory for the result is
 * allocated from `pool`.
 * @param pool memory pool used for the converted text when a conversion is
 * needed
 * @param input text chunk to convert
 * @param len length of `input` (presumably in bytes -- TODO confirm)
 * @param in_enc name of the encoding `input` is declared to be in
 * @param olen output location for the length of the result
 * (NOTE(review): whether NULL is accepted is not visible from this header)
 * @param err location for a GError describing a failure
 * @return `input` if no conversion was required, otherwise the converted
 * text allocated from `pool`; presumably NULL on failure with `err` set --
 * TODO confirm
 */
gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
		gchar *input, gsize len, const gchar *in_enc,
		gsize *olen, GError **err);

/**
 * Converts data from `in` to `out`, returns `FALSE` if `enc` is not a valid
 * charset name (the original comment calls it an "iconv charset").
 * @param in byte array holding the source data in encoding `enc`
 * @param out byte array that receives the converted data
 * @param enc name of the encoding of the data in `in`
 * @return `FALSE` if `enc` is not a valid charset; presumably `TRUE` when the
 * conversion succeeds -- TODO confirm other failure modes
 */
gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in,
		GByteArray *out, const gchar *enc);

/**
 * Maybe convert a text part to utf-8. The function returns nothing, so any
 * outcome is reported through its arguments.
 * NOTE(review): presumably the result is stored in `text_part` -- confirm
 * against the implementation.
 * @param task task the text part belongs to
 * @param text_part text part to be (possibly) converted
 */
void rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
		struct rspamd_mime_text_part *text_part);

/**
 * Checks utf8 charset and normalize/validate utf8 string
 * @param charset token holding the charset name to check (non-const;
 * NOTE(review): it may be rewritten by the call -- confirm)
 * @param in text whose content is validated (non-const; it may be modified
 * during normalisation -- confirm)
 * @param len length of `in` (presumably in bytes -- TODO confirm)
 * @param content_check presumably enables inspection of the content of `in`
 * in addition to the charset name -- TODO confirm exact semantics
 * @return presumably `TRUE` when the charset is utf8 and the text is valid
 * utf8 -- TODO confirm
 */
gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
		gchar *in, gsize len, gboolean content_check);

/**
 * Ensure that all characters in string are valid utf8 chars or replace them
 * with '?'. The buffer is modified in place; nothing is returned.
 * @param in text buffer to sanitise
 * @param len length of `in` (presumably in bytes -- TODO confirm)
 */
void rspamd_mime_charset_utf_enforce (gchar *in, gsize len);

/**
 * Gets cached converter for the given encoding.
 * NOTE(review): the name suggests the converter is owned by a cache and must
 * not be released by the caller -- confirm ownership in the implementation.
 * @param enc name of the encoding a converter is requested for
 * @param err location for an ICU-style error code (UErrorCode) describing a
 * failure
 * @return the converter for `enc`; presumably NULL on failure with `err`
 * set -- TODO confirm
 */
struct rspamd_charset_converter *rspamd_mime_get_converter_cached (
		const gchar *enc,
		UErrorCode *err);

/**
 * Performs charset->utf16 conversion
 * NOTE(review): the parameter names mirror ICU's ucnv_toUChars(); the
 * descriptions below assume the same conventions -- confirm.
 * @param cnv converter to use (see `rspamd_mime_get_converter_cached`)
 * @param dest destination buffer of UChar (utf16 code units)
 * @param destCapacity capacity of `dest` (presumably in UChar units)
 * @param src source text in the converter's charset
 * @param srcLength length of `src` (presumably in bytes)
 * @param pErrorCode location for the resulting error code
 * @return presumably the number of UChar units produced -- TODO confirm
 */
gint32
rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv,
							UChar *dest,
							gint32 destCapacity,
							const char *src,
							gint32 srcLength,
							UErrorCode *pErrorCode);

/**
 * Detect charset in text by inspecting its content
 * @param in text to inspect (not modified: the pointer is const)
 * @param inlen length of `in` (presumably in bytes -- TODO confirm)
 * @return detected charset name or NULL
 */
const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen);


#endif /* SRC_LIBMIME_MIME_ENCODING_H_ */