1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
/*-
* Copyright 2016 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_LIBMIME_MIME_ENCODING_H_
#define SRC_LIBMIME_MIME_ENCODING_H_
#include "config.h"
#include "mem_pool.h"
#include "fstring.h"
#include <unicode/uchar.h>
/*
* Forward declarations of types defined elsewhere. The prototypes in this
* header refer to them by pointer only, so the full definitions are not needed.
*/
struct rspamd_task;
struct rspamd_mime_part;
struct rspamd_mime_text_part;
struct rspamd_charset_converter;
/**
* Convert a charset name to a valid iconv charset name
* @param in charset name to convert
* @param pool memory pool used to store temporary data
* @return the resulting charset name. NOTE(review): the value returned for an
* unrecognised charset (presumably NULL) and the lifetime of the returned
* string are not visible from this header - confirm against the implementation
*/
const gchar * rspamd_mime_detect_charset (const rspamd_ftok_t *in,
rspamd_mempool_t *pool);
/**
* Convert a text chunk to utf-8. The input encoding name is substituted using
* `rspamd_mime_detect_charset`.
* If the input encoding is already utf, this function returns the input pointer.
* Memory is allocated from `pool` only if a conversion is needed.
* @param pool memory pool used for the converted output
* @param input text to convert
* @param len length of `input` (presumably in bytes - TODO confirm)
* @param in_enc name of the encoding of `input`
* @param olen presumably receives the length of the returned text - TODO confirm
* whether it may be NULL
* @param err presumably set on conversion failure, following the usual GLib
* GError convention - TODO confirm
* @return `input` itself when no conversion is needed, otherwise converted text
* allocated from `pool`. NOTE(review): the return value on failure is not
* visible from this header (presumably NULL)
*/
gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
gchar *input, gsize len, const gchar *in_enc,
gsize *olen, GError **err);
/**
* Converts data from `in` to `out`; returns `FALSE` if `enc` is not a valid
* iconv charset
* @param in byte array holding the source data
* @param out byte array that receives the converted data
* @param enc name of the charset of the data in `in`
* @return `FALSE` if `enc` is not a valid iconv charset; presumably `TRUE` when
* the conversion succeeds - TODO confirm
*/
gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in,
GByteArray *out, const gchar *enc);
/**
* Maybe convert a text part to utf-8. The function returns nothing.
* NOTE(review): presumably the outcome is recorded in `text_part` itself -
* confirm against the implementation.
* @param task task the text part belongs to
* @param text_part text part to convert
*/
void rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
struct rspamd_mime_text_part *text_part);
/**
* Checks for a utf8 charset and normalizes/validates the utf8 string
* @param charset charset name to check
* @param in string to normalize/validate; passed as a non-const buffer, so it
* may be modified
* @param len length of `in` (presumably in bytes - TODO confirm)
* @param content_check NOTE(review): this parameter was undocumented; presumably
* it enables inspecting the content of `in` instead of trusting `charset`
* alone - confirm against the implementation
* @return NOTE(review): presumably `TRUE` when the charset is utf8 and the
* string is valid - confirm against the implementation
*/
gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
gchar *in, gsize len, gboolean content_check);
/**
* Ensure that all characters in the string are valid utf8 characters, replacing
* invalid ones with '?'
* @param in string to process; passed as a writable buffer and the function
* returns nothing, so replacements are made in this buffer
* @param len length of `in` (presumably in bytes - TODO confirm)
*/
void rspamd_mime_charset_utf_enforce (gchar *in, gsize len);
/**
* Gets a cached converter for a charset
* @param enc name of the charset the converter is requested for
* @param err ICU error code location; presumably set when the converter cannot
* be obtained - TODO confirm
* @return the converter. NOTE(review): the value returned on failure (presumably
* NULL) and the ownership of the returned object are not visible from this
* header; since it comes from a cache the caller presumably must not free it
*/
struct rspamd_charset_converter *rspamd_mime_get_converter_cached (
const gchar *enc,
UErrorCode *err);
/**
* Performs charset->utf16 conversion.
* NOTE(review): the parameter list looks modelled on ICU `ucnv_toUChars`; the
* units and return semantics noted below assume that - confirm against the
* implementation.
* @param cnv converter to use
* @param dest output buffer for the utf16 data
* @param destCapacity capacity of `dest` (presumably counted in UChars)
* @param src input data in the charset of the converter
* @param srcLength length of `src` (presumably in bytes)
* @param pErrorCode ICU error code location
* @return presumably the number of UChars produced - TODO confirm
*/
gint32
rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv,
UChar *dest,
gint32 destCapacity,
const char *src,
gint32 srcLength,
UErrorCode *pErrorCode);
/**
* Detect the charset of a text by inspecting its content
* @param in text to inspect
* @param inlen length of `in` (presumably in bytes - TODO confirm)
* @return detected charset name or NULL
*/
const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen);
#endif /* SRC_LIBMIME_MIME_ENCODING_H_ */
|