summaryrefslogtreecommitdiffstats
path: root/src/libmime/mime_encoding.h
blob: 5224d33fb39d5333ce5428872d5f98b57dedcc3c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*-
 * Copyright 2016 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef SRC_LIBMIME_MIME_ENCODING_H_
#define SRC_LIBMIME_MIME_ENCODING_H_

#include "config.h"
#include "mem_pool.h"
#include "fstring.h"
#include <unicode/uchar.h>

#ifdef  __cplusplus
extern "C" {
#endif

/*
 * Forward declarations: the prototypes in this header only refer to these
 * structures through pointers, so the full definitions are not required here.
 * NOTE(review): `struct rspamd_mime_part` is not referenced by any prototype
 * visible in this header - confirm whether the declaration is still needed.
 */
struct rspamd_task;
struct rspamd_mime_part;
struct rspamd_mime_text_part;
struct rspamd_charset_converter;

/**
 * Convert charset to a valid iconv charset
 * NOTE(review): other declarations in this header use ICU types (UErrorCode,
 * UChar), so the "iconv" wording may be historical - confirm.
 * @param in input charset to convert
 * @param pool pool to store temporary data
 * @return the converted charset name
 * (NOTE(review): the result for an unrecognised charset, e.g. NULL, is not
 * visible from this header - confirm against the implementation)
 */
const gchar *rspamd_mime_detect_charset (const rspamd_ftok_t *in,
										 rspamd_mempool_t *pool);

/**
 * Convert text chunk to utf-8. Input encoding is substituted using
 * `rspamd_mime_detect_charset`.
 * If input encoding is already utf, this function returns input pointer.
 * Memory is allocated from pool if a conversion is needed
 * @param pool memory pool used for the output when a conversion is needed
 * @param input text chunk to convert
 * @param len length of `input` (presumably in bytes - confirm)
 * @param in_enc name of the input encoding
 * @param olen output length (presumably set to the length of the returned
 * text - confirm)
 * @param err error location (presumably filled when the conversion fails -
 * confirm)
 * @return `input` itself if it is already utf, otherwise the converted text
 * allocated from `pool`
 * (NOTE(review): the value returned on failure is not visible from this
 * header - confirm against the implementation)
 */
gchar *rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
								 gchar *input, gsize len, const gchar *in_enc,
								 gsize *olen, GError **err);

/**
 * Converts data from `in` to `out`, returns `FALSE` if `enc` is not a valid iconv charset
 * @param in byte array with the source data
 * @param out byte array that receives the converted data
 * @param enc charset name (presumably the encoding of `in`, with utf-8 as
 * the target as the function name suggests - confirm)
 * @return `FALSE` if `enc` is not a valid charset
 * (NOTE(review): `TRUE` on success is presumed; whether other failures also
 * yield `FALSE` is not visible from this header - confirm)
 */
gboolean rspamd_mime_to_utf8_byte_array (GByteArray *in,
										 GByteArray *out, const gchar *enc);

/**
 * Maybe convert part to utf-8
 * This function returns nothing, so any outcome must be observed through
 * `task` or `text_part`
 * (NOTE(review): which fields are updated is not visible from this header -
 * confirm against the implementation).
 * @param task task that the text part belongs to (presumed - confirm)
 * @param text_part text part to be converted if needed
 */
void rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
										  struct rspamd_mime_text_part *text_part);

/**
 * Checks utf8 charset and normalize/validate utf8 string
 * @param charset charset to check
 * @param in string to normalize/validate; passed as a non-const buffer
 * (presumably it may be modified in place - confirm)
 * @param len length of `in` (presumably in bytes - confirm)
 * @param content_check NOTE(review): semantics are not visible from this
 * header; presumably enables inspection of the content of `in` in addition to
 * the charset name - confirm against the implementation
 * @return NOTE(review): presumably `TRUE` if the charset is utf8 and the
 * string is valid - confirm against the implementation
 */
gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
										gchar *in, gsize len, gboolean content_check);

/**
 * Ensure that all characters in string are valid utf8 chars or replace them
 * with '?'
 * The buffer is non-const and nothing is returned, so the replacement is
 * applied to `in` itself.
 * @param in string to check and fix
 * @param len length of `in` (presumably in bytes - confirm)
 */
void rspamd_mime_charset_utf_enforce (gchar *in, gsize len);

/**
 * Gets cached converter
 * @param enc name of the encoding to get a converter for
 * @param err location for an ICU-style error code (`UErrorCode`)
 * @return converter for `enc`
 * (NOTE(review): the result on failure, e.g. NULL with `*err` set, and the
 * ownership of the returned object are not visible from this header -
 * confirm against the implementation)
 */
struct rspamd_charset_converter *rspamd_mime_get_converter_cached (
		const gchar *enc,
		UErrorCode *err);

/**
 * Performs charset->utf16 conversion
 * NOTE(review): the parameter list matches ICU's `ucnv_toUChars` with the
 * converter type replaced; the descriptions below assume the same contract -
 * confirm against the implementation.
 * @param cnv converter to use (see `rspamd_mime_get_converter_cached`)
 * @param dest destination buffer for the utf16 output
 * @param destCapacity capacity of `dest` (presumably in UChar units - confirm)
 * @param src source data in the converter's charset
 * @param srcLength length of `src` (presumably in bytes - confirm)
 * @param pErrorCode location for an ICU-style error code (`UErrorCode`)
 * @return NOTE(review): presumably the length of the converted output in
 * UChar units - confirm
 */
gint32
rspamd_converter_to_uchars (struct rspamd_charset_converter *cnv,
							UChar *dest,
							gint32 destCapacity,
							const char *src,
							gint32 srcLength,
							UErrorCode *pErrorCode);

/**
 * Detect charset in text
 * @param in text to inspect
 * @param inlen length of `in` (presumably in bytes - confirm)
 * @return detected charset name or NULL
 * (NOTE(review): the lifetime/ownership of the returned string is not visible
 * from this header - confirm before storing it)
 */
const char *rspamd_mime_charset_find_by_content (const gchar *in, gsize inlen);

#ifdef  __cplusplus
}
#endif

#endif /* SRC_LIBMIME_MIME_ENCODING_H_ */