summaryrefslogtreecommitdiffstats
path: root/src/libmime
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-15 17:53:23 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2016-12-15 17:53:23 +0000
commit1e672eedb5921441ef7ba300548e72c9b2dd860a (patch)
treef659e6988de03251faab908f861dc1f5cefb45b3 /src/libmime
parenta25853e23a78266265f9e688c41c9b36245a8099 (diff)
downloadrspamd-1e672eedb5921441ef7ba300548e72c9b2dd860a.tar.gz
rspamd-1e672eedb5921441ef7ba300548e72c9b2dd860a.zip
[Feature] Add mime encoding manipulation routines
Diffstat (limited to 'src/libmime')
-rw-r--r--src/libmime/CMakeLists.txt3
-rw-r--r--src/libmime/mime_encoding.c275
-rw-r--r--src/libmime/mime_encoding.h62
-rw-r--r--src/libmime/mime_encoding_list.h1577
4 files changed, 1916 insertions, 1 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
index 0a3b22ecb..78ada58e1 100644
--- a/src/libmime/CMakeLists.txt
+++ b/src/libmime/CMakeLists.txt
@@ -8,6 +8,7 @@ SET(LIBRSPAMDMIMESRC
${CMAKE_CURRENT_SOURCE_DIR}/archives.c
${CMAKE_CURRENT_SOURCE_DIR}/content_type.c
${CMAKE_CURRENT_SOURCE_DIR}/mime_headers.c
- ${CMAKE_CURRENT_SOURCE_DIR}/mime_parser.c)
+ ${CMAKE_CURRENT_SOURCE_DIR}/mime_parser.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/mime_encoding.c)
SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE) \ No newline at end of file
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
new file mode 100644
index 000000000..f58d62b47
--- /dev/null
+++ b/src/libmime/mime_encoding.c
@@ -0,0 +1,275 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libutil/mem_pool.h"
+#include "libutil/regexp.h"
+#include "libserver/task.h"
+#include "message.h"
+#include <iconv.h>
+
+/* Name of the target charset passed to iconv_open */
+#define UTF8_CHARSET "UTF-8"
+
+/* Bits for the `flags` field of struct rspamd_charset_substitution */
+#define RSPAMD_CHARSET_FLAG_UTF (1 << 0)
+#define RSPAMD_CHARSET_FLAG_ASCII (1 << 1)
+
+/* Clear/set RSPAMD_MIME_TEXT_PART_FLAG_UTF in the flags of a text part */
+#define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+#define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+
+/*
+ * Lazily compiled regexp; parts whose charset matches it are only validated
+ * as UTF-8 and are never passed to iconv
+ */
+static rspamd_regexp_t *utf_compatible_re = NULL;
+
+/* One mapping from a charset alias to the name used instead of it */
+struct rspamd_charset_substitution {
+ const gchar *input; /* alias, used as the lookup key */
+ const gchar *canon; /* name returned instead of the alias */
+ gint flags; /* RSPAMD_CHARSET_FLAG_* bits */
+};
+
+#include "mime_encoding_list.h"
+
+/* Alias lookup table built from `sub` on the first charset detection */
+static GHashTable *sub_hash = NULL;
+
+
+/**
+ * Error domain for GError objects raised when iconv cannot be used
+ * @return quark registered for the "iconv error" string
+ */
+static GQuark
+rspamd_iconv_error_quark (void)
+{
+	const GQuark domain = g_quark_from_static_string ("iconv error");
+
+	return domain;
+}
+
+/**
+ * Create `sub_hash` (keyed with rspamd_strcase_hash/rspamd_strcase_equal)
+ * and register every entry of `sub` under its `input` alias
+ */
+static void
+rspamd_mime_encoding_substitute_init (void)
+{
+	const struct rspamd_charset_substitution *cur, *last;
+
+	sub_hash = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
+	last = sub + G_N_ELEMENTS (sub);
+
+	/* Entries are inserted in table order, keys and values are not copied */
+	for (cur = sub; cur != last; cur ++) {
+		g_hash_table_insert (sub_hash, (void *)cur->input, (void *)cur);
+	}
+}
+
+/**
+ * Strip leading and trailing non-alphanumeric characters from a charset
+ * name. The string is modified in place.
+ * @param in NUL terminated charset name; it becomes an empty string if it
+ * contains no alphanumeric characters at all
+ */
+static void
+rspamd_charset_normalize (gchar *in)
+{
+	gchar *begin, *end;
+	gsize len;
+
+	begin = in;
+
+	while (*begin && !g_ascii_isalnum (*begin)) {
+		begin ++;
+	}
+
+	if (*begin == '\0') {
+		/* Nothing usable is left (also avoids computing begin - 1 below) */
+		*in = '\0';
+		return;
+	}
+
+	/* begin points to an alphanumeric character, so the loop stops there */
+	end = begin + strlen (begin) - 1;
+
+	while (end > begin && !g_ascii_isalnum (*end)) {
+		end --;
+	}
+
+	len = end - begin + 1;
+
+	if (begin != in) {
+		memmove (in, begin, len);
+	}
+
+	/*
+	 * Terminate relative to the new start of the string: `end` still refers
+	 * to the position before the move
+	 */
+	in[len] = '\0';
+}
+
+/**
+ * Convert a charset name taken from a message to a name suitable for iconv.
+ * The name is copied to the pool, normalised and looked up in the alias
+ * table, first as is and then with all '-' characters removed.
+ * @param in charset name
+ * @param pool pool used to store the normalised copy
+ * @return the substituted name if the alias is known, the normalised (and
+ * dash stripped) copy otherwise, or NULL if nothing is left after
+ * normalisation
+ */
+const gchar *
+rspamd_mime_detect_charset (const rspamd_ftok_t *in, rspamd_mempool_t *pool)
+{
+	gchar *ret = NULL, *h, *t;
+	struct rspamd_charset_substitution *s;
+
+	if (sub_hash == NULL) {
+		rspamd_mime_encoding_substitute_init ();
+	}
+
+	ret = rspamd_mempool_ftokdup (pool, in);
+	rspamd_charset_normalize (ret);
+
+	if (*ret == '\0') {
+		/* No alphanumeric characters at all: this is not a charset name */
+		return NULL;
+	}
+
+	/*
+	 * Many aliases in the table contain '-', so the name must be checked
+	 * before the dashes are removed
+	 */
+	s = g_hash_table_lookup (sub_hash, ret);
+
+	if (s) {
+		return s->canon;
+	}
+
+	if (strchr (ret, '-') != NULL) {
+		/* Try to remove '-' chars from encoding: e.g. CP-100 to CP100 */
+		h = ret;
+		t = ret;
+
+		while (*h != '\0') {
+			if (*h != '-') {
+				*t++ = *h;
+			}
+
+			h ++;
+		}
+
+		*t = '\0';
+
+		s = g_hash_table_lookup (sub_hash, ret);
+
+		if (s) {
+			return s->canon;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * Convert a text chunk to UTF-8 using iconv.
+ * Input bytes rejected by iconv (EILSEQ/EINVAL) are skipped one at a time
+ * and replaced by '?' in the output.
+ * @param pool pool that frees the output buffer (a destructor is registered)
+ * @param input text to convert
+ * @param len length of input in bytes
+ * @param in_enc iconv name of the input encoding
+ * @param olen output: length of the result without the trailing NUL
+ * @param err output: set if iconv cannot be opened for in_enc
+ * @return NUL terminated converted text or NULL if iconv cannot be opened
+ */
+gchar *
+rspamd_text_to_utf8 (rspamd_mempool_t *pool,
+		gchar *input, gsize len, const gchar *in_enc,
+		gsize *olen, GError **err)
+{
+	gchar *s, *d;
+	gsize outlen;
+	iconv_t ic;
+	rspamd_fstring_t *dst;
+	gsize remain, ret, inremain = len;
+	gboolean fatal = FALSE;
+
+	ic = iconv_open (UTF8_CHARSET, in_enc);
+
+	if (ic == (iconv_t)-1) {
+		g_set_error (err, rspamd_iconv_error_quark (), EINVAL,
+				"cannot open iconv for: %s", in_enc);
+
+		return NULL;
+	}
+
+	/* Preallocate for half of characters to be converted */
+	outlen = len + len / 2 + 1;
+	dst = rspamd_fstring_sized_new (outlen);
+	s = input;
+	d = dst->str;
+	/* One byte is always kept in reserve for the trailing NUL */
+	remain = outlen - 1;
+
+	/*
+	 * Run until the whole input is consumed: lack of output space is not a
+	 * reason to stop, it is handled by growing the buffer
+	 */
+	while (inremain > 0 && !fatal) {
+		ret = iconv (ic, &s, &inremain, &d, &remain);
+		dst->len = d - dst->str;
+
+		if (ret == (gsize)-1) {
+			switch (errno) {
+			case E2BIG:
+				/* Enlarge string */
+				if (inremain > 0) {
+					dst = rspamd_fstring_grow (dst, inremain * 2);
+					d = dst->str + dst->len;
+					remain = dst->allocated - dst->len - 1;
+				}
+				break;
+			case EILSEQ:
+			case EINVAL:
+				/* Ignore bad characters */
+				if (remain == 0) {
+					/* Make room for the replacement character */
+					dst = rspamd_fstring_grow (dst, inremain * 2);
+					d = dst->str + dst->len;
+					remain = dst->allocated - dst->len - 1;
+				}
+
+				*d++ = '?';
+				s++;
+				inremain --;
+				remain --;
+				break;
+			default:
+				/* Unexpected iconv failure: retrying would loop forever */
+				fatal = TRUE;
+				break;
+			}
+		}
+		else if (ret == 0) {
+			break;
+		}
+	}
+
+	/* The last step could be a '?' substitution not yet counted in len */
+	dst->len = d - dst->str;
+	*d = '\0';
+	*olen = dst->len;
+	iconv_close (ic);
+	rspamd_mempool_add_destructor (pool,
+			(rspamd_mempool_destruct_t)rspamd_fstring_free, dst);
+	msg_info_pool ("converted from %s to UTF-8 inlen: %z, outlen: %z",
+			in_enc, len, dst->len);
+
+	return dst->str;
+}
+
+/**
+ * Maybe convert the content of a text part to UTF-8 and mark the part as
+ * UTF or raw accordingly.
+ * The original content (text_part->orig) is returned untouched and the part
+ * is marked as raw when raw mode is configured, when the part has no
+ * charset, when the charset cannot be detected, when a UTF compatible part
+ * is not valid UTF-8 or when the conversion fails. A valid UTF compatible
+ * part is returned untouched and marked as UTF.
+ * @param task task that owns the part; its pool is used for allocations
+ * @param text_part part to process
+ * @return the original content or a new array allocated from the task pool
+ * that holds the converted text
+ */
+GByteArray *
+rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
+		struct rspamd_mime_text_part *text_part)
+{
+	GError *err = NULL;
+	gsize write_bytes;
+	const gchar *charset;
+	gchar *res_str;
+	GByteArray *result_array, *part_content = text_part->orig;
+	struct rspamd_mime_part *part = text_part->mime_part;
+
+	if (task->cfg && task->cfg->raw_mode) {
+		SET_PART_RAW (text_part);
+		return part_content;
+	}
+
+	if (utf_compatible_re == NULL) {
+		utf_compatible_re = rspamd_regexp_new (
+				"^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:us)|(?:ISO-8859-1)|"
+				"(?:latin.*)|(?:CSASCII)$",
+				"i", NULL);
+	}
+
+	if (part->ct->charset.len == 0) {
+		SET_PART_RAW (text_part);
+		return part_content;
+	}
+
+	charset = rspamd_mime_detect_charset (&part->ct->charset, task->task_pool);
+
+	if (charset == NULL) {
+		msg_info_task ("<%s>: has invalid charset", task->message_id);
+		SET_PART_RAW (text_part);
+
+		return part_content;
+	}
+
+	if (rspamd_regexp_match (utf_compatible_re, charset, strlen (charset), TRUE)) {
+		/* No conversion is needed, just check that the text is sane */
+		if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+			SET_PART_UTF (text_part);
+		}
+		else {
+			msg_info_task ("<%s>: contains invalid utf8 characters, assume it as raw",
+					task->message_id);
+			SET_PART_RAW (text_part);
+		}
+
+		return part_content;
+	}
+
+	res_str = rspamd_text_to_utf8 (task->task_pool, part_content->data,
+			part_content->len,
+			charset,
+			&write_bytes,
+			&err);
+
+	if (res_str == NULL) {
+		msg_warn_task ("<%s>: cannot convert from %s to utf8: %s",
+				task->message_id,
+				charset,
+				err ? err->message : "unknown problem");
+		SET_PART_RAW (text_part);
+
+		/* g_error_free must not be called with NULL */
+		if (err != NULL) {
+			g_error_free (err);
+		}
+
+		return part_content;
+	}
+
+	/* Only data and len are filled: the buffer is owned by the task pool */
+	result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray));
+	result_array->data = res_str;
+	result_array->len = write_bytes;
+	SET_PART_UTF (text_part);
+
+	return result_array;
+}
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
new file mode 100644
index 000000000..9c0975406
--- /dev/null
+++ b/src/libmime/mime_encoding.h
@@ -0,0 +1,62 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_ENCODING_H_
+#define SRC_LIBMIME_MIME_ENCODING_H_
+
+#include "config.h"
+#include "mem_pool.h"
+
+struct rspamd_task;
+struct rspamd_mime_part;
+struct rspamd_mime_text_part;
+
+/**
+ * Convert charset to a valid iconv charset
+ * @param in charset name as found in a message
+ * @param pool pool to store temporary data
+ * @return substituted charset name if the input is a known alias, otherwise
+ * a normalised copy of the input stored in the pool
+ */
+const gchar * rspamd_mime_detect_charset (const rspamd_ftok_t *in,
+		rspamd_mempool_t *pool);
+
+/**
+ * Convert text chunk to utf-8. Input encoding is substituted using
+ * `rspamd_mime_detect_charset`.
+ * If input encoding is already utf, this function returns input pointer.
+ * Memory is allocated from pool if a conversion is needed
+ *
+ * NOTE(review): mime_encoding.c defines this routine under the name
+ * `rspamd_text_to_utf8`, so this declaration has no matching definition
+ * there - confirm which name is intended.
+ * NOTE(review): that definition neither calls `rspamd_mime_detect_charset`
+ * nor returns the input pointer for utf input; the two sentences above
+ * describe behaviour that is not implemented there - confirm.
+ * @param pool pool that owns the converted text
+ * @param input text to convert
+ * @param len length of input in bytes
+ * @param in_enc name of the input encoding
+ * @param olen output: length of the converted text
+ * @param err output: error description if the conversion cannot be started
+ * @return converted text or NULL on error
+ */
+gchar * rspamd_mime_text_to_utf8 (rspamd_mempool_t *pool,
+ gchar *input, gsize len, const gchar *in_enc,
+ gsize *olen, GError **err);
+
+/**
+ * Maybe convert part to utf-8.
+ * The part is flagged as utf if its content is valid utf-8 (possibly after
+ * conversion) and as raw otherwise (raw mode, no or invalid charset, invalid
+ * utf-8 data, conversion failure).
+ * @param task task that owns the part, its pool is used for allocations
+ * @param text_part part to check and convert
+ * @return original content of the part if no conversion has been done or an
+ * array allocated from the task pool that holds the converted text
+ */
+GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
+ struct rspamd_mime_text_part *text_part);
+
+
+#endif /* SRC_LIBMIME_MIME_ENCODING_H_ */
diff --git a/src/libmime/mime_encoding_list.h b/src/libmime/mime_encoding_list.h
new file mode 100644
index 000000000..f03f008df
--- /dev/null
+++ b/src/libmime/mime_encoding_list.h
@@ -0,0 +1,1577 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_ENCODING_LIST_H_
+#define SRC_LIBMIME_MIME_ENCODING_LIST_H_
+
+/*
+ * Charset alias table: each entry maps an alias (`input`) to the name that
+ * is used instead of it (`canon`) and carries RSPAMD_CHARSET_FLAG_* bits.
+ * NOTE(review): several aliases are listed more than once (e.g. "cp819",
+ * "852", "csibm857", "cp-is"); the repeated entries hold identical values.
+ */
+static const struct rspamd_charset_substitution sub[] = {
+ {
+ .input = "iso-646-us",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "ansi_x3.4-1968",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "iso-ir-6",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "iso_646.irv:1991",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "ascii",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "iso646-us",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "us",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "ibm367",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "cp367",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "csascii",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "ascii7",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "default",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "646",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "iso_646.irv:1983",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "iso969-us",
+ .canon = "ansi_x3.4-1986",
+ .flags = RSPAMD_CHARSET_FLAG_ASCII,
+ },
+ {
+ .input = "tw-big5",
+ .canon = "big5",
+ .flags = 0,
+ },
+ {
+ .input = "csbig5",
+ .canon = "big5",
+ .flags = 0,
+ },
+ {
+ .input = "hkscs-big5",
+ .canon = "big5-hkscs",
+ .flags = 0,
+ },
+ {
+ .input = "big5hk",
+ .canon = "big5-hkscs",
+ .flags = 0,
+ },
+ {
+ .input = "big5-hkscs:unicode",
+ .canon = "big5-hkscs",
+ .flags = 0,
+ },
+ {
+ .input = "extended_unix_code_packed_format_for_japanese",
+ .canon = "euc-jp",
+ .flags = 0,
+ },
+ {
+ .input = "cseucpkdfmtjapanese",
+ .canon = "euc-jp",
+ .flags = 0,
+ },
+ {
+ .input = "x-eucjp",
+ .canon = "euc-jp",
+ .flags = 0,
+ },
+ {
+ .input = "x-euc-jp",
+ .canon = "euc-jp",
+ .flags = 0,
+ },
+ {
+ .input = "unicode-1-1-utf-8",
+ .canon = "utf-8",
+ .flags = RSPAMD_CHARSET_FLAG_UTF,
+ },
+ {
+ .input = "cseuckr",
+ .canon = "euc-kr",
+ .flags = 0,
+ },
+ {
+ .input = "5601",
+ .canon = "euc-kr",
+ .flags = 0,
+ },
+ {
+ .input = "ksc-5601",
+ .canon = "euc-kr",
+ .flags = 0,
+ },
+ {
+ .input = "ksc-5601-1987",
+ .canon = "euc-kr",
+ .flags = 0,
+ },
+ {
+ .input = "ksc-5601_1987",
+ .canon = "euc-kr",
+ .flags = 0,
+ },
+ {
+ .input = "ksc5601",
+ .canon = "euc-kr",
+ .flags = 0,
+ },
+ {
+ .input = "cns11643",
+ .canon = "euc-tw",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-euctw",
+ .canon = "euc-tw",
+ .flags = 0,
+ },
+ {
+ .input = "gb-18030",
+ .canon = "gb18030",
+ .flags = 0,
+ },
+ {
+ .input = "ibm1392",
+ .canon = "gb18030",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-1392",
+ .canon = "gb18030",
+ .flags = 0,
+ },
+ {
+ .input = "gb18030-2000",
+ .canon = "gb18030",
+ .flags = 0,
+ },
+ {
+ .input = "gb-2312",
+ .canon = "gb2312",
+ .flags = 0,
+ },
+ {
+ .input = "csgb2312",
+ .canon = "gb2312",
+ .flags = 0,
+ },
+ {
+ .input = "euc_cn",
+ .canon = "gb2312",
+ .flags = 0,
+ },
+ {
+ .input = "euccn",
+ .canon = "gb2312",
+ .flags = 0,
+ },
+ {
+ .input = "euc-cn",
+ .canon = "gb2312",
+ .flags = 0,
+ },
+ {
+ .input = "gb-k",
+ .canon = "gbk",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-1:1987",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-100",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "latin1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "l1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "ibm819",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "cp819",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatin1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "819",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "cp819",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "8859-1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_1",
+ .canon = "iso-8859-1",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-2:1987",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-101",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "latin2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "l2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatin2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "912",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "cp912",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-912",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "ibm912",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "8859-2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_2",
+ .canon = "iso-8859-2",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-3:1988",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-109",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "latin3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "l3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatin3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "913",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "cp913",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-913",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "ibm913",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "8859-3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_3",
+ .canon = "iso-8859-3",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-4:1988",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-110",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "latin4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "l4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatin4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "914",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "cp914",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-914",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "ibm914",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "8859-4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_4",
+ .canon = "iso-8859-4",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-5:1988",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-144",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-5",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "cyrillic",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatincyrillic",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "915",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "cp915",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-915",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "ibm915",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-5",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "8859-5",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_5",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_5",
+ .canon = "iso-8859-5",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-6:1987",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-127",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-6",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "ecma-114",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "asmo-708",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "arabic",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatinarabic",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "1089",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "cp1089",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-1089",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "ibm1089",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-6",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "8859-6",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_6",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_6",
+ .canon = "iso-8859-6",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-7:1987",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-126",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-7",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "elot_928",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "ecma-118",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "greek",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "greek8",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatingreek",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "813",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "cp813",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-813",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "ibm813",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-7",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "8859-7",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_7",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_7",
+ .canon = "iso-8859-7",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-8:1988",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-138",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-8",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "hebrew",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatinhebrew",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "916",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "cp916",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-916",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "ibm916",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-8",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "8859-8",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_8",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_8",
+ .canon = "iso-8859-8",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-9:1989",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-148",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-9",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "latin5",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "l5",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatin5",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "920",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "cp920",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-920",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "ibm920",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-9",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "8859-9",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_9",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_9",
+ .canon = "iso-8859-9",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-13",
+ .canon = "iso-8859-13",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-13",
+ .canon = "iso-8859-13",
+ .flags = 0,
+ },
+ {
+ .input = "8859-13",
+ .canon = "iso-8859-13",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859_13",
+ .canon = "iso-8859-13",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859_13",
+ .canon = "iso-8859-13",
+ .flags = 0,
+ },
+ {
+ .input = "iso-ir-199",
+ .canon = "iso-8859-14",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-14:1998",
+ .canon = "iso-8859-14",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-14",
+ .canon = "iso-8859-14",
+ .flags = 0,
+ },
+ {
+ .input = "latin8",
+ .canon = "iso-8859-14",
+ .flags = 0,
+ },
+ {
+ .input = "iso-celtic",
+ .canon = "iso-8859-14",
+ .flags = 0,
+ },
+ {
+ .input = "l8",
+ .canon = "iso-8859-14",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatin9",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "csisolatin0",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "latin9",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "latin0",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "923",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "cp923",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-923",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "ibm923",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "iso8859-15",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-15",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "8859-15",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "iso_8859-15_fdis",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "l9",
+ .canon = "iso-8859-15",
+ .flags = 0,
+ },
+ {
+ .input = "koi-8-r",
+ .canon = "koi8-r",
+ .flags = 0,
+ },
+ {
+ .input = "cskoi8r",
+ .canon = "koi8-r",
+ .flags = 0,
+ },
+ {
+ .input = "koi8",
+ .canon = "koi8-r",
+ .flags = 0,
+ },
+ {
+ .input = "koi-8-u",
+ .canon = "koi8-u",
+ .flags = 0,
+ },
+ {
+ .input = "koi-8-t",
+ .canon = "koi8-t",
+ .flags = 0,
+ },
+ {
+ .input = "shiftjis",
+ .canon = "shift_jis",
+ .flags = 0,
+ },
+ {
+ .input = "ms_kanji",
+ .canon = "shift_jis",
+ .flags = 0,
+ },
+ {
+ .input = "csshiftjis",
+ .canon = "shift_jis",
+ .flags = 0,
+ },
+ {
+ .input = "cp-437",
+ .canon = "ibm437",
+ .flags = 0,
+ },
+ {
+ .input = "cp437",
+ .canon = "ibm437",
+ .flags = 0,
+ },
+ {
+ .input = "437",
+ .canon = "ibm437",
+ .flags = 0,
+ },
+ {
+ .input = "cspc8codepage437437",
+ .canon = "ibm437",
+ .flags = 0,
+ },
+ {
+ .input = "cspc8codepage437",
+ .canon = "ibm437",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-437",
+ .canon = "ibm437",
+ .flags = 0,
+ },
+ {
+ .input = "cp-850",
+ .canon = "ibm850",
+ .flags = 0,
+ },
+ {
+ .input = "cp850",
+ .canon = "ibm850",
+ .flags = 0,
+ },
+ {
+ .input = "850",
+ .canon = "ibm850",
+ .flags = 0,
+ },
+ {
+ .input = "cspc850multilingual850",
+ .canon = "ibm850",
+ .flags = 0,
+ },
+ {
+ .input = "cspc850multilingual",
+ .canon = "ibm850",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-850",
+ .canon = "ibm850",
+ .flags = 0,
+ },
+ {
+ .input = "cp-851",
+ .canon = "ibm851",
+ .flags = 0,
+ },
+ {
+ .input = "cp851",
+ .canon = "ibm851",
+ .flags = 0,
+ },
+ {
+ .input = "851",
+ .canon = "ibm851",
+ .flags = 0,
+ },
+ {
+ .input = "csibm851",
+ .canon = "ibm851",
+ .flags = 0,
+ },
+ {
+ .input = "cp-852",
+ .canon = "ibm852",
+ .flags = 0,
+ },
+ {
+ .input = "cp852",
+ .canon = "ibm852",
+ .flags = 0,
+ },
+ {
+ .input = "852",
+ .canon = "ibm852",
+ .flags = 0,
+ },
+ {
+ .input = "cspcp852",
+ .canon = "ibm852",
+ .flags = 0,
+ },
+ {
+ .input = "852",
+ .canon = "ibm852",
+ .flags = 0,
+ },
+ {
+ .input = "cspcp852",
+ .canon = "ibm852",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-852",
+ .canon = "ibm852",
+ .flags = 0,
+ },
+ {
+ .input = "cp-855",
+ .canon = "ibm855",
+ .flags = 0,
+ },
+ {
+ .input = "cp855",
+ .canon = "ibm855",
+ .flags = 0,
+ },
+ {
+ .input = "855",
+ .canon = "ibm855",
+ .flags = 0,
+ },
+ {
+ .input = "csibm855",
+ .canon = "ibm855",
+ .flags = 0,
+ },
+ {
+ .input = "cspcp855",
+ .canon = "ibm855",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-855",
+ .canon = "ibm855",
+ .flags = 0,
+ },
+ {
+ .input = "cp-857",
+ .canon = "ibm857",
+ .flags = 0,
+ },
+ {
+ .input = "cp857",
+ .canon = "ibm857",
+ .flags = 0,
+ },
+ {
+ .input = "857",
+ .canon = "ibm857",
+ .flags = 0,
+ },
+ {
+ .input = "csibm857",
+ .canon = "ibm857",
+ .flags = 0,
+ },
+ {
+ .input = "857",
+ .canon = "ibm857",
+ .flags = 0,
+ },
+ {
+ .input = "csibm857",
+ .canon = "ibm857",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-857",
+ .canon = "ibm857",
+ .flags = 0,
+ },
+ {
+ .input = "cp-860",
+ .canon = "ibm860",
+ .flags = 0,
+ },
+ {
+ .input = "cp860",
+ .canon = "ibm860",
+ .flags = 0,
+ },
+ {
+ .input = "860",
+ .canon = "ibm860",
+ .flags = 0,
+ },
+ {
+ .input = "csibm860",
+ .canon = "ibm860",
+ .flags = 0,
+ },
+ {
+ .input = "860",
+ .canon = "ibm860",
+ .flags = 0,
+ },
+ {
+ .input = "csibm860",
+ .canon = "ibm860",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-860",
+ .canon = "ibm860",
+ .flags = 0,
+ },
+ {
+ .input = "cp-861",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "cp861",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "861",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "cp-is",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "csibm861",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "861",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "cp-is",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "csibm861",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-861",
+ .canon = "ibm861",
+ .flags = 0,
+ },
+ {
+ .input = "cp-862",
+ .canon = "ibm862",
+ .flags = 0,
+ },
+ {
+ .input = "cp862",
+ .canon = "ibm862",
+ .flags = 0,
+ },
+ {
+ .input = "862",
+ .canon = "ibm862",
+ .flags = 0,
+ },
+ {
+ .input = "cspc862latinhebrew862",
+ .canon = "ibm862",
+ .flags = 0,
+ },
+ {
+ .input = "cspc862latinhebrew",
+ .canon = "ibm862",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-862",
+ .canon = "ibm862",
+ .flags = 0,
+ },
+ {
+ .input = "cp-863",
+ .canon = "ibm863",
+ .flags = 0,
+ },
+ {
+ .input = "cp863",
+ .canon = "ibm863",
+ .flags = 0,
+ },
+ {
+ .input = "863",
+ .canon = "ibm863",
+ .flags = 0,
+ },
+ {
+ .input = "csibm863",
+ .canon = "ibm863",
+ .flags = 0,
+ },
+ {
+ .input = "863",
+ .canon = "ibm863",
+ .flags = 0,
+ },
+ {
+ .input = "csibm863",
+ .canon = "ibm863",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-863",
+ .canon = "ibm863",
+ .flags = 0,
+ },
+ {
+ .input = "cp-864",
+ .canon = "ibm864",
+ .flags = 0,
+ },
+ {
+ .input = "cp864",
+ .canon = "ibm864",
+ .flags = 0,
+ },
+ {
+ .input = "csibm864",
+ .canon = "ibm864",
+ .flags = 0,
+ },
+ {
+ .input = "csibm864",
+ .canon = "ibm864",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-864",
+ .canon = "ibm864",
+ .flags = 0,
+ },
+ {
+ .input = "cp-865",
+ .canon = "ibm865",
+ .flags = 0,
+ },
+ {
+ .input = "cp865",
+ .canon = "ibm865",
+ .flags = 0,
+ },
+ {
+ .input = "865",
+ .canon = "ibm865",
+ .flags = 0,
+ },
+ {
+ .input = "csibm865",
+ .canon = "ibm865",
+ .flags = 0,
+ },
+ {
+ .input = "865",
+ .canon = "ibm865",
+ .flags = 0,
+ },
+ {
+ .input = "csibm865",
+ .canon = "ibm865",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-865",
+ .canon = "ibm865",
+ .flags = 0,
+ },
+ {
+ .input = "cp-866",
+ .canon = "ibm866",
+ .flags = 0,
+ },
+ {
+ .input = "cp866",
+ .canon = "ibm866",
+ .flags = 0,
+ },
+ {
+ .input = "866",
+ .canon = "ibm866",
+ .flags = 0,
+ },
+ {
+ .input = "csibm866",
+ .canon = "ibm866",
+ .flags = 0,
+ },
+ {
+ .input = "866",
+ .canon = "ibm866",
+ .flags = 0,
+ },
+ {
+ .input = "csibm866",
+ .canon = "ibm866",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-866",
+ .canon = "ibm866",
+ .flags = 0,
+ },
+ {
+ .input = "cp-868",
+ .canon = "ibm868",
+ .flags = 0,
+ },
+ {
+ .input = "cp868",
+ .canon = "ibm868",
+ .flags = 0,
+ },
+ {
+ .input = "cp-ar",
+ .canon = "ibm868",
+ .flags = 0,
+ },
+ {
+ .input = "csibm868",
+ .canon = "ibm868",
+ .flags = 0,
+ },
+ {
+ .input = "ibm-868",
+ .canon = "ibm868",
+ .flags = 0,
+ },
+ {
+ .input = "cp-869",
+ .canon = "ibm869",
+ .flags = 0,
+ },
+ {
+ .input = "cp869",
+ .canon = "ibm869",
+ .flags = 0,
+ },
+ {
+ .input = "869",
+ .canon = "ibm869",
+ .flags = 0,
+ },
+ {
+ .input = "cp-gr",
+ .canon = "ibm869",
+ .flags = 0,
+ },
+ {
+ .input = "csibm869",
+ .canon = "ibm869",
+ .flags = 0,
+ },
+ {
+ .input = "cp-891",
+ .canon = "ibm891",
+ .flags = 0,
+ },
+ {
+ .input = "cp891",
+ .canon = "ibm891",
+ .flags = 0,
+ },
+ {
+ .input = "csibm891",
+ .canon = "ibm891",
+ .flags = 0,
+ },
+ {
+ .input = "cp-903",
+ .canon = "ibm903",
+ .flags = 0,
+ },
+ {
+ .input = "cp903",
+ .canon = "ibm903",
+ .flags = 0,
+ },
+ {
+ .input = "csibm903",
+ .canon = "ibm903",
+ .flags = 0,
+ },
+ {
+ .input = "cp-904",
+ .canon = "ibm904",
+ .flags = 0,
+ },
+ {
+ .input = "cp904",
+ .canon = "ibm904",
+ .flags = 0,
+ },
+ {
+ .input = "904",
+ .canon = "ibm904",
+ .flags = 0,
+ },
+ {
+ .input = "csibm904",
+ .canon = "ibm904",
+ .flags = 0,
+ },
+ {
+ .input = "cp-1251",
+ .canon = "cp1251",
+ .flags = 0,
+ },
+ {
+ .input = "windows-1251",
+ .canon = "cp1251",
+ .flags = 0,
+ },
+ {
+ .input = "cp-1255",
+ .canon = "cp1255",
+ .flags = 0,
+ },
+ {
+ .input = "windows-1255",
+ .canon = "cp1255",
+ .flags = 0,
+ },
+ {
+ .input = "tis620.2533",
+ .canon = "tis-620",
+ .flags = 0,
+ },
+};
+
+#endif /* SRC_LIBMIME_MIME_ENCODING_LIST_H_ */