You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_encoding.h 3.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef SRC_LIBMIME_MIME_ENCODING_H_
  17. #define SRC_LIBMIME_MIME_ENCODING_H_
  18. #include "config.h"
  19. #include "mem_pool.h"
  20. #include "fstring.h"
  21. #include <unicode/uchar.h>
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif
  /* Forward declarations: the prototypes in this header only refer to these
   * structs through pointers, so their full definitions are not needed here.
   * `rspamd_mime_part` is declared but not used by any prototype in this header. */
  25. struct rspamd_task;
  26. struct rspamd_mime_part;
  27. struct rspamd_mime_text_part;
  28. struct rspamd_charset_converter;
  29. /**
  30. * Convert charset alias to a canonic charset name
  31. * @param in charset alias to canonicalise, passed as an `rspamd_ftok_t` token
  32. * @param pool pool to store temporary data
  33. * @return canonic charset name
      * NOTE(review): the result for an unknown or empty alias (NULL?) and the
      * lifetime of the returned string are not visible from this header - confirm
      * against the implementation.
  34. */
  35. const gchar *rspamd_mime_detect_charset(const rspamd_ftok_t *in,
  36. rspamd_mempool_t *pool);
  37. /**
  38. * Convert text chunk to utf-8. Input encoding is substituted using
  39. * `rspamd_mime_detect_charset`.
  40. * If input encoding is already utf, this function returns input pointer.
  41. * Memory is allocated from pool if a conversion is needed
  42. * @param pool pool the converted text is allocated from when a conversion is needed
  43. * @param input text chunk to convert
  44. * @param len length of `input` (presumably in bytes - TODO confirm)
  45. * @param in_enc canon charset
  46. * @param olen presumably receives the length of the returned text - TODO confirm
  47. * @param err GError output location (`GError **`)
  48. * @return `input` itself if it is already utf, otherwise converted text allocated
      * from `pool`. NOTE(review): the value returned on failure (NULL with `err`
      * set?) is not visible from this header - confirm.
  49. */
  50. gchar *rspamd_mime_text_to_utf8(rspamd_mempool_t *pool,
  51. gchar *input, gsize len, const gchar *in_enc,
  52. gsize *olen, GError **err);
  53. /**
  54. * Converts data from `in` to `out`,
  55. * returns `FALSE` if `enc` is not a valid iconv charset
  56. *
  57. * This function, in fact, copies `in` to `out` replacing out content in
  58. * total.
  59. * @param in source byte array
  60. * @param out destination byte array; its previous content is replaced
      * @param pool memory pool (NOTE(review): what is allocated from it is not
      * visible from this header - confirm)
  61. * @param enc validated canonical charset name. If NULL, then utf8 check is done only
  62. * @return `FALSE` if `enc` is not a valid charset (presumably `TRUE` otherwise - TODO confirm)
  63. */
  64. gboolean rspamd_mime_to_utf8_byte_array(GByteArray *in,
  65. GByteArray *out,
  66. rspamd_mempool_t *pool,
  67. const gchar *enc);
  68. /**
  69. * Maybe convert part to utf-8
  70. * @param task task context (presumably the task owning `text_part` - TODO confirm)
  71. * @param text_part text part to convert, if needed
  72. * No value is returned (the function is `void`).
  73. */
  74. void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task,
  75. struct rspamd_mime_text_part *text_part);
  76. /**
  77. * Checks utf8 charset and normalize/validate utf8 string
  78. * @param charset charset name token
  79. * @param in string to check (non-const; presumably may be normalised in place - TODO confirm)
  80. * @param len length of `in` (presumably in bytes - TODO confirm)
      * @param content_check NOTE(review): undocumented flag; presumably enables
      * validation of the content of `in` in addition to the charset name - confirm
  81. * @return NOTE(review): the meaning of TRUE/FALSE is not documented here;
      * presumably TRUE when the text can be treated as utf8 - confirm
  82. */
  83. gboolean rspamd_mime_charset_utf_check(rspamd_ftok_t *charset,
  84. gchar *in, gsize len,
  85. gboolean content_check);
  86. /**
  87. * Ensure that all characters in string are valid utf8 chars or replace them
  88. * with '?'
  89. * @param in string to fix up; invalid characters are replaced in this buffer
  90. * @param len length of `in` (presumably in bytes - TODO confirm)
  91. */
  92. void rspamd_mime_charset_utf_enforce(gchar *in, gsize len);
  93. /**
  94. * Gets cached converter
  95. * @param enc input encoding
  96. * @param pool pool to use for temporary normalisation
  97. * @param is_canon TRUE if normalisation is needed
      * NOTE(review): the parameter name suggests the opposite meaning (TRUE when
      * `enc` is already canonical, i.e. normalisation can be skipped) - confirm
      * against the implementation and fix this description if so.
  98. * @param err output error (ICU `UErrorCode`)
  99. * @return converter
  100. */
  101. struct rspamd_charset_converter *rspamd_mime_get_converter_cached(
  102. const gchar *enc,
  103. rspamd_mempool_t *pool,
  104. gboolean is_canon,
  105. UErrorCode *err);
  106. /**
  107. * Performs charset->utf16 conversion
  108. * @param cnv converter to use
  109. * @param dest output buffer of `UChar` (utf16) units
  110. * @param destCapacity capacity of `dest` (presumably in `UChar` units - TODO confirm)
  111. * @param src input text in the converter's charset
  112. * @param srcLength length of `src` (presumably in bytes - TODO confirm)
  113. * @param pErrorCode error code output (`UErrorCode *`)
  114. * @return NOTE(review): not documented here; the signature mirrors ICU's
       * `ucnv_toUChars`, so this is presumably the number of `UChar`s produced - confirm
  115. */
  116. int32_t
  117. rspamd_converter_to_uchars(struct rspamd_charset_converter *cnv,
  118. UChar *dest,
  119. int32_t destCapacity,
  120. const char *src,
  121. int32_t srcLength,
  122. UErrorCode *pErrorCode);
  123. /**
  124. * Detect charset in text
  125. * @param in text to inspect
  126. * @param inlen length of `in` (presumably in bytes - TODO confirm)
       * @param check_utf8 NOTE(review): undocumented flag; presumably controls whether
       * the text is first tested for being valid utf8 - confirm against the implementation
  127. * @return detected charset name or NULL
  128. */
  129. const char *rspamd_mime_charset_find_by_content(const gchar *in, gsize inlen,
  130. bool check_utf8);
  131. #ifdef __cplusplus
  132. }
  133. #endif
  134. #endif /* SRC_LIBMIME_MIME_ENCODING_H_ */