You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

rfc2047_parser.rl 1.8KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  %%{
    # Grammar for one RFC 2047 encoded-word, extended with the optional
    # RFC 2231 language suffix that may follow the charset.
    #
    # The embedded actions read and write C variables that the host function
    # must declare: in, p, charset_start, charset_end, encoded_start,
    # encoded_end and encoding.
    machine rfc2047_parser;

    # Entering the charset: remember where it begins.
    action Start_Charset {
      charset_start = p;
    }

    # Leaving the charset: record its end, but only for a non-empty span.
    action End_Charset {
      if (charset_start && p > charset_start) {
        charset_end = p;
      }
    }

    # Leaving the encoding letter: the byte just before p selects the encoding.
    action End_Encoding {
      if (p > in) {
        const char enc_letter = *(p - 1);

        if (enc_letter == 'B' || enc_letter == 'b') {
          encoding = RSPAMD_RFC2047_BASE64;
        }
        else {
          /* Every other letter is treated as quoted-printable */
          encoding = RSPAMD_RFC2047_QP;
        }
      }
    }

    # Entering the encoded text: remember where it begins.
    action Start_Encoded {
      encoded_start = p;
    }

    # Leaving the encoded text: record its end, but only for a non-empty span.
    action End_Encoded {
      if (encoded_start && p > encoded_start) {
        encoded_end = p;
      }
    }

    # Language tag: alphabetic subtags of 1 to 8 letters joined by dashes.
    primary_tag = alpha{1,8};
    subtag = alpha{1,8};
    language = primary_tag ( "-" subtag )*;

    # Characters that are not allowed inside a token.
    especials = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\""
              | "/" | "[" | "]" | "?" | "." | "=" | "*";
    token = (graph - especials)+;
    charset = token;

    # Single encoding letter, either case.
    encoding = [QqBb];

    # Printable characters, stopping before the closing delimiter.
    encoded_text = (print+ -- ("?="));

    # Full encoded-word; the language suffix and the closing delimiter
    # are both optional.
    encoded_word =
      "=?"
      charset >Start_Charset %End_Charset
      ("*" language)?
      "?"
      encoding %End_Encoding
      "?"
      encoded_text >Start_Encoded %End_Encoded
      "?="?;

    main := encoded_word;
  }%%
  48. #include "smtp_parsers.h"
  49. #include "mime_headers.h"
  50. %% write data;
  51. gboolean
  52. rspamd_rfc2047_parser (const char *in, gsize len, int *pencoding,
  53. const char **charset, gsize *charset_len,
  54. const char **encoded, gsize *encoded_len)
  55. {
  56. const char *p = in, *pe = in + len,
  57. *encoded_start = NULL, *encoded_end = NULL,
  58. *charset_start = NULL, *charset_end = NULL,
  59. *eof = in + len;
  60. int encoding = RSPAMD_RFC2047_QP, cs = 0;
  61. %% write init;
  62. %% write exec;
  63. if (encoded_end) {
  64. *pencoding = encoding;
  65. *charset = charset_start;
  66. *charset_len = charset_end - charset_start;
  67. *encoded = encoded_start;
  68. *encoded_len = encoded_end - encoded_start;
  69. return TRUE;
  70. }
  71. return FALSE;
  72. }