You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lua_parsers.h 3.2KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. /*-
  2. * Copyright 2020 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef RSPAMD_LUA_PARSERS_H
  17. #define RSPAMD_LUA_PARSERS_H
  18. #include "lua_common.h"
  19. /***
  20. * @function parsers.tokenize_text(input[, exceptions])
  21. * Creates tokens from a text using an optional list of exceptions
  22. * @param {text/string} input input data
  23. * @param {table} exceptions optional table of pairs <start_pos,length> describing exceptions in the input
  24. * @return {table/strings} list of strings representing words in the text
  25. */
  26. LUA_PUBLIC_FUNCTION_DEF (parsers, tokenize_text);
  27. /***
  28. * @function parsers.parse_html(input)
  29. * Parses HTML and returns the corresponding text
  30. * @param {string|text} input input HTML
  31. * @return {rspamd_text} processed text with no HTML tags
  32. */
  33. LUA_PUBLIC_FUNCTION_DEF (parsers, parse_html);
  34. /***
  35. * @function parsers.parse_mail_address(str[, pool])
  36. * Parses email address and returns a table of tables in the following format:
  37. *
  38. * - `raw` - the original value without any processing
  39. * - `name` - name of internet address in UTF8, e.g. for `Vsevolod Stakhov <blah@foo.com>` it returns `Vsevolod Stakhov`
  40. * - `addr` - address part of the address
  41. * - `user` - user part (if present) of the address, e.g. `blah`
  42. * - `domain` - domain part (if present), e.g. `foo.com`
  43. * - `flags` - table with the following keys set to true if the given condition is fulfilled:
  44. *   - [valid] - valid SMTP address in conformity with https://tools.ietf.org/html/rfc5321#section-4.1.
  45. *   - [ip] - domain is an IPv4/IPv6 address
  46. *   - [braced] - angled `<blah@foo.com>` address
  47. *   - [quoted] - quoted user part
  48. *   - [empty] - empty address
  49. *   - [backslash] - user part contains a backslash
  50. *   - [8bit] - contains 8bit characters
  51. *
  52. * @param {string} str input string
  53. * @param {rspamd_mempool} pool optional memory pool to use
  54. * @return {table/tables} parsed list of mail addresses
  55. */
  56. LUA_PUBLIC_FUNCTION_DEF (parsers, parse_mail_address);
  57. /***
  58. * @function parsers.parse_content_type(ct_string, mempool)
  59. * Parses content-type string to a table:
  60. * - `type`
  61. * - `subtype`
  62. * - `charset`
  63. * - `boundary`
  64. * - other attributes
  65. *
  66. * @param {string} ct_string content type as string
  67. * @param {rspamd_mempool} mempool needed to store temporary data (e.g. task pool)
  68. * @return {table} parsed content type, or nil if the content type cannot be parsed
  69. */
  70. LUA_PUBLIC_FUNCTION_DEF (parsers, parse_content_type);
  71. /***
  72. * @function parsers.parse_smtp_date(str[, local_tz])
  73. * Converts an SMTP date string to unix timestamp
  74. * @param {string} str input string
  75. * @param {boolean} local_tz convert to local tz if `true`
  76. * @return {number} time as unix timestamp (converted to float)
  77. */
  78. LUA_PUBLIC_FUNCTION_DEF (parsers, parse_smtp_date);
  79. #endif //RSPAMD_LUA_PARSERS_H