You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

rspamd.classifiers.lua 3.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. --[[
  2. Copyright (c) 2011-2015, Vsevolod Stakhov <vsevolod@highsecure.ru>
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ]]--
  13. -- Detects the language of a message and selects the appropriate statfiles for it
  14. local fun = require "fun"
  -- Labels identifying the statfiles reserved for particular kinds of
  -- messages. Each value is passed to classifier:get_statfile_by_label().
  local many_recipients_label, undisclosed_recipients_label =
    'many recipients', 'undisclosed recipients'
  local list_label, long_subject_label = 'maillist', 'long subject'
  20. -- Get specific statfiles set based on message rules
  -- Appends every statfile yielded by `statfiles` to the array `dst`.
  local function append_statfiles(dst, statfiles)
    fun.each(function(st) dst[#dst + 1] = st end, statfiles)
  end

  -- Collects the labelled statfiles whose rule matches the message.
  --
  -- Rules are evaluated in this order, each one only when the classifier has
  -- statfiles registered under the corresponding label:
  --   * many recipients        - more than 5 recipients
  --   * undisclosed recipients - recipient list present but empty
  --   * maillist               - a List-Unsubscribe header exists
  --   * long subject           - raw Subject header longer than 150 bytes
  --
  -- @param classifier object providing get_statfile_by_label(label)
  -- @param task       object providing get_recipients(kind) and
  --                   get_header_raw(name)
  -- @return array of matched statfiles when more than one was collected,
  --         otherwise nil
  local function get_specific_statfiles(classifier, task)
    local spec_st = {}

    -- The recipient list feeds two rules; fetch it on first use and reuse it.
    -- NOTE(review): the argument 2 presumably selects MIME (header)
    -- recipients in rspamd's task API - confirm.
    local rcpt_fetched, rcpt = false, nil
    local function recipient_count()
      if not rcpt_fetched then
        rcpt = task:get_recipients(2)
        rcpt_fetched = true
      end
      -- nil when no recipient list is available at all
      return rcpt and #rcpt
    end

    -- More than 5 recipients
    local st_many = classifier:get_statfile_by_label(many_recipients_label)
    if st_many then
      local count = recipient_count()
      if count and count > 5 then
        append_statfiles(spec_st, st_many)
      end
    end

    -- Undisclosed recipients
    -- NOTE(review): a nil recipient list does not count as undisclosed here;
    -- only an empty list does. Verify that this matches the task API.
    local st_undisc = classifier:get_statfile_by_label(undisclosed_recipients_label)
    if st_undisc then
      local count = recipient_count()
      if count and count == 0 then
        append_statfiles(spec_st, st_undisc)
      end
    end

    -- Mailing list
    local st_maillist = classifier:get_statfile_by_label(list_label)
    if st_maillist and task:get_header_raw('List-Unsubscribe') then
      append_statfiles(spec_st, st_maillist)
    end

    -- Long subject
    local st_longsubj = classifier:get_statfile_by_label(long_subject_label)
    if st_longsubj then
      local subj = task:get_header_raw('Subject')
      if subj and #subj > 150 then
        append_statfiles(spec_st, st_longsubj)
      end
    end

    -- A lone statfile is deliberately not returned: at least two are required.
    if #spec_st > 1 then
      return spec_st
    end
    return nil
  end
  -- Statfile selector registered for the 'bayes' classifier.
  --
  -- Selection steps:
  --   1. Ask get_specific_statfiles() for label-based statfiles. When
  --      learning (`is_learn` truthy) and any were found, return exactly those.
  --   2. Otherwise start from those statfiles (if any) and add the unlabelled
  --      statfiles whose 'language' param equals the detected message
  --      language; return the selection if it holds more than one statfile.
  --   3. Otherwise add the unlabelled statfiles that have no 'language' param
  --      (entries gathered in step 2 stay in the selection) and return it if
  --      it holds more than one statfile.
  --
  -- @param classifier object providing get_statfiles() and
  --                   get_statfile_by_label(label)
  -- @param task       object providing get_text_parts() plus whatever
  --                   get_specific_statfiles() reads from it
  -- @param is_learn   truthy when statfiles are being chosen for learning
  -- @return array of selected statfiles, or nil when fewer than two qualify
  classifiers['bayes'] = function(classifier, task, is_learn)
    -- Returns the language of the first text part that reports one, or nil.
    local function detect_language()
      -- `or {}` keeps ipairs from raising if no part list is returned
      for _, part in ipairs(task:get_text_parts() or {}) do
        local lang = part:get_language()
        if lang then
          return lang
        end
      end
      return nil
    end

    local selected = {}

    -- Appends to `selected` every unlabelled statfile whose 'language' param
    -- is accepted by the predicate `accepts`.
    local function select_unlabelled(accepts)
      for _, st in ipairs(classifier:get_statfiles() or {}) do
        -- Labelled statfiles are handled by get_specific_statfiles()
        if not st:get_label() and accepts(st:get_param('language')) then
          selected[#selected + 1] = st
        end
      end
    end

    local spec_st = get_specific_statfiles(classifier, task)
    if spec_st then
      if is_learn then
        return spec_st
      end
      -- Merge the label-based statfiles into the selection
      fun.each(function(st) selected[#selected + 1] = st end, spec_st)
    end

    -- Statfiles matching the detected language
    local language = detect_language()
    if language then
      select_unlabelled(function(st_lang) return st_lang == language end)
      if #selected > 1 then
        return selected
      end
    end

    -- Language not detected, or too few language-specific statfiles found:
    -- fall back to statfiles that declare no language
    select_unlabelled(function(st_lang) return not st_lang end)
    if #selected > 1 then
      return selected
    end
    return nil
  end