aboutsummaryrefslogtreecommitdiffstats
path: root/lualib/lua_magic/init.lua
blob: e8629eedad96652fb67a0c9527e5a295c3a5c33e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
--[[
Copyright (c) 2019, Vsevolod Stakhov <vsevolod@highsecure.ru>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

--[[[
-- @module lua_magic
-- This module contains the file type detection logic
--]]

local patterns = require "lua_magic/patterns"
local types = require "lua_magic/types"
local fun = require "fun"
local lua_util = require "lua_util"

local rspamd_text = require "rspamd_text"
local rspamd_trie = require "rspamd_trie"

-- Module name passed as the first argument to lua_util.debugm
local N = "lua_magic"
-- Table returned by this module; public functions and settings are attached to it
local exports = {}
-- trie object, created on the first call to process_patterns()
local compiled_patterns
-- {<str>, <match_object>, <pattern_object>} indexed by pattern number
-- (the same numbering is used as keys in the trie match results)
local processed_patterns = {}

-- Builds the trie from every string match found in the patterns module.
-- The work is done only once: later calls return immediately as soon as
-- `compiled_patterns` is set.
local function process_patterns()
  if compiled_patterns then
    return
  end

  -- Flatten pattern -> matches into a list whose index is the pattern number
  for _,pat in ipairs(patterns) do
    for _,m in ipairs(pat.matches) do
      if m.string then
        local idx = #processed_patterns + 1
        processed_patterns[idx] = { m.string, m, pat }
      end
    end
  end

  -- Only the string part of each entry is fed to the trie
  local strings = fun.iter(processed_patterns)
      :map(function(entry) return entry[1] end)
      :totable()

  compiled_patterns = rspamd_trie.create(strings, rspamd_trie.flags.re)

  lua_util.debugm(N, rspamd_config, 'compiled %s patterns',
      #processed_patterns)
end

-- Comparison operators accepted in table-form position constraints,
-- e.g. {'>', 0} or {'<=', 128}
local position_ops = {
  ['>'] = function(a, b) return a > b end,
  ['>='] = function(a, b) return a >= b end,
  ['<'] = function(a, b) return a < b end,
  ['<='] = function(a, b) return a <= b end,
  ['!='] = function(a, b) return a ~= b end,
}

-- Checks a matched position against an expected one.
-- `expected` is either a plain value (compared for equality) or a table
-- {<operator>, <value>}; an unknown operator falls back to equality.
local function match_position(pos, expected)
  if type(expected) == 'table' then
    local cmp = position_ops[expected[1]]
    if cmp then
      return cmp(pos, expected[2])
    end

    return pos == expected[2]
  end

  return pos == expected
end

-- Runs the compiled trie over one chunk of input and accumulates weights
-- per extension.
-- @param input chunk passed to the trie `match` method
-- @param offset offset of this chunk within the whole input; it is added to
--   every position reported by the trie before the position is checked
-- @param log_obj object used for debug logging (rspamd_config if not set)
-- @param res table {<ext> = <total weight>} updated in place
local function match_chunk(input, offset, log_obj, res)
  local matches = compiled_patterns:match(input)

  if not log_obj then log_obj = rspamd_config end

  -- Adds the match weight (1 if the match has no weight) to its extension
  local function add_result(match, pattern)
    local ext = pattern.ext

    res[ext] = (res[ext] or 0) + (match.weight or 1)

    lua_util.debugm(N, log_obj,'add pattern for %s, weight %s, total weight %s',
        pattern.ext, match.weight, res[pattern.ext])
  end

  for npat,matched_positions in pairs(matches) do
    local pat_data = processed_patterns[npat]
    local match = pat_data[2]
    local pattern = pat_data[3]

    -- Single position
    if match.position then
      local position = match.position

      for _,pos in ipairs(matched_positions) do
        if match_position(pos + offset, position) then
          add_result(match, pattern)
        end
      end
    end
    -- Match all positions
    if match.positions then
      for _,position in ipairs(match.positions) do
        for _,pos in ipairs(matched_positions) do
          -- Positions reported by the trie are relative to this chunk, so
          -- the chunk offset must be applied here just like for the single
          -- position case above
          if match_position(pos + offset, position) then
            add_result(match, pattern)
          end
        end
      end
    end
  end
end
-- Detects the most likely file type of the input.
-- @param input Lua string or rspamd_text userdata with the data to inspect
-- @param log_obj optional object used for debug logging
-- @return extension with the highest accumulated weight and the matching
--   entry of the types table, or nil if no pattern has matched
exports.detect = function(input, log_obj)
  process_patterns()
  local res = {}

  if type(input) == 'string' then
    -- Convert to rspamd_text
    input = rspamd_text.fromstring(input)
  end

  local chunk_size = exports.chunk_size

  if type(input) == 'userdata' and #input > chunk_size * 3 then
    -- Input is too long to be scanned as a whole: check two chunks at the
    -- start and one chunk at the end
    local inplen = #input
    local tail_offset = inplen - chunk_size

    -- span() takes a 1-based start while chunk offsets are 0-based, so a
    -- chunk with offset `o` starts at index `o + 1`. This keeps the chunks
    -- adjacent (no byte is scanned twice) and lets the tail chunk reach the
    -- very last byte of the input.
    local chunk1 = input:span(1, chunk_size)
    local chunk2 = input:span(chunk_size + 1, chunk_size)
    local chunk3 = input:span(tail_offset + 1, chunk_size)

    match_chunk(chunk1, 0, log_obj, res)
    match_chunk(chunk2, chunk_size, log_obj, res)
    match_chunk(chunk3, tail_offset, log_obj, res)
  else
    match_chunk(input, 0, log_obj, res)
  end

  local extensions = lua_util.keys(res)

  if #extensions > 0 then
    -- Heaviest extension goes first
    table.sort(extensions, function(ex1, ex2)
      return res[ex1] > res[ex2]
    end)

    return extensions[1],types[extensions[1]]
  end

  -- Nothing found
  return nil
end

-- Size (in bytes) of a single chunk checked by exports.detect.
-- When the input is longer than 3 chunks, only 2 chunks at the start and
-- 1 chunk at the end are checked; shorter inputs are checked as a whole.
-- The value is read on each exports.detect call.
exports.chunk_size = 16384

return exports