Procházet zdrojové kódy

[Project] Lua_magic: Add heuristics support

tags/2.0
Vsevolod Stakhov před 4 roky
rodič
revize
603ae65cd4
1 změnil soubory, kde provedl 63 přidání a 43 odebrání
  1. 63
    43
      lualib/lua_magic/init.lua

+ 63
- 43
lualib/lua_magic/init.lua Zobrazit soubor

@@ -131,23 +131,48 @@ local function process_patterns(log_obj)
end
end

local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, res)
local matches = trie:match(input)
local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_obj, res)
local matches = trie:match(chunk)

local last = tlen

local function add_result(match, pattern)
if not res[pattern.ext] then
res[pattern.ext] = 0
local function add_result(weight, ext)
if not res[ext] then
res[ext] = 0
end
if match.weight then
res[pattern.ext] = res[pattern.ext] + match.weight
res[ext] = res[ext] + weight
else
res[pattern.ext] = res[pattern.ext] + 1
res[ext] = res[ext] + 1
end

lua_util.debugm(N, log_obj,'add pattern for %s, weight %s, total weight %s',
pattern.ext, match.weight, res[pattern.ext])
ext, weight, res[ext])
end

local function match_position(pos, expected)
local cmp = function(a, b) return a == b end
if type(expected) == 'table' then
-- Something like {'>', 0}
if expected[1] == '>' then
cmp = function(a, b) return a > b end
elseif expected[1] == '>=' then
cmp = function(a, b) return a >= b end
elseif expected[1] == '<' then
cmp = function(a, b) return a < b end
elseif expected[1] == '<=' then
cmp = function(a, b) return a <= b end
elseif expected[1] == '!=' then
cmp = function(a, b) return a ~= b end
end
expected = expected[2]
end

-- Tail match
if expected < 0 then
expected = last + expected + 1
end
return cmp(pos, expected)
end

for npat,matched_positions in pairs(matches) do
@@ -155,30 +180,6 @@ local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, re
local pattern = pat_data[3]
local match = pat_data[2]

local function match_position(pos, expected)
local cmp = function(a, b) return a == b end
if type(expected) == 'table' then
-- Something like {'>', 0}
if expected[1] == '>' then
cmp = function(a, b) return a > b end
elseif expected[1] == '>=' then
cmp = function(a, b) return a >= b end
elseif expected[1] == '<' then
cmp = function(a, b) return a < b end
elseif expected[1] == '<=' then
cmp = function(a, b) return a <= b end
elseif expected[1] == '!=' then
cmp = function(a, b) return a ~= b end
end
expected = expected[2]
end

-- Tail match
if expected < 0 then
expected = last + expected + 1
end
return cmp(pos, expected)
end
-- Single position
if match.position then
local position = match.position
@@ -187,13 +188,21 @@ local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, re
lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)',
pattern.ext, pos, offset)
if match_position(pos + offset, position) then
add_result(match, pattern)
break
if match.heuristic then
local ext,weight = match.heuristic(input, log_obj)

if ext then
add_result(weight, ext)
break
end
else
add_result(match.weight, pattern.ext)
break
end
end
end
end
-- Match all positions
if match.positions then
elseif match.positions then
-- Match all positions
local all_right = true
for _,position in ipairs(match.positions) do
local matched = false
@@ -210,10 +219,21 @@ local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, re
end

if all_right then
add_result(match, pattern)
if match.heuristic then
local ext,weight = match.heuristic(input, log_obj)

if ext then
add_result(weight, ext)
break
end
else
add_result(match.weight, pattern.ext)
break
end
end
end
end

end

local function process_detected(res)
@@ -248,13 +268,13 @@ exports.detect = function(input, log_obj)
-- Check tail matches
if inplen > min_tail_offset then
local tail = input:span(inplen - min_tail_offset, min_tail_offset)
match_chunk(tail, inplen, inplen - min_tail_offset,
match_chunk(tail, input, inplen, inplen - min_tail_offset,
compiled_tail_patterns, tail_patterns, log_obj, res)
end

-- Try short match
local head = input:span(1, math.min(max_short_offset, inplen))
match_chunk(head, inplen, 0,
match_chunk(head, input, inplen, 0,
compiled_short_patterns, short_patterns, log_obj, res)

-- Check if we have enough data or go to long patterns
@@ -273,13 +293,13 @@ exports.detect = function(input, log_obj)
input:span(inplen - exports.chunk_size, exports.chunk_size)
local offset1, offset2 = 0, inplen - exports.chunk_size

match_chunk(chunk1, inplen,
match_chunk(chunk1, input, inplen,
offset1, compiled_patterns, processed_patterns, log_obj, res)
match_chunk(chunk2, inplen,
match_chunk(chunk2, input, inplen,
offset2, compiled_patterns, processed_patterns, log_obj, res)
else
-- Input is short enough to match it at all
match_chunk(input, inplen, 0,
match_chunk(input, input, inplen, 0,
compiled_patterns, processed_patterns, log_obj, res)
end
else

Načítá se…
Zrušit
Uložit