aboutsummaryrefslogtreecommitdiffstats
path: root/modules/indexer/code/bleve/token/path/path_test.go
blob: cc52021ef7f1f92edd6af252c6a38094eff33012 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package path

import (
	"fmt"
	"testing"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
	"github.com/stretchr/testify/assert"
)

// Scenario pairs an input string with the list of tokens associated with it.
//
// NOTE(review): nothing in this file's visible code references Scenario;
// TestTokenFilter declares its own anonymous struct instead. Confirm whether
// another file in the package uses it before removing.
type Scenario struct {
	// Input is the raw string for the scenario.
	Input string
	// Tokens is the list of tokens tied to Input.
	Tokens []string
}

// TestTokenFilter runs each input through the unicode tokenizer followed by
// the token filter from NewTokenFilter (see extractTerms) and verifies that
// the produced terms are exactly the expected ones, ignoring order.
func TestTokenFilter(t *testing.T) {
	scenarios := []struct {
		Input string
		Terms []string
	}{
		{
			Input: "Dockerfile",
			Terms: []string{"Dockerfile"},
		},
		{
			Input: "Dockerfile.rootless",
			Terms: []string{"Dockerfile.rootless"},
		},
		{
			Input: "a/b/c/Dockerfile.rootless",
			Terms: []string{"a", "a/b", "a/b/c", "a/b/c/Dockerfile.rootless", "Dockerfile.rootless", "Dockerfile.rootless/c", "Dockerfile.rootless/c/b", "Dockerfile.rootless/c/b/a"},
		},
		{
			Input: "",
			Terms: []string{},
		},
	}

	for _, scenario := range scenarios {
		t.Run(fmt.Sprintf("ensure terms of '%s'", scenario.Input), func(t *testing.T) {
			terms := extractTerms(scenario.Input)

			// ElementsMatch compares both slices as multisets. A length check
			// plus "every actual term is expected" would still pass when the
			// filter emits a duplicate of one term in place of another
			// expected term, so it cannot detect a missing term.
			assert.ElementsMatch(t, scenario.Terms, terms)
		})
	}
}

func extractTerms(input string) []string {
	tokens := tokenize(input)
	filteredTokens := filter(tokens)
	terms := make([]string, 0, len(filteredTokens))

	for _, token := range filteredTokens {
		terms = append(terms, string(token.Term))
	}

	return terms
}

// filter passes input through a freshly constructed token filter
// (NewTokenFilter) and returns the resulting token stream.
func filter(input analysis.TokenStream) analysis.TokenStream {
	return NewTokenFilter().Filter(input)
}

// tokenize converts input to bytes and splits it into a token stream using
// bleve's unicode tokenizer.
func tokenize(input string) analysis.TokenStream {
	raw := []byte(input)
	return unicode.NewUnicodeTokenizer().Tokenize(raw)
}