aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwxiaoguang <wxiaoguang@gmail.com>2022-10-08 19:22:44 +0800
committerGitHub <noreply@github.com>2022-10-08 12:22:44 +0100
commit768e16dad19e98e5a6090bd09a7662da6079eae0 (patch)
treee68861e02b9f99bd0e78fa6f44641512a2f547c7
parent7bb12d7efa9fd01e15ddc14b3b8e40df4a4abd42 (diff)
downloadgitea-768e16dad19e98e5a6090bd09a7662da6079eae0.tar.gz
gitea-768e16dad19e98e5a6090bd09a7662da6079eae0.zip
Use weighted algorithm for string matching when finding files in repo (#21370)
This PR is for: * https://github.com/go-gitea/gitea/issues/20231 Now, when a user searches `word`, they always see `/{word}.txt` before `/{w}e-g{o}t-{r}esult.{d}at` Demo: When searching "a", "a.ext" comes first. Then when searching "at", the longer matched "template" comes first. <details> ![image](https://user-images.githubusercontent.com/2114189/194588738-3644d891-956f-40e4-b79b-b97d34265456.png) ![image](https://user-images.githubusercontent.com/2114189/194588797-9b124670-4e1e-4510-a170-780295ed89b8.png) </details> This PR also makes the frontend tests could import feature JS files by introducing `jestSetup.js` Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: silverwind <me@silverwind.io>
-rw-r--r--jest.config.js5
-rw-r--r--web_src/js/features/repo-findfile.js87
-rw-r--r--web_src/js/features/repo-findfile.test.js34
-rw-r--r--web_src/js/testUtils/jestSetup.js5
-rw-r--r--web_src/js/utils.js30
-rw-r--r--web_src/js/utils.test.js18
6 files changed, 113 insertions, 66 deletions
diff --git a/jest.config.js b/jest.config.js
index 34d47acc43..2767c1b7ae 100644
--- a/jest.config.js
+++ b/jest.config.js
@@ -1,3 +1,4 @@
+// to run tests with ES6 module, node must run with "--experimental-vm-modules", or see Makefile's "test-frontend" for reference
export default {
rootDir: 'web_src',
setupFilesAfterEnv: ['jest-extended/all'],
@@ -7,6 +8,8 @@ export default {
transform: {
'\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js',
},
+ setupFiles: [
+ './js/testUtils/jestSetup.js', // prepare global variables used by our code (eg: window.config)
+ ],
verbose: false,
};
-
diff --git a/web_src/js/features/repo-findfile.js b/web_src/js/features/repo-findfile.js
index 700a7fe693..750b906cef 100644
--- a/web_src/js/features/repo-findfile.js
+++ b/web_src/js/features/repo-findfile.js
@@ -1,45 +1,96 @@
import $ from 'jquery';
import {svg} from '../svg.js';
-import {strSubMatch} from '../utils.js';
const {csrf} = window.config;
const threshold = 50;
let files = [];
let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult;
-function filterRepoFiles(filter) {
- const treeLink = $repoFindFileInput.attr('data-url-tree-link');
- $repoFindFileTableBody.empty();
- const fileRes = [];
+// return the case-insensitive sub-match result as an array: [unmatched, matched, unmatched, matched, ...]
+// res[even] is unmatched, res[odd] is matched, see unit tests for examples
+// argument subLower must be a lower-cased string.
+export function strSubMatch(full, subLower) {
+ const res = [''];
+ let i = 0, j = 0;
+ const fullLower = full.toLowerCase();
+ while (i < subLower.length && j < fullLower.length) {
+ if (subLower[i] === fullLower[j]) {
+ if (res.length % 2 !== 0) res.push('');
+ res[res.length - 1] += full[j];
+ j++;
+ i++;
+ } else {
+ if (res.length % 2 === 0) res.push('');
+ res[res.length - 1] += full[j];
+ j++;
+ }
+ }
+ if (i !== subLower.length) {
+ // if the sub string doesn't match the full, only return the full as unmatched.
+ return [full];
+ }
+ if (j < full.length) {
+ // append remaining chars from full to result as unmatched
+ if (res.length % 2 === 0) res.push('');
+ res[res.length - 1] += full.substring(j);
+ }
+ return res;
+}
+
+export function calcMatchedWeight(matchResult) {
+ let weight = 0;
+ for (let i = 0; i < matchResult.length; i++) {
+ if (i % 2 === 1) { // matches are on odd indices, see strSubMatch
+ // use a function f(x+x) > f(x) + f(x) to make the longer matched string has higher weight.
+ weight += matchResult[i].length * matchResult[i].length;
+ }
+ }
+ return weight;
+}
+
+export function filterRepoFilesWeighted(files, filter) {
+ let filterResult = [];
if (filter) {
- for (let i = 0; i < files.length && fileRes.length < threshold; i++) {
- const subMatch = strSubMatch(files[i], filter);
- if (subMatch.length > 1) {
- fileRes.push(subMatch);
+ const filterLower = filter.toLowerCase();
+ // TODO: for large repo, this loop could be slow, maybe there could be one more limit:
+ // ... && filterResult.length < threshold * 20, wait for more feedbacks
+ for (let i = 0; i < files.length; i++) {
+ const res = strSubMatch(files[i], filterLower);
+ if (res.length > 1) { // length==1 means unmatched, >1 means having matched sub strings
+ filterResult.push({matchResult: res, matchWeight: calcMatchedWeight(res)});
}
}
+ filterResult.sort((a, b) => b.matchWeight - a.matchWeight);
+ filterResult = filterResult.slice(0, threshold);
} else {
for (let i = 0; i < files.length && i < threshold; i++) {
- fileRes.push([files[i]]);
+ filterResult.push({matchResult: [files[i]], matchWeight: 0});
}
}
+ return filterResult;
+}
+
+function filterRepoFiles(filter) {
+ const treeLink = $repoFindFileInput.attr('data-url-tree-link');
+ $repoFindFileTableBody.empty();
+ const filterResult = filterRepoFilesWeighted(files, filter);
const tmplRow = `<tr><td><a></a></td></tr>`;
- $repoFindFileNoResult.toggle(fileRes.length === 0);
- for (const matchRes of fileRes) {
+ $repoFindFileNoResult.toggle(filterResult.length === 0);
+ for (const r of filterResult) {
const $row = $(tmplRow);
const $a = $row.find('a');
- $a.attr('href', `${treeLink}/${matchRes.join('')}`);
+ $a.attr('href', `${treeLink}/${r.matchResult.join('')}`);
const $octiconFile = $(svg('octicon-file')).addClass('mr-3');
$a.append($octiconFile);
- // if the target file path is "abc/xyz", to search "bx", then the matchRes is ['a', 'b', 'c/', 'x', 'yz']
- // the matchRes[odd] is matched and highlighted to red.
- for (let j = 0; j < matchRes.length; j++) {
- if (!matchRes[j]) continue;
- const $span = $('<span>').text(matchRes[j]);
+ // if the target file path is "abc/xyz", to search "bx", then the matchResult is ['a', 'b', 'c/', 'x', 'yz']
+ // the matchResult[odd] is matched and highlighted to red.
+ for (let j = 0; j < r.matchResult.length; j++) {
+ if (!r.matchResult[j]) continue;
+ const $span = $('<span>').text(r.matchResult[j]);
if (j % 2 === 1) $span.addClass('ui text red');
$a.append($span);
}
diff --git a/web_src/js/features/repo-findfile.test.js b/web_src/js/features/repo-findfile.test.js
new file mode 100644
index 0000000000..2d96ed4463
--- /dev/null
+++ b/web_src/js/features/repo-findfile.test.js
@@ -0,0 +1,34 @@
+import {strSubMatch, calcMatchedWeight, filterRepoFilesWeighted} from './repo-findfile.js';
+
+describe('Repo Find Files', () => {
+ test('strSubMatch', () => {
+ expect(strSubMatch('abc', '')).toEqual(['abc']);
+ expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
+ expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
+ expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
+ expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
+ expect(strSubMatch('abc', 'z')).toEqual(['abc']);
+ expect(strSubMatch('abc', 'az')).toEqual(['abc']);
+
+ expect(strSubMatch('ABc', 'ac')).toEqual(['', 'A', 'B', 'c']);
+ expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);
+
+ expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
+ expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
+ });
+
+ test('calcMatchedWeight', () => {
+ expect(calcMatchedWeight(['a', 'b', 'c', 'd']) < calcMatchedWeight(['a', 'bc', 'c'])).toBeTruthy();
+ });
+
+ test('filterRepoFilesWeighted', () => {
+ // the first matched result should always be the "word.txt"
+ let res = filterRepoFilesWeighted(['word.txt', 'we-got-result.dat'], 'word');
+ expect(res).toHaveLength(2);
+ expect(res[0].matchResult).toEqual(['', 'word', '.txt']);
+
+ res = filterRepoFilesWeighted(['we-got-result.dat', 'word.txt'], 'word');
+ expect(res).toHaveLength(2);
+ expect(res[0].matchResult).toEqual(['', 'word', '.txt']);
+ });
+});
diff --git a/web_src/js/testUtils/jestSetup.js b/web_src/js/testUtils/jestSetup.js
new file mode 100644
index 0000000000..779be9f0c7
--- /dev/null
+++ b/web_src/js/testUtils/jestSetup.js
@@ -0,0 +1,5 @@
+window.config = {
+ csrfToken: 'jest-test-csrf-token-123456',
+ pageData: {},
+ i18n: {},
+};
diff --git a/web_src/js/utils.js b/web_src/js/utils.js
index 8e8dc01be1..4020b7a7f4 100644
--- a/web_src/js/utils.js
+++ b/web_src/js/utils.js
@@ -59,36 +59,6 @@ export function parseIssueHref(href) {
return {owner, repo, type, index};
}
-// return the sub-match result as an array: [unmatched, matched, unmatched, matched, ...]
-// res[even] is unmatched, res[odd] is matched, see unit tests for examples
-export function strSubMatch(full, sub) {
- const res = [''];
- let i = 0, j = 0;
- const subLower = sub.toLowerCase(), fullLower = full.toLowerCase();
- while (i < subLower.length && j < fullLower.length) {
- if (subLower[i] === fullLower[j]) {
- if (res.length % 2 !== 0) res.push('');
- res[res.length - 1] += full[j];
- j++;
- i++;
- } else {
- if (res.length % 2 === 0) res.push('');
- res[res.length - 1] += full[j];
- j++;
- }
- }
- if (i !== sub.length) {
- // if the sub string doesn't match the full, only return the full as unmatched.
- return [full];
- }
- if (j < full.length) {
- // append remaining chars from full to result as unmatched
- if (res.length % 2 === 0) res.push('');
- res[res.length - 1] += full.substring(j);
- }
- return res;
-}
-
// pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200
export function prettyNumber(num, locale = 'en-US') {
if (typeof num !== 'number') return '';
diff --git a/web_src/js/utils.test.js b/web_src/js/utils.test.js
index 5c17c162af..762f29f6fe 100644
--- a/web_src/js/utils.test.js
+++ b/web_src/js/utils.test.js
@@ -1,5 +1,5 @@
import {
- basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, strSubMatch,
+ basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref,
prettyNumber, parseUrl,
} from './utils.js';
@@ -86,22 +86,6 @@ test('parseIssueHref', () => {
expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined});
});
-test('strSubMatch', () => {
- expect(strSubMatch('abc', '')).toEqual(['abc']);
- expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
- expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
- expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
- expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
- expect(strSubMatch('abc', 'z')).toEqual(['abc']);
- expect(strSubMatch('abc', 'az')).toEqual(['abc']);
-
- expect(strSubMatch('abc', 'aC')).toEqual(['', 'a', 'b', 'c']);
- expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);
-
- expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
- expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
-});
-
test('prettyNumber', () => {
expect(prettyNumber()).toEqual('');
expect(prettyNumber(null)).toEqual('');