diff options
author | Giteabot <teabot@gitea.io> | 2024-06-17 15:41:47 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-17 07:41:47 +0000 |
commit | ed0fc0ec46466eb25e77cfefbf7f6d05f6f902b8 (patch) | |
tree | 4b71c5306a71da77cf1c142e29e7022dcaf98d09 | |
parent | fa307167f97a185fefd58f016a96ccdf55783b1c (diff) | |
download | gitea-ed0fc0ec46466eb25e77cfefbf7f6d05f6f902b8.tar.gz gitea-ed0fc0ec46466eb25e77cfefbf7f6d05f6f902b8.zip |
Fix natural sort (#31384) (#31394)
Backport #31384 by wxiaoguang
Fix #31374
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
-rw-r--r-- | modules/base/natural_sort.go | 57 | ||||
-rw-r--r-- | modules/base/natural_sort_test.go | 47 |
2 files changed, 87 insertions, 17 deletions
diff --git a/modules/base/natural_sort.go b/modules/base/natural_sort.go index 0f90ec70ce..acb9002276 100644 --- a/modules/base/natural_sort.go +++ b/modules/base/natural_sort.go @@ -4,12 +4,67 @@ package base import ( + "unicode/utf8" + "golang.org/x/text/collate" "golang.org/x/text/language" ) +func naturalSortGetRune(str string, pos int) (r rune, size int, has bool) { + if pos >= len(str) { + return 0, 0, false + } + r, size = utf8.DecodeRuneInString(str[pos:]) + if r == utf8.RuneError { + r, size = rune(str[pos]), 1 // if invalid input, treat it as a single byte ascii + } + return r, size, true +} + +func naturalSortAdvance(str string, pos int) (end int, isNumber bool) { + end = pos + for { + r, size, has := naturalSortGetRune(str, end) + if !has { + break + } + isCurRuneNum := '0' <= r && r <= '9' + if end == pos { + isNumber = isCurRuneNum + end += size + } else if isCurRuneNum == isNumber { + end += size + } else { + break + } + } + return end, isNumber +} + // NaturalSortLess compares two strings so that they could be sorted in natural order func NaturalSortLess(s1, s2 string) bool { + // There is a bug in Golang's collate package: https://github.com/golang/go/issues/67997 + // text/collate: CompareString(collate.Numeric) returns wrong result for "0.0" vs "1.0" #67997 + // So we need to handle the number parts by ourselves c := collate.New(language.English, collate.Numeric) - return c.CompareString(s1, s2) < 0 + pos1, pos2 := 0, 0 + for pos1 < len(s1) && pos2 < len(s2) { + end1, isNum1 := naturalSortAdvance(s1, pos1) + end2, isNum2 := naturalSortAdvance(s2, pos2) + part1, part2 := s1[pos1:end1], s2[pos2:end2] + if isNum1 && isNum2 { + if part1 != part2 { + if len(part1) != len(part2) { + return len(part1) < len(part2) + } + return part1 < part2 + } + } else { + if cmp := c.CompareString(part1, part2); cmp != 0 { + return cmp < 0 + } + } + pos1, pos2 = end1, end2 + } + return len(s1) < len(s2) } diff --git a/modules/base/natural_sort_test.go b/modules/base/natural_sort_test.go index f27a4eb53a..b001bc4ac9 100644 --- a/modules/base/natural_sort_test.go +++ b/modules/base/natural_sort_test.go @@ -10,21 +10,36 @@ import ( ) func TestNaturalSortLess(t *testing.T) { - test := func(s1, s2 string, less bool) { - assert.Equal(t, less, NaturalSortLess(s1, s2), "s1=%q, s2=%q", s1, s2) + testLess := func(s1, s2 string) { + assert.True(t, NaturalSortLess(s1, s2), "s1<s2 should be true: s1=%q, s2=%q", s1, s2) + assert.False(t, NaturalSortLess(s2, s1), "s2<s1 should be false: s1=%q, s2=%q", s1, s2) } - test("v1.20.0", "v1.2.0", false) - test("v1.20.0", "v1.29.0", true) - test("v1.20.0", "v1.20.0", false) - test("abc", "bcd", true) - test("a-1-a", "a-1-b", true) - test("2", "12", true) - test("a", "ab", true) - - test("A", "b", true) - test("a", "B", true) - - test("cafe", "café", true) - test("café", "cafe", false) - test("caff", "café", false) + testEqual := func(s1, s2 string) { + assert.False(t, NaturalSortLess(s1, s2), "s1<s2 should be false: s1=%q, s2=%q", s1, s2) + assert.False(t, NaturalSortLess(s2, s1), "s2<s1 should be false: s1=%q, s2=%q", s1, s2) + } + + testEqual("", "") + testLess("", "a") + testLess("", "1") + + testLess("v1.2", "v1.2.0") + testLess("v1.2.0", "v1.10.0") + testLess("v1.20.0", "v1.29.0") + testEqual("v1.20.0", "v1.20.0") + + testLess("a", "A") + testLess("a", "B") + testLess("A", "b") + testLess("A", "ab") + + testLess("abc", "bcd") + testLess("a-1-a", "a-1-b") + testLess("2", "12") + + testLess("cafe", "café") + testLess("café", "caff") + + testLess("A-2", "A-11") + testLess("0.txt", "1.txt") } |