about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author wxiaoguang <wxiaoguang@gmail.com> 2024-12-26 14:19:35 +0800
committer GitHub <noreply@github.com> 2024-12-26 14:19:35 +0800
commit 550abdbc2443209d6b2f4801c6e3c180b48d73bb (patch)
tree c2e34e77924fab9da60d88d6e82de37e182b18fb
parent 9bfa9f450da509bf61c6c762bcb1dc1d6c9bd0bd (diff)
download gitea-550abdbc2443209d6b2f4801c6e3c180b48d73bb.tar.gz
gitea-550abdbc2443209d6b2f4801c6e3c180b48d73bb.zip
Improve "ellipsis string" (#32989)
-rw-r--r-- modules/util/truncate.go 35
-rw-r--r-- modules/util/truncate_test.go 33
2 files changed, 60 insertions, 8 deletions
diff --git a/modules/util/truncate.go b/modules/util/truncate.go
index 331a98ef98..2bce248281 100644
--- a/modules/util/truncate.go
+++ b/modules/util/truncate.go
@@ -5,6 +5,7 @@ package util
import (
"strings"
+ "unicode"
"unicode/utf8"
)
@@ -18,6 +19,30 @@ func IsLikelyEllipsisLeftPart(s string) bool {
return strings.HasSuffix(s, utf8Ellipsis) || strings.HasSuffix(s, asciiEllipsis)
}
+func ellipsisGuessDisplayWidth(r rune) int {
+ // Guesses the display width of rune r in ASCII-width units; every path below returns 1 or 2.
+ // To make the truncated string as long as possible,
+ // CJK/emoji chars are counted as 2-ASCII width rather than by their 3-4 byte length.
+ // This is only a best guess (better than counting bytes): it's impossible to determine
+ // a rune's width 100% correctly without a real font and renderer.
+ // ATTENTION: the guessed width can't be zero, more details in ellipsisDisplayString's comment
+ if r <= 255 { // U+0000..U+00FF (ASCII and Latin-1) always count as 1
+ return 1
+ }
+
+ switch {
+ case r == '\u3000': /* U+3000 ideographic space: checked before the Z (Space) case below so it still counts as 2 */
+ return 2
+ case unicode.Is(unicode.M, r), /* (Mark) */
+ unicode.Is(unicode.Cf, r), /* (Other, format) */
+ unicode.Is(unicode.Cs, r), /* (Other, surrogate) */
+ unicode.Is(unicode.Z /* (Space) */, r):
+ return 1 // counted as 1, not 0, because the guessed width can't be zero
+ default:
+ return 2 // any other rune above U+00FF (e.g. CJK, emoji) is guessed as 2-ASCII width
+ }
+}
+
// EllipsisDisplayString returns a truncated short string for display purpose.
// The length is the approximate number of ASCII-width in the string (CJK/emoji are 2-ASCII width)
// It appends "…" or "..." at the end of truncated string.
@@ -56,10 +81,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc
for i, r := range str {
encounterInvalid = encounterInvalid || r == utf8.RuneError
pos = i
- runeWidth := 1
- if r >= 128 {
- runeWidth = 2 // CJK/emoji chars are considered as 2-ASCII width
- }
+ runeWidth := ellipsisGuessDisplayWidth(r)
if used+runeWidth+3 > limit {
break
}
@@ -74,10 +96,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc
if nextCnt >= 4 {
break
}
- nextWidth++
- if r >= 128 {
- nextWidth++ // CJK/emoji chars are considered as 2-ASCII width
- }
+ nextWidth += ellipsisGuessDisplayWidth(r)
nextCnt++
}
if nextCnt <= 3 && used+nextWidth <= limit {
diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go
index 573d6ece26..8789c824f5 100644
--- a/modules/util/truncate_test.go
+++ b/modules/util/truncate_test.go
@@ -11,6 +11,30 @@ import (
"github.com/stretchr/testify/assert"
)
+func TestEllipsisGuessDisplayWidth(t *testing.T) {
+ cases := []struct {
+ r string // input; may contain more than one rune
+ want int // expected sum of the guessed widths of all runes in r
+ }{
+ {r: "a", want: 1}, // ASCII
+ {r: "é", want: 1}, // non-ASCII, but still within the r <= 255 range
+ {r: "测", want: 2}, // CJK
+ {r: "⚽", want: 2}, // emoji
+ {r: "☁️", want: 3}, // 2 runes: the symbol (2) followed by a mark (1)
+ {r: "\u200B", want: 1}, // ZWSP (zero-width space): still guessed as 1, never 0
+ {r: "\u3000", want: 2}, // ideographic space: the only space-like rune guessed as 2
+ }
+ for _, c := range cases {
+ t.Run(c.r, func(t *testing.T) { // the input string itself is the subtest name
+ w := 0
+ for _, r := range c.r { // add up the per-rune guesses so multi-rune inputs are covered
+ w += ellipsisGuessDisplayWidth(r)
+ }
+ assert.Equal(t, c.want, w, "hex=% x", []byte(c.r)) // failure message shows the input bytes in hex
+ })
+ }
+}
+
func TestEllipsisString(t *testing.T) {
cases := []struct {
limit int
@@ -37,6 +61,15 @@ func TestEllipsisString(t *testing.T) {
{limit: 7, input: "测试文本", left: "测试…", right: "…文本"},
{limit: 8, input: "测试文本", left: "测试文本", right: ""},
{limit: 9, input: "测试文本", left: "测试文本", right: ""},
+
+ {limit: 6, input: "测试abc", left: "测…", right: "…试abc"},
+ {limit: 7, input: "测试abc", left: "测试abc", right: ""}, // exactly 7-width
+ {limit: 8, input: "测试abc", left: "测试abc", right: ""},
+
+ {limit: 7, input: "测abc试啊", left: "测ab…", right: "…c试啊"},
+ {limit: 8, input: "测abc试啊", left: "测abc…", right: "…试啊"},
+ {limit: 9, input: "测abc试啊", left: "测abc试啊", right: ""}, // exactly 9-width
+ {limit: 10, input: "测abc试啊", left: "测abc试啊", right: ""},
}
for _, c := range cases {
t.Run(fmt.Sprintf("%s(%d)", c.input, c.limit), func(t *testing.T) {