aboutsummaryrefslogtreecommitdiffstats
path: root/modules/util/truncate.go
blob: 52534d3cac7cbef56407cf2762d555f464a87b67 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package util

import (
	"strings"
	"unicode"
	"unicode/utf8"
)

// Ellipsis markers used by the truncation helpers in this file.
// In UTF-8 "…" occupies 3 bytes, exactly like "...", so it doesn't really gain us anything in terms of bytes.
const (
	// utf8Ellipsis is the default marker for truncated strings.
	utf8Ellipsis  = "…"
	// asciiEllipsis is used instead of utf8Ellipsis when an invalid rune was encountered during truncation.
	asciiEllipsis = "..."
)

// IsLikelyEllipsisLeftPart reports whether s ends with one of the ellipsis markers ("…" or "..."),
// i.e. whether it looks like the left part of a string that has been truncated with an ellipsis.
func IsLikelyEllipsisLeftPart(s string) bool {
	for _, marker := range [...]string{utf8Ellipsis, asciiEllipsis} {
		if strings.HasSuffix(s, marker) {
			return true
		}
	}
	return false
}

// ellipsisDisplayGuessWidth guesses how many ASCII-wide columns the rune r takes when displayed.
//
// The goal is to keep truncated strings as long as possible: CJK/emoji runes are counted as 2 columns
// instead of their 3-4 byte length. This is only a best guess (still better than counting bytes),
// because the real width of a rune can't be determined without a real font and renderer.
//
// ATTENTION: the guessed width must never be zero, more details in ellipsisDisplayString's comment.
func ellipsisDisplayGuessWidth(r rune) int {
	const narrow, wide = 1, 2

	// Everything up to U+00FF is treated as a single column.
	if r <= 255 {
		return narrow
	}
	// The ideographic (CJK) space is in the Space category, so it has to be
	// handled before the category test below to keep its width of 2.
	if r == '\u3000' {
		return wide
	}
	// Mark, Other-format, Other-surrogate and Space runes count as 1 (never 0).
	if unicode.In(r, unicode.M, unicode.Cf, unicode.Cs, unicode.Z) {
		return narrow
	}
	// Everything else (CJK, emoji, ...) is assumed to be wide.
	return wide
}

// EllipsisDisplayString shortens str for display purposes.
// The limit is the approximate number of ASCII-wide columns available (CJK/emoji count as 2 columns each).
// A string whose byte length fits into limit is returned as is; otherwise the result is cut
// and "…" or "..." is appended (no ellipsis is appended when limit is below 3).
// The result is meant to never contain more runes than limit.
func EllipsisDisplayString(str string, limit int) string {
	shortened, _, _, _ := ellipsisDisplayString(str, limit, ellipsisDisplayGuessWidth)
	return shortened
}

// EllipsisDisplayStringX behaves like EllipsisDisplayString but additionally returns the right part:
// the remainder that was cut off, prefixed with an ellipsis, or an empty string if nothing was cut.
func EllipsisDisplayStringX(str string, limit int) (left, right string) {
	left, right = ellipsisDisplayStringX(str, limit, ellipsisDisplayGuessWidth)
	return left, right
}

// ellipsisDisplayStringX truncates str with ellipsisDisplayString (using widthGuess to measure runes)
// and also returns the part that was cut off.
//
// left is the possibly truncated string. right is empty when nothing was truncated; otherwise it is
// the remainder of str starting at the truncation offset, prefixed with an ellipsis. The ASCII
// ellipsis "..." is used instead of "…" when an invalid rune was encountered while truncating,
// or when the remainder itself does not start with a valid rune.
func ellipsisDisplayStringX(str string, limit int, widthGuess func(rune) int) (left, right string) {
	left, offset, truncated, encounterInvalid := ellipsisDisplayString(str, limit, widthGuess)
	if !truncated {
		return left, ""
	}

	rest := str[offset:]
	// Decode straight from the string: no need for an unsafe string-to-bytes conversion
	// just to look at the first rune of the remainder.
	if r, _ := utf8.DecodeRuneInString(rest); r == utf8.RuneError {
		encounterInvalid = true
	}

	ellipsis := utf8Ellipsis
	if encounterInvalid {
		ellipsis = asciiEllipsis
	}
	return left, ellipsis + rest
}

// ellipsisDisplayString truncates str so that its guessed display width fits into limit.
//
// widthGuess returns the display width of a single rune and must never return zero.
// While scanning, 3 columns are kept in reserve for the ellipsis.
//
// Results:
//   - res: str itself when no truncation is needed, otherwise the kept prefix followed by an
//     ellipsis (no ellipsis is appended when limit is below 3)
//   - offset: the byte index in str at which the kept prefix ends (len(str) when not truncated)
//   - truncated: whether str was cut
//   - encounterInvalid: whether utf8.RuneError was seen while scanning, in which case the
//     ASCII ellipsis "..." is used instead of "…"
func ellipsisDisplayString(str string, limit int, widthGuess func(rune) int) (res string, offset int, truncated, encounterInvalid bool) {
	if len(str) <= limit {
		return str, len(str), false, false
	}

	// To future maintainers: this logic must guarantee that the length of the returned runes doesn't exceed the limit,
	// because the returned string will also be used as database value. UTF-8 VARCHAR(10) could store 10 rune characters,
	// So each rune must be counted as at least 1 width.
	// Even if there are some special Unicode characters (zero-width, combining, etc.), they should NEVER be counted as zero.
	pos, used := 0, 0
	// Assume every rune fits until the loop proves otherwise.
	offset = len(str)
	for i, r := range str {
		encounterInvalid = encounterInvalid || r == utf8.RuneError
		pos = i
		runeWidth := widthGuess(r)
		if used+runeWidth+3 > limit {
			// The cut position is taken from the range index, which always reflects the bytes
			// really consumed. Summing utf8.RuneLen(r) would be wrong for invalid input: an invalid
			// byte is reported as U+FFFD (RuneLen 3) although only 1 byte of str was consumed,
			// which would make the offset overshoot and even run past the end of str.
			offset = i
			break
		}
		used += runeWidth
	}

	// if at most 3 runes (at most 12 bytes) remain, then maybe we could add them, no need to ellipse
	if len(str)-pos <= 12 {
		var nextCnt, nextWidth int
		for _, r := range str[pos:] {
			if nextCnt >= 4 {
				break
			}
			nextWidth += widthGuess(r)
			nextCnt++
		}
		if nextCnt <= 3 && used+nextWidth <= limit {
			return str, len(str), false, false
		}
	}
	if limit < 3 {
		// if the limit is so small, do not add ellipsis
		return str[:offset], offset, true, false
	}
	ellipsis := utf8Ellipsis
	if encounterInvalid {
		ellipsis = asciiEllipsis
	}
	return str[:offset] + ellipsis, offset, true, encounterInvalid
}

// EllipsisTruncateRunes splits str into a truncated left part and the cut-off right part,
// counting every rune as exactly 1 wide, so limit is effectively measured in runes.
func EllipsisTruncateRunes(str string, limit int) (left, right string) {
	oneColumnPerRune := func(rune) int { return 1 }
	return ellipsisDisplayStringX(str, limit, oneColumnPerRune)
}

// TruncateRunes returns a truncated string with given rune limit,
// it returns the input string unchanged if its rune length doesn't exceed the limit.
// A zero or negative limit yields an empty string.
func TruncateRunes(str string, limit int) string {
	if limit <= 0 {
		// nothing may be kept; also avoids slicing with a negative bound below
		return ""
	}
	// "<=" (not "<"): a string with exactly limit runes needs no truncation,
	// so it must come back untouched and without the []rune round-trip.
	if utf8.RuneCountInString(str) <= limit {
		return str
	}
	// Note: the []rune conversion turns invalid bytes in the kept prefix into U+FFFD.
	return string([]rune(str)[:limit])
}