summaryrefslogtreecommitdiffstats
path: root/modules/util
diff options
context:
space:
mode:
authorwxiaoguang <wxiaoguang@gmail.com>2021-12-09 13:41:17 +0800
committerGitHub <noreply@github.com>2021-12-09 05:41:17 +0000
commitc7e23401a3b7d1e38aacd857c1ec9be53a2fa63a (patch)
treec44c00a56704303753f81af6c2324c5fce3b6838 /modules/util
parent183175263d9b45af6b27b677a9a0e96b45fbd4d3 (diff)
downloadgitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.tar.gz
gitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.zip
Fix a panic in NotifyCreateIssueComment (caused by string truncation) (#17928)
* Fix a panic in NotifyCreateIssueComment (caused by string truncation) * more unit tests * refactor * fix some edge cases * use SplitStringAtByteN for comment content
Diffstat (limited to 'modules/util')
-rw-r--r--modules/util/truncate.go43
-rw-r--r--modules/util/truncate_test.go61
2 files changed, 95 insertions, 9 deletions
diff --git a/modules/util/truncate.go b/modules/util/truncate.go
index 8d0f630973..38c2c0d1d6 100644
--- a/modules/util/truncate.go
+++ b/modules/util/truncate.go
@@ -6,20 +6,23 @@ package util
import "unicode/utf8"
+// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
+const utf8Ellipsis = "…"
+const asciiEllipsis = "..."
+
// SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.)
func SplitStringAtByteN(input string, n int) (left, right string) {
if len(input) <= n {
- left = input
- return
+ return input, ""
}
if !utf8.ValidString(input) {
- left = input[:n-3] + "..."
- right = "..." + input[n-3:]
- return
+ if n-3 < 0 {
+ return input, ""
+ }
+ return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
}
- // in UTF8 "…" is 3 bytes so doesn't really gain us anything...
end := 0
for end <= n-3 {
_, size := utf8.DecodeRuneInString(input[end:])
@@ -29,7 +32,29 @@ func SplitStringAtByteN(input string, n int) (left, right string) {
end += size
}
- left = input[:end] + "…"
- right = "…" + input[end:]
- return
+ return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
+}
+
+// SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.)
+func SplitStringAtRuneN(input string, n int) (left, right string) {
+ if !utf8.ValidString(input) {
+ if len(input) <= n || n-3 < 0 {
+ return input, ""
+ }
+ return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
+ }
+
+ if utf8.RuneCountInString(input) <= n {
+ return input, ""
+ }
+
+ count := 0
+ end := 0
+ for count < n-1 {
+ _, size := utf8.DecodeRuneInString(input[end:])
+ end += size
+ count++
+ }
+
+ return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
}
diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go
new file mode 100644
index 0000000000..e505a6ee4a
--- /dev/null
+++ b/modules/util/truncate_test.go
@@ -0,0 +1,61 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestSplitString(t *testing.T) {
+ type testCase struct {
+ input string
+ n int
+ leftSub string
+ ellipsis string
+ }
+
+ test := func(tc []*testCase, f func(input string, n int) (left, right string)) {
+ for _, c := range tc {
+ l, r := f(c.input, c.n)
+ if c.ellipsis != "" {
+ assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
+ assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):])
+ } else {
+ assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
+ assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "")
+ }
+ }
+ }
+
+ tc := []*testCase{
+ {"abc123xyz", 0, "", utf8Ellipsis},
+ {"abc123xyz", 1, "", utf8Ellipsis},
+ {"abc123xyz", 4, "a", utf8Ellipsis},
+ {"啊bc123xyz", 4, "", utf8Ellipsis},
+ {"啊bc123xyz", 6, "啊", utf8Ellipsis},
+ {"啊bc", 5, "啊bc", ""},
+ {"啊bc", 6, "啊bc", ""},
+ {"abc\xef\x03\xfe", 3, "", asciiEllipsis},
+ {"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
+ {"\xef\x03", 1, "\xef\x03", ""},
+ }
+ test(tc, SplitStringAtByteN)
+
+ tc = []*testCase{
+ {"abc123xyz", 0, "", utf8Ellipsis},
+ {"abc123xyz", 1, "", utf8Ellipsis},
+ {"abc123xyz", 4, "abc", utf8Ellipsis},
+ {"啊bc123xyz", 4, "啊bc", utf8Ellipsis},
+ {"啊bc123xyz", 6, "啊bc12", utf8Ellipsis},
+ {"啊bc", 3, "啊bc", ""},
+ {"啊bc", 4, "啊bc", ""},
+ {"abc\xef\x03\xfe", 3, "", asciiEllipsis},
+ {"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
+ {"\xef\x03", 1, "\xef\x03", ""},
+ }
+ test(tc, SplitStringAtRuneN)
+}