diff options
author | wxiaoguang <wxiaoguang@gmail.com> | 2021-12-09 13:41:17 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-09 05:41:17 +0000 |
commit | c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a (patch) | |
tree | c44c00a56704303753f81af6c2324c5fce3b6838 /modules/util | |
parent | 183175263d9b45af6b27b677a9a0e96b45fbd4d3 (diff) | |
download | gitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.tar.gz gitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.zip |
Fix a panic in NotifyCreateIssueComment (caused by string truncation) (#17928)
* Fix a panic in NotifyCreateIssueComment (caused by string truncation)
* more unit tests
* refactor
* fix some edge cases
* use SplitStringAtByteN for comment content
Diffstat (limited to 'modules/util')
-rw-r--r-- | modules/util/truncate.go | 43 |
-rw-r--r-- | modules/util/truncate_test.go | 61 |
2 files changed, 95 insertions, 9 deletions
diff --git a/modules/util/truncate.go b/modules/util/truncate.go index 8d0f630973..38c2c0d1d6 100644 --- a/modules/util/truncate.go +++ b/modules/util/truncate.go @@ -6,20 +6,23 @@ package util import "unicode/utf8" +// in UTF8 "…" is 3 bytes so doesn't really gain us anything... +const utf8Ellipsis = "…" +const asciiEllipsis = "..." + // SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.) func SplitStringAtByteN(input string, n int) (left, right string) { if len(input) <= n { - left = input - return + return input, "" } if !utf8.ValidString(input) { - left = input[:n-3] + "..." - right = "..." + input[n-3:] - return + if n-3 < 0 { + return input, "" + } + return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:] } - // in UTF8 "…" is 3 bytes so doesn't really gain us anything... end := 0 for end <= n-3 { _, size := utf8.DecodeRuneInString(input[end:]) @@ -29,7 +32,29 @@ func SplitStringAtByteN(input string, n int) (left, right string) { end += size } - left = input[:end] + "…" - right = "…" + input[end:] - return + return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:] +} + +// SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.) 
+func SplitStringAtRuneN(input string, n int) (left, right string) { + if !utf8.ValidString(input) { + if len(input) <= n || n-3 < 0 { + return input, "" + } + return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:] + } + + if utf8.RuneCountInString(input) <= n { + return input, "" + } + + count := 0 + end := 0 + for count < n-1 { + _, size := utf8.DecodeRuneInString(input[end:]) + end += size + count++ + } + + return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:] } diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go new file mode 100644 index 0000000000..e505a6ee4a --- /dev/null +++ b/modules/util/truncate_test.go @@ -0,0 +1,61 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package util + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSplitString(t *testing.T) { + type testCase struct { + input string + n int + leftSub string + ellipsis string + } + + test := func(tc []*testCase, f func(input string, n int) (left, right string)) { + for _, c := range tc { + l, r := f(c.input, c.n) + if c.ellipsis != "" { + assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub) + assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):]) + } else { + assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub) + assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "") + } + } + } + + tc := []*testCase{ + {"abc123xyz", 0, "", utf8Ellipsis}, + {"abc123xyz", 1, "", utf8Ellipsis}, + {"abc123xyz", 4, "a", utf8Ellipsis}, + {"啊bc123xyz", 4, "", utf8Ellipsis}, + {"啊bc123xyz", 6, "啊", utf8Ellipsis}, + {"啊bc", 5, "啊bc", ""}, + {"啊bc", 6, "啊bc", ""}, + {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, + 
{"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, + {"\xef\x03", 1, "\xef\x03", ""}, + } + test(tc, SplitStringAtByteN) + + tc = []*testCase{ + {"abc123xyz", 0, "", utf8Ellipsis}, + {"abc123xyz", 1, "", utf8Ellipsis}, + {"abc123xyz", 4, "abc", utf8Ellipsis}, + {"啊bc123xyz", 4, "啊bc", utf8Ellipsis}, + {"啊bc123xyz", 6, "啊bc12", utf8Ellipsis}, + {"啊bc", 3, "啊bc", ""}, + {"啊bc", 4, "啊bc", ""}, + {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, + {"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, + {"\xef\x03", 1, "\xef\x03", ""}, + } + test(tc, SplitStringAtRuneN) +} |