Fix a panic in NotifyCreateIssueComment (caused by string truncation) (#17928)

* Fix a panic in NotifyCreateIssueComment (caused by string truncation) * more unit tests * refactor * fix some edge cases * use SplitStringAtByteN for comment content
author: wxiaoguang <wxiaoguang@gmail.com> 2021-12-09 13:41:17 +0800
committer: GitHub <noreply@github.com> 2021-12-09 05:41:17 +0000
commit: c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a (patch)
tree: c44c00a56704303753f81af6c2324c5fce3b6838 /modules/util
parent: 183175263d9b45af6b27b677a9a0e96b45fbd4d3 (diff)
download: gitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.tar.gz
gitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.zip
2 files changed, 95 insertions, 9 deletions
diff --git a/modules/util/truncate.go b/modules/util/truncate.go
index 8d0f630973..38c2c0d1d6 100644
--- a/modules/util/truncate.go
+++ b/modules/util/truncate.go
@@ -6,20 +6,23 @@ package util
 
 import "unicode/utf8"
 
+// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
+const utf8Ellipsis = "…"
+const asciiEllipsis = "..."
+
 // SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.)
 func SplitStringAtByteN(input string, n int) (left, right string) {
 	if len(input) <= n {
-		left = input
-		return
+		return input, ""
 	}
 
 	if !utf8.ValidString(input) {
-		left = input[:n-3] + "..."
-		right = "..." + input[n-3:]
-		return
+		if n-3 < 0 {
+			return input, ""
+		}
+		return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
 	}
 
-	// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
 	end := 0
 	for end <= n-3 {
 		_, size := utf8.DecodeRuneInString(input[end:])
@@ -29,7 +32,29 @@ func SplitStringAtByteN(input string, n int) (left, right string) {
 		end += size
 	}
 
-	left = input[:end] + "…"
-	right = "…" + input[end:]
-	return
+	return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
+}
+
+// SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.)
+func SplitStringAtRuneN(input string, n int) (left, right string) {
+	if !utf8.ValidString(input) {
+		if len(input) <= n || n-3 < 0 {
+			return input, ""
+		}
+		return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
+	}
+
+	if utf8.RuneCountInString(input) <= n {
+		return input, ""
+	}
+
+	count := 0
+	end := 0
+	for count < n-1 {
+		_, size := utf8.DecodeRuneInString(input[end:])
+		end += size
+		count++
+	}
+
+	return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
 }
diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go
new file mode 100644
index 0000000000..e505a6ee4a
--- /dev/null
+++ b/modules/util/truncate_test.go
@@ -0,0 +1,61 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSplitString(t *testing.T) {
+	type testCase struct {
+		input    string
+		n        int
+		leftSub  string
+		ellipsis string
+	}
+
+	test := func(tc []*testCase, f func(input string, n int) (left, right string)) {
+		for _, c := range tc {
+			l, r := f(c.input, c.n)
+			if c.ellipsis != "" {
+				assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
+				assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):])
+			} else {
+				assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
+				assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "")
+			}
+		}
+	}
+
+	tc := []*testCase{
+		{"abc123xyz", 0, "", utf8Ellipsis},
+		{"abc123xyz", 1, "", utf8Ellipsis},
+		{"abc123xyz", 4, "a", utf8Ellipsis},
+		{"啊bc123xyz", 4, "", utf8Ellipsis},
+		{"啊bc123xyz", 6, "啊", utf8Ellipsis},
+		{"啊bc", 5, "啊bc", ""},
+		{"啊bc", 6, "啊bc", ""},
+		{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
+		{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
+		{"\xef\x03", 1, "\xef\x03", ""},
+	}
+	test(tc, SplitStringAtByteN)
+
+	tc = []*testCase{
+		{"abc123xyz", 0, "", utf8Ellipsis},
+		{"abc123xyz", 1, "", utf8Ellipsis},
+		{"abc123xyz", 4, "abc", utf8Ellipsis},
+		{"啊bc123xyz", 4, "啊bc", utf8Ellipsis},
+		{"啊bc123xyz", 6, "啊bc12", utf8Ellipsis},
+		{"啊bc", 3, "啊bc", ""},
+		{"啊bc", 4, "啊bc", ""},
+		{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
+		{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
+		{"\xef\x03", 1, "\xef\x03", ""},
+	}
+	test(tc, SplitStringAtRuneN)
+}
author	wxiaoguang <wxiaoguang@gmail.com>	2021-12-09 13:41:17 +0800
committer	GitHub <noreply@github.com>	2021-12-09 05:41:17 +0000
commit	c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a (patch)
tree	c44c00a56704303753f81af6c2324c5fce3b6838 /modules/util
parent	183175263d9b45af6b27b677a9a0e96b45fbd4d3 (diff)
download	gitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.tar.gz gitea-c7e23401a3b7d1e38aacd857c1ec9be53a2fa63a.zip