diff options
author | Gusted <williamzijl7@hotmail.com> | 2022-02-26 22:15:04 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-26 23:15:04 +0100 |
commit | 4fb718d405ac5dd26276ae54883afb9431a29375 (patch) | |
tree | 64781c44622ea277a7e8dbaaea72d595691d4bae /modules | |
parent | df35049196de70b5d0855e9777456e3f6ffd7772 (diff) | |
download | gitea-4fb718d405ac5dd26276ae54883afb9431a29375.tar.gz gitea-4fb718d405ac5dd26276ae54883afb9431a29375.zip |
Don't treat BOM escape sequence as hidden character. (#18909) (#18910)
* Don't treat BOM escape sequence as hidden character. (#18909)
Backport #18909
Diffstat (limited to 'modules')
-rw-r--r-- | modules/charset/escape.go | 8 | ||||
-rw-r--r-- | modules/charset/escape_test.go | 24 |
2 files changed, 27 insertions, 5 deletions
diff --git a/modules/charset/escape.go b/modules/charset/escape.go index d2e8fb0d87..9883700e88 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -63,6 +63,7 @@ func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { buf := make([]byte, 4096) readStart := 0 + runeCount := 0 var n int var writePos int @@ -79,6 +80,8 @@ readingloop: for i < len(bs) { r, size := utf8.DecodeRune(bs[i:]) + runeCount++ + // Now handle the codepoints switch { case r == utf8.RuneError: @@ -113,6 +116,8 @@ readingloop: lineHasRTLScript = false lineHasLTRScript = false + case runeCount == 1 && r == 0xFEFF: // UTF BOM + // the first BOM is safe case r == '\r' || r == '\t' || r == ' ': // These are acceptable control characters and space characters case unicode.IsSpace(r): @@ -144,7 +149,8 @@ readingloop: return } writePos = i + size - case unicode.Is(unicode.C, r): + // 65279 == BOM rune. + case unicode.Is(unicode.C, r) && r != rune(65279): escaped.Escaped = true escaped.HasControls = true if writePos < i { diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index 1804381413..01ccca7724 100644 --- a/modules/charset/escape_test.go +++ b/modules/charset/escape_test.go @@ -129,6 +129,14 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, "\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n", status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, }, + { + // UTF-8/16/32 all use the same codepoint for BOM + // Gitea could read UTF-16/32 content and convert into UTF-8 internally then render it, so we only process UTF-8 internally + name: "UTF BOM", + text: "\xef\xbb\xbftest", + result: "\xef\xbb\xbftest", + status: EscapeStatus{HasLTRScript: true}, + }, } func TestEscapeControlString(t *testing.T) { @@ -163,10 +171,18 @@ func TestEscapeControlReader(t *testing.T) { // lets add some control characters to the tests tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) copy(tests, escapeControlTests) + + // if there is a BOM, we should keep the BOM + addPrefix := func(prefix, s string) string { + if strings.HasPrefix(s, "\xef\xbb\xbf") { + return s[:3] + prefix + s[3:] + } + return prefix + s + } for _, test := range escapeControlTests { test.name += " (+Control)" - test.text = "\u001E" + test.text - test.result = `<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">` + "\u001e" + `</span></span>` + test.result + test.text = addPrefix("\u001E", test.text) + test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">`+"\u001e"+`</span></span>`, test.result) test.status.Escaped = true test.status.HasControls = true tests = append(tests, test) @@ -174,8 +190,8 @@ func TestEscapeControlReader(t *testing.T) { for _, test := range escapeControlTests { test.name += " (+Mark)" - test.text = "\u0300" + test.text - test.result = `<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">` + "\u0300" + `</span></span>` + test.result + test.text = addPrefix("\u0300", test.text) + test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">`+"\u0300"+`</span></span>`, test.result) test.status.Escaped = true test.status.HasMarks = true tests = append(tests, test) |