diff options
author | wxiaoguang <wxiaoguang@gmail.com> | 2023-12-17 22:38:54 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-17 14:38:54 +0000 |
commit | 20929edc9962281e35a81756d76dd1caa5741ff8 (patch) | |
tree | 138bbb9c97e609136fe83cf6e5524949218d1e72 /modules/charset | |
parent | 408a4842240e7dd906e682196bd4254d6c76fcb9 (diff) | |
download | gitea-20929edc9962281e35a81756d76dd1caa5741ff8.tar.gz gitea-20929edc9962281e35a81756d76dd1caa5741ff8.zip |
Add option to disable ambiguous unicode characters detection (#28454)
* Close #24483
* Close #28123
* Close #23682
* Close #23149
(maybe more)
Diffstat (limited to 'modules/charset')
-rw-r--r-- | modules/charset/escape.go | 59 | ||||
-rw-r--r-- | modules/charset/escape_stream.go | 2 | ||||
-rw-r--r-- | modules/charset/escape_test.go | 52 |
3 files changed, 27 insertions, 86 deletions
diff --git a/modules/charset/escape.go b/modules/charset/escape.go index 5608836a45..92e417d1f7 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -8,11 +8,12 @@ package charset import ( - "bufio" + "html/template" "io" "strings" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/translation" ) @@ -20,20 +21,18 @@ import ( const RuneNBSP = 0xa0 // EscapeControlHTML escapes the unicode control sequences in a provided html document -func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { +func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { sb := &strings.Builder{} - outputStream := &HTMLStreamerWriter{Writer: sb} - streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) - - if err := StreamHTML(strings.NewReader(text), streamer); err != nil { - streamer.escaped.HasError = true - log.Error("Error whilst escaping: %v", err) - } - return streamer.escaped, sb.String() + escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader + return escaped, template.HTML(sb.String()) } -// EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte +// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { + if !setting.UI.AmbiguousUnicodeDetection { + _, err = io.Copy(writer, reader) + return &EscapeStatus{}, err + } outputStream := &HTMLStreamerWriter{Writer: writer} streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) @@ -43,41 +42,3 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation. } return streamer.escaped, err } - -// EscapeControlStringReader escapes the unicode control sequences in a provided reader of string content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte. HTML line breaks are not inserted after every newline by this method. -func EscapeControlStringReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { - bufRd := bufio.NewReader(reader) - outputStream := &HTMLStreamerWriter{Writer: writer} - streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) - - for { - line, rdErr := bufRd.ReadString('\n') - if len(line) > 0 { - if err := streamer.Text(line); err != nil { - streamer.escaped.HasError = true - log.Error("Error whilst escaping: %v", err) - return streamer.escaped, err - } - } - if rdErr != nil { - if rdErr != io.EOF { - err = rdErr - } - break - } - } - return streamer.escaped, err -} - -// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string -func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { - sb := &strings.Builder{} - outputStream := &HTMLStreamerWriter{Writer: sb} - streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) - - if err := streamer.Text(text); err != nil { - streamer.escaped.HasError = true - log.Error("Error whilst escaping: %v", err) - } - return streamer.escaped, sb.String() -} diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index 03d4cfc0c1..3f08fd94a4 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -64,7 +64,7 @@ func (e *escapeStreamer) Text(data string) error { until, next = nextIdxs[0]+pos, nextIdxs[1]+pos } - // from pos until until we know that the runes are not \r\t\n or even ' ' + // from pos until we know that the runes are not \r\t\n or even ' ' runes := make([]rune, 0, next-until) positions := make([]int, 0, next-until+1) diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index f63c5c5c52..a353ced631 100644 --- a/modules/charset/escape_test.go +++ b/modules/charset/escape_test.go @@ -4,11 +4,14 @@ package charset import ( - "reflect" "strings" "testing" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" "code.gitea.io/gitea/modules/translation" + + "github.com/stretchr/testify/assert" ) type escapeControlTest struct { @@ -132,22 +135,8 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, }, } -func TestEscapeControlString(t *testing.T) { - for _, tt := range escapeControlTests { - t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlString(tt.text, &translation.MockLocale{}) - if !reflect.DeepEqual(*status, tt.status) { - t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) - } - if result != tt.result { - t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result) - } - }) - } -} - func TestEscapeControlReader(t *testing.T) { - // lets add some control characters to the tests + // add some control characters to the tests tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) copy(tests, escapeControlTests) @@ -169,29 +158,20 @@ func TestEscapeControlReader(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - input := strings.NewReader(tt.text) output := &strings.Builder{} - status, err := EscapeControlReader(input, output, &translation.MockLocale{}) - result := output.String() - if err != nil { - t.Errorf("EscapeControlReader(): err = %v", err) - } - - if !reflect.DeepEqual(*status, tt.status) { - t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status) - } - if result != tt.result { - t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result) - } + status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}) + assert.NoError(t, err) + assert.Equal(t, tt.status, *status) + assert.Equal(t, tt.result, output.String()) }) } } -func TestEscapeControlReader_panic(t *testing.T) { - bs := make([]byte, 0, 20479) - bs = append(bs, 'A') - for i := 0; i < 6826; i++ { - bs = append(bs, []byte("—")...) - } - _, _ = EscapeControlString(string(bs), &translation.MockLocale{}) +func TestSettingAmbiguousUnicodeDetection(t *testing.T) { + defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)() + _, out := EscapeControlHTML("a test", &translation.MockLocale{}) + assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out) + setting.UI.AmbiguousUnicodeDetection = false + _, out = EscapeControlHTML("a test", &translation.MockLocale{}) + assert.EqualValues(t, `a test`, out) } |