about | summary | refs | log | tree | commit | diff | stats
path: root/modules/charset
diff options
context:
space:
mode:
author wxiaoguang <wxiaoguang@gmail.com> 2023-12-17 22:38:54 +0800
committer GitHub <noreply@github.com> 2023-12-17 14:38:54 +0000
commit 20929edc9962281e35a81756d76dd1caa5741ff8 (patch)
tree 138bbb9c97e609136fe83cf6e5524949218d1e72 /modules/charset
parent 408a4842240e7dd906e682196bd4254d6c76fcb9 (diff)
download gitea-20929edc9962281e35a81756d76dd1caa5741ff8.tar.gz
gitea-20929edc9962281e35a81756d76dd1caa5741ff8.zip
Add option to disable ambiguous unicode characters detection (#28454)
* Close #24483 * Close #28123 * Close #23682 * Close #23149 (maybe more)
Diffstat (limited to 'modules/charset')
-rw-r--r-- modules/charset/escape.go 59
-rw-r--r-- modules/charset/escape_stream.go 2
-rw-r--r-- modules/charset/escape_test.go 52
3 files changed, 27 insertions, 86 deletions
diff --git a/modules/charset/escape.go b/modules/charset/escape.go
index 5608836a45..92e417d1f7 100644
--- a/modules/charset/escape.go
+++ b/modules/charset/escape.go
@@ -8,11 +8,12 @@
package charset
import (
- "bufio"
+ "html/template"
"io"
"strings"
"code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
)
@@ -20,20 +21,18 @@ import (
const RuneNBSP = 0xa0
// EscapeControlHTML escapes the unicode control sequences in a provided html document
-func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
+func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
sb := &strings.Builder{}
- outputStream := &HTMLStreamerWriter{Writer: sb}
- streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
-
- if err := StreamHTML(strings.NewReader(text), streamer); err != nil {
- streamer.escaped.HasError = true
- log.Error("Error whilst escaping: %v", err)
- }
- return streamer.escaped, sb.String()
+ escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
+ return escaped, template.HTML(sb.String())
}
-// EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte
+// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
+ if !setting.UI.AmbiguousUnicodeDetection {
+ _, err = io.Copy(writer, reader)
+ return &EscapeStatus{}, err
+ }
outputStream := &HTMLStreamerWriter{Writer: writer}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
@@ -43,41 +42,3 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.
}
return streamer.escaped, err
}
-
-// EscapeControlStringReader escapes the unicode control sequences in a provided reader of string content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte. HTML line breaks are not inserted after every newline by this method.
-func EscapeControlStringReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
- bufRd := bufio.NewReader(reader)
- outputStream := &HTMLStreamerWriter{Writer: writer}
- streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
-
- for {
- line, rdErr := bufRd.ReadString('\n')
- if len(line) > 0 {
- if err := streamer.Text(line); err != nil {
- streamer.escaped.HasError = true
- log.Error("Error whilst escaping: %v", err)
- return streamer.escaped, err
- }
- }
- if rdErr != nil {
- if rdErr != io.EOF {
- err = rdErr
- }
- break
- }
- }
- return streamer.escaped, err
-}
-
-// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
-func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
- sb := &strings.Builder{}
- outputStream := &HTMLStreamerWriter{Writer: sb}
- streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
-
- if err := streamer.Text(text); err != nil {
- streamer.escaped.HasError = true
- log.Error("Error whilst escaping: %v", err)
- }
- return streamer.escaped, sb.String()
-}
diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go
index 03d4cfc0c1..3f08fd94a4 100644
--- a/modules/charset/escape_stream.go
+++ b/modules/charset/escape_stream.go
@@ -64,7 +64,7 @@ func (e *escapeStreamer) Text(data string) error {
until, next = nextIdxs[0]+pos, nextIdxs[1]+pos
}
- // from pos until until we know that the runes are not \r\t\n or even ' '
+ // from pos until we know that the runes are not \r\t\n or even ' '
runes := make([]rune, 0, next-until)
positions := make([]int, 0, next-until+1)
diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go
index f63c5c5c52..a353ced631 100644
--- a/modules/charset/escape_test.go
+++ b/modules/charset/escape_test.go
@@ -4,11 +4,14 @@
package charset
import (
- "reflect"
"strings"
"testing"
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/modules/translation"
+
+ "github.com/stretchr/testify/assert"
)
type escapeControlTest struct {
@@ -132,22 +135,8 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
},
}
-func TestEscapeControlString(t *testing.T) {
- for _, tt := range escapeControlTests {
- t.Run(tt.name, func(t *testing.T) {
- status, result := EscapeControlString(tt.text, &translation.MockLocale{})
- if !reflect.DeepEqual(*status, tt.status) {
- t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status)
- }
- if result != tt.result {
- t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result)
- }
- })
- }
-}
-
func TestEscapeControlReader(t *testing.T) {
- // lets add some control characters to the tests
+ // add some control characters to the tests
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
copy(tests, escapeControlTests)
@@ -169,29 +158,20 @@ func TestEscapeControlReader(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- input := strings.NewReader(tt.text)
output := &strings.Builder{}
- status, err := EscapeControlReader(input, output, &translation.MockLocale{})
- result := output.String()
- if err != nil {
- t.Errorf("EscapeControlReader(): err = %v", err)
- }
-
- if !reflect.DeepEqual(*status, tt.status) {
- t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status)
- }
- if result != tt.result {
- t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result)
- }
+ status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{})
+ assert.NoError(t, err)
+ assert.Equal(t, tt.status, *status)
+ assert.Equal(t, tt.result, output.String())
})
}
}
-func TestEscapeControlReader_panic(t *testing.T) {
- bs := make([]byte, 0, 20479)
- bs = append(bs, 'A')
- for i := 0; i < 6826; i++ {
- bs = append(bs, []byte("—")...)
- }
- _, _ = EscapeControlString(string(bs), &translation.MockLocale{})
+func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
+ defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
+ _, out := EscapeControlHTML("a test", &translation.MockLocale{})
+ assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
+ setting.UI.AmbiguousUnicodeDetection = false
+ _, out = EscapeControlHTML("a test", &translation.MockLocale{})
+ assert.EqualValues(t, `a test`, out)
}