about | summary | refs | log | tree | commit | diff | stats
path: root/modules/charset
diff options
context:
space:
mode:
Diffstat (limited to 'modules/charset')
-rw-r--r-- modules/charset/charset_test.go 28
1 files changed, 23 insertions, 5 deletions
diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go
index 394a42c71f..33f0c10a7a 100644
--- a/modules/charset/charset_test.go
+++ b/modules/charset/charset_test.go
@@ -5,6 +5,7 @@
package charset
import (
+ "strings"
"testing"
"code.gitea.io/gitea/modules/setting"
@@ -12,6 +13,22 @@ import (
"github.com/stretchr/testify/assert"
)
+// resetDefaultCharsetsOrder rebuilds setting.Repository.DetectedCharsetScore
+// from setting.Repository.DetectedCharsetsOrder: every distinct charset name,
+// trimmed and lower-cased, is ranked by the order of its first appearance.
+func resetDefaultCharsetsOrder() {
+	ranks := map[string]int{}
+	for _, raw := range setting.Repository.DetectedCharsetsOrder {
+		name := strings.ToLower(strings.TrimSpace(raw))
+		// Duplicates keep their first rank, so ranks stay dense: 0, 1, 2, ...
+		if _, seen := ranks[name]; seen {
+			continue
+		}
+		ranks[name] = len(ranks) // names ranked so far == next free rank
+	}
+	setting.Repository.DetectedCharsetScore = ranks
+}
+
func TestRemoveBOMIfPresent(t *testing.T) {
res := RemoveBOMIfPresent([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
@@ -21,6 +38,7 @@ func TestRemoveBOMIfPresent(t *testing.T) {
}
func TestToUTF8WithErr(t *testing.T) {
+ resetDefaultCharsetsOrder()
var res string
var err error
@@ -76,6 +94,7 @@ func TestToUTF8WithErr(t *testing.T) {
}
func TestToUTF8WithFallback(t *testing.T) {
+ resetDefaultCharsetsOrder()
// "ABC"
res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43})
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -116,7 +135,7 @@ func TestToUTF8WithFallback(t *testing.T) {
}
func TestToUTF8(t *testing.T) {
-
+ resetDefaultCharsetsOrder()
// Note: golang compiler seems so behave differently depending on the current
// locale, so some conversions might behave differently. For that reason, we don't
// depend on particular conversions but in expected behaviors.
@@ -165,6 +184,7 @@ func TestToUTF8(t *testing.T) {
}
func TestToUTF8DropErrors(t *testing.T) {
+ resetDefaultCharsetsOrder()
// "ABC"
res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43})
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -204,6 +224,7 @@ func TestToUTF8DropErrors(t *testing.T) {
}
func TestDetectEncoding(t *testing.T) {
+ resetDefaultCharsetsOrder()
testSuccess := func(b []byte, expected string) {
encoding, err := DetectEncoding(b)
assert.NoError(t, err)
@@ -225,10 +246,7 @@ func TestDetectEncoding(t *testing.T) {
b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a}
encoding, err := DetectEncoding(b)
assert.NoError(t, err)
- // due to a race condition in `chardet` library, it could either detect
- // "ISO-8859-1" or "IS0-8859-2" here. Technically either is correct, so
- // we accept either.
- assert.Contains(t, encoding, "ISO-8859")
+ assert.Contains(t, encoding, "ISO-8859-1")
old := setting.Repository.AnsiCharset
setting.Repository.AnsiCharset = "placeholder"