aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2020-08-23 14:15:29 +0100
committerGitHub <noreply@github.com>2020-08-23 14:15:29 +0100
commite429c1164ec68b154b7f06db2c2e04dd25bad094 (patch)
tree1c86f2337213e21c1258d935cadc32eed72c8895
parent2026d885d6c11793e6fd68d06f163de90440e52d (diff)
downloadgitea-e429c1164ec68b154b7f06db2c2e04dd25bad094.tar.gz
gitea-e429c1164ec68b154b7f06db2c2e04dd25bad094.zip
Ensure that the detected charset order is set in chardet test (#12574)
TestToUTF8WithFallback is a recurrent source of spurious test failures, despite existing code that sets the detected charset order. This happens because the preferred detected charset order is not initialised for these tests. This PR ensures that the order is set at the start of each test, which also allows future tests to be written with different orders. Replaces #12571. Closes #12571. Signed-off-by: Andrew Thornton <art27@cantab.net>
-rw-r--r--modules/charset/charset_test.go28
1 files changed, 23 insertions, 5 deletions
diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go
index 394a42c71f..33f0c10a7a 100644
--- a/modules/charset/charset_test.go
+++ b/modules/charset/charset_test.go
@@ -5,6 +5,7 @@
package charset
import (
+ "strings"
"testing"
"code.gitea.io/gitea/modules/setting"
@@ -12,6 +13,22 @@ import (
"github.com/stretchr/testify/assert"
)
+// resetDefaultCharsetsOrder rebuilds setting.Repository.DetectedCharsetScore
+// from setting.Repository.DetectedCharsetsOrder: each distinct, trimmed and
+// lower-cased charset name is mapped to its rank in first-seen order.
+func resetDefaultCharsetsOrder() {
+	scores := make(map[string]int, len(setting.Repository.DetectedCharsetsOrder))
+	for _, name := range setting.Repository.DetectedCharsetsOrder {
+		canonical := strings.ToLower(strings.TrimSpace(name))
+		// Only the first occurrence of a name is ranked; duplicates are skipped
+		// so ranks stay contiguous (len(scores) is the next unused rank).
+		if _, seen := scores[canonical]; !seen {
+			scores[canonical] = len(scores)
+		}
+	}
+	setting.Repository.DetectedCharsetScore = scores
+}
+
func TestRemoveBOMIfPresent(t *testing.T) {
res := RemoveBOMIfPresent([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
@@ -21,6 +38,7 @@ func TestRemoveBOMIfPresent(t *testing.T) {
}
func TestToUTF8WithErr(t *testing.T) {
+ resetDefaultCharsetsOrder()
var res string
var err error
@@ -76,6 +94,7 @@ func TestToUTF8WithErr(t *testing.T) {
}
func TestToUTF8WithFallback(t *testing.T) {
+ resetDefaultCharsetsOrder()
// "ABC"
res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43})
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -116,7 +135,7 @@ func TestToUTF8WithFallback(t *testing.T) {
}
func TestToUTF8(t *testing.T) {
-
+ resetDefaultCharsetsOrder()
// Note: golang compiler seems so behave differently depending on the current
// locale, so some conversions might behave differently. For that reason, we don't
// depend on particular conversions but in expected behaviors.
@@ -165,6 +184,7 @@ func TestToUTF8(t *testing.T) {
}
func TestToUTF8DropErrors(t *testing.T) {
+ resetDefaultCharsetsOrder()
// "ABC"
res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43})
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -204,6 +224,7 @@ func TestToUTF8DropErrors(t *testing.T) {
}
func TestDetectEncoding(t *testing.T) {
+ resetDefaultCharsetsOrder()
testSuccess := func(b []byte, expected string) {
encoding, err := DetectEncoding(b)
assert.NoError(t, err)
@@ -225,10 +246,7 @@ func TestDetectEncoding(t *testing.T) {
b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a}
encoding, err := DetectEncoding(b)
assert.NoError(t, err)
- // due to a race condition in `chardet` library, it could either detect
- // "ISO-8859-1" or "IS0-8859-2" here. Technically either is correct, so
- // we accept either.
- assert.Contains(t, encoding, "ISO-8859")
+ assert.Contains(t, encoding, "ISO-8859-1")
old := setting.Repository.AnsiCharset
setting.Repository.AnsiCharset = "placeholder"