aboutsummaryrefslogtreecommitdiffstats
path: root/models/db
diff options
context:
space:
mode:
authorwxiaoguang <wxiaoguang@gmail.com>2024-01-10 19:03:23 +0800
committerGitHub <noreply@github.com>2024-01-10 11:03:23 +0000
commit2df7563f3176aa8c7dcb070f660d53da4bb66e78 (patch)
treeacf64c3f4d40f15e0fc28d4f6060644172cc46f0 /models/db
parenta80debc208fbf0ecf6ca734e454cae08adafb570 (diff)
downloadgitea-2df7563f3176aa8c7dcb070f660d53da4bb66e78.tar.gz
gitea-2df7563f3176aa8c7dcb070f660d53da4bb66e78.zip
Recommend/convert to use case-sensitive collation for MySQL/MSSQL (#28662)
Mainly for MySQL/MSSQL. It is important for Gitea to use case-sensitive database charset collation. If the database is using a case-insensitive collation, Gitea will show startup error/warning messages, and show the errors/warnings on the admin panel's Self-Check page. Make `gitea doctor convert` work for MySQL to convert the collations of database & tables & columns. * Fix #28131 ## :warning: BREAKING :warning: It is not quite breaking, but it's highly recommended to convert the database&table&column to a consistent and case-sensitive collation.
Diffstat (limited to 'models/db')
-rw-r--r--models/db/collation.go191
-rw-r--r--models/db/convert.go15
-rwxr-xr-xmodels/db/engine.go2
3 files changed, 203 insertions, 5 deletions
diff --git a/models/db/collation.go b/models/db/collation.go
new file mode 100644
index 0000000000..2f5ff2bf05
--- /dev/null
+++ b/models/db/collation.go
@@ -0,0 +1,191 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package db
+
+import (
+ "errors"
+ "fmt"
+ "strings"
+
+ "code.gitea.io/gitea/modules/container"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+
+ "xorm.io/xorm"
+ "xorm.io/xorm/schemas"
+)
+
+// CheckCollationsResult is the outcome of CheckCollations for a MySQL or MSSQL database.
+type CheckCollationsResult struct {
+ // ExpectedCollation is the collation Gitea wants to use: setting.Database.CharsetCollation when it is set,
+ // otherwise the first candidate collation that the server reports as available.
+ ExpectedCollation string
+ // AvailableCollation is the set of collation names returned by the dialect-specific "find available collations" query.
+ AvailableCollation container.Set[string]
+ // DatabaseCollation is the collation of the current database as reported by the server.
+ DatabaseCollation string
+ // IsCollationCaseSensitive reports whether the named collation is treated as case-sensitive (the rule is dialect-specific).
+ IsCollationCaseSensitive func(s string) bool
+ // CollationEquals reports whether two collation names are considered the same.
+ // It is plain string equality by default; for MySQL the "utf8mb4_" prefix is ignored.
+ CollationEquals func(a, b string) bool
+ // ExistingTableNumber is the number of tables found in the database when the check ran.
+ ExistingTableNumber int
+
+ // InconsistentCollationColumns lists, as "table.column", the columns whose collation is not case-sensitive
+ // or differs from the database collation. It is nil when all columns match the expected collation
+ // or all columns match the database collation.
+ InconsistentCollationColumns []string
+}
+
+// findAvailableCollationsMySQL queries the server with SHOW COLLATION and returns, as a set, the names of
+// the collations that are either "utf8mb4_bin" or match the LIKE pattern '%\_as\_cs%'.
+// When the query fails, a nil set and the error are returned.
+func findAvailableCollationsMySQL(x *xorm.Engine) (ret container.Set[string], err error) {
+	const query = "SHOW COLLATION WHERE (Collation = 'utf8mb4_bin') OR (Collation LIKE '%\\_as\\_cs%')"
+
+	// only the "Collation" column of the result is needed
+	var rows []struct {
+		Collation string
+	}
+	err = x.SQL(query).Find(&rows)
+	if err != nil {
+		return nil, err
+	}
+
+	ret = make(container.Set[string], len(rows))
+	for i := range rows {
+		ret.Add(rows[i].Collation)
+	}
+	return ret, nil
+}
+
+// findAvailableCollationsMSSQL queries sys.fn_helpcollations() and returns, as a set, the names of
+// the collations matching the LIKE pattern '%[_]CS[_]AS%'.
+// When the query fails, a nil set and the error are returned.
+func findAvailableCollationsMSSQL(x *xorm.Engine) (ret container.Set[string], err error) {
+	const query = "SELECT * FROM sys.fn_helpcollations() WHERE name LIKE '%[_]CS[_]AS%'"
+
+	// only the "name" column of the result is needed
+	var rows []struct {
+		Name string
+	}
+	err = x.SQL(query).Find(&rows)
+	if err != nil {
+		return nil, err
+	}
+
+	ret = make(container.Set[string], len(rows))
+	for i := range rows {
+		ret.Add(rows[i].Name)
+	}
+	return ret, nil
+}
+
+// CheckCollations inspects the collation setup of the database behind x.
+//
+// Only MySQL and MSSQL are examined; for any other database type it returns (nil, nil).
+// For the supported types the result carries the database collation, the available collations,
+// the expected collation (setting.Database.CharsetCollation, or else the first available candidate)
+// and the "table.column" names of the columns whose collation is considered inconsistent.
+//
+// An error is returned when the table metadata or collation queries fail, when the database collation
+// comes back empty, or when no expected collation can be determined.
+func CheckCollations(x *xorm.Engine) (*CheckCollationsResult, error) {
+	tables, err := x.DBMetas()
+	if err != nil {
+		return nil, err
+	}
+
+	result := &CheckCollationsResult{
+		ExistingTableNumber: len(tables),
+		CollationEquals:     func(a, b string) bool { return a == b },
+	}
+
+	// collations to try, in order of preference, when no collation is configured
+	var candidates []string
+
+	switch x.Dialect().URI().DBType {
+	case schemas.MYSQL:
+		if _, err = x.SQL("SELECT @@collation_database").Get(&result.DatabaseCollation); err != nil {
+			return nil, err
+		}
+		result.IsCollationCaseSensitive = func(name string) bool {
+			return name == "utf8mb4_bin" || strings.HasSuffix(name, "_as_cs")
+		}
+		candidates = []string{"utf8mb4_0900_as_cs", "uca1400_as_cs", "utf8mb4_bin"}
+		if result.AvailableCollation, err = findAvailableCollationsMySQL(x); err != nil {
+			return nil, err
+		}
+		result.CollationEquals = func(a, b string) bool {
+			// MariaDB adds the "utf8mb4_" prefix, eg: "utf8mb4_uca1400_as_cs", but not the name "uca1400_as_cs" in "SHOW COLLATION"
+			// At the moment, it's safe to ignore the database difference, just trim the prefix and compare. It could be fixed easily if there is any problem in the future.
+			if a == b {
+				return true
+			}
+			return strings.TrimPrefix(a, "utf8mb4_") == strings.TrimPrefix(b, "utf8mb4_")
+		}
+	case schemas.MSSQL:
+		if _, err = x.SQL("SELECT DATABASEPROPERTYEX(DB_NAME(), 'Collation')").Get(&result.DatabaseCollation); err != nil {
+			return nil, err
+		}
+		result.IsCollationCaseSensitive = func(name string) bool {
+			return strings.HasSuffix(name, "_CS_AS")
+		}
+		candidates = []string{"Latin1_General_CS_AS"}
+		if result.AvailableCollation, err = findAvailableCollationsMSSQL(x); err != nil {
+			return nil, err
+		}
+	default:
+		// other database types are not checked
+		return nil, nil
+	}
+
+	if result.DatabaseCollation == "" {
+		return nil, errors.New("unable to get collation for current database")
+	}
+
+	// the configured collation wins; otherwise pick the first candidate the server offers
+	result.ExpectedCollation = setting.Database.CharsetCollation
+	if result.ExpectedCollation == "" {
+		for _, candidate := range candidates {
+			if result.AvailableCollation.Contains(candidate) {
+				result.ExpectedCollation = candidate
+				break
+			}
+		}
+	}
+	if result.ExpectedCollation == "" {
+		return nil, errors.New("unable to find a suitable collation for current database")
+	}
+
+	everyColumnMatchesExpected, everyColumnMatchesDatabase := true, true
+	for _, table := range tables {
+		for _, col := range table.Columns() {
+			if col.Collation == "" {
+				continue // the column carries no collation, there is nothing to compare
+			}
+			sameAsExpected := result.CollationEquals(col.Collation, result.ExpectedCollation)
+			sameAsDatabase := result.CollationEquals(col.Collation, result.DatabaseCollation)
+			if !sameAsExpected {
+				everyColumnMatchesExpected = false
+			}
+			if !sameAsDatabase {
+				everyColumnMatchesDatabase = false
+			}
+			// a column is only fine when it is case-sensitive AND uses the database collation
+			if !(sameAsDatabase && result.IsCollationCaseSensitive(col.Collation)) {
+				result.InconsistentCollationColumns = append(result.InconsistentCollationColumns, fmt.Sprintf("%s.%s", table.Name, col.Name))
+			}
+		}
+	}
+	// if all columns match expected collation or all match database collation, then it could also be considered as "consistent"
+	if everyColumnMatchesExpected || everyColumnMatchesDatabase {
+		result.InconsistentCollationColumns = nil
+	}
+	return result, nil
+}
+
+// CheckCollationsDefaultEngine runs CheckCollations against the package's engine variable x
+// and returns its result unchanged.
+func CheckCollationsDefaultEngine() (*CheckCollationsResult, error) {
+ return CheckCollations(x)
+}
+
+// alterDatabaseCollation executes an ALTER DATABASE statement that switches the current database to the
+// given collation (for MySQL the character set is set to utf8mb4 as well).
+// It returns the error of the statement, or "unsupported database type" for anything but MySQL and MSSQL.
+func alterDatabaseCollation(x *xorm.Engine, collation string) error {
+	var stmt string
+	switch x.Dialect().URI().DBType {
+	case schemas.MYSQL:
+		stmt = "ALTER DATABASE CHARACTER SET utf8mb4 COLLATE " + collation
+	case schemas.MSSQL:
+		// TODO: MSSQL has many limitations on changing database collation, it could fail in many cases.
+		stmt = "ALTER DATABASE CURRENT COLLATE " + collation
+	default:
+		return errors.New("unsupported database type")
+	}
+	_, err := x.Exec(stmt)
+	return err
+}
+
+// preprocessDatabaseCollation checks database & table column collation, and alters the database collation if needed.
+//
+// It is a best-effort startup step: it has no return value and reports every problem through the log only,
+// so it never blocks the startup.
+func preprocessDatabaseCollation(x *xorm.Engine) {
+	r, err := CheckCollations(x)
+	if err != nil {
+		log.Error("Failed to check database collation: %v", err)
+		return // there is no usable check result to act on, do not rely on the callee returning nil
+	}
+	if r == nil {
+		return // no check result means the database doesn't need to do such check/process (at the moment ....)
+	}
+
+	// try to alter database collation to expected if the database is empty, it might fail in some cases (and it isn't necessary to succeed)
+	// at the moment, there is no "altering" solution for MSSQL, site admin should manually change the database collation
+	// and there is a bug https://github.com/go-testfixtures/testfixtures/pull/182 mssql: Invalid object name 'information_schema.tables'.
+	if !r.CollationEquals(r.DatabaseCollation, r.ExpectedCollation) && r.ExistingTableNumber == 0 && x.Dialect().URI().DBType == schemas.MYSQL {
+		if err = alterDatabaseCollation(x, r.ExpectedCollation); err != nil {
+			log.Error("Failed to change database collation to %q: %v", r.ExpectedCollation, err)
+		} else {
+			// This branch is MySQL-only at the moment. The simple query is kept for MSSQL, whose session becomes
+			// invalid after "altering" and needs a "refresh"; its result is deliberately ignored.
+			_, _ = x.Exec("SELECT 1")
+			if r, err = CheckCollations(x); err != nil {
+				log.Error("Failed to check database collation again after altering: %v", err) // impossible case
+				return
+			}
+			if r == nil {
+				return // defensive: CheckCollations may return a nil result without an error
+			}
+			log.Warn("Current database has been altered to use collation %q", r.DatabaseCollation)
+		}
+	}
+
+	// check column collation, and show warning/error to end users -- no need to fatal, do not block the startup
+	if !r.IsCollationCaseSensitive(r.DatabaseCollation) {
+		log.Warn("Current database is using a case-insensitive collation %q, although Gitea could work with it, there might be some rare cases which don't work as expected.", r.DatabaseCollation)
+	}
+
+	if len(r.InconsistentCollationColumns) > 0 {
+		log.Error("There are %d table columns using inconsistent collation, they should use %q. Please go to admin panel Self Check page", len(r.InconsistentCollationColumns), r.DatabaseCollation)
+	}
+}
diff --git a/models/db/convert.go b/models/db/convert.go
index 112c8575ca..8c124471ab 100644
--- a/models/db/convert.go
+++ b/models/db/convert.go
@@ -14,13 +14,18 @@ import (
"xorm.io/xorm/schemas"
)
-// ConvertUtf8ToUtf8mb4 converts database and tables from utf8 to utf8mb4 if it's mysql and set ROW_FORMAT=dynamic
-func ConvertUtf8ToUtf8mb4() error {
+// ConvertDatabaseTable converts database and tables from utf8 to utf8mb4 if it's mysql and set ROW_FORMAT=dynamic
+func ConvertDatabaseTable() error {
if x.Dialect().URI().DBType != schemas.MYSQL {
return nil
}
- _, err := x.Exec(fmt.Sprintf("ALTER DATABASE `%s` CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci", setting.Database.Name))
+ r, err := CheckCollations(x)
+ if err != nil {
+ return err
+ }
+
+ _, err = x.Exec(fmt.Sprintf("ALTER DATABASE `%s` CHARACTER SET utf8mb4 COLLATE %s", setting.Database.Name, r.ExpectedCollation))
if err != nil {
return err
}
@@ -30,11 +35,11 @@ func ConvertUtf8ToUtf8mb4() error {
return err
}
for _, table := range tables {
- if _, err := x.Exec(fmt.Sprintf("ALTER TABLE `%s` ROW_FORMAT=dynamic;", table.Name)); err != nil {
+ if _, err := x.Exec(fmt.Sprintf("ALTER TABLE `%s` ROW_FORMAT=dynamic", table.Name)); err != nil {
return err
}
- if _, err := x.Exec(fmt.Sprintf("ALTER TABLE `%s` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;", table.Name)); err != nil {
+ if _, err := x.Exec(fmt.Sprintf("ALTER TABLE `%s` CONVERT TO CHARACTER SET utf8mb4 COLLATE %s", table.Name, r.ExpectedCollation)); err != nil {
return err
}
}
diff --git a/models/db/engine.go b/models/db/engine.go
index 99906813ca..2cd1c36c58 100755
--- a/models/db/engine.go
+++ b/models/db/engine.go
@@ -182,6 +182,8 @@ func InitEngineWithMigration(ctx context.Context, migrateFunc func(*xorm.Engine)
return err
}
+ preprocessDatabaseCollation(x)
+
// We have to run migrateFunc here in case the user is re-running installation on a previously created DB.
// If we do not then table schemas will be changed and there will be conflicts when the migrations run properly.
//