aboutsummaryrefslogtreecommitdiffstats
path: root/modules/util/string.go
blob: b9b59df3ef89a7618695f6d2681ed64b7a2d6a69 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package util

import (
	"strings"
	"unsafe"
)

func isSnakeCaseUpper(c byte) bool {
	return 'A' <= c && c <= 'Z'
}

func isSnakeCaseLowerOrNumber(c byte) bool {
	return 'a' <= c && c <= 'z' || '0' <= c && c <= '9'
}

// ToSnakeCase convert the input string to snake_case format.
//
// Some samples.
//
//	"FirstName"  => "first_name"
//	"HTTPServer" => "http_server"
//	"NoHTTPS"    => "no_https"
//	"GO_PATH"    => "go_path"
//	"GO PATH"    => "go_path"      // space is converted to underscore.
//	"GO-PATH"    => "go_path"      // hyphen is converted to underscore.
func ToSnakeCase(input string) string {
	if len(input) == 0 {
		return ""
	}

	var res []byte
	if len(input) == 1 {
		c := input[0]
		if isSnakeCaseUpper(c) {
			res = []byte{c + 'a' - 'A'}
		} else if isSnakeCaseLowerOrNumber(c) {
			res = []byte{c}
		} else {
			res = []byte{'_'}
		}
	} else {
		res = make([]byte, 0, len(input)*4/3)
		pos := 0
		needSep := false
		for pos < len(input) {
			c := input[pos]
			if c >= 0x80 {
				res = append(res, c)
				pos++
				continue
			}
			isUpper := isSnakeCaseUpper(c)
			if isUpper || isSnakeCaseLowerOrNumber(c) {
				end := pos + 1
				if isUpper {
					// skip the following upper letters
					for end < len(input) && isSnakeCaseUpper(input[end]) {
						end++
					}
					if end-pos > 1 && end < len(input) && isSnakeCaseLowerOrNumber(input[end]) {
						end--
					}
				}
				// skip the following lower or number letters
				for end < len(input) && (isSnakeCaseLowerOrNumber(input[end]) || input[end] >= 0x80) {
					end++
				}
				if needSep {
					res = append(res, '_')
				}
				res = append(res, input[pos:end]...)
				pos = end
				needSep = true
			} else {
				res = append(res, '_')
				pos++
				needSep = false
			}
		}
		for i := 0; i < len(res); i++ {
			if isSnakeCaseUpper(res[i]) {
				res[i] += 'a' - 'A'
			}
		}
	}
	return UnsafeBytesToString(res)
}

// UnsafeBytesToString uses Go's unsafe package to convert a byte slice to a string.
func UnsafeBytesToString(b []byte) string {
	return unsafe.String(unsafe.SliceData(b), len(b))
}

// UnsafeStringToBytes uses Go's unsafe package to convert a string to a byte slice.
func UnsafeStringToBytes(s string) []byte {
	return unsafe.Slice(unsafe.StringData(s), len(s))
}

// SplitTrimSpace splits the string at given separator and trims leading and trailing space
func SplitTrimSpace(input, sep string) []string {
	input = strings.TrimSpace(input)
	var stringList []string
	for s := range strings.SplitSeq(input, sep) {
		if s = strings.TrimSpace(s); s != "" {
			stringList = append(stringList, s)
		}
	}
	return stringList
}

func asciiLower(b byte) byte {
	if 'A' <= b && b <= 'Z' {
		return b + ('a' - 'A')
	}
	return b
}

// AsciiEqualFold is from Golang https://cs.opensource.google/go/go/+/refs/tags/go1.24.4:src/net/http/internal/ascii/print.go
// ASCII only. In most cases for protocols, we should only use this but not [strings.EqualFold]
func AsciiEqualFold(s, t string) bool { //nolint:revive // PascalCase
	if len(s) != len(t) {
		return false
	}
	for i := 0; i < len(s); i++ {
		if asciiLower(s[i]) != asciiLower(t[i]) {
			return false
		}
	}
	return true
}