| author | wxiaoguang <wxiaoguang@gmail.com> | 2022-03-31 10:25:40 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-03-31 10:25:40 +0800 |
| commit | c83168104b8bcb09e3e6e1490bd586a9e1a55bc3 (patch) | |
| tree | 6732a64a1f40d9c27f3a3215bc847e701367e777 /modules/util/sanitize.go | |
| parent | 84038f33f42b03dbf455e41f5036f67ba7b61a66 (diff) | |
Use a more general (and faster) method to sanitize URLs with credentials (#19239)
Use a more general method to sanitize URLs with credentials. Compared with the old per-URL replacers, the new approach is simple and intuitive, faster, and removes credentials from all URLs in a message.
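As a rough usage sketch (not part of this patch): callers now pass whole strings or errors through the two new helpers instead of building a `strings.Replacer` per URL. The sample messages and URLs below are made up for illustration.

```go
package main

import (
	"errors"
	"fmt"

	"code.gitea.io/gitea/modules/util"
)

func main() {
	// Sanitize credentials embedded anywhere in a plain string.
	msg := util.SanitizeCredentialURLs("fetch https://user:pass@example.com/repo.git failed")
	fmt.Println(msg) // fetch https://sanitized-credential@example.com/repo.git failed

	// Wrap an error once; Error() sanitizes the message on each call.
	err := errors.New("push to https://token@example.com/repo.git was rejected")
	fmt.Println(util.SanitizeErrorCredentialURLs(err))
	// push to https://sanitized-credential@example.com/repo.git was rejected
}
```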
Diffstat (limited to 'modules/util/sanitize.go')
-rw-r--r-- | modules/util/sanitize.go | 88 |
1 file changed, 50 insertions(+), 38 deletions(-)
```diff
diff --git a/modules/util/sanitize.go b/modules/util/sanitize.go
index a782fcf512..685b2699b0 100644
--- a/modules/util/sanitize.go
+++ b/modules/util/sanitize.go
@@ -5,59 +5,71 @@
 package util
 
 import (
-	"net/url"
-	"strings"
-)
+	"bytes"
+	"unicode"
 
-const (
-	userPlaceholder = "sanitized-credential"
-	unparsableURL   = "(unparsable url)"
+	"github.com/yuin/goldmark/util"
 )
 
 type sanitizedError struct {
-	err      error
-	replacer *strings.Replacer
+	err error
 }
 
 func (err sanitizedError) Error() string {
-	return err.replacer.Replace(err.err.Error())
+	return SanitizeCredentialURLs(err.err.Error())
 }
 
-// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text.
-func NewSanitizedError(err error, oldnew ...string) error {
-	return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)}
+func (err sanitizedError) Unwrap() error {
+	return err.err
 }
 
-// NewURLSanitizedError wraps an error and replaces the url credential or removes them.
-func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error {
-	return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)}
+// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
+func SanitizeErrorCredentialURLs(err error) error {
+	return sanitizedError{err: err}
 }
 
-// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them.
-// If the url can't get parsed it gets replaced with a placeholder string.
-func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error {
-	return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)}
-}
+const userPlaceholder = "sanitized-credential"
 
-// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer {
-	old := u.String()
+var schemeSep = []byte("://")
 
-	if u.User != nil && usePlaceholder {
-		u.User = url.User(userPlaceholder)
-	} else {
-		u.User = nil
+// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
+func SanitizeCredentialURLs(s string) string {
+	bs := util.StringToReadOnlyBytes(s)
+	schemeSepPos := bytes.Index(bs, schemeSep)
+	if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
+		return s // fast return if there is no URL scheme or no userinfo
 	}
-	return strings.NewReplacer(old, u.String())
-}
-
-// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-// If the url can't get parsed it gets replaced with a placeholder string
-func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer {
-	u, err := url.Parse(unsanitizedURL)
-	if err != nil {
-		// don't log the error, since it might contain unsanitized URL.
-		return strings.NewReplacer(unsanitizedURL, unparsableURL)
+	out := make([]byte, 0, len(bs)+len(userPlaceholder))
+	for schemeSepPos != -1 {
+		schemeSepPos += 3         // skip the "://"
+		sepAtPos := -1            // the possible '@' position: "https://foo@[^here]host"
+		sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
+	sepLoop:
+		for ; sepEndPos < len(bs); sepEndPos++ {
+			c := bs[sepEndPos]
+			if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
+				continue
+			}
+			switch c {
+			case '@':
+				sepAtPos = sepEndPos
+			case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
+				continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
+			default:
+				break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
+			}
+		}
+		// if there is '@', and the string is like "s://u@h", then hide the "u" part
+		if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
+			out = append(out, bs[:schemeSepPos]...)
+			out = append(out, userPlaceholder...)
+			out = append(out, bs[sepAtPos:sepEndPos]...)
+		} else {
+			out = append(out, bs[:sepEndPos]...)
+		}
+		bs = bs[sepEndPos:]
+		schemeSepPos = bytes.Index(bs, schemeSep)
 	}
-	return NewURLSanitizer(u, usePlaceholder)
+	out = append(out, bs...)
+	return util.BytesToReadOnlyString(out)
 }
```
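To make the scanning loop above easier to follow, here is a small test-style sketch of how `SanitizeCredentialURLs` should behave on a few made-up inputs; the cases are hand-traced from the algorithm and are not copied from the project's test suite, and the test name is hypothetical.

```go
package util

import "testing"

// Hypothetical test; each case is traced by hand from the scanning loop above.
func TestSanitizeCredentialURLsSketch(t *testing.T) {
	cases := []struct{ in, want string }{
		// userinfo between "://" and '@' is replaced with the placeholder
		{"https://user:pass@example.com/repo.git", "https://sanitized-credential@example.com/repo.git"},
		// no '@' after the scheme: the fast path returns the input unchanged
		{"https://example.com/repo.git", "https://example.com/repo.git"},
		// every URL in the string is sanitized, not only the first one
		{"a https://u1@h1 b ssh://u2:p2@h2 c", "a https://sanitized-credential@h1 b ssh://sanitized-credential@h2 c"},
		// an '@' with no preceding "scheme://" is left alone
		{"mail me at someone@example.com", "mail me at someone@example.com"},
	}
	for _, c := range cases {
		if got := SanitizeCredentialURLs(c.in); got != c.want {
			t.Errorf("SanitizeCredentialURLs(%q) = %q, want %q", c.in, got, c.want)
		}
	}
}
```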