diff options
author | wxiaoguang <wxiaoguang@gmail.com> | 2022-03-31 10:25:40 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-31 10:25:40 +0800 |
commit | c83168104b8bcb09e3e6e1490bd586a9e1a55bc3 (patch) | |
tree | 6732a64a1f40d9c27f3a3215bc847e701367e777 /modules/util | |
parent | 84038f33f42b03dbf455e41f5036f67ba7b61a66 (diff) | |
download | gitea-c83168104b8bcb09e3e6e1490bd586a9e1a55bc3.tar.gz gitea-c83168104b8bcb09e3e6e1490bd586a9e1a55bc3.zip |
Use a more general (and faster) method to sanitize URLs with credentials (#19239)
Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs
Diffstat (limited to 'modules/util')
-rw-r--r-- | modules/util/sanitize.go | 88 | ||||
-rw-r--r-- | modules/util/sanitize_test.go | 139 |
2 files changed, 75 insertions, 152 deletions
diff --git a/modules/util/sanitize.go b/modules/util/sanitize.go index a782fcf512..685b2699b0 100644 --- a/modules/util/sanitize.go +++ b/modules/util/sanitize.go @@ -5,59 +5,71 @@ package util import ( - "net/url" - "strings" -) + "bytes" + "unicode" -const ( - userPlaceholder = "sanitized-credential" - unparsableURL = "(unparsable url)" + "github.com/yuin/goldmark/util" ) type sanitizedError struct { - err error - replacer *strings.Replacer + err error } func (err sanitizedError) Error() string { - return err.replacer.Replace(err.err.Error()) + return SanitizeCredentialURLs(err.err.Error()) } -// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text. -func NewSanitizedError(err error, oldnew ...string) error { - return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)} +func (err sanitizedError) Unwrap() error { + return err.err } -// NewURLSanitizedError wraps an error and replaces the url credential or removes them. -func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error { - return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)} +// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs +func SanitizeErrorCredentialURLs(err error) error { + return sanitizedError{err: err} } -// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them. -// If the url can't get parsed it gets replaced with a placeholder string. -func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error { - return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)} -} +const userPlaceholder = "sanitized-credential" -// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed. -func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer { - old := u.String() +var schemeSep = []byte("://") - if u.User != nil && usePlaceholder { - u.User = url.User(userPlaceholder) - } else { - u.User = nil +// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com" +func SanitizeCredentialURLs(s string) string { + bs := util.StringToReadOnlyBytes(s) + schemeSepPos := bytes.Index(bs, schemeSep) + if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 { + return s // fast return if there is no URL scheme or no userinfo } - return strings.NewReplacer(old, u.String()) -} - -// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed. -// If the url can't get parsed it gets replaced with a placeholder string -func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer { - u, err := url.Parse(unsanitizedURL) - if err != nil { - // don't log the error, since it might contain unsanitized URL. - return strings.NewReplacer(unsanitizedURL, unparsableURL) + out := make([]byte, 0, len(bs)+len(userPlaceholder)) + for schemeSepPos != -1 { + schemeSepPos += 3 // skip the "://" + sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host" + sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test" + sepLoop: + for ; sepEndPos < len(bs); sepEndPos++ { + c := bs[sepEndPos] + if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') { + continue + } + switch c { + case '@': + sepAtPos = sepEndPos + case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%': + continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars + default: + break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop + } + } + // if there is '@', and the string is like "s://u@h", then hide the "u" part + if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 { + out = append(out, bs[:schemeSepPos]...) + out = append(out, userPlaceholder...) + out = append(out, bs[sepAtPos:sepEndPos]...) + } else { + out = append(out, bs[:sepEndPos]...) + } + bs = bs[sepEndPos:] + schemeSepPos = bytes.Index(bs, schemeSep) } - return NewURLSanitizer(u, usePlaceholder) + out = append(out, bs...) + return util.BytesToReadOnlyString(out) } diff --git a/modules/util/sanitize_test.go b/modules/util/sanitize_test.go index c141f5e947..78166cfdff 100644 --- a/modules/util/sanitize_test.go +++ b/modules/util/sanitize_test.go @@ -11,154 +11,65 @@ import ( "github.com/stretchr/testify/assert" ) -func TestNewSanitizedError(t *testing.T) { - err := errors.New("error while secret on test") - err2 := NewSanitizedError(err) - assert.Equal(t, err.Error(), err2.Error()) - - cases := []struct { - input error - oldnew []string - expected string - }{ - // case 0 - { - errors.New("error while secret on test"), - []string{"secret", "replaced"}, - "error while replaced on test", - }, - // case 1 - { - errors.New("error while sec-ret on test"), - []string{"secret", "replaced"}, - "error while sec-ret on test", - }, - } - - for n, c := range cases { - err := NewSanitizedError(c.input, c.oldnew...) - - assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n) - } +func TestSanitizeErrorCredentialURLs(t *testing.T) { + err := errors.New("error with https://a@b.com") + se := SanitizeErrorCredentialURLs(err) + assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error()) } -func TestNewStringURLSanitizer(t *testing.T) { +func TestSanitizeCredentialURLs(t *testing.T) { cases := []struct { - input string - placeholder bool - expected string + input string + expected string }{ - // case 0 { "https://github.com/go-gitea/test_repo.git", - true, "https://github.com/go-gitea/test_repo.git", }, - // case 1 - { - "https://github.com/go-gitea/test_repo.git", - false, - "https://github.com/go-gitea/test_repo.git", - }, - // case 2 { "https://mytoken@github.com/go-gitea/test_repo.git", - true, "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", }, - // case 3 - { - "https://mytoken@github.com/go-gitea/test_repo.git", - false, - "https://github.com/go-gitea/test_repo.git", - }, - // case 4 { "https://user:password@github.com/go-gitea/test_repo.git", - true, "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", }, - // case 5 { - "https://user:password@github.com/go-gitea/test_repo.git", - false, - "https://github.com/go-gitea/test_repo.git", + "ftp://x@", + "ftp://" + userPlaceholder + "@", }, - // case 6 { - "https://gi\nthub.com/go-gitea/test_repo.git", - false, - unparsableURL, + "ftp://x/@", + "ftp://x/@", }, - } - - for n, c := range cases { - // uses NewURLSanitizer internally - result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input) - - assert.Equal(t, c.expected, result, "case %d: error should match", n) - } -} - -func TestNewStringURLSanitizedError(t *testing.T) { - cases := []struct { - input string - placeholder bool - expected string - }{ - // case 0 { - "https://github.com/go-gitea/test_repo.git", - true, - "https://github.com/go-gitea/test_repo.git", - }, - // case 1 - { - "https://github.com/go-gitea/test_repo.git", - false, - "https://github.com/go-gitea/test_repo.git", + "ftp://u@x/@", // test multiple @ chars + "ftp://" + userPlaceholder + "@x/@", }, - // case 2 { - "https://mytoken@github.com/go-gitea/test_repo.git", - true, - "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", + "😊ftp://u@x😊", // test unicode + "😊ftp://" + userPlaceholder + "@x😊", }, - // case 3 { - "https://mytoken@github.com/go-gitea/test_repo.git", - false, - "https://github.com/go-gitea/test_repo.git", + "://@", + "://@", }, - // case 4 { - "https://user:password@github.com/go-gitea/test_repo.git", - true, - "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", + "//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string + "//u:p@h", }, - // case 5 { - "https://user:password@github.com/go-gitea/test_repo.git", - false, - "https://github.com/go-gitea/test_repo.git", + "s://u@h", // the minimal pattern to be sanitized + "s://" + userPlaceholder + "@h", }, - // case 6 { - "https://gi\nthub.com/go-gitea/test_repo.git", - false, - unparsableURL, + "URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com", + "URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com", }, } - encloseText := func(input string) string { - return "test " + input + " test" - } - for n, c := range cases { - err := errors.New(encloseText(c.input)) - - result := NewStringURLSanitizedError(err, c.input, c.placeholder) - - assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n) + result := SanitizeCredentialURLs(c.input) + assert.Equal(t, c.expected, result, "case %d: error should match", n) } } |