aboutsummaryrefslogtreecommitdiffstats
path: root/modules/util
diff options
context:
space:
mode:
authorwxiaoguang <wxiaoguang@gmail.com>2022-03-31 10:25:40 +0800
committerGitHub <noreply@github.com>2022-03-31 10:25:40 +0800
commitc83168104b8bcb09e3e6e1490bd586a9e1a55bc3 (patch)
tree6732a64a1f40d9c27f3a3215bc847e701367e777 /modules/util
parent84038f33f42b03dbf455e41f5036f67ba7b61a66 (diff)
downloadgitea-c83168104b8bcb09e3e6e1490bd586a9e1a55bc3.tar.gz
gitea-c83168104b8bcb09e3e6e1490bd586a9e1a55bc3.zip
Use a more general (and faster) method to sanitize URLs with credentials (#19239)
Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs
Diffstat (limited to 'modules/util')
-rw-r--r--modules/util/sanitize.go88
-rw-r--r--modules/util/sanitize_test.go139
2 files changed, 75 insertions, 152 deletions
diff --git a/modules/util/sanitize.go b/modules/util/sanitize.go
index a782fcf512..685b2699b0 100644
--- a/modules/util/sanitize.go
+++ b/modules/util/sanitize.go
@@ -5,59 +5,71 @@
package util
import (
- "net/url"
- "strings"
-)
+ "bytes"
+ "unicode"
-const (
- userPlaceholder = "sanitized-credential"
- unparsableURL = "(unparsable url)"
+ "github.com/yuin/goldmark/util"
)
type sanitizedError struct {
- err error
- replacer *strings.Replacer
+ err error
}
func (err sanitizedError) Error() string {
- return err.replacer.Replace(err.err.Error())
+ return SanitizeCredentialURLs(err.err.Error())
}
-// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text.
-func NewSanitizedError(err error, oldnew ...string) error {
- return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)}
+func (err sanitizedError) Unwrap() error {
+ return err.err
}
-// NewURLSanitizedError wraps an error and replaces the url credential or removes them.
-func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error {
- return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)}
+// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
+func SanitizeErrorCredentialURLs(err error) error {
+ return sanitizedError{err: err}
}
-// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them.
-// If the url can't get parsed it gets replaced with a placeholder string.
-func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error {
- return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)}
-}
+const userPlaceholder = "sanitized-credential"
-// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer {
- old := u.String()
+var schemeSep = []byte("://")
- if u.User != nil && usePlaceholder {
- u.User = url.User(userPlaceholder)
- } else {
- u.User = nil
+// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
+func SanitizeCredentialURLs(s string) string {
+ bs := util.StringToReadOnlyBytes(s)
+ schemeSepPos := bytes.Index(bs, schemeSep)
+ if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
+ return s // fast return if there is no URL scheme or no userinfo
}
- return strings.NewReplacer(old, u.String())
-}
-
-// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-// If the url can't get parsed it gets replaced with a placeholder string
-func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer {
- u, err := url.Parse(unsanitizedURL)
- if err != nil {
- // don't log the error, since it might contain unsanitized URL.
- return strings.NewReplacer(unsanitizedURL, unparsableURL)
+ out := make([]byte, 0, len(bs)+len(userPlaceholder))
+ for schemeSepPos != -1 {
+ schemeSepPos += 3 // skip the "://"
+ sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
+ sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
+ sepLoop:
+ for ; sepEndPos < len(bs); sepEndPos++ {
+ c := bs[sepEndPos]
+ if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
+ continue
+ }
+ switch c {
+ case '@':
+ sepAtPos = sepEndPos
+ case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
+ continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
+ default:
+ break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
+ }
+ }
+ // if there is '@', and the string is like "s://u@h", then hide the "u" part
+ if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
+ out = append(out, bs[:schemeSepPos]...)
+ out = append(out, userPlaceholder...)
+ out = append(out, bs[sepAtPos:sepEndPos]...)
+ } else {
+ out = append(out, bs[:sepEndPos]...)
+ }
+ bs = bs[sepEndPos:]
+ schemeSepPos = bytes.Index(bs, schemeSep)
}
- return NewURLSanitizer(u, usePlaceholder)
+ out = append(out, bs...)
+ return util.BytesToReadOnlyString(out)
}
diff --git a/modules/util/sanitize_test.go b/modules/util/sanitize_test.go
index c141f5e947..78166cfdff 100644
--- a/modules/util/sanitize_test.go
+++ b/modules/util/sanitize_test.go
@@ -11,154 +11,65 @@ import (
"github.com/stretchr/testify/assert"
)
-func TestNewSanitizedError(t *testing.T) {
- err := errors.New("error while secret on test")
- err2 := NewSanitizedError(err)
- assert.Equal(t, err.Error(), err2.Error())
-
- cases := []struct {
- input error
- oldnew []string
- expected string
- }{
- // case 0
- {
- errors.New("error while secret on test"),
- []string{"secret", "replaced"},
- "error while replaced on test",
- },
- // case 1
- {
- errors.New("error while sec-ret on test"),
- []string{"secret", "replaced"},
- "error while sec-ret on test",
- },
- }
-
- for n, c := range cases {
- err := NewSanitizedError(c.input, c.oldnew...)
-
- assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n)
- }
+func TestSanitizeErrorCredentialURLs(t *testing.T) {
+ err := errors.New("error with https://a@b.com")
+ se := SanitizeErrorCredentialURLs(err)
+ assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
}
-func TestNewStringURLSanitizer(t *testing.T) {
+func TestSanitizeCredentialURLs(t *testing.T) {
cases := []struct {
- input string
- placeholder bool
- expected string
+ input string
+ expected string
}{
- // case 0
{
"https://github.com/go-gitea/test_repo.git",
- true,
"https://github.com/go-gitea/test_repo.git",
},
- // case 1
- {
- "https://github.com/go-gitea/test_repo.git",
- false,
- "https://github.com/go-gitea/test_repo.git",
- },
- // case 2
{
"https://mytoken@github.com/go-gitea/test_repo.git",
- true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
- // case 3
- {
- "https://mytoken@github.com/go-gitea/test_repo.git",
- false,
- "https://github.com/go-gitea/test_repo.git",
- },
- // case 4
{
"https://user:password@github.com/go-gitea/test_repo.git",
- true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
- // case 5
{
- "https://user:password@github.com/go-gitea/test_repo.git",
- false,
- "https://github.com/go-gitea/test_repo.git",
+ "ftp://x@",
+ "ftp://" + userPlaceholder + "@",
},
- // case 6
{
- "https://gi\nthub.com/go-gitea/test_repo.git",
- false,
- unparsableURL,
+ "ftp://x/@",
+ "ftp://x/@",
},
- }
-
- for n, c := range cases {
- // uses NewURLSanitizer internally
- result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input)
-
- assert.Equal(t, c.expected, result, "case %d: error should match", n)
- }
-}
-
-func TestNewStringURLSanitizedError(t *testing.T) {
- cases := []struct {
- input string
- placeholder bool
- expected string
- }{
- // case 0
{
- "https://github.com/go-gitea/test_repo.git",
- true,
- "https://github.com/go-gitea/test_repo.git",
- },
- // case 1
- {
- "https://github.com/go-gitea/test_repo.git",
- false,
- "https://github.com/go-gitea/test_repo.git",
+ "ftp://u@x/@", // test multiple @ chars
+ "ftp://" + userPlaceholder + "@x/@",
},
- // case 2
{
- "https://mytoken@github.com/go-gitea/test_repo.git",
- true,
- "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
+ "😊ftp://u@x😊", // test unicode
+ "😊ftp://" + userPlaceholder + "@x😊",
},
- // case 3
{
- "https://mytoken@github.com/go-gitea/test_repo.git",
- false,
- "https://github.com/go-gitea/test_repo.git",
+ "://@",
+ "://@",
},
- // case 4
{
- "https://user:password@github.com/go-gitea/test_repo.git",
- true,
- "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
+ "//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
+ "//u:p@h",
},
- // case 5
{
- "https://user:password@github.com/go-gitea/test_repo.git",
- false,
- "https://github.com/go-gitea/test_repo.git",
+ "s://u@h", // the minimal pattern to be sanitized
+ "s://" + userPlaceholder + "@h",
},
- // case 6
{
- "https://gi\nthub.com/go-gitea/test_repo.git",
- false,
- unparsableURL,
+ "URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
+ "URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
},
}
- encloseText := func(input string) string {
- return "test " + input + " test"
- }
-
for n, c := range cases {
- err := errors.New(encloseText(c.input))
-
- result := NewStringURLSanitizedError(err, c.input, c.placeholder)
-
- assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n)
+ result := SanitizeCredentialURLs(c.input)
+ assert.Equal(t, c.expected, result, "case %d: error should match", n)
}
}