You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

sanitize.go 2.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package util
  4. import (
  5. "bytes"
  6. "unicode"
  7. "github.com/yuin/goldmark/util"
  8. )
  // sanitizedError is an error wrapper that keeps the original error in its
  // single field. The methods declared on this type decide how that error is
  // presented to callers.
  type sanitizedError struct {
  	// err is the wrapped, unmodified error.
  	err error
  }
  12. func (err sanitizedError) Error() string {
  13. return SanitizeCredentialURLs(err.err.Error())
  14. }
  15. func (err sanitizedError) Unwrap() error {
  16. return err.err
  17. }
  18. // SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
  19. func SanitizeErrorCredentialURLs(err error) error {
  20. return sanitizedError{err: err}
  21. }
  // userPlaceholder is the fixed text written in place of a URL's userinfo.
  const userPlaceholder = "sanitized-credential"

  // schemeSep is the byte sequence "://" that separates a URL scheme from the
  // rest of the URL.
  var schemeSep = []byte{':', '/', '/'}
  24. // SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
  25. func SanitizeCredentialURLs(s string) string {
  26. bs := util.StringToReadOnlyBytes(s)
  27. schemeSepPos := bytes.Index(bs, schemeSep)
  28. if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
  29. return s // fast return if there is no URL scheme or no userinfo
  30. }
  31. out := make([]byte, 0, len(bs)+len(userPlaceholder))
  32. for schemeSepPos != -1 {
  33. schemeSepPos += 3 // skip the "://"
  34. sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
  35. sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
  36. sepLoop:
  37. for ; sepEndPos < len(bs); sepEndPos++ {
  38. c := bs[sepEndPos]
  39. if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
  40. continue
  41. }
  42. switch c {
  43. case '@':
  44. sepAtPos = sepEndPos
  45. case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
  46. continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
  47. default:
  48. break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
  49. }
  50. }
  51. // if there is '@', and the string is like "s://u@h", then hide the "u" part
  52. if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
  53. out = append(out, bs[:schemeSepPos]...)
  54. out = append(out, userPlaceholder...)
  55. out = append(out, bs[sepAtPos:sepEndPos]...)
  56. } else {
  57. out = append(out, bs[:sepEndPos]...)
  58. }
  59. bs = bs[sepEndPos:]
  60. schemeSepPos = bytes.Index(bs, schemeSep)
  61. }
  62. out = append(out, bs...)
  63. return util.BytesToReadOnlyString(out)
  64. }