async.go

package certmagic

import (
	"context"
	"errors"
	"log"
	"runtime"
	"sync"
	"time"

	"go.uber.org/zap"
)

var jm = &jobManager{maxConcurrentJobs: 1000}

type jobManager struct {
	mu                sync.Mutex
	maxConcurrentJobs int
	activeWorkers     int
	queue             []namedJob
	names             map[string]struct{}
}

type namedJob struct {
	name   string
	job    func() error
	logger *zap.Logger
}

// Submit enqueues the given job with the given name. If name is non-empty
// and a job with the same name is already enqueued or running, this is a
// no-op. If name is empty, no duplicate prevention will occur. The job
// manager will then run this job as soon as it is able.
func (jm *jobManager) Submit(logger *zap.Logger, name string, job func() error) {
	jm.mu.Lock()
	defer jm.mu.Unlock()
	if jm.names == nil {
		jm.names = make(map[string]struct{})
	}
	if name != "" {
		// prevent duplicate jobs
		if _, ok := jm.names[name]; ok {
			return
		}
		jm.names[name] = struct{}{}
	}
	jm.queue = append(jm.queue, namedJob{name, job, logger})
	if jm.activeWorkers < jm.maxConcurrentJobs {
		jm.activeWorkers++
		go jm.worker()
	}
}
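
// Illustrative usage (not part of the original file): a hedged sketch of how a
// caller inside this package might enqueue a deduplicated background job. The
// job name and body here are hypothetical.
//
//	jm.Submit(logger, "renew_example.com", func() error {
//		// do the work; a returned error is only logged by worker(),
//		// it is not propagated back to the submitter
//		return nil
//	})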

// worker runs jobs from the queue, one at a time, until the queue is empty,
// then exits. If a job panics, the panic is recovered and logged and the
// worker returns.
func (jm *jobManager) worker() {
	defer func() {
		if err := recover(); err != nil {
			buf := make([]byte, stackTraceBufferSize)
			buf = buf[:runtime.Stack(buf, false)]
			log.Printf("panic: certificate worker: %v\n%s", err, buf)
		}
	}()

	for {
		jm.mu.Lock()
		if len(jm.queue) == 0 {
			jm.activeWorkers--
			jm.mu.Unlock()
			return
		}
		next := jm.queue[0]
		jm.queue = jm.queue[1:]
		jm.mu.Unlock()
		if err := next.job(); err != nil {
			if next.logger != nil {
				next.logger.Error("job failed", zap.Error(err))
			}
		}
		if next.name != "" {
			jm.mu.Lock()
			delete(jm.names, next.name)
			jm.mu.Unlock()
		}
	}
}

// doWithRetry calls f, retrying at retryIntervals whenever f returns an
// error, until f succeeds, the context is canceled, f returns an ErrNoRetry,
// or maxRetryDuration elapses. The attempt count is made available to f via
// AttemptsCtxKey on the context.
func doWithRetry(ctx context.Context, log *zap.Logger, f func(context.Context) error) error {
	var attempts int
	ctx = context.WithValue(ctx, AttemptsCtxKey, &attempts)

	// the initial intervalIndex is -1, signaling
	// that we should not wait for the first attempt
	start, intervalIndex := time.Now(), -1
	var err error

	for time.Since(start) < maxRetryDuration {
		var wait time.Duration
		if intervalIndex >= 0 {
			wait = retryIntervals[intervalIndex]
		}
		timer := time.NewTimer(wait)
		select {
		case <-ctx.Done():
			timer.Stop()
			return context.Canceled
		case <-timer.C:
			err = f(ctx)
			attempts++
			if err == nil || errors.Is(err, context.Canceled) {
				return err
			}
			var errNoRetry ErrNoRetry
			if errors.As(err, &errNoRetry) {
				return err
			}
			if intervalIndex < len(retryIntervals)-1 {
				intervalIndex++
			}
			if time.Since(start) < maxRetryDuration {
				if log != nil {
					log.Error("will retry",
						zap.Error(err),
						zap.Int("attempt", attempts),
						zap.Duration("retrying_in", retryIntervals[intervalIndex]),
						zap.Duration("elapsed", time.Since(start)),
						zap.Duration("max_duration", maxRetryDuration))
				}
			} else {
				if log != nil {
					log.Error("final attempt; giving up",
						zap.Error(err),
						zap.Int("attempt", attempts),
						zap.Duration("elapsed", time.Since(start)),
						zap.Duration("max_duration", maxRetryDuration))
				}
				return nil
			}
		}
	}
	return err
}
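
// Illustrative usage (not part of the original file): a hedged sketch of how
// doWithRetry might wrap an operation inside this package. "obtainCert" is a
// hypothetical function; retries stop when it returns nil, an error wrapping
// context.Canceled, or an ErrNoRetry, or when maxRetryDuration elapses.
//
//	err := doWithRetry(ctx, logger, func(ctx context.Context) error {
//		return obtainCert(ctx) // hypothetical operation to retry
//	})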

// ErrNoRetry is an error type which signals
// to stop retries early.
type ErrNoRetry struct{ Err error }

// Unwrap makes it so that e wraps e.Err.
func (e ErrNoRetry) Unwrap() error { return e.Err }

func (e ErrNoRetry) Error() string { return e.Err.Error() }
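
// Illustrative usage (not part of the original file): a sketch of how a
// retried function might signal a permanent failure so that doWithRetry stops
// immediately; "errInvalidDomain" is a hypothetical error value.
//
//	return ErrNoRetry{Err: errInvalidDomain}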

type retryStateCtxKey struct{}

// AttemptsCtxKey is the context key for the value
// that holds the attempt counter. The value counts
// how many times the operation has been attempted.
// A value of 0 means first attempt.
var AttemptsCtxKey retryStateCtxKey
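
// Illustrative usage (not part of the original file): inside a function passed
// to doWithRetry, the attempt counter can be read from the context; doWithRetry
// stores it under AttemptsCtxKey as a *int.
//
//	if attempts, ok := ctx.Value(AttemptsCtxKey).(*int); ok && *attempts > 0 {
//		// this is a retry, not the first attempt
//	}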

// retryIntervals are based on the idea of exponential
// backoff, but weighted a little more heavily to the
// front. We figure that intermittent errors would be
// resolved after the first retry, but any errors after
// that would probably require at least a few minutes
// to clear up: either for DNS to propagate, for the
// administrator to fix their DNS or network properties,
// or some other external factor needs to change. We
// chose intervals that we think will be most useful
// without introducing unnecessary delay. The last
// interval in this list will be used until the time
// of maxRetryDuration has elapsed.
var retryIntervals = []time.Duration{
	1 * time.Minute,
	2 * time.Minute,
	2 * time.Minute,
	5 * time.Minute, // elapsed: 10 min
	10 * time.Minute,
	20 * time.Minute,
	20 * time.Minute, // elapsed: 1 hr
	30 * time.Minute,
	30 * time.Minute, // elapsed: 2 hr
	1 * time.Hour,
	3 * time.Hour, // elapsed: 6 hr
	6 * time.Hour, // for up to maxRetryDuration
}

// maxRetryDuration is the maximum duration to try
// doing retries using the above intervals.
const maxRetryDuration = 24 * time.Hour * 30 // 30 days