diff options
Diffstat (limited to 'vendor/github.com/caddyserver/certmagic/solvers.go')
-rw-r--r-- | vendor/github.com/caddyserver/certmagic/solvers.go | 623 |
1 file changed, 623 insertions, 0 deletions
diff --git a/vendor/github.com/caddyserver/certmagic/solvers.go b/vendor/github.com/caddyserver/certmagic/solvers.go
new file mode 100644
index 0000000000..c0957da3e6
--- /dev/null
+++ b/vendor/github.com/caddyserver/certmagic/solvers.go
@@ -0,0 +1,623 @@
+// Copyright 2015 Matthew Holt
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package certmagic
+
+import (
+	"context"
+	"crypto/tls"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net"
+	"net/http"
+	"path"
+	"runtime"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/libdns/libdns"
+	"github.com/mholt/acmez"
+	"github.com/mholt/acmez/acme"
+)
+
+// httpSolver solves the HTTP challenge. It must be
+// associated with a config and an address to use
+// for solving the challenge. If multiple httpSolvers
+// are initialized concurrently, the first one to
+// begin will start the server, and the last one to
+// finish will stop the server. This solver must be
+// wrapped by a distributedSolver to work properly,
+// because the only way the HTTP challenge handler
+// can access the keyAuth material is by loading it
+// from storage, which is done by distributedSolver.
+type httpSolver struct {
+	closed      int32 // accessed atomically
+	acmeManager *ACMEManager
+	address     string
+}
+
+// Present starts an HTTP server if none is already listening on s.address.
+func (s *httpSolver) Present(ctx context.Context, _ acme.Challenge) error {
+	solversMu.Lock()
+	defer solversMu.Unlock()
+
+	si := getSolverInfo(s.address)
+	si.count++
+	if si.listener != nil {
+		return nil // already being served by us
+	}
+
+	// notice the unusual error handling here; we
+	// only continue to start a challenge server if
+	// we got a listener; in all other cases return
+	ln, err := robustTryListen(s.address)
+	if ln == nil {
+		return err
+	}
+
+	// successfully bound socket, so save listener and start key auth HTTP server
+	si.listener = ln
+	go s.serve(si)
+
+	return nil
+}
+
+// serve is an HTTP server that serves only HTTP challenge responses.
+func (s *httpSolver) serve(si *solverInfo) {
+	defer func() {
+		if err := recover(); err != nil {
+			buf := make([]byte, stackTraceBufferSize)
+			buf = buf[:runtime.Stack(buf, false)]
+			log.Printf("panic: http solver server: %v\n%s", err, buf)
+		}
+	}()
+	defer close(si.done)
+	httpServer := &http.Server{Handler: s.acmeManager.HTTPChallengeHandler(http.NewServeMux())}
+	httpServer.SetKeepAlivesEnabled(false)
+	err := httpServer.Serve(si.listener)
+	if err != nil && atomic.LoadInt32(&s.closed) != 1 {
+		log.Printf("[ERROR] key auth HTTP server: %v", err)
+	}
+}
+
+// CleanUp cleans up the HTTP server if it is the last one to finish.
+func (s *httpSolver) CleanUp(ctx context.Context, _ acme.Challenge) error {
+	solversMu.Lock()
+	defer solversMu.Unlock()
+	si := getSolverInfo(s.address)
+	si.count--
+	if si.count == 0 {
+		// last one out turns off the lights
+		atomic.StoreInt32(&s.closed, 1)
+		if si.listener != nil {
+			si.listener.Close()
+			<-si.done
+		}
+		delete(solvers, s.address)
+	}
+	return nil
+}
+
+// tlsALPNSolver is a type that can solve TLS-ALPN challenges.
+// It must have an associated config and address on which to
+// serve the challenge.
+type tlsALPNSolver struct {
+	config  *Config
+	address string
+}
+
+// Present adds the certificate to the certificate cache and, if
+// needed, starts a TLS server for answering TLS-ALPN challenges.
+func (s *tlsALPNSolver) Present(ctx context.Context, chal acme.Challenge) error {
+	// load the certificate into the cache; this isn't strictly necessary
+	// if we're using the distributed solver since our GetCertificate
+	// function will check storage for the keyAuth anyway, but it seems
+	// like loading it into the cache is the right thing to do
+	cert, err := acmez.TLSALPN01ChallengeCert(chal)
+	if err != nil {
+		return err
+	}
+	certHash := hashCertificateChain(cert.Certificate)
+	s.config.certCache.mu.Lock()
+	s.config.certCache.cache[tlsALPNCertKeyName(chal.Identifier.Value)] = Certificate{
+		Certificate: *cert,
+		Names:       []string{chal.Identifier.Value},
+		hash:        certHash, // perhaps not necessary
+	}
+	s.config.certCache.mu.Unlock()
+
+	// the rest of this function increments the
+	// challenge count for the solver at this
+	// listener address, and if necessary, starts
+	// a simple TLS server
+
+	solversMu.Lock()
+	defer solversMu.Unlock()
+
+	si := getSolverInfo(s.address)
+	si.count++
+	if si.listener != nil {
+		return nil // already being served by us
+	}
+
+	// notice the unusual error handling here; we
+	// only continue to start a challenge server if
+	// we got a listener; in all other cases return
+	ln, err := robustTryListen(s.address)
+	if ln == nil {
+		return err
+	}
+
+	// we were able to bind the socket, so make it into a TLS
+	// listener, store it with the solverInfo, and start the
+	// challenge server
+
+	si.listener = tls.NewListener(ln, s.config.TLSConfig())
+
+	go func() {
+		defer func() {
+			if err := recover(); err != nil {
+				buf := make([]byte, stackTraceBufferSize)
+				buf = buf[:runtime.Stack(buf, false)]
+				log.Printf("panic: tls-alpn solver server: %v\n%s", err, buf)
+			}
+		}()
+		defer close(si.done)
+		for {
+			conn, err := si.listener.Accept()
+			if err != nil {
+				if atomic.LoadInt32(&si.closed) == 1 {
+					return
+				}
+				log.Printf("[ERROR] TLS-ALPN challenge server: accept: %v", err)
+				continue
+			}
+			go s.handleConn(conn)
+		}
+	}()
+
+	return nil
+}
+
+// handleConn completes the TLS handshake and then closes conn.
+func (*tlsALPNSolver) handleConn(conn net.Conn) {
+	defer func() {
+		if err := recover(); err != nil {
+			buf := make([]byte, stackTraceBufferSize)
+			buf = buf[:runtime.Stack(buf, false)]
+			log.Printf("panic: tls-alpn solver handler: %v\n%s", err, buf)
+		}
+	}()
+	defer conn.Close()
+	tlsConn, ok := conn.(*tls.Conn)
+	if !ok {
+		log.Printf("[ERROR] TLS-ALPN challenge server: expected tls.Conn but got %T: %#v", conn, conn)
+		return
+	}
+	err := tlsConn.Handshake()
+	if err != nil {
+		log.Printf("[ERROR] TLS-ALPN challenge server: handshake: %v", err)
+		return
+	}
+}
+
+// CleanUp removes the challenge certificate from the cache, and if
+// it is the last one to finish, stops the TLS server.
+func (s *tlsALPNSolver) CleanUp(ctx context.Context, chal acme.Challenge) error {
+	s.config.certCache.mu.Lock()
+	delete(s.config.certCache.cache, tlsALPNCertKeyName(chal.Identifier.Value))
+	s.config.certCache.mu.Unlock()
+
+	solversMu.Lock()
+	defer solversMu.Unlock()
+	si := getSolverInfo(s.address)
+	si.count--
+	if si.count == 0 {
+		// last one out turns off the lights
+		atomic.StoreInt32(&si.closed, 1)
+		if si.listener != nil {
+			si.listener.Close()
+			<-si.done
+		}
+		delete(solvers, s.address)
+	}
+
+	return nil
+}
+
+// tlsALPNCertKeyName returns the key to use when caching a cert
+// for use with the TLS-ALPN ACME challenge. It is simply to help
+// avoid conflicts (although at the time of writing, there shouldn't
+// be any, since the cert cache is keyed by hash of certificate chain).
+func tlsALPNCertKeyName(sniName string) string {
+	return sniName + ":acme-tls-alpn"
+}
+
+// DNS01Solver is a type that makes libdns providers usable
+// as ACME dns-01 challenge solvers.
+// See https://github.com/libdns/libdns
+type DNS01Solver struct {
+	// The implementation that interacts with the DNS
+	// provider to set or delete records. (REQUIRED)
+	DNSProvider ACMEDNSProvider
+
+	// The TTL for the temporary challenge records.
+	TTL time.Duration
+
+	// Maximum time to wait for the temporary record to appear.
+	PropagationTimeout time.Duration
+
+	// Preferred DNS resolver(s) to use when doing DNS lookups.
+	Resolvers []string
+
+	txtRecords   map[string]dnsPresentMemory // keyed by domain name
+	txtRecordsMu sync.Mutex
+}
+
+// Present creates the DNS TXT record for the given ACME challenge.
+func (s *DNS01Solver) Present(ctx context.Context, challenge acme.Challenge) error {
+	dnsName := challenge.DNS01TXTRecordName()
+	keyAuth := challenge.DNS01KeyAuthorization()
+
+	rec := libdns.Record{
+		Type:  "TXT",
+		Name:  dnsName,
+		Value: keyAuth,
+		TTL:   s.TTL,
+	}
+
+	// multiple identifiers can have the same ACME challenge
+	// domain (e.g. example.com and *.example.com) so we need
+	// to ensure that we don't solve those concurrently and
+	// step on each other's metaphorical toes; see
+	// https://github.com/caddyserver/caddy/issues/3474
+	activeDNSChallenges.Lock(dnsName)
+
+	zone, err := findZoneByFQDN(dnsName, recursiveNameservers(s.Resolvers))
+	if err != nil {
+		return fmt.Errorf("could not determine zone for domain %q: %v", dnsName, err)
+	}
+
+	results, err := s.DNSProvider.AppendRecords(ctx, zone, []libdns.Record{rec})
+	if err != nil {
+		return fmt.Errorf("adding temporary record for zone %s: %w", zone, err)
+	}
+	if len(results) != 1 {
+		return fmt.Errorf("expected one record, got %d: %v", len(results), results)
+	}
+
+	// remember the record and zone we got so we can clean up more efficiently
+	s.txtRecordsMu.Lock()
+	if s.txtRecords == nil {
+		s.txtRecords = make(map[string]dnsPresentMemory)
+	}
+	s.txtRecords[dnsName] = dnsPresentMemory{dnsZone: zone, rec: results[0]}
+	s.txtRecordsMu.Unlock()
+
+	return nil
+}
+
+// Wait blocks until the TXT record created in Present() appears in
+// authoritative lookups, i.e. until it has propagated, or until
+// timeout, whichever is first.
+func (s *DNS01Solver) Wait(ctx context.Context, challenge acme.Challenge) error {
+	dnsName := challenge.DNS01TXTRecordName()
+	keyAuth := challenge.DNS01KeyAuthorization()
+
+	timeout := s.PropagationTimeout
+	if timeout == 0 {
+		timeout = 2 * time.Minute
+	}
+	const interval = 2 * time.Second
+
+	resolvers := recursiveNameservers(s.Resolvers)
+
+	var err error
+	start := time.Now()
+	for time.Since(start) < timeout {
+		select {
+		case <-time.After(interval):
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+		var ready bool
+		ready, err = checkDNSPropagation(dnsName, keyAuth, resolvers)
+		if err != nil {
+			return fmt.Errorf("checking DNS propagation of %s: %w", dnsName, err)
+		}
+		if ready {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("timed out waiting for record to fully propagate; verify DNS provider configuration is correct - last error: %v", err)
+}
+
+// CleanUp deletes the DNS TXT record created in Present().
+func (s *DNS01Solver) CleanUp(ctx context.Context, challenge acme.Challenge) error {
+	dnsName := challenge.DNS01TXTRecordName()
+
+	defer func() {
+		// always forget about it so we don't leak memory
+		s.txtRecordsMu.Lock()
+		delete(s.txtRecords, dnsName)
+		s.txtRecordsMu.Unlock()
+
+		// always do this last - but always do it!
+		activeDNSChallenges.Unlock(dnsName)
+	}()
+
+	// recall the record we created and zone we looked up
+	s.txtRecordsMu.Lock()
+	memory, ok := s.txtRecords[dnsName]
+	if !ok {
+		s.txtRecordsMu.Unlock()
+		return fmt.Errorf("no memory of presenting a DNS record for %s (probably OK if presenting failed)", challenge.Identifier.Value)
+	}
+	s.txtRecordsMu.Unlock()
+
+	// clean up the record
+	_, err := s.DNSProvider.DeleteRecords(ctx, memory.dnsZone, []libdns.Record{memory.rec})
+	if err != nil {
+		return fmt.Errorf("deleting temporary record for zone %s: %w", memory.dnsZone, err)
+	}
+
+	return nil
+}
+
+type dnsPresentMemory struct {
+	dnsZone string
+	rec     libdns.Record
+}
+
+// ACMEDNSProvider defines the set of operations required for
+// ACME challenges. A DNS provider must be able to append and
+// delete records in order to solve ACME challenges. Find one
+// you can use at https://github.com/libdns. If your provider
+// isn't implemented yet, feel free to contribute!
+type ACMEDNSProvider interface {
+	libdns.RecordAppender
+	libdns.RecordDeleter
+}
+
+// activeDNSChallenges synchronizes DNS challenges for
+// names to ensure that challenges for the same ACME
+// DNS name do not overlap; for example, the TXT record
+// to create for both example.com and *.example.com is
+// the same; thus we cannot solve them concurrently.
+var activeDNSChallenges = newMapMutex()
+
+// mapMutex implements named mutexes.
+type mapMutex struct {
+	cond *sync.Cond
+	set  map[interface{}]struct{}
+}
+
+func newMapMutex() *mapMutex {
+	return &mapMutex{
+		cond: sync.NewCond(new(sync.Mutex)),
+		set:  make(map[interface{}]struct{}),
+	}
+}
+
+func (mmu *mapMutex) Lock(key interface{}) {
+	mmu.cond.L.Lock()
+	defer mmu.cond.L.Unlock()
+	for mmu.locked(key) {
+		mmu.cond.Wait()
+	}
+	mmu.set[key] = struct{}{}
+}
+
+func (mmu *mapMutex) Unlock(key interface{}) {
+	mmu.cond.L.Lock()
+	defer mmu.cond.L.Unlock()
+	delete(mmu.set, key)
+	mmu.cond.Broadcast()
+}
+
+func (mmu *mapMutex) locked(key interface{}) (ok bool) {
+	_, ok = mmu.set[key]
+	return
+}
+
+// distributedSolver allows the ACME HTTP-01 and TLS-ALPN challenges
+// to be solved by an instance other than the one which initiated it.
+// This is useful behind load balancers or in other cluster/fleet
+// configurations. The only requirement is that the instance which
+// initiates the challenge shares the same storage and locker with
+// the others in the cluster. The storage backing the certificate
+// cache in distributedSolver.acmeManager is crucial.
+//
+// Obviously, the instance which completes the challenge must be
+// serving on the HTTPChallengePort for the HTTP-01 challenge or the
+// TLSALPNChallengePort for the TLS-ALPN-01 challenge (or have all
+// the packets port-forwarded) to receive and handle the request. The
+// server which receives the challenge must handle it by checking to
+// see if the challenge token exists in storage, and if so, decode it
+// and use it to serve up the correct response. Both HTTPChallengeHandler
+// in this package and the GetCertificate method implemented by a
+// Config support, and even require, this behavior.
+//
+// In short: the only two requirements for cluster operation are
+// sharing the same locker and storage, and using the facilities
+// provided by this package for solving the challenges.
+type distributedSolver struct {
+	// The ACMEManager whose config has a certificate
+	// cache backed by the storage that is shared among
+	// all the instances in the cluster - REQUIRED.
+	acmeManager *ACMEManager
+
+	// Since the distributedSolver is only a
+	// wrapper over an actual solver, place
+	// the actual solver here.
+	solver acmez.Solver
+
+	// The CA endpoint URL associated with
+	// this solver.
+	caURL string
+}
+
+// Present invokes the underlying solver's Present method
+// and also stores domain, token, and keyAuth to the storage
+// backing the certificate cache of dhs.acmeManager.
+func (dhs distributedSolver) Present(ctx context.Context, chal acme.Challenge) error {
+	infoBytes, err := json.Marshal(chal)
+	if err != nil {
+		return err
+	}
+
+	err = dhs.acmeManager.config.Storage.Store(dhs.challengeTokensKey(chal.Identifier.Value), infoBytes)
+	if err != nil {
+		return err
+	}
+
+	err = dhs.solver.Present(ctx, chal)
+	if err != nil {
+		return fmt.Errorf("presenting with embedded solver: %v", err)
+	}
+	return nil
+}
+
+// CleanUp invokes the underlying solver's CleanUp method
+// and also cleans up any assets saved to storage.
+func (dhs distributedSolver) CleanUp(ctx context.Context, chal acme.Challenge) error {
+	err := dhs.acmeManager.config.Storage.Delete(dhs.challengeTokensKey(chal.Identifier.Value))
+	if err != nil {
+		return err
+	}
+	err = dhs.solver.CleanUp(ctx, chal)
+	if err != nil {
+		return fmt.Errorf("cleaning up embedded provider: %v", err)
+	}
+	return nil
+}
+
+// challengeTokensPrefix returns the key prefix for challenge info.
+func (dhs distributedSolver) challengeTokensPrefix() string {
+	return path.Join(dhs.acmeManager.storageKeyCAPrefix(dhs.caURL), "challenge_tokens")
+}
+
+// challengeTokensKey returns the key to use to store and access
+// challenge info for domain.
+func (dhs distributedSolver) challengeTokensKey(domain string) string {
+	return path.Join(dhs.challengeTokensPrefix(), StorageKeys.Safe(domain)+".json")
+}
+
+// solverInfo associates a listener with the
+// number of challenges currently using it.
+type solverInfo struct {
+	closed   int32 // accessed atomically
+	count    int
+	listener net.Listener
+	done     chan struct{} // used to signal when our own solver server is done
+}
+
+// getSolverInfo gets a valid solverInfo struct for address.
+func getSolverInfo(address string) *solverInfo {
+	si, ok := solvers[address]
+	if !ok {
+		si = &solverInfo{done: make(chan struct{})}
+		solvers[address] = si
+	}
+	return si
+}
+
+// robustTryListen calls net.Listen for a TCP socket at addr.
+// This function may return both a nil listener and a nil error!
+// If it was able to bind the socket, it returns the listener
+// and no error. If it wasn't able to bind the socket because
+// the socket is already in use, then it returns a nil listener
+// and nil error. If it had any other error, it returns the
+// error. The intended error handling logic for this function
+// is to proceed if the returned listener is not nil; otherwise
+// return err (which may also be nil). In other words, this
+// function ignores errors if the socket is already in use,
+// which is useful for our challenge servers, where we assume
+// that whatever is already listening can solve the challenges.
+func robustTryListen(addr string) (net.Listener, error) {
+	var listenErr error
+	for i := 0; i < 2; i++ {
+		// doesn't hurt to sleep briefly before the second
+		// attempt in case the OS has timing issues
+		if i > 0 {
+			time.Sleep(100 * time.Millisecond)
+		}
+
+		// if we can bind the socket right away, great!
+		var ln net.Listener
+		ln, listenErr = net.Listen("tcp", addr)
+		if listenErr == nil {
+			return ln, nil
+		}
+
+		// if it failed just because the socket is already in use, we
+		// have no choice but to assume that whatever is using the socket
+		// can answer the challenge already, so we ignore the error
+		connectErr := dialTCPSocket(addr)
+		if connectErr == nil {
+			return nil, nil
+		}
+
+		// hmm, we couldn't connect to the socket, so something else must
+		// be wrong, right? wrong!! we've had reports across multiple OSes
+		// now that sometimes connections fail even though the OS told us
+		// that the address was already in use; either the listener is
+		// fluctuating between open and closed very, very quickly, or the
+		// OS is inconsistent and contradicting itself; I have been unable
+		// to reproduce this, so I'm now resorting to hard-coding substring
+		// matching in error messages as a really hacky and unreliable
+		// safeguard against this, until we can identify exactly what was
+		// happening; see the following threads for more info:
+		// https://caddy.community/t/caddy-retry-error/7317
+		// https://caddy.community/t/v2-upgrade-to-caddy2-failing-with-errors/7423
+		if strings.Contains(listenErr.Error(), "address already in use") ||
+			strings.Contains(listenErr.Error(), "one usage of each socket address") {
+			log.Printf("[WARNING] OS reports a contradiction: %v - but we cannot connect to it, with this error: %v; continuing anyway 🤞 (I don't know what causes this... if you do, please help?)", listenErr, connectErr)
+			return nil, nil
+		}
+	}
+	return nil, fmt.Errorf("could not start listener for challenge server at %s: %v", addr, listenErr)
+}
+
+// dialTCPSocket connects to a TCP address just for the sake of
+// seeing if it is open. It returns a nil error if a TCP connection
+// can successfully be made to addr within a short timeout.
+func dialTCPSocket(addr string) error {
+	conn, err := net.DialTimeout("tcp", addr, 250*time.Millisecond)
+	if err == nil {
+		conn.Close()
+	}
+	return err
+}
+
+// The active challenge solvers, keyed by listener address,
+// and protected by a mutex. Note that the creation of
+// solver listeners and the incrementing of their counts
+// are atomic operations guarded by this mutex.
+var (
+	solvers   = make(map[string]*solverInfo)
+	solversMu sync.Mutex
+)
+
+// Interface guards
+var (
+	_ acmez.Solver = (*DNS01Solver)(nil)
+	_ acmez.Waiter = (*DNS01Solver)(nil)
+)
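
Reviewer note: for context on the DNS01Solver added above, here is a minimal sketch of how a caller might wire a libdns provider into it. This is not part of the diff; the cloudflare provider and the token value are assumptions for illustration, and any libdns implementation with append/delete support should work the same way.

package main

import (
	"log"

	"github.com/caddyserver/certmagic"
	"github.com/libdns/cloudflare" // assumption: any libdns provider satisfying ACMEDNSProvider
)

func main() {
	// Tell the default ACMEManager to solve dns-01 with the provider.
	certmagic.DefaultACME.DNS01Solver = &certmagic.DNS01Solver{
		DNSProvider: &cloudflare.Provider{APIToken: "YOUR_TOKEN"}, // hypothetical token
		// TTL is optional (the provider applies its own default for zero),
		// and a zero PropagationTimeout falls back to the 2-minute
		// default in Wait() above.
	}
	if err := certmagic.HTTPS([]string{"example.com"}, nil); err != nil {
		log.Fatal(err)
	}
}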
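The serialization via activeDNSChallenges exists because apex and wildcard identifiers collide on one TXT name. A simplified, hypothetical sketch of what challenge.DNS01TXTRecordName() computes (the real method lives in the acme package this file imports):

package main

import (
	"fmt"
	"strings"
)

// txtRecordName approximates challenge.DNS01TXTRecordName(): the
// dns-01 record name is _acme-challenge. plus the identifier with
// any wildcard label stripped.
func txtRecordName(identifier string) string {
	return "_acme-challenge." + strings.TrimPrefix(identifier, "*.")
}

func main() {
	// Both identifiers map to the same TXT record name, which is why
	// Present() locks on dnsName before touching the zone.
	fmt.Println(txtRecordName("example.com"))   // _acme-challenge.example.com
	fmt.Println(txtRecordName("*.example.com")) // _acme-challenge.example.com
}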
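Finally, a hedged sketch of the cluster behavior the distributedSolver comment describes: an instance that wraps its handler with HTTPChallengeHandler and shares the same Storage can answer an HTTP-01 challenge initiated by a peer, because the handler looks the challenge token up in storage. The mux contents and port wiring here are illustrative assumptions.

package main

import (
	"log"
	"net/http"

	"github.com/caddyserver/certmagic"
)

func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte("hello"))
	})

	// DefaultACME uses certmagic's default storage; any instance
	// wrapped this way can serve challenge responses initiated by
	// a peer, provided both point at the same Storage backend.
	handler := certmagic.DefaultACME.HTTPChallengeHandler(mux)
	log.Fatal(http.ListenAndServe(":80", handler))
}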