From 7565e5c3de051400a9e3703f707049cbb9054cf3 Mon Sep 17 00:00:00 2001 From: zeripath Date: Mon, 15 May 2023 23:20:30 +0100 Subject: Implement systemd-notify protocol (#21151) This PR adds support for the systemd notify protocol. Several status messagess are provided. We should likely add a common notify/status message for graceful. Replaces #21140 Signed-off-by: Andrew Thornton --------- Signed-off-by: Andrew Thornton Co-authored-by: ltdk Co-authored-by: Giteabot --- modules/graceful/manager_unix.go | 67 +++++++++++++++++++++++++++++- modules/graceful/net_unix.go | 88 ++++++++++++++++++++++++++++++++++++---- modules/graceful/restart_unix.go | 16 ++++++-- 3 files changed, 159 insertions(+), 12 deletions(-) (limited to 'modules/graceful') diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index ca6ccc1b66..5d72111bff 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -11,6 +11,7 @@ import ( "os" "os/signal" "runtime/pprof" + "strconv" "sync" "syscall" "time" @@ -45,7 +46,7 @@ type Manager struct { func newGracefulManager(ctx context.Context) *Manager { manager := &Manager{ - isChild: len(os.Getenv(listenFDs)) > 0 && os.Getppid() > 1, + isChild: len(os.Getenv(listenFDsEnv)) > 0 && os.Getppid() > 1, lock: &sync.RWMutex{}, } manager.createServerWaitGroup.Add(numberOfServersToCreate) @@ -53,6 +54,41 @@ func newGracefulManager(ctx context.Context) *Manager { return manager } +type systemdNotifyMsg string + +const ( + readyMsg systemdNotifyMsg = "READY=1" + stoppingMsg systemdNotifyMsg = "STOPPING=1" + reloadingMsg systemdNotifyMsg = "RELOADING=1" + watchdogMsg systemdNotifyMsg = "WATCHDOG=1" +) + +func statusMsg(msg string) systemdNotifyMsg { + return systemdNotifyMsg("STATUS=" + msg) +} + +func pidMsg() systemdNotifyMsg { + return systemdNotifyMsg("MAINPID=" + strconv.Itoa(os.Getpid())) +} + +// Notify systemd of status via the notify protocol +func (g *Manager) notify(msg systemdNotifyMsg) { + conn, err := getNotifySocket() + if err != nil { + // the err is logged in getNotifySocket + return + } + if conn == nil { + return + } + defer conn.Close() + + if _, err = conn.Write([]byte(msg)); err != nil { + log.Warn("Failed to notify NOTIFY_SOCKET: %v", err) + return + } +} + func (g *Manager) start(ctx context.Context) { // Make contexts g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx) @@ -72,6 +108,8 @@ func (g *Manager) start(ctx context.Context) { // Set the running state & handle signals g.setState(stateRunning) + g.notify(statusMsg("Starting Gitea")) + g.notify(pidMsg()) go g.handleSignals(g.managerCtx) // Handle clean up of unused provided listeners and delayed start-up @@ -84,6 +122,7 @@ func (g *Manager) start(ctx context.Context) { // Ignore the error here there's not much we can do with it // They're logged in the CloseProvidedListeners function _ = CloseProvidedListeners() + g.notify(readyMsg) }() if setting.StartupTimeout > 0 { go func() { @@ -104,6 +143,8 @@ func (g *Manager) start(ctx context.Context) { return case <-time.After(setting.StartupTimeout): log.Error("Startup took too long! Shutting down") + g.notify(statusMsg("Startup took too long! Shutting down")) + g.notify(stoppingMsg) g.doShutdown() } }() @@ -126,6 +167,13 @@ func (g *Manager) handleSignals(ctx context.Context) { syscall.SIGTSTP, ) + watchdogTimeout := getWatchdogTimeout() + t := &time.Ticker{} + if watchdogTimeout != 0 { + g.notify(watchdogMsg) + t = time.NewTicker(watchdogTimeout / 2) + } + pid := syscall.Getpid() for { select { @@ -136,6 +184,7 @@ func (g *Manager) handleSignals(ctx context.Context) { g.DoGracefulRestart() case syscall.SIGUSR1: log.Warn("PID %d. Received SIGUSR1. Releasing and reopening logs", pid) + g.notify(statusMsg("Releasing and reopening logs")) if err := log.ReleaseReopen(); err != nil { log.Error("Error whilst releasing and reopening logs: %v", err) } @@ -153,6 +202,8 @@ func (g *Manager) handleSignals(ctx context.Context) { default: log.Info("PID %d. Received %v.", pid, sig) } + case <-t.C: + g.notify(watchdogMsg) case <-ctx.Done(): log.Warn("PID: %d. Background context for manager closed - %v - Shutting down...", pid, ctx.Err()) g.DoGracefulShutdown() @@ -169,6 +220,9 @@ func (g *Manager) doFork() error { } g.forked = true g.lock.Unlock() + + g.notify(reloadingMsg) + // We need to move the file logs to append pids setting.RestartLogsWithPIDSuffix() @@ -191,18 +245,27 @@ func (g *Manager) DoGracefulRestart() { } } else { log.Info("PID: %d. Not set restartable. Shutting down...", os.Getpid()) - + g.notify(stoppingMsg) g.doShutdown() } } // DoImmediateHammer causes an immediate hammer func (g *Manager) DoImmediateHammer() { + g.notify(statusMsg("Sending immediate hammer")) g.doHammerTime(0 * time.Second) } // DoGracefulShutdown causes a graceful shutdown func (g *Manager) DoGracefulShutdown() { + g.lock.Lock() + if !g.forked { + g.lock.Unlock() + g.notify(stoppingMsg) + } else { + g.lock.Unlock() + g.notify(statusMsg("Shutting down after fork")) + } g.doShutdown() } diff --git a/modules/graceful/net_unix.go b/modules/graceful/net_unix.go index a2620d2c80..e9c1285123 100644 --- a/modules/graceful/net_unix.go +++ b/modules/graceful/net_unix.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "sync" + "time" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -21,9 +22,12 @@ import ( ) const ( - listenFDs = "LISTEN_FDS" - startFD = 3 - unlinkFDs = "GITEA_UNLINK_FDS" + listenFDsEnv = "LISTEN_FDS" + startFD = 3 + unlinkFDsEnv = "GITEA_UNLINK_FDS" + + notifySocketEnv = "NOTIFY_SOCKET" + watchdogTimeoutEnv = "WATCHDOG_USEC" ) // In order to keep the working directory the same as when we started we record @@ -38,6 +42,9 @@ var ( activeListenersToUnlink = []bool{} providedListeners = []net.Listener{} activeListeners = []net.Listener{} + + notifySocketAddr string + watchdogTimeout time.Duration ) func getProvidedFDs() (savedErr error) { @@ -45,18 +52,52 @@ func getProvidedFDs() (savedErr error) { once.Do(func() { mutex.Lock() defer mutex.Unlock() + // now handle some additional systemd provided things + notifySocketAddr = os.Getenv(notifySocketEnv) + if notifySocketAddr != "" { + log.Debug("Systemd Notify Socket provided: %s", notifySocketAddr) + savedErr = os.Unsetenv(notifySocketEnv) + if savedErr != nil { + log.Warn("Unable to Unset the NOTIFY_SOCKET environment variable: %v", savedErr) + return + } + // FIXME: We don't handle WATCHDOG_PID + timeoutStr := os.Getenv(watchdogTimeoutEnv) + if timeoutStr != "" { + savedErr = os.Unsetenv(watchdogTimeoutEnv) + if savedErr != nil { + log.Warn("Unable to Unset the WATCHDOG_USEC environment variable: %v", savedErr) + return + } - numFDs := os.Getenv(listenFDs) + s, err := strconv.ParseInt(timeoutStr, 10, 64) + if err != nil { + log.Error("Unable to parse the provided WATCHDOG_USEC: %v", err) + savedErr = fmt.Errorf("unable to parse the provided WATCHDOG_USEC: %w", err) + return + } + if s <= 0 { + log.Error("Unable to parse the provided WATCHDOG_USEC: %s should be a positive number", timeoutStr) + savedErr = fmt.Errorf("unable to parse the provided WATCHDOG_USEC: %s should be a positive number", timeoutStr) + return + } + watchdogTimeout = time.Duration(s) * time.Microsecond + } + } else { + log.Trace("No Systemd Notify Socket provided") + } + + numFDs := os.Getenv(listenFDsEnv) if numFDs == "" { return } n, err := strconv.Atoi(numFDs) if err != nil { - savedErr = fmt.Errorf("%s is not a number: %s. Err: %w", listenFDs, numFDs, err) + savedErr = fmt.Errorf("%s is not a number: %s. Err: %w", listenFDsEnv, numFDs, err) return } - fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDs), ",") + fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDsEnv), ",") providedListenersToUnlink = make([]bool, n) for _, fdStr := range fdsToUnlinkStr { i, err := strconv.Atoi(fdStr) @@ -73,7 +114,7 @@ func getProvidedFDs() (savedErr error) { if err == nil { // Close the inherited file if it's a listener if err = file.Close(); err != nil { - savedErr = fmt.Errorf("error closing provided socket fd %d: %s", i, err) + savedErr = fmt.Errorf("error closing provided socket fd %d: %w", i, err) return } providedListeners = append(providedListeners, l) @@ -255,3 +296,36 @@ func getActiveListenersToUnlink() []bool { copy(listenersToUnlink, activeListenersToUnlink) return listenersToUnlink } + +func getNotifySocket() (*net.UnixConn, error) { + if err := getProvidedFDs(); err != nil { + // This error will be logged elsewhere + return nil, nil + } + + if notifySocketAddr == "" { + return nil, nil + } + + socketAddr := &net.UnixAddr{ + Name: notifySocketAddr, + Net: "unixgram", + } + + notifySocket, err := net.DialUnix(socketAddr.Net, nil, socketAddr) + if err != nil { + log.Warn("failed to dial NOTIFY_SOCKET %s: %v", socketAddr, err) + return nil, err + } + + return notifySocket, nil +} + +func getWatchdogTimeout() time.Duration { + if err := getProvidedFDs(); err != nil { + // This error will be logged elsewhere + return 0 + } + + return watchdogTimeout +} diff --git a/modules/graceful/restart_unix.go b/modules/graceful/restart_unix.go index 406fe6c8af..a7c2b8288a 100644 --- a/modules/graceful/restart_unix.go +++ b/modules/graceful/restart_unix.go @@ -16,6 +16,7 @@ import ( "strings" "sync" "syscall" + "time" ) var killParent sync.Once @@ -70,11 +71,20 @@ func RestartProcess() (int, error) { // Pass on the environment and replace the old count key with the new one. var env []string for _, v := range os.Environ() { - if !strings.HasPrefix(v, listenFDs+"=") { + if !strings.HasPrefix(v, listenFDsEnv+"=") { env = append(env, v) } } - env = append(env, fmt.Sprintf("%s=%d", listenFDs, len(listeners))) + env = append(env, fmt.Sprintf("%s=%d", listenFDsEnv, len(listeners))) + + if notifySocketAddr != "" { + env = append(env, fmt.Sprintf("%s=%s", notifySocketEnv, notifySocketAddr)) + } + + if watchdogTimeout != 0 { + watchdogStr := strconv.FormatInt(int64(watchdogTimeout/time.Millisecond), 10) + env = append(env, fmt.Sprintf("%s=%s", watchdogTimeoutEnv, watchdogStr)) + } sb := &strings.Builder{} for i, unlink := range getActiveListenersToUnlink() { @@ -87,7 +97,7 @@ func RestartProcess() (int, error) { unlinkStr := sb.String() if len(unlinkStr) > 0 { unlinkStr = unlinkStr[:len(unlinkStr)-1] - env = append(env, fmt.Sprintf("%s=%s", unlinkFDs, unlinkStr)) + env = append(env, fmt.Sprintf("%s=%s", unlinkFDsEnv, unlinkStr)) } allFiles := append([]*os.File{os.Stdin, os.Stdout, os.Stderr}, files...) -- cgit v1.2.3