Delay service readiness until after startup hooks have finished (#5649)

* Move the startup hooks WaitGroup into a runtime pointer, and wait on it before notifying systemd
* Switch default systemd notification to the server
* Add a 1-second delay to allow etcd to write to disk
Signed-off-by: Derek Nola <derek.nola@suse.com>
This commit is contained in:
Derek Nola 2022-06-15 09:00:52 -07:00 committed by GitHub
parent 97c69546c5
commit a9b5a1933f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 27 additions and 12 deletions

View File

@ -28,6 +28,7 @@ import (
"github.com/k3s-io/k3s/pkg/nodeconfig" "github.com/k3s-io/k3s/pkg/nodeconfig"
"github.com/k3s-io/k3s/pkg/rootless" "github.com/k3s-io/k3s/pkg/rootless"
"github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/util"
"github.com/k3s-io/k3s/pkg/version"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
@ -146,8 +147,13 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error {
} }
} }
os.Setenv("NOTIFY_SOCKET", notifySocket) // By default, the server is responsible for notifying systemd
systemd.SdNotify(true, "READY=1\n") // On agent-only nodes, the agent will notify systemd
if notifySocket != "" {
logrus.Info(version.Program + " agent is up and running")
os.Setenv("NOTIFY_SOCKET", notifySocket)
systemd.SdNotify(true, "READY=1\n")
}
<-ctx.Done() <-ctx.Done()
return ctx.Err() return ctx.Err()

View File

@ -444,6 +444,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
logrus.Info("Starting " + version.Program + " " + app.App.Version) logrus.Info("Starting " + version.Program + " " + app.App.Version)
notifySocket := os.Getenv("NOTIFY_SOCKET") notifySocket := os.Getenv("NOTIFY_SOCKET")
os.Unsetenv("NOTIFY_SOCKET")
ctx := signals.SetupSignalContext() ctx := signals.SetupSignalContext()
@ -455,16 +456,16 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
if !serverConfig.ControlConfig.DisableAPIServer { if !serverConfig.ControlConfig.DisableAPIServer {
<-serverConfig.ControlConfig.Runtime.APIServerReady <-serverConfig.ControlConfig.Runtime.APIServerReady
logrus.Info("Kube API server is now running") logrus.Info("Kube API server is now running")
} else { serverConfig.ControlConfig.Runtime.StartupHooksWg.Wait()
}
if !serverConfig.ControlConfig.DisableETCD {
<-serverConfig.ControlConfig.Runtime.ETCDReady <-serverConfig.ControlConfig.Runtime.ETCDReady
logrus.Info("ETCD server is now running") logrus.Info("ETCD server is now running")
} }
logrus.Info(version.Program + " is up and running") logrus.Info(version.Program + " is up and running")
if (cfg.DisableAgent || cfg.DisableAPIServer) && notifySocket != "" { os.Setenv("NOTIFY_SOCKET", notifySocket)
os.Setenv("NOTIFY_SOCKET", notifySocket) systemd.SdNotify(true, "READY=1\n")
systemd.SdNotify(true, "READY=1\n")
}
}() }()
url := fmt.Sprintf("https://%s:%d", serverConfig.ControlConfig.BindAddressOrLoopback(false), serverConfig.ControlConfig.SupervisorPort) url := fmt.Sprintf("https://%s:%d", serverConfig.ControlConfig.BindAddressOrLoopback(false), serverConfig.ControlConfig.SupervisorPort)

View File

@ -8,6 +8,7 @@ import (
"net/http" "net/http"
"sort" "sort"
"strings" "strings"
"sync"
"time" "time"
"github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/util"
@ -269,6 +270,7 @@ type ControlRuntime struct {
APIServerReady <-chan struct{} APIServerReady <-chan struct{}
AgentReady <-chan struct{} AgentReady <-chan struct{}
ETCDReady <-chan struct{} ETCDReady <-chan struct{}
StartupHooksWg *sync.WaitGroup
ClusterControllerStart func(ctx context.Context) error ClusterControllerStart func(ctx context.Context) error
LeaderElectedClusterControllerStart func(ctx context.Context) error LeaderElectedClusterControllerStart func(ctx context.Context) error

View File

@ -186,6 +186,10 @@ func encryptionConfigHandler(ctx context.Context, server *config.Control) http.H
genErrorMessage(resp, http.StatusBadRequest, err) genErrorMessage(resp, http.StatusBadRequest, err)
return return
} }
// If a user kills the k3s server immediately after this call, we run into issues where the files
// have not yet been written. This sleep ensures that things have time to sync to disk before
// the request completes.
time.Sleep(1 * time.Second)
resp.WriteHeader(http.StatusOK) resp.WriteHeader(http.StatusOK)
}) })
} }

View File

@ -64,6 +64,8 @@ func StartServer(ctx context.Context, config *Config, cfg *cmds.Server) error {
wg.Add(len(config.StartupHooks)) wg.Add(len(config.StartupHooks))
config.ControlConfig.Runtime.Handler = router(ctx, config, cfg) config.ControlConfig.Runtime.Handler = router(ctx, config, cfg)
config.ControlConfig.Runtime.StartupHooksWg = wg
shArgs := cmds.StartupHookArgs{ shArgs := cmds.StartupHookArgs{
APIServerReady: config.ControlConfig.Runtime.APIServerReady, APIServerReady: config.ControlConfig.Runtime.APIServerReady,
KubeConfigAdmin: config.ControlConfig.Runtime.KubeConfigAdmin, KubeConfigAdmin: config.ControlConfig.Runtime.KubeConfigAdmin,
@ -79,7 +81,7 @@ func StartServer(ctx context.Context, config *Config, cfg *cmds.Server) error {
if config.ControlConfig.DisableAPIServer { if config.ControlConfig.DisableAPIServer {
go setETCDLabelsAndAnnotations(ctx, config) go setETCDLabelsAndAnnotations(ctx, config)
} else { } else {
go startOnAPIServerReady(ctx, wg, config) go startOnAPIServerReady(ctx, config)
} }
if err := printTokens(&config.ControlConfig); err != nil { if err := printTokens(&config.ControlConfig); err != nil {
@ -89,18 +91,18 @@ func StartServer(ctx context.Context, config *Config, cfg *cmds.Server) error {
return writeKubeConfig(config.ControlConfig.Runtime.ServerCA, config) return writeKubeConfig(config.ControlConfig.Runtime.ServerCA, config)
} }
func startOnAPIServerReady(ctx context.Context, wg *sync.WaitGroup, config *Config) { func startOnAPIServerReady(ctx context.Context, config *Config) {
select { select {
case <-ctx.Done(): case <-ctx.Done():
return return
case <-config.ControlConfig.Runtime.APIServerReady: case <-config.ControlConfig.Runtime.APIServerReady:
if err := runControllers(ctx, wg, config); err != nil { if err := runControllers(ctx, config); err != nil {
logrus.Fatalf("failed to start controllers: %v", err) logrus.Fatalf("failed to start controllers: %v", err)
} }
} }
} }
func runControllers(ctx context.Context, wg *sync.WaitGroup, config *Config) error { func runControllers(ctx context.Context, config *Config) error {
controlConfig := &config.ControlConfig controlConfig := &config.ControlConfig
sc, err := NewContext(ctx, controlConfig.Runtime.KubeConfigAdmin) sc, err := NewContext(ctx, controlConfig.Runtime.KubeConfigAdmin)
@ -108,7 +110,7 @@ func runControllers(ctx context.Context, wg *sync.WaitGroup, config *Config) err
return errors.Wrap(err, "failed to create new server context") return errors.Wrap(err, "failed to create new server context")
} }
wg.Wait() controlConfig.Runtime.StartupHooksWg.Wait()
if err := stageFiles(ctx, sc, controlConfig); err != nil { if err := stageFiles(ctx, sc, controlConfig); err != nil {
return errors.Wrap(err, "failed to stage files") return errors.Wrap(err, "failed to stage files")
} }