From 7aa3d0838543959937c12a54a5c0c731c7e3e549 Mon Sep 17 00:00:00 2001 From: larmog Date: Mon, 4 Nov 2019 12:08:47 +0100 Subject: [PATCH 1/2] Wait for api-server to report version after starting --- pkg/daemons/control/server.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/pkg/daemons/control/server.go b/pkg/daemons/control/server.go index 40999f6aad..7d9a4c06a6 100644 --- a/pkg/daemons/control/server.go +++ b/pkg/daemons/control/server.go @@ -30,6 +30,7 @@ import ( "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apiserver/pkg/authentication/authenticator" + "k8s.io/client-go/discovery" "k8s.io/client-go/tools/clientcmd" ccmapp "k8s.io/kubernetes/cmd/cloud-controller-manager/app" "k8s.io/kubernetes/cmd/kube-apiserver/app" @@ -90,6 +91,10 @@ func Server(ctx context.Context, cfg *config.Control) error { return err } + if err := waitForAPIServer(runtime); err != nil { + return err + } + runtime.Handler = handler runtime.Authenticator = auth @@ -825,3 +830,27 @@ func checkForCloudControllerPrivileges(runtime *config.ControlRuntime) error { } return nil } + +func waitForAPIServer(runtime *config.ControlRuntime) error { + restConfig, err := clientcmd.BuildConfigFromFlags("", runtime.KubeConfigAdmin) + if err != nil { + return err + } + + discoveryclient, err := discovery.NewDiscoveryClientForConfig(restConfig) + if err != nil { + return err + } + + for i := 0; i < 60; i++ { + info, err := discoveryclient.ServerVersion() + if err == nil { + logrus.Infof("apiserver %s is up and running", info) + return nil + } + logrus.Infof("waiting for apiserver to become available") + time.Sleep(1 * time.Second) + } + + return fmt.Errorf("timeout waiting for apiserver") +} From 01f6e0e64ec14e5ad8d281858deae6a9e70d8d83 Mon Sep 17 00:00:00 2001 From: Erik Wilson Date: Tue, 5 Nov 2019 04:44:35 -0700 Subject: [PATCH 2/2] Add context to server daemon functions that wait --- pkg/daemons/control/server.go | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pkg/daemons/control/server.go b/pkg/daemons/control/server.go index 7d9a4c06a6..e985be4be6 100644 --- a/pkg/daemons/control/server.go +++ b/pkg/daemons/control/server.go @@ -91,7 +91,7 @@ func Server(ctx context.Context, cfg *config.Control) error { return err } - if err := waitForAPIServer(runtime); err != nil { + if err := waitForAPIServer(ctx, runtime); err != nil { return err } @@ -105,7 +105,7 @@ func Server(ctx context.Context, cfg *config.Control) error { controllerManager(cfg, runtime) if !cfg.DisableCCM { - cloudControllerManager(cfg, runtime) + cloudControllerManager(ctx, cfg, runtime) } return nil @@ -782,7 +782,7 @@ func expired(certFile string) bool { return certutil.IsCertExpired(certificates[0]) } -func cloudControllerManager(cfg *config.Control, runtime *config.ControlRuntime) { +func cloudControllerManager(ctx context.Context, cfg *config.Control, runtime *config.ControlRuntime) { argsMap := map[string]string{ "kubeconfig": runtime.KubeConfigCloudController, "allocate-node-cidrs": "true", @@ -808,8 +808,12 @@ func cloudControllerManager(cfg *config.Control, runtime *config.ControlRuntime) // check for the cloud controller rbac binding if err := checkForCloudControllerPrivileges(runtime); err != nil { logrus.Infof("Waiting for cloudcontroller rbac role to be created") - time.Sleep(time.Second) - continue + select { + case <-ctx.Done(): + logrus.Fatalf("cloud-controller-manager context canceled: %v", ctx.Err()) + case <-time.After(time.Second): + continue + } } break } @@ -831,7 +835,7 @@ func checkForCloudControllerPrivileges(runtime *config.ControlRuntime) error { return nil } -func waitForAPIServer(runtime *config.ControlRuntime) error { +func waitForAPIServer(ctx context.Context, runtime *config.ControlRuntime) error { restConfig, err := clientcmd.BuildConfigFromFlags("", runtime.KubeConfigAdmin) if err != nil { return err @@ -849,7 +853,12 @@ func waitForAPIServer(runtime *config.ControlRuntime) error { return nil } logrus.Infof("waiting for apiserver to become available") - time.Sleep(1 * time.Second) + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(time.Second): + continue + } } return fmt.Errorf("timeout waiting for apiserver")