From 7cdfaad6ce8651daad1e5203ea644bc28239e9b3 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Sat, 6 Mar 2021 02:29:57 -0800 Subject: [PATCH] Always use static ports for client load-balancers (#3026) * Always use static ports for the load-balancers This fixes an issue where RKE2 kube-proxy daemonset pods were failing to communicate with the apiserver when RKE2 was restarted because the load-balancer used a different port every time it started up. This also changes the apiserver load-balancer port to be 1 below the supervisor port instead of 1 above it. This makes the apiserver port consistent at 6443 across servers and agents on RKE2. Additional fixes below were required to successfully test and use this change on etcd-only nodes. * Actually add lb-server-port flag to CLI * Fix nil pointer when starting server with --disable-etcd but no --server * Don't try to use full URI as initial load-balancer endpoint * Fix etcd load-balancer pool updates * Update dynamiclistener to fix cert updates on etcd-only nodes * Handle recursive initial server URL in load balancer * Don't run the deploy controller on etcd-only nodes --- go.mod | 2 +- go.sum | 4 +- pkg/agent/config/config.go | 7 ++-- pkg/agent/loadbalancer/loadbalancer.go | 5 +++ pkg/agent/proxy/apiproxy.go | 40 ++++++++++++------- pkg/agent/run.go | 2 +- pkg/cli/cmds/agent.go | 7 ++-- pkg/cli/cmds/server.go | 1 + pkg/cli/server/server.go | 17 ++++---- pkg/clientaccess/token.go | 2 +- pkg/clientaccess/token_test.go | 8 ++-- pkg/cluster/bootstrap.go | 2 +- pkg/cluster/managed.go | 14 ++++++- pkg/etcd/etcd.go | 2 +- pkg/etcd/etcdproxy.go | 9 +++-- pkg/server/etcd.go | 5 --- .../rancher/dynamiclistener/listener.go | 4 +- vendor/modules.txt | 2 +- 18 files changed, 80 insertions(+), 53 deletions(-) diff --git a/go.mod b/go.mod index 7596b2e049..979747631c 100644 --- a/go.mod +++ b/go.mod @@ -94,7 +94,7 @@ require ( github.com/opencontainers/selinux v1.6.0 github.com/pierrec/lz4 v2.5.2+incompatible github.com/pkg/errors v0.9.1 - github.com/rancher/dynamiclistener v0.2.2 + github.com/rancher/dynamiclistener v0.2.3 github.com/rancher/remotedialer v0.2.0 github.com/rancher/wrangler v0.6.1 github.com/rancher/wrangler-api v0.6.0 diff --git a/go.sum b/go.sum index 037d796975..2badd9666b 100644 --- a/go.sum +++ b/go.sum @@ -787,8 +787,8 @@ github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40T github.com/quobyte/api v0.1.8/go.mod h1:jL7lIHrmqQ7yh05OJ+eEEdHr0u/kmT1Ff9iHd+4H6VI= github.com/rakelkar/gonetsh v0.0.0-20190930180311-e5c5ffe4bdf0 h1:iXE9kmlAqhusXxzkXictdNgWS7p4ZBnmv9SdyMgTf6E= github.com/rakelkar/gonetsh v0.0.0-20190930180311-e5c5ffe4bdf0/go.mod h1:4XHkfaUj+URzGO9sohoAgt2V9Y8nIW7fugpu0E6gShk= -github.com/rancher/dynamiclistener v0.2.2 h1:70dMwOr1sqb6mQqfU2nDb/fr5cv7HJjH+kFYzoxb8KU= -github.com/rancher/dynamiclistener v0.2.2/go.mod h1:9WusTANoiRr8cDWCTtf5txieulezHbpv4vhLADPp0zU= +github.com/rancher/dynamiclistener v0.2.3 h1:FHn0Gkx+kIUqsFs3zMMR2QC9ufH/AoBLqO5zH5hbtqw= +github.com/rancher/dynamiclistener v0.2.3/go.mod h1:9WusTANoiRr8cDWCTtf5txieulezHbpv4vhLADPp0zU= github.com/rancher/moq v0.0.0-20190404221404-ee5226d43009/go.mod h1:wpITyDPTi/Na5h73XkbuEf2AP9fbgrIGqqxVzFhYD6U= github.com/rancher/remotedialer v0.2.0 h1:xD7t3K6JYwTdAsxmGtTHQMkEkFgKouQ1foLxVW424Dc= github.com/rancher/remotedialer v0.2.0/go.mod h1:tkU8ZvrR5lRgaKWaX71nAy6daeqvPFx/lJEnbW7tXSI= diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index c09045fcb1..8e5d25af2f 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -169,7 +169,7 @@ func getServingCert(nodeName, nodeIP, servingCertFile, servingKeyFile, nodePassw func getHostFile(filename, keyFile string, info *clientaccess.Info) error { basename := filepath.Base(filename) - fileBytes, err := clientaccess.Get("/v1-"+version.Program+"/"+basename, info) + fileBytes, err := info.Get("/v1-" + version.Program + "/" + basename) if err != nil { return err } @@ -308,8 +308,9 @@ func get(envInfo *cmds.Agent, proxy proxy.Proxy) (*config.Node, error) { return nil, err } + // If the supervisor and externally-facing apiserver are not on the same port, tell the proxy where to find the apiserver. if controlConfig.SupervisorPort != controlConfig.HTTPSPort { - if err := proxy.StartAPIServerProxy(controlConfig.HTTPSPort); err != nil { + if err := proxy.SetAPIServerPort(controlConfig.HTTPSPort); err != nil { return nil, errors.Wrapf(err, "failed to setup access to API Server port %d on at %s", controlConfig.HTTPSPort, proxy.SupervisorURL()) } } @@ -523,7 +524,7 @@ func get(envInfo *cmds.Agent, proxy proxy.Proxy) (*config.Node, error) { } func getConfig(info *clientaccess.Info) (*config.Control, error) { - data, err := clientaccess.Get("/v1-"+version.Program+"/config", info) + data, err := info.Get("/v1-" + version.Program + "/config") if err != nil { return nil, err } diff --git a/pkg/agent/loadbalancer/loadbalancer.go b/pkg/agent/loadbalancer/loadbalancer.go index 01490781b4..88f1f762f6 100644 --- a/pkg/agent/loadbalancer/loadbalancer.go +++ b/pkg/agent/loadbalancer/loadbalancer.go @@ -58,6 +58,11 @@ func New(dataDir, serviceName, serverURL string, lbServerPort int) (_lb *LoadBal return nil, err } + if serverURL == localServerURL { + logrus.Debugf("Initial server URL for load balancer points at local server URL - starting with empty original server address") + originalServerAddress = "" + } + lb := &LoadBalancer{ dialer: &net.Dialer{}, configFile: filepath.Join(dataDir, "etc", serviceName+".json"), diff --git a/pkg/agent/proxy/apiproxy.go b/pkg/agent/proxy/apiproxy.go index 573d217fb4..2efc6f5590 100644 --- a/pkg/agent/proxy/apiproxy.go +++ b/pkg/agent/proxy/apiproxy.go @@ -13,16 +13,23 @@ import ( type Proxy interface { Update(addresses []string) - StartAPIServerProxy(port int) error + SetAPIServerPort(port int) error SupervisorURL() string SupervisorAddresses() []string APIServerURL() string IsAPIServerLBEnabled() bool } -func NewAPIProxy(enabled bool, dataDir, supervisorURL string, lbServerPort int) (Proxy, error) { +// NewSupervisorProxy sets up a new proxy for retrieving supervisor and apiserver addresses. If +// lbEnabled is true, a load-balancer is started on the requested port to connect to the supervisor +// address, and the address of this local load-balancer is returned instead of the actual supervisor +// and apiserver addresses. +// NOTE: This is a proxy in the API sense - it returns either actual server URLs, or the URL of the +// local load-balancer. It is not actually responsible for proxying requests at the network level; +// this is handled by the load-balancers that the proxy optionally steers connections towards. +func NewSupervisorProxy(lbEnabled bool, dataDir, supervisorURL string, lbServerPort int) (Proxy, error) { p := proxy{ - lbEnabled: enabled, + lbEnabled: lbEnabled, dataDir: dataDir, initialSupervisorURL: supervisorURL, supervisorURL: supervisorURL, @@ -30,7 +37,7 @@ func NewAPIProxy(enabled bool, dataDir, supervisorURL string, lbServerPort int) lbServerPort: lbServerPort, } - if enabled { + if lbEnabled { lb, err := loadbalancer.New(dataDir, loadbalancer.SupervisorServiceName, supervisorURL, p.lbServerPort) if err != nil { return nil, err @@ -51,20 +58,20 @@ func NewAPIProxy(enabled bool, dataDir, supervisorURL string, lbServerPort int) } type proxy struct { - dataDir string - lbEnabled bool - lbServerPort int + dataDir string + lbEnabled bool + lbServerPort int + apiServerEnabled bool - initialSupervisorURL string + apiServerURL string supervisorURL string supervisorPort string + initialSupervisorURL string fallbackSupervisorAddress string supervisorAddresses []string - supervisorLB *loadbalancer.LoadBalancer - apiServerURL string - apiServerLB *loadbalancer.LoadBalancer - apiServerEnabled bool + apiServerLB *loadbalancer.LoadBalancer + supervisorLB *loadbalancer.LoadBalancer } func (p *proxy) Update(addresses []string) { @@ -96,7 +103,12 @@ func (p *proxy) setSupervisorPort(addresses []string) []string { return newAddresses } -func (p *proxy) StartAPIServerProxy(port int) error { +// SetAPIServerPort configures the proxy to return a different set of addresses for the apiserver, +// for use in cases where the apiserver is not running on the same port as the supervisor. If +// load-balancing is enabled, another load-balancer is started on a port one below the supervisor +// load-balancer, and the address of this load-balancer is returned instead of the actual apiserver +// addresses. +func (p *proxy) SetAPIServerPort(port int) error { u, err := url.Parse(p.initialSupervisorURL) if err != nil { return errors.Wrapf(err, "failed to parse server URL %s", p.initialSupervisorURL) @@ -109,7 +121,7 @@ func (p *proxy) StartAPIServerProxy(port int) error { if p.lbEnabled { lbServerPort := p.lbServerPort if lbServerPort != 0 { - lbServerPort = lbServerPort + 1 + lbServerPort = lbServerPort - 1 } lb, err := loadbalancer.New(p.dataDir, loadbalancer.APIServerServiceName, p.apiServerURL, lbServerPort) if err != nil { diff --git a/pkg/agent/run.go b/pkg/agent/run.go index fb8613104b..aefb347065 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -148,7 +148,7 @@ func Run(ctx context.Context, cfg cmds.Agent) error { return err } - proxy, err := proxy.NewAPIProxy(!cfg.DisableLoadBalancer, agentDir, cfg.ServerURL, cfg.LBServerPort) + proxy, err := proxy.NewSupervisorProxy(!cfg.DisableLoadBalancer, agentDir, cfg.ServerURL, cfg.LBServerPort) if err != nil { return err } diff --git a/pkg/cli/cmds/agent.go b/pkg/cli/cmds/agent.go index 6e4128641e..f9a2dc42d5 100644 --- a/pkg/cli/cmds/agent.go +++ b/pkg/cli/cmds/agent.go @@ -165,13 +165,13 @@ var ( Destination: &AgentConfig.EnableSELinux, EnvVar: version.ProgramUpper + "_SELINUX", } - LBServerPort = cli.IntFlag{ + LBServerPortFlag = cli.IntFlag{ Name: "lb-server-port", - Usage: "(agent/node) Internal Loadbalancer port", + Usage: "(agent/node) Local port for supervisor client load-balancer. If the supervisor and apiserver are not colocated an additional port 1 less than this port will also be used for the apiserver client load-balancer.", Hidden: false, Destination: &AgentConfig.LBServerPort, EnvVar: version.ProgramUpper + "_LB_SERVER_PORT", - Value: 0, + Value: 6444, } ) @@ -247,6 +247,7 @@ func NewAgentCommand(action func(ctx *cli.Context) error) cli.Command { Destination: &AgentConfig.Rootless, }, &SELinuxFlag, + LBServerPortFlag, // Deprecated/hidden below diff --git a/pkg/cli/cmds/server.go b/pkg/cli/cmds/server.go index 8d845f8bf6..40f48b944c 100644 --- a/pkg/cli/cmds/server.go +++ b/pkg/cli/cmds/server.go @@ -424,6 +424,7 @@ func NewServerCommand(action func(*cli.Context) error) cli.Command { Destination: &ServerConfig.EncryptSecrets, }, &SELinuxFlag, + LBServerPortFlag, // Hidden/Deprecated flags below diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 516a649a1b..ecf4600187 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -33,10 +33,6 @@ import ( _ "github.com/mattn/go-sqlite3" // ensure we have sqlite ) -const ( - lbServerPort = 6444 -) - func Run(app *cli.Context) error { if err := cmds.InitLogging(); err != nil { return err @@ -157,10 +153,17 @@ func run(app *cli.Context, cfg *cmds.Server) error { serverConfig.ControlConfig.SupervisorPort = serverConfig.ControlConfig.HTTPSPort } + if serverConfig.ControlConfig.DisableETCD && serverConfig.ControlConfig.JoinURL == "" { + return errors.New("invalid flag use. --server required with --disable-etcd") + } + if serverConfig.ControlConfig.DisableAPIServer { - serverConfig.ControlConfig.APIServerPort = lbServerPort + // Servers without a local apiserver need to connect to the apiserver via the proxy load-balancer. + serverConfig.ControlConfig.APIServerPort = cmds.AgentConfig.LBServerPort + // If the supervisor and externally-facing apiserver are not on the same port, the proxy will + // have a separate load-balancer for the apiserver that we need to use instead. if serverConfig.ControlConfig.SupervisorPort != serverConfig.ControlConfig.HTTPSPort { - serverConfig.ControlConfig.APIServerPort = lbServerPort + 1 + serverConfig.ControlConfig.APIServerPort = cmds.AgentConfig.LBServerPort - 1 } } @@ -332,8 +335,6 @@ func run(app *cli.Context, cfg *cmds.Server) error { } if serverConfig.ControlConfig.DisableAPIServer { - // setting LBServerPort to a prespecified port to initialize the kubeconfigs with the right address - agentConfig.LBServerPort = lbServerPort // initialize the apiAddress Channel for receiving the api address from etcd agentConfig.APIAddressCh = make(chan string, 1) setAPIAddressChannel(ctx, &serverConfig, &agentConfig) diff --git a/pkg/clientaccess/token.go b/pkg/clientaccess/token.go index c914881f87..e87d6f3cb6 100644 --- a/pkg/clientaccess/token.go +++ b/pkg/clientaccess/token.go @@ -177,7 +177,7 @@ func GetHTTPClient(cacerts []byte) *http.Client { } // Get makes a request to a subpath of info's BaseURL -func Get(path string, info *Info) ([]byte, error) { +func (info *Info) Get(path string) ([]byte, error) { u, err := url.Parse(info.BaseURL) if err != nil { return nil, err diff --git a/pkg/clientaccess/token_test.go b/pkg/clientaccess/token_test.go index df63e62bc2..cf6ef46ee6 100644 --- a/pkg/clientaccess/token_test.go +++ b/pkg/clientaccess/token_test.go @@ -71,7 +71,7 @@ func TestTrustedCA(t *testing.T) { // Confirm that the cert is actually trusted by the OS CA bundle by making a request // with empty cert pool testInfo.CACerts = nil - res, err := Get("/v1-k3s/server-bootstrap", testInfo) + res, err := testInfo.Get("/v1-k3s/server-bootstrap") assert.NoError(err) assert.NotEmpty(res) } @@ -190,7 +190,7 @@ func TestInvalidCredentials(t *testing.T) { info, err := ParseAndValidateToken(server.URL, testCase) assert.NoError(err, testCase) if assert.NotNil(info) { - res, err := Get("/v1-k3s/server-bootstrap", info) + res, err := info.Get("/v1-k3s/server-bootstrap") assert.Error(err, testCase) assert.Empty(res, testCase) } @@ -198,7 +198,7 @@ func TestInvalidCredentials(t *testing.T) { info, err = ParseAndValidateTokenForUser(server.URL, testCase, defaultUsername) assert.NoError(err, testCase) if assert.NotNil(info) { - res, err := Get("/v1-k3s/server-bootstrap", info) + res, err := info.Get("/v1-k3s/server-bootstrap") assert.Error(err, testCase) assert.Empty(res, testCase) } @@ -297,7 +297,7 @@ func TestParseAndGet(t *testing.T) { assert.Error(err, testCase) } else if assert.NoError(err, testCase) { info.BaseURL = server.URL + testCase.extraBasePost - _, err := Get(testCase.path, info) + _, err := info.Get(testCase.path) // Check for expected error when making Get request if testCase.getFail { assert.Error(err, testCase) diff --git a/pkg/cluster/bootstrap.go b/pkg/cluster/bootstrap.go index 24f02a22cc..cb2eac104a 100644 --- a/pkg/cluster/bootstrap.go +++ b/pkg/cluster/bootstrap.go @@ -121,7 +121,7 @@ func (c *Cluster) bootstrapped() error { // and loads it into the ControlRuntimeBootstrap struct. Unlike the storage bootstrap path, // this data does not need to be decrypted since it is generated on-demand by an existing server. func (c *Cluster) httpBootstrap() error { - content, err := clientaccess.Get("/v1-"+version.Program+"/server-bootstrap", c.clientAccessInfo) + content, err := c.clientAccessInfo.Get("/v1-" + version.Program + "/server-bootstrap") if err != nil { return err } diff --git a/pkg/cluster/managed.go b/pkg/cluster/managed.go index a3bef9b4c7..b0dac0bb4c 100644 --- a/pkg/cluster/managed.go +++ b/pkg/cluster/managed.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "net/http" + "net/url" "os" "path/filepath" "strings" @@ -195,8 +196,17 @@ func (c *Cluster) setupEtcdProxy(ctx context.Context, etcdProxy etcd.Proxy) { logrus.Warnf("failed to get etcd client URLs: %v", err) continue } - etcdProxy.Update(newAddresses) - + // client URLs are a full URI, but the proxy only wants host:port + var hosts []string + for _, address := range newAddresses { + u, err := url.Parse(address) + if err != nil { + logrus.Warnf("failed to parse etcd client URL: %v", err) + continue + } + hosts = append(hosts, u.Host) + } + etcdProxy.Update(hosts) } }() } diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index 14353ae851..49393d70f7 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -718,7 +718,7 @@ func (e *ETCD) setLearnerProgress(ctx context.Context, status *learnerProgress) // clientURLs returns a list of all non-learner etcd cluster member client access URLs func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info, selfIP string) ([]string, Members, error) { var memberList Members - resp, err := clientaccess.Get("/db/info", clientAccessInfo) + resp, err := clientAccessInfo.Get("/db/info") if err != nil { return nil, memberList, err } diff --git a/pkg/etcd/etcdproxy.go b/pkg/etcd/etcdproxy.go index 4380635783..0d7ad801dd 100644 --- a/pkg/etcd/etcdproxy.go +++ b/pkg/etcd/etcdproxy.go @@ -17,6 +17,11 @@ type Proxy interface { // NewETCDProxy initializes a new proxy structure that contain a load balancer // which listens on port 2379 and proxy between etcd cluster members func NewETCDProxy(enabled bool, dataDir, etcdURL string) (Proxy, error) { + u, err := url.Parse(etcdURL) + if err != nil { + return nil, errors.Wrap(err, "failed to parse etcd client URL") + } + e := &etcdproxy{ dataDir: dataDir, initialETCDURL: etcdURL, @@ -32,10 +37,6 @@ func NewETCDProxy(enabled bool, dataDir, etcdURL string) (Proxy, error) { e.etcdLBURL = lb.LoadBalancerServerURL() } - u, err := url.Parse(e.initialETCDURL) - if err != nil { - return nil, errors.Wrap(err, "failed to parse etcd client URL") - } e.fallbackETCDAddress = u.Host e.etcdPort = u.Port() diff --git a/pkg/server/etcd.go b/pkg/server/etcd.go index 9ed44711e8..03d1551541 100644 --- a/pkg/server/etcd.go +++ b/pkg/server/etcd.go @@ -28,11 +28,6 @@ func setETCDLabelsAndAnnotations(ctx context.Context, config *Config) error { continue } - if err := stageFiles(ctx, sc, controlConfig); err != nil { - logrus.Infof("Failed to set etcd role label: %v", err) - continue - } - if err := sc.Start(ctx); err != nil { logrus.Infof("Failed to set etcd role label: %v", err) continue diff --git a/vendor/github.com/rancher/dynamiclistener/listener.go b/vendor/github.com/rancher/dynamiclistener/listener.go index 3a30e2047f..43289354f5 100644 --- a/vendor/github.com/rancher/dynamiclistener/listener.go +++ b/vendor/github.com/rancher/dynamiclistener/listener.go @@ -347,7 +347,7 @@ func (l *listener) loadCert() (*tls.Certificate, error) { if err != nil { return nil, err } - if l.cert != nil && l.version == secret.ResourceVersion { + if l.cert != nil && l.version == secret.ResourceVersion && secret.ResourceVersion != "" { return l.cert, nil } @@ -360,7 +360,7 @@ func (l *listener) loadCert() (*tls.Certificate, error) { if err != nil { return nil, err } - if l.cert != nil && l.version == secret.ResourceVersion { + if l.cert != nil && l.version == secret.ResourceVersion && secret.ResourceVersion != "" { return l.cert, nil } diff --git a/vendor/modules.txt b/vendor/modules.txt index 9b2ef7f1c8..342104b47f 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -863,7 +863,7 @@ github.com/prometheus/procfs/internal/util # github.com/rakelkar/gonetsh v0.0.0-20190930180311-e5c5ffe4bdf0 github.com/rakelkar/gonetsh/netroute github.com/rakelkar/gonetsh/netsh -# github.com/rancher/dynamiclistener v0.2.2 +# github.com/rancher/dynamiclistener v0.2.3 ## explicit github.com/rancher/dynamiclistener github.com/rancher/dynamiclistener/cert