k3s/pkg/cluster/bootstrap.go
Brad Davidson a3bbd58f37 Fix managed etcd cold startup deadlock issue #2249
We should ignore --token and --server if the managed database is initialized,
just like we ignore --cluster-init. If the user wants to join a new
cluster, or rejoin a cluster after --cluster-reset, they need to delete
the database. This a cleaner way to prevent deadlocking on quorum loss,
and removes the requirement that the target of the --server argument
must be online before already joined nodes can start.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
2020-09-27 02:44:49 -07:00

131 lines
3.5 KiB
Go

package cluster
import (
"bytes"
"context"
"errors"
"os"
"path/filepath"
"github.com/rancher/k3s/pkg/bootstrap"
"github.com/rancher/k3s/pkg/clientaccess"
"github.com/rancher/k3s/pkg/version"
"github.com/sirupsen/logrus"
)
func (c *Cluster) Bootstrap(ctx context.Context) error {
if err := c.assignManagedDriver(ctx); err != nil {
return err
}
runBootstrap, err := c.shouldBootstrapLoad(ctx)
if err != nil {
return err
}
c.shouldBootstrap = runBootstrap
if runBootstrap {
if err := c.bootstrap(ctx); err != nil {
return err
}
}
return nil
}
// shouldBootstrapLoad returns true if we need to load ControlRuntimeBootstrap data again.
// This is controlled by a stamp file on disk that records successful bootstrap using a hash of the join token.
func (c *Cluster) shouldBootstrapLoad(ctx context.Context) (bool, error) {
// Non-nil managedDB indicates that the database is either initialized, initializing, or joining
if c.managedDB != nil {
c.runtime.HTTPBootstrap = true
isInitialized, err := c.managedDB.IsInitialized(ctx, c.config)
if err != nil {
return false, err
}
if isInitialized {
// If the database is initialized we skip bootstrapping; if the user wants to rejoin a
// cluster they need to delete the database.
logrus.Infof("Managed %s cluster bootstrap already complete and initialized", c.managedDB.EndpointName())
return false, nil
} else if c.config.JoinURL == "" {
// Not initialized, not joining - must be initializing (cluster-init)
logrus.Infof("Managed %s cluster initializing", c.managedDB.EndpointName())
return false, nil
} else {
// Not initialized, but have a Join URL - fail if there's no token; if there is then validate it.
if c.config.Token == "" {
return false, errors.New(version.ProgramUpper + "_TOKEN is required to join a cluster")
}
token, err := clientaccess.NormalizeAndValidateTokenForUser(c.config.JoinURL, c.config.Token, "server")
if err != nil {
return false, err
}
info, err := clientaccess.ParseAndValidateToken(c.config.JoinURL, token)
if err != nil {
return false, err
}
logrus.Infof("Managed %s cluster not yet initialized", c.managedDB.EndpointName())
c.clientAccessInfo = info
}
}
// Check the stamp file to see if we have successfully bootstrapped using this token.
// NOTE: The fact that we use a hash of the token to generate the stamp
// means that it is unsafe to use the same token for multiple clusters.
stamp := c.bootstrapStamp()
if _, err := os.Stat(stamp); err == nil {
logrus.Info("Cluster bootstrap already complete")
return false, nil
}
// No errors and no bootstrap stamp, need to bootstrap.
return true, nil
}
func (c *Cluster) bootstrapped() error {
if err := os.MkdirAll(filepath.Dir(c.bootstrapStamp()), 0700); err != nil {
return err
}
if _, err := os.Stat(c.bootstrapStamp()); err == nil {
return nil
}
f, err := os.Create(c.bootstrapStamp())
if err != nil {
return err
}
return f.Close()
}
func (c *Cluster) httpBootstrap() error {
content, err := clientaccess.Get("/v1-"+version.Program+"/server-bootstrap", c.clientAccessInfo)
if err != nil {
return err
}
return bootstrap.Read(bytes.NewBuffer(content), &c.runtime.ControlRuntimeBootstrap)
}
func (c *Cluster) bootstrap(ctx context.Context) error {
c.joining = true
if c.runtime.HTTPBootstrap {
return c.httpBootstrap()
}
// Bootstrap directly from datastore
return c.storageBootstrap(ctx)
}
func (c *Cluster) bootstrapStamp() string {
return filepath.Join(c.config.DataDir, "db/joined-"+keyHash(c.config.Token))
}