Small dqlite fixes

This commit is contained in:
Darren Shepherd 2019-12-16 11:44:13 -07:00
parent 97383868bd
commit 4acaa0740d
7 changed files with 128 additions and 16 deletions

View File

@ -19,8 +19,9 @@ RUN if [ "${ARCH}" == "amd64" ]; then \
ARG DQLITE=true
ENV DQLITE $DQLITE
COPY --from=rancher/dqlite-build:v1.2.1-r3 /dist/artifacts /usr/src/
RUN if [ "$DQLITE" = true ]; then \
curl -sfL https://github.com/rancher/dqlite-build/releases/download/v1.1.0-r6/dqlite-${ARCH}.tgz | tar xzf - -C / && \
tar xzf /usr/src/dqlite.tgz -C / && \
apk add --allow-untrusted /usr/local/packages/*.apk \
;fi

View File

@ -4,6 +4,7 @@ import (
"context"
"strings"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/clientaccess"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/kine/pkg/client"
@ -25,12 +26,12 @@ type Cluster struct {
func (c *Cluster) Start(ctx context.Context) error {
if err := c.startClusterAndHTTPS(ctx); err != nil {
return err
return errors.Wrap(err, "start cluster and https")
}
if c.runJoin {
if err := c.postJoin(ctx); err != nil {
return err
return errors.Wrap(err, "post join")
}
}
@ -61,7 +62,7 @@ func (c *Cluster) startStorage(ctx context.Context) error {
etcdConfig, err := endpoint.Listen(ctx, c.config.Datastore)
if err != nil {
return err
return errors.Wrap(err, "creating storage endpoint")
}
c.etcdConfig = etcdConfig

View File

@ -86,7 +86,7 @@ func (c *Cluster) initClusterDB(ctx context.Context, l net.Listener, handler htt
}
func (c *Cluster) dqliteEnabled() bool {
stamp := filepath.Join(c.config.DataDir, "db", "state.dqlite")
stamp := filepath.Join(dqlite.GetDBDir(c.config.DataDir))
if _, err := os.Stat(stamp); err == nil {
return true
}

View File

@ -20,6 +20,7 @@ import (
// registering k3s cloud provider
_ "github.com/rancher/k3s/pkg/cloudprovider"
"github.com/pkg/errors"
certutil "github.com/rancher/dynamiclistener/cert"
"github.com/rancher/k3s/pkg/clientaccess"
"github.com/rancher/k3s/pkg/cluster"
@ -81,7 +82,7 @@ func Server(ctx context.Context, cfg *config.Control) error {
cfg.Runtime = runtime
if err := prepare(ctx, cfg, runtime); err != nil {
return err
return errors.Wrap(err, "preparing server")
}
cfg.Runtime.Tunnel = setupTunnel()

View File

@ -6,6 +6,7 @@ import (
"strconv"
"github.com/canonical/go-dqlite/client"
"github.com/canonical/go-dqlite/driver"
controllerv1 "github.com/rancher/wrangler-api/pkg/generated/controllers/core/v1"
"github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
@ -64,9 +65,33 @@ func (h *handler) sync(key string, node *v1.Node) (*v1.Node, error) {
return node, nil
}
// ensureExists checks that address appears in the member list reported by the
// current dqlite leader.
//
// It returns nil when a member with a matching Address is found. A failure to
// locate the leader (other than driver.ErrNoAvailableLeader) or to query the
// member list is returned to the caller unchanged. When no leader is
// available, or when address is not in the member list, the condition is
// reported through logrus.Fatalf (which, by default, exits the process)
// instead of being returned.
func (h *handler) ensureExists(address string) error {
	leader, err := client.FindLeader(h.ctx, h.nodeStore, h.opts...)
	switch {
	case err == driver.ErrNoAvailableLeader:
		logrus.Fatalf("no dqlite leader found: %v", err)
	case err != nil:
		return err
	}
	defer leader.Close()

	nodes, err := leader.Cluster(h.ctx)
	if err != nil {
		return err
	}

	// Scan the membership for our own address.
	joined := false
	for i := range nodes {
		if nodes[i].Address == address {
			joined = true
			break
		}
	}
	if !joined {
		logrus.Fatalf("Address %s is not member of the cluster", address)
	}

	return nil
}
func (h *handler) handleSelf(node *v1.Node) (*v1.Node, error) {
if node.Annotations[nodeID] == h.id && node.Annotations[nodeAddress] == h.address {
return node, nil
return node, h.ensureExists(h.address)
}
node = node.DeepCopy()

View File

@ -2,8 +2,15 @@ package dqlite
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
"github.com/canonical/go-dqlite/client"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -19,7 +26,11 @@ func (d *DQLite) Test(ctx context.Context) error {
}
logrus.Infof("Testing connection to peers %v", ips)
return d.Join(ctx, nil)
if err := d.Join(ctx, nil); err != nil {
return err
}
logrus.Infof("Connection OK to peers %v", ips)
return nil
}
func (d *DQLite) Join(ctx context.Context, nodes []client.NodeInfo) error {
@ -42,10 +53,62 @@ func (d *DQLite) Join(ctx context.Context, nodes []client.NodeInfo) error {
for _, testNode := range current {
if testNode.Address == d.NodeInfo.Address {
nodeID, err := getClusterID(false, d.DataDir)
if err != nil {
return errors.Wrap(err, "get cluster ID")
}
if testNode.ID != nodeID {
if err := d.node.Close(); err != nil {
return errors.Wrap(err, "node close for id reset")
}
if err := writeClusterID(testNode.ID, d.DataDir); err != nil {
return errors.Wrap(err, "restart node to reset ID")
}
return fmt.Errorf("reseting node ID from %d to %d, please restart", nodeID, testNode.ID)
}
return nil
}
}
if found, err := cleanDir(d.DataDir, true); err != nil {
return err
} else if found {
if err := d.node.Close(); err != nil {
return errors.Wrap(err, "node close for cleaning")
}
_, _ = cleanDir(d.DataDir, false)
return fmt.Errorf("cleaned DB directory, now restart and join")
}
logrus.Infof("Joining dqlite cluster as address=%s, id=%d", d.NodeInfo.Address, d.NodeInfo.ID)
return client.Add(ctx, d.NodeInfo)
}
// cleanDir inspects the dqlite DB directory derived from dataDir for files
// that are neither directories, dot-prefixed entries, nor listed in
// ignoreFile.
//
// With check set, nothing is modified: it returns (true, nil) as soon as one
// such file is seen and (false, nil) if there is none. With check unset, each
// such file is renamed into a ".backup-<unix timestamp>" directory created
// (mode 0700) inside the DB directory; the boolean result is then always
// false, even when files were moved. Any failure to read the directory,
// create the backup directory, or rename a file is returned wrapped.
func cleanDir(dataDir string, check bool) (bool, error) {
	dbDir := GetDBDir(dataDir)
	backupDir := filepath.Join(dbDir, fmt.Sprintf(".backup-%d", time.Now().Unix()))

	entries, err := ioutil.ReadDir(dbDir)
	if err != nil {
		return false, errors.Wrap(err, "cleaning dqlite DB dir")
	}

	for _, entry := range entries {
		name := entry.Name()
		switch {
		case entry.IsDir(), strings.HasPrefix(name, "."), ignoreFile[name]:
			// Not a candidate for backup; leave it where it is.
			continue
		case check:
			// Dry run: a single candidate is enough to answer.
			return true, nil
		}

		// The backup directory is only created once there is something to move.
		if err := os.MkdirAll(backupDir, 0700); err != nil {
			return false, errors.Wrapf(err, "creating backup dir %s", backupDir)
		}

		src := filepath.Join(dbDir, name)
		dst := filepath.Join(backupDir, name)
		logrus.Infof("Backing up %s => %s", src, dst)
		if err := os.Rename(src, dst); err != nil {
			return false, errors.Wrapf(err, "backup %s", src)
		}
	}

	return false, nil
}

View File

@ -25,6 +25,18 @@ import (
"k8s.io/apimachinery/pkg/util/net"
)
const (
	// PeersFile is the name of the node store file kept inside the dqlite DB
	// directory.
	PeersFile = "peers.db"
	// NodeIDFile is the name of the file, inside the dqlite DB directory,
	// that holds this node's ID as a decimal string.
	NodeIDFile = "node-id"
)

// ignoreFile is the set of file names in the dqlite DB directory that
// cleanDir skips, so they are never moved into a backup directory.
var ignoreFile = map[string]bool{
	PeersFile:  true,
	NodeIDFile: true,
}
type Certs struct {
ServerTrust *x509.Certificate
ClientTrust *x509.Certificate
@ -127,7 +139,7 @@ func (d *DQLite) startController(ctx context.Context) {
}
func (d *DQLite) nodeStore(ctx context.Context, initCluster bool) error {
peerDB := filepath.Join(d.DataDir, "db", "state.dqlite", "peers.db")
peerDB := filepath.Join(GetDBDir(d.DataDir), PeersFile)
ns, err := client.DefaultNodeStore(peerDB)
if err != nil {
return err
@ -172,13 +184,17 @@ func getDialer(advertiseAddress, bindAddress string, tlsConfig *tls.Config) (cli
return dialer.NewHTTPDialer(advertiseAddress, bindAddress, tlsConfig)
}
// GetDBDir returns the path of the dqlite state directory for the given data
// directory, i.e. <dataDir>/db/state.dqlite.
func GetDBDir(dataDir string) string {
	segments := []string{dataDir, "db", "state.dqlite"}
	return filepath.Join(segments...)
}
func getNode(dataDir string, advertiseAddress, bindAddress string, initCluster bool, dial client.DialFunc) (dqlite.NodeInfo, *dqlite.Node, error) {
id, err := getClusterID(initCluster, dataDir)
if err != nil {
return dqlite.NodeInfo{}, nil, errors.Wrap(err, "reading cluster id")
}
dbDir := filepath.Join(dataDir, "db", "state.dqlite")
dbDir := GetDBDir(dataDir)
node, err := dqlite.New(id, advertiseAddress, dbDir,
dqlite.WithBindAddress(bindAddress),
@ -190,8 +206,16 @@ func getNode(dataDir string, advertiseAddress, bindAddress string, initCluster b
}, node, err
}
// writeClusterID persists id, formatted as a base-10 string, to NodeIDFile in
// the dqlite DB directory for dataDir. The containing directory is created
// with mode 0700 if it does not exist; the file itself is written with mode
// 0644. The first error from either step is returned.
func writeClusterID(id uint64, dataDir string) error {
	target := filepath.Join(GetDBDir(dataDir), NodeIDFile)

	if err := os.MkdirAll(filepath.Dir(target), 0700); err != nil {
		return err
	}

	content := []byte(strconv.FormatUint(id, 10))
	return ioutil.WriteFile(target, content, 0644)
}
func getClusterID(initCluster bool, dataDir string) (uint64, error) {
idFile := filepath.Join(dataDir, "db/state.dqlite/node-id")
idFile := filepath.Join(GetDBDir(dataDir), NodeIDFile)
content, err := ioutil.ReadFile(idFile)
if os.IsNotExist(err) {
content = nil
@ -201,14 +225,11 @@ func getClusterID(initCluster bool, dataDir string) (uint64, error) {
idStr := strings.TrimSpace(string(content))
if idStr == "" {
if err := os.MkdirAll(filepath.Dir(idFile), 0700); err != nil {
return 0, err
}
id := rand.Uint64()
if initCluster {
id = 1
}
return id, ioutil.WriteFile(idFile, []byte(strconv.FormatUint(id, 10)), 0644)
return id, writeClusterID(id, dataDir)
}
return strconv.ParseUint(idStr, 10, 64)
@ -216,5 +237,5 @@ func getClusterID(initCluster bool, dataDir string) (uint64, error) {
func (d *DQLite) getBindAddress() string {
// only anonymous works???
return "@" + filepath.Join(d.DataDir, "db", "state.dqlite", "dqlite.sock")
return "@" + filepath.Join(GetDBDir(d.DataDir), "dqlite.sock")
}