k3s/pkg/rootless/rootless.go
Akihiro Suda 728ebcc027 rootless: remove rootful /run/{netns,containerd} symlinks
Since a recent commit, rootless mode was failing with the following errors:

```
E0122 22:59:47.615567      21 kuberuntime_manager.go:755] createPodSandbox for pod "helm-install-traefik-wf8lc_kube-system(9de0a1b2-e2a2-4ea5-8fb6-22c9272a182f)" failed: rpc error: code = Unknown desc = failed to create network namespace for sandbox "285ab835609387f82d304bac1fefa5fb2a6c49a542a9921995d0c35d33c683d5": failed to setup netns: open /var/run/netns/cni-c628a228-651e-e03e-d27d-bb5e87281846: permission denied
...
E0122 23:31:34.027814      21 pod_workers.go:191] Error syncing pod 1a77d21f-ff3d-4475-9749-224229ddc31a ("coredns-854c77959c-w4d7g_kube-system(1a77d21f-ff3d-4475-9749-224229ddc31a)"), skipping: failed to "CreatePodSandbox" for "coredns-854c77959c-w4d7g_kube-system(1a77d21f-ff3d-4475-9749-224229ddc31a)" with CreatePodSandboxError: "CreatePodSandbox for pod \"coredns-854c77959c-w4d7g_kube-system(1a77d21f-ff3d-4475-9749-224229ddc31a)\" failed: rpc error: code = Unknown desc = failed to create containerd task: io.containerd.runc.v2: create new shim socket: listen unix /run/containerd/s/8f0e40e11a69738407f1ebaf31ced3f08c29bb62022058813314fb004f93c422: bind: permission denied\n: exit status 1: unknown"
```

Remove the symlinks to /run/{netns,containerd} so that rootless mode can create its own /run/{netns,containerd}.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
2021-01-22 19:51:43 -08:00

182 lines
4.4 KiB
Go

// +build !windows
package rootless
import (
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/pkg/errors"
"github.com/rootless-containers/rootlesskit/pkg/child"
"github.com/rootless-containers/rootlesskit/pkg/copyup/tmpfssymlink"
"github.com/rootless-containers/rootlesskit/pkg/network/slirp4netns"
"github.com/rootless-containers/rootlesskit/pkg/parent"
portbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
"github.com/sirupsen/logrus"
)
// Names of the environment variables Rootless inspects to decide which stage
// of the rootless bootstrap the current process is.
var (
	// pipeFD is handed to both the parent and child options as PipeFDEnvKey;
	// when it is set in the environment, Rootless takes the child branch.
	pipeFD = "_K3S_ROOTLESS_FD"

	// childEnv is exported by the parent stage with the path of the API
	// socket file inside the parent state directory; when it is set,
	// Rootless copies its value into Sock and sets up mounts.
	childEnv = "_K3S_ROOTLESS_SOCK"
)

// Sock is the value of the childEnv environment variable as seen by the final
// rootless process. It stays empty until Rootless populates it.
var Sock string
// Rootless runs the stage of the rootless bootstrap that corresponds to the
// current process. The stage is selected by two environment variables, both
// sampled once on entry:
//
//   - pipeFD set: run the child stage via child.Child.
//   - childEnv set: record the socket path in Sock and return the result of
//     setupMounts(stateDir).
//   - neither set: act as the parent: validate sysctls, build the parent
//     options under stateDir/rootless, export childEnv, run parent.Parent and
//     terminate the process with exit status 0.
//
// Failures in the child and parent stages are reported with logrus.Fatal*, so
// the only error ever returned to the caller is the one from setupMounts.
//
// NOTE(review): when child.Child returns nil, execution continues into the
// checks below instead of stopping — confirm this fall-through is intended.
func Rootless(stateDir string) error {
	// Only runs when the function returns normally; os.Exit skips deferred calls.
	defer func() {
		os.Unsetenv(pipeFD)
		os.Unsetenv(childEnv)
	}()

	// Sample both markers before any stage has a chance to run.
	isChild := os.Getenv(pipeFD) != ""
	isTarget := os.Getenv(childEnv) != ""

	if isChild {
		logrus.Debug("Running rootless child")
		opt, err := createChildOpt()
		if err != nil {
			logrus.Fatal(err)
		}
		if err = child.Child(*opt); err != nil {
			logrus.Fatalf("child died: %v", err)
		}
	}

	if !isTarget {
		logrus.Debug("Running rootless parent")
		if err := validateSysctl(); err != nil {
			logrus.Fatal(err)
		}
		opt, err := createParentOpt(filepath.Join(stateDir, "rootless"))
		if err != nil {
			logrus.Fatal(err)
		}
		// Exported before parent.Parent runs; later stages read it back as childEnv.
		os.Setenv(childEnv, filepath.Join(opt.StateDir, parent.StateFileAPISock))
		if err := parent.Parent(*opt); err != nil {
			logrus.Fatal(err)
		}
		os.Exit(0)
	}

	// The socket path is re-read here rather than cached from the check above.
	Sock = os.Getenv(childEnv)
	logrus.Debug("Running rootless process")
	return setupMounts(stateDir)
}
// validateSysctl verifies that the host sysctls rootless mode depends on hold
// the expected values.
//
// It returns an error describing the first mismatching sysctl, together with
// a hint on how to fix it, or nil when every readable sysctl matches. A sysctl
// that cannot be read is skipped rather than treated as a failure.
//
// The checks run in a fixed order, so when several values are wrong the same
// one is reported on every run.
func validateSysctl() error {
	// A slice rather than a map: map iteration order is unspecified, which
	// would make the reported mismatch vary from run to run.
	expected := []struct{ key, value string }{
		// kernel.unprivileged_userns_clone needs to be 1 to allow userns on some distros.
		{"kernel.unprivileged_userns_clone", "1"},
		// net.ipv4.ip_forward should not need to be 1 in the parent namespace.
		// However, the current k3s implementation has a bug that requires net.ipv4.ip_forward=1
		// https://github.com/rancher/k3s/issues/2420#issuecomment-715051120
		{"net.ipv4.ip_forward", "1"},
		// Currently, kernel.dmesg_restrict needs to be 0 to allow OOM-related messages
		// https://github.com/rootless-containers/usernetes/issues/204
		{"kernel.dmesg_restrict", "0"},
	}
	for _, want := range expected {
		actual, err := readSysctl(want.key)
		if err != nil {
			// Best effort: an unreadable key is not an error
			// (presumably because not every kernel exposes every key).
			continue
		}
		if actual != want.value {
			return errors.Errorf("expected sysctl value %q to be %q, got %q; try adding \"%s=%s\" to /etc/sysctl.conf and running `sudo sysctl --system`",
				want.key, want.value, actual, want.key, want.value)
		}
	}
	return nil
}
// readSysctl returns the current value of the sysctl named key, read from the
// matching file under /proc/sys (every "." in key becomes a path separator).
// Surrounding whitespace, including the trailing newline, is stripped.
// On a read failure it returns an empty string and the underlying error.
func readSysctl(key string) (string, error) {
	raw, err := ioutil.ReadFile("/proc/sys/" + strings.Replace(key, ".", "/", -1))
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(raw)), nil
}
// parseCIDR parses s as a CIDR block and returns its network.
//
// An empty string is not an error: it yields a nil network and a nil error.
// The address part of s must itself be the network address (10.0.2.0/24 is
// accepted, 10.0.2.100/24 is rejected).
func parseCIDR(s string) (*net.IPNet, error) {
	if len(s) == 0 {
		return nil, nil
	}
	addr, network, err := net.ParseCIDR(s)
	switch {
	case err != nil:
		return nil, err
	case !addr.Equal(network.IP):
		// Host bits are set, so the address is not the network address.
		return nil, errors.Errorf("cidr must be like 10.0.2.0/24, not like 10.0.2.100/24")
	}
	return network, nil
}
// createParentOpt builds the options passed to parent.Parent.
//
// stateDir is created (with any missing parents) if it does not exist. The
// state directory stored in the returned options, and handed to the port
// driver, is NOT stateDir but a fresh temporary directory with the "rootless"
// prefix.
// NOTE(review): stateDir is only created here, never used afterwards —
// confirm this is intended.
//
// The network driver is slirp4netns on 10.41.0.0/16 with host loopback
// disabled; the binary must be resolvable via exec.LookPath. Any failure is
// returned to the caller together with a nil *parent.Opt.
func createParentOpt(stateDir string) (*parent.Opt, error) {
	const (
		slirpBinary    = "slirp4netns"
		slirpCIDR      = "10.41.0.0/16"
		slirpMTU       = 0
		noHostLoopback = true
	)

	if err := os.MkdirAll(stateDir, 0755); err != nil {
		return nil, errors.Wrapf(err, "failed to mkdir %s", stateDir)
	}

	// From here on the temporary directory is the effective state directory.
	tmpStateDir, err := ioutil.TempDir("", "rootless")
	if err != nil {
		return nil, err
	}

	subnet, err := parseCIDR(slirpCIDR)
	if err != nil {
		return nil, err
	}

	// Fail early when the slirp4netns binary cannot be found.
	if _, err := exec.LookPath(slirpBinary); err != nil {
		return nil, err
	}

	// Both drivers share one writer that forwards to logrus at debug level.
	logW := &logrusDebugWriter{}

	netDriver, err := slirp4netns.NewParentDriver(logW, slirpBinary, slirpMTU, subnet, noHostLoopback, "", false, false)
	if err != nil {
		return nil, err
	}

	portDriver, err := portbuiltin.NewParentDriver(logW, tmpStateDir)
	if err != nil {
		return nil, err
	}

	return &parent.Opt{
		StateDir:      tmpStateDir,
		CreatePIDNS:   true,
		NetworkDriver: netDriver,
		PortDriver:    portDriver,
		PipeFDEnvKey:  pipeFD,
	}, nil
}
// logrusDebugWriter is a stateless writer that forwards everything written to
// it to logrus at debug level.
type logrusDebugWriter struct{}

// Write logs p as a single debug message, dropping one trailing newline if
// present. It always reports the full length of p as written and never fails.
func (*logrusDebugWriter) Write(p []byte) (int, error) {
	logrus.Debug(strings.TrimSuffix(string(p), "\n"))
	return len(p), nil
}
// createChildOpt builds the options passed to child.Child.
//
// The target command is the current process's own argument vector (os.Args),
// and pipeFD is used as the pipe FD environment key. The returned error is
// always nil.
func createChildOpt() (*child.Opt, error) {
	return &child.Opt{
		TargetCmd:     os.Args,
		PipeFDEnvKey:  pipeFD,
		NetworkDriver: slirp4netns.NewChildDriver(),
		PortDriver:    portbuiltin.NewChildDriver(&logrusDebugWriter{}),
		// Directories handed to the tmpfs+symlink copy-up driver below.
		CopyUpDirs:   []string{"/etc", "/var/run", "/run", "/var/lib"},
		CopyUpDriver: tmpfssymlink.NewChildDriver(),
		MountProcfs:  true,
		Reaper:       true,
	}, nil
}