From a3f87a81bd372ef9158448de890a9e5f89a7c1b8 Mon Sep 17 00:00:00 2001 From: Chris Kim Date: Wed, 9 Dec 2020 11:39:33 -0800 Subject: [PATCH 1/3] Independently set kubelet-cgroups and runtime-cgroups, and detect if we are running under a systemd scope Signed-off-by: Chris Kim --- pkg/daemons/agent/agent.go | 85 +++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/pkg/daemons/agent/agent.go b/pkg/daemons/agent/agent.go index 6b60305546..271b7fcf8a 100644 --- a/pkg/daemons/agent/agent.go +++ b/pkg/daemons/agent/agent.go @@ -20,6 +20,8 @@ import ( _ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration ) +const k3sCgroupRoot = "/k3s" + func Agent(config *config.Agent) error { rand.Seed(time.Now().UTC().UnixNano()) @@ -120,7 +122,7 @@ func startKubelet(cfg *config.Agent) error { if err != nil || defaultIP.String() != cfg.NodeIP { argsMap["node-ip"] = cfg.NodeIP } - root, hasCFS, hasPIDs := checkCgroups() + kubeletRoot, runtimeRoot, hasCFS, hasPIDs := checkCgroups() if !hasCFS { logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us") argsMap["cpu-cfs-quota"] = "false" @@ -131,9 +133,11 @@ func startKubelet(cfg *config.Agent) error { argsMap["enforce-node-allocatable"] = "" argsMap["feature-gates"] = addFeatureGate(argsMap["feature-gates"], "SupportPodPidsLimit=false") } - if root != "" { - argsMap["runtime-cgroups"] = root - argsMap["kubelet-cgroups"] = root + if kubeletRoot != "" { + argsMap["kubelet-cgroups"] = kubeletRoot + } + if runtimeRoot != "" { + argsMap["runtime-cgroups"] = runtimeRoot } if system.RunningInUserNS() { argsMap["feature-gates"] = addFeatureGate(argsMap["feature-gates"], "DevicePlugins=false") @@ -172,10 +176,10 @@ func addFeatureGate(current, new string) string { return current + "," + new } -func checkCgroups() (root string, hasCFS bool, hasPIDs bool) { +func checkCgroups() (kubeletRoot string, runtimeRoot string, hasCFS bool, hasPIDs bool) { f, err := 
os.Open("/proc/self/cgroup") if err != nil { - return "", false, false + return "", "", false, false } defer f.Close() @@ -194,37 +198,52 @@ func checkCgroups() (root string, hasCFS bool, hasPIDs bool) { if _, err := os.Stat(p); err == nil { hasCFS = true } - } - } - } - - // Examine process ID 1 to see if there is a cgroup assigned to it. - // When we are not in a container, process 1 is likely to be systemd or some other service manager. - // It either lives at `/` or `/init.scope` according to https://man7.org/linux/man-pages/man7/systemd.special.7.html - // When containerized, process 1 will be generally be in a cgroup, otherwise, we may be running in - // a host PID scenario but we don't support this. - g, err := os.Open("/proc/1/cgroup") - if err != nil { - return "", false, false - } - defer g.Close() - root = "" - scan = bufio.NewScanner(g) - for scan.Scan() { - parts := strings.Split(scan.Text(), ":") - if len(parts) < 3 { - continue - } - systems := strings.Split(parts[1], ",") - for _, system := range systems { - if system == "name=systemd" { + } else if system == "name=systemd" { + // If we detect that we are running under a `.scope` unit with systemd + // we can assume we are being directly invoked from the command line + // and thus need to set our kubelet root to something out of the context + // of `/user.slice` to ensure that `CPUAccounting` and `MemoryAccounting` + // are enabled, as they are generally disabled by default for `user.slice` + // Note that we are not setting the `runtimeRoot` as if we are running with + // `--docker`, we will inadvertently move the cgroup `dockerd` lives in + // which is not ideal and causes dockerd to become unmanageable by systemd. 
last := parts[len(parts)-1] - if last != "/" && last != "/init.scope" { - root = "/systemd" + i := strings.LastIndex(last, ".scope") + if i > 0 { + kubeletRoot = k3sCgroupRoot } } } } - return root, hasCFS, hasPIDs + if kubeletRoot == "" { + // Examine process ID 1 to see if there is a cgroup assigned to it. + // When we are not in a container, process 1 is likely to be systemd or some other service manager. + // It either lives at `/` or `/init.scope` according to https://man7.org/linux/man-pages/man7/systemd.special.7.html + // When containerized, process 1 will generally be in a cgroup, otherwise, we may be running in + // a host PID scenario but we don't support this. + g, err := os.Open("/proc/1/cgroup") + if err != nil { + return "", "", false, false + } + defer g.Close() + scan = bufio.NewScanner(g) + for scan.Scan() { + parts := strings.Split(scan.Text(), ":") + if len(parts) < 3 { + continue + } + systems := strings.Split(parts[1], ",") + for _, system := range systems { + if system == "name=systemd" { + last := parts[len(parts)-1] + if last != "/" && last != "/init.scope" { + kubeletRoot = k3sCgroupRoot + runtimeRoot = k3sCgroupRoot + } + } + } + } + } + return kubeletRoot, runtimeRoot, hasCFS, hasPIDs } From 48925fcb880fcd5e32a7b3c5b272b8f87fb97073 Mon Sep 17 00:00:00 2001 From: Chris Kim <30601846+Oats87@users.noreply.github.com> Date: Wed, 9 Dec 2020 11:59:54 -0800 Subject: [PATCH 2/3] Simplify checkCgroups function call Co-authored-by: Brian Downs --- pkg/daemons/agent/agent.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/daemons/agent/agent.go b/pkg/daemons/agent/agent.go index 271b7fcf8a..d1097ebc8f 100644 --- a/pkg/daemons/agent/agent.go +++ b/pkg/daemons/agent/agent.go @@ -176,7 +176,7 @@ func addFeatureGate(current, new string) string { return current + "," + new } -func checkCgroups() (kubeletRoot string, runtimeRoot string, hasCFS bool, hasPIDs bool) { +func checkCgroups() (kubeletRoot, runtimeRoot string, hasCFS, 
hasPIDs bool) { f, err := os.Open("/proc/self/cgroup") if err != nil { return "", "", false, false From 61ef2ce95ed79808761a41de859374cd233267c5 Mon Sep 17 00:00:00 2001 From: Chris Kim Date: Wed, 9 Dec 2020 12:32:27 -0800 Subject: [PATCH 3/3] use version.Program Signed-off-by: Chris Kim --- pkg/daemons/agent/agent.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/daemons/agent/agent.go b/pkg/daemons/agent/agent.go index d1097ebc8f..76792645a2 100644 --- a/pkg/daemons/agent/agent.go +++ b/pkg/daemons/agent/agent.go @@ -11,6 +11,7 @@ import ( "github.com/opencontainers/runc/libcontainer/system" "github.com/rancher/k3s/pkg/daemons/config" "github.com/rancher/k3s/pkg/daemons/executor" + "github.com/rancher/k3s/pkg/version" "github.com/sirupsen/logrus" "k8s.io/apimachinery/pkg/util/net" "k8s.io/component-base/logs" @@ -20,8 +21,6 @@ import ( _ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration ) -const k3sCgroupRoot = "/k3s" - func Agent(config *config.Agent) error { rand.Seed(time.Now().UTC().UnixNano()) @@ -210,7 +209,7 @@ func checkCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) { last := parts[len(parts)-1] i := strings.LastIndex(last, ".scope") if i > 0 { - kubeletRoot = k3sCgroupRoot + kubeletRoot = "/" + version.Program } } } @@ -238,8 +237,8 @@ func checkCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) { if system == "name=systemd" { last := parts[len(parts)-1] if last != "/" && last != "/init.scope" { - kubeletRoot = k3sCgroupRoot - runtimeRoot = k3sCgroupRoot + kubeletRoot = "/" + version.Program + runtimeRoot = "/" + version.Program } } }