Add systemd cgroup controller support

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
This commit is contained in:
Brad Davidson 2022-04-18 16:06:50 -07:00 committed by Brad Davidson
parent 1caae63140
commit 333311c7ee
8 changed files with 52 additions and 44 deletions

View File

@ -45,18 +45,22 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
}
isRunningInUserNS := userns.RunningInUserNS()
_, _, hasCFS, hasPIDs := cgroups.CheckCgroups()
_, _, controllers := cgroups.CheckCgroups()
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
disableCgroup := isRunningInUserNS && (!controllers["cpu"] || !controllers["pids"] || !cgroupfsWritable)
if disableCgroup {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
}
systemdCgroup := controllers["cpuset"] && os.Getenv("NOTIFY_SOCKET") != ""
cfg.AgentConfig.Systemd = systemdCgroup
var containerdTemplate string
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: disableCgroup,
SystemdCgroup: systemdCgroup,
IsRunningInUserNS: isRunningInUserNS,
PrivateRegistryConfig: privRegistries.Registry,
ExtraRuntimes: findNvidiaContainerRuntimes(os.DirFS(string(os.PathSeparator))),

View File

@ -45,6 +45,7 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: true,
SystemdCgroup: false,
IsRunningInUserNS: false,
PrivateRegistryConfig: privRegistries.Registry,
}

View File

@ -14,6 +14,7 @@ type ContainerdRuntimeConfig struct {
type ContainerdConfig struct {
NodeConfig *config.Node
DisableCgroup bool
SystemdCgroup bool
IsRunningInUserNS bool
PrivateRegistryConfig *registries.Registry
ExtraRuntimes map[string]ContainerdRuntimeConfig

View File

@ -81,6 +81,9 @@ enable_keychain = true
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins.cri.containerd.runtimes.runc.options]
SystemdCgroup = {{ .SystemdCgroup }}
{{ if .PrivateRegistryConfig }}
{{ if .PrivateRegistryConfig.Mirrors }}
[plugins.cri.registry.mirrors]{{end}}

View File

@ -65,34 +65,30 @@ func validateCgroupsV2() error {
return nil
}
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
controllers = make(map[string]bool)
// For Unified (v2) cgroups we can directly check to see what controllers are mounted
// under the unified hierarchy.
if cgroupsModeV2 {
m, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
return "", "", false, false
return
}
controllers, err := m.Controllers()
enabledControllers, err := m.Controllers()
if err != nil {
return "", "", false, false
return
}
// Intentionally using an expressionless switch to match the logic below
for _, controller := range controllers {
switch {
case controller == "cpu":
hasCFS = true
case controller == "pids":
hasPIDs = true
}
for _, controller := range enabledControllers {
controllers[controller] = true
}
}
f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", "", false, false
return
}
defer f.Close()
@ -102,10 +98,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
if len(parts) < 3 {
continue
}
controllers := strings.Split(parts[1], ",")
enabledControllers := strings.Split(parts[1], ",")
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
// For v2, enabledControllers = {""} (only contains a single empty string), so only the cgroupsModeV2 case below applies and the per-controller cases are not used.
for _, controller := range enabledControllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
// If we detect that we are running under a `.scope` unit with systemd
@ -128,10 +124,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
if _, err := os.Stat(p); err == nil {
hasCFS = true
controllers[controller] = true
}
case controller == "pids":
hasPIDs = true
default:
controllers[controller] = true
}
}
}
@ -146,7 +142,7 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
// a host PID scenario but we don't support this.
g, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", "", false, false
return
}
defer g.Close()
scan = bufio.NewScanner(g)
@ -170,5 +166,5 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
}
}
}
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs
return
}

View File

@ -1,3 +1,4 @@
//go:build windows
// +build windows
package cgroups
@ -6,6 +7,6 @@ func Validate() error {
return nil
}
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
return "", "", false, false
// CheckCgroups is the Windows build's stand-in for cgroup detection. It
// performs no probing and always reports empty kubelet and runtime cgroup
// roots together with a nil controllers map. Indexing a nil map is safe in Go
// and yields false, so every controller lookup by a caller reads as "absent".
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
	return "", "", nil
}

View File

@ -18,17 +18,15 @@ import (
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
)
func createRootlessConfig(argsMap map[string]string, hasCFS, hasPIDs bool) {
func createRootlessConfig(argsMap map[string]string, controllers map[string]bool) {
argsMap["feature-gates=KubeletInUserNamespace"] = "true"
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
if hasCFS && hasPIDs && cgroupfsWritable {
if controllers["cpu"] && controllers["pids"] && cgroupfsWritable {
logrus.Info("cgroup v2 controllers are delegated for rootless.")
// cgroupfs v2, delegated for rootless by systemd
argsMap["cgroup-driver"] = "cgroupfs"
} else {
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
return
}
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
}
func checkRuntimeEndpoint(cfg *config.Agent, argsMap map[string]string) {
@ -67,14 +65,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
bindAddress = "::1"
}
argsMap := map[string]string{
"healthz-bind-address": bindAddress,
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
//"cgroup-root": "/k3s",
"healthz-bind-address": bindAddress,
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
"cgroup-driver": "cgroupfs",
"authentication-token-webhook": "true",
"anonymous-auth": "false",
@ -138,13 +135,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
if err != nil || defaultIP.String() != cfg.NodeIP {
argsMap["node-ip"] = cfg.NodeIP
}
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := cgroups.CheckCgroups()
if !hasCFS {
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
kubeletRoot, runtimeRoot, controllers := cgroups.CheckCgroups()
if !controllers["cpu"] {
logrus.Warn("Disabling CPU quotas due to missing cpu controller or cpu.cfs_period_us")
argsMap["cpu-cfs-quota"] = "false"
}
if !hasPIDs {
logrus.Fatal("PIDS cgroup support not found")
if !controllers["pids"] {
logrus.Fatal("pids cgroup controller not found")
}
if kubeletRoot != "" {
argsMap["kubelet-cgroups"] = kubeletRoot
@ -172,7 +169,11 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
}
if cfg.Rootless {
createRootlessConfig(argsMap, hasCFS, hasCFS)
createRootlessConfig(argsMap, controllers)
}
if cfg.Systemd {
argsMap["cgroup-driver"] = "systemd"
}
if cfg.ProtectKernelDefaults {

View File

@ -90,6 +90,7 @@ type Agent struct {
ExtraKubeProxyArgs []string
PauseImage string
Snapshotter string
Systemd bool
CNIPlugin bool
NodeTaints []string
NodeLabels []string