//go:build linux
// +build linux

package cgroups

import (
	"bufio"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	cgroups "github.com/containerd/cgroups/v3"
	cgroupsv1 "github.com/containerd/cgroups/v3/cgroup1"
	cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
	"github.com/k3s-io/k3s/pkg/version"
	"github.com/sirupsen/logrus"
)

// Validate confirms that the cgroup controllers required by the node are available,
// dispatching to the v1 or v2 check based on the host's cgroup mode.
func Validate() error {
	switch cgroups.Mode() {
	case cgroups.Unified:
		return validateCgroupsV2()
	case cgroups.Legacy, cgroups.Hybrid:
		return validateCgroupsV1()
	default:
		return errors.New("unhandled cgroup mode")
	}
}

func validateCgroupsV1() error {
	controllers, err := cgroupsv1.Default()
	if err != nil {
		return err
	}

	m := make(map[string]struct{})
	for _, controller := range controllers {
		name := string(controller.Name())
		m[name] = struct{}{}
	}

	if _, ok := m["cpuset"]; !ok {
		logrus.Warn(`Failed to find cpuset cgroup, you may need to add "cgroup_enable=cpuset" to your linux cmdline (/boot/cmdline.txt on a Raspberry Pi)`)
	}

	if _, ok := m["memory"]; !ok {
		// Log a capitalized message but return a lowercase error, per Go error conventions.
		msg := `ailed to find memory cgroup, you may need to add "cgroup_memory=1 cgroup_enable=memory" to your linux cmdline (/boot/cmdline.txt on a Raspberry Pi)`
		logrus.Error("F" + msg)
		return errors.New("f" + msg)
	}

	return nil
}

func validateCgroupsV2() error {
	manager, err := cgroupsv2.NewManager("/sys/fs/cgroup", "/", &cgroupsv2.Resources{})
	if err != nil {
		return err
	}
	controllers, err := manager.RootControllers()
	if err != nil {
		return err
	}

	m := make(map[string]struct{})
	for _, controller := range controllers {
		m[controller] = struct{}{}
	}

	for _, controller := range []string{"cpu", "cpuset", "memory"} {
		if _, ok := m[controller]; !ok {
			return fmt.Errorf("failed to find %s cgroup (v2)", controller)
		}
	}
	return nil
}

// CheckCgroups examines /proc/self/cgroup (and, if necessary, /proc/1/cgroup) to determine
// which controllers are enabled and which cgroup roots the kubelet and container runtime should use.
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
	cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
	controllers = make(map[string]bool)

	// For Unified (v2) cgroups we can directly check to see what controllers are mounted
	// under the unified hierarchy.
	if cgroupsModeV2 {
		m, err := cgroupsv2.NewManager("/sys/fs/cgroup", "/", &cgroupsv2.Resources{})
		if err != nil {
			return
		}
		enabledControllers, err := m.Controllers()
		if err != nil {
			return
		}
		for _, controller := range enabledControllers {
			controllers[controller] = true
		}
	}

	f, err := os.Open("/proc/self/cgroup")
	if err != nil {
		return
	}
	defer f.Close()

	scan := bufio.NewScanner(f)
	for scan.Scan() {
		parts := strings.Split(scan.Text(), ":")
		if len(parts) < 3 {
			continue
		}
		enabledControllers := strings.Split(parts[1], ",")
		// For v1 or hybrid, the controller field can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}.
		// For v2, controllers = {""} (only contains a single empty string), so this section is not used.
		for _, controller := range enabledControllers {
			switch {
			case controller == "name=systemd" || cgroupsModeV2:
				// If we detect that we are running under a `.scope` unit with systemd,
				// we can assume we are being directly invoked from the command line
				// and thus need to set our kubelet root to something out of the context
				// of `/user.slice`, to ensure that `CPUAccounting` and `MemoryAccounting`
				// are enabled, as they are generally disabled by default for `user.slice`.
				// Note that we are not setting the `runtimeRoot`: if we are running with
				// `--docker`, we would inadvertently move the cgroup that `dockerd` lives in,
				// which is not ideal and causes dockerd to become unmanageable by systemd.
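				//
				// As a purely illustrative example (values are hypothetical), a /proc/self/cgroup
				// entry for such a scope might look like:
				//   v1 (name=systemd): "1:name=systemd:/user.slice/user-1000.slice/session-2.scope"
				//   v2 (unified):      "0::/user.slice/user-1000.slice/session-2.scope"
				// In both cases the final field ends in ".scope", which is what the check below looks for.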
				last := parts[len(parts)-1]
				i := strings.LastIndex(last, ".scope")
				if i > 0 {
					kubeletRoot = "/" + version.Program
				}
			case controller == "cpu":
				// It is common for this controller to show up multiple times in /sys/fs/cgroup if the controllers are comounted:
				// for example as "cpu" and "cpuacct", both symlinked to the actual hierarchy at "cpu,cpuacct". Unfortunately the order
				// listed in /proc/self/cgroup may not be the same order used in /sys/fs/cgroup, so this check
				// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
				p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
				if _, err := os.Stat(p); err == nil {
					controllers[controller] = true
				}
			default:
				controllers[controller] = true
			}
		}
	}

	// If we're running with v1 and didn't find a scope assigned by systemd, we need to create our own root cgroup to avoid
	// just inheriting from the parent process. The kubelet will take care of moving us into it when we start it up later.
	if kubeletRoot == "" {
		// Examine process ID 1 to see if there is a cgroup assigned to it.
		// When we are not in a container, process 1 is likely to be systemd or some other service manager.
		// It either lives at `/` or `/init.scope`, according to https://man7.org/linux/man-pages/man7/systemd.special.7.html
		// When containerized, process 1 will generally be in a cgroup; otherwise, we may be running in
		// a host PID scenario, which we don't support.
		g, err := os.Open("/proc/1/cgroup")
		if err != nil {
			return
		}
		defer g.Close()

		scan = bufio.NewScanner(g)
		for scan.Scan() {
			parts := strings.Split(scan.Text(), ":")
			if len(parts) < 3 {
				continue
			}
			enabledControllers := strings.Split(parts[1], ",")
			// For v1 or hybrid, the controller field can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}.
			// For v2, controllers = {""} (only contains a single empty string).
			for _, controller := range enabledControllers {
				switch {
				case controller == "name=systemd" || cgroupsModeV2:
					last := parts[len(parts)-1]
					if last != "/" && last != "/init.scope" {
						kubeletRoot = "/" + version.Program
						runtimeRoot = "/" + version.Program
					}
				}
			}
		}
	}
	return
}