Initial Windows support for agent (#3375)

Signed-off-by: Jamie Phillips <jamie.phillips@suse.com>
Authored by Jamie Phillips on 2021-06-01 15:29:46 -04:00; committed by GitHub
parent c2e561f25e
commit 7345ac35ae
19 changed files with 382 additions and 256 deletions

View File

@@ -19,12 +19,11 @@ import (
"strings"
"time"
"github.com/containerd/containerd/snapshots/overlay"
fuseoverlayfs "github.com/containerd/fuse-overlayfs-snapshotter"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/agent/proxy"
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/clientaccess"
"github.com/rancher/k3s/pkg/containerd"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/daemons/control/deps"
"github.com/rancher/k3s/pkg/util"
@@ -447,12 +446,12 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N
if !nodeConfig.Docker && nodeConfig.ContainerRuntimeEndpoint == "" {
switch nodeConfig.AgentConfig.Snapshotter {
case "overlayfs":
if err := overlay.Supported(nodeConfig.Containerd.Root); err != nil {
if err := containerd.OverlaySupported(nodeConfig.Containerd.Root); err != nil {
return nil, errors.Wrapf(err, "\"overlayfs\" snapshotter cannot be enabled for %q, try using \"fuse-overlayfs\" or \"native\"",
nodeConfig.Containerd.Root)
}
case "fuse-overlayfs":
if err := fuseoverlayfs.Supported(nodeConfig.Containerd.Root); err != nil {
if err := containerd.FuseoverlayfsSupported(nodeConfig.Containerd.Root); err != nil {
return nil, errors.Wrapf(err, "\"fuse-overlayfs\" snapshotter cannot be enabled for %q, try using \"native\"",
nodeConfig.Containerd.Root)
}

View File

@@ -0,0 +1,73 @@
// +build linux
package containerd
import (
"context"
"io/ioutil"
"os"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/agent/templates"
util2 "github.com/rancher/k3s/pkg/agent/util"
"github.com/rancher/k3s/pkg/cgroups"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/version"
"github.com/rancher/wharfie/pkg/registries"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// setupContainerdConfig generates the containerd.toml, using a template combined with various
// runtime configurations and registry mirror settings provided by the administrator.
func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
privRegistries, err := registries.GetPrivateRegistries(cfg.AgentConfig.PrivateRegistry)
if err != nil {
return err
}
isRunningInUserNS := system.RunningInUserNS()
_, _, hasCFS, hasPIDs := cgroups.CheckCgroups()
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
if disableCgroup {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
}
var containerdTemplate string
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: disableCgroup,
IsRunningInUserNS: isRunningInUserNS,
PrivateRegistryConfig: privRegistries.Registry(),
}
selEnabled, selConfigured, err := selinuxStatus()
if err != nil {
return errors.Wrap(err, "failed to detect selinux")
}
switch {
case !cfg.SELinux && selEnabled:
logrus.Warn("SELinux is enabled on this host, but " + version.Program + " has not been started with --selinux - containerd SELinux support is disabled")
case cfg.SELinux && !selConfigured:
logrus.Warnf("SELinux is enabled for "+version.Program+" but process is not running in context '%s', "+version.Program+"-selinux policy may need to be applied", SELinuxContextType)
}
containerdTemplateBytes, err := ioutil.ReadFile(cfg.Containerd.Template)
if err == nil {
logrus.Infof("Using containerd template at %s", cfg.Containerd.Template)
containerdTemplate = string(containerdTemplateBytes)
} else if os.IsNotExist(err) {
containerdTemplate = templates.ContainerdConfigTemplate
} else {
return err
}
parsedTemplate, err := templates.ParseTemplateFromConfig(containerdTemplate, containerdConfig)
if err != nil {
return err
}
return util2.WriteFile(cfg.Containerd.Config, parsedTemplate)
}
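
For context on the final step above: templates.ParseTemplateFromConfig renders the ContainerdConfig value with Go's text/template before the result is written to cfg.Containerd.Config. A minimal, runnable sketch of that mechanism; the struct, template text, and field names below are illustrative stand-ins, not the real k3s types:

package main

import (
	"os"
	"text/template"
)

// toyConfig stands in for templates.ContainerdConfig; these fields are
// hypothetical and exist only for this sketch.
type toyConfig struct {
	DisableCgroup bool
	Root          string
}

// toyTemplate is a trimmed, hypothetical containerd.toml fragment.
const toyTemplate = `[plugins.cri]
  disable_cgroup = {{ .DisableCgroup }}
  root = "{{ .Root }}"
`

func main() {
	t := template.Must(template.New("containerd").Parse(toyTemplate))
	// k3s writes the rendered output to disk; printing to stdout suffices here.
	cfg := toyConfig{DisableCgroup: false, Root: "/var/lib/rancher/k3s/agent/containerd"}
	if err := t.Execute(os.Stdout, cfg); err != nil {
		panic(err)
	}
}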

View File

@@ -0,0 +1,16 @@
// +build windows
package containerd
import (
"context"
"github.com/rancher/k3s/pkg/daemons/config"
)
// setupContainerdConfig generates the containerd.toml, using a template combined with various
// runtime configurations and registry mirror settings provided by the administrator.
func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
// TODO: Create windows config setup.
return nil
}
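
The stub above and its Linux counterpart illustrate the pattern this commit applies throughout: one file per platform, selected by a build constraint, so call sites stay platform-neutral. A minimal sketch of the pattern with hypothetical file and function names (note that Go requires a blank line between a // +build constraint and the package clause):

// greeting_linux.go — compiled only for Linux builds.
// +build linux

package example

func platformGreeting() string {
	// Free to depend on Linux-only packages here.
	return "hello from linux"
}

// greeting_windows.go — compiled only for Windows builds.
// +build windows

package example

func platformGreeting() string {
	// Must avoid unix-only imports here.
	return "hello from windows"
}

The _linux and _windows filename suffixes alone would also constrain compilation by GOOS; the explicit tags match the style used in this commit.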

View File

@@ -21,19 +21,13 @@ import (
"github.com/containerd/containerd/reference/docker"
"github.com/klauspost/compress/zstd"
"github.com/natefinch/lumberjack"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/pierrec/lz4"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/agent/templates"
util2 "github.com/rancher/k3s/pkg/agent/util"
"github.com/rancher/k3s/pkg/daemons/agent"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/untar"
"github.com/rancher/k3s/pkg/version"
"github.com/rancher/wharfie/pkg/registries"
"github.com/rancher/wrangler/pkg/merr"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"google.golang.org/grpc"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/util"
@@ -330,56 +324,3 @@ func prePullImages(ctx context.Context, conn *grpc.ClientConn, images io.Reader)
}
return nil
}
// setupContainerdConfig generates the containerd.toml, using a template combined with various
// runtime configurations and registry mirror settings provided by the administrator.
func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
privRegistries, err := registries.GetPrivateRegistries(cfg.AgentConfig.PrivateRegistry)
if err != nil {
return err
}
isRunningInUserNS := system.RunningInUserNS()
_, _, hasCFS, hasPIDs := agent.CheckCgroups()
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
if disableCgroup {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
}
var containerdTemplate string
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: disableCgroup,
IsRunningInUserNS: isRunningInUserNS,
PrivateRegistryConfig: privRegistries.Registry(),
}
selEnabled, selConfigured, err := selinuxStatus()
if err != nil {
return errors.Wrap(err, "failed to detect selinux")
}
switch {
case !cfg.SELinux && selEnabled:
logrus.Warn("SELinux is enabled on this host, but " + version.Program + " has not been started with --selinux - containerd SELinux support is disabled")
case cfg.SELinux && !selConfigured:
logrus.Warnf("SELinux is enabled for "+version.Program+" but process is not running in context '%s', "+version.Program+"-selinux policy may need to be applied", SELinuxContextType)
}
containerdTemplateBytes, err := ioutil.ReadFile(cfg.Containerd.Template)
if err == nil {
logrus.Infof("Using containerd template at %s", cfg.Containerd.Template)
containerdTemplate = string(containerdTemplateBytes)
} else if os.IsNotExist(err) {
containerdTemplate = templates.ContainerdConfigTemplate
} else {
return err
}
parsedTemplate, err := templates.ParseTemplateFromConfig(containerdTemplate, containerdConfig)
if err != nil {
return err
}
return util2.WriteFile(cfg.Containerd.Config, parsedTemplate)
}

View File

@@ -7,12 +7,10 @@ import (
"path/filepath"
"strconv"
"sync"
"syscall"
"github.com/google/tcpproxy"
"github.com/rancher/k3s/pkg/version"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
type LoadBalancer struct {
@@ -156,9 +154,3 @@ func onDialError(src net.Conn, dstDialErr error) {
logrus.Debugf("Incoming conn %v, error dialing load balancer servers: %v", src.RemoteAddr().String(), dstDialErr)
src.Close()
}
func reusePort(network, address string, conn syscall.RawConn) error {
return conn.Control(func(descriptor uintptr) {
syscall.SetsockoptInt(int(descriptor), unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)
})
}

View File

@@ -0,0 +1,9 @@
// +build windows
package loadbalancer
import "syscall"
func reusePort(network, address string, conn syscall.RawConn) error {
return nil
}

View File

@@ -0,0 +1,15 @@
// +build linux
package loadbalancer
import (
"syscall"
"golang.org/x/sys/unix"
)
func reusePort(network, address string, conn syscall.RawConn) error {
return conn.Control(func(descriptor uintptr) {
syscall.SetsockoptInt(int(descriptor), unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)
})
}
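
reusePort matches the signature of net.ListenConfig's Control hook, which runs after the socket is created and before bind, the window in which SO_REUSEPORT must be set. The load balancer's actual call site is outside this diff, so the wiring below is an illustrative sketch:

package main

import (
	"context"
	"fmt"
	"net"
	"syscall"

	"golang.org/x/sys/unix"
)

// reusePort is the Linux implementation shown above.
func reusePort(network, address string, conn syscall.RawConn) error {
	return conn.Control(func(descriptor uintptr) {
		syscall.SetsockoptInt(int(descriptor), unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)
	})
}

func main() {
	// Control is invoked for each socket the listener creates.
	lc := net.ListenConfig{Control: reusePort}
	l, err := lc.Listen(context.Background(), "tcp", "127.0.0.1:6443")
	if err != nil {
		panic(err)
	}
	defer l.Close()
	fmt.Println("listening on", l.Addr())
}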

View File

@@ -10,8 +10,6 @@ import (
"strings"
"time"
"github.com/containerd/cgroups"
cgroupsv2 "github.com/containerd/cgroups/v2"
systemd "github.com/coreos/go-systemd/daemon"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/agent/config"
@@ -21,6 +19,7 @@ import (
"github.com/rancher/k3s/pkg/agent/proxy"
"github.com/rancher/k3s/pkg/agent/syssetup"
"github.com/rancher/k3s/pkg/agent/tunnel"
"github.com/rancher/k3s/pkg/cgroups"
"github.com/rancher/k3s/pkg/cli/cmds"
"github.com/rancher/k3s/pkg/clientaccess"
cp "github.com/rancher/k3s/pkg/cloudprovider"
@@ -199,7 +198,7 @@ func coreClient(cfg string) (kubernetes.Interface, error) {
}
func Run(ctx context.Context, cfg cmds.Agent) error {
if err := validate(); err != nil {
if err := cgroups.Validate(); err != nil {
return err
}
@@ -237,53 +236,6 @@ func Run(ctx context.Context, cfg cmds.Agent) error {
return run(ctx, cfg, proxy)
}
func validate() error {
if cgroups.Mode() == cgroups.Unified {
return validateCgroupsV2()
}
return validateCgroupsV1()
}
func validateCgroupsV1() error {
cgroups, err := ioutil.ReadFile("/proc/self/cgroup")
if err != nil {
return err
}
if !strings.Contains(string(cgroups), "cpuset") {
logrus.Warn(`Failed to find cpuset cgroup, you may need to add "cgroup_enable=cpuset" to your linux cmdline (/boot/cmdline.txt on a Raspberry Pi)`)
}
if !strings.Contains(string(cgroups), "memory") {
msg := "ailed to find memory cgroup, you may need to add \"cgroup_memory=1 cgroup_enable=memory\" to your linux cmdline (/boot/cmdline.txt on a Raspberry Pi)"
logrus.Error("F" + msg)
return errors.New("f" + msg)
}
return nil
}
func validateCgroupsV2() error {
manager, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
return err
}
controllers, err := manager.RootControllers()
if err != nil {
return err
}
m := make(map[string]struct{})
for _, controller := range controllers {
m[controller] = struct{}{}
}
for _, controller := range []string{"cpu", "cpuset", "memory"} {
if _, ok := m[controller]; !ok {
return fmt.Errorf("failed to find %s cgroup (v2)", controller)
}
}
return nil
}
func configureNode(ctx context.Context, agentConfig *daemonconfig.Agent, nodes v1.NodeInterface) error {
count := 0
for {

View File

@@ -1,3 +1,7 @@
package syssetup
func Configure() {}
import kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config"
func Configure(enableIPv6 bool, config *kubeproxyconfig.KubeProxyConntrackConfiguration) {
}
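
The widened signature lets the agent thread IPv6 enablement and kube-proxy's conntrack settings into system setup instead of hard-coding them. A hedged sketch of a caller; the literal true and the zero-value conntrack config are placeholders, as the real agent derives both from its node configuration:

package main

import (
	"github.com/rancher/k3s/pkg/agent/syssetup"
	kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config"
)

func main() {
	// A zero-value conntrack config leaves all tunables at their defaults.
	syssetup.Configure(true, &kubeproxyconfig.KubeProxyConntrackConfiguration{})
}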

View File

@@ -0,0 +1,173 @@
// +build linux
package cgroups
import (
"bufio"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"github.com/containerd/cgroups"
cgroupsv2 "github.com/containerd/cgroups/v2"
"github.com/rancher/k3s/pkg/version"
"github.com/sirupsen/logrus"
)
func Validate() error {
if cgroups.Mode() == cgroups.Unified {
return validateCgroupsV2()
}
return validateCgroupsV1()
}
func validateCgroupsV1() error {
cgroups, err := ioutil.ReadFile("/proc/self/cgroup")
if err != nil {
return err
}
if !strings.Contains(string(cgroups), "cpuset") {
logrus.Warn(`Failed to find cpuset cgroup, you may need to add "cgroup_enable=cpuset" to your linux cmdline (/boot/cmdline.txt on a Raspberry Pi)`)
}
if !strings.Contains(string(cgroups), "memory") {
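// Deliberately missing its first letter: prefixed with "F" for the log line and "f" for the returned error below.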
msg := "ailed to find memory cgroup, you may need to add \"cgroup_memory=1 cgroup_enable=memory\" to your linux cmdline (/boot/cmdline.txt on a Raspberry Pi)"
logrus.Error("F" + msg)
return errors.New("f" + msg)
}
return nil
}
func validateCgroupsV2() error {
manager, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
return err
}
controllers, err := manager.RootControllers()
if err != nil {
return err
}
m := make(map[string]struct{})
for _, controller := range controllers {
m[controller] = struct{}{}
}
for _, controller := range []string{"cpu", "cpuset", "memory"} {
if _, ok := m[controller]; !ok {
return fmt.Errorf("failed to find %s cgroup (v2)", controller)
}
}
return nil
}
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
// For Unified (v2) cgroups we can directly check to see what controllers are mounted
// under the unified hierarchy.
if cgroupsModeV2 {
m, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
return "", "", false, false
}
controllers, err := m.Controllers()
if err != nil {
return "", "", false, false
}
// Intentionally using an expressionless switch to match the logic below
for _, controller := range controllers {
switch {
case controller == "cpu":
hasCFS = true
case controller == "pids":
hasPIDs = true
}
}
}
f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", "", false, false
}
defer f.Close()
scan := bufio.NewScanner(f)
for scan.Scan() {
parts := strings.Split(scan.Text(), ":")
if len(parts) < 3 {
continue
}
controllers := strings.Split(parts[1], ",")
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
// If we detect that we are running under a `.scope` unit with systemd
// we can assume we are being directly invoked from the command line
// and thus need to set our kubelet root to something out of the context
// of `/user.slice` to ensure that `CPUAccounting` and `MemoryAccounting`
// are enabled, as they are generally disabled by default for `user.slice`
// Note that we are not setting the `runtimeRoot` as if we are running with
// `--docker`, we will inadvertently move the cgroup `dockerd` lives in
// which is not ideal and causes dockerd to become unmanageable by systemd.
last := parts[len(parts)-1]
i := strings.LastIndex(last, ".scope")
if i > 0 {
kubeletRoot = "/" + version.Program
}
case controller == "cpu":
// It is common for this to show up multiple times in /sys/fs/cgroup if the controllers are comounted:
// as "cpu" and "cpuacct", symlinked to the actual hierarchy at "cpu,cpuacct". Unfortunately the order
// listed in /proc/self/cgroups may not be the same order used in /sys/fs/cgroup, so this check
// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
if _, err := os.Stat(p); err == nil {
hasCFS = true
}
case controller == "pids":
hasPIDs = true
}
}
}
// If we're running with v1 and didn't find a scope assigned by systemd, we need to create our own root cgroup to avoid
// just inheriting from the parent process. The kubelet will take care of moving us into it when we start it up later.
if kubeletRoot == "" {
// Examine process ID 1 to see if there is a cgroup assigned to it.
// When we are not in a container, process 1 is likely to be systemd or some other service manager.
// It either lives at `/` or `/init.scope` according to https://man7.org/linux/man-pages/man7/systemd.special.7.html
// When containerized, process 1 will generally be in a cgroup; otherwise, we may be running in
// a host PID scenario but we don't support this.
g, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", "", false, false
}
defer g.Close()
scan = bufio.NewScanner(g)
for scan.Scan() {
parts := strings.Split(scan.Text(), ":")
if len(parts) < 3 {
continue
}
controllers := strings.Split(parts[1], ",")
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
last := parts[len(parts)-1]
if last != "/" && last != "/init.scope" {
kubeletRoot = "/" + version.Program
runtimeRoot = "/" + version.Program
}
}
}
}
}
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs
}
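
Both loops above key off the hierarchy:controllers:path records in /proc/self/cgroup. A stdlib-only sketch that splits sample records the same way, to make the v1/hybrid versus v2 shapes concrete (the sample lines are illustrative):

package main

import (
	"fmt"
	"strings"
)

func main() {
	samples := []string{
		"4:cpu,cpuacct:/user.slice",                                  // v1/hybrid: comounted controllers
		"1:name=systemd:/user.slice/user-1000.slice/session-3.scope", // v1: named hierarchy, systemd scope
		"0::/init.scope",                                             // v2: empty controller field
	}
	for _, line := range samples {
		parts := strings.Split(line, ":")
		if len(parts) < 3 {
			continue
		}
		// Matches the parsing in CheckCgroups: field 1 holds the controllers,
		// the last field holds the cgroup path.
		controllers := strings.Split(parts[1], ",")
		fmt.Printf("hierarchy=%s controllers=%q path=%s\n", parts[0], controllers, parts[len(parts)-1])
	}
}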

View File

@@ -0,0 +1,11 @@
// +build windows
package cgroups
func Validate() error {
return nil
}
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
return "", "", false, false
}

View File

@@ -1,7 +1,6 @@
package cmds
import (
"errors"
"fmt"
"os"
"runtime"
@@ -21,8 +20,6 @@ var (
}
)
var ErrCommandNoArgs = errors.New("this command does not take any arguments")
func init() {
// hack - force "file,dns" lookup order if go dns is used
if os.Getenv("RES_OPTIONS") == "" {

View File

@@ -15,6 +15,7 @@ import (
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/etcd"
"github.com/rancher/k3s/pkg/server"
util2 "github.com/rancher/k3s/pkg/util"
"github.com/rancher/wrangler/pkg/signals"
"github.com/urfave/cli"
)
@@ -69,7 +70,7 @@ func run(app *cli.Context, cfg *cmds.Server) error {
}
if len(app.Args()) > 0 {
return cmds.ErrCommandNoArgs
return util2.ErrCommandNoArgs
}
serverConfig.ControlConfig.DataDir = dataDir

View File

@@ -0,0 +1,16 @@
// +build linux
package containerd
import (
"github.com/containerd/containerd/snapshots/overlay"
fuseoverlayfs "github.com/containerd/fuse-overlayfs-snapshotter"
)
func OverlaySupported(root string) error {
return overlay.Supported(root)
}
func FuseoverlayfsSupported(root string) error {
return fuseoverlayfs.Supported(root)
}

View File

@@ -0,0 +1,16 @@
// +build windows
package containerd
import (
"github.com/pkg/errors"
util2 "github.com/rancher/k3s/pkg/util"
)
func OverlaySupported(root string) error {
return errors.Wrapf(util2.ErrUnsupportedPlatform, "overlayfs is not supported")
}
func FuseoverlayfsSupported(root string) error {
return errors.Wrapf(util2.ErrUnsupportedPlatform, "fuse-overlayfs is not supported")
}

View File

@@ -1,28 +1,23 @@
package agent
import (
"bufio"
"math/rand"
"os"
"path/filepath"
"strings"
"time"
"github.com/containerd/cgroups"
cgroupsv2 "github.com/containerd/cgroups/v2"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/rancher/k3s/pkg/cgroups"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/daemons/executor"
"github.com/rancher/k3s/pkg/util"
"github.com/rancher/k3s/pkg/version"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/util/net"
"k8s.io/component-base/logs"
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
_ "k8s.io/component-base/metrics/prometheus/restclient" // for client metric registration
_ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
)
const unixPrefix = "unix://"
@@ -133,7 +128,7 @@ func startKubelet(cfg *config.Agent) error {
if err != nil || defaultIP.String() != cfg.NodeIP {
argsMap["node-ip"] = cfg.NodeIP
}
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := CheckCgroups()
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := cgroups.CheckCgroups()
if !hasCFS {
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
argsMap["cpu-cfs-quota"] = "false"
@@ -170,20 +165,7 @@ func startKubelet(cfg *config.Agent) error {
}
if cfg.Rootless {
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
if hasCFS && hasPIDs && cgroupfsWritable {
logrus.Info("cgroup v2 controllers are delegated for rootless.")
// cgroupfs v2, delegated for rootless by systemd
argsMap["cgroup-driver"] = "cgroupfs"
} else {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Setting cgroup driver to \"none\".")
// flags are from https://github.com/rootless-containers/usernetes/blob/v20190826.0/boot/kubelet.sh
argsMap["cgroup-driver"] = "none"
argsMap["feature-gates=SupportNoneCgroupDriver"] = "true"
argsMap["cgroups-per-qos"] = "false"
argsMap["enforce-node-allocatable"] = ""
}
createRootlessConfig(argsMap, hasCFS, hasPIDs)
}
if cfg.ProtectKernelDefaults {
@@ -216,111 +198,3 @@ func ImageCredProvAvailable(cfg *config.Agent) bool {
}
return true
}
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
// For Unified (v2) cgroups we can directly check to see what controllers are mounted
// under the unified hierarchy.
if cgroupsModeV2 {
m, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
return "", "", false, false
}
controllers, err := m.Controllers()
if err != nil {
return "", "", false, false
}
// Intentionally using an expressionless switch to match the logic below
for _, controller := range controllers {
switch {
case controller == "cpu":
hasCFS = true
case controller == "pids":
hasPIDs = true
}
}
}
f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", "", false, false
}
defer f.Close()
scan := bufio.NewScanner(f)
for scan.Scan() {
parts := strings.Split(scan.Text(), ":")
if len(parts) < 3 {
continue
}
controllers := strings.Split(parts[1], ",")
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
// If we detect that we are running under a `.scope` unit with systemd
// we can assume we are being directly invoked from the command line
// and thus need to set our kubelet root to something out of the context
// of `/user.slice` to ensure that `CPUAccounting` and `MemoryAccounting`
// are enabled, as they are generally disabled by default for `user.slice`
// Note that we are not setting the `runtimeRoot` as if we are running with
// `--docker`, we will inadvertently move the cgroup `dockerd` lives in
// which is not ideal and causes dockerd to become unmanageable by systemd.
last := parts[len(parts)-1]
i := strings.LastIndex(last, ".scope")
if i > 0 {
kubeletRoot = "/" + version.Program
}
case controller == "cpu":
// It is common for this to show up multiple times in /sys/fs/cgroup if the controllers are comounted:
// as "cpu" and "cpuacct", symlinked to the actual hierarchy at "cpu,cpuacct". Unfortunately the order
// listed in /proc/self/cgroups may not be the same order used in /sys/fs/cgroup, so this check
// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
if _, err := os.Stat(p); err == nil {
hasCFS = true
}
case controller == "pids":
hasPIDs = true
}
}
}
// If we're running with v1 and didn't find a scope assigned by systemd, we need to create our own root cgroup to avoid
// just inheriting from the parent process. The kubelet will take care of moving us into it when we start it up later.
if kubeletRoot == "" {
// Examine process ID 1 to see if there is a cgroup assigned to it.
// When we are not in a container, process 1 is likely to be systemd or some other service manager.
// It either lives at `/` or `/init.scope` according to https://man7.org/linux/man-pages/man7/systemd.special.7.html
// When containerized, process 1 will generally be in a cgroup; otherwise, we may be running in
// a host PID scenario but we don't support this.
g, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", "", false, false
}
defer g.Close()
scan = bufio.NewScanner(g)
for scan.Scan() {
parts := strings.Split(scan.Text(), ":")
if len(parts) < 3 {
continue
}
controllers := strings.Split(parts[1], ",")
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
last := parts[len(parts)-1]
if last != "/" && last != "/init.scope" {
kubeletRoot = "/" + version.Program
runtimeRoot = "/" + version.Program
}
}
}
}
}
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs
}

View File

@@ -0,0 +1,25 @@
// +build linux
package agent
import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func createRootlessConfig(argsMap map[string]string, hasCFS, hasPIDs bool) {
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
if hasCFS && hasPIDs && cgroupfsWritable {
logrus.Info("cgroup v2 controllers are delegated for rootless.")
// cgroupfs v2, delegated for rootless by systemd
argsMap["cgroup-driver"] = "cgroupfs"
} else {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Setting cgroup driver to \"none\".")
// flags are from https://github.com/rootless-containers/usernetes/blob/v20190826.0/boot/kubelet.sh
argsMap["cgroup-driver"] = "none"
argsMap["feature-gates=SupportNoneCgroupDriver"] = "true"
argsMap["cgroups-per-qos"] = "false"
argsMap["enforce-node-allocatable"] = ""
}
}

View File

@@ -0,0 +1,6 @@
// +build windows
package agent
func createRootlessConfig(argsMap map[string]string, hasCFS, hasPIDs bool) {
// Rootless cgroup configuration does not apply on Windows; this stub is intentionally a no-op.
}

pkg/util/errors.go (new file, 6 lines)
View File

@@ -0,0 +1,6 @@
package util
import "errors"
var ErrCommandNoArgs = errors.New("this command does not take any arguments")
var ErrUnsupportedPlatform = errors.New("unsupported platform")
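
These sentinels give the platform stubs a uniform failure mode. Because github.com/pkg/errors wrappers implement Unwrap (since v0.9.0), callers can test for them with the standard library's errors.Is; the check below is a sketch, not part of this diff:

package main

import (
	"errors"
	"fmt"

	pkgerrors "github.com/pkg/errors"
)

// Redeclared locally so the sketch is self-contained; k3s exports this from pkg/util.
var ErrUnsupportedPlatform = errors.New("unsupported platform")

// overlaySupported mirrors the Windows stub above.
func overlaySupported() error {
	return pkgerrors.Wrapf(ErrUnsupportedPlatform, "overlayfs is not supported")
}

func main() {
	err := overlaySupported()
	fmt.Println(errors.Is(err, ErrUnsupportedPlatform)) // prints: true
}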