k3s/pkg/daemons/agent/agent.go
Chris Kim 48925fcb88
Simplify checkCgroups function call
Co-authored-by: Brian Downs <brian.downs@gmail.com>
2020-12-09 11:59:54 -08:00

250 lines
7.6 KiB
Go

package agent
import (
"bufio"
"math/rand"
"os"
"path/filepath"
"strings"
"time"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/daemons/executor"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/util/net"
"k8s.io/component-base/logs"
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
_ "k8s.io/component-base/metrics/prometheus/restclient" // for client metric registration
_ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration
)
const k3sCgroupRoot = "/k3s"
func Agent(config *config.Agent) error {
rand.Seed(time.Now().UTC().UnixNano())
logs.InitLogs()
defer logs.FlushLogs()
if err := startKubelet(config); err != nil {
return err
}
if !config.DisableKubeProxy {
return startKubeProxy(config)
}
return nil
}
func startKubeProxy(cfg *config.Agent) error {
argsMap := map[string]string{
"proxy-mode": "iptables",
"healthz-bind-address": "127.0.0.1",
"kubeconfig": cfg.KubeConfigKubeProxy,
"cluster-cidr": cfg.ClusterCIDR.String(),
}
if cfg.NodeName != "" {
argsMap["hostname-override"] = cfg.NodeName
}
args := config.GetArgsList(argsMap, cfg.ExtraKubeProxyArgs)
logrus.Infof("Running kube-proxy %s", config.ArgString(args))
return executor.KubeProxy(args)
}
func startKubelet(cfg *config.Agent) error {
argsMap := map[string]string{
"healthz-bind-address": "127.0.0.1",
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
//"cgroup-root": "/k3s",
"cgroup-driver": "cgroupfs",
"authentication-token-webhook": "true",
"anonymous-auth": "false",
"authorization-mode": modes.ModeWebhook,
}
if cfg.PodManifests != "" && argsMap["pod-manifest-path"] == "" {
argsMap["pod-manifest-path"] = cfg.PodManifests
}
if err := os.MkdirAll(argsMap["pod-manifest-path"], 0755); err != nil {
logrus.Errorf("Failed to mkdir %s: %v", argsMap["pod-manifest-path"], err)
}
if cfg.RootDir != "" {
argsMap["root-dir"] = cfg.RootDir
argsMap["cert-dir"] = filepath.Join(cfg.RootDir, "pki")
argsMap["seccomp-profile-root"] = filepath.Join(cfg.RootDir, "seccomp")
}
if cfg.CNIConfDir != "" {
argsMap["cni-conf-dir"] = cfg.CNIConfDir
}
if cfg.CNIBinDir != "" {
argsMap["cni-bin-dir"] = cfg.CNIBinDir
}
if cfg.CNIPlugin {
argsMap["network-plugin"] = "cni"
}
if len(cfg.ClusterDNS) > 0 {
argsMap["cluster-dns"] = cfg.ClusterDNS.String()
}
if cfg.ResolvConf != "" {
argsMap["resolv-conf"] = cfg.ResolvConf
}
if cfg.RuntimeSocket != "" {
argsMap["container-runtime"] = "remote"
argsMap["container-runtime-endpoint"] = cfg.RuntimeSocket
argsMap["containerd"] = cfg.RuntimeSocket
argsMap["serialize-image-pulls"] = "false"
} else if cfg.PauseImage != "" {
argsMap["pod-infra-container-image"] = cfg.PauseImage
}
if cfg.ListenAddress != "" {
argsMap["address"] = cfg.ListenAddress
}
if cfg.ClientCA != "" {
argsMap["anonymous-auth"] = "false"
argsMap["client-ca-file"] = cfg.ClientCA
}
if cfg.ServingKubeletCert != "" && cfg.ServingKubeletKey != "" {
argsMap["tls-cert-file"] = cfg.ServingKubeletCert
argsMap["tls-private-key-file"] = cfg.ServingKubeletKey
}
if cfg.NodeName != "" {
argsMap["hostname-override"] = cfg.NodeName
}
defaultIP, err := net.ChooseHostInterface()
if err != nil || defaultIP.String() != cfg.NodeIP {
argsMap["node-ip"] = cfg.NodeIP
}
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := checkCgroups()
if !hasCFS {
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
argsMap["cpu-cfs-quota"] = "false"
}
if !hasPIDs {
logrus.Warn("Disabling pod PIDs limit feature due to missing cgroup pids support")
argsMap["cgroups-per-qos"] = "false"
argsMap["enforce-node-allocatable"] = ""
argsMap["feature-gates"] = addFeatureGate(argsMap["feature-gates"], "SupportPodPidsLimit=false")
}
if kubeletRoot != "" {
argsMap["kubelet-cgroups"] = kubeletRoot
}
if runtimeRoot != "" {
argsMap["runtime-cgroups"] = runtimeRoot
}
if system.RunningInUserNS() {
argsMap["feature-gates"] = addFeatureGate(argsMap["feature-gates"], "DevicePlugins=false")
}
argsMap["node-labels"] = strings.Join(cfg.NodeLabels, ",")
if len(cfg.NodeTaints) > 0 {
argsMap["register-with-taints"] = strings.Join(cfg.NodeTaints, ",")
}
if !cfg.DisableCCM {
argsMap["cloud-provider"] = "external"
}
if cfg.Rootless {
// flags are from https://github.com/rootless-containers/usernetes/blob/v20190826.0/boot/kubelet.sh
argsMap["cgroup-driver"] = "none"
argsMap["feature-gates=SupportNoneCgroupDriver"] = "true"
argsMap["cgroups-per-qos"] = "false"
argsMap["enforce-node-allocatable"] = ""
}
if cfg.ProtectKernelDefaults {
argsMap["protect-kernel-defaults"] = "true"
}
args := config.GetArgsList(argsMap, cfg.ExtraKubeletArgs)
logrus.Infof("Running kubelet %s", config.ArgString(args))
return executor.Kubelet(args)
}
func addFeatureGate(current, new string) string {
if current == "" {
return new
}
return current + "," + new
}
func checkCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", "", false, false
}
defer f.Close()
scan := bufio.NewScanner(f)
for scan.Scan() {
parts := strings.Split(scan.Text(), ":")
if len(parts) < 3 {
continue
}
systems := strings.Split(parts[1], ",")
for _, system := range systems {
if system == "pids" {
hasPIDs = true
} else if system == "cpu" {
p := filepath.Join("/sys/fs/cgroup", parts[1], parts[2], "cpu.cfs_period_us")
if _, err := os.Stat(p); err == nil {
hasCFS = true
}
} else if system == "name=systemd" {
// If we detect that we are running under a `.scope` unit with systemd
// we can assume we are being directly invoked from the command line
// and thus need to set our kubelet root to something out of the context
// of `/user.slice` to ensure that `CPUAccounting` and `MemoryAccounting`
// are enabled, as they are generally disabled by default for `user.slice`
// Note that we are not setting the `runtimeRoot` as if we are running with
// `--docker`, we will inadvertently move the cgroup `dockerd` lives in
// which is not ideal and causes dockerd to become unmanageable by systemd.
last := parts[len(parts)-1]
i := strings.LastIndex(last, ".scope")
if i > 0 {
kubeletRoot = k3sCgroupRoot
}
}
}
}
if kubeletRoot == "" {
// Examine process ID 1 to see if there is a cgroup assigned to it.
// When we are not in a container, process 1 is likely to be systemd or some other service manager.
// It either lives at `/` or `/init.scope` according to https://man7.org/linux/man-pages/man7/systemd.special.7.html
// When containerized, process 1 will be generally be in a cgroup, otherwise, we may be running in
// a host PID scenario but we don't support this.
g, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", "", false, false
}
defer g.Close()
scan = bufio.NewScanner(g)
for scan.Scan() {
parts := strings.Split(scan.Text(), ":")
if len(parts) < 3 {
continue
}
systems := strings.Split(parts[1], ",")
for _, system := range systems {
if system == "name=systemd" {
last := parts[len(parts)-1]
if last != "/" && last != "/init.scope" {
kubeletRoot = k3sCgroupRoot
runtimeRoot = k3sCgroupRoot
}
}
}
}
}
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs
}