mirror of
https://github.com/k3s-io/k3s.git
synced 2024-06-07 19:41:36 +00:00
rootless: enable resource limitation (requires cgroup v2, systemd)
Now rootless mode can be used with cgroup v2 resource limitations. A pod is executed in a cgroup like "/user.slice/user-1001.slice/user@1001.service/k3s-rootless.service/kubepods/podd0eb6921-c81a-4214-b36c-d3b9bb212fac/63b5a253a1fd4627da16bfce9bec58d72144cf30fe833e0ca9a6d60ebf837475". This is accomplished by running `kubelet` in a cgroup namespace, and enabling `cgroupfs` driver for the cgroup hierarchy delegated by systemd. To enable cgroup v2 resource limitation, `k3s server --rootless` needs to be launched as a `systemctl --user` service. Please see the comment lines in `k3s-rootless.service` for the usage. Running `k3s server --rootless` via a terminal is not supported. When it really needs to be launched via a terminal, `systemd-run --user -p Delegate=yes --tty` needs to be prepended to create a systemd scope. Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
parent
11ef43011a
commit
6e8284e3d4
45
k3s-rootless.service
Normal file
45
k3s-rootless.service
Normal file
@ -0,0 +1,45 @@
|
||||
# systemd unit file for k3s (rootless)
|
||||
#
|
||||
# Usage:
|
||||
# - [Optional] Enable cgroup v2 delegation, see https://rootlesscontaine.rs/getting-started/common/cgroup2/ .
|
||||
# This step is optional, but highly recommended for enabling CPU and memory resource limitation.
|
||||
#
|
||||
# - Copy this file as `~/.config/systemd/user/k3s-rootless.service`.
|
||||
# Installing this file as a system-wide service (`/etc/systemd/...`) is not supported.
|
||||
# Depending on the path of `k3s` binary, you might need to modify the `ExecStart=/usr/local/bin/k3s ...` line of this file.
|
||||
#
|
||||
# - Run `systemctl --user daemon-reload`
|
||||
#
|
||||
# - Run `systemctl --user enable --now k3s-rootless`
|
||||
#
|
||||
# - Run `KUBECONFIG=~/.kube/k3s.yaml kubectl get pods -A`, and make sure the pods are running.
|
||||
#
|
||||
# Troubleshooting:
|
||||
# - See `systemctl --user status k3s-rootless` to check the daemon status
|
||||
# - See `journalctl --user -f -u k3s-rootless` to see the daemon log
|
||||
# - See also https://rootlesscontaine.rs/
|
||||
|
||||
[Unit]
|
||||
Description=k3s (Rootless)
|
||||
|
||||
[Service]
|
||||
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
# NOTE: Don't try to run `k3s server --rootless` on a terminal, as it doesn't enable cgroup v2 delegation.
|
||||
# If you really need to try it on a terminal, prepend `systemd-run --user -p Delegate=yes --tty` to create a systemd scope.
|
||||
ExecStart=/usr/local/bin/k3s server --rootless
|
||||
ExecReload=/bin/kill -s HUP $MAINPID
|
||||
TimeoutSec=0
|
||||
RestartSec=2
|
||||
Restart=always
|
||||
StartLimitBurst=3
|
||||
StartLimitInterval=60s
|
||||
LimitNOFILE=infinity
|
||||
LimitNPROC=infinity
|
||||
LimitCORE=infinity
|
||||
TasksMax=infinity
|
||||
Delegate=yes
|
||||
Type=simple
|
||||
KillMode=mixed
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
@ -26,11 +26,13 @@ import (
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rancher/k3s/pkg/agent/templates"
|
||||
util2 "github.com/rancher/k3s/pkg/agent/util"
|
||||
"github.com/rancher/k3s/pkg/daemons/agent"
|
||||
"github.com/rancher/k3s/pkg/daemons/config"
|
||||
"github.com/rancher/k3s/pkg/untar"
|
||||
"github.com/rancher/k3s/pkg/version"
|
||||
"github.com/rancher/wrangler/pkg/merr"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
"google.golang.org/grpc"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
||||
@ -336,10 +338,21 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
isRunningInUserNS := system.RunningInUserNS()
|
||||
_, _, hasCFS, hasPIDs := agent.CheckCgroups()
|
||||
// "/sys/fs/cgroup" is namespaced
|
||||
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
|
||||
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
|
||||
if disableCgroup {
|
||||
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
|
||||
}
|
||||
|
||||
var containerdTemplate string
|
||||
containerdConfig := templates.ContainerdConfig{
|
||||
NodeConfig: cfg,
|
||||
IsRunningInUserNS: system.RunningInUserNS(),
|
||||
DisableCgroup: disableCgroup,
|
||||
IsRunningInUserNS: isRunningInUserNS,
|
||||
PrivateRegistryConfig: privRegistries,
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ import (
|
||||
|
||||
type ContainerdConfig struct {
|
||||
NodeConfig *config.Node
|
||||
DisableCgroup bool
|
||||
IsRunningInUserNS bool
|
||||
PrivateRegistryConfig *Registry
|
||||
}
|
||||
@ -22,8 +23,10 @@ const ContainerdConfigTemplate = `
|
||||
stream_server_port = "10010"
|
||||
enable_selinux = {{ .NodeConfig.SELinux }}
|
||||
|
||||
{{- if .IsRunningInUserNS }}
|
||||
{{- if .DisableCgroup}}
|
||||
disable_cgroup = true
|
||||
{{end}}
|
||||
{{- if .IsRunningInUserNS }}
|
||||
disable_apparmor = true
|
||||
restrict_oom_score_adj = true
|
||||
{{end}}
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
"github.com/rancher/k3s/pkg/daemons/executor"
|
||||
"github.com/rancher/k3s/pkg/version"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
"k8s.io/apimachinery/pkg/util/net"
|
||||
"k8s.io/component-base/logs"
|
||||
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
|
||||
@ -128,7 +129,7 @@ func startKubelet(cfg *config.Agent) error {
|
||||
if err != nil || defaultIP.String() != cfg.NodeIP {
|
||||
argsMap["node-ip"] = cfg.NodeIP
|
||||
}
|
||||
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := checkCgroups()
|
||||
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := CheckCgroups()
|
||||
if !hasCFS {
|
||||
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
|
||||
argsMap["cpu-cfs-quota"] = "false"
|
||||
@ -158,11 +159,20 @@ func startKubelet(cfg *config.Agent) error {
|
||||
}
|
||||
|
||||
if cfg.Rootless {
|
||||
// flags are from https://github.com/rootless-containers/usernetes/blob/v20190826.0/boot/kubelet.sh
|
||||
argsMap["cgroup-driver"] = "none"
|
||||
argsMap["feature-gates=SupportNoneCgroupDriver"] = "true"
|
||||
argsMap["cgroups-per-qos"] = "false"
|
||||
argsMap["enforce-node-allocatable"] = ""
|
||||
// "/sys/fs/cgroup" is namespaced
|
||||
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
|
||||
if hasCFS && hasPIDs && cgroupfsWritable {
|
||||
logrus.Info("cgroup v2 controllers are delegated for rootless.")
|
||||
// cgroupfs v2, delegated for rootless by systemd
|
||||
argsMap["cgroup-driver"] = "cgroupfs"
|
||||
} else {
|
||||
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Setting cgroup driver to \"none\".")
|
||||
// flags are from https://github.com/rootless-containers/usernetes/blob/v20190826.0/boot/kubelet.sh
|
||||
argsMap["cgroup-driver"] = "none"
|
||||
argsMap["feature-gates=SupportNoneCgroupDriver"] = "true"
|
||||
argsMap["cgroups-per-qos"] = "false"
|
||||
argsMap["enforce-node-allocatable"] = ""
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.ProtectKernelDefaults {
|
||||
@ -182,7 +192,7 @@ func addFeatureGate(current, new string) string {
|
||||
return current + "," + new
|
||||
}
|
||||
|
||||
func checkCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
||||
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
||||
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
|
||||
|
||||
// For Unified (v2) cgroups we can directly check to see what controllers are mounted
|
||||
|
@ -8,8 +8,10 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rootless-containers/rootlesskit/pkg/child"
|
||||
"github.com/rootless-containers/rootlesskit/pkg/copyup/tmpfssymlink"
|
||||
@ -17,12 +19,14 @@ import (
|
||||
"github.com/rootless-containers/rootlesskit/pkg/parent"
|
||||
portbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
pipeFD = "_K3S_ROOTLESS_FD"
|
||||
childEnv = "_K3S_ROOTLESS_SOCK"
|
||||
Sock = ""
|
||||
pipeFD = "_K3S_ROOTLESS_FD"
|
||||
childEnv = "_K3S_ROOTLESS_SOCK"
|
||||
evacuateCgroup2Env = "_K3S_ROOTLESS_EVACUATE_CGROUP2" // boolean
|
||||
Sock = ""
|
||||
)
|
||||
|
||||
func Rootless(stateDir string) error {
|
||||
@ -61,6 +65,9 @@ func Rootless(stateDir string) error {
|
||||
}
|
||||
|
||||
os.Setenv(childEnv, filepath.Join(parentOpt.StateDir, parent.StateFileAPISock))
|
||||
if parentOpt.EvacuateCgroup2 != "" {
|
||||
os.Setenv(evacuateCgroup2Env, "1")
|
||||
}
|
||||
if err := parent.Parent(*parentOpt); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
@ -128,8 +135,26 @@ func createParentOpt(stateDir string) (*parent.Opt, error) {
|
||||
}
|
||||
|
||||
opt := &parent.Opt{
|
||||
StateDir: stateDir,
|
||||
CreatePIDNS: true,
|
||||
StateDir: stateDir,
|
||||
CreatePIDNS: true,
|
||||
CreateCgroupNS: true,
|
||||
CreateUTSNS: true,
|
||||
CreateIPCNS: true,
|
||||
}
|
||||
|
||||
selfCgroupMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if selfCgroup2 := selfCgroupMap[""]; selfCgroup2 == "" {
|
||||
logrus.Warnf("enabling cgroup2 is highly recommended, see https://rootlesscontaine.rs/getting-started/common/cgroup2/")
|
||||
} else {
|
||||
selfCgroup2Dir := filepath.Join("/sys/fs/cgroup", selfCgroup2)
|
||||
if unix.Access(selfCgroup2Dir, unix.W_OK) == nil {
|
||||
opt.EvacuateCgroup2 = "k3s_evac"
|
||||
} else {
|
||||
logrus.Warn("cannot set cgroup2 evacuation, make sure to run k3s as a systemd unit")
|
||||
}
|
||||
}
|
||||
|
||||
mtu := 0
|
||||
@ -177,5 +202,12 @@ func createChildOpt() (*child.Opt, error) {
|
||||
opt.CopyUpDriver = tmpfssymlink.NewChildDriver()
|
||||
opt.MountProcfs = true
|
||||
opt.Reaper = true
|
||||
if v := os.Getenv(evacuateCgroup2Env); v != "" {
|
||||
var err error
|
||||
opt.EvacuateCgroup2, err = strconv.ParseBool(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return opt, nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user