mirror of
https://github.com/k3s-io/k3s.git
synced 2024-06-07 19:41:36 +00:00
Nvidia container runtime discovery in containerd config template (#3890)
* Update the default containerd config template with support for adding extra container runtimes. Add logic to discover nvidia container runtimes installed via the gpu operator or package manager. Signed-off-by: Joe Kralicky <joe.kralicky@suse.com>
This commit is contained in:
parent
086ca8ba6a
commit
debb508643
@ -1,3 +1,4 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package containerd
|
||||
@ -57,6 +58,7 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
|
||||
DisableCgroup: disableCgroup,
|
||||
IsRunningInUserNS: isRunningInUserNS,
|
||||
PrivateRegistryConfig: privRegistries.Registry(),
|
||||
ExtraRuntimes: findNvidiaContainerRuntimes(os.DirFS(string(os.PathSeparator))),
|
||||
}
|
||||
|
||||
selEnabled, selConfigured, err := selinuxStatus()
|
||||
|
66
pkg/agent/containerd/nvidia.go
Normal file
66
pkg/agent/containerd/nvidia.go
Normal file
@ -0,0 +1,66 @@
|
||||
// +build linux
|
||||
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/rancher/k3s/pkg/agent/templates"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// findNvidiaContainerRuntimes returns a list of nvidia container runtimes that
|
||||
// are available on the system. It checks install locations used by the nvidia
|
||||
// gpu operator and by system package managers. The gpu operator installation
|
||||
// takes precedence over the system package manager installation.
|
||||
// The given fs.FS should represent the filesystem root directory to search in.
|
||||
func findNvidiaContainerRuntimes(root fs.FS) map[string]templates.ContainerdRuntimeConfig {
|
||||
// Check these locations in order. The GPU operator's installation should
|
||||
// take precedence over the package manager's installation.
|
||||
locationsToCheck := []string{
|
||||
"usr/local/nvidia/toolkit", // Path when installing via GPU Operator
|
||||
"usr/bin", // Path when installing via package manager
|
||||
}
|
||||
|
||||
// Fill in the binary location with just the name of the binary,
|
||||
// and check against each of the possible locations. If a match is found,
|
||||
// set the location to the full path.
|
||||
potentialRuntimes := map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "nvidia-container-runtime",
|
||||
},
|
||||
"nvidia-experimental": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "nvidia-container-runtime-experimental",
|
||||
},
|
||||
}
|
||||
foundRuntimes := map[string]templates.ContainerdRuntimeConfig{}
|
||||
RUNTIME:
|
||||
for runtimeName, runtimeConfig := range potentialRuntimes {
|
||||
for _, location := range locationsToCheck {
|
||||
binaryPath := filepath.Join(location, runtimeConfig.BinaryName)
|
||||
logrus.Debugf("Searching for %s container runtime at /%s", runtimeName, binaryPath)
|
||||
if info, err := fs.Stat(root, binaryPath); err == nil {
|
||||
if info.IsDir() {
|
||||
logrus.Debugf("Found %s container runtime at /%s, but it is a directory. Skipping.", runtimeName, binaryPath)
|
||||
continue
|
||||
}
|
||||
runtimeConfig.BinaryName = filepath.Join("/", binaryPath)
|
||||
logrus.Infof("Found %s container runtime at %s", runtimeName, runtimeConfig.BinaryName)
|
||||
foundRuntimes[runtimeName] = runtimeConfig
|
||||
// Skip to the next runtime to enforce precedence.
|
||||
continue RUNTIME
|
||||
} else {
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
logrus.Debugf("%s container runtime not found at /%s", runtimeName, binaryPath)
|
||||
} else {
|
||||
logrus.Errorf("Error searching for %s container runtime at /%s: %v", runtimeName, binaryPath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return foundRuntimes
|
||||
}
|
218
pkg/agent/containerd/nvidia_test.go
Normal file
218
pkg/agent/containerd/nvidia_test.go
Normal file
@ -0,0 +1,218 @@
|
||||
// +build linux
|
||||
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"reflect"
|
||||
"testing"
|
||||
"testing/fstest"
|
||||
|
||||
"github.com/rancher/k3s/pkg/agent/templates"
|
||||
)
|
||||
|
||||
func Test_UnitFindNvidiaContainerRuntimes(t *testing.T) {
|
||||
executable := &fstest.MapFile{Mode: 0755}
|
||||
type args struct {
|
||||
root fs.FS
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want map[string]templates.ContainerdRuntimeConfig
|
||||
}{
|
||||
{
|
||||
name: "No runtimes",
|
||||
args: args{
|
||||
root: fstest.MapFS{},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{},
|
||||
},
|
||||
{
|
||||
name: "Nvidia runtime in /usr/bin",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Experimental runtime in /usr/local/nvidia/toolkit",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Two runtimes in separate directories",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime": executable,
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Experimental runtime in /usr/bin",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia-experimental": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/bin/nvidia-container-runtime-experimental",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Same runtime in two directories",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia-experimental": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Both runtimes in /usr/bin",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||
"usr/bin/nvidia-container-runtime": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||
},
|
||||
"nvidia-experimental": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/bin/nvidia-container-runtime-experimental",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Both runtimes in both directories",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||
"usr/bin/nvidia-container-runtime": executable,
|
||||
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||
},
|
||||
"nvidia-experimental": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Both runtimes in /usr/local/nvidia/toolkit",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||
},
|
||||
"nvidia-experimental": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Both runtimes in /usr/bin and one duplicate in /usr/local/nvidia/toolkit",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime": executable,
|
||||
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||
},
|
||||
"nvidia-experimental": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Runtime is a directory",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime": &fstest.MapFile{
|
||||
Mode: fs.ModeDir,
|
||||
},
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{},
|
||||
},
|
||||
{
|
||||
name: "Runtime in both directories, but one is a directory",
|
||||
args: args{
|
||||
root: fstest.MapFS{
|
||||
"usr/bin/nvidia-container-runtime": executable,
|
||||
"usr/local/nvidia/toolkit/nvidia-container-runtime": &fstest.MapFile{
|
||||
Mode: fs.ModeDir,
|
||||
},
|
||||
},
|
||||
},
|
||||
want: map[string]templates.ContainerdRuntimeConfig{
|
||||
"nvidia": {
|
||||
RuntimeType: "io.containerd.runc.v2",
|
||||
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := findNvidiaContainerRuntimes(tt.args.root); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("findNvidiaContainerRuntimes() = %+v\nWant = %+v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@ -6,9 +6,15 @@ import (
|
||||
"github.com/rancher/k3s/pkg/daemons/config"
|
||||
)
|
||||
|
||||
// ContainerdRuntimeConfig describes one extra container runtime to be written
// into the generated containerd configuration. The containerd config template
// emits one [plugins.cri.containerd.runtimes."<name>"] section per entry of
// ContainerdConfig.ExtraRuntimes, filled in from these fields.
type ContainerdRuntimeConfig struct {
	// RuntimeType is rendered as the section's runtime_type value,
	// e.g. "io.containerd.runc.v2".
	RuntimeType string

	// BinaryName is rendered as the BinaryName value of the section's
	// options table, e.g. "/usr/bin/nvidia-container-runtime".
	BinaryName string
}
|
||||
|
||||
type ContainerdConfig struct {
|
||||
NodeConfig *config.Node
|
||||
DisableCgroup bool
|
||||
IsRunningInUserNS bool
|
||||
PrivateRegistryConfig *registries.Registry
|
||||
ExtraRuntimes map[string]ContainerdRuntimeConfig
|
||||
}
|
||||
|
@ -112,6 +112,13 @@ enable_keychain = true
|
||||
{{end}}
|
||||
{{end}}
|
||||
{{end}}
|
||||
|
||||
{{range $k, $v := .ExtraRuntimes}}
|
||||
[plugins.cri.containerd.runtimes."{{$k}}"]
|
||||
runtime_type = "{{$v.RuntimeType}}"
|
||||
[plugins.cri.containerd.runtimes."{{$k}}".options]
|
||||
BinaryName = "{{$v.BinaryName}}"
|
||||
{{end}}
|
||||
`
|
||||
|
||||
func ParseTemplateFromConfig(templateBuffer string, config interface{}) (string, error) {
|
||||
|
Loading…
Reference in New Issue
Block a user