mirror of
https://github.com/k3s-io/k3s.git
synced 2024-06-07 19:41:36 +00:00
Nvidia container runtime discovery in containerd config template (#3890)
* Update the default containerd config template with support for adding extra container runtimes. Add logic to discover nvidia container runtimes installed via the gpu operator or package manager. Signed-off-by: Joe Kralicky <joe.kralicky@suse.com>
This commit is contained in:
parent
086ca8ba6a
commit
debb508643
@ -1,3 +1,4 @@
|
|||||||
|
//go:build linux
|
||||||
// +build linux
|
// +build linux
|
||||||
|
|
||||||
package containerd
|
package containerd
|
||||||
@ -57,6 +58,7 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
|
|||||||
DisableCgroup: disableCgroup,
|
DisableCgroup: disableCgroup,
|
||||||
IsRunningInUserNS: isRunningInUserNS,
|
IsRunningInUserNS: isRunningInUserNS,
|
||||||
PrivateRegistryConfig: privRegistries.Registry(),
|
PrivateRegistryConfig: privRegistries.Registry(),
|
||||||
|
ExtraRuntimes: findNvidiaContainerRuntimes(os.DirFS(string(os.PathSeparator))),
|
||||||
}
|
}
|
||||||
|
|
||||||
selEnabled, selConfigured, err := selinuxStatus()
|
selEnabled, selConfigured, err := selinuxStatus()
|
||||||
|
66
pkg/agent/containerd/nvidia.go
Normal file
66
pkg/agent/containerd/nvidia.go
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// +build linux
|
||||||
|
|
||||||
|
package containerd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io/fs"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/rancher/k3s/pkg/agent/templates"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// findNvidiaContainerRuntimes returns a list of nvidia container runtimes that
|
||||||
|
// are available on the system. It checks install locations used by the nvidia
|
||||||
|
// gpu operator and by system package managers. The gpu operator installation
|
||||||
|
// takes precedence over the system package manager installation.
|
||||||
|
// The given fs.FS should represent the filesystem root directory to search in.
|
||||||
|
func findNvidiaContainerRuntimes(root fs.FS) map[string]templates.ContainerdRuntimeConfig {
|
||||||
|
// Check these locations in order. The GPU operator's installation should
|
||||||
|
// take precedence over the package manager's installation.
|
||||||
|
locationsToCheck := []string{
|
||||||
|
"usr/local/nvidia/toolkit", // Path when installing via GPU Operator
|
||||||
|
"usr/bin", // Path when installing via package manager
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill in the binary location with just the name of the binary,
|
||||||
|
// and check against each of the possible locations. If a match is found,
|
||||||
|
// set the location to the full path.
|
||||||
|
potentialRuntimes := map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
"nvidia-experimental": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "nvidia-container-runtime-experimental",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
foundRuntimes := map[string]templates.ContainerdRuntimeConfig{}
|
||||||
|
RUNTIME:
|
||||||
|
for runtimeName, runtimeConfig := range potentialRuntimes {
|
||||||
|
for _, location := range locationsToCheck {
|
||||||
|
binaryPath := filepath.Join(location, runtimeConfig.BinaryName)
|
||||||
|
logrus.Debugf("Searching for %s container runtime at /%s", runtimeName, binaryPath)
|
||||||
|
if info, err := fs.Stat(root, binaryPath); err == nil {
|
||||||
|
if info.IsDir() {
|
||||||
|
logrus.Debugf("Found %s container runtime at /%s, but it is a directory. Skipping.", runtimeName, binaryPath)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
runtimeConfig.BinaryName = filepath.Join("/", binaryPath)
|
||||||
|
logrus.Infof("Found %s container runtime at %s", runtimeName, runtimeConfig.BinaryName)
|
||||||
|
foundRuntimes[runtimeName] = runtimeConfig
|
||||||
|
// Skip to the next runtime to enforce precedence.
|
||||||
|
continue RUNTIME
|
||||||
|
} else {
|
||||||
|
if errors.Is(err, fs.ErrNotExist) {
|
||||||
|
logrus.Debugf("%s container runtime not found at /%s", runtimeName, binaryPath)
|
||||||
|
} else {
|
||||||
|
logrus.Errorf("Error searching for %s container runtime at /%s: %v", runtimeName, binaryPath, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return foundRuntimes
|
||||||
|
}
|
218
pkg/agent/containerd/nvidia_test.go
Normal file
218
pkg/agent/containerd/nvidia_test.go
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
// +build linux
|
||||||
|
|
||||||
|
package containerd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/fs"
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
"testing/fstest"
|
||||||
|
|
||||||
|
"github.com/rancher/k3s/pkg/agent/templates"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test_UnitFindNvidiaContainerRuntimes(t *testing.T) {
|
||||||
|
executable := &fstest.MapFile{Mode: 0755}
|
||||||
|
type args struct {
|
||||||
|
root fs.FS
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
args args
|
||||||
|
want map[string]templates.ContainerdRuntimeConfig
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "No runtimes",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Nvidia runtime in /usr/bin",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Experimental runtime in /usr/local/nvidia/toolkit",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Two runtimes in separate directories",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime": executable,
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Experimental runtime in /usr/bin",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia-experimental": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/bin/nvidia-container-runtime-experimental",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Same runtime in two directories",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia-experimental": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Both runtimes in /usr/bin",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||||
|
"usr/bin/nvidia-container-runtime": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
"nvidia-experimental": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/bin/nvidia-container-runtime-experimental",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Both runtimes in both directories",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||||
|
"usr/bin/nvidia-container-runtime": executable,
|
||||||
|
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
"nvidia-experimental": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Both runtimes in /usr/local/nvidia/toolkit",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime": executable,
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
"nvidia-experimental": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Both runtimes in /usr/bin and one duplicate in /usr/local/nvidia/toolkit",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime": executable,
|
||||||
|
"usr/bin/nvidia-container-runtime-experimental": executable,
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime-experimental": executable,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
"nvidia-experimental": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/local/nvidia/toolkit/nvidia-container-runtime-experimental",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Runtime is a directory",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime": &fstest.MapFile{
|
||||||
|
Mode: fs.ModeDir,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Runtime in both directories, but one is a directory",
|
||||||
|
args: args{
|
||||||
|
root: fstest.MapFS{
|
||||||
|
"usr/bin/nvidia-container-runtime": executable,
|
||||||
|
"usr/local/nvidia/toolkit/nvidia-container-runtime": &fstest.MapFile{
|
||||||
|
Mode: fs.ModeDir,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: map[string]templates.ContainerdRuntimeConfig{
|
||||||
|
"nvidia": {
|
||||||
|
RuntimeType: "io.containerd.runc.v2",
|
||||||
|
BinaryName: "/usr/bin/nvidia-container-runtime",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if got := findNvidiaContainerRuntimes(tt.args.root); !reflect.DeepEqual(got, tt.want) {
|
||||||
|
t.Errorf("findNvidiaContainerRuntimes() = %+v\nWant = %+v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -6,9 +6,15 @@ import (
|
|||||||
"github.com/rancher/k3s/pkg/daemons/config"
|
"github.com/rancher/k3s/pkg/daemons/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ContainerdRuntimeConfig holds the settings for one extra container runtime
// entry that the containerd config template renders under
// plugins.cri.containerd.runtimes.
type ContainerdRuntimeConfig struct {
	// RuntimeType is rendered as the runtime's runtime_type value.
	RuntimeType string
	// BinaryName is rendered as the BinaryName value in the runtime's
	// options table.
	BinaryName string
}
|
||||||
|
|
||||||
type ContainerdConfig struct {
|
type ContainerdConfig struct {
|
||||||
NodeConfig *config.Node
|
NodeConfig *config.Node
|
||||||
DisableCgroup bool
|
DisableCgroup bool
|
||||||
IsRunningInUserNS bool
|
IsRunningInUserNS bool
|
||||||
PrivateRegistryConfig *registries.Registry
|
PrivateRegistryConfig *registries.Registry
|
||||||
|
ExtraRuntimes map[string]ContainerdRuntimeConfig
|
||||||
}
|
}
|
||||||
|
@ -112,6 +112,13 @@ enable_keychain = true
|
|||||||
{{end}}
|
{{end}}
|
||||||
{{end}}
|
{{end}}
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|
||||||
|
{{range $k, $v := .ExtraRuntimes}}
|
||||||
|
[plugins.cri.containerd.runtimes."{{$k}}"]
|
||||||
|
runtime_type = "{{$v.RuntimeType}}"
|
||||||
|
[plugins.cri.containerd.runtimes."{{$k}}".options]
|
||||||
|
BinaryName = "{{$v.BinaryName}}"
|
||||||
|
{{end}}
|
||||||
`
|
`
|
||||||
|
|
||||||
func ParseTemplateFromConfig(templateBuffer string, config interface{}) (string, error) {
|
func ParseTemplateFromConfig(templateBuffer string, config interface{}) (string, error) {
|
||||||
|
Loading…
Reference in New Issue
Block a user