Nvidia container runtime discovery in containerd config template (#3890)

* Update the default containerd config template with support for adding extra container runtimes. Add logic to discover nvidia container runtimes installed via the gpu operator or package manager.

Signed-off-by: Joe Kralicky <joe.kralicky@suse.com>
This commit is contained in:
Joe Kralicky 2021-09-15 17:31:11 -04:00 committed by GitHub
parent 086ca8ba6a
commit debb508643
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 299 additions and 0 deletions

View File

@ -1,3 +1,4 @@
//go:build linux
// +build linux
package containerd
@ -57,6 +58,7 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
DisableCgroup: disableCgroup,
IsRunningInUserNS: isRunningInUserNS,
PrivateRegistryConfig: privRegistries.Registry(),
ExtraRuntimes: findNvidiaContainerRuntimes(os.DirFS(string(os.PathSeparator))),
}
selEnabled, selConfigured, err := selinuxStatus()

View File

@ -0,0 +1,66 @@
// +build linux
package containerd
import (
"errors"
"io/fs"
"path/filepath"
"github.com/rancher/k3s/pkg/agent/templates"
"github.com/sirupsen/logrus"
)
// findNvidiaContainerRuntimes discovers the nvidia container runtime binaries
// present under root and returns a containerd runtime configuration for each
// one, keyed by runtime name ("nvidia" and "nvidia-experimental").
//
// root must represent the filesystem root directory. Each runtime is probed
// in two install locations: the directory used by the nvidia gpu operator
// first, then the directory used by system package managers. The first
// location holding a non-directory entry wins, so a gpu operator installation
// takes precedence over a package manager installation.
//
// In the returned configs BinaryName is the absolute path of the binary that
// was found. Runtimes that are not found are omitted; the returned map is
// never nil.
func findNvidiaContainerRuntimes(root fs.FS) map[string]templates.ContainerdRuntimeConfig {
	// Install locations relative to root, highest priority first.
	searchDirs := []string{
		"usr/local/nvidia/toolkit", // Path when installing via GPU Operator
		"usr/bin",                  // Path when installing via package manager
	}

	// BinaryName starts out as the bare file name; once a matching file is
	// located it is replaced with the full path to that file.
	candidates := map[string]templates.ContainerdRuntimeConfig{
		"nvidia": {
			RuntimeType: "io.containerd.runc.v2",
			BinaryName:  "nvidia-container-runtime",
		},
		"nvidia-experimental": {
			RuntimeType: "io.containerd.runc.v2",
			BinaryName:  "nvidia-container-runtime-experimental",
		},
	}

	discovered := map[string]templates.ContainerdRuntimeConfig{}
	for name, candidate := range candidates {
		for _, dir := range searchDirs {
			relPath := filepath.Join(dir, candidate.BinaryName)
			logrus.Debugf("Searching for %s container runtime at /%s", name, relPath)

			info, err := fs.Stat(root, relPath)
			if errors.Is(err, fs.ErrNotExist) {
				logrus.Debugf("%s container runtime not found at /%s", name, relPath)
				continue
			}
			if err != nil {
				logrus.Errorf("Error searching for %s container runtime at /%s: %v", name, relPath, err)
				continue
			}
			if info.IsDir() {
				logrus.Debugf("Found %s container runtime at /%s, but it is a directory. Skipping.", name, relPath)
				continue
			}

			candidate.BinaryName = filepath.Join("/", relPath)
			logrus.Infof("Found %s container runtime at %s", name, candidate.BinaryName)
			discovered[name] = candidate
			// Stop probing lower-priority locations for this runtime so that
			// the earlier location keeps precedence.
			break
		}
	}
	return discovered
}

View File

@ -0,0 +1,218 @@
// +build linux
package containerd
import (
"io/fs"
"reflect"
"testing"
"testing/fstest"
"github.com/rancher/k3s/pkg/agent/templates"
)
// Test_UnitFindNvidiaContainerRuntimes runs findNvidiaContainerRuntimes
// against in-memory filesystems and checks which runtimes are reported and
// which install location is chosen when a binary exists in more than one.
func Test_UnitFindNvidiaContainerRuntimes(t *testing.T) {
	// Root-relative paths of the binaries the function looks for.
	const (
		toolkitNvidia       = "usr/local/nvidia/toolkit/nvidia-container-runtime"
		toolkitExperimental = "usr/local/nvidia/toolkit/nvidia-container-runtime-experimental"
		usrBinNvidia        = "usr/bin/nvidia-container-runtime"
		usrBinExperimental  = "usr/bin/nvidia-container-runtime-experimental"
	)

	// Alias (not a new type) so reflect.DeepEqual compares identical types.
	type runtimes = map[string]templates.ContainerdRuntimeConfig

	executable := &fstest.MapFile{Mode: 0755}
	directory := &fstest.MapFile{Mode: fs.ModeDir}

	// found builds the config expected for a runtime whose binary was
	// discovered at the given root-relative path.
	found := func(relPath string) templates.ContainerdRuntimeConfig {
		return templates.ContainerdRuntimeConfig{
			RuntimeType: "io.containerd.runc.v2",
			BinaryName:  "/" + relPath,
		}
	}

	tests := []struct {
		name string
		root fs.FS
		want runtimes
	}{
		{
			name: "No runtimes",
			root: fstest.MapFS{},
			want: runtimes{},
		},
		{
			name: "Nvidia runtime in /usr/bin",
			root: fstest.MapFS{
				usrBinNvidia: executable,
			},
			want: runtimes{
				"nvidia": found(usrBinNvidia),
			},
		},
		{
			name: "Nvidia runtime in /usr/local/nvidia/toolkit",
			root: fstest.MapFS{
				toolkitNvidia: executable,
			},
			want: runtimes{
				"nvidia": found(toolkitNvidia),
			},
		},
		{
			name: "Nvidia runtime in both directories",
			root: fstest.MapFS{
				usrBinNvidia:  executable,
				toolkitNvidia: executable,
			},
			want: runtimes{
				"nvidia": found(toolkitNvidia),
			},
		},
		{
			name: "Experimental runtime in /usr/bin",
			root: fstest.MapFS{
				usrBinExperimental: executable,
			},
			want: runtimes{
				"nvidia-experimental": found(usrBinExperimental),
			},
		},
		{
			name: "Experimental runtime in both directories",
			root: fstest.MapFS{
				usrBinExperimental:  executable,
				toolkitExperimental: executable,
			},
			want: runtimes{
				"nvidia-experimental": found(toolkitExperimental),
			},
		},
		{
			name: "Both runtimes in /usr/bin",
			root: fstest.MapFS{
				usrBinExperimental: executable,
				usrBinNvidia:       executable,
			},
			want: runtimes{
				"nvidia":              found(usrBinNvidia),
				"nvidia-experimental": found(usrBinExperimental),
			},
		},
		{
			name: "Both runtimes in both directories",
			root: fstest.MapFS{
				toolkitNvidia:       executable,
				toolkitExperimental: executable,
				usrBinNvidia:        executable,
				usrBinExperimental:  executable,
			},
			want: runtimes{
				"nvidia":              found(toolkitNvidia),
				"nvidia-experimental": found(toolkitExperimental),
			},
		},
		{
			name: "Both runtimes in /usr/local/nvidia/toolkit",
			root: fstest.MapFS{
				toolkitNvidia:       executable,
				toolkitExperimental: executable,
			},
			want: runtimes{
				"nvidia":              found(toolkitNvidia),
				"nvidia-experimental": found(toolkitExperimental),
			},
		},
		{
			name: "Both runtimes in /usr/bin and one duplicate in /usr/local/nvidia/toolkit",
			root: fstest.MapFS{
				usrBinNvidia:        executable,
				usrBinExperimental:  executable,
				toolkitExperimental: executable,
			},
			want: runtimes{
				"nvidia":              found(usrBinNvidia),
				"nvidia-experimental": found(toolkitExperimental),
			},
		},
		{
			name: "Runtime is a directory",
			root: fstest.MapFS{
				usrBinNvidia: directory,
			},
			want: runtimes{},
		},
		{
			name: "Runtime in both directories, but one is a directory",
			root: fstest.MapFS{
				usrBinNvidia:  executable,
				toolkitNvidia: directory,
			},
			want: runtimes{
				"nvidia": found(usrBinNvidia),
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := findNvidiaContainerRuntimes(tt.root); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("findNvidiaContainerRuntimes() = %+v\nWant = %+v", got, tt.want)
			}
		})
	}
}

View File

@ -6,9 +6,15 @@ import (
"github.com/rancher/k3s/pkg/daemons/config"
)
// ContainerdRuntimeConfig describes one additional container runtime to be
// written into the generated containerd configuration.
type ContainerdRuntimeConfig struct {
// RuntimeType is rendered as the runtime's runtime_type value.
RuntimeType string
// BinaryName is rendered as the BinaryName entry in the runtime's options.
BinaryName string
}
// ContainerdConfig holds the values made available to the containerd
// configuration template.
type ContainerdConfig struct {
NodeConfig *config.Node
DisableCgroup bool
IsRunningInUserNS bool
PrivateRegistryConfig *registries.Registry
// ExtraRuntimes lists additional container runtimes keyed by runtime name;
// the template emits one runtimes section per entry.
ExtraRuntimes map[string]ContainerdRuntimeConfig
}

View File

@ -112,6 +112,13 @@ enable_keychain = true
{{end}}
{{end}}
{{end}}
{{range $k, $v := .ExtraRuntimes}}
[plugins.cri.containerd.runtimes."{{$k}}"]
runtime_type = "{{$v.RuntimeType}}"
[plugins.cri.containerd.runtimes."{{$k}}".options]
BinaryName = "{{$v.BinaryName}}"
{{end}}
`
func ParseTemplateFromConfig(templateBuffer string, config interface{}) (string, error) {