-
Notifications
You must be signed in to change notification settings - Fork 353
Make CDI device requests consistent with other methods #1132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6849ebd
426186c
f17d424
dc87dcf
1bc2a9f
8650ca6
8be03cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,10 +42,12 @@ const ( | |
type CUDA struct { | ||
logger logger.Interface | ||
|
||
annotations map[string]string | ||
env map[string]string | ||
isPrivileged bool | ||
mounts []specs.Mount | ||
|
||
annotationsPrefixes []string | ||
acceptDeviceListAsVolumeMounts bool | ||
acceptEnvvarUnprivileged bool | ||
preferredVisibleDeviceEnvVars []string | ||
|
@@ -54,12 +56,17 @@ type CUDA struct { | |
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec. | ||
// The process environment is read (if present) to construc the CUDA Image. | ||
func NewCUDAImageFromSpec(spec *specs.Spec, opts ...Option) (CUDA, error) { | ||
if spec == nil { | ||
return New(opts...) | ||
} | ||
|
||
var env []string | ||
if spec != nil && spec.Process != nil { | ||
if spec.Process != nil { | ||
env = spec.Process.Env | ||
} | ||
|
||
specOpts := []Option{ | ||
WithAnnotations(spec.Annotations), | ||
WithEnv(env), | ||
WithMounts(spec.Mounts), | ||
WithPrivileged(IsPrivileged((*OCISpec)(spec))), | ||
|
@@ -95,6 +102,10 @@ func (i CUDA) IsLegacy() bool { | |
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 | ||
} | ||
|
||
func (i CUDA) IsPrivileged() bool { | ||
return i.isPrivileged | ||
} | ||
|
||
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment | ||
// variables. | ||
func (i CUDA) GetRequirements() ([]string, error) { | ||
|
@@ -212,19 +223,12 @@ func parseMajorMinorVersion(version string) (string, error) { | |
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/ | ||
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool { | ||
var hasCDIdevice bool | ||
for _, device := range i.VisibleDevicesFromEnvVar() { | ||
for _, device := range i.VisibleDevices() { | ||
if !parser.IsQualifiedName(device) { | ||
return false | ||
} | ||
hasCDIdevice = true | ||
} | ||
|
||
for _, device := range i.DevicesFromMounts() { | ||
if !strings.HasPrefix(device, "cdi/") { | ||
return false | ||
} | ||
hasCDIdevice = true | ||
} | ||
return hasCDIdevice | ||
} | ||
|
||
|
@@ -234,6 +238,12 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool { | |
// In cases where environment variable requests required privileged containers, | ||
// such devices requests are ignored. | ||
func (i CUDA) VisibleDevices() []string { | ||
// If annotation device requests are present, these are preferred. | ||
annotationDeviceRequests := i.cdiDeviceRequestsFromAnnotations() | ||
if len(annotationDeviceRequests) > 0 { | ||
return annotationDeviceRequests | ||
} | ||
|
||
// If enabled, try and get the device list from volume mounts first | ||
if i.acceptDeviceListAsVolumeMounts { | ||
volumeMountDeviceRequests := i.visibleDevicesFromMounts() | ||
|
@@ -260,6 +270,31 @@ func (i CUDA) VisibleDevices() []string { | |
return nil | ||
} | ||
|
||
// cdiDeviceRequestsFromAnnotations returns a list of devices specified in the | ||
// annotations. | ||
// Keys starting with the specified prefixes are considered and expected to | ||
// contain a comma-separated list of fully-qualified CDI devices names. | ||
// The format of the requested devices is not checked and the list is not | ||
// deduplicated. | ||
func (i CUDA) cdiDeviceRequestsFromAnnotations() []string { | ||
if len(i.annotationsPrefixes) == 0 || len(i.annotations) == 0 { | ||
return nil | ||
} | ||
|
||
var devices []string | ||
for key, value := range i.annotations { | ||
for _, prefix := range i.annotationsPrefixes { | ||
if strings.HasPrefix(key, prefix) { | ||
devices = append(devices, strings.Split(value, ",")...) | ||
// There is no need to check additional prefixes since we | ||
// typically deduplicate devices in any case. | ||
break | ||
} | ||
} | ||
} | ||
return devices | ||
} | ||
|
||
// VisibleDevicesFromEnvVar returns the set of visible devices requested through environment variables. | ||
// If any of the preferredVisibleDeviceEnvVars are present in the image, they | ||
// are used to determine the visible devices. If this is not the case, the | ||
|
@@ -276,20 +311,27 @@ func (i CUDA) VisibleDevicesFromEnvVar() []string { | |
// visibleDevicesFromMounts returns the set of visible devices requested as mounts. | ||
func (i CUDA) visibleDevicesFromMounts() []string { | ||
var devices []string | ||
for _, device := range i.DevicesFromMounts() { | ||
for _, device := range i.requestsFromMounts() { | ||
switch { | ||
case strings.HasPrefix(device, volumeMountDevicePrefixCDI): | ||
continue | ||
case strings.HasPrefix(device, volumeMountDevicePrefixImex): | ||
continue | ||
case strings.HasPrefix(device, volumeMountDevicePrefixCDI): | ||
name, err := cdiDeviceMountRequest(device).qualifiedName() | ||
if err != nil { | ||
i.logger.Warningf("Ignoring invalid mount request for CDI device %v: %v", device, err) | ||
continue | ||
} | ||
devices = append(devices, name) | ||
default: | ||
devices = append(devices, device) | ||
} | ||
devices = append(devices, device) | ||
|
||
} | ||
return devices | ||
} | ||
|
||
// DevicesFromMounts returns a list of device specified as mounts. | ||
func (i CUDA) DevicesFromMounts() []string { | ||
// requestsFromMounts returns a list of device specified as mounts. | ||
func (i CUDA) requestsFromMounts() []string { | ||
root := filepath.Clean(DeviceListAsVolumeMountsRoot) | ||
seen := make(map[string]bool) | ||
var devices []string | ||
|
@@ -321,23 +363,30 @@ func (i CUDA) DevicesFromMounts() []string { | |
return devices | ||
} | ||
|
||
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image. | ||
func (i CUDA) CDIDevicesFromMounts() []string { | ||
var devices []string | ||
for _, mountDevice := range i.DevicesFromMounts() { | ||
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixCDI) { | ||
continue | ||
} | ||
parts := strings.SplitN(strings.TrimPrefix(mountDevice, volumeMountDevicePrefixCDI), "/", 3) | ||
if len(parts) != 3 { | ||
continue | ||
} | ||
vendor := parts[0] | ||
class := parts[1] | ||
device := parts[2] | ||
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device)) | ||
// a cdiDeviceMountRequest represents a CDI device requests as a mount. | ||
// Here the host path /dev/null is mounted to a particular path in the container. | ||
// The container path has the form: | ||
// /var/run/nvidia-container-devices/cdi/<vendor>/<class>/<device> | ||
// or | ||
// /var/run/nvidia-container-devices/cdi/<vendor>/<class>=<device> | ||
type cdiDeviceMountRequest string | ||
|
||
// qualifiedName returns the fully-qualified name of the CDI device. | ||
func (m cdiDeviceMountRequest) qualifiedName() (string, error) { | ||
if !strings.HasPrefix(string(m), volumeMountDevicePrefixCDI) { | ||
return "", fmt.Errorf("invalid mount CDI device request: %s", m) | ||
} | ||
return devices | ||
|
||
requestedDevice := strings.TrimPrefix(string(m), volumeMountDevicePrefixCDI) | ||
if parser.IsQualifiedName(requestedDevice) { | ||
return requestedDevice, nil | ||
} | ||
|
||
parts := strings.SplitN(requestedDevice, "/", 3) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this trying to parse vendor, class, device from
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, and the argument 3 probably means "at most two splits", i.e. max three tokens. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, a mounted CDI device will have the path From the
|
||
if len(parts) != 3 { | ||
return "", fmt.Errorf("invalid mount CDI device request: %s", m) | ||
} | ||
return fmt.Sprintf("%s/%s=%s", parts[0], parts[1], parts[2]), nil | ||
} | ||
|
||
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image. | ||
|
@@ -352,7 +401,7 @@ func (i CUDA) ImexChannelsFromEnvVar() []string { | |
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image. | ||
func (i CUDA) ImexChannelsFromMounts() []string { | ||
var channels []string | ||
for _, mountDevice := range i.DevicesFromMounts() { | ||
for _, mountDevice := range i.requestsFromMounts() { | ||
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) { | ||
continue | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I renamed this function since we're returning all requests including:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: DevicesFromMounts has been renamed and turned private to the config pkg
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. It was not used outside this package.