Skip to content

Commit

Permalink
Add git model repo / git image puller logic for nim cache
Browse files Browse the repository at this point in the history
  • Loading branch information
shil committed Aug 5, 2024
1 parent 9033166 commit f84b0f2
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 0 deletions.
15 changes: 15 additions & 0 deletions api/v1alpha1/nimcache_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ type NIMSource struct {
// NGCSource represents models stored in NGC
NGC *NGCSource `json:"ngc,omitempty"`

// GIT represents models stored in a git model repository
GIT *GITSource `json:"git,omitempty"`

// DataStore represents models stored in NVIDIA DataStore service
DataStore *DataStoreSource `json:"dataStore,omitempty"`
}
Expand All @@ -63,6 +66,18 @@ type NGCSource struct {
Model ModelSpec `json:"model,omitempty"`
}

// GITSource references a model stored in a git model repository.
type GITSource struct {
// AuthSecret is the name of an existing secret with the credentials used to
// access the model source.
// NOTE(review): this comment previously said the secret contains the
// NGC_API_KEY (apparently copied from NGCSource); confirm which keys the git
// model puller actually expects.
AuthSecret string `json:"authSecret"`
// ModelPuller is the container image that can pull the model; it is used as
// the image of the caching job's container.
ModelPuller string `json:"modelPuller"`
// PullSecret is the name of the image pull secret used to pull the model
// puller image.
PullSecret string `json:"pullSecret,omitempty"`
// Model is the spec of the model to cache.
Model ModelSpec `json:"model,omitempty"`
}

// ModelSpec is the spec required to cache selected models
type ModelSpec struct {
// Profiles are the specific model profiles to cache
Expand Down
69 changes: 69 additions & 0 deletions internal/controller/nimcache_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,75 @@ func constructJob(nimCache *appsv1alpha1.NIMCache) (*batchv1.Job, error) {
job.Spec.Template.Spec.Containers[0].Args = []string{"--profiles"}
job.Spec.Template.Spec.Containers[0].Args = append(job.Spec.Template.Spec.Containers[0].Args, selectedProfiles...)
}
} else if nimCache.Spec.Source.GIT != nil {
job.Spec.Template.Spec.Containers = []corev1.Container{
{
Name: "nim-cache",
Image: nimCache.Spec.Source.GIT.ModelPuller,
// TODO: finalize standard image / command line / config
// to download model from git
Command: []string{"tdb-git-download-to-cache"},
EnvFrom: nimCache.Spec.Source.EnvFromSecrets(),
Env: []corev1.EnvVar{
{
Name: "HF_HOME",
Value: "/model-store", // Need to be set to a writable directory by non-root user
},
{
Name: "NIM_CACHE_PATH", // Note: in the download mode, NIM_CACHE_PATH is not used
Value: "/model-store",
},
{
Name: "NGC_HOME", // Note: NGC_HOME is required and handled as NIM_CACHE_PATH in the download mode
Value: "/model-store",
},
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "nim-cache-volume",
MountPath: "/model-store",
},
},
Resources: corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]apiResource.Quantity{
"cpu": nimCache.Spec.Resources.CPU,
"memory": nimCache.Spec.Resources.Memory,
"nvidia.com/gpu": *apiResource.NewQuantity(int64(nimCache.Spec.Resources.GPUs), apiResource.DecimalExponent),
},
Requests: map[corev1.ResourceName]apiResource.Quantity{
"cpu": nimCache.Spec.Resources.CPU,
"memory": nimCache.Spec.Resources.Memory,
"nvidia.com/gpu": *apiResource.NewQuantity(int64(nimCache.Spec.Resources.GPUs), apiResource.DecimalExponent),
},
},
TerminationMessagePath: "/dev/termination-log",
TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: ptr.To[bool](false),
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
RunAsNonRoot: ptr.To[bool](true),
RunAsGroup: ptr.To[int64](2000),
RunAsUser: ptr.To[int64](1000),
},
},
}
job.Spec.Template.Spec.ImagePullSecrets = []corev1.LocalObjectReference{
{
Name: nimCache.Spec.Source.GIT.PullSecret,
},
}
// Pass specific profiles to download based on user selection or auto-selection
// TODO: See if the logic applies to git model repo
selectedProfiles, err := getSelectedProfiles(nimCache)
if err != nil {
return nil, err
}
if selectedProfiles != nil {
job.Spec.Template.Spec.Containers[0].Args = []string{"--profiles"}
job.Spec.Template.Spec.Containers[0].Args = append(job.Spec.Template.Spec.Containers[0].Args, selectedProfiles...)
}
}
return job, nil
}
Expand Down

0 comments on commit f84b0f2

Please sign in to comment.