This repository has been archived by the owner on May 25, 2023. It is now read-only.

Automated cherry pick of #638: Take init containers into account when getting pod resource; #645: Order task by CreationTimestamp first, then by UID; #647: In allocate, skip adding Job if its queue is not found (#680)

Closed
18 changes: 11 additions & 7 deletions pkg/scheduler/actions/allocate/allocate.go
@@ -46,12 +46,16 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
     jobsMap := map[api.QueueID]*util.PriorityQueue{}

     for _, job := range ssn.Jobs {
-        if _, found := jobsMap[job.Queue]; !found {
-            jobsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn)
-        }
-
         if queue, found := ssn.Queues[job.Queue]; found {
             queues.Push(queue)
+        } else {
+            glog.Warningf("Skip adding Job <%s/%s> because its queue %s is not found",
+                job.Namespace, job.Name, job.Queue)
+            continue
         }
+
+        if _, found := jobsMap[job.Queue]; !found {
+            jobsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn)
+        }

         glog.V(4).Infof("Added Job <%s/%s> into Queue <%s>", job.Namespace, job.Name, job.Queue)
@@ -143,7 +147,7 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
         selectedNodes := util.SelectBestNode(nodeScores)
         for _, node := range selectedNodes {
             // Allocate idle resource to the task.
-            if task.Resreq.LessEqual(node.Idle) {
+            if task.InitResreq.LessEqual(node.Idle) {
                 glog.V(3).Infof("Binding Task <%v/%v> to node <%v>",
                     task.Namespace, task.Name, node.Name)
                 if err := ssn.Allocate(task, node.Name); err != nil {
@@ -162,9 +166,9 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
             }

             // Allocate releasing resource to the task if any.
-            if task.Resreq.LessEqual(node.Releasing) {
+            if task.InitResreq.LessEqual(node.Releasing) {
                 glog.V(3).Infof("Pipelining Task <%v/%v> to node <%v> for <%v> on <%v>",
-                    task.Namespace, task.Name, node.Name, task.Resreq, node.Releasing)
+                    task.Namespace, task.Name, node.Name, task.InitResreq, node.Releasing)
                 if err := ssn.Pipeline(task, node.Name); err != nil {
                     glog.Errorf("Failed to pipeline Task %v on %v in Session %v",
                         task.UID, node.Name, ssn.UID)
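The reordering in the first hunk matters: the queue-existence check now runs before any per-queue state is created, so a Job that references a missing Queue is skipped outright instead of leaving an empty entry behind in jobsMap. A minimal standalone sketch of that guard pattern (simplified types; Job, queues, and jobsMap here are illustrative stand-ins, not the scheduler's real API):

```go
package main

import "log"

// Job is a simplified stand-in for the scheduler's api.JobInfo.
type Job struct{ Namespace, Name, Queue string }

func main() {
	queues := map[string]bool{"default": true} // known queues
	jobsMap := map[string][]Job{}              // per-queue pending jobs

	jobs := []Job{
		{Namespace: "ns1", Name: "job-a", Queue: "default"},
		{Namespace: "ns2", Name: "job-b", Queue: "missing"},
	}

	for _, job := range jobs {
		// Validate the queue before touching jobsMap, mirroring the reordered hunk above.
		if !queues[job.Queue] {
			log.Printf("Skip adding Job <%s/%s> because its queue %s is not found",
				job.Namespace, job.Name, job.Queue)
			continue
		}
		jobsMap[job.Queue] = append(jobsMap[job.Queue], job)
	}

	log.Printf("queued: %v", jobsMap) // only job-a is queued
}
```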
2 changes: 1 addition & 1 deletion pkg/scheduler/actions/backfill/backfill.go
@@ -44,7 +44,7 @@ func (alloc *backfillAction) Execute(ssn *framework.Session) {
     // TODO (k82cn): When backfilling, we also need to balance between Queues.
     for _, job := range ssn.Jobs {
         for _, task := range job.TaskStatusIndex[api.Pending] {
-            if task.Resreq.IsEmpty() {
+            if task.InitResreq.IsEmpty() {
                 // As the task did not request resources, it only needs to meet predicates.
                 // TODO (k82cn): need to prioritize nodes to avoid pod hole.
                 for _, node := range ssn.Nodes {
6 changes: 3 additions & 3 deletions pkg/scheduler/actions/preempt/preempt.go
@@ -203,7 +203,7 @@ func preempt(

     var preemptees []*api.TaskInfo
     preempted := api.EmptyResource()
-    resreq := preemptor.Resreq.Clone()
+    resreq := preemptor.InitResreq.Clone()

     for _, task := range node.Tasks {
         if filter == nil {
@@ -239,9 +239,9 @@

     metrics.RegisterPreemptionAttempts()
     glog.V(3).Infof("Preempted <%v> for task <%s/%s> requested <%v>.",
-        preempted, preemptor.Namespace, preemptor.Name, preemptor.Resreq)
+        preempted, preemptor.Namespace, preemptor.Name, preemptor.InitResreq)

-    if preemptor.Resreq.LessEqual(preempted) {
+    if preemptor.InitResreq.LessEqual(preempted) {
         if err := stmt.Pipeline(preemptor, node.Name); err != nil {
             glog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>",
                 preemptor.Namespace, preemptor.Name, node.Name)
7 changes: 3 additions & 4 deletions pkg/scheduler/actions/reclaim/reclaim.go
@@ -110,14 +110,13 @@ func (alloc *reclaimAction) Execute(ssn *framework.Session) {
             }

             assigned := false
-
             for _, n := range ssn.Nodes {
                 // If predicates failed, next node.
                 if err := ssn.PredicateFn(task, n); err != nil {
                     continue
                 }

-                resreq := task.Resreq.Clone()
+                resreq := task.InitResreq.Clone()
                 reclaimed := api.EmptyResource()

                 glog.V(3).Infof("Considering Task <%s/%s> on Node <%s>.",
@@ -172,9 +171,9 @@ func (alloc *reclaimAction) Execute(ssn *framework.Session) {
                 }

                 glog.V(3).Infof("Reclaimed <%v> for task <%s/%s> requested <%v>.",
-                    reclaimed, task.Namespace, task.Name, task.Resreq)
+                    reclaimed, task.Namespace, task.Name, task.InitResreq)

-                if task.Resreq.LessEqual(reclaimed) {
+                if task.InitResreq.LessEqual(reclaimed) {
                     if err := ssn.Pipeline(task, n.Name); err != nil {
                         glog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>",
                             task.Namespace, task.Name, n.Name)
31 changes: 16 additions & 15 deletions pkg/scheduler/api/job_info.go
@@ -38,7 +38,10 @@ type TaskInfo struct {
     Name      string
     Namespace string

+    // Resreq is the resource used when the task is running.
     Resreq *Resource
+    // InitResreq is the resource used to launch the task.
+    InitResreq *Resource

     NodeName string
     Status   TaskStatus
@@ -61,25 +64,22 @@ func getJobID(pod *v1.Pod) JobID {
 }

 func NewTaskInfo(pod *v1.Pod) *TaskInfo {
-    req := EmptyResource()
-
-    // TODO(k82cn): also includes initContainers' resource.
-    for _, c := range pod.Spec.Containers {
-        req.Add(NewResource(c.Resources.Requests))
-    }
+    req := GetPodResourceWithoutInitContainers(pod)
+    initResreq := GetPodResourceRequest(pod)

     jobID := getJobID(pod)

     ti := &TaskInfo{
-        UID:       TaskID(pod.UID),
-        Job:       jobID,
-        Name:      pod.Name,
-        Namespace: pod.Namespace,
-        NodeName:  pod.Spec.NodeName,
-        Status:    getTaskStatus(pod),
-        Priority:  1,
-        Pod:       pod,
-        Resreq:    req,
+        UID:        TaskID(pod.UID),
+        Job:        jobID,
+        Name:       pod.Name,
+        Namespace:  pod.Namespace,
+        NodeName:   pod.Spec.NodeName,
+        Status:     getTaskStatus(pod),
+        Priority:   1,
+        Pod:        pod,
+        Resreq:     req,
+        InitResreq: initResreq,
     }

     if pod.Spec.Priority != nil {
@@ -100,6 +100,7 @@ func (ti *TaskInfo) Clone() *TaskInfo {
         Priority:    ti.Priority,
         Pod:         ti.Pod,
         Resreq:      ti.Resreq.Clone(),
+        InitResreq:  ti.InitResreq.Clone(),
         VolumeReady: ti.VolumeReady,
     }
 }
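With both helpers wired into NewTaskInfo, every TaskInfo now carries two request vectors: Resreq (the running footprint) and InitResreq (the launch footprint). A hedged sketch of the difference (the import path assumes this is the kube-batch repo, and buildPod is a hypothetical helper, not part of the PR):

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"github.com/kubernetes-sigs/kube-batch/pkg/scheduler/api"
)

// buildPod is a hypothetical helper: one init container requesting 2 CPU and
// one regular container requesting 1 CPU.
func buildPod() *v1.Pod {
	cpu := func(q string) v1.ResourceList {
		return v1.ResourceList{v1.ResourceCPU: resource.MustParse(q)}
	}
	return &v1.Pod{
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{
				{Resources: v1.ResourceRequirements{Requests: cpu("2")}},
			},
			Containers: []v1.Container{
				{Resources: v1.ResourceRequirements{Requests: cpu("1")}},
			},
		},
	}
}

func main() {
	ti := api.NewTaskInfo(buildPod())
	// Resreq sums only the regular containers (1 CPU); InitResreq additionally
	// takes the max against each init container (2 CPU). The actions' predicates
	// now check InitResreq, so a task lands only where its init phase also fits.
	fmt.Printf("Resreq=%v InitResreq=%v\n", ti.Resreq, ti.InitResreq)
}
```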
71 changes: 71 additions & 0 deletions pkg/scheduler/api/pod_info.go
@@ -0,0 +1,71 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
    "k8s.io/api/core/v1"
)

// GetPodResourceRequest returns a *Resource that covers the largest width in each resource dimension.
// Because init containers run sequentially, we take the max of their requests in each dimension.
// In contrast, we sum the resource vectors of regular containers, since they run simultaneously.
//
// Refer to k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/predicates.go#GetResourceRequest.
//
// To be consistent with the Kubernetes default scheduler, this is only used in the predicates of
// actions (e.g. allocate, backfill, preempt, reclaim); use GetPodResourceWithoutInitContainers
// for other cases.
// Example:
//
//   Pod:
//     InitContainers
//       IC1:
//         CPU: 2
//         Memory: 1G
//       IC2:
//         CPU: 2
//         Memory: 3G
//     Containers
//       C1:
//         CPU: 2
//         Memory: 1G
//       C2:
//         CPU: 1
//         Memory: 1G
//
//   Result: CPU: 3, Memory: 3G
func GetPodResourceRequest(pod *v1.Pod) *Resource {
    result := GetPodResourceWithoutInitContainers(pod)

    // take max_resource(sum_pod, any_init_container)
    for _, container := range pod.Spec.InitContainers {
        result.SetMaxResource(NewResource(container.Resources.Requests))
    }

    return result
}

// GetPodResourceWithoutInitContainers returns a Pod's resource request; it does not include
// init containers' resource requests.
func GetPodResourceWithoutInitContainers(pod *v1.Pod) *Resource {
    result := EmptyResource()
    for _, container := range pod.Spec.Containers {
        result.Add(NewResource(container.Resources.Requests))
    }

    return result
}
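The max/sum rule itself is easy to check in isolation. A toy model with resources reduced to plain integers (the res type and effectiveRequest are hypothetical stand-ins, not the package's Resource API):

```go
package main

import "fmt"

// res is a toy stand-in for the package's Resource type.
type res struct{ cpuMilli, memBytes int64 }

func max64(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}

// effectiveRequest mirrors GetPodResourceRequest: sum the regular containers
// (they run simultaneously), then take the per-dimension max against each
// init container (they run one at a time).
func effectiveRequest(initContainers, containers []res) res {
	var sum res
	for _, c := range containers {
		sum.cpuMilli += c.cpuMilli
		sum.memBytes += c.memBytes
	}
	for _, ic := range initContainers {
		sum.cpuMilli = max64(sum.cpuMilli, ic.cpuMilli)
		sum.memBytes = max64(sum.memBytes, ic.memBytes)
	}
	return sum
}

func main() {
	const g = int64(1000000000)
	ics := []res{{2000, 1 * g}, {2000, 3 * g}}        // IC1, IC2 from the doc comment
	containers := []res{{2000, 1 * g}, {1000, 1 * g}} // C1, C2
	fmt.Println(effectiveRequest(ics, containers))    // {3000 3000000000}: CPU 3, Memory 3G
}
```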
162 changes: 162 additions & 0 deletions pkg/scheduler/api/pod_info_test.go
@@ -0,0 +1,162 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
    "reflect"
    "testing"

    "k8s.io/api/core/v1"
)

func TestGetPodResourceRequest(t *testing.T) {
    tests := []struct {
        name             string
        pod              *v1.Pod
        expectedResource *Resource
    }{
        {
            name: "get resource for pod without init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "2G")),
        },
        {
            name: "get resource for pod with init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    InitContainers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "5G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "5G")),
        },
    }

    for i, test := range tests {
        req := GetPodResourceRequest(test.pod)
        if !reflect.DeepEqual(req, test.expectedResource) {
            t.Errorf("case %d(%s) failed: \n expected %v, \n got: %v \n",
                i, test.name, test.expectedResource, req)
        }
    }
}

func TestGetPodResourceWithoutInitContainers(t *testing.T) {
    tests := []struct {
        name             string
        pod              *v1.Pod
        expectedResource *Resource
    }{
        {
            name: "get resource for pod without init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "2G")),
        },
        {
            name: "get resource for pod with init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    InitContainers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "5G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "2G")),
        },
    }

    for i, test := range tests {
        req := GetPodResourceWithoutInitContainers(test.pod)
        if !reflect.DeepEqual(req, test.expectedResource) {
            t.Errorf("case %d(%s) failed: \n expected %v, \n got: %v \n",
                i, test.name, test.expectedResource, req)
        }
    }
}