This repository has been archived by the owner on May 25, 2023. It is now read-only.

Automated cherry pick of #638: Take init containers into account when getting pod resource; #645: Order task by CreationTimestamp first, then by UID; #647: In allocate, skip adding Job if its queue is not found (#680)

Closed
18 changes: 11 additions & 7 deletions pkg/scheduler/actions/allocate/allocate.go
@@ -46,12 +46,16 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
     jobsMap := map[api.QueueID]*util.PriorityQueue{}

     for _, job := range ssn.Jobs {
-        if _, found := jobsMap[job.Queue]; !found {
-            jobsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn)
-        }
-
         if queue, found := ssn.Queues[job.Queue]; found {
             queues.Push(queue)
+        } else {
+            glog.Warningf("Skip adding Job <%s/%s> because its queue %s is not found",
+                job.Namespace, job.Name, job.Queue)
+            continue
         }
+
+        if _, found := jobsMap[job.Queue]; !found {
+            jobsMap[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn)
+        }

         glog.V(4).Infof("Added Job <%s/%s> into Queue <%s>", job.Namespace, job.Name, job.Queue)
@@ -143,7 +147,7 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
         selectedNodes := util.SelectBestNode(nodeScores)
         for _, node := range selectedNodes {
             // Allocate idle resource to the task.
-            if task.Resreq.LessEqual(node.Idle) {
+            if task.InitResreq.LessEqual(node.Idle) {
                 glog.V(3).Infof("Binding Task <%v/%v> to node <%v>",
                     task.Namespace, task.Name, node.Name)
                 if err := ssn.Allocate(task, node.Name); err != nil {
@@ -162,9 +166,9 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
             }

             // Allocate releasing resource to the task if any.
-            if task.Resreq.LessEqual(node.Releasing) {
+            if task.InitResreq.LessEqual(node.Releasing) {
                 glog.V(3).Infof("Pipelining Task <%v/%v> to node <%v> for <%v> on <%v>",
-                    task.Namespace, task.Name, node.Name, task.Resreq, node.Releasing)
+                    task.Namespace, task.Name, node.Name, task.InitResreq, node.Releasing)
                 if err := ssn.Pipeline(task, node.Name); err != nil {
                     glog.Errorf("Failed to pipeline Task %v on %v in Session %v",
                         task.UID, node.Name, ssn.UID)
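The reordering in the first hunk matters: the queue-existence check now runs before any per-queue state is created, so a Job that references a missing Queue is skipped outright instead of leaving an empty entry behind in jobsMap. A minimal standalone sketch of that guard pattern (simplified types; Job, queues, and jobsMap here are illustrative stand-ins, not the scheduler's real API):

```go
package main

import "log"

// Job is a simplified stand-in for the scheduler's api.JobInfo.
type Job struct{ Namespace, Name, Queue string }

func main() {
	queues := map[string]bool{"default": true} // known queues
	jobsMap := map[string][]Job{}              // per-queue pending jobs

	jobs := []Job{
		{Namespace: "ns1", Name: "job-a", Queue: "default"},
		{Namespace: "ns2", Name: "job-b", Queue: "missing"},
	}

	for _, job := range jobs {
		// Validate the queue before touching jobsMap, mirroring the reordered hunk above.
		if !queues[job.Queue] {
			log.Printf("Skip adding Job <%s/%s> because its queue %s is not found",
				job.Namespace, job.Name, job.Queue)
			continue
		}
		jobsMap[job.Queue] = append(jobsMap[job.Queue], job)
	}

	log.Printf("queued: %v", jobsMap) // only job-a is queued
}
```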
2 changes: 1 addition & 1 deletion pkg/scheduler/actions/backfill/backfill.go
@@ -44,7 +44,7 @@ func (alloc *backfillAction) Execute(ssn *framework.Session) {
     // TODO (k82cn): When backfilling, we also need to balance between Queues.
     for _, job := range ssn.Jobs {
         for _, task := range job.TaskStatusIndex[api.Pending] {
-            if task.Resreq.IsEmpty() {
+            if task.InitResreq.IsEmpty() {
                 // As the task did not request resources, it only needs to meet predicates.
                 // TODO (k82cn): need to prioritize nodes to avoid pod hole.
                 for _, node := range ssn.Nodes {
6 changes: 3 additions & 3 deletions pkg/scheduler/actions/preempt/preempt.go
@@ -203,7 +203,7 @@ func preempt(

     var preemptees []*api.TaskInfo
     preempted := api.EmptyResource()
-    resreq := preemptor.Resreq.Clone()
+    resreq := preemptor.InitResreq.Clone()

     for _, task := range node.Tasks {
         if filter == nil {
@@ -239,9 +239,9 @@

     metrics.RegisterPreemptionAttempts()
     glog.V(3).Infof("Preempted <%v> for task <%s/%s> requested <%v>.",
-        preempted, preemptor.Namespace, preemptor.Name, preemptor.Resreq)
+        preempted, preemptor.Namespace, preemptor.Name, preemptor.InitResreq)

-    if preemptor.Resreq.LessEqual(preempted) {
+    if preemptor.InitResreq.LessEqual(preempted) {
         if err := stmt.Pipeline(preemptor, node.Name); err != nil {
             glog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>",
                 preemptor.Namespace, preemptor.Name, node.Name)
7 changes: 3 additions & 4 deletions pkg/scheduler/actions/reclaim/reclaim.go
@@ -110,14 +110,13 @@ func (alloc *reclaimAction) Execute(ssn *framework.Session) {
             }

             assigned := false
-
             for _, n := range ssn.Nodes {
                 // If predicates failed, next node.
                 if err := ssn.PredicateFn(task, n); err != nil {
                     continue
                 }

-                resreq := task.Resreq.Clone()
+                resreq := task.InitResreq.Clone()
                 reclaimed := api.EmptyResource()

                 glog.V(3).Infof("Considering Task <%s/%s> on Node <%s>.",
@@ -172,9 +171,9 @@ func (alloc *reclaimAction) Execute(ssn *framework.Session) {
                 }

                 glog.V(3).Infof("Reclaimed <%v> for task <%s/%s> requested <%v>.",
-                    reclaimed, task.Namespace, task.Name, task.Resreq)
+                    reclaimed, task.Namespace, task.Name, task.InitResreq)

-                if task.Resreq.LessEqual(reclaimed) {
+                if task.InitResreq.LessEqual(reclaimed) {
                     if err := ssn.Pipeline(task, n.Name); err != nil {
                         glog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>",
                             task.Namespace, task.Name, n.Name)
31 changes: 16 additions & 15 deletions pkg/scheduler/api/job_info.go
@@ -38,7 +38,10 @@ type TaskInfo struct {
     Name      string
     Namespace string

+    // Resreq is the resource used when the task is running.
     Resreq *Resource
+    // InitResreq is the resource used to launch the task.
+    InitResreq *Resource

     NodeName string
     Status   TaskStatus
@@ -61,25 +64,22 @@ func getJobID(pod *v1.Pod) JobID {
 }

 func NewTaskInfo(pod *v1.Pod) *TaskInfo {
-    req := EmptyResource()
-
-    // TODO(k82cn): also includes initContainers' resource.
-    for _, c := range pod.Spec.Containers {
-        req.Add(NewResource(c.Resources.Requests))
-    }
+    req := GetPodResourceWithoutInitContainers(pod)
+    initResreq := GetPodResourceRequest(pod)

     jobID := getJobID(pod)

     ti := &TaskInfo{
-        UID:       TaskID(pod.UID),
-        Job:       jobID,
-        Name:      pod.Name,
-        Namespace: pod.Namespace,
-        NodeName:  pod.Spec.NodeName,
-        Status:    getTaskStatus(pod),
-        Priority:  1,
-        Pod:       pod,
-        Resreq:    req,
+        UID:        TaskID(pod.UID),
+        Job:        jobID,
+        Name:       pod.Name,
+        Namespace:  pod.Namespace,
+        NodeName:   pod.Spec.NodeName,
+        Status:     getTaskStatus(pod),
+        Priority:   1,
+        Pod:        pod,
+        Resreq:     req,
+        InitResreq: initResreq,
     }

     if pod.Spec.Priority != nil {
@@ -100,6 +100,7 @@ func (ti *TaskInfo) Clone() *TaskInfo {
         Priority:    ti.Priority,
         Pod:         ti.Pod,
         Resreq:      ti.Resreq.Clone(),
+        InitResreq:  ti.InitResreq.Clone(),
         VolumeReady: ti.VolumeReady,
     }
 }
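With both helpers wired into NewTaskInfo, every TaskInfo now carries two request vectors: Resreq (the running footprint) and InitResreq (the launch footprint). A hedged sketch of the difference (the import path assumes this is the kube-batch repo, and buildPod is a hypothetical helper, not part of the PR):

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"github.com/kubernetes-sigs/kube-batch/pkg/scheduler/api"
)

// buildPod is a hypothetical helper: one init container requesting 2 CPU and
// one regular container requesting 1 CPU.
func buildPod() *v1.Pod {
	cpu := func(q string) v1.ResourceList {
		return v1.ResourceList{v1.ResourceCPU: resource.MustParse(q)}
	}
	return &v1.Pod{
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{
				{Resources: v1.ResourceRequirements{Requests: cpu("2")}},
			},
			Containers: []v1.Container{
				{Resources: v1.ResourceRequirements{Requests: cpu("1")}},
			},
		},
	}
}

func main() {
	ti := api.NewTaskInfo(buildPod())
	// Resreq sums only the regular containers (1 CPU); InitResreq additionally
	// takes the max against each init container (2 CPU). The actions' predicates
	// now check InitResreq, so a task lands only where its init phase also fits.
	fmt.Printf("Resreq=%v InitResreq=%v\n", ti.Resreq, ti.InitResreq)
}
```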
71 changes: 71 additions & 0 deletions pkg/scheduler/api/pod_info.go
@@ -0,0 +1,71 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
    "k8s.io/api/core/v1"
)

// GetPodResourceRequest returns a *Resource that covers the largest width in each resource dimension.
// Because init containers run sequentially, we take the max of their requests in each dimension.
// In contrast, we sum the resource vectors of regular containers, since they run simultaneously.
//
// Refer to k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/predicates.go#GetResourceRequest.
//
// To be consistent with the Kubernetes default scheduler, this is only used in the predicates of
// actions (e.g. allocate, backfill, preempt, reclaim); use GetPodResourceWithoutInitContainers
// for other cases.
// Example:
//
//   Pod:
//     InitContainers
//       IC1:
//         CPU: 2
//         Memory: 1G
//       IC2:
//         CPU: 2
//         Memory: 3G
//     Containers
//       C1:
//         CPU: 2
//         Memory: 1G
//       C2:
//         CPU: 1
//         Memory: 1G
//
//   Result: CPU: 3, Memory: 3G
func GetPodResourceRequest(pod *v1.Pod) *Resource {
    result := GetPodResourceWithoutInitContainers(pod)

    // take max_resource(sum_pod, any_init_container)
    for _, container := range pod.Spec.InitContainers {
        result.SetMaxResource(NewResource(container.Resources.Requests))
    }

    return result
}

// GetPodResourceWithoutInitContainers returns a Pod's resource request; it does not include
// init containers' resource requests.
func GetPodResourceWithoutInitContainers(pod *v1.Pod) *Resource {
    result := EmptyResource()
    for _, container := range pod.Spec.Containers {
        result.Add(NewResource(container.Resources.Requests))
    }

    return result
}
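The max/sum rule itself is easy to check in isolation. A toy model with resources reduced to plain integers (the res type and effectiveRequest are hypothetical stand-ins, not the package's Resource API):

```go
package main

import "fmt"

// res is a toy stand-in for the package's Resource type.
type res struct{ cpuMilli, memBytes int64 }

func max64(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}

// effectiveRequest mirrors GetPodResourceRequest: sum the regular containers
// (they run simultaneously), then take the per-dimension max against each
// init container (they run one at a time).
func effectiveRequest(initContainers, containers []res) res {
	var sum res
	for _, c := range containers {
		sum.cpuMilli += c.cpuMilli
		sum.memBytes += c.memBytes
	}
	for _, ic := range initContainers {
		sum.cpuMilli = max64(sum.cpuMilli, ic.cpuMilli)
		sum.memBytes = max64(sum.memBytes, ic.memBytes)
	}
	return sum
}

func main() {
	const g = int64(1000000000)
	ics := []res{{2000, 1 * g}, {2000, 3 * g}}        // IC1, IC2 from the doc comment
	containers := []res{{2000, 1 * g}, {1000, 1 * g}} // C1, C2
	fmt.Println(effectiveRequest(ics, containers))    // {3000 3000000000}: CPU 3, Memory 3G
}
```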
162 changes: 162 additions & 0 deletions pkg/scheduler/api/pod_info_test.go
@@ -0,0 +1,162 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
    "reflect"
    "testing"

    "k8s.io/api/core/v1"
)

func TestGetPodResourceRequest(t *testing.T) {
    tests := []struct {
        name             string
        pod              *v1.Pod
        expectedResource *Resource
    }{
        {
            name: "get resource for pod without init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "2G")),
        },
        {
            name: "get resource for pod with init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    InitContainers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "5G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "5G")),
        },
    }

    for i, test := range tests {
        req := GetPodResourceRequest(test.pod)
        if !reflect.DeepEqual(req, test.expectedResource) {
            t.Errorf("case %d(%s) failed: \n expected %v, \n got: %v \n",
                i, test.name, test.expectedResource, req)
        }
    }
}

func TestGetPodResourceWithoutInitContainers(t *testing.T) {
    tests := []struct {
        name             string
        pod              *v1.Pod
        expectedResource *Resource
    }{
        {
            name: "get resource for pod without init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "2G")),
        },
        {
            name: "get resource for pod with init containers",
            pod: &v1.Pod{
                Spec: v1.PodSpec{
                    InitContainers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "5G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                    Containers: []v1.Container{
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("1000m", "1G"),
                            },
                        },
                        {
                            Resources: v1.ResourceRequirements{
                                Requests: buildResourceList("2000m", "1G"),
                            },
                        },
                    },
                },
            },
            expectedResource: NewResource(buildResourceList("3000m", "2G")),
        },
    }

    for i, test := range tests {
        req := GetPodResourceWithoutInitContainers(test.pod)
        if !reflect.DeepEqual(req, test.expectedResource) {
            t.Errorf("case %d(%s) failed: \n expected %v, \n got: %v \n",
                i, test.name, test.expectedResource, req)
        }
    }
}