Skip to content

Commit

Permalink
Partially revert #2440 (#2443)
Browse files Browse the repository at this point in the history
  • Loading branch information
chizhg authored Sep 21, 2020
1 parent a61876e commit 37f1a12
Showing 1 changed file with 43 additions and 6 deletions.
49 changes: 43 additions & 6 deletions scripts/library.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,50 @@ function wait_until_object_does_not_exist() {
# Waits until all pods are running in the given namespace.
# Polls `kubectl get pods` every 2 seconds, at most 150 times (about 5 minutes).
# Pods listed as Terminating are ignored. The wait succeeds once at least one
# pod is listed, every listed pod is Running or Completed, and every pod that
# is not Completed reports N/N ready containers with N >= 1.
# NOTE: a namespace whose pods are all Completed is never reported as up, since
# there is no pod left whose containers can be verified as ready.
# On timeout, prints the last pod listing followed by the YAML and the logs of
# the first pod that was found not to be up.
# Parameters: $1 - namespace.
# Returns: 0 when all pods are up, 1 on timeout.
function wait_until_pods_running() {
  local namespace="$1"
  local pods="" not_running_pods="" active_pods="" failed_pod=""
  local name ready all_ready i
  echo -n "Waiting until all pods in namespace ${namespace} are up"
  for i in {1..150}; do  # timeout after 5 minutes
    # List all pods. Ignore Terminating pods as those have either been replaced
    # through a deployment or terminated on purpose (through chaosduck for
    # example). `|| true`: grep exits non-zero when it selects nothing, which
    # must not abort callers running under `set -e` / `pipefail`.
    pods="$(kubectl get pods --no-headers -n "${namespace}" 2>/dev/null \
      | grep -v Terminating)" || true
    # All pods must be running (ignore ImagePull errors to allow the pod to
    # retry).
    not_running_pods="$(grep -Ev 'Running|Completed|ErrImagePull|ImagePullBackOff' \
      <<< "${pods}")" || true
    if [[ -n "${pods}" && -z "${not_running_pods}" ]]; then
      # All pods are running or completed. Verify the containers on each pod
      # that has not completed.
      active_pods="$(grep -v Completed <<< "${pods}")" || true
      all_ready=1
      # IFS is set explicitly so column splitting does not depend on the
      # caller's IFS. Columns: NAME READY(ready/total) STATUS ...
      while IFS=$' \t' read -r name ready _; do
        # Assume this pod is the failed one until it passes the checks below.
        failed_pod="${name}"
        # READY must look like "N/N" with N >= 1 (10# forces base 10).
        if [[ "${ready}" =~ ^([0-9]+)/([0-9]+)$ ]] \
            && (( 10#${BASH_REMATCH[1]} >= 1 )) \
            && (( 10#${BASH_REMATCH[1]} == 10#${BASH_REMATCH[2]} )); then
          failed_pod=""
        else
          all_ready=0
          break
        fi
      done <<< "${active_pods}"
      if (( all_ready )); then
        printf '\nAll pods are up:\n%s\n' "${pods}"
        return 0
      fi
    elif [[ -n "${not_running_pods}" ]]; then
      # At least one pod is not running; remember the name of the first one.
      IFS=$' \t' read -r failed_pod _ <<< "${not_running_pods}" || true
    fi
    echo -n "."
    sleep 2
  done
  printf '\n\nERROR: timeout waiting for pods to come up\n%s\n' "${pods}"
  if [[ -n "${failed_pod}" ]]; then
    printf '\n\nFailed Pod (data in YAML format) - %s\n\n' "${failed_pod}"
    kubectl -n "${namespace}" get pods "${failed_pod}" -oyaml
    printf '\n\nPod Logs\n\n'
    kubectl -n "${namespace}" logs "${failed_pod}" --all-containers
  fi
  return 1
}

# Waits until all batch jobs complete in the given namespace.
Expand Down

0 comments on commit 37f1a12

Please sign in to comment.