forked from horovod/horovod
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathJenkinsfile.ppc64le
71 lines (68 loc) · 2.77 KB
/
Jenkinsfile.ppc64le
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// Declarative pipeline for the ppc64le GPU job: it builds Horovod inside a
// Docker container, runs the TensorFlow and PyTorch parallel unit tests, and
// reports the outcome to GitHub through setBuildStatus().
//
// NOTE: the shell scripts inside the ''' literals are intentionally left
// flush-left so the text handed to bash is exactly what it was before.
pipeline {
    options {
        // Keep only the 30 most recent builds.
        buildDiscarder(logRotator(numToKeepStr: '30'))
        // Abort the whole run after 30 minutes. An aborted run is reported to
        // GitHub by the `aborted` post condition below.
        timeout(time: 30, unit: 'MINUTES')
    }
    agent {
        docker {
            // Always pull the image instead of reusing a locally cached copy.
            alwaysPull true
            // Open-CE 1.4.1 has CUDA 10.2, NCCL 2.8.3, TensorFlow 2.6.0, and PyTorch 1.9.1
            image 'tensorflowppc64le/tensorflow-ppc64le:osuosl-ubi7-horovod-opence1.4.1-py3.9-ppc64le'
            args '--cap-add=SYS_PTRACE --shm-size=256g'
            // Only schedule on nodes carrying the 'power8-gpu' label.
            label 'power8-gpu'
            registryCredentialsId 'TensorFlow'
        }
    }
    stages {
        // Mark the commit as pending before any real work starts.
        stage ('UPDATE_GITHUB_STATUS') {
            steps {
                setBuildStatus("ppc64le Build/Tests Pending", "PENDING")
            }
        }
        // Build and install Horovod from the checked-out sources with NCCL GPU
        // operations, with PyTorch and TensorFlow support, without MXNet/Gloo.
        // CONDA_INIT and CONDA_ENV are not defined in this file; presumably
        // they are provided by the container image -- TODO confirm.
        stage ('BUILD_HOROVOD') {
            steps {
                sh '''#!/usr/bin/env bash
git submodule update --init --recursive
. ${CONDA_INIT}
conda activate ${CONDA_ENV}
set -xe
HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITHOUT_GLOO=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_TENSORFLOW=1 \
HOROVOD_CUDA_HOME="/usr/local/cuda" HOROVOD_GPU_OPERATIONS=NCCL \
pip install -v . --no-cache-dir --no-deps
'''
            }
        }
        // Run the parallel test suites through horovodrun. The TensorFlow
        // 'multi_gpu' tests are run separately in a single process.
        stage ('TEST_HOROVOD') {
            steps {
                ansiColor('xterm') {
                    sh '''#!/usr/bin/env bash
. ${CONDA_INIT}
conda activate ${CONDA_ENV}
set -xe
# TensorFlow unit tests
horovodrun -n 2 -H localhost:2 pytest -k 'not multi_gpu' -v -s test/parallel/test_tensorflow.py
# Container has only 2 GPUs, so run the 'multi_gpu' test seperatly on one process
horovodrun -n 1 -H localhost:1 pytest -k 'multi_gpu' -v -s test/parallel/test_tensorflow.py
# PyTorch unit tests
horovodrun -n 2 -H localhost:2 pytest -v -s test/parallel/test_torch.py
'''
                }
            }
        }
    } // end of stages
    post {
        success {
            setBuildStatus("ppc64le Build/Tests Passed", "SUCCESS")
        }
        failure {
            setBuildStatus("ppc64le Build/Tests Failed", "FAILURE")
        }
        // An unstable run is reported to GitHub as a failure as well.
        unstable {
            setBuildStatus("ppc64le Build/Tests Failed", "FAILURE")
        }
        // A run that hits the 30-minute timeout or is cancelled by hand ends
        // as ABORTED, which none of the conditions above match. Without this
        // handler the GitHub status would stay at "Pending".
        aborted {
            setBuildStatus("ppc64le Build/Tests Aborted", "ERROR")
        }
    }
} //end of pipeline
/**
 * Publishes a GitHub commit status under the 'ppc64le-checks' context.
 *
 * @param message text used as the status description
 * @param state   status value passed to githubNotify; callers in this file
 *                use "PENDING", "SUCCESS" and "FAILURE"
 */
void setBuildStatus(String message, String state) {
    // BRANCH_NAME and BUILD_NUMBER are not defined in this file; presumably
    // they are supplied by Jenkins for the running job -- TODO confirm.
    String consoleUrl = "https://powerci.osuosl.org/job/Horovod_PPC64LE_GPU_PIPELINE/view/change-requests/job/${BRANCH_NAME}/${BUILD_NUMBER}/console"
    String statusText = "${message}"
    String statusValue = "${state}"

    githubNotify(
        context: 'ppc64le-checks',
        description: statusText,
        status: statusValue,
        targetUrl: consoleUrl
    )
}