-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathminimal_mpi.cpp
128 lines (95 loc) · 2.58 KB
/
minimal_mpi.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#include <iomanip>
#include <iostream>
#include <string>

#include <mpi.h>
namespace {
  // Size of MPI_COMM_WORLD and this process's rank in it; both set in main().
  int numranks, rank;

  // Host name of this process, filled in by main() via gethostname().
  char hn[256];

  // Set to 1 by the signal handler when a checkpoint has been requested and
  // cleared by done_checkpoint().  Because it is written from a signal
  // handler it must be 'volatile sig_atomic_t'; a plain int gives no
  // guarantee that the store is seen (or is indivisible) in the main flow.
  volatile sig_atomic_t checkpoint_req = 0;

  // Marks the pending checkpoint request as handled.
  void done_checkpoint () { checkpoint_req = 0; }
}

// Collectively decides whether any rank in 'comm' has a pending checkpoint
// request.
//
// Every rank of 'comm' must call this function: it performs an
// MPI_Allreduce (MPI_MAX) over the per-rank request flags and therefore
// blocks until all ranks have entered it.
//
// Returns nonzero on every rank if at least one rank had its flag set,
// and 0 otherwise.  When the result is nonzero the local flag is raised
// as well, so that all ranks agree a checkpoint is pending.
int checkpoint_requested (MPI_Comm comm)
{
  // Snapshot the flag once; the handler may change it at any moment.
  int local_checkpoint_req = checkpoint_req;
  int global_checkpoint_req = 0;

  // Reduce into a separate local instead of into checkpoint_req itself.
  // Receiving straight into the flag would overwrite (and so lose) a
  // request raised by a signal delivered while the reduction is running.
  MPI_Allreduce(&local_checkpoint_req, &global_checkpoint_req,
                1, MPI_INT, MPI_MAX, comm);

  // Only ever raise the flag here.  A request that arrived during the
  // reduction stays set and is picked up by the next call.
  if (global_checkpoint_req)
    checkpoint_req = 1;

  return global_checkpoint_req;
}
namespace {
  // Appends the NUL-terminated string 'text' to buf at offset 'len',
  // truncating silently at 'cap' bytes.  Returns the new length.
  size_t sig_append_str (char *buf, size_t cap, size_t len, const char *text)
  {
    while (*text != '\0' && len < cap)
      buf[len++] = *text++;
    return len;
  }

  // Appends the decimal representation of 'value' to buf at offset 'len',
  // truncating silently at 'cap' bytes.  Returns the new length.
  size_t sig_append_num (char *buf, size_t cap, size_t len, long long value)
  {
    char digits[24];
    size_t ndigits = 0;
    unsigned long long magnitude = static_cast<unsigned long long>(value);
    if (value < 0)
      magnitude = 0ULL - magnitude;

    // Digits are produced least-significant first, then emitted reversed.
    do
    {
      digits[ndigits++] = static_cast<char>('0' + (magnitude % 10));
      magnitude /= 10;
    } while (magnitude != 0);

    if (value < 0 && len < cap)
      buf[len++] = '-';
    while (ndigits > 0 && len < cap)
      buf[len++] = digits[--ndigits];
    return len;
  }

  // Writes 'len' bytes of 'buf' to standard output with write(2), retrying
  // on short writes and giving up on the first error.
  void sig_write_stdout (const char *buf, size_t len)
  {
    size_t written = 0;
    while (written < len)
    {
      const ssize_t n = write(STDOUT_FILENO, buf + written, len - written);
      if (n <= 0)
        break;
      written += static_cast<size_t>(n);
    }
  }
}

// Signal handler installed by register_sig_handler().
//
// SIGINT, SIGTERM and SIGUSR1 raise the checkpoint request flag; any other
// signal is only reported.  Rank 0 additionally prints a short report.
//
// The handler restricts itself to async-signal-safe operations: the report
// is assembled in a stack buffer and emitted with write(2).  printf() and
// ctime() must not be called here, because the signal may interrupt the
// main loop in the middle of its own printf()/fflush() and deadlock or
// corrupt stdio state.  For the same reason the time stamp is printed as
// seconds since the epoch rather than as a formatted date.
//
// The handler is deliberately not reset to SIG_DFL, so repeated signals
// keep being routed here.
void my_sig_handler (int signum)
{
  // write() may modify errno; restore it so the interrupted code does not
  // observe a spurious error value.
  const int saved_errno = errno;

  const bool is_checkpoint_signal =
    (SIGINT == signum) || (SIGTERM == signum) || (SIGUSR1 == signum);

  if (is_checkpoint_signal)
    checkpoint_req = 1;

  if (0 == rank)
  {
    char msg[256];
    size_t len = 0;

    len = sig_append_str(msg, sizeof(msg), len, "...inside handler function\n");
    len = sig_append_str(msg, sizeof(msg), len,
                         is_checkpoint_signal ? "...caught signal "
                                              : "...caught other unknown signal: ");
    len = sig_append_num(msg, sizeof(msg), len, signum);
    len = sig_append_str(msg, sizeof(msg), len, " at ");
    len = sig_append_num(msg, sizeof(msg), len, static_cast<long long>(time(nullptr)));
    len = sig_append_str(msg, sizeof(msg), len, " (seconds since the epoch)\n");
    if (!is_checkpoint_signal)
      len = sig_append_str(msg, sizeof(msg), len,
                           " see \"man 7 signal\" for a list of known signals\n");

    sig_write_stdout(msg, len);
  }

  errno = saved_errno;
}
// Installs my_sig_handler for SIGINT, SIGTERM, SIGUSR1 and SIGUSR2 in the
// calling process.  Rank 0 announces the registration together with its PID.
//
// sigaction() is used instead of signal() because signal() leaves it
// implementation-defined whether the handler is reset after the first
// delivery and whether interrupted system calls are restarted.  SA_RESTART
// requests restart of interruptible calls, and the handler stays installed
// until explicitly changed.  A failed registration is reported on stderr and
// the remaining signals are still attempted.
void register_sig_handler ()
{
  if (0 == rank)
    printf("Registering user-specified signal handlers for PID %d\n",
           static_cast<int>(getpid()));

  struct sigaction action {};
  action.sa_handler = my_sig_handler;
  sigemptyset(&action.sa_mask);
  action.sa_flags = SA_RESTART;

  const int handled_signals[] = { SIGINT, SIGTERM, SIGUSR1, SIGUSR2 };
  for (const int signum : handled_signals)
  {
    if (0 != sigaction(signum, &action, nullptr))
      perror("sigaction");
  }
}
// Placeholder checkpoint routine: no state is actually saved.
//
// Runs ten steps.  In each step rank 0 prints a progress line, flushes
// stdout and sleeps for five seconds, after which all ranks of 'comm' meet
// at a barrier.  Once the last step is finished the pending checkpoint
// request is cleared through done_checkpoint().
//
// Must be entered by every rank of 'comm', since each step contains an
// MPI_Barrier.
void do_checkpoint (MPI_Comm comm)
{
  const bool is_root = (rank == 0);

  int step = 0;
  while (step < 10)
  {
    ++step;

    if (is_root)
    {
      printf("\t%2d : Inside checkpoint function\n", step);
      fflush(stdout);
      sleep(5);
    }

    // Keep the other ranks in lock-step with rank 0.
    MPI_Barrier(comm);
  }

  done_checkpoint();
}
// Program entry point.
//
// Looks up the host name, initialises MPI, prints a greeting from every
// rank, installs the signal handlers on every rank and then runs the main
// loop for 5000 iterations.  In each iteration rank 0 prints a progress
// line and sleeps for five seconds, all ranks synchronise at a barrier, and
// the ranks then agree collectively on whether a checkpoint was requested;
// if so, do_checkpoint() is run.  Returns 0 after MPI_Finalize().
int main (int argc, char **argv)
{
  // gethostname() may fail, and on truncation it is unspecified whether the
  // buffer is NUL-terminated; make sure 'hn' is always a valid C string.
  if (0 != gethostname(hn, sizeof(hn)))
    snprintf(hn, sizeof(hn), "%s", "unknown");
  hn[sizeof(hn) - 1] = '\0';

  MPI_Init(&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &numranks);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);

  std::cout << "Hello from " << rank << " / " << hn
            << ", running " << argv[0] << " on " << numranks << " ranks" << std::endl;

  // register our user-defined signal handlers, on every rank
  register_sig_handler();

  for (int i=1; i<=5000 ;i++)
  {
    if (0 == rank)
    {
      printf("%2d : Main function loop\n",i);
      fflush(stdout);
      sleep(5);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    // this function needs to perform a reduction to see if any rank received
    // a signal, hence it is blocking.
    if (checkpoint_requested(MPI_COMM_WORLD))
      do_checkpoint(MPI_COMM_WORLD);
  }

  MPI_Finalize();
  return 0;
}