From 9dbaa955c58dc2208a0f67ae07e5c7fa6a70f56c Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Sat, 12 Oct 2024 02:50:22 +0200 Subject: [PATCH] Loop over trying to connect to the coordinator on first connection to make it possible to await the coordinator to be ready --- binaries/daemon/src/coordinator.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/binaries/daemon/src/coordinator.rs b/binaries/daemon/src/coordinator.rs index ad72ec02..a9efe86b 100644 --- a/binaries/daemon/src/coordinator.rs +++ b/binaries/daemon/src/coordinator.rs @@ -9,12 +9,16 @@ use dora_message::{ daemon_to_coordinator::{CoordinatorRequest, DaemonCoordinatorReply, DaemonRegisterRequest}, }; use eyre::{eyre, Context}; -use std::{io::ErrorKind, net::SocketAddr}; +use std::{io::ErrorKind, net::SocketAddr, time::Duration}; use tokio::{ net::TcpStream, sync::{mpsc, oneshot}, + time::sleep, }; use tokio_stream::{wrappers::ReceiverStream, Stream}; +use tracing::warn; + +const DAEMON_COORDINATOR_RETRY_INTERVAL: std::time::Duration = Duration::from_secs(1); #[derive(Debug)] pub struct CoordinatorEvent { @@ -28,9 +32,20 @@ pub async fn register( listen_port: u16, clock: &HLC, ) -> eyre::Result>> { - let mut stream = TcpStream::connect(addr) - .await - .wrap_err("failed to connect to dora-coordinator")?; + let mut stream = loop { + match TcpStream::connect(addr) + .await + .wrap_err("failed to connect to dora-coordinator") + { + Err(err) => { + warn!("Could not connect to: {addr}, with error: {err}. Retring in {DAEMON_COORDINATOR_RETRY_INTERVAL:#?}.."); + sleep(DAEMON_COORDINATOR_RETRY_INTERVAL).await; + } + Ok(stream) => { + break stream; + } + }; + }; stream .set_nodelay(true) .wrap_err("failed to set TCP_NODELAY")?;