Skip to content

Commit

Permalink
Handle race-condition of node not being validated
Browse files Browse the repository at this point in the history
If it is restarting at the end of the test suite
If MIM is restarted asynchronously, simply checking whether the application is
running is not enough. We first need to wait for MIM to actually fully restart.

The best place to do it is in end_per_suite. So, if some suite crashes MIM,
the rest of the suites would quickly fail in init_per_suite (and we need to wait
only once).
  • Loading branch information
arcusfelis committed Dec 9, 2024
1 parent 42c3067 commit ebc3c1b
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
12 changes: 9 additions & 3 deletions big_tests/src/cth_validate_nodes.erl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
-export([pre_init_per_suite/3]).
-export([terminate/1]).

-record(state, {}).
-record(state, {node_keys = []}).

%% CT callbacks

Expand All @@ -19,8 +19,8 @@ init(_Id, _Opts) ->

pre_init_per_suite(_Suite, Config, State) ->
case distributed_helper:validate_nodes() of
ok ->
{Config, State};
{ok, NodeKeys} ->
{Config, State#state{node_keys = NodeKeys}};
{error, Reason} ->
case os:getenv("SKIP_VALIDATE_NODES") of
"true" ->
Expand All @@ -31,5 +31,11 @@ pre_init_per_suite(_Suite, Config, State) ->
end
end.

%% CT hook callback invoked after a suite's end_per_suite.
%%
%% A suite may restart the node as its last step, so before the next suite
%% begins we wait for the nodes recorded in the hook state to start.
%% The suite's return value is passed through untouched and the stored node
%% keys are cleared from the state.
post_end_per_suite(_Suite, _Config, Result, #state{node_keys = Keys} = State) ->
    distributed_helper:wait_for_nodes_to_start(Keys),
    Cleared = State#state{node_keys = []},
    {Result, Cleared}.

%% CT hook callback: takes the hook state, does nothing with it
%% and always returns ok.
terminate(_HookState) ->
    ok.
14 changes: 12 additions & 2 deletions test/common/distributed_helper.erl
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,23 @@ lookup_config_opt(Key) ->

%% @doc Checks if MongooseIM nodes are running
validate_nodes() ->
Results = [validate_node(Node) || Node <- get_node_keys()],
validate_nodes(get_node_keys()).

validate_nodes(NodeKeys) ->
Results = [validate_node(Node) || Node <- NodeKeys],
Errors = [Res || Res <- Results, Res =/= ok],
case Errors of
[] -> ok;
[] -> {ok, NodeKeys};
_ -> {error, Errors}
end.

%% @doc Waits until validate_nodes/1 reports {ok, NodeKeys} for the given keys.
%% With an empty key list there is nothing to wait for and ok is returned
%% immediately. Otherwise the check is repeated via wait_helper:wait_until/3
%% with a 20 second time budget and a sleep_time of 1000 between attempts
%% (presumably milliseconds -- confirm against wait_helper).
%% NOTE(review): the result on timeout is whatever wait_helper:wait_until/3
%% does in that case; it is not visible from here.
wait_for_nodes_to_start([]) ->
    ok;
wait_for_nodes_to_start(NodeKeys) ->
    Check = fun() -> validate_nodes(NodeKeys) end,
    Expected = {ok, NodeKeys},
    Opts = #{name => wait_for_nodes_to_start,
             sleep_time => 1000,
             time_left => timer:seconds(20)},
    wait_helper:wait_until(Check, Expected, Opts).

get_node_keys() ->
case os:getenv("TEST_HOSTS") of
false ->
Expand Down

0 comments on commit ebc3c1b

Please sign in to comment.