Skip to content

Commit

Permalink
Add dist_blocker
Browse files Browse the repository at this point in the history
Dist_blocker would block the remote node from reconnecting
until we finish the cleaning
  • Loading branch information
arcusfelis committed Mar 5, 2024
1 parent 1835e8f commit 665c903
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/ejabberd_sup.erl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ start_link() ->
supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
DistBlocker = worker_spec(mongoose_dist_blocker),
Hooks = worker_spec(gen_hook),
Cleaner = worker_spec(mongoose_cleaner),
Router = worker_spec(ejabberd_router),
Expand All @@ -64,7 +65,8 @@ init([]) ->
IQSupervisor =
ejabberd_tmp_sup_spec(ejabberd_iq_sup, [ejabberd_iq_sup, mongoose_iq_worker]),
{ok, {{one_for_one, 10, 1},
[StartIdServer,
[DistBlocker,
StartIdServer,
PG,
Hooks,
Cleaner,
Expand Down
120 changes: 120 additions & 0 deletions src/mongoose_dist_blocker.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
%% @doc Disallow MongooseIM node connections until cleaning is done
%%
%% This module prevents a node from reconnecting, until cleaning activity is
%% finished. It prevents race conditions.
%%
%% This module assumes that all nodes share the same cookie.
-module(mongoose_dist_blocker).
-behaviour(gen_server).

-include("mongoose_logger.hrl").

%% API
-export([start_link/0,
add_cleaner/1,
cleaning_done/2]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
terminate/2, code_change/3]).

-ignore_xref([start_link/0]).

%% Start the blocker as a gen_server registered locally under the module name.
%% Returns whatever gen_server:start_link/4 returns.
start_link() ->
    ServerName = {local, ?MODULE},
    InitArgs = [],
    gen_server:start_link(ServerName, ?MODULE, InitArgs, []).

%% Register CleanerPid as a cleaner.
%% This is a synchronous call to the locally registered blocker server.
add_cleaner(CleanerPid) ->
    Request = {add_cleaner, CleanerPid},
    gen_server:call(?MODULE, Request).

%% Every registered cleaner must call this function once it has finished
%% cleaning after Node. The call removes the {Node, CleanerPid} entry from
%% the server's waiting list.
cleaning_done(CleanerPid, Node) ->
    Request = {cleaning_done, CleanerPid, Node},
    gen_server:call(?MODULE, Request).

%%--------------------------------------------------------------------
%% gen_server callbacks
%%--------------------------------------------------------------------
%% Subscribe to nodeup/nodedown messages, then run handle_nodeup/2 for every
%% node that is already connected. The state starts with no cleaners and an
%% empty waiting list.
init([]) ->
    net_kernel:monitor_nodes(true),
    Empty = #{cleaners => [], waiting => []},
    BlockNode = fun(Node, Acc) -> handle_nodeup(Node, Acc) end,
    Initial = lists:foldl(BlockNode, Empty, nodes()),
    {ok, Initial}.

%% Synchronous requests:
%%   {add_cleaner, Pid}         - register a cleaner process;
%%   {cleaning_done, Pid, Node} - drop the {Node, Pid} waiting entry and
%%                                unblock nodes nobody is waiting for anymore.
%% Any other request is passed to ?UNEXPECTED_CALL and answered with ok.
handle_call({add_cleaner, Pid}, _From, State) ->
    Registered = handle_add_cleaner(Pid, State),
    {reply, ok, Registered};
handle_call({cleaning_done, Pid, Node}, _From, State) ->
    Cleaned = handle_cleaning_done(Pid, Node, State),
    Unblocked = maybe_unblock(State, Cleaned),
    {reply, ok, Unblocked};
handle_call(Request, From, State) ->
    ?UNEXPECTED_CALL(Request, From),
    {reply, ok, State}.

Check warning on line 49 in src/mongoose_dist_blocker.erl

View check run for this annotation

Codecov / codecov/patch

src/mongoose_dist_blocker.erl#L48-L49

Added lines #L48 - L49 were not covered by tests

%% No casts are handled by this server: every cast is handed to the
%% ?UNEXPECTED_CAST macro (defined outside this module - presumably it logs
%% the message, confirm in mongoose_logger.hrl) and the state is kept as is.
handle_cast(Msg, State) ->
?UNEXPECTED_CAST(Msg),
{noreply, State}.

Check warning on line 53 in src/mongoose_dist_blocker.erl

View check run for this annotation

Codecov / codecov/patch

src/mongoose_dist_blocker.erl#L52-L53

Added lines #L52 - L53 were not covered by tests

%% Plain messages:
%%   {nodeup, Node}   - block Node from reconnecting later;
%%   {nodedown, Node} - start waiting for cleaners (or unblock right away);
%%   'DOWN' for a monitored process - forget that cleaner and unblock nodes
%%                                     that were only waiting for it.
%% Anything else is passed to ?UNEXPECTED_INFO and ignored.
handle_info({nodeup, Node}, State) ->
    Blocked = handle_nodeup(Node, State),
    {noreply, Blocked};
handle_info({nodedown, Node}, State) ->
    Waiting = handle_nodedown(Node, State),
    {noreply, Waiting};
handle_info({'DOWN', _Ref, process, Pid, _Info}, State) ->
    WithoutCleaner = handle_cleaner_down(Pid, State),
    {noreply, maybe_unblock(State, WithoutCleaner)};
handle_info(Info, State) ->
    ?UNEXPECTED_INFO(Info),
    {noreply, State}.

Check warning on line 63 in src/mongoose_dist_blocker.erl

View check run for this annotation

Codecov / codecov/patch

src/mongoose_dist_blocker.erl#L62-L63

Added lines #L62 - L63 were not covered by tests

%% Nothing is released or undone on shutdown; always returns ok.
terminate(_StopReason, _FinalState) ->
    ok.

Check warning on line 66 in src/mongoose_dist_blocker.erl

View check run for this annotation

Codecov / codecov/patch

src/mongoose_dist_blocker.erl#L66

Added line #L66 was not covered by tests

%% No state migration is performed on code upgrade: the state is returned
%% unchanged.
code_change(_FromVsn, State, _UpgradeExtra) ->
    {ok, State}.

Check warning on line 69 in src/mongoose_dist_blocker.erl

View check run for this annotation

Codecov / codecov/patch

src/mongoose_dist_blocker.erl#L69

Added line #L69 was not covered by tests

%%--------------------------------------------------------------------
%% internal functions
%%--------------------------------------------------------------------

%% Replace the cookie used for Node with the blocking cookie.
%% The state is returned untouched.
handle_nodeup(Node, State) ->
    %% The cookie is swapped right after the node connects. Swapping it on
    %% nodedown instead would be racy: nodedown messages are asynchronous,
    %% so the node could manage to reconnect before the cookie is changed.
    Cookie = blocking_cookie(),
    erlang:set_cookie(Node, Cookie),
    State.

%% Build the cookie that keeps a node from connecting to us:
%% '<our cookie>_blocked_by_<our node name>'.
blocking_cookie() ->
    %% lists:concat/1 renders atoms the same way atom_to_list/1 does.
    Parts = [erlang:get_cookie(), "_blocked_by_", node()],
    list_to_atom(lists:concat(Parts)).

%% Let Node connect to us again by restoring our own cookie for it.
unblock_node(Node) ->
    OwnCookie = erlang:get_cookie(),
    erlang:set_cookie(Node, OwnCookie).

%% React to a nodedown message for Node.
%% First clause: no cleaners are registered, so there is nothing to wait
%% for - the node is unblocked immediately and the state is unchanged.
handle_nodedown(Node, State = #{cleaners := []}) ->
%% Skip waiting when no cleaners
unblock_node(Node),
State;

Check warning on line 94 in src/mongoose_dist_blocker.erl

View check run for this annotation

Codecov / codecov/patch

src/mongoose_dist_blocker.erl#L93-L94

Added lines #L93 - L94 were not covered by tests
%% Second clause: at least one cleaner is registered. One {Node, CleanerPid}
%% entry per cleaner is merged into the waiting list; lists:usort/1 keeps
%% the list sorted and free of duplicates. The node is not unblocked here.
handle_nodedown(Node, State = #{cleaners := Cleaners, waiting := Waiting}) ->
New = [{Node, CleanerPid} || CleanerPid <- Cleaners],
State#{waiting := lists:usort(New ++ Waiting)}.

%% Register CleanerPid in the cleaners list and start monitoring it.
%%
%% Registration is idempotent: when CleanerPid is already registered the
%% state is returned as is and no additional monitor is created. Without
%% this check every repeated add_cleaner call would stack one more monitor
%% on the same process and later deliver one extra 'DOWN' message for it.
%%
%% Returns the updated state; the cleaners list is kept sorted and unique.
handle_add_cleaner(CleanerPid, State = #{cleaners := Cleaners}) ->
    case lists:member(CleanerPid, Cleaners) of
        true ->
            State;
        false ->
            erlang:monitor(process, CleanerPid),
            State#{cleaners := lists:usort([CleanerPid | Cleaners])}
    end.

%% Drop the {Node, CleanerPid} entry from the waiting list.
%% The state is returned with an equal waiting list when no such entry exists.
handle_cleaning_done(CleanerPid, Node, #{waiting := Waiting} = State) ->
    Entry = {Node, CleanerPid},
    %% `--` with a one-element list removes the first matching entry only.
    Remaining = Waiting -- [Entry],
    State#{waiting := Remaining}.

%% Forget a cleaner process that went down: remove it from the cleaners list
%% and discard every waiting entry that belongs to it.
handle_cleaner_down(CleanerPid, #{cleaners := Cleaners, waiting := Waiting} = State) ->
    %% Keep only well-formed {Node, Pid} entries owned by other cleaners.
    IsForeign = fun({_Node, Pid}) -> CleanerPid =/= Pid;
                   (_Malformed) -> false
                end,
    RemainingCleaners = lists:delete(CleanerPid, Cleaners),
    RemainingWaiting = lists:filter(IsForeign, Waiting),
    State#{cleaners := RemainingCleaners, waiting := RemainingWaiting}.

Check warning on line 108 in src/mongoose_dist_blocker.erl

View check run for this annotation

Codecov / codecov/patch

src/mongoose_dist_blocker.erl#L108

Added line #L108 was not covered by tests

%% Compare the waiting lists of the old and the new state and unblock every
%% node that had waiting entries before but has none now, i.e. the last
%% cleaner it was waiting for is gone from the list.
%% Must be called whenever entries are removed from the waiting list.
%% Returns the new state.
maybe_unblock(#{waiting := WaitingBefore}, #{waiting := WaitingAfter} = NewState) ->
    NodesBefore = cast_waiting_to_nodes(WaitingBefore),
    NodesAfter = cast_waiting_to_nodes(WaitingAfter),
    lists:foreach(fun(Node) -> unblock_node(Node) end, NodesBefore -- NodesAfter),
    NewState.

%% Extract the sorted, duplicate-free list of nodes from a waiting list of
%% {Node, CleanerPid} entries. Elements of any other shape are skipped.
cast_waiting_to_nodes(Waiting) ->
    PickNode = fun({Node, _CleanerPid}) -> {true, Node};
                  (_Other) -> false
               end,
    lists:usort(lists:filtermap(PickNode, Waiting)).

0 comments on commit 665c903

Please sign in to comment.