Skip to content

Reconcile QQ node dead during delete and redeclare #14241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions deps/rabbit/src/amqqueue.erl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
% exclusive_owner
get_exclusive_owner/1,
get_leader_node/1,
get_nodes/1,
% name (#resource)
get_name/1,
set_name/2,
Expand Down Expand Up @@ -394,15 +393,6 @@ get_leader_node(#amqqueue{pid = {_, Leader}}) -> Leader;
get_leader_node(#amqqueue{pid = none}) -> none;
get_leader_node(#amqqueue{pid = Pid}) -> node(Pid).

-spec get_nodes(amqqueue_v2()) -> [node(),...].

get_nodes(Q) ->
case amqqueue:get_type_state(Q) of
#{nodes := Nodes} ->
Nodes;
_ ->
[get_leader_node(Q)]
end.

% operator_policy

Expand Down
2 changes: 1 addition & 1 deletion deps/rabbit/src/rabbit_amqp_management.erl
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ encode_queue(Q, NumMsgs, NumConsumers) ->
{Leader :: node() | none, Replicas :: [node(),...]}.
queue_topology(Q) ->
Leader = amqqueue:get_leader_node(Q),
Replicas = amqqueue:get_nodes(Q),
Replicas = rabbit_amqqueue:get_nodes(Q),
{Leader, Replicas}.

decode_exchange({map, KVList}) ->
Expand Down
14 changes: 8 additions & 6 deletions deps/rabbit/src/rabbit_amqqueue.erl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
-export([list/0, list_durable/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2,
emit_info_all/5, list_local/1, info_local/1,
emit_info_local/4, emit_info_down/4]).
-export([get_nodes/1]).
-export([count/0]).
-export([list_down/1, list_down/2, list_all/1,
count/1, list_names/0, list_names/1, list_local_names/0,
Expand Down Expand Up @@ -1233,6 +1234,12 @@ list() ->
count() ->
rabbit_db_queue:count().

-spec get_nodes(amqqueue:amqqueue_v2()) -> [node(),...].

get_nodes(Q) ->
[{members, Nodes}] = info(Q, [members]),
Nodes.

-spec list_names() -> [name()].

list_names() ->
Expand Down Expand Up @@ -2042,12 +2049,7 @@ pseudo_queue(#resource{kind = queue} = QueueName, Pid, Durable)
).

get_quorum_nodes(Q) ->
case amqqueue:get_type_state(Q) of
#{nodes := Nodes} ->
Nodes;
_ ->
[]
end.
rabbit_amqqueue:get_nodes(Q).

-spec prepend_extra_bcc(Qs) ->
Qs when Qs :: [amqqueue:amqqueue() |
Expand Down
7 changes: 7 additions & 0 deletions deps/rabbit/src/rabbit_core_ff.erl
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,10 @@
stability => stable,
depends_on => ['rabbitmq_4.0.0']
}}).

-rabbit_feature_flag(
{'track_qq_members_uids',
#{desc => "Track queue members UIDs in the metadata store",
stability => stable,
depends_on => []
}}).
17 changes: 2 additions & 15 deletions deps/rabbit/src/rabbit_jms_selector_parser.erl
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
-file("rabbit_jms_selector_parser.yrl", 0).
-module(rabbit_jms_selector_parser).
-file("rabbit_jms_selector_parser.erl", 3).
-export([parse/1, parse_and_scan/1, format_error/1]).
-file("rabbit_jms_selector_parser.yrl", 122).

Expand All @@ -26,9 +24,7 @@ process_escape_char({string, Line, Value}) ->
%%
%% %CopyrightBegin%
%%
%% SPDX-License-Identifier: Apache-2.0
%%
%% Copyright Ericsson AB 1996-2025. All Rights Reserved.
%% Copyright Ericsson AB 1996-2021. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
Expand All @@ -50,16 +46,10 @@ process_escape_char({string, Line, Value}) ->

-type yecc_ret() :: {'error', _} | {'ok', _}.

-ifdef (YECC_PARSE_DOC).
-doc ?YECC_PARSE_DOC.
-endif.
-spec parse(Tokens :: list()) -> yecc_ret().
parse(Tokens) ->
yeccpars0(Tokens, {no_func, no_location}, 0, [], []).

-ifdef (YECC_PARSE_AND_SCAN_DOC).
-doc ?YECC_PARSE_AND_SCAN_DOC.
-endif.
-spec parse_and_scan({function() | {atom(), atom()}, [_]}
| {atom(), atom(), [_]}) -> yecc_ret().
parse_and_scan({F, A}) ->
Expand All @@ -68,9 +58,6 @@ parse_and_scan({M, F, A}) ->
Arity = length(A),
yeccpars0([], {{fun M:F/Arity, A}, no_location}, 0, [], []).

-ifdef (YECC_FORMAT_ERROR_DOC).
-doc ?YECC_FORMAT_ERROR_DOC.
-endif.
-spec format_error(any()) -> [char() | list()].
format_error(Message) ->
case io_lib:deep_char_list(Message) of
Expand Down Expand Up @@ -212,7 +199,7 @@ yecctoken2string1(Other) ->



-file("rabbit_jms_selector_parser.erl", 215).
-file("rabbit_jms_selector_parser.erl", 202).

-dialyzer({nowarn_function, yeccpars2/7}).
-compile({nowarn_unused_function, yeccpars2/7}).
Expand Down
2 changes: 1 addition & 1 deletion deps/rabbit/src/rabbit_queue_location.erl
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ select_members(Size, _, AllNodes, RunningNodes, _, _, GetQueues) ->
Counters0 = maps:from_list([{N, 0} || N <- lists:delete(?MODULE:node(), AllNodes)]),
Queues = GetQueues(),
Counters = lists:foldl(fun(Q, Acc) ->
#{nodes := Nodes} = amqqueue:get_type_state(Q),
Nodes = rabbit_amqqueue:get_nodes(Q),
lists:foldl(fun(N, A)
when is_map_key(N, A) ->
maps:update_with(N, fun(C) -> C+1 end, A);
Expand Down
131 changes: 104 additions & 27 deletions deps/rabbit/src/rabbit_quorum_queue.erl
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,17 @@ start_cluster(Q) ->
{LeaderNode, FollowerNodes} =
rabbit_queue_location:select_leader_and_followers(Q, QuorumSize),
LeaderId = {RaName, LeaderNode},
UIDs = maps:from_list([{Node, ra:new_uid(ra_lib:to_binary(RaName))}
|| Node <- [LeaderNode | FollowerNodes]]),
NewQ0 = amqqueue:set_pid(Q, LeaderId),
NewQ1 = amqqueue:set_type_state(NewQ0,
#{nodes => [LeaderNode | FollowerNodes]}),
NewQ1 = case rabbit_feature_flags:is_enabled(track_qq_members_uids) of
false ->
amqqueue:set_type_state(NewQ0,
#{nodes => [LeaderNode | FollowerNodes]});
true ->
amqqueue:set_type_state(NewQ0,
#{nodes => UIDs})
end,

Versions = [V || {ok, V} <- erpc:multicall(FollowerNodes,
rabbit_fifo, version, [],
Expand Down Expand Up @@ -722,7 +730,7 @@ repair_amqqueue_nodes(Q0) ->
{Name, _} = amqqueue:get_pid(Q0),
Members = ra_leaderboard:lookup_members(Name),
RaNodes = [N || {_, N} <- Members],
#{nodes := Nodes} = amqqueue:get_type_state(Q0),
Nodes = get_nodes(Q0),
case lists:sort(RaNodes) =:= lists:sort(Nodes) of
true ->
%% up to date
Expand All @@ -731,7 +739,18 @@ repair_amqqueue_nodes(Q0) ->
%% update amqqueue record
Fun = fun (Q) ->
TS0 = amqqueue:get_type_state(Q),
TS = TS0#{nodes => RaNodes},
TS = case rabbit_feature_flags:is_enabled(track_qq_members_uids)
andalso has_uuid_tracking(TS0)
of
false ->
TS0#{nodes => RaNodes};
true ->
RaUids = maps:from_list([{N, erpc:call(N, ra_directory, uid_of,
[?RA_SYSTEM, Name],
?RPC_TIMEOUT)}
|| N <- RaNodes]),
TS0#{nodes => RaUids}
end,
amqqueue:set_type_state(Q, TS)
end,
_ = rabbit_amqqueue:update(QName, Fun),
Expand Down Expand Up @@ -791,6 +810,23 @@ recover(_Vhost, Queues) ->
ServerId = {Name, node()},
QName = amqqueue:get_name(Q0),
MutConf = make_mutable_config(Q0),
RaUId = ra_directory:uid_of(?RA_SYSTEM, Name),
#{nodes := Nodes} = QTypeState0 = amqqueue:get_type_state(Q0),
QTypeState = case Nodes of
List when is_list(List) ->
%% Queue is not aware of node to uid mapping, do nothing
QTypeState0;
#{node() := RaUId} ->
%% Queue is aware and uid for current node is correct, do nothing
QTypeState0;
_ ->
%% Queue is aware but either current node has no UId or it
%% does not match the one returned by ra_directory, regen uid
maybe_delete_data_dir(RaUId),
NewRaUId = ra:new_uid(ra_lib:to_binary(Name)),
QTypeState0#{nodes := Nodes#{node() => NewRaUId}}
end,
Q = amqqueue:set_type_state(Q0, QTypeState),
Res = case ra:restart_server(?RA_SYSTEM, ServerId, MutConf) of
ok ->
% queue was restarted, good
Expand All @@ -803,7 +839,7 @@ recover(_Vhost, Queues) ->
[rabbit_misc:rs(QName), Err1]),
% queue was never started on this node
% so needs to be started from scratch.
case start_server(make_ra_conf(Q0, ServerId)) of
case start_server(make_ra_conf(Q, ServerId)) of
ok -> ok;
Err2 ->
rabbit_log:warning("recover: quorum queue ~w could not"
Expand All @@ -825,8 +861,7 @@ recover(_Vhost, Queues) ->
%% present in the rabbit_queue table and not just in
%% rabbit_durable_queue
%% So many code paths are dependent on this.
ok = rabbit_db_queue:set_dirty(Q0),
Q = Q0,
ok = rabbit_db_queue:set_dirty(Q),
case Res of
ok ->
{[Q | R0], F0};
Expand Down Expand Up @@ -1207,12 +1242,17 @@ cleanup_data_dir() ->
maybe_delete_data_dir(UId) ->
_ = ra_directory:unregister_name(?RA_SYSTEM, UId),
Dir = ra_env:server_data_dir(?RA_SYSTEM, UId),
{ok, Config} = ra_log:read_config(Dir),
case maps:get(machine, Config) of
{module, rabbit_fifo, _} ->
ra_lib:recursive_delete(Dir);
_ ->
ok
case filelib:is_dir(Dir) of
false ->
ok;
true ->
{ok, Config} = ra_log:read_config(Dir),
case maps:get(machine, Config) of
{module, rabbit_fifo, _} ->
ra_lib:recursive_delete(Dir);
_ ->
ok
end
end.

policy_changed(Q) ->
Expand Down Expand Up @@ -1378,16 +1418,29 @@ add_member(Q, Node, Membership) ->
do_add_member(Q, Node, Membership, ?MEMBER_CHANGE_TIMEOUT).


do_add_member(Q, Node, Membership, Timeout)
when ?is_amqqueue(Q) andalso
?amqqueue_is_quorum(Q) andalso
do_add_member(Q0, Node, Membership, Timeout)
when ?is_amqqueue(Q0) andalso
?amqqueue_is_quorum(Q0) andalso
is_atom(Node) ->
{RaName, _} = amqqueue:get_pid(Q),
QName = amqqueue:get_name(Q),
{RaName, _} = amqqueue:get_pid(Q0),
QName = amqqueue:get_name(Q0),
%% TODO parallel calls might crash this, or add a duplicate in quorum_nodes
ServerId = {RaName, Node},
Members = members(Q),

Members = members(Q0),
QTypeState0 = #{nodes := _Nodes}= amqqueue:get_type_state(Q0),
NewRaUId = ra:new_uid(ra_lib:to_binary(RaName)),
%QTypeState = case Nodes of
% L when is_list(L) ->
% %% Queue is not aware of node to uid mapping, just add the new node
% QTypeState0#{nodes := lists:usort([Node | Nodes])};
% #{Node := _} ->
% %% Queue is aware and uid for targeted node exists, do nothing
% QTypeState0;
% _ ->
% %% Queue is aware but current node has no UId, regen uid
% QTypeState0#{nodes := Nodes#{Node => NewRaUId}}
%end,
Q = amqqueue:set_type_state(Q0, QTypeState0),
MachineVersion = erpc_call(Node, rabbit_fifo, version, [], infinity),
Conf = make_ra_conf(Q, ServerId, Membership, MachineVersion),
case ra:start_server(?RA_SYSTEM, Conf) of
Expand All @@ -1403,8 +1456,12 @@ do_add_member(Q, Node, Membership, Timeout)
{ok, {RaIndex, RaTerm}, Leader} ->
Fun = fun(Q1) ->
Q2 = update_type_state(
Q1, fun(#{nodes := Nodes} = Ts) ->
Ts#{nodes => lists:usort([Node | Nodes])}
Q1, fun(#{nodes := NodesList} = Ts) when is_list(NodesList) ->
Ts#{nodes => lists:usort([Node | NodesList])};
(#{nodes := #{Node := _} = _NodesMap} = Ts) ->
Ts;
(#{nodes := NodesMap} = Ts) when is_map(NodesMap) ->
Ts#{nodes => maps:put(Node, NewRaUId, NodesMap)}
end),
amqqueue:set_pid(Q2, Leader)
end,
Expand Down Expand Up @@ -1477,8 +1534,10 @@ delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
Fun = fun(Q1) ->
update_type_state(
Q1,
fun(#{nodes := Nodes} = Ts) ->
Ts#{nodes => lists:delete(Node, Nodes)}
fun(#{nodes := Nodes} = Ts) when is_list(Nodes) ->
Ts#{nodes => lists:delete(Node, Nodes)};
(#{nodes := Nodes} = Ts) when is_map(Nodes) ->
Ts#{nodes => maps:remove(Node, Nodes)}
end)
end,
_ = rabbit_amqqueue:update(QName, Fun),
Expand Down Expand Up @@ -1986,7 +2045,15 @@ make_ra_conf(Q, ServerId, TickTimeout,
QName = amqqueue:get_name(Q),
RaMachine = ra_machine(Q),
[{ClusterName, _} | _] = Members = members(Q),
UId = ra:new_uid(ra_lib:to_binary(ClusterName)),
{_, Node} = ServerId,
UId = case amqqueue:get_type_state(Q) of
#{uids := #{Node := Id}} ->
Id;
_ ->
%% Queue was declared on an older version of RabbitMQ
%% and does not have the node to uid mappings
ra:new_uid(ra_lib:to_binary(ClusterName))
end,
FName = rabbit_misc:rs(QName),
Formatter = {?MODULE, format_ra_event, [QName]},
LogCfg = #{uid => UId,
Expand Down Expand Up @@ -2016,7 +2083,12 @@ make_mutable_config(Q) ->

get_nodes(Q) when ?is_amqqueue(Q) ->
#{nodes := Nodes} = amqqueue:get_type_state(Q),
Nodes.
case Nodes of
List when is_list(List) ->
List;
Map when is_map(Map) ->
maps:keys(Map)
end.

get_connected_nodes(Q) when ?is_amqqueue(Q) ->
ErlangNodes = [node() | nodes()],
Expand Down Expand Up @@ -2125,7 +2197,7 @@ force_checkpoint_on_queue(QName) ->
{ok, Q} when ?amqqueue_is_quorum(Q) ->
{RaName, _} = amqqueue:get_pid(Q),
rabbit_log:debug("Sending command to force ~ts to take a checkpoint", [QNameFmt]),
Nodes = amqqueue:get_nodes(Q),
Nodes = rabbit_amqqueue:get_nodes(Q),
_ = [ra:cast_aux_command({RaName, Node}, force_checkpoint)
|| Node <- Nodes],
ok;
Expand Down Expand Up @@ -2372,3 +2444,8 @@ queue_vm_stats_sups() ->
queue_vm_ets() ->
{[quorum_ets],
[[ra_log_ets]]}.

has_uuid_tracking(#{nodes := Nodes}) when is_map(Nodes) ->
true;
has_uuid_tracking(_QTypeState) ->
false.
4 changes: 2 additions & 2 deletions deps/rabbit/src/rabbit_stream_coordinator.erl
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ stop() ->

new_stream(Q, LeaderNode)
when ?is_amqqueue(Q) andalso is_atom(LeaderNode) ->
#{name := StreamId,
nodes := Nodes} = amqqueue:get_type_state(Q),
#{name := StreamId} = amqqueue:get_type_state(Q),
Nodes = rabbit_amqqueue:get_nodes(Q),
%% assertion leader is in nodes configuration
true = lists:member(LeaderNode, Nodes),
process_command({new_stream, StreamId,
Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy