Skip to content

Commit 2a6fe6d

Browse files
committed
Publish aggregated/per-object/detailed RAFT metrics
For aggregated metrics, we just pick specific metrics (currently num_segments and commit_latency) and only publish the maximum value, without labels (`max_` is added to the metric name). For example: ``` > curl -s localhost:15692/metrics/per-object | rg -e ^rabbitmq_raft_num_segments -e ^rabbitmq_raft_commit_latency rabbitmq_raft_commit_latency_seconds{module="rabbit_khepri",ra_system="coordination"} 0.0 rabbitmq_raft_commit_latency_seconds{queue="qq2",vhost="/"} 0.02 rabbitmq_raft_commit_latency_seconds{queue="qqq-1",vhost="/"} 0.01 rabbitmq_raft_commit_latency_seconds{queue="qqq-2",vhost="/"} 0.0 rabbitmq_raft_num_segments{module="rabbit_khepri",ra_system="coordination"} 1.0 rabbitmq_raft_num_segments{queue="qq2",vhost="/"} 132.0 rabbitmq_raft_num_segments{queue="qqq-2",vhost="/"} 245.0 > curl -s localhost:15692/metrics/ | rg ^rabbitmq_raft_max rabbitmq_raft_max_commit_latency_seconds 0.02 rabbitmq_raft_max_num_segments 245.0 ```
1 parent 0193f44 commit 2a6fe6d

File tree

3 files changed

+145
-19
lines changed

3 files changed

+145
-19
lines changed

deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_raft_metrics_collector.erl

Lines changed: 98 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
-module(prometheus_rabbitmq_raft_metrics_collector).
88

99
-behaviour(prometheus_collector).
10-
-include_lib("prometheus/include/prometheus.hrl").
1110

1211
-export([register/0,
1312
deregister_cleanup/1,
@@ -16,7 +15,8 @@
1615
-import(prometheus_model_helpers, [create_mf/4,
1716
counter_metric/2]).
1817

19-
-define(METRIC_NAME_PREFIX, "rabbitmq_raft_").
18+
-define(METRIC_NAME_PREFIX, <<"rabbitmq_raft_">>).
19+
-define(DETAILED_METRIC_NAME_PREFIX, <<"rabbitmq_raft_detailed_">>).
2020

2121
%%====================================================================
2222
%% Collector API
@@ -28,19 +28,108 @@ register() ->
2828
deregister_cleanup(_) ->
2929
ok.
3030

31+
collect_mf('per-object', Callback) ->
32+
collect_per_object_metrics(?METRIC_NAME_PREFIX, Callback);
33+
collect_mf('detailed', Callback) ->
34+
case get(prometheus_mf_filter) of
35+
undefined ->
36+
ok;
37+
MFNames ->
38+
case lists:member(raft_metrics, MFNames) of
39+
true ->
40+
collect_detailed_metrics(?DETAILED_METRIC_NAME_PREFIX, Callback);
41+
false ->
42+
ok
43+
end
44+
end;
3145
collect_mf(_Registry, Callback) ->
46+
case application:get_env(rabbitmq_prometheus, return_per_object_metrics, false) of
47+
false ->
48+
collect_aggregate_metrics(?METRIC_NAME_PREFIX, Callback);
49+
true ->
50+
collect_per_object_metrics(?METRIC_NAME_PREFIX, Callback)
51+
end.
52+
53+
%% INTERNAL
54+
55+
collect_aggregate_metrics(Prefix, Callback) ->
56+
collect_max_values(Prefix, Callback),
57+
collect_key_component_metrics(Prefix, Callback).
58+
59+
collect_per_object_metrics(Prefix, Callback) ->
60+
collect_key_component_metrics(Prefix, Callback),
61+
collect_key_per_object_metrics(?METRIC_NAME_PREFIX, Callback).
62+
63+
collect_detailed_metrics(Prefix, Callback) ->
64+
VHostFilterFun = case get(prometheus_vhost_filter) of
65+
undefined ->
66+
fun(_) -> true end;
67+
VHosts ->
68+
fun(VHost) -> lists:member(VHost, VHosts) end
69+
end,
70+
collect_all_matching_metrics(Prefix, Callback, VHostFilterFun).
71+
72+
collect_key_per_object_metrics(Prefix, Callback) ->
3273
maps:foreach(
3374
fun(Name, #{type := Type, help := Help, values := Values}) ->
3475
Callback(
35-
create_mf(?METRIC_NAME(Name),
76+
create_mf(<<Prefix/binary, (prometheus_model_helpers:metric_name(Name))/binary>>,
3677
Help,
3778
Type,
3879
Values))
3980
end,
4081
seshat:format(ra, [term,
41-
snapshot_index,
42-
last_applied,
43-
commit_index,
44-
last_written_index,
45-
commit_latency,
46-
num_segments])).
82+
snapshot_index,
83+
last_applied,
84+
commit_index,
85+
last_written_index,
86+
commit_latency,
87+
num_segments])).
88+
89+
collect_all_matching_metrics(Prefix, Callback, VHostFilterFun) ->
90+
maps:foreach(
91+
fun(Name, #{type := Type, help := Help, values := Values0}) ->
92+
Values = maps:filter(fun(#{vhost := V}, _) ->
93+
VHostFilterFun(V);
94+
(_, _) -> true
95+
end, Values0),
96+
Callback(
97+
create_mf(<<Prefix/binary, (prometheus_model_helpers:metric_name(Name))/binary>>,
98+
Help,
99+
Type,
100+
Values))
101+
end,
102+
seshat:format(ra)).
103+
104+
collect_max_values(Prefix, Callback) ->
105+
%% max values for QQ metrics
106+
%% eg.
107+
%% rabbitmq_raft_num_segments{queue="q1",vhost="/"} 5.0
108+
%% rabbitmq_raft_num_segments{queue="q2",vhost="/"} 10.0
109+
%% becomes
110+
%% rabbitmq_raft_max_num_segments 10.0
111+
QQMetrics = [num_segments],
112+
maps:foreach(
113+
fun(Name, #{type := Type, help := Help, values := Values}) ->
114+
Max = lists:max(maps:values(Values)),
115+
Callback(
116+
create_mf(<<Prefix/binary, "max_", (prometheus_model_helpers:metric_name(Name))/binary>>,
117+
Help,
118+
Type,
119+
#{#{} => Max}))
120+
121+
end,
122+
seshat:format(ra, QQMetrics)).
123+
124+
collect_key_component_metrics(Prefix, Callback) ->
125+
WALMetrics = [wal_files, bytes_written, mem_tables],
126+
SegmentWriterMetrics = [entries, segments],
127+
maps:foreach(
128+
fun(Name, #{type := Type, help := Help, values := Values}) ->
129+
Callback(
130+
create_mf(<<Prefix/binary, (prometheus_model_helpers:metric_name(Name))/binary>>,
131+
Help,
132+
Type,
133+
Values))
134+
end,
135+
seshat:format(ra, WALMetrics ++ SegmentWriterMetrics)).

deps/rabbitmq_prometheus/src/rabbit_prometheus_dispatcher.erl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ build_dispatcher() ->
1717
prometheus_rabbitmq_core_metrics_collector,
1818
prometheus_rabbitmq_global_metrics_collector,
1919
prometheus_rabbitmq_message_size_metrics_collector,
20+
prometheus_rabbitmq_raft_metrics_collector,
2021
prometheus_rabbitmq_alarm_metrics_collector,
2122
prometheus_rabbitmq_dynamic_collector,
2223
prometheus_process_collector],
@@ -26,8 +27,7 @@ build_dispatcher() ->
2627
prometheus_vm_memory_collector,
2728
prometheus_mnesia_collector,
2829
prometheus_vm_statistics_collector,
29-
prometheus_vm_msacc_collector,
30-
prometheus_rabbitmq_raft_metrics_collector
30+
prometheus_vm_msacc_collector
3131
],
3232
prometheus_registry:register_collectors(
3333
case application:get_env(rabbitmq_prometheus, return_per_object_metrics, fasle) of
@@ -38,7 +38,8 @@ build_dispatcher() ->
3838
prometheus_registry:register_collectors('per-object',
3939
CoreCollectors ++ PerObjectCollectors),
4040
prometheus_registry:register_collectors('detailed', [
41-
prometheus_rabbitmq_core_metrics_collector
41+
prometheus_rabbitmq_core_metrics_collector,
42+
prometheus_rabbitmq_raft_metrics_collector
4243
]),
4344
prometheus_registry:register_collectors('memory-breakdown', [
4445
prometheus_rabbitmq_core_metrics_collector

deps/rabbitmq_prometheus/test/rabbit_prometheus_http_SUITE.erl

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ groups() ->
7272
vhost_status_metric,
7373
exchange_bindings_metric,
7474
exchange_names_metric,
75-
stream_pub_sub_metrics
75+
stream_pub_sub_metrics,
76+
raft_metrics_per_object_test
7677
]},
7778
{special_chars, [], [core_metrics_special_chars]},
7879
{authentication, [], [basic_auth]}
@@ -158,6 +159,12 @@ init_per_group(detailed_metrics, Config0) ->
158159
Q <- [ <<"queue-with-messages">>, <<"queue-with-consumer">> ]
159160
],
160161

162+
amqp_channel:call(DefaultCh,
163+
#'queue.declare'{queue = <<"a_quorum_queue">>,
164+
durable = true,
165+
arguments = [{<<"x-queue-type">>, longstr, <<"quorum">>}]
166+
}),
167+
161168
DefaultConsumer = sleeping_consumer(),
162169
#'basic.consume_ok'{consumer_tag = DefaultCTag} =
163170
amqp_channel:subscribe(DefaultCh, #'basic.consume'{queue = <<"default-queue-with-consumer">>}, DefaultConsumer),
@@ -400,7 +407,14 @@ aggregated_metrics_test(Config) ->
400407
?assertEqual(match, re:run(Body, "^rabbitmq_erlang_uptime_seconds ", [{capture, none}, multiline])),
401408
?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),
402409
%% Check the first TOTALS metric value
403-
?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])).
410+
?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])),
411+
?assertEqual(nomatch, re:run(Body, "^rabbitmq_raft_commit_latency_seconds", [{capture, none}, multiline])),
412+
?assertEqual(match, re:run(Body, "^rabbitmq_raft_bytes_written.*ra_log_segment_writer", [{capture, none}, multiline])),
413+
?assertEqual(match, re:run(Body, "^rabbitmq_raft_bytes_written.*ra_log_wal", [{capture, none}, multiline])),
414+
?assertEqual(match, re:run(Body, "^rabbitmq_raft_entries{", [{capture, none}, multiline])),
415+
?assertEqual(match, re:run(Body, "^rabbitmq_raft_mem_tables{", [{capture, none}, multiline])),
416+
?assertEqual(match, re:run(Body, "^rabbitmq_raft_segments{", [{capture, none}, multiline])),
417+
?assertEqual(match, re:run(Body, "^rabbitmq_raft_wal_files{", [{capture, none}, multiline])).
404418

405419
endpoint_per_object_metrics(Config) ->
406420
per_object_metrics_test(Config, "/metrics/per-object").
@@ -438,7 +452,8 @@ per_object_metrics_test(Config, Path) ->
438452
?assertEqual(match, re:run(Body, "^rabbitmq_io_read_time_seconds_total ", [{capture, none}, multiline])),
439453
?assertEqual(match, re:run(Body, "^rabbitmq_raft_commit_latency_seconds{", [{capture, none}, multiline])),
440454
%% Check the first TOTALS metric value
441-
?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])).
455+
?assertEqual(match, re:run(Body, "^rabbitmq_connections ", [{capture, none}, multiline])),
456+
?assertEqual(match, re:run(Body, "^rabbitmq_raft_num_segments{", [{capture, none}, multiline])).
442457

443458
memory_breakdown_metrics_test(Config) ->
444459
{_Headers, Body} = http_get_with_pal(Config, "/metrics/memory-breakdown", [], 200),
@@ -552,7 +567,8 @@ queue_consumer_count_all_vhosts_per_object_test(Config) ->
552567
#{queue => "vhost-2-queue-with-consumer",vhost => "vhost-2"} => [1],
553568
#{queue => "vhost-2-queue-with-messages",vhost => "vhost-2"} => [0],
554569
#{queue => "default-queue-with-consumer",vhost => "/"} => [1],
555-
#{queue => "default-queue-with-messages",vhost => "/"} => [0]},
570+
#{queue => "default-queue-with-messages",vhost => "/"} => [0],
571+
#{queue => "a_quorum_queue",vhost => "/"} => [0]},
556572

557573
rabbitmq_detailed_queue_info =>
558574
#{#{queue => "default-queue-with-consumer",
@@ -578,7 +594,10 @@ queue_consumer_count_all_vhosts_per_object_test(Config) ->
578594
#{queue => "vhost-2-queue-with-messages",
579595
vhost => "vhost-2",
580596
queue_type => "rabbit_classic_queue",
581-
membership => "leader"} => [1]}
597+
membership => "leader"} => [1],
598+
#{membership => "leader",
599+
queue => "a_quorum_queue",vhost => "/",
600+
queue_type => "rabbit_quorum_queue"} => [1]}
582601
},
583602

584603
%% No vhost given, all should be returned
@@ -596,7 +615,8 @@ queue_coarse_metrics_per_object_test(Config) ->
596615
Expected2 = #{#{queue => "vhost-2-queue-with-consumer", vhost => "vhost-2"} => [11],
597616
#{queue => "vhost-2-queue-with-messages", vhost => "vhost-2"} => [11]},
598617
ExpectedD = #{#{queue => "default-queue-with-consumer", vhost => "/"} => [3],
599-
#{queue => "default-queue-with-messages", vhost => "/"} => [3]},
618+
#{queue => "default-queue-with-messages", vhost => "/"} => [3],
619+
#{queue => "a_quorum_queue",vhost => "/"} => [0]},
600620

601621
{_, Body1} = http_get_with_pal(Config, "/metrics/detailed?vhost=vhost-1&family=queue_coarse_metrics", [], 200),
602622
?assertEqual(Expected1,
@@ -704,7 +724,8 @@ queue_metrics_per_object_test(Config) ->
704724
Expected2 = #{#{queue => "vhost-2-queue-with-consumer", vhost => "vhost-2"} => [11],
705725
#{queue => "vhost-2-queue-with-messages", vhost => "vhost-2"} => [1]},
706726
ExpectedD = #{#{queue => "default-queue-with-consumer", vhost => "/"} => [3],
707-
#{queue => "default-queue-with-messages", vhost => "/"} => [1]},
727+
#{queue => "default-queue-with-messages", vhost => "/"} => [1],
728+
#{queue => "a_quorum_queue",vhost => "/"} => [0]},
708729
{_, Body1} = http_get_with_pal(Config, "/metrics/detailed?vhost=vhost-1&family=queue_metrics", [], 200),
709730
?assertEqual(Expected1,
710731
map_get(rabbitmq_detailed_queue_messages_ram, parse_response(Body1))),
@@ -835,6 +856,21 @@ core_metrics_special_chars(Config) ->
835856
maps:to_list(LabelValue3)),
836857
ok.
837858

859+
raft_metrics_per_object_test(Config) ->
860+
Expected1 = #{#{module => "rabbit_khepri", ra_system => "coordination"} => ["0.0"],
861+
#{module => "rabbit_stream_coordinator", ra_system => "coordination"} => ["0.0"]},
862+
Expected2 = Expected1#{#{queue => "a_quorum_queue", vhost => "/"} => ["0.0"]},
863+
864+
{_, Body1} = http_get_with_pal(Config, "/metrics/detailed?family=raft_metrics&vhost=foo", [], 200),
865+
?assertEqual(Expected1,
866+
map_get(rabbitmq_raft_detailed_num_segments, parse_response(Body1))),
867+
868+
{_, Body2} = http_get_with_pal(Config, "/metrics/detailed?family=raft_metrics&vhost=/", [], 200),
869+
?assertEqual(Expected2,
870+
map_get(rabbitmq_raft_detailed_num_segments, parse_response(Body2))),
871+
872+
ok.
873+
838874
basic_auth(Config) ->
839875
http_get(Config, [{"accept-encoding", "deflate"}], 401),
840876
AuthHeader = rabbit_mgmt_test_util:auth_header("guest", "guest"),

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy