Skip to content

Commit 0b54b39

Browse files
committed
amcheck: Fix checks of entry order for GIN indexes
This tightens a couple of checks in the GIN index verification code, which might have produced incorrect results (false positives/negatives).

* The code skipped ordering checks if the entries were for different attributes (for multi-column GIN indexes), possibly missing some cases of data corruption. But the attribute number is part of the ordering, so we can check that.

* The root page was skipped when checking entry order, but that is unnecessary. The root page is subject to the same ordering rules, so we can process it just like any other page.

* The high key on the right-most page was not checked, but that exception is needed only for inner pages (we don't store the high key for those). For leaf pages we can check the high key just fine.

* Correct the detection of split pages. If the page gets split, the cached parent key is greater than the current child key (not less, as the code incorrectly expected).

Issues reported by Arseniy Mukhin, along with a proposed patch. Review by Andrey M. Borodin, cleanup and improvements by me.

Author: Arseniy Mukhin <arseniy.mukhin.dev@gmail.com>
Reviewed-by: Andrey M. Borodin <x4mmm@yandex-team.ru>
Discussion: https://postgr.es/m/CAE7r3MJ611B9TE=YqBBncewp7-k64VWs+sjk7XF6fJUX77uFBA@mail.gmail.com
1 parent 8dd41c0 commit 0b54b39

File tree

3 files changed

+229
-24
lines changed

3 files changed

+229
-24
lines changed

contrib/amcheck/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ tests += {
4949
't/003_cic_2pc.pl',
5050
't/004_verify_nbtree_unique.pl',
5151
't/005_pitr.pl',
52+
't/006_verify_gin.pl',
5253
],
5354
},
5455
}

contrib/amcheck/t/006_verify_gin.pl

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
2+
# Copyright (c) 2021-2025, PostgreSQL Global Development Group
3+
4+
use strict;
5+
use warnings FATAL => 'all';
6+
7+
use PostgreSQL::Test::Cluster;
8+
use PostgreSQL::Test::Utils;
9+
10+
use Test::More;
11+
12+
my $node;
13+
my $blksize;
14+
15+
# to get the split fast, we want tuples to be as large as possible, but at the same time we don't want them to be toasted.
16+
my $filler_size = 1900;
17+
18+
#
19+
# Test set-up
20+
#
21+
$node = PostgreSQL::Test::Cluster->new('test');
22+
$node->init(no_data_checksums => 1);
23+
$node->append_conf('postgresql.conf', 'autovacuum=off');
24+
$node->start;
25+
$blksize = int($node->safe_psql('postgres', 'SHOW block_size;'));
26+
$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
27+
$node->safe_psql(
28+
'postgres', q(
29+
CREATE OR REPLACE FUNCTION random_string( INT ) RETURNS text AS $$
30+
SELECT string_agg(substring('0123456789abcdefghijklmnopqrstuvwxyz', ceil(random() * 36)::integer, 1), '') from generate_series(1, $1);
31+
$$ LANGUAGE SQL;));
32+
33+
# Tests
34+
invalid_entry_order_leaf_page_test();
35+
invalid_entry_order_inner_page_test();
36+
invalid_entry_columns_order_test();
37+
38+
sub invalid_entry_order_leaf_page_test
39+
{
40+
my $relname = "test";
41+
my $indexname = "test_gin_idx";
42+
43+
$node->safe_psql(
44+
'postgres', qq(
45+
DROP TABLE IF EXISTS $relname;
46+
CREATE TABLE $relname (a text[]);
47+
INSERT INTO $relname (a) VALUES ('{aaaaa,bbbbb}');
48+
CREATE INDEX $indexname ON $relname USING gin (a);
49+
));
50+
my $relpath = relation_filepath($indexname);
51+
52+
$node->stop;
53+
54+
my $blkno = 1; # root
55+
56+
# produce wrong order by replacing aaaaa with ccccc
57+
string_replace_block(
58+
$relpath,
59+
'aaaaa',
60+
'ccccc',
61+
$blkno
62+
);
63+
64+
$node->start;
65+
66+
my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname')));
67+
my $expected = "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295";
68+
like($stderr, qr/$expected/);
69+
}
70+
71+
sub invalid_entry_order_inner_page_test
72+
{
73+
my $relname = "test";
74+
my $indexname = "test_gin_idx";
75+
76+
# to break the order in the inner page we need at least 3 items (rightmost key in the inner level is not checked for the order)
77+
# so fill table until we have 2 splits
78+
$node->safe_psql(
79+
'postgres', qq(
80+
DROP TABLE IF EXISTS $relname;
81+
CREATE TABLE $relname (a text[]);
82+
INSERT INTO $relname (a) VALUES (('{' || 'pppppppppp' || random_string($filler_size) ||'}')::text[]);
83+
INSERT INTO $relname (a) VALUES (('{' || 'qqqqqqqqqq' || random_string($filler_size) ||'}')::text[]);
84+
INSERT INTO $relname (a) VALUES (('{' || 'rrrrrrrrrr' || random_string($filler_size) ||'}')::text[]);
85+
INSERT INTO $relname (a) VALUES (('{' || 'ssssssssss' || random_string($filler_size) ||'}')::text[]);
86+
INSERT INTO $relname (a) VALUES (('{' || 'tttttttttt' || random_string($filler_size) ||'}')::text[]);
87+
INSERT INTO $relname (a) VALUES (('{' || 'uuuuuuuuuu' || random_string($filler_size) ||'}')::text[]);
88+
INSERT INTO $relname (a) VALUES (('{' || 'vvvvvvvvvv' || random_string($filler_size) ||'}')::text[]);
89+
INSERT INTO $relname (a) VALUES (('{' || 'wwwwwwwwww' || random_string($filler_size) ||'}')::text[]);
90+
CREATE INDEX $indexname ON $relname USING gin (a);
91+
));
92+
my $relpath = relation_filepath($indexname);
93+
94+
$node->stop;
95+
96+
my $blkno = 1; # root
97+
98+
# we have rrrrrrrrrr... and tttttttttt... as keys in the root, so produce wrong order by replacing rrrrrrrrrr....
99+
string_replace_block(
100+
$relpath,
101+
'rrrrrrrrrr',
102+
'zzzzzzzzzz',
103+
$blkno
104+
);
105+
106+
$node->start;
107+
108+
my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname')));
109+
my $expected = "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295";
110+
like($stderr, qr/$expected/);
111+
}
112+
113+
sub invalid_entry_columns_order_test
114+
{
115+
my $relname = "test";
116+
my $indexname = "test_gin_idx";
117+
118+
$node->safe_psql(
119+
'postgres', qq(
120+
DROP TABLE IF EXISTS $relname;
121+
CREATE TABLE $relname (a text[],b text[]);
122+
INSERT INTO $relname (a,b) VALUES ('{aaa}','{bbb}');
123+
CREATE INDEX $indexname ON $relname USING gin (a,b);
124+
));
125+
my $relpath = relation_filepath($indexname);
126+
127+
$node->stop;
128+
129+
my $blkno = 1; # root
130+
131+
# mess up the column numbers
132+
# root items order before: (1,aaa), (2,bbb)
133+
# root items order after: (2,aaa), (1,bbb)
134+
my $attrno_1 = pack('s', 1);
135+
my $attrno_2 = pack('s', 2);
136+
137+
my $find = qr/($attrno_1)(.)(aaa)/s;
138+
my $replace = $attrno_2 . '$2$3';
139+
string_replace_block(
140+
$relpath,
141+
$find,
142+
$replace,
143+
$blkno
144+
);
145+
146+
$find = qr/($attrno_2)(.)(bbb)/s;
147+
$replace = $attrno_1 . '$2$3';
148+
string_replace_block(
149+
$relpath,
150+
$find,
151+
$replace,
152+
$blkno
153+
);
154+
155+
$node->start;
156+
157+
my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname')));
158+
my $expected = "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295";
159+
like($stderr, qr/$expected/);
160+
}
161+
162+
# Returns the filesystem path for the named relation.
163+
sub relation_filepath
164+
{
165+
my ($relname) = @_;
166+
167+
my $pgdata = $node->data_dir;
168+
my $rel = $node->safe_psql('postgres',
169+
qq(SELECT pg_relation_filepath('$relname')));
170+
die "path not found for relation $relname" unless defined $rel;
171+
return "$pgdata/$rel";
172+
}
173+
174+
# substitute pattern 'find' with 'replace' within the block with number 'blkno' in the file 'filename'
175+
sub string_replace_block
176+
{
177+
my ($filename, $find, $replace, $blkno) = @_;
178+
179+
my $fh;
180+
open($fh, '+<', $filename) or BAIL_OUT("open failed: $!");
181+
binmode $fh;
182+
183+
my $offset = $blkno * $blksize;
184+
my $buffer;
185+
186+
sysseek($fh, $offset, 0) or BAIL_OUT("seek failed: $!");
187+
sysread($fh, $buffer, $blksize) or BAIL_OUT("read failed: $!");
188+
189+
$buffer =~ s/$find/'"' . $replace . '"'/gee;
190+
191+
sysseek($fh, $offset, 0) or BAIL_OUT("seek failed: $!");
192+
syswrite($fh, $buffer) or BAIL_OUT("write failed: $!");
193+
194+
close($fh) or BAIL_OUT("close failed: $!");
195+
196+
return;
197+
}
198+
199+
done_testing();

contrib/amcheck/verify_gin.c

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -459,17 +459,18 @@ gin_check_parent_keys_consistency(Relation rel,
459459
Datum parent_key = gintuple_get_key(&state,
460460
stack->parenttup,
461461
&parent_key_category);
462+
OffsetNumber parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup);
462463
ItemId iid = PageGetItemIdCareful(rel, stack->blkno,
463464
page, maxoff);
464465
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
465-
OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
466+
OffsetNumber page_max_key_attnum = gintuple_get_attrnum(&state, idxtuple);
466467
GinNullCategory page_max_key_category;
467468
Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category);
468469

469470
if (rightlink != InvalidBlockNumber &&
470-
ginCompareEntries(&state, attnum, page_max_key,
471-
page_max_key_category, parent_key,
472-
parent_key_category) > 0)
471+
ginCompareAttEntries(&state, page_max_key_attnum, page_max_key,
472+
page_max_key_category, parent_key_attnum,
473+
parent_key, parent_key_category) < 0)
473474
{
474475
/* split page detected, install right link to the stack */
475476
GinScanItem *ptr;
@@ -508,9 +509,7 @@ gin_check_parent_keys_consistency(Relation rel,
508509
{
509510
ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
510511
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
511-
OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
512-
GinNullCategory prev_key_category;
513-
Datum prev_key;
512+
OffsetNumber current_attnum = gintuple_get_attrnum(&state, idxtuple);
514513
GinNullCategory current_key_category;
515514
Datum current_key;
516515

@@ -523,20 +522,24 @@ gin_check_parent_keys_consistency(Relation rel,
523522
current_key = gintuple_get_key(&state, idxtuple, &current_key_category);
524523

525524
/*
526-
* First block is metadata, skip order check. Also, never check
527-
* for high key on rightmost page, as this key is not really
528-
* stored explicitly.
525+
* Compare the entry to the preceding one.
529526
*
530-
* Also make sure to not compare entries for different attnums,
531-
* which may be stored on the same page.
527+
* Don't check for high key on the rightmost inner page, as this
528+
* key is not really stored explicitly.
529+
*
530+
* The entries may be for different attributes, so make sure to
531+
* use ginCompareAttEntries for comparison.
532532
*/
533-
if (i != FirstOffsetNumber && attnum == prev_attnum && stack->blkno != GIN_ROOT_BLKNO &&
534-
!(i == maxoff && rightlink == InvalidBlockNumber))
533+
if ((i != FirstOffsetNumber) &&
534+
!(i == maxoff && rightlink == InvalidBlockNumber && !GinPageIsLeaf(page)))
535535
{
536+
Datum prev_key;
537+
GinNullCategory prev_key_category;
538+
536539
prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category);
537-
if (ginCompareEntries(&state, attnum, prev_key,
538-
prev_key_category, current_key,
539-
current_key_category) >= 0)
540+
if (ginCompareAttEntries(&state, prev_attnum, prev_key,
541+
prev_key_category, current_attnum,
542+
current_key, current_key_category) >= 0)
540543
ereport(ERROR,
541544
(errcode(ERRCODE_INDEX_CORRUPTED),
542545
errmsg("index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u",
@@ -551,13 +554,14 @@ gin_check_parent_keys_consistency(Relation rel,
551554
i == maxoff)
552555
{
553556
GinNullCategory parent_key_category;
557+
OffsetNumber parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup);
554558
Datum parent_key = gintuple_get_key(&state,
555559
stack->parenttup,
556560
&parent_key_category);
557561

558-
if (ginCompareEntries(&state, attnum, current_key,
559-
current_key_category, parent_key,
560-
parent_key_category) > 0)
562+
if (ginCompareAttEntries(&state, current_attnum, current_key,
563+
current_key_category, parent_key_attnum,
564+
parent_key, parent_key_category) > 0)
561565
{
562566
/*
563567
* There was a discrepancy between parent and child
@@ -576,6 +580,7 @@ gin_check_parent_keys_consistency(Relation rel,
576580
stack->blkno, stack->parentblk);
577581
else
578582
{
583+
parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup);
579584
parent_key = gintuple_get_key(&state,
580585
stack->parenttup,
581586
&parent_key_category);
@@ -584,9 +589,9 @@ gin_check_parent_keys_consistency(Relation rel,
584589
* Check if it is properly adjusted. If succeed,
585590
* proceed to the next key.
586591
*/
587-
if (ginCompareEntries(&state, attnum, current_key,
588-
current_key_category, parent_key,
589-
parent_key_category) > 0)
592+
if (ginCompareAttEntries(&state, current_attnum, current_key,
593+
current_key_category, parent_key_attnum,
594+
parent_key, parent_key_category) > 0)
590595
ereport(ERROR,
591596
(errcode(ERRCODE_INDEX_CORRUPTED),
592597
errmsg("index \"%s\" has inconsistent records on page %u offset %u",
@@ -638,7 +643,7 @@ gin_check_parent_keys_consistency(Relation rel,
638643
}
639644

640645
prev_tuple = CopyIndexTuple(idxtuple);
641-
prev_attnum = attnum;
646+
prev_attnum = current_attnum;
642647
}
643648

644649
LockBuffer(buffer, GIN_UNLOCK);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy