Skip to content

Commit c46c803

Browse files
committed
Fix relfilenodemap.c's handling of cache invalidations.
The old code entered a new hash table entry first, then scanned pg_class to determine what value to fill in, and then populated the entry. This fails to work properly if a cache invalidation happens as a result of opening pg_class. Repair.

Along the way, get rid of the idea of blowing away the entire hash table as a method of processing invalidations. Instead, just delete all the entries one by one. This is probably not quite as cheap but it's simpler, and shouldn't happen often.

Andres Freund
1 parent cd8115e commit c46c803

File tree

1 file changed

+100
-89
lines changed

1 file changed

+100
-89
lines changed

src/backend/utils/cache/relfilenodemap.c

Lines changed: 100 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -57,23 +57,20 @@ RelfilenodeMapInvalidateCallback(Datum arg, Oid relid)
5757
HASH_SEQ_STATUS status;
5858
RelfilenodeMapEntry *entry;
5959

60-
/* nothing to do if not active or deleted */
61-
if (RelfilenodeMapHash == NULL)
62-
return;
63-
64-
/* if relid is InvalidOid, we must invalidate the entire cache */
65-
if (relid == InvalidOid)
66-
{
67-
hash_destroy(RelfilenodeMapHash);
68-
RelfilenodeMapHash = NULL;
69-
return;
70-
}
60+
/* callback only gets registered after creating the hash */
61+
Assert(RelfilenodeMapHash != NULL);
7162

7263
hash_seq_init(&status, RelfilenodeMapHash);
7364
while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL)
7465
{
75-
/* Same OID may occur in more than one tablespace. */
76-
if (entry->relid == relid)
66+
/*
67+
* If relid is InvalidOid, signalling a complete reset, we must remove
68+
* all entries, otherwise just remove the specific relation's entry.
69+
* Always remove negative cache entries.
70+
*/
71+
if (relid == InvalidOid || /* complete reset */
72+
entry->relid == InvalidOid || /* negative cache entry */
73+
entry->relid == relid) /* individual flushed relation */
7774
{
7875
if (hash_search(RelfilenodeMapHash,
7976
(void *) &entry->key,
@@ -92,32 +89,12 @@ static void
9289
InitializeRelfilenodeMap(void)
9390
{
9491
HASHCTL ctl;
95-
static bool initial_init_done = false;
96-
int i;
92+
int i;
9793

9894
/* Make sure we've initialized CacheMemoryContext. */
9995
if (CacheMemoryContext == NULL)
10096
CreateCacheMemoryContext();
10197

102-
/* Initialize the hash table. */
103-
MemSet(&ctl, 0, sizeof(ctl));
104-
ctl.keysize = sizeof(RelfilenodeMapKey);
105-
ctl.entrysize = sizeof(RelfilenodeMapEntry);
106-
ctl.hash = tag_hash;
107-
ctl.hcxt = CacheMemoryContext;
108-
109-
RelfilenodeMapHash =
110-
hash_create("RelfilenodeMap cache", 1024, &ctl,
111-
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
112-
113-
/*
114-
* For complete resets we simply delete the entire hash, but there's no
115-
* need to do the other stuff multiple times. Especially the initialization
116-
* of the relcche invalidation should only be done once.
117-
*/
118-
if (initial_init_done)
119-
return;
120-
12198
/* build skey */
12299
MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey));
123100

@@ -134,10 +111,25 @@ InitializeRelfilenodeMap(void)
134111
relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace;
135112
relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode;
136113

114+
/* Initialize the hash table. */
115+
MemSet(&ctl, 0, sizeof(ctl));
116+
ctl.keysize = sizeof(RelfilenodeMapKey);
117+
ctl.entrysize = sizeof(RelfilenodeMapEntry);
118+
ctl.hash = tag_hash;
119+
ctl.hcxt = CacheMemoryContext;
120+
121+
/*
122+
* Only create the RelfilenodeMapHash now, so we don't end up partially
123+
* initialized when fmgr_info_cxt() above ERRORs out with an out of memory
124+
* error.
125+
*/
126+
RelfilenodeMapHash =
127+
hash_create("RelfilenodeMap cache", 1024, &ctl,
128+
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
129+
137130
/* Watch for invalidation events. */
138131
CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback,
139132
(Datum) 0);
140-
initial_init_done = true;
141133
}
142134

143135
/*
@@ -156,6 +148,7 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
156148
Relation relation;
157149
HeapTuple ntp;
158150
ScanKeyData skey[2];
151+
Oid relid;
159152

160153
if (RelfilenodeMapHash == NULL)
161154
InitializeRelfilenodeMap();
@@ -169,81 +162,99 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
169162
key.relfilenode = relfilenode;
170163

171164
/*
172-
* Check cache and enter entry if nothing could be found. Even if no target
165+
* Check cache and return entry if one is found. Even if no target
173166
* relation can be found later on we store the negative match and return a
174-
* InvalidOid from cache. That's not really necessary for performance since
175-
* querying invalid values isn't supposed to be a frequent thing, but the
176-
* implementation is simpler this way.
167+
* InvalidOid from cache. That's not really necessary for performance
168+
* since querying invalid values isn't supposed to be a frequent thing,
169+
* but it's basically free.
177170
*/
178-
entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found);
171+
entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_FIND, &found);
179172

180173
if (found)
181174
return entry->relid;
182175

183-
/* initialize empty/negative cache entry before doing the actual lookup */
184-
entry->relid = InvalidOid;
185-
186176
/* ok, no previous cache entry, do it the hard way */
187177

188-
/* check shared tables */
178+
/* initialize empty/negative cache entry before doing the actual lookups */
179+
relid = InvalidOid;
180+
189181
if (reltablespace == GLOBALTABLESPACE_OID)
190182
{
191-
entry->relid = RelationMapFilenodeToOid(relfilenode, true);
192-
return entry->relid;
183+
/*
184+
* Ok, shared table, check relmapper.
185+
*/
186+
relid = RelationMapFilenodeToOid(relfilenode, true);
193187
}
188+
else
189+
{
190+
/*
191+
* Not a shared table, could either be a plain relation or a
192+
* non-shared, nailed one, like e.g. pg_class.
193+
*/
194194

195-
/* check plain relations by looking in pg_class */
196-
relation = heap_open(RelationRelationId, AccessShareLock);
195+
/* check for plain relations by looking in pg_class */
196+
relation = heap_open(RelationRelationId, AccessShareLock);
197197

198-
/* copy scankey to local copy, it will be modified during the scan */
199-
memcpy(skey, relfilenode_skey, sizeof(skey));
198+
/* copy scankey to local copy, it will be modified during the scan */
199+
memcpy(skey, relfilenode_skey, sizeof(skey));
200200

201-
/* set scan arguments */
202-
skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
203-
skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
201+
/* set scan arguments */
202+
skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
203+
skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
204204

205-
scandesc = systable_beginscan(relation,
206-
ClassTblspcRelfilenodeIndexId,
207-
true,
208-
NULL,
209-
2,
210-
skey);
205+
scandesc = systable_beginscan(relation,
206+
ClassTblspcRelfilenodeIndexId,
207+
true,
208+
NULL,
209+
2,
210+
skey);
211211

212-
found = false;
212+
found = false;
213213

214-
while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
215-
{
216-
if (found)
217-
elog(ERROR,
218-
"unexpected duplicate for tablespace %u, relfilenode %u",
219-
reltablespace, relfilenode);
220-
found = true;
214+
while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
215+
{
216+
if (found)
217+
elog(ERROR,
218+
"unexpected duplicate for tablespace %u, relfilenode %u",
219+
reltablespace, relfilenode);
220+
found = true;
221221

222222
#ifdef USE_ASSERT_CHECKING
223-
if (assert_enabled)
224-
{
225-
bool isnull;
226-
Oid check;
227-
check = fastgetattr(ntp, Anum_pg_class_reltablespace,
228-
RelationGetDescr(relation),
229-
&isnull);
230-
Assert(!isnull && check == reltablespace);
231-
232-
check = fastgetattr(ntp, Anum_pg_class_relfilenode,
233-
RelationGetDescr(relation),
234-
&isnull);
235-
Assert(!isnull && check == relfilenode);
236-
}
223+
if (assert_enabled)
224+
{
225+
bool isnull;
226+
Oid check;
227+
check = fastgetattr(ntp, Anum_pg_class_reltablespace,
228+
RelationGetDescr(relation),
229+
&isnull);
230+
Assert(!isnull && check == reltablespace);
231+
232+
check = fastgetattr(ntp, Anum_pg_class_relfilenode,
233+
RelationGetDescr(relation),
234+
&isnull);
235+
Assert(!isnull && check == relfilenode);
236+
}
237237
#endif
238-
entry->relid = HeapTupleGetOid(ntp);
239-
}
238+
relid = HeapTupleGetOid(ntp);
239+
}
240240

241-
systable_endscan(scandesc);
242-
heap_close(relation, AccessShareLock);
241+
systable_endscan(scandesc);
242+
heap_close(relation, AccessShareLock);
243243

244-
/* check for tables that are mapped but not shared */
245-
if (!found)
246-
entry->relid = RelationMapFilenodeToOid(relfilenode, false);
244+
/* check for tables that are mapped but not shared */
245+
if (!found)
246+
relid = RelationMapFilenodeToOid(relfilenode, false);
247+
}
248+
249+
/*
250+
* Only enter entry into cache now, our opening of pg_class could have
251+
* caused cache invalidations to be executed which would have deleted a
252+
* new entry if we had entered it above.
253+
*/
254+
entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found);
255+
if (found)
256+
elog(ERROR, "corrupted hashtable");
257+
entry->relid = relid;
247258

248-
return entry->relid;
259+
return relid;
249260
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy