Skip to content

Commit 352d297

Browse files
committed
dshash: Add sequential scan support.
Add ability to scan all entries sequentially to dshash. The interface is similar but a bit different both from that of dynahash and simple dshash search functions. The most significant differences is that dshash's interfac always needs a call to dshash_seq_term when scan ends. Another is locking. Dshash holds partition lock when returning an entry, dshash_seq_next() also holds lock when returning an entry but callers shouldn't release it, since the lock is essential to continue a scan. The seqscan interface allows entry deletion while a scan is in progress using dshash_delete_current(). Reviewed-By: Andres Freund <andres@anarazel.de> Author: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
1 parent adb5c28 commit 352d297

File tree

3 files changed

+186
-1
lines changed

3 files changed

+186
-1
lines changed

src/backend/lib/dshash.c

Lines changed: 162 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ struct dshash_table
127127
#define NUM_SPLITS(size_log2) \
128128
(size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
129129

130+
/* How many buckets are there in a given size? */
131+
#define NUM_BUCKETS(size_log2) \
132+
(((size_t) 1) << (size_log2))
133+
130134
/* How many buckets are there in each partition at a given size? */
131135
#define BUCKETS_PER_PARTITION(size_log2) \
132136
(((size_t) 1) << NUM_SPLITS(size_log2))
@@ -153,6 +157,10 @@ struct dshash_table
153157
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
154158
((partition) << NUM_SPLITS(size_log2))
155159

160+
/* Choose partition based on bucket index. */
161+
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
162+
((bucket_idx) >> NUM_SPLITS(size_log2))
163+
156164
/* The head of the active bucket for a given hash value (lvalue). */
157165
#define BUCKET_FOR_HASH(hash_table, hash) \
158166
(hash_table->buckets[ \
@@ -324,7 +332,7 @@ dshash_destroy(dshash_table *hash_table)
324332
ensure_valid_bucket_pointers(hash_table);
325333

326334
/* Free all the entries. */
327-
size = ((size_t) 1) << hash_table->size_log2;
335+
size = NUM_BUCKETS(hash_table->size_log2);
328336
for (i = 0; i < size; ++i)
329337
{
330338
dsa_pointer item_pointer = hash_table->buckets[i];
@@ -592,6 +600,159 @@ dshash_memhash(const void *v, size_t size, void *arg)
592600
return tag_hash(v, size);
593601
}
594602

603+
/*
604+
* dshash_seq_init/_next/_term
605+
* Sequentially scan through dshash table and return all the
606+
* elements one by one, return NULL when no more.
607+
*
608+
* dshash_seq_term should always be called when a scan finished.
609+
* The caller may delete returned elements midst of a scan by using
610+
* dshash_delete_current(). exclusive must be true to delete elements.
611+
*/
612+
void
613+
dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
614+
bool exclusive)
615+
{
616+
status->hash_table = hash_table;
617+
status->curbucket = 0;
618+
status->nbuckets = 0;
619+
status->curitem = NULL;
620+
status->pnextitem = InvalidDsaPointer;
621+
status->curpartition = -1;
622+
status->exclusive = exclusive;
623+
}
624+
625+
/*
626+
* Returns the next element.
627+
*
628+
* Returned elements are locked and the caller must not explicitly release
629+
* it. It is released at the next call to dshash_next().
630+
*/
631+
void *
632+
dshash_seq_next(dshash_seq_status *status)
633+
{
634+
dsa_pointer next_item_pointer;
635+
636+
if (status->curitem == NULL)
637+
{
638+
int partition;
639+
640+
Assert(status->curbucket == 0);
641+
Assert(!status->hash_table->find_locked);
642+
643+
/* first shot. grab the first item. */
644+
partition =
645+
PARTITION_FOR_BUCKET_INDEX(status->curbucket,
646+
status->hash_table->size_log2);
647+
LWLockAcquire(PARTITION_LOCK(status->hash_table, partition),
648+
status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
649+
status->curpartition = partition;
650+
651+
/* resize doesn't happen from now until seq scan ends */
652+
status->nbuckets =
653+
NUM_BUCKETS(status->hash_table->control->size_log2);
654+
ensure_valid_bucket_pointers(status->hash_table);
655+
656+
next_item_pointer = status->hash_table->buckets[status->curbucket];
657+
}
658+
else
659+
next_item_pointer = status->pnextitem;
660+
661+
Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
662+
status->curpartition),
663+
status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
664+
665+
/* Move to the next bucket if we finished the current bucket */
666+
while (!DsaPointerIsValid(next_item_pointer))
667+
{
668+
int next_partition;
669+
670+
if (++status->curbucket >= status->nbuckets)
671+
{
672+
/* all buckets have been scanned. finish. */
673+
return NULL;
674+
}
675+
676+
/* Check if move to the next partition */
677+
next_partition =
678+
PARTITION_FOR_BUCKET_INDEX(status->curbucket,
679+
status->hash_table->size_log2);
680+
681+
if (status->curpartition != next_partition)
682+
{
683+
/*
684+
* Move to the next partition. Lock the next partition then
685+
* release the current, not in the reverse order to avoid
686+
* concurrent resizing. Avoid dead lock by taking lock in the
687+
* same order with resize().
688+
*/
689+
LWLockAcquire(PARTITION_LOCK(status->hash_table,
690+
next_partition),
691+
status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
692+
LWLockRelease(PARTITION_LOCK(status->hash_table,
693+
status->curpartition));
694+
status->curpartition = next_partition;
695+
}
696+
697+
next_item_pointer = status->hash_table->buckets[status->curbucket];
698+
}
699+
700+
status->curitem =
701+
dsa_get_address(status->hash_table->area, next_item_pointer);
702+
status->hash_table->find_locked = true;
703+
status->hash_table->find_exclusively_locked = status->exclusive;
704+
705+
/*
706+
* The caller may delete the item. Store the next item in case of
707+
* deletion.
708+
*/
709+
status->pnextitem = status->curitem->next;
710+
711+
return ENTRY_FROM_ITEM(status->curitem);
712+
}
713+
714+
/*
715+
* Terminates the seqscan and release all locks.
716+
*
717+
* Should be always called when finishing or exiting a seqscan.
718+
*/
719+
void
720+
dshash_seq_term(dshash_seq_status *status)
721+
{
722+
status->hash_table->find_locked = false;
723+
status->hash_table->find_exclusively_locked = false;
724+
725+
if (status->curpartition >= 0)
726+
LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
727+
}
728+
729+
/* Remove the current entry while a seq scan. */
730+
void
731+
dshash_delete_current(dshash_seq_status *status)
732+
{
733+
dshash_table *hash_table = status->hash_table;
734+
dshash_table_item *item = status->curitem;
735+
size_t partition PG_USED_FOR_ASSERTS_ONLY;
736+
737+
partition = PARTITION_FOR_HASH(item->hash);
738+
739+
Assert(status->exclusive);
740+
Assert(hash_table->control->magic == DSHASH_MAGIC);
741+
Assert(hash_table->find_locked);
742+
Assert(hash_table->find_exclusively_locked);
743+
Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
744+
LW_EXCLUSIVE));
745+
746+
delete_item(hash_table, item);
747+
}
748+
749+
/* Get the current entry while a seq scan. */
750+
void *
751+
dshash_get_current(dshash_seq_status *status)
752+
{
753+
return ENTRY_FROM_ITEM(status->curitem);
754+
}
755+
595756
/*
596757
* Print debugging information about the internal state of the hash table to
597758
* stderr. The caller must hold no partition locks.

src/include/lib/dshash.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,21 @@ typedef struct dshash_parameters
5959
struct dshash_table_item;
6060
typedef struct dshash_table_item dshash_table_item;
6161

62+
/*
63+
* Sequential scan state. The detail is exposed to let users know the storage
64+
* size but it should be considered as an opaque type by callers.
65+
*/
66+
typedef struct dshash_seq_status
67+
{
68+
dshash_table *hash_table; /* dshash table working on */
69+
int curbucket; /* bucket number we are at */
70+
int nbuckets; /* total number of buckets in the dshash */
71+
dshash_table_item *curitem; /* item we are currently at */
72+
dsa_pointer pnextitem; /* dsa-pointer to the next item */
73+
int curpartition; /* partition number we are at */
74+
bool exclusive; /* locking mode */
75+
} dshash_seq_status;
76+
6277
/* Creating, sharing and destroying from hash tables. */
6378
extern dshash_table *dshash_create(dsa_area *area,
6479
const dshash_parameters *params,
@@ -80,6 +95,14 @@ extern bool dshash_delete_key(dshash_table *hash_table, const void *key);
8095
extern void dshash_delete_entry(dshash_table *hash_table, void *entry);
8196
extern void dshash_release_lock(dshash_table *hash_table, void *entry);
8297

98+
/* seq scan support */
99+
extern void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
100+
bool exclusive);
101+
extern void *dshash_seq_next(dshash_seq_status *status);
102+
extern void dshash_seq_term(dshash_seq_status *status);
103+
extern void dshash_delete_current(dshash_seq_status *status);
104+
extern void *dshash_get_current(dshash_seq_status *status);
105+
83106
/* Convenience hash and compare functions wrapping memcmp and tag_hash. */
84107
extern int dshash_memcmp(const void *a, const void *b, size_t size, void *arg);
85108
extern dshash_hash dshash_memhash(const void *v, size_t size, void *arg);

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3103,6 +3103,7 @@ dshash_hash
31033103
dshash_hash_function
31043104
dshash_parameters
31053105
dshash_partition
3106+
dshash_seq_status
31063107
dshash_table
31073108
dshash_table_control
31083109
dshash_table_handle

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy