Skip to content

Commit 9e873ec

Browse files
committed
add GUCs for array similarity & function
1 parent 7731322 commit 9e873ec

File tree

3 files changed

+58
-18
lines changed

3 files changed

+58
-18
lines changed

src/rum.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,9 +801,31 @@ extern Datum rum_ts_distance_td(PG_FUNCTION_ARGS);
801801

802802
extern Datum tsquery_to_distance_query(PG_FUNCTION_ARGS);
803803

804+
/* rum_arr_utils.c */
805+
typedef enum SimilarityType
806+
{
807+
SMT_COSINE = 1,
808+
SMT_JACCARD = 2,
809+
SMT_OVERLAP = 3
810+
} SimilarityType;
811+
812+
#define RUM_SIMILARITY_FUNCTION_DEFAULT SMT_COSINE
813+
#define RUM_SIMILARITY_THRESHOLD_DEFAULT 0.5
814+
815+
extern Datum rum_anyarray_config(PG_FUNCTION_ARGS);
816+
extern Datum rum_extract_anyarray(PG_FUNCTION_ARGS);
817+
extern Datum rum_extract_anyarray_query(PG_FUNCTION_ARGS);
818+
extern Datum rum_anyarray_consistent(PG_FUNCTION_ARGS);
819+
extern Datum rum_anyarray_ordering(PG_FUNCTION_ARGS);
820+
extern Datum rum_anyarray_similar(PG_FUNCTION_ARGS);
821+
extern Datum rum_anyarray_distance(PG_FUNCTION_ARGS);
822+
804823

805824
/* GUC parameters */
806-
extern PGDLLIMPORT int RumFuzzySearchLimit;
825+
extern PGDLLIMPORT int RumFuzzySearchLimit;
826+
extern PGDLLIMPORT float8 RumArraySimilarityThreshold;
827+
extern PGDLLIMPORT int RumArraySimilarityFunction;
828+
807829

808830
/*
809831
* Functions for reading ItemPointers with additional information. Used in

src/rum_arr_utils.c

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,9 @@ typedef struct SimpleArray
9797
AnyArrayTypeInfo *info;
9898
} SimpleArray;
9999

100-
typedef enum SimilarityType
101-
{
102-
AA_Cosine,
103-
AA_Jaccard,
104-
AA_Overlap
105-
} SimilarityType;
100+
101+
float8 RumArraySimilarityThreshold = RUM_SIMILARITY_THRESHOLD_DEFAULT;
102+
int RumArraySimilarityFunction = RUM_SIMILARITY_FUNCTION_DEFAULT;
106103

107104

108105
PG_FUNCTION_INFO_V1(rum_anyarray_config);
@@ -117,10 +114,6 @@ PG_FUNCTION_INFO_V1(rum_anyarray_similar);
117114
PG_FUNCTION_INFO_V1(rum_anyarray_distance);
118115

119116

120-
static SimilarityType SmlType = AA_Cosine;
121-
static float8 SmlLimit = 0.5;
122-
123-
124117
static Oid getAMProc(Oid amOid, Oid typid);
125118

126119
static AnyArrayTypeInfo *getAnyArrayTypeInfo(MemoryContext ctx, Oid typid);
@@ -139,7 +132,6 @@ static int32 getNumOfIntersect(SimpleArray *sa, SimpleArray *sb);
139132
static float8 getSimilarity(SimpleArray *sa, SimpleArray *sb, int32 intersection);
140133

141134

142-
143135
/*
144136
* Specifies additional information type for operator class.
145137
*/
@@ -390,7 +382,8 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
390382

391383
INIT_DUMMY_SIMPLE_ARRAY(&sa, nentries);
392384
INIT_DUMMY_SIMPLE_ARRAY(&sb, nkeys);
393-
res = getSimilarity(&sa, &sb, intersection) >= SmlLimit;
385+
res = getSimilarity(&sa, &sb, intersection) >=
386+
RumArraySimilarityThreshold;
394387
}
395388
else
396389
res = false;
@@ -491,7 +484,7 @@ rum_anyarray_similar(PG_FUNCTION_ARGS)
491484
PG_FREE_IF_COPY(b, 1);
492485
PG_FREE_IF_COPY(a, 0);
493486

494-
PG_RETURN_BOOL(result >= SmlLimit);
487+
PG_RETURN_BOOL(result >= RumArraySimilarityThreshold);
495488
}
496489

497490
Datum
@@ -851,19 +844,19 @@ getSimilarity(SimpleArray *sa, SimpleArray *sb, int32 intersection)
851844
{
852845
float8 result = 0.0;
853846

854-
switch (SmlType)
847+
switch (RumArraySimilarityFunction)
855848
{
856-
case AA_Cosine:
849+
case SMT_COSINE:
857850
result = ((float8) intersection) /
858851
sqrt(((float8) sa->nelems) * ((float8) sb->nelems));
859852
break;
860-
case AA_Jaccard:
853+
case SMT_JACCARD:
861854
result = ((float8) intersection) /
862855
(((float8) sa->nelems) +
863856
((float8) sb->nelems) -
864857
((float8) intersection));
865858
break;
866-
case AA_Overlap:
859+
case SMT_OVERLAP:
867860
result = intersection;
868861
break;
869862
default:

src/rumutil.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ PG_FUNCTION_INFO_V1(rumhandler);
3636
/* Kind of relation options for rum index */
3737
static relopt_kind rum_relopt_kind;
3838

39+
static const struct config_enum_entry rum_array_similarity_function_opts[] =
40+
{
41+
{ "cosine", SMT_COSINE, false },
42+
{ "jaccard", SMT_JACCARD, false },
43+
{ "overlap", SMT_OVERLAP, false },
44+
{ NULL, 0, false }
45+
};
46+
3947
/*
4048
* Module load callback
4149
*/
@@ -51,6 +59,23 @@ _PG_init(void)
5159
PGC_USERSET, 0,
5260
NULL, NULL, NULL);
5361

62+
DefineCustomRealVariable("rum_array_similarity_threshold",
63+
"Sets the array similarity threshold.",
64+
NULL,
65+
&RumArraySimilarityThreshold,
66+
RUM_SIMILARITY_THRESHOLD_DEFAULT, 0.0, 1.0,
67+
PGC_USERSET, 0,
68+
NULL, NULL, NULL);
69+
70+
DefineCustomEnumVariable("rum_array_similarity_function",
71+
"Sets the array similarity function.",
72+
NULL,
73+
&RumArraySimilarityFunction,
74+
RUM_SIMILARITY_FUNCTION_DEFAULT,
75+
rum_array_similarity_function_opts,
76+
PGC_USERSET, 0,
77+
NULL, NULL, NULL);
78+
5479
rum_relopt_kind = add_reloption_kind();
5580

5681
add_string_reloption(rum_relopt_kind, "attach",

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy