Skip to content

Commit 00ceebc

Browse files
author
Artur Zakirov
committed
Added:
- rum_distance_query type, with a cast from tsquery to rum_distance_query - rum_ts_distance() overloads that calculate the distance with a normalization parameter - <=> operator overload
1 parent c63d4ea commit 00ceebc

File tree

5 files changed

+166
-5
lines changed

5 files changed

+166
-5
lines changed

expected/rum.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,29 @@ SELECT
150150
57.5727 | 57.5727 | thinking--“to go or not to go?” We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14
151151
(2 rows)
152152

153+
-- Check ranking normalization
154+
SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), *
155+
FROM test_rum
156+
WHERE a @@ to_tsquery('pg_catalog.english', 'way')
157+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way');
158+
rum_ts_distance | t | a
159+
-----------------+--------------------------------------------------------------------------+---------------------------------------------------------------
160+
16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9
161+
16.4493 | itself. Put on your “specs” and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12
162+
16.4493 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2
163+
16.4493 | thinking--“to go or not to go?” We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14
164+
(4 rows)
165+
166+
SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), *
167+
FROM test_rum
168+
WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)')
169+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)');
170+
rum_ts_distance | t | a
171+
-----------------+---------------------------------------------------------------------+---------------------------------------------------------
172+
8.22467 | itself. Put on your “specs” and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12
173+
57.5727 | thinking--“to go or not to go?” We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14
174+
(2 rows)
175+
153176
INSERT INTO test_rum (t) VALUES ('foo bar foo the over foo qq bar');
154177
INSERT INTO test_rum (t) VALUES ('345 qwerty copyright');
155178
INSERT INTO test_rum (t) VALUES ('345 qwerty');

rum--1.0.sql

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,31 @@ LANGUAGE C;
66
-- Access method
77
CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler;
88

9-
-- Opclasses
9+
-- tsvector opclasses
10+
11+
CREATE TYPE rum_distance_query AS (query tsquery, method int);
12+
13+
CREATE FUNCTION tsquery_to_distance_query(tsquery)
14+
RETURNS rum_distance_query
15+
AS 'MODULE_PATHNAME', 'tsquery_to_distance_query'
16+
LANGUAGE C IMMUTABLE STRICT;
17+
18+
CREATE CAST (tsquery AS rum_distance_query)
19+
WITH FUNCTION tsquery_to_distance_query(tsquery) AS IMPLICIT;
20+
1021
CREATE FUNCTION rum_ts_distance(tsvector,tsquery)
1122
RETURNS float4
12-
AS 'MODULE_PATHNAME'
23+
AS 'MODULE_PATHNAME', 'rum_ts_distance_tt'
24+
LANGUAGE C IMMUTABLE STRICT;
25+
26+
CREATE FUNCTION rum_ts_distance(tsvector,tsquery,int)
27+
RETURNS float4
28+
AS 'MODULE_PATHNAME', 'rum_ts_distance_ttf'
29+
LANGUAGE C IMMUTABLE STRICT;
30+
31+
CREATE FUNCTION rum_ts_distance(tsvector,rum_distance_query)
32+
RETURNS float4
33+
AS 'MODULE_PATHNAME', 'rum_ts_distance_td'
1334
LANGUAGE C IMMUTABLE STRICT;
1435

1536
CREATE OPERATOR <=> (
@@ -18,6 +39,12 @@ CREATE OPERATOR <=> (
1839
PROCEDURE = rum_ts_distance
1940
);
2041

42+
CREATE OPERATOR <=> (
43+
LEFTARG = tsvector,
44+
RIGHTARG = rum_distance_query,
45+
PROCEDURE = rum_ts_distance
46+
);
47+
2148
CREATE FUNCTION rum_extract_tsvector(tsvector,internal,internal,internal,internal)
2249
RETURNS internal
2350
AS 'MODULE_PATHNAME'

rum.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,12 @@ extern Datum rum_extract_tsquery(PG_FUNCTION_ARGS);
767767
extern Datum rum_tsvector_config(PG_FUNCTION_ARGS);
768768
extern Datum rum_tsquery_pre_consistent(PG_FUNCTION_ARGS);
769769
extern Datum rum_tsquery_distance(PG_FUNCTION_ARGS);
770-
extern Datum rum_ts_distance(PG_FUNCTION_ARGS);
770+
extern Datum rum_ts_distance_tt(PG_FUNCTION_ARGS);
771+
extern Datum rum_ts_distance_ttf(PG_FUNCTION_ARGS);
772+
extern Datum rum_ts_distance_td(PG_FUNCTION_ARGS);
773+
774+
extern Datum tsquery_to_distance_query(PG_FUNCTION_ARGS);
775+
771776

772777
/* GUC parameters */
773778
extern PGDLLIMPORT int RumFuzzySearchLimit;

rum_ts_utils.c

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@
1111

1212
#include "postgres.h"
1313

14+
#include "access/htup_details.h"
1415
#include "catalog/pg_type.h"
16+
#include "funcapi.h"
1517
#include "miscadmin.h"
1618
#include "tsearch/ts_type.h"
1719
#include "tsearch/ts_utils.h"
1820
#include "utils/array.h"
1921
#include "utils/builtins.h"
22+
#include "utils/typcache.h"
2023

2124
#include "rum.h"
2225

@@ -29,7 +32,11 @@ PG_FUNCTION_INFO_V1(rum_tsquery_pre_consistent);
2932
PG_FUNCTION_INFO_V1(rum_tsquery_consistent);
3033
PG_FUNCTION_INFO_V1(rum_tsquery_timestamp_consistent);
3134
PG_FUNCTION_INFO_V1(rum_tsquery_distance);
32-
PG_FUNCTION_INFO_V1(rum_ts_distance);
35+
PG_FUNCTION_INFO_V1(rum_ts_distance_tt);
36+
PG_FUNCTION_INFO_V1(rum_ts_distance_ttf);
37+
PG_FUNCTION_INFO_V1(rum_ts_distance_td);
38+
39+
PG_FUNCTION_INFO_V1(tsquery_to_distance_query);
3340

3441
static int count_pos(char *ptr, int len);
3542
static char *decompress_pos(char *ptr, uint16 *pos);
@@ -1182,7 +1189,7 @@ rum_tsquery_distance(PG_FUNCTION_ARGS)
11821189
}
11831190

11841191
Datum
1185-
rum_ts_distance(PG_FUNCTION_ARGS)
1192+
rum_ts_distance_tt(PG_FUNCTION_ARGS)
11861193
{
11871194
TSVector txt = PG_GETARG_TSVECTOR(0);
11881195
TSQuery query = PG_GETARG_TSQUERY(1);
@@ -1198,6 +1205,95 @@ rum_ts_distance(PG_FUNCTION_ARGS)
11981205
PG_RETURN_FLOAT4(1.0 / res);
11991206
}
12001207

1208+
Datum
1209+
rum_ts_distance_ttf(PG_FUNCTION_ARGS)
1210+
{
1211+
TSVector txt = PG_GETARG_TSVECTOR(0);
1212+
TSQuery query = PG_GETARG_TSQUERY(1);
1213+
int method = PG_GETARG_INT32(2);
1214+
float4 res;
1215+
1216+
res = calc_score(weights, txt, query, method);
1217+
1218+
PG_FREE_IF_COPY(txt, 0);
1219+
PG_FREE_IF_COPY(query, 1);
1220+
if (res == 0)
1221+
PG_RETURN_FLOAT4(get_float4_infinity());
1222+
else
1223+
PG_RETURN_FLOAT4(1.0 / res);
1224+
}
1225+
1226+
Datum
1227+
rum_ts_distance_td(PG_FUNCTION_ARGS)
1228+
{
1229+
TSVector txt = PG_GETARG_TSVECTOR(0);
1230+
HeapTupleHeader d = PG_GETARG_HEAPTUPLEHEADER(1);
1231+
1232+
Oid tupType = HeapTupleHeaderGetTypeId(d);
1233+
int32 tupTypmod = HeapTupleHeaderGetTypMod(d);
1234+
TupleDesc tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1235+
HeapTupleData tuple;
1236+
1237+
TSQuery query;
1238+
int method;
1239+
bool isnull;
1240+
float4 res;
1241+
1242+
tuple.t_len = HeapTupleHeaderGetDatumLength(d);
1243+
ItemPointerSetInvalid(&(tuple.t_self));
1244+
tuple.t_tableOid = InvalidOid;
1245+
tuple.t_data = d;
1246+
1247+
query = DatumGetTSQuery(fastgetattr(&tuple, 1, tupdesc, &isnull));
1248+
if (isnull)
1249+
{
1250+
ReleaseTupleDesc(tupdesc);
1251+
PG_FREE_IF_COPY(txt, 0);
1252+
PG_FREE_IF_COPY(d, 1);
1253+
elog(ERROR, "NULL query value is not allowed");
1254+
}
1255+
1256+
method = DatumGetInt32(fastgetattr(&tuple, 2, tupdesc, &isnull));
1257+
if (isnull)
1258+
method = 0;
1259+
1260+
res = calc_score(weights, txt, query, method);
1261+
1262+
ReleaseTupleDesc(tupdesc);
1263+
PG_FREE_IF_COPY(txt, 0);
1264+
PG_FREE_IF_COPY(d, 1);
1265+
1266+
if (res == 0)
1267+
PG_RETURN_FLOAT4(get_float4_infinity());
1268+
else
1269+
PG_RETURN_FLOAT4(1.0 / res);
1270+
}
1271+
1272+
Datum
1273+
tsquery_to_distance_query(PG_FUNCTION_ARGS)
1274+
{
1275+
TSQuery query = PG_GETARG_TSQUERY(0);
1276+
1277+
TupleDesc tupdesc;
1278+
HeapTuple htup;
1279+
Datum values[2];
1280+
bool nulls[2];
1281+
1282+
/* Build a tuple descriptor for our result type */
1283+
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1284+
elog(ERROR, "return type must be a row type");
1285+
1286+
tupdesc = BlessTupleDesc(tupdesc);
1287+
1288+
MemSet(nulls, 0, sizeof(nulls));
1289+
values[0] = TSQueryGetDatum(query);
1290+
values[1] = Int32GetDatum(DEF_NORM_METHOD);
1291+
1292+
htup = heap_form_tuple(tupdesc, values, nulls);
1293+
1294+
PG_RETURN_DATUM(HeapTupleGetDatum(htup));
1295+
}
1296+
12011297
Datum
12021298
rum_tsvector_config(PG_FUNCTION_ARGS)
12031299
{

sql/rum.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ SELECT
5252
FROM test_rum
5353
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2;
5454

55+
-- Check ranking normalization
56+
SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), *
57+
FROM test_rum
58+
WHERE a @@ to_tsquery('pg_catalog.english', 'way')
59+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way');
60+
SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), *
61+
FROM test_rum
62+
WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)')
63+
ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)');
64+
5565
INSERT INTO test_rum (t) VALUES ('foo bar foo the over foo qq bar');
5666
INSERT INTO test_rum (t) VALUES ('345 qwerty copyright');
5767
INSERT INTO test_rum (t) VALUES ('345 qwerty');

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy