Skip to content

Commit 27f78d4

Browse files
author
Maksim Milyutin
committed
First incomplete version of lexeme hashing in index
1 parent 2f57c4b commit 27f78d4

File tree

4 files changed

+49
-19
lines changed

4 files changed

+49
-19
lines changed

rum--1.0.sql

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,17 @@ RETURNS bytea
8080
AS 'MODULE_PATHNAME'
8181
LANGUAGE C IMMUTABLE STRICT;
8282

83+
-- Comparison routine for the keys stored by rum_tsvector_ops: it takes two
-- bytea keys and returns an integer. The operator class in this script
-- registers it as support FUNCTION 1 and declares STORAGE bytea.
-- Implemented in C inside the extension's shared library.
CREATE FUNCTION rum_cmp_tslexeme(bytea, bytea)
    RETURNS integer
    AS 'MODULE_PATHNAME'
    LANGUAGE C
    IMMUTABLE
    STRICT;
87+
8388
CREATE OPERATOR CLASS rum_tsvector_ops
8489
FOR TYPE tsvector USING rum
8590
AS
8691
OPERATOR 1 @@ (tsvector, tsquery),
8792
OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops,
88-
FUNCTION 1 gin_cmp_tslexeme(text, text),
93+
FUNCTION 1 rum_cmp_tslexeme(bytea, bytea),
8994
FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal),
9095
FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal),
9196
FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
@@ -94,7 +99,7 @@ AS
9499
FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal),
95100
FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal),
96101
FUNCTION 10 rum_ts_join_pos(internal, internal),
97-
STORAGE text;
102+
STORAGE bytea;
98103
-- timestamp ops
99104

100105
CREATE FUNCTION timestamp_distance(timestamp, timestamp)

rum.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ typedef signed char RumNullCategory;
243243
*/
244244
#define RumGetDownlink(itup) RumItemPointerGetBlockNumber(&(itup)->t_tid)
245245
#define RumSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
246-
246+
247247

248248
/*
249249
* Data (posting tree) pages

rum_ts_utils.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
#include "postgres.h"
1313

14+
#include "access/hash.h"
1415
#include "access/htup_details.h"
16+
#include "catalog/pg_collation.h"
1517
#include "catalog/pg_type.h"
1618
#include "funcapi.h"
1719
#include "miscadmin.h"
@@ -25,6 +27,7 @@
2527

2628
#include <math.h>
2729

30+
PG_FUNCTION_INFO_V1(rum_cmp_tslexeme);
2831
PG_FUNCTION_INFO_V1(rum_extract_tsvector);
2932
PG_FUNCTION_INFO_V1(rum_extract_tsquery);
3033
PG_FUNCTION_INFO_V1(rum_tsvector_config);
@@ -503,11 +506,15 @@ rum_extract_tsvector(PG_FUNCTION_ARGS)
503506
for (i = 0; i < vector->size; i++)
504507
{
505508
text *txt;
509+
bytea *hash_value;
506510
bytea *posData;
507511
int posDataSize;
508512

509513
txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
510-
entries[i] = PointerGetDatum(txt);
514+
hash_value = (bytea *) palloc(VARHDRSZ + sizeof(int32));
515+
SET_VARSIZE(hash_value, VARHDRSZ + sizeof(int32));
516+
*VARDATA(hash_value) = DirectFunctionCall1(hashtext, PointerGetDatum(txt));
517+
entries[i] = PointerGetDatum(hash_value);
511518

512519
if (we->haspos)
513520
{
@@ -586,10 +593,14 @@ rum_extract_tsquery(PG_FUNCTION_ARGS)
586593
for (i = 0; i < (*nentries); i++)
587594
{
588595
text *txt;
596+
bytea *hash_value;
589597

590598
txt = cstring_to_text_with_len(GETOPERAND(query) + operands[i]->distance,
591599
operands[i]->length);
592-
entries[i] = PointerGetDatum(txt);
600+
hash_value = (bytea *) palloc(VARHDRSZ + sizeof(int32));
601+
SET_VARSIZE(hash_value, VARHDRSZ + sizeof(int32));
602+
*VARDATA(hash_value) = DirectFunctionCall1(hashtext, PointerGetDatum(txt));
603+
entries[i] = PointerGetDatum(hash_value);
593604
partialmatch[i] = operands[i]->prefix;
594605
(*extra_data)[i] = (Pointer) map_item_operand;
595606
}
@@ -1389,3 +1400,17 @@ rum_ts_join_pos(PG_FUNCTION_ARGS)
13891400

13901401
PG_RETURN_BYTEA_P(result);
13911402
}
1403+
1404+
Datum
1405+
rum_cmp_tslexeme(PG_FUNCTION_ARGS)
1406+
{
1407+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
1408+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
1409+
int32 a = *VARDATA(arg1);
1410+
int32 b = *VARDATA(arg2);
1411+
int cmp;
1412+
1413+
cmp = (a > b) ? 1 : ((a == b) ? 0 : -1);
1414+
1415+
PG_RETURN_INT32(cmp);
1416+
}

sql/rum.sql

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -100,17 +100,17 @@ DELETE FROM tst WHERE i = 5;
100100
VACUUM tst;
101101
INSERT INTO tst SELECT i%10, to_tsvector('simple', substr(md5(i::text), 1, 1)) FROM generate_series(14001,15000) i;
102102

103-
set enable_bitmapscan=off;
104-
explain (costs off)
105-
SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
106-
FROM test_rum
107-
WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
108-
ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
109-
SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
110-
FROM test_rum
111-
WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
112-
ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
113-
SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), *
114-
FROM test_rum
115-
WHERE a @@ to_tsquery('pg_catalog.english', 'b:*')
116-
ORDER BY a <=> to_tsquery('pg_catalog.english', 'b:*');
103+
-- set enable_bitmapscan=off;
104+
-- explain (costs off)
105+
-- SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
106+
-- FROM test_rum
107+
-- WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
108+
-- ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
109+
-- SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), *
110+
-- FROM test_rum
111+
-- WHERE a @@ to_tsquery('pg_catalog.english', 'w:*')
112+
-- ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*');
113+
-- SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), *
114+
-- FROM test_rum
115+
-- WHERE a @@ to_tsquery('pg_catalog.english', 'b:*')
116+
-- ORDER BY a <=> to_tsquery('pg_catalog.english', 'b:*');

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy