Skip to content

Commit e306df7

Browse files
committed
Full Text Search support for json and jsonb
The new functions are ts_headline() and to_tsvector. Dmitry Dolgov, edited and documented by me.
1 parent c80b992 commit e306df7

File tree

9 files changed

+613
-0
lines changed

9 files changed

+613
-0
lines changed

doc/src/sgml/func.sgml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9564,6 +9564,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
95649564
<entry><literal>to_tsvector('english', 'The Fat Rats')</literal></entry>
95659565
<entry><literal>'fat':2 'rat':3</literal></entry>
95669566
</row>
9567+
<row>
9568+
<entry>
9569+
<literal><function>to_tsvector(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">document</> <type>json(b)</type>)</function></literal>
9570+
</entry>
9571+
<entry><type>tsvector</type></entry>
9572+
<entry>reduce document text to <type>tsvector</></entry>
9573+
<entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
9574+
<entry><literal>'fat':2 'rat':3</literal></entry>
9575+
</row>
95679576
<row>
95689577
<entry>
95699578
<indexterm>
@@ -9610,6 +9619,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
96109619
<entry><literal>ts_headline('x y z', 'z'::tsquery)</literal></entry>
96119620
<entry><literal>x y &lt;b&gt;z&lt;/b&gt;</literal></entry>
96129621
</row>
9622+
<row>
9623+
<entry>
9624+
<literal><function>ts_headline(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>json(b)</>, <replaceable class="PARAMETER">query</replaceable> <type>tsquery</> <optional>, <replaceable class="PARAMETER">options</replaceable> <type>text</> </optional>)</function></literal>
9625+
</entry>
9626+
<entry><type>text</type></entry>
9627+
<entry>display a query match</entry>
9628+
<entry><literal>ts_headline('{"a":"x y z"}'::json, 'z'::tsquery)</literal></entry>
9629+
<entry><literal>{"a":"x y &lt;b&gt;z&lt;/b&gt;"}</literal></entry>
9630+
</row>
96139631
<row>
96149632
<entry>
96159633
<indexterm>

src/backend/tsearch/to_tsany.c

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "tsearch/ts_cache.h"
1717
#include "tsearch/ts_utils.h"
1818
#include "utils/builtins.h"
19+
#include "utils/jsonapi.h"
1920

2021

2122
typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
2425
int qoperator; /* query operator */
2526
} MorphOpaque;
2627

28+
typedef struct TSVectorBuildState
29+
{
30+
ParsedText *prs;
31+
TSVector result;
32+
Oid cfgId;
33+
} TSVectorBuildState;
34+
35+
static void add_to_tsvector(void *state, char *elem_value, int elem_len);
2736

2837
Datum
2938
get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
256265
PointerGetDatum(in)));
257266
}
258267

268+
Datum
269+
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
270+
{
271+
Oid cfgId = PG_GETARG_OID(0);
272+
Jsonb *jb = PG_GETARG_JSONB(1);
273+
TSVectorBuildState state;
274+
ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
275+
276+
prs->words = NULL;
277+
state.result = NULL;
278+
state.cfgId = cfgId;
279+
state.prs = prs;
280+
281+
iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
282+
283+
PG_FREE_IF_COPY(jb, 1);
284+
285+
if (state.result == NULL)
286+
{
287+
/* There weren't any string elements in jsonb,
288+
* so wee need to return an empty vector */
289+
290+
if (prs->words != NULL)
291+
pfree(prs->words);
292+
293+
state.result = palloc(CALCDATASIZE(0, 0));
294+
SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
295+
state.result->size = 0;
296+
}
297+
298+
PG_RETURN_TSVECTOR(state.result);
299+
}
300+
301+
Datum
302+
jsonb_to_tsvector(PG_FUNCTION_ARGS)
303+
{
304+
Jsonb *jb = PG_GETARG_JSONB(0);
305+
Oid cfgId;
306+
307+
cfgId = getTSCurrentConfig(true);
308+
PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
309+
ObjectIdGetDatum(cfgId),
310+
JsonbGetDatum(jb)));
311+
}
312+
313+
Datum
314+
json_to_tsvector_byid(PG_FUNCTION_ARGS)
315+
{
316+
Oid cfgId = PG_GETARG_OID(0);
317+
text *json = PG_GETARG_TEXT_P(1);
318+
TSVectorBuildState state;
319+
ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
320+
321+
prs->words = NULL;
322+
state.result = NULL;
323+
state.cfgId = cfgId;
324+
state.prs = prs;
325+
326+
iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
327+
328+
PG_FREE_IF_COPY(json, 1);
329+
if (state.result == NULL)
330+
{
331+
/* There weren't any string elements in json,
332+
* so wee need to return an empty vector */
333+
334+
if (prs->words != NULL)
335+
pfree(prs->words);
336+
337+
state.result = palloc(CALCDATASIZE(0, 0));
338+
SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
339+
state.result->size = 0;
340+
}
341+
342+
PG_RETURN_TSVECTOR(state.result);
343+
}
344+
345+
Datum
346+
json_to_tsvector(PG_FUNCTION_ARGS)
347+
{
348+
text *json = PG_GETARG_TEXT_P(0);
349+
Oid cfgId;
350+
351+
cfgId = getTSCurrentConfig(true);
352+
PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
353+
ObjectIdGetDatum(cfgId),
354+
PointerGetDatum(json)));
355+
}
356+
357+
/*
358+
* Extend current TSVector from _state with a new one,
359+
* build over a json(b) element.
360+
*/
361+
static void
362+
add_to_tsvector(void *_state, char *elem_value, int elem_len)
363+
{
364+
TSVectorBuildState *state = (TSVectorBuildState *) _state;
365+
ParsedText *prs = state->prs;
366+
TSVector item_vector;
367+
int i;
368+
369+
prs->lenwords = elem_len / 6;
370+
if (prs->lenwords == 0)
371+
prs->lenwords = 2;
372+
373+
prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
374+
prs->curwords = 0;
375+
prs->pos = 0;
376+
377+
parsetext(state->cfgId, prs, elem_value, elem_len);
378+
379+
if (prs->curwords)
380+
{
381+
if (state->result != NULL)
382+
{
383+
for (i = 0; i < prs->curwords; i++)
384+
prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
385+
386+
item_vector = make_tsvector(prs);
387+
388+
state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
389+
TSVectorGetDatum(state->result),
390+
PointerGetDatum(item_vector));
391+
}
392+
else
393+
state->result = make_tsvector(prs);
394+
}
395+
}
396+
259397
/*
260398
* to_tsquery
261399
*/

src/backend/tsearch/wparser.c

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "tsearch/ts_cache.h"
2121
#include "tsearch/ts_utils.h"
2222
#include "utils/builtins.h"
23+
#include "utils/jsonapi.h"
2324
#include "utils/varlena.h"
2425

2526

@@ -31,6 +32,19 @@ typedef struct
3132
LexDescr *list;
3233
} TSTokenTypeStorage;
3334

35+
/* state for ts_headline_json_* */
36+
typedef struct HeadlineJsonState
37+
{
38+
HeadlineParsedText *prs;
39+
TSConfigCacheEntry *cfg;
40+
TSParserCacheEntry *prsobj;
41+
TSQuery query;
42+
List *prsoptions;
43+
bool transformed;
44+
} HeadlineJsonState;
45+
46+
static text * headline_json_value(void *_state, char *elem_value, int elem_len);
47+
3448
static void
3549
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
3650
{
@@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
363377
PG_GETARG_DATUM(1),
364378
PG_GETARG_DATUM(2)));
365379
}
380+
381+
Datum
382+
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
383+
{
384+
Jsonb *out, *jb = PG_GETARG_JSONB(1);
385+
TSQuery query = PG_GETARG_TSQUERY(2);
386+
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
387+
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
388+
389+
HeadlineParsedText prs;
390+
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
391+
392+
memset(&prs, 0, sizeof(HeadlineParsedText));
393+
prs.lenwords = 32;
394+
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
395+
396+
state->prs = &prs;
397+
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
398+
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
399+
state->query = query;
400+
if (opt)
401+
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
402+
else
403+
state->prsoptions = NIL;
404+
405+
if (!OidIsValid(state->prsobj->headlineOid))
406+
ereport(ERROR,
407+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
408+
errmsg("text search parser does not support headline creation")));
409+
410+
out = transform_jsonb_string_values(jb, state, action);
411+
412+
PG_FREE_IF_COPY(jb, 1);
413+
PG_FREE_IF_COPY(query, 2);
414+
if (opt)
415+
PG_FREE_IF_COPY(opt, 3);
416+
417+
pfree(prs.words);
418+
419+
if (state->transformed)
420+
{
421+
pfree(prs.startsel);
422+
pfree(prs.stopsel);
423+
}
424+
425+
PG_RETURN_JSONB(out);
426+
}
427+
428+
Datum
429+
ts_headline_jsonb(PG_FUNCTION_ARGS)
430+
{
431+
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
432+
ObjectIdGetDatum(getTSCurrentConfig(true)),
433+
PG_GETARG_DATUM(0),
434+
PG_GETARG_DATUM(1)));
435+
}
436+
437+
Datum
438+
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
439+
{
440+
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
441+
PG_GETARG_DATUM(0),
442+
PG_GETARG_DATUM(1),
443+
PG_GETARG_DATUM(2)));
444+
}
445+
446+
Datum
447+
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
448+
{
449+
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
450+
ObjectIdGetDatum(getTSCurrentConfig(true)),
451+
PG_GETARG_DATUM(0),
452+
PG_GETARG_DATUM(1),
453+
PG_GETARG_DATUM(2)));
454+
}
455+
456+
Datum
457+
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
458+
{
459+
text *json = PG_GETARG_TEXT_P(1);
460+
TSQuery query = PG_GETARG_TSQUERY(2);
461+
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
462+
text *out;
463+
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
464+
465+
HeadlineParsedText prs;
466+
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
467+
468+
memset(&prs, 0, sizeof(HeadlineParsedText));
469+
prs.lenwords = 32;
470+
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
471+
472+
state->prs = &prs;
473+
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
474+
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
475+
state->query = query;
476+
if (opt)
477+
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
478+
else
479+
state->prsoptions = NIL;
480+
481+
if (!OidIsValid(state->prsobj->headlineOid))
482+
ereport(ERROR,
483+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
484+
errmsg("text search parser does not support headline creation")));
485+
486+
out = transform_json_string_values(json, state, action);
487+
488+
PG_FREE_IF_COPY(json, 1);
489+
PG_FREE_IF_COPY(query, 2);
490+
if (opt)
491+
PG_FREE_IF_COPY(opt, 3);
492+
pfree(prs.words);
493+
494+
if (state->transformed)
495+
{
496+
pfree(prs.startsel);
497+
pfree(prs.stopsel);
498+
}
499+
500+
PG_RETURN_TEXT_P(out);
501+
}
502+
503+
Datum
504+
ts_headline_json(PG_FUNCTION_ARGS)
505+
{
506+
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
507+
ObjectIdGetDatum(getTSCurrentConfig(true)),
508+
PG_GETARG_DATUM(0),
509+
PG_GETARG_DATUM(1)));
510+
}
511+
512+
Datum
513+
ts_headline_json_byid(PG_FUNCTION_ARGS)
514+
{
515+
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
516+
PG_GETARG_DATUM(0),
517+
PG_GETARG_DATUM(1),
518+
PG_GETARG_DATUM(2)));
519+
}
520+
521+
Datum
522+
ts_headline_json_opt(PG_FUNCTION_ARGS)
523+
{
524+
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
525+
ObjectIdGetDatum(getTSCurrentConfig(true)),
526+
PG_GETARG_DATUM(0),
527+
PG_GETARG_DATUM(1),
528+
PG_GETARG_DATUM(2)));
529+
}
530+
531+
532+
/*
533+
* Return headline in text from, generated from a json(b) element
534+
*/
535+
static text *
536+
headline_json_value(void *_state, char *elem_value, int elem_len)
537+
{
538+
HeadlineJsonState *state = (HeadlineJsonState *) _state;
539+
540+
HeadlineParsedText *prs = state->prs;
541+
TSConfigCacheEntry *cfg = state->cfg;
542+
TSParserCacheEntry *prsobj = state->prsobj;
543+
TSQuery query = state->query;
544+
List *prsoptions = state->prsoptions;
545+
546+
prs->curwords = 0;
547+
hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
548+
FunctionCall3(&(prsobj->prsheadline),
549+
PointerGetDatum(prs),
550+
PointerGetDatum(prsoptions),
551+
PointerGetDatum(query));
552+
553+
state->transformed = true;
554+
return generateHeadline(prs);
555+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy