Skip to content

Commit bb89237

Browse files
committed
1. Eliminate duplicate field HLWORD->skip
2. Rework support for HTML tags in parser
3. Add HighlightAll to headline function for generating highlighted whole text with saved HTML tags
1 parent: e48cfac; commit: bb89237

File tree

6 files changed

+218 / -117 lines changed (218 additions, 117 deletions)

6 files changed

+218 / -117 lines changed (218 additions, 117 deletions)

contrib/tsearch2/expected/tsearch2.out

Lines changed: 33 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -458,20 +458,20 @@ select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc
458458
12 |
459459
1 | asdf
460460
12 |
461-
13 |
461+
13 | <fr>
462462
1 | qwer
463463
12 |
464464
1 | jf
465465
12 |
466466
1 | sdjk
467-
13 |
467+
13 | <we hjwer <werrwe>
468468
12 |
469469
3 | ewr1
470470
12 | >
471471
12 |
472472
3 | ewri2
473473
12 |
474-
13 |
474+
13 | <a href="qwe<qwe>">
475475
12 |
476476

477477
19 | /usr/local/fff
@@ -515,7 +515,7 @@ select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc
515515
22 | 234
516516
12 |
517517

518-
13 |
518+
13 | <i <b>
519519
12 |
520520
1 | wow
521521
12 |
@@ -2130,6 +2130,35 @@ A thousand years to trace
21302130
The granite features of this cliff
21312131
(1 row)
21322132

2133+
select headline('
2134+
<html>
2135+
<!-- some comment -->
2136+
<body>
2137+
Sea view wow <u>foo bar</u> <i>qq</i>
2138+
<a href="http://www.google.com/foo.bar.html" target="_blank">YES &nbsp;</a>
2139+
ff-bg
2140+
<script>
2141+
document.write(15);
2142+
</script>
2143+
</body>
2144+
</html>',
2145+
to_tsquery('sea&foo'), 'HighlightAll=true');
2146+
headline
2147+
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2148+
2149+
<html>
2150+
<!-- some comment -->
2151+
<body>
2152+
<b>Sea</b> view wow <u><b>foo</b> bar</u> <i>qq</i>
2153+
<a href="http://www.google.com/foo.bar.html" target="_blank">YES &nbsp;</a>
2154+
ff-bg
2155+
<script>
2156+
document.write(15);
2157+
</script>
2158+
</body>
2159+
</html>
2160+
(1 row)
2161+
21332162
--check debug
21342163
select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
21352164
ts_name | tok_type | description | token | dict_name | tsvector

contrib/tsearch2/sql/tsearch2.sql

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,20 @@ The sculpture of these granite seams,
253253
Upon a woman s face. E. J. Pratt (1882 1964)
254254
', to_tsquery('sea'));
255255

256+
257+
select headline('
258+
<html>
259+
<!-- some comment -->
260+
<body>
261+
Sea view wow <u>foo bar</u> <i>qq</i>
262+
<a href="http://www.google.com/foo.bar.html" target="_blank">YES &nbsp;</a>
263+
ff-bg
264+
<script>
265+
document.write(15);
266+
</script>
267+
</body>
268+
</html>',
269+
to_tsquery('sea&foo'), 'HighlightAll=true');
256270
--check debug
257271
select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
258272

contrib/tsearch2/ts_cfg.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -510,7 +510,7 @@ genhl(HLPRSTEXT * prs)
510510
ptr = ((char *) out) + dist;
511511
}
512512

513-
if (wrd->in && !wrd->skip && !wrd->repeated)
513+
if (wrd->in && !wrd->repeated)
514514
{
515515
if (wrd->replace)
516516
{
@@ -532,7 +532,7 @@ genhl(HLPRSTEXT * prs)
532532
ptr += prs->stopsellen;
533533
}
534534
}
535-
}
535+
} else
536536

537537
if (!wrd->repeated)
538538
pfree(wrd->word);

contrib/tsearch2/ts_cfg.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,13 +46,13 @@ typedef struct
4646

4747
typedef struct
4848
{
49-
uint16 len;
50-
uint8 selected:1,
49+
uint32 selected:1,
5150
in:1,
52-
skip:1,
5351
replace:1,
54-
repeated:1;
55-
uint8 type;
52+
repeated:1,
53+
unused:4,
54+
type:8,
55+
len:16;
5656
char *word;
5757
ITEM *item;
5858
} HLWORD;

contrib/tsearch2/wordparser/parser.l

Lines changed: 59 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,48 @@
1010

1111
char *token = NULL; /* pointer to token */
1212
int tokenlen;
13-
char *s = NULL; /* to return WHOLE hyphenated-word */
13+
static char *s = NULL; /* to return WHOLE hyphenated-word */
1414

1515
YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
1616

17+
typedef struct {
18+
int tlen;
19+
int clen;
20+
char *str;
21+
} TagStorage;
22+
23+
static TagStorage ts={0,0,NULL};
24+
25+
static void
26+
addTag() {
27+
while( ts.clen+tsearch2_yyleng+1 > ts.tlen ) {
28+
ts.tlen*=2;
29+
ts.str=realloc(ts.str,ts.tlen);
30+
if (!ts.str)
31+
ereport(ERROR,
32+
(errcode(ERRCODE_OUT_OF_MEMORY),
33+
errmsg("out of memory")));
34+
}
35+
memcpy(ts.str+ts.clen,tsearch2_yytext,tsearch2_yyleng);
36+
ts.clen+=tsearch2_yyleng;
37+
ts.str[ts.clen]='\0';
38+
}
39+
40+
static void
41+
startTag() {
42+
if ( ts.str==NULL ) {
43+
ts.tlen=tsearch2_yyleng+1;
44+
ts.str=malloc(ts.tlen);
45+
if (!ts.str)
46+
ereport(ERROR,
47+
(errcode(ERRCODE_OUT_OF_MEMORY),
48+
errmsg("out of memory")));
49+
}
50+
ts.clen=0;
51+
ts.str[0]='\0';
52+
addTag();
53+
}
54+
1755
%}
1856

1957
%option 8bit
@@ -46,47 +84,46 @@ URI [-_[:alnum:]/%,\.;=&?#]+
4684

4785
%%
4886

49-
"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
87+
"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; startTag(); }
5088

5189
<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
5290
BEGIN INITIAL;
53-
*tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
54-
token = tsearch2_yytext;
55-
tokenlen = tsearch2_yyleng;
56-
return SPACE;
91+
addTag();
92+
token = ts.str;
93+
tokenlen = ts.clen;
94+
return TAG;
5795
}
5896

59-
"<!--" { BEGIN INCOMMENT; }
97+
"<!--" { BEGIN INCOMMENT; startTag(); }
6098

6199
<INCOMMENT>"-->" {
62100
BEGIN INITIAL;
63-
*tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
64-
token = tsearch2_yytext;
65-
tokenlen = tsearch2_yyleng;
66-
return SPACE;
101+
addTag();
102+
token = ts.str;
103+
tokenlen = ts.clen;
104+
return TAG;
67105
}
68106

69107

70-
"<"[\![:alpha:]] { BEGIN INTAG; }
108+
"<"[\![:alpha:]] { BEGIN INTAG; startTag(); }
71109

72-
"</"[[:alpha:]] { BEGIN INTAG; }
110+
"</"[[:alpha:]] { BEGIN INTAG; startTag(); }
73111

74-
<INTAG>"\"" { BEGIN QINTAG; }
112+
<INTAG>"\"" { BEGIN QINTAG; addTag(); }
75113

76-
<QINTAG>"\\\"" ;
114+
<QINTAG>"\\\"" { addTag(); }
77115

78-
<QINTAG>"\"" { BEGIN INTAG; }
116+
<QINTAG>"\"" { BEGIN INTAG; addTag(); }
79117

80118
<INTAG>">" {
81119
BEGIN INITIAL;
82-
token = tsearch2_yytext;
83-
*tsearch2_yytext=' ';
84-
token = tsearch2_yytext;
85-
tokenlen = 1;
120+
addTag();
121+
token = ts.str;
122+
tokenlen = ts.clen;
86123
return TAG;
87124
}
88125

89-
<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n ;
126+
<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n { addTag(); }
90127

91128
\&(quot|amp|nbsp|lt|gt)\; {
92129
token = tsearch2_yytext;
@@ -295,3 +332,4 @@ void tsearch2_start_parse_str(char* str, int limit) {
295332
tsearch2_yy_switch_to_buffer( buf );
296333
BEGIN INITIAL;
297334
}
335+

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy