Skip to content

Commit 00e1ca8

Browse files
committed
Break words by dots
1 parent 833e5b2 commit 00e1ca8

File tree

4 files changed

+54
-3
lines changed

4 files changed

+54
-3
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.o
2+
*.so
3+
results

expected/pg_tsparser.out

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
4040
12 | '
4141
14 | http://
4242
6 | www.com
43+
1 | www
44+
12 | .
45+
1 | com
4346
12 | /
4447
14 | http://
4548
5 | aew.werc.ewr/?ad=qwe&dw
@@ -51,6 +54,12 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
5154
18 | /?ad=qwe&dw
5255
12 |
5356
6 | 2aew.werc.ewr
57+
3 | 2aew
58+
12 | .
59+
6 | werc.ewr
60+
1 | werc
61+
12 | .
62+
1 | ewr
5463
12 |
5564
14 | http://
5665
5 | 3aew.werc.ewr/?ad=qwe&dw
@@ -59,6 +68,12 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
5968
12 |
6069
14 | http://
6170
6 | 4aew.werc.ewr
71+
3 | 4aew
72+
12 | .
73+
6 | werc.ewr
74+
1 | werc
75+
12 | .
76+
1 | ewr
6277
12 |
6378
14 | http://
6479
5 | 5aew.werc.ewr:8100/?
@@ -177,7 +192,7 @@ SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.
177192
12 |
178193
12 | <>
179194
1 | qwerty
180-
(143 rows)
195+
(158 rows)
181196

182197
-- Test text search configuration with parser
183198
CREATE TEXT SEARCH CONFIGURATION english_ts (
@@ -209,3 +224,15 @@ SELECT to_tsvector('english_ts', '12-abc');
209224
'12':2 '12-abc':1 'abc':3
210225
(1 row)
211226

227+
SELECT to_tsvector('english_ts', 'test.com');
228+
to_tsvector
229+
-------------------------------
230+
'com':3 'test':2 'test.com':1
231+
(1 row)
232+
233+
SELECT to_tsvector('english_ts', 'test2.com');
234+
to_tsvector
235+
---------------------------------
236+
'com':3 'test2':2 'test2.com':1
237+
(1 row)
238+

sql/pg_tsparser.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ ALTER TEXT SEARCH CONFIGURATION english_ts
2424
SELECT to_tsvector('english_ts', 'pg_trgm');
2525
SELECT to_tsvector('english_ts', '12_abc');
2626
SELECT to_tsvector('english_ts', '12-abc');
27+
SELECT to_tsvector('english_ts', 'test.com');
28+
SELECT to_tsvector('english_ts', 'test2.com');

tsparser.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ typedef enum
171171
TPS_InPort,
172172
TPS_InHostFirstAN,
173173
TPS_InHost,
174+
TPS_InHostAsciiWord,
175+
TPS_InHostNumWord,
174176
TPS_InEmail,
175177
TPS_InFileFirst,
176178
TPS_InFileTwiddle,
@@ -1443,7 +1445,7 @@ static const TParserStateActionItem actionTPS_InHostDomainSecond[] = {
14431445
};
14441446

14451447
static const TParserStateActionItem actionTPS_InHostDomain[] = {
1446-
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
1448+
{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InHostAsciiWord, HOST, SpecialHyphen},
14471449
{p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},
14481450
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
14491451
{p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
@@ -1454,7 +1456,22 @@ static const TParserStateActionItem actionTPS_InHostDomain[] = {
14541456
{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
14551457
{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},
14561458
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
1457-
{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
1459+
{NULL, 0, A_BINGO | A_CLRALL, TPS_InHostAsciiWord, HOST, SpecialHyphen}
1460+
};
1461+
1462+
static const TParserStateActionItem actionTPS_InHostAsciiWord[] = {
1463+
{p_isEOF, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL},
1464+
{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
1465+
{p_isdigit, 0, A_NEXT, TPS_InHostNumWord, 0, NULL},
1466+
{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
1467+
{p_isspecial, 0, A_NEXT, TPS_InWord, 0, NULL},
1468+
{NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
1469+
};
1470+
1471+
static const TParserStateActionItem actionTPS_InHostNumWord[] = {
1472+
{p_isEOF, 0, A_BINGO, TPS_Base, NUMWORD, NULL},
1473+
{p_isalnum, 0, A_NEXT, TPS_InHostNumWord, 0, NULL},
1474+
{NULL, 0, A_BINGO, TPS_Base, NUMWORD, NULL}
14581475
};
14591476

14601477
static const TParserStateActionItem actionTPS_InPortFirst[] = {
@@ -1782,6 +1799,8 @@ static const TParserStateAction Actions[] = {
17821799
TPARSERSTATEACTION(TPS_InPort),
17831800
TPARSERSTATEACTION(TPS_InHostFirstAN),
17841801
TPARSERSTATEACTION(TPS_InHost),
1802+
TPARSERSTATEACTION(TPS_InHostAsciiWord),
1803+
TPARSERSTATEACTION(TPS_InHostNumWord),
17851804
TPARSERSTATEACTION(TPS_InEmail),
17861805
TPARSERSTATEACTION(TPS_InFileFirst),
17871806
TPARSERSTATEACTION(TPS_InFileTwiddle),

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy