Skip to content

Commit 4f94b49

Browse files
committed
Improve word parser.
- allow '~' in filenames
- '-8.2.1' is now parsed as '-' and '8.2.1' instead of '-8.2', '.', '1'
- '.text' is no longer treated as a file
1 parent 95aca45 commit 4f94b49

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

contrib/tsearch2/wordparser/parser.c

Lines changed: 47 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,15 @@ SpecialHyphen(TParser * prs)
243243
prs->state->poschar -= prs->state->lencharlexeme;
244244
}
245245

246+
static void
247+
SpecialVerVersion(TParser * prs)
248+
{
249+
prs->state->posbyte -= prs->state->lenbytelexeme;
250+
prs->state->poschar -= prs->state->lencharlexeme;
251+
prs->state->lenbytelexeme = 0;
252+
prs->state->lencharlexeme = 0;
253+
}
254+
246255
static int
247256
p_isstophost(TParser * prs)
248257
{
@@ -326,8 +335,9 @@ static TParserStateActionItem actionTPS_Base[] = {
326335
{p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
327336
{p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
328337
{p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL},
338+
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
329339
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
330-
{p_iseqC, '.', A_PUSH, TPS_InPathFirst, 0, NULL},
340+
{p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL},
331341
{NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}
332342
};
333343

@@ -429,11 +439,25 @@ static TParserStateActionItem actionTPS_InDecimalFirst[] = {
429439
static TParserStateActionItem actionTPS_InDecimal[] = {
430440
{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
431441
{p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
442+
{p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL},
432443
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
433444
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
434445
{NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
435446
};
436447

448+
static TParserStateActionItem actionTPS_InVerVersion[] = {
449+
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
450+
{p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion},
451+
{NULL, 0, A_POP, TPS_Null, 0, NULL}
452+
};
453+
454+
static TParserStateActionItem actionTPS_InSVerVersion[] = {
455+
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
456+
{p_isdigit, 0, A_BINGO, TPS_InUnsignedInt, SPACE, NULL},
457+
{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
458+
};
459+
460+
437461
static TParserStateActionItem actionTPS_InVersionFirst[] = {
438462
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
439463
{p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},
@@ -537,6 +561,7 @@ static TParserStateActionItem actionTPS_InTag[] = {
537561
{p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL},
538562
{p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL},
539563
{p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
564+
{p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL},
540565
{p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},
541566
{NULL, 0, A_POP, TPS_Null, 0, NULL}
542567
};
@@ -676,6 +701,16 @@ static TParserStateActionItem actionTPS_InFileFirst[] = {
676701
{p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},
677702
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
678703
{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
704+
{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
705+
{NULL, 0, A_POP, TPS_Null, 0, NULL}
706+
};
707+
708+
static TParserStateActionItem actionTPS_InFileTwiddle[] = {
709+
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
710+
{p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL},
711+
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
712+
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
713+
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
679714
{NULL, 0, A_POP, TPS_Null, 0, NULL}
680715
};
681716

@@ -689,6 +724,13 @@ static TParserStateActionItem actionTPS_InPathFirst[] = {
689724
{NULL, 0, A_POP, TPS_Null, 0, NULL}
690725
};
691726

727+
static TParserStateActionItem actionTPS_InPathFirstFirst[] = {
728+
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
729+
{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
730+
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
731+
{NULL, 0, A_POP, TPS_Null, 0, NULL}
732+
};
733+
692734
static TParserStateActionItem actionTPS_InPathSecond[] = {
693735
{p_isEOF, 0, A_BINGO|A_CLEAR, TPS_Base, FILEPATH, NULL},
694736
{p_iseqC, '/', A_NEXT|A_PUSH, TPS_InFileFirst, 0, NULL},
@@ -920,6 +962,8 @@ static const TParserStateAction Actions[] = {
920962
{TPS_InUDecimal, actionTPS_InUDecimal},
921963
{TPS_InDecimalFirst, actionTPS_InDecimalFirst},
922964
{TPS_InDecimal, actionTPS_InDecimal},
965+
{TPS_InVerVersion, actionTPS_InVerVersion},
966+
{TPS_InSVerVersion, actionTPS_InSVerVersion},
923967
{TPS_InVersionFirst, actionTPS_InVersionFirst},
924968
{TPS_InVersion, actionTPS_InVersion},
925969
{TPS_InMantissaFirst, actionTPS_InMantissaFirst},
@@ -953,7 +997,9 @@ static const TParserStateAction Actions[] = {
953997
{TPS_InHost, actionTPS_InHost},
954998
{TPS_InEmail, actionTPS_InEmail},
955999
{TPS_InFileFirst, actionTPS_InFileFirst},
1000+
{TPS_InFileTwiddle, actionTPS_InFileTwiddle},
9561001
{TPS_InPathFirst, actionTPS_InPathFirst},
1002+
{TPS_InPathFirstFirst, actionTPS_InPathFirstFirst},
9571003
{TPS_InPathSecond, actionTPS_InPathSecond},
9581004
{TPS_InFile, actionTPS_InFile},
9591005
{TPS_InFileNext, actionTPS_InFileNext},

contrib/tsearch2/wordparser/parser.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ typedef enum
1919
TPS_InUDecimal,
2020
TPS_InDecimalFirst,
2121
TPS_InDecimal,
22+
TPS_InVerVersion,
23+
TPS_InSVerVersion,
2224
TPS_InVersionFirst,
2325
TPS_InVersion,
2426
TPS_InMantissaFirst,
@@ -52,7 +54,9 @@ typedef enum
5254
TPS_InHost,
5355
TPS_InEmail,
5456
TPS_InFileFirst,
57+
TPS_InFileTwiddle,
5558
TPS_InPathFirst,
59+
TPS_InPathFirstFirst,
5660
TPS_InPathSecond,
5761
TPS_InFile,
5862
TPS_InFileNext,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy