Skip to content

Commit 04e9704

Browse files
committed
Now the ispell dictionary can read dictionaries in the MySpell format
used by OpenOffice. Dictionaries are available at http://lingucomponent.openoffice.org/spell_dic.html. The dictionary automatically recognizes the format of the files. Warning: the MySpell format has a limitation in its compound-word support: it is impossible to mark an affix as a compound-only affix. So for Norwegian, German, and similar languages it is recommended to use the original ispell format. For that reason I don't want to remove the my2ispell scripts; they have a workaround, at least for the Norwegian language.
1 parent 1a1326d commit 04e9704

File tree

2 files changed

+92
-3
lines changed

2 files changed

+92
-3
lines changed

contrib/tsearch2/ispell/spell.c

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
391391
char flagflags = 0;
392392
FILE *affix;
393393
int line=0;
394+
int oldformat = 0;
394395

395396
if (!(affix = fopen(filename, "r")))
396397
return (1);
@@ -412,19 +413,22 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
412413
while (*s && t_isspace(s)) s++;
413414
if ( *s && pg_mblen(s) == 1 )
414415
Conf->compoundcontrol = *s;
416+
oldformat++;
415417
continue;
416418
}
417419
}
418420
if (STRNCMP(tmpstr, "suffixes") == 0)
419421
{
420422
suffixes = 1;
421423
prefixes = 0;
424+
oldformat++;
422425
continue;
423426
}
424427
if (STRNCMP(tmpstr, "prefixes") == 0)
425428
{
426429
suffixes = 0;
427430
prefixes = 1;
431+
oldformat++;
428432
continue;
429433
}
430434
if (STRNCMP(tmpstr, "flag") == 0)
@@ -433,10 +437,11 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
433437
flagflags = 0;
434438

435439
while (*s && t_isspace(s)) s++;
440+
oldformat++;
436441

437442
/* allow only single-encoded flags */
438-
if ( pg_mblen(s) != 1 )
439-
continue;
443+
if ( pg_mblen(s) != 1 )
444+
elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
440445

441446
if (*s == '*')
442447
{
@@ -455,12 +460,22 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
455460
/* allow only single-encoded flags */
456461
if ( pg_mblen(s) != 1 ) {
457462
flagflags = 0;
458-
continue;
463+
elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
459464
}
460465

461466
flag = (unsigned char) *s;
462467
continue;
463468
}
469+
if ( STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 ||
470+
STRNCMP(str, "PFX")==0 || STRNCMP(str, "SFX")==0 ) {
471+
472+
if ( oldformat )
473+
elog(ERROR,"Wrong affix file format");
474+
475+
fclose(affix);
476+
return NIImportOOAffixes(Conf, filename);
477+
478+
}
464479
if ((!suffixes) && (!prefixes))
465480
continue;
466481

@@ -475,6 +490,79 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
475490
return (0);
476491
}
477492

/*
 * NIImportOOAffixes - load affix rules from a MySpell (OpenOffice) style
 * affix file into Conf.
 *
 * The file is read line by line:
 *   - a COMPOUNDFLAG line sets Conf->compoundcontrol;
 *   - a pfx/sfx line with exactly four fields is treated as a header that
 *     selects the current flag, the prefix/suffix kind and the
 *     cross-product setting;
 *   - a pfx/sfx line with five fields is treated as a rule and handed to
 *     NIAddAffix() with the flag of the most recent header;
 *   - every other line is ignored.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
493+
int
494+
NIImportOOAffixes(IspellDict * Conf, const char *filename) {
495+
char str[BUFSIZ];
496+
char type[BUFSIZ];
497+
char sflag[BUFSIZ];
498+
char mask[BUFSIZ];
499+
char find[BUFSIZ];
500+
char repl[BUFSIZ];
501+
bool isSuffix = false;
502+
int flag = 0;
503+
char flagflags = 0;
504+
FILE *affix;
/* line is incremented per input line but not otherwise used in this function */
505+
int line=0;
506+
int scanread = 0;
507+
char scanbuf[BUFSIZ];
508+
/*
 * Build the sscanf format at run time: "%6s" for the type field, then four
 * "%Ns" fields with N = BUFSIZ/5, so each field stays well inside its
 * BUFSIZ-sized destination buffer.
 */
509+
sprintf(scanbuf,"%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ/5, BUFSIZ/5, BUFSIZ/5, BUFSIZ/5);
510+
511+
if (!(affix = fopen(filename, "r")))
512+
return (1);
/* initial compound flag; replaced if the file has a usable COMPOUNDFLAG line */
513+
Conf->compoundcontrol = '\t';
514+
515+
while (fgets(str, sizeof(str), affix))
516+
{
517+
line++;
/* skip empty lines, lines for which t_isspace(str) holds, and '#' lines */
518+
if ( *str == '\0' || t_isspace(str) || t_iseq(str,'#') )
519+
continue;
/*
 * NOTE(review): presumably validates the multibyte encoding and raises an
 * error on bad input (third argument false); if so, affix is left open on
 * that path -- confirm against pg_verifymbstr's definition.
 */
520+
pg_verifymbstr( str, strlen(str), false);
521+
/*
 * COMPOUNDFLAG <c>: the first non-space character after the keyword
 * becomes the compound flag, but only when pg_mblen() reports it as a
 * single-byte character; otherwise the line has no effect.
 */
522+
if ( STRNCMP(str, "COMPOUNDFLAG")==0 ) {
523+
char *s = str+strlen("COMPOUNDFLAG");
524+
while (*s && t_isspace(s)) s++;
525+
if ( *s && pg_mblen(s) == 1 )
526+
Conf->compoundcontrol = *s;
527+
continue;
528+
}
529+
/* split the line into up to five whitespace-separated fields */
530+
scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask);
531+
/*
 * NOTE(review): lowerstr() presumably lowercases its argument in place;
 * the "sfx"/"pfx" comparisons below depend on that -- confirm.
 */
532+
lowerstr(type);
/* ignore lines with fewer than four fields or a type other than sfx/pfx */
533+
if ( scanread<4 || (STRNCMP(type,"sfx") && STRNCMP(type,"pfx")) )
534+
continue;
535+
/*
 * Four fields: header line. The second field is the flag, the third is
 * tested against 'y' for cross-product; the fourth field (read into repl)
 * is not used here.
 */
536+
if ( scanread == 4 ) {
/* only one-byte flags are accepted */
537+
if ( strlen(sflag) != 1 )
538+
continue;
539+
flag = *sflag;
540+
isSuffix = (STRNCMP(type,"sfx")==0) ? true : false;
541+
lowerstr(find);
542+
if ( t_iseq(find,'y') )
543+
flagflags |= FF_CROSSPRODUCT;
544+
else
545+
flagflags = 0;
546+
} else {
/*
 * Five fields: rule line. It is used only if its flag is one byte long
 * and equals the flag of the most recent header; flag==0 means no header
 * has been accepted yet.
 */
547+
if ( strlen(sflag) != 1 || flag != *sflag || flag==0 )
548+
continue;
549+
lowerstr(repl);
550+
lowerstr(find);
551+
lowerstr(mask);
/* a find/repl field matched by t_iseq(...,'0') is replaced by the empty string */
552+
if ( t_iseq(find,'0') )
553+
*find = '\0';
554+
if ( t_iseq(repl,'0') )
555+
*repl = '\0';
556+
557+
NIAddAffix(Conf, flag, flagflags, mask, find, repl, isSuffix ? FF_SUFFIX : FF_PREFIX);
558+
}
559+
}
560+
561+
fclose(affix);
562+
563+
return 0;
564+
}
565+
478566
static int
479567
MergeAffix(IspellDict * Conf, int a1, int a2)
480568
{

contrib/tsearch2/ispell/spell.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ typedef struct
121121

122122
TSLexeme *NINormalizeWord(IspellDict * Conf, char *word);
123123
int NIImportAffixes(IspellDict * Conf, const char *filename);
124+
int NIImportOOAffixes(IspellDict * Conf, const char *filename);
124125
int NIImportDictionary(IspellDict * Conf, const char *filename);
125126

126127
int NIAddSpell(IspellDict * Conf, const char *word, const char *flag);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy