Skip to content

Commit 2a1b349

Browse files
committed
Make the locale comparison in pg_upgrade more lenient
If the locale names are not equal, try to canonicalize both of them by passing them to setlocale(). Before, we only canonicalized the old cluster's locale if upgrading from an 8.4-9.2 server, but we also need to canonicalize when upgrading from a pre-8.4 server. That was an oversight in the code. But we should also canonicalize on newer server versions, so that we cope if the canonical form changes from one release to another. I'm about to do just that to fix bug #11431, by mapping a locale name that contains non-ASCII characters to a pure-ASCII alias of the same locale. This is a partial backpatch of commit 33755e8 in master. Apply to 9.2, 9.3 and 9.4. The canonicalization code didn't exist before 9.2. In 9.2 and 9.3, this effectively also back-patches the changes from commit 5827472, to be more lax about the spelling of the encoding in the locale names.
1 parent 385f0d9 commit 2a1b349

File tree

1 file changed

+80
-32
lines changed

1 file changed

+80
-32
lines changed

contrib/pg_upgrade/check.c

Lines changed: 80 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
1616
static void check_new_cluster_is_empty(void);
1717
static void check_locale_and_encoding(ControlData *oldctrl,
1818
ControlData *newctrl);
19+
static bool equivalent_locale(int category, const char *loca, const char *locb);
20+
static bool equivalent_encoding(const char *chara, const char *charb);
1921
static void check_is_super_user(ClusterInfo *cluster);
2022
static void check_for_prepared_transactions(ClusterInfo *cluster);
2123
static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
@@ -361,23 +363,8 @@ set_locale_and_encoding(ClusterInfo *cluster)
361363
i_datcollate = PQfnumber(res, "datcollate");
362364
i_datctype = PQfnumber(res, "datctype");
363365

364-
if (GET_MAJOR_VERSION(cluster->major_version) < 902)
365-
{
366-
/*
367-
* Pre-9.2 did not canonicalize the supplied locale names to match
368-
* what the system returns, while 9.2+ does, so convert pre-9.2 to
369-
* match.
370-
*/
371-
ctrl->lc_collate = get_canonical_locale_name(LC_COLLATE,
372-
pg_strdup(PQgetvalue(res, 0, i_datcollate)));
373-
ctrl->lc_ctype = get_canonical_locale_name(LC_CTYPE,
374-
pg_strdup(PQgetvalue(res, 0, i_datctype)));
375-
}
376-
else
377-
{
378-
ctrl->lc_collate = pg_strdup(PQgetvalue(res, 0, i_datcollate));
379-
ctrl->lc_ctype = pg_strdup(PQgetvalue(res, 0, i_datctype));
380-
}
366+
ctrl->lc_collate = pg_strdup(PQgetvalue(res, 0, i_datcollate));
367+
ctrl->lc_ctype = pg_strdup(PQgetvalue(res, 0, i_datctype));
381368

382369
PQclear(res);
383370
}
@@ -407,23 +394,84 @@ static void
407394
check_locale_and_encoding(ControlData *oldctrl,
408395
ControlData *newctrl)
409396
{
397+
if (!equivalent_locale(LC_COLLATE, oldctrl->lc_collate, newctrl->lc_collate))
398+
pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
399+
oldctrl->lc_collate, newctrl->lc_collate);
400+
if (!equivalent_locale(LC_CTYPE, oldctrl->lc_ctype, newctrl->lc_ctype))
401+
pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
402+
oldctrl->lc_ctype, newctrl->lc_ctype);
403+
if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
404+
pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
405+
oldctrl->encoding, newctrl->encoding);
406+
}
407+
408+
/*
409+
* equivalent_locale()
410+
*
411+
* Best effort locale-name comparison. Return false if we are not 100% sure
412+
* the locales are equivalent.
413+
*
414+
* Note: The encoding parts of the names are ignored. This function is
415+
* currently used to compare locale names stored in pg_database, and
416+
* pg_database contains a separate encoding field. That's compared directly
417+
* in check_locale_and_encoding().
418+
*/
419+
static bool
420+
equivalent_locale(int category, const char *loca, const char *locb)
421+
{
422+
const char *chara;
423+
const char *charb;
424+
char *canona;
425+
char *canonb;
426+
int lena;
427+
int lenb;
428+
410429
/*
411-
* These are often defined with inconsistent case, so use pg_strcasecmp().
412-
* They also often use inconsistent hyphenation, which we cannot fix, e.g.
413-
* UTF-8 vs. UTF8, so at least we display the mismatching values.
430+
* If the names are equal, the locales are equivalent. Checking this
431+
* first avoids calling setlocale() in the common case that the names
432+
* are equal. That's a good thing, if setlocale() is buggy, for example.
414433
*/
415-
if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
416-
pg_log(PG_FATAL,
417-
"lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
418-
oldctrl->lc_collate, newctrl->lc_collate);
419-
if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
420-
pg_log(PG_FATAL,
421-
"lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
422-
oldctrl->lc_ctype, newctrl->lc_ctype);
423-
if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
424-
pg_log(PG_FATAL,
425-
"encoding cluster values do not match: old \"%s\", new \"%s\"\n",
426-
oldctrl->encoding, newctrl->encoding);
434+
if (pg_strcasecmp(loca, locb) == 0)
435+
return true;
436+
437+
/*
438+
* Not identical. Canonicalize both names, remove the encoding parts,
439+
* and try again.
440+
*/
441+
canona = get_canonical_locale_name(category, loca);
442+
chara = strrchr(canona, '.');
443+
lena = chara ? (chara - canona) : strlen(canona);
444+
445+
canonb = get_canonical_locale_name(category, locb);
446+
charb = strrchr(canonb, '.');
447+
lenb = charb ? (charb - canonb) : strlen(canonb);
448+
449+
if (lena == lenb && pg_strncasecmp(canona, canonb, lena) == 0)
450+
return true;
451+
452+
return false;
453+
}
454+
455+
/*
456+
* equivalent_encoding()
457+
*
458+
* Best effort encoding-name comparison. Return true only if the encodings
459+
* are valid server-side encodings and known equivalent.
460+
*
461+
* Because the lookup in pg_valid_server_encoding() does case folding and
462+
* ignores non-alphanumeric characters, this will recognize many popular
463+
* variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
464+
*/
465+
static bool
466+
equivalent_encoding(const char *chara, const char *charb)
467+
{
468+
int enca = pg_valid_server_encoding(chara);
469+
int encb = pg_valid_server_encoding(charb);
470+
471+
if (enca < 0 || encb < 0)
472+
return false;
473+
474+
return (enca == encb);
427475
}
428476

429477

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy