mirror of
https://https.git.savannah.gnu.org/git/gnulib.git
synced 2026-04-28 06:33:36 +00:00
Reported by Patrice Dumas <pertusus@gnu.org> in <https://lists.gnu.org/archive/html/bug-gnulib/2026-04/msg00154.html>. * lib/bcp47.c (bcp47_to_xpg): Don't parse a variant of length 4 that contains some digits as a script. * tests/test-bcp47.c (main): Add a test case with a variant of length 4.
209 lines
5.7 KiB
C
209 lines
5.7 KiB
C
/* Test support for locale names in BCP 47 syntax.
   Copyright (C) 2024-2026 Free Software Foundation, Inc.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
|
|
|
|
/* Written by Bruno Haible <bruno@clisp.org>, 2024. */
|
|
|
|
#include <config.h>
|
|
|
|
#include "bcp47.h"
|
|
|
|
#include <string.h>
|
|
|
|
#include "macros.h"
|
|
|
|
static void
|
|
test_correspondence (const char *xpg, const char *bcp47)
|
|
{
|
|
/* Test xpg_to_bcp47. */
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
xpg_to_bcp47 (buf, xpg);
|
|
ASSERT (streq (buf, bcp47));
|
|
}
|
|
|
|
/* Test bcp47_to_xpg. */
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, bcp47, NULL);
|
|
ASSERT (streq (buf, xpg));
|
|
}
|
|
}
|
|
|
|
int
|
|
main ()
|
|
{
|
|
/* Languages with a single script. */
|
|
|
|
test_correspondence ("de", "de");
|
|
test_correspondence ("de_DE", "de-DE");
|
|
test_correspondence ("de_AT", "de-AT");
|
|
|
|
/* Languages with a script that depends on the territory. */
|
|
|
|
test_correspondence ("az_AZ", "az-Latn-AZ");
|
|
test_correspondence ("az_AZ@cyrillic", "az-Cyrl-AZ");
|
|
test_correspondence ("az_IR", "az-Arab-IR");
|
|
|
|
test_correspondence ("ku_IQ", "ku-Arab-IQ");
|
|
test_correspondence ("ku_IR", "ku-Arab-IR");
|
|
test_correspondence ("ku_SY", "ku-Latn-SY");
|
|
test_correspondence ("ku_TR", "ku-Latn-TR");
|
|
|
|
test_correspondence ("pa_PK", "pa-Arab-PK");
|
|
test_correspondence ("pa_IN", "pa-Guru-IN");
|
|
|
|
test_correspondence ("zh_CN", "zh-Hans-CN");
|
|
test_correspondence ("zh_HK", "zh-Hant-HK");
|
|
test_correspondence ("zh_MO", "zh-Hant-MO");
|
|
test_correspondence ("zh_SG", "zh-Hans-SG");
|
|
test_correspondence ("zh_TW", "zh-Hant-TW");
|
|
|
|
/* Languages with a main script and one or more alternate scripts. */
|
|
|
|
test_correspondence ("be_BY", "be-Cyrl-BY");
|
|
test_correspondence ("be_BY@latin", "be-Latn-BY");
|
|
|
|
test_correspondence ("ber@arabic", "ber-Arab");
|
|
test_correspondence ("ber", "ber-Latn");
|
|
test_correspondence ("ber_DZ", "ber-Latn-DZ");
|
|
test_correspondence ("ber_MA", "ber-Latn-MA");
|
|
|
|
test_correspondence ("bs_BA", "bs-Latn-BA");
|
|
test_correspondence ("bs_BA@cyrillic", "bs-Cyrl-BA");
|
|
|
|
test_correspondence ("ha_NG", "ha-Latn-NG");
|
|
test_correspondence ("ha_NG@arabic", "ha-Arab-NG");
|
|
|
|
test_correspondence ("iu_CA", "iu-Cans-CA");
|
|
test_correspondence ("iu_CA@latin", "iu-Latn-CA");
|
|
|
|
test_correspondence ("kk_KZ", "kk-Cyrl-KZ");
|
|
test_correspondence ("kk_KZ@latin", "kk-Latn-KZ");
|
|
|
|
test_correspondence ("ks_IN", "ks-Arab-IN");
|
|
test_correspondence ("ks_IN@devanagari", "ks-Deva-IN");
|
|
|
|
test_correspondence ("mn_MN", "mn-Cyrl-MN");
|
|
test_correspondence ("mn_MN@mongolian", "mn-Mong-MN");
|
|
|
|
test_correspondence ("nan_TW", "nan-Hant-TW");
|
|
test_correspondence ("nan_TW@latin", "nan-Latn-TW");
|
|
|
|
test_correspondence ("sd_PK", "sd-Arab-PK");
|
|
test_correspondence ("sd_IN", "sd-Arab-IN");
|
|
test_correspondence ("sd_IN@devanagari", "sd-Deva-IN");
|
|
|
|
test_correspondence ("sr_BA@latin", "sr-Latn-BA");
|
|
test_correspondence ("sr_BA", "sr-Cyrl-BA");
|
|
test_correspondence ("sr_RS", "sr-Cyrl-RS");
|
|
test_correspondence ("sr_RS@latin", "sr-Latn-RS");
|
|
|
|
test_correspondence ("uz_UZ", "uz-Latn-UZ");
|
|
test_correspondence ("uz_UZ@cyrillic", "uz-Cyrl-UZ");
|
|
|
|
test_correspondence ("yi_US", "yi-Hebr-US");
|
|
test_correspondence ("yi_US@latin", "yi-Latn-US");
|
|
|
|
/* For Quechua, Microsoft uses the ISO 639-3 code "quz" instead of the
|
|
ISO 639-1 code "qu". */
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, "quz-PE", NULL);
|
|
ASSERT (streq (buf, "qu_PE"));
|
|
}
|
|
|
|
/* For Tamazight, Microsoft uses the ISO 639-3 code "tzm" instead of the
|
|
ISO 639-2 code "ber". */
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, "tzm-MA", NULL);
|
|
ASSERT (streq (buf, "ber_MA"));
|
|
}
|
|
|
|
/* Languages with a regional variant. */
|
|
|
|
test_correspondence ("ca", "ca");
|
|
test_correspondence ("ca@valencia", "ca-valencia");
|
|
|
|
/* Languages with a spelling rules variant. */
|
|
test_correspondence ("de@1901", "de-1901");
|
|
|
|
/* Test xpg_to_bcp47 with an encoding. */
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
xpg_to_bcp47 (buf, "en_US.UTF-8");
|
|
ASSERT (streq (buf, "en-US"));
|
|
}
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
xpg_to_bcp47 (buf, "az_AZ.UTF-8@cyrillic");
|
|
ASSERT (streq (buf, "az-Cyrl-AZ"));
|
|
}
|
|
|
|
/* Test bcp47_to_xpg with an encoding. */
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, "en-US", "UTF-8");
|
|
ASSERT (streq (buf, "en_US.UTF-8"));
|
|
}
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, "az-Cyrl-AZ", "UTF-8");
|
|
ASSERT (streq (buf, "az_AZ.UTF-8@cyrillic"));
|
|
}
|
|
|
|
/* Test case mapping done by bcp47_to_xpg. */
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, "EN-US", "UTF-8");
|
|
ASSERT (streq (buf, "en_US.UTF-8"));
|
|
}
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, "en-us", "UTF-8");
|
|
ASSERT (streq (buf, "en_US.UTF-8"));
|
|
}
|
|
{
|
|
char buf[BCP47_MAX];
|
|
memset (buf, 0x77, BCP47_MAX);
|
|
|
|
bcp47_to_xpg (buf, "Zh-hANs-cN", "UTF-8");
|
|
ASSERT (streq (buf, "zh_CN.UTF-8"));
|
|
}
|
|
|
|
return test_exit_status;
|
|
}
|