1
0
mirror of https://https.git.savannah.gnu.org/git/gnulib.git synced 2026-04-28 06:33:36 +00:00
Files
gnulib/tests/test-bcp47.c
Bruno Haible 6f68489dda bcp47: Fix handling of variants of length 4.
Reported by Patrice Dumas <pertusus@gnu.org> in
<https://lists.gnu.org/archive/html/bug-gnulib/2026-04/msg00154.html>.

* lib/bcp47.c (bcp47_to_xpg): Don't parse a variant of length 4 that
contains some digits as a script.
* tests/test-bcp47.c (main): Add a test case with a variant of length 4.
2026-04-24 15:58:30 +02:00

209 lines
5.7 KiB
C

/* Test support for locale names in BCP 47 syntax.
Copyright (C) 2024-2026 Free Software Foundation, Inc.
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Written by Bruno Haible <bruno@clisp.org>, 2024. */
#include <config.h>
#include "bcp47.h"
#include <string.h>
#include "macros.h"
/* Check that the XPG locale name XPG and the BCP 47 tag BCP47 are
   converted into one another, in both directions.  No encoding is
   passed to bcp47_to_xpg (its third argument is NULL).  */
static void
test_correspondence (const char *xpg, const char *bcp47)
{
  /* One scratch buffer serves both directions.  It is refilled with the
     non-NUL byte 0x77 before each conversion, so no terminator is left
     over from an earlier use.  */
  char buf[BCP47_MAX];

  /* Direction 1: xpg_to_bcp47.  */
  memset (buf, 0x77, sizeof buf);
  xpg_to_bcp47 (buf, xpg);
  ASSERT (streq (buf, bcp47));

  /* Direction 2: bcp47_to_xpg.  */
  memset (buf, 0x77, sizeof buf);
  bcp47_to_xpg (buf, bcp47, NULL);
  ASSERT (streq (buf, xpg));
}
/* Run all checks and return test_exit_status.

   Name pairs that must convert in both directions go through
   test_correspondence.  Cases that only hold in one direction, or that
   involve an encoding, are checked inline: each one fills a fresh buffer
   with the non-NUL byte 0x77, runs one conversion, and compares the
   result.

   NOTE(review): test_exit_status is not defined in this file; presumably
   it is provided by "macros.h" -- confirm.  */
int
main (void)
{
  /* Languages with a single script.  */
  test_correspondence ("de", "de");
  test_correspondence ("de_DE", "de-DE");
  test_correspondence ("de_AT", "de-AT");

  /* Languages with a script that depends on the territory.  */
  test_correspondence ("az_AZ", "az-Latn-AZ");
  test_correspondence ("az_AZ@cyrillic", "az-Cyrl-AZ");
  test_correspondence ("az_IR", "az-Arab-IR");
  test_correspondence ("ku_IQ", "ku-Arab-IQ");
  test_correspondence ("ku_IR", "ku-Arab-IR");
  test_correspondence ("ku_SY", "ku-Latn-SY");
  test_correspondence ("ku_TR", "ku-Latn-TR");
  test_correspondence ("pa_PK", "pa-Arab-PK");
  test_correspondence ("pa_IN", "pa-Guru-IN");
  test_correspondence ("zh_CN", "zh-Hans-CN");
  test_correspondence ("zh_HK", "zh-Hant-HK");
  test_correspondence ("zh_MO", "zh-Hant-MO");
  test_correspondence ("zh_SG", "zh-Hans-SG");
  test_correspondence ("zh_TW", "zh-Hant-TW");

  /* Languages with a main script and one or more alternate scripts.  */
  test_correspondence ("be_BY", "be-Cyrl-BY");
  test_correspondence ("be_BY@latin", "be-Latn-BY");
  test_correspondence ("ber@arabic", "ber-Arab");
  test_correspondence ("ber", "ber-Latn");
  test_correspondence ("ber_DZ", "ber-Latn-DZ");
  test_correspondence ("ber_MA", "ber-Latn-MA");
  test_correspondence ("bs_BA", "bs-Latn-BA");
  test_correspondence ("bs_BA@cyrillic", "bs-Cyrl-BA");
  test_correspondence ("ha_NG", "ha-Latn-NG");
  test_correspondence ("ha_NG@arabic", "ha-Arab-NG");
  test_correspondence ("iu_CA", "iu-Cans-CA");
  test_correspondence ("iu_CA@latin", "iu-Latn-CA");
  test_correspondence ("kk_KZ", "kk-Cyrl-KZ");
  test_correspondence ("kk_KZ@latin", "kk-Latn-KZ");
  test_correspondence ("ks_IN", "ks-Arab-IN");
  test_correspondence ("ks_IN@devanagari", "ks-Deva-IN");
  test_correspondence ("mn_MN", "mn-Cyrl-MN");
  test_correspondence ("mn_MN@mongolian", "mn-Mong-MN");
  test_correspondence ("nan_TW", "nan-Hant-TW");
  test_correspondence ("nan_TW@latin", "nan-Latn-TW");
  test_correspondence ("sd_PK", "sd-Arab-PK");
  test_correspondence ("sd_IN", "sd-Arab-IN");
  test_correspondence ("sd_IN@devanagari", "sd-Deva-IN");
  test_correspondence ("sr_BA@latin", "sr-Latn-BA");
  test_correspondence ("sr_BA", "sr-Cyrl-BA");
  test_correspondence ("sr_RS", "sr-Cyrl-RS");
  test_correspondence ("sr_RS@latin", "sr-Latn-RS");
  test_correspondence ("uz_UZ", "uz-Latn-UZ");
  test_correspondence ("uz_UZ@cyrillic", "uz-Cyrl-UZ");
  test_correspondence ("yi_US", "yi-Hebr-US");
  test_correspondence ("yi_US@latin", "yi-Latn-US");

  /* For Quechua, Microsoft uses the ISO 639-3 code "quz" instead of the
     ISO 639-1 code "qu".  Only the BCP 47 -> XPG direction is checked.  */
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    bcp47_to_xpg (buf, "quz-PE", NULL);
    ASSERT (streq (buf, "qu_PE"));
  }

  /* For Tamazight, Microsoft uses the ISO 639-3 code "tzm" instead of the
     ISO 639-2 code "ber".  Only the BCP 47 -> XPG direction is checked.  */
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    bcp47_to_xpg (buf, "tzm-MA", NULL);
    ASSERT (streq (buf, "ber_MA"));
  }

  /* Languages with a regional variant.  */
  test_correspondence ("ca", "ca");
  test_correspondence ("ca@valencia", "ca-valencia");

  /* Languages with a spelling rules variant.  Here the variant is four
     characters long, the same length as the script subtags used above
     ("Latn", "Cyrl", ...), but it consists of digits.  */
  test_correspondence ("de@1901", "de-1901");

  /* Test xpg_to_bcp47 with an encoding: the ".UTF-8" part of the input
     does not appear in the expected result.  */
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    xpg_to_bcp47 (buf, "en_US.UTF-8");
    ASSERT (streq (buf, "en-US"));
  }
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    xpg_to_bcp47 (buf, "az_AZ.UTF-8@cyrillic");
    ASSERT (streq (buf, "az-Cyrl-AZ"));
  }

  /* Test bcp47_to_xpg with an encoding: the encoding is expected after
     the territory and before any '@' modifier.  */
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    bcp47_to_xpg (buf, "en-US", "UTF-8");
    ASSERT (streq (buf, "en_US.UTF-8"));
  }
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    bcp47_to_xpg (buf, "az-Cyrl-AZ", "UTF-8");
    ASSERT (streq (buf, "az_AZ.UTF-8@cyrillic"));
  }

  /* Test case mapping done by bcp47_to_xpg: whatever the case of the
     input, the expected result has a lowercase language and an uppercase
     territory.  */
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    bcp47_to_xpg (buf, "EN-US", "UTF-8");
    ASSERT (streq (buf, "en_US.UTF-8"));
  }
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    bcp47_to_xpg (buf, "en-us", "UTF-8");
    ASSERT (streq (buf, "en_US.UTF-8"));
  }
  {
    char buf[BCP47_MAX];
    memset (buf, 0x77, BCP47_MAX);
    bcp47_to_xpg (buf, "Zh-hANs-cN", "UTF-8");
    ASSERT (streq (buf, "zh_CN.UTF-8"));
  }

  return test_exit_status;
}