mirror of
https://https.git.savannah.gnu.org/git/gnulib.git
synced 2026-05-13 15:13:36 +00:00
unilbrk: Strengthen tests.
* lib/gen-uni-tables.c: Add shell commands for creating
tests/unilbrk/LineBreakTest.txt.
* tests/unilbrk/LineBreakTest.txt: New file, from unicode.org.
* tests/unilbrk/test-uc-possible-linebreaks.c: New file, based on
tests/uniwbrk/test-uc-wordbreaks.c.
* tests/unilbrk/test-uc-possible-linebreaks.sh: New file, based on
tests/uniwbrk/test-uc-wordbreaks.sh.
* modules/unilbrk/u32-possible-linebreaks-tests (Files): Add them.
(Makefile.am): Arrange to compile test-uc-possible-linebreaks.c and test
test-uc-possible-linebreaks.sh.
This commit is contained in:
14
ChangeLog
14
ChangeLog
@@ -1,3 +1,17 @@
|
||||
2024-09-15 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
unilbrk: Strengthen tests.
|
||||
* lib/gen-uni-tables.c: Add shell commands for creating
|
||||
tests/unilbrk/LineBreakTest.txt.
|
||||
* tests/unilbrk/LineBreakTest.txt: New file, from unicode.org.
|
||||
* tests/unilbrk/test-uc-possible-linebreaks.c: New file, based on
|
||||
tests/uniwbrk/test-uc-wordbreaks.c.
|
||||
* tests/unilbrk/test-uc-possible-linebreaks.sh: New file, based on
|
||||
tests/uniwbrk/test-uc-wordbreaks.sh.
|
||||
* modules/unilbrk/u32-possible-linebreaks-tests (Files): Add them.
|
||||
(Makefile.am): Arrange to compile test-uc-possible-linebreaks.c and test
|
||||
test-uc-possible-linebreaks.sh.
|
||||
|
||||
2024-09-15 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
uniwbrk tests: Modernize code.
|
||||
|
||||
@@ -12232,6 +12232,10 @@ main (int argc, char * argv[])
|
||||
* > ../tests/unigbrk/GraphemeBreakTest.txt \\
|
||||
* && { sed -e 's/^/# /' -e 's/ $//' < /media/nas/bruno/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/license.txt; \\
|
||||
* echo; \\
|
||||
* cat /media/nas/bruno/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/16.0.0/ucd/auxiliary/LineBreakTest.txt; } \\
|
||||
* > ../tests/unilbrk/LineBreakTest.txt \\
|
||||
* && { sed -e 's/^/# /' -e 's/ $//' < /media/nas/bruno/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/license.txt; \\
|
||||
* echo; \\
|
||||
* cat /media/nas/bruno/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/16.0.0/ucd/auxiliary/WordBreakTest.txt; } \\
|
||||
* > ../tests/uniwbrk/WordBreakTest.txt"
|
||||
* End:
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
Files:
|
||||
tests/unilbrk/test-u32-possible-linebreaks.c
|
||||
tests/unilbrk/test-uc-possible-linebreaks.c
|
||||
tests/unilbrk/test-uc-possible-linebreaks.sh
|
||||
tests/unilbrk/LineBreakTest.txt
|
||||
tests/macros.h
|
||||
|
||||
Depends-on:
|
||||
@@ -7,7 +10,9 @@ Depends-on:
|
||||
configure.ac:
|
||||
|
||||
Makefile.am:
|
||||
TESTS += test-u32-possible-linebreaks
|
||||
check_PROGRAMS += test-u32-possible-linebreaks
|
||||
TESTS += test-u32-possible-linebreaks unilbrk/test-uc-possible-linebreaks.sh
|
||||
check_PROGRAMS += test-u32-possible-linebreaks test-uc-possible-linebreaks
|
||||
test_u32_possible_linebreaks_SOURCES = unilbrk/test-u32-possible-linebreaks.c
|
||||
test_u32_possible_linebreaks_LDADD = $(LDADD) $(LIBUNISTRING)
|
||||
test_uc_possible_linebreaks_SOURCES = unilbrk/test-uc-possible-linebreaks.c
|
||||
test_uc_possible_linebreaks_LDADD = $(LDADD) $(LIBUNISTRING)
|
||||
|
||||
16740
tests/unilbrk/LineBreakTest.txt
Normal file
16740
tests/unilbrk/LineBreakTest.txt
Normal file
File diff suppressed because one or more lines are too long
189
tests/unilbrk/test-uc-possible-linebreaks.c
Normal file
189
tests/unilbrk/test-uc-possible-linebreaks.c
Normal file
@@ -0,0 +1,189 @@
|
||||
/* Line break function test, using test data from UCD.
|
||||
Copyright (C) 2024 Free Software Foundation, Inc.
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published
|
||||
by the Free Software Foundation, either version 3 of the License,
|
||||
or (at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>, 2024. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <unilbrk.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
const char *filename;
|
||||
FILE *stream;
|
||||
int exit_code;
|
||||
int lineno;
|
||||
char line[16384];
|
||||
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf (stderr, "usage: %s FILENAME\n"
|
||||
"where FILENAME is the location of the LineBreakTest.txt test file.\n",
|
||||
argv[0]);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
filename = argv[1];
|
||||
stream = fopen (filename, "r");
|
||||
if (stream == NULL)
|
||||
{
|
||||
fprintf (stderr, "error during fopen of '%s'\n", filename);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
exit_code = 0;
|
||||
lineno = 0;
|
||||
while (fgets (line, sizeof (line), stream))
|
||||
{
|
||||
lineno++;
|
||||
|
||||
/* Cut off the trailing comment, if any. */
|
||||
char *comment = strchr (line, '#');
|
||||
if (comment != NULL)
|
||||
*comment = '\0';
|
||||
/* Is the remaining line blank? */
|
||||
if (line[strspn (line, " \t\r\n")] == '\0')
|
||||
continue;
|
||||
|
||||
const char *p;
|
||||
uint32_t input[1024];
|
||||
char breaks[1024];
|
||||
char breaks_expected[1025];
|
||||
int i;
|
||||
|
||||
i = 0;
|
||||
p = line;
|
||||
do
|
||||
{
|
||||
p += strspn (p, " \t\r\n");
|
||||
if (!strncmp (p, "\303\267" /* ÷ */, 2))
|
||||
{
|
||||
breaks_expected[i] = 1;
|
||||
p += 2;
|
||||
}
|
||||
else if (!strncmp (p, "\303\227" /* × */, 2))
|
||||
{
|
||||
breaks_expected[i] = 0;
|
||||
p += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (stderr, "%s:%d.%d: syntax error expecting '÷' or '×'\n",
|
||||
filename, lineno, (int) (p - line + 1));
|
||||
exit (1);
|
||||
}
|
||||
|
||||
p += strspn (p, " \t\r\n");
|
||||
if (*p != '\0')
|
||||
{
|
||||
unsigned int next_int;
|
||||
int n;
|
||||
|
||||
if (sscanf (p, "%x%n", &next_int, &n) != 1)
|
||||
{
|
||||
fprintf (stderr, "%s:%d.%d: syntax error at '%s' "
|
||||
"expecting hexadecimal Unicode code point number\n",
|
||||
filename, lineno, (int) (p - line + 1), p);
|
||||
exit (1);
|
||||
}
|
||||
p += n;
|
||||
|
||||
input[i] = next_int;
|
||||
}
|
||||
|
||||
p += strspn (p, " \t\r\n");
|
||||
i++;
|
||||
}
|
||||
while (*p != '\0');
|
||||
|
||||
u32_possible_linebreaks (input, i - 1, "UTF-8", breaks);
|
||||
|
||||
int matches = 1;
|
||||
{
|
||||
int j;
|
||||
for (j = 0; j < i - 1; j++)
|
||||
{
|
||||
/* The character U+FFFC has line break property CB, which according
|
||||
to rule (LB1) is resolved "into other line breaking classes
|
||||
depending on criteria outside the scope of this algorithm".
|
||||
Thus it makes no sense to check the breaks[] entry before or
|
||||
after such a character. */
|
||||
if (!(input[j] == 0xFFFC
|
||||
|| (j > 0 && input[j - 1] == 0xFFFC)
|
||||
/* Also consider intervening characters with property LBP_CM
|
||||
or LBP_ZWJ, per (LB9). */
|
||||
|| (j > 1 && (input[j - 1] == 0x0308 || input[j - 1] == 0x200D)
|
||||
&& input[j - 2] == 0xFFFC)))
|
||||
/* A regional indicator with a combining character is nonsense,
|
||||
because regional indicators are supposed to come in pairs. */
|
||||
if (!(j >= 2 && (input[0] >= 0x1F1E6 && input[0] <= 0x1F1FF)
|
||||
&& input[1] == 0x0308))
|
||||
/* There is a disagreement regarding whether to allow a line break
|
||||
after a U+0020 SPACE character at the start of the text.
|
||||
We consider that the start of the text is equivalent to the
|
||||
state after a newline was seen; hence the loop starts with
|
||||
property LBP_BK. By the rules (LB4,LB5,LB6) an extra line
|
||||
break after a mandatory line break is undesired, even with
|
||||
intervening spaces (because these rules come before (LB18)).
|
||||
Whereas the LineBreakTest.txt file allows a line break after
|
||||
the space.
|
||||
Similarly when the first two characters at the start of the
|
||||
text have property LBP_CM and LBP_ZWJ, respectively. (LB9). */
|
||||
if (!(((j == 1 || (j > 1 && (input[j - 2] >= 0x000A && input[j - 2] <= 0x000D || input[j - 2] == 0x0085)))
|
||||
&& input[j - 1] == 0x0020)
|
||||
|| ((j == 2 || (j > 2 && (input[j - 3] >= 0x000A && input[j - 3] <= 0x000D || input[j - 3] == 0x0085)))
|
||||
&& ((input[j - 2] == 0x0020 && input[j - 1] == 0x0020)
|
||||
|| (input[j - 2] == 0x0308 && input[j - 1] == 0x200D)
|
||||
|| (input[j - 2] == 0x200D && input[j - 1] == 0x0308)))))
|
||||
matches &= (!(breaks[j] == UC_BREAK_PROHIBITED
|
||||
|| breaks[j] == UC_BREAK_MANDATORY
|
||||
|| breaks[j] == UC_BREAK_CR_BEFORE_LF)
|
||||
|| (j > 0 && breaks[j - 1] == UC_BREAK_MANDATORY))
|
||||
== breaks_expected[j];
|
||||
}
|
||||
}
|
||||
if (!matches)
|
||||
{
|
||||
int j;
|
||||
|
||||
fprintf (stderr, "%s:%d: expected: ", filename, lineno);
|
||||
for (j = 0; j < i - 1; j++)
|
||||
fprintf (stderr, "%s U+%04X ",
|
||||
breaks_expected[j] == 1 ? "\303\267" : "\303\227",
|
||||
input[j]);
|
||||
fprintf (stderr, "\n");
|
||||
fprintf (stderr, "%s:%d: actual: ", filename, lineno);
|
||||
for (j = 0; j < i - 1; j++)
|
||||
fprintf (stderr, "%s U+%04X ",
|
||||
(!(breaks[j] == UC_BREAK_PROHIBITED
|
||||
|| breaks[j] == UC_BREAK_MANDATORY
|
||||
|| breaks[j] == UC_BREAK_CR_BEFORE_LF)
|
||||
|| (j > 0 && breaks[j - 1] == UC_BREAK_MANDATORY))
|
||||
? "\303\267" : "\303\227",
|
||||
input[j]);
|
||||
fprintf (stderr, "\n");
|
||||
exit_code = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return exit_code;
|
||||
}
|
||||
3
tests/unilbrk/test-uc-possible-linebreaks.sh
Executable file
3
tests/unilbrk/test-uc-possible-linebreaks.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Run the test program on the LineBreakTest.txt data file located under
# ${srcdir}/unilbrk.  The exit status of the script is that of the program.
# NOTE(review): ${CHECKER} is left unquoted; presumably it is either empty or
# holds a wrapper command with arguments (e.g. a memory checker) — confirm
# against the test harness that sets it.
${CHECKER} ./test-uc-possible-linebreaks${EXEEXT} "${srcdir}/unilbrk/LineBreakTest.txt"
|
||||
Reference in New Issue
Block a user