unicase.h (20860B)
1 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 2 /* Unicode character case mappings. 3 Copyright (C) 2002, 2009-2024 Free Software Foundation, Inc. 4 5 This file is free software: you can redistribute it and/or modify 6 it under the terms of the GNU Lesser General Public License as 7 published by the Free Software Foundation; either version 2.1 of the 8 License, or (at your option) any later version. 9 10 This file is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 17 18 #ifndef _UNICASE_H 19 #define _UNICASE_H 20 21 #include "unitypes.h" 22 23 /* Get bool. */ 24 #include <unistring/stdbool.h> 25 26 /* Get size_t. */ 27 #include <stddef.h> 28 29 /* Get uninorm_t. */ 30 #include "uninorm.h" 31 32 #if 1 33 # include <unistring/woe32dll.h> 34 #else 35 # define LIBUNISTRING_DLL_VARIABLE 36 #endif 37 38 #ifdef __cplusplus 39 extern "C" { 40 #endif 41 42 /* ========================================================================= */ 43 44 /* Character case mappings. 45 These mappings are locale and context independent. 46 WARNING! These functions are not sufficient for languages such as German. 47 Better use the functions below that treat an entire string at once and are 48 language aware. */ 49 50 /* Return the uppercase mapping of a Unicode character. */ 51 extern ucs4_t 52 uc_toupper (ucs4_t uc) 53 _UC_ATTRIBUTE_CONST; 54 55 /* Return the lowercase mapping of a Unicode character. */ 56 extern ucs4_t 57 uc_tolower (ucs4_t uc) 58 _UC_ATTRIBUTE_CONST; 59 60 /* Return the titlecase mapping of a Unicode character. */ 61 extern ucs4_t 62 uc_totitle (ucs4_t uc) 63 _UC_ATTRIBUTE_CONST; 64 65 /* ========================================================================= */ 66 67 /* String case mappings. */ 68 69 /* These functions are locale dependent. The iso639_language argument 70 identifies the language (e.g. "tr" for Turkish). NULL means to use 71 locale independent case mappings. */ 72 73 /* Return the ISO 639 language code of the current locale. 74 Return "" if it is unknown, or in the "C" locale. */ 75 extern const char * 76 uc_locale_language (void) 77 _UC_ATTRIBUTE_PURE; 78 79 /* Conventions: 80 81 All functions prefixed with u8_ operate on UTF-8 encoded strings. 82 Their unit is an uint8_t (1 byte). 83 84 All functions prefixed with u16_ operate on UTF-16 encoded strings. 85 Their unit is an uint16_t (a 2-byte word). 86 87 All functions prefixed with u32_ operate on UCS-4 encoded strings. 88 Their unit is an uint32_t (a 4-byte word). 89 90 All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly 91 n units. 92 93 Functions returning a string result take a (resultbuf, lengthp) argument 94 pair. If resultbuf is not NULL and the result fits into *lengthp units, 95 it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly 96 allocated string is returned. In both cases, *lengthp is set to the 97 length (number of units) of the returned string. In case of error, 98 NULL is returned and errno is set. */ 99 100 /* Return the uppercase mapping of a string. 101 The nf argument identifies the normalization form to apply after the 102 case-mapping. It can also be NULL, for no normalization. */ 103 extern uint8_t * 104 u8_toupper (const uint8_t *s, size_t n, const char *iso639_language, 105 uninorm_t nf, 106 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 107 extern uint16_t * 108 u16_toupper (const uint16_t *s, size_t n, const char *iso639_language, 109 uninorm_t nf, 110 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 111 extern uint32_t * 112 u32_toupper (const uint32_t *s, size_t n, const char *iso639_language, 113 uninorm_t nf, 114 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 115 116 /* Return the lowercase mapping of a string. 117 The nf argument identifies the normalization form to apply after the 118 case-mapping. It can also be NULL, for no normalization. */ 119 extern uint8_t * 120 u8_tolower (const uint8_t *s, size_t n, const char *iso639_language, 121 uninorm_t nf, 122 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 123 extern uint16_t * 124 u16_tolower (const uint16_t *s, size_t n, const char *iso639_language, 125 uninorm_t nf, 126 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 127 extern uint32_t * 128 u32_tolower (const uint32_t *s, size_t n, const char *iso639_language, 129 uninorm_t nf, 130 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 131 132 /* Return the titlecase mapping of a string. 133 The nf argument identifies the normalization form to apply after the 134 case-mapping. It can also be NULL, for no normalization. */ 135 extern uint8_t * 136 u8_totitle (const uint8_t *s, size_t n, const char *iso639_language, 137 uninorm_t nf, 138 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 139 extern uint16_t * 140 u16_totitle (const uint16_t *s, size_t n, const char *iso639_language, 141 uninorm_t nf, 142 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 143 extern uint32_t * 144 u32_totitle (const uint32_t *s, size_t n, const char *iso639_language, 145 uninorm_t nf, 146 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 147 148 /* The case-mapping context given by a prefix string. */ 149 typedef struct casing_prefix_context 150 { 151 /* These fields are private, undocumented. */ 152 uint32_t last_char_except_ignorable; 153 uint32_t last_char_normal_or_above; 154 } 155 casing_prefix_context_t; 156 /* The case-mapping context of the empty prefix string. */ 157 extern LIBUNISTRING_DLL_VARIABLE const casing_prefix_context_t unicase_empty_prefix_context; 158 /* Return the case-mapping context of a given prefix string. */ 159 extern casing_prefix_context_t 160 u8_casing_prefix_context (const uint8_t *s, size_t n); 161 extern casing_prefix_context_t 162 u16_casing_prefix_context (const uint16_t *s, size_t n); 163 extern casing_prefix_context_t 164 u32_casing_prefix_context (const uint32_t *s, size_t n); 165 /* Return the case-mapping context of the prefix concat(A, S), given the 166 case-mapping context of the prefix A. */ 167 extern casing_prefix_context_t 168 u8_casing_prefixes_context (const uint8_t *s, size_t n, 169 casing_prefix_context_t a_context); 170 extern casing_prefix_context_t 171 u16_casing_prefixes_context (const uint16_t *s, size_t n, 172 casing_prefix_context_t a_context); 173 extern casing_prefix_context_t 174 u32_casing_prefixes_context (const uint32_t *s, size_t n, 175 casing_prefix_context_t a_context); 176 177 /* The case-mapping context given by a suffix string. */ 178 typedef struct casing_suffix_context 179 { 180 /* These fields are private, undocumented. */ 181 uint32_t first_char_except_ignorable; 182 uint32_t bits; 183 } 184 casing_suffix_context_t; 185 /* The case-mapping context of the empty suffix string. */ 186 extern LIBUNISTRING_DLL_VARIABLE const casing_suffix_context_t unicase_empty_suffix_context; 187 /* Return the case-mapping context of a given suffix string. */ 188 extern casing_suffix_context_t 189 u8_casing_suffix_context (const uint8_t *s, size_t n); 190 extern casing_suffix_context_t 191 u16_casing_suffix_context (const uint16_t *s, size_t n); 192 extern casing_suffix_context_t 193 u32_casing_suffix_context (const uint32_t *s, size_t n); 194 /* Return the case-mapping context of the suffix concat(S, A), given the 195 case-mapping context of the suffix A. */ 196 extern casing_suffix_context_t 197 u8_casing_suffixes_context (const uint8_t *s, size_t n, 198 casing_suffix_context_t a_context); 199 extern casing_suffix_context_t 200 u16_casing_suffixes_context (const uint16_t *s, size_t n, 201 casing_suffix_context_t a_context); 202 extern casing_suffix_context_t 203 u32_casing_suffixes_context (const uint32_t *s, size_t n, 204 casing_suffix_context_t a_context); 205 206 /* Return the uppercase mapping of a string that is surrounded by a prefix 207 and a suffix. */ 208 extern uint8_t * 209 u8_ct_toupper (const uint8_t *s, size_t n, 210 casing_prefix_context_t prefix_context, 211 casing_suffix_context_t suffix_context, 212 const char *iso639_language, 213 uninorm_t nf, 214 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 215 extern uint16_t * 216 u16_ct_toupper (const uint16_t *s, size_t n, 217 casing_prefix_context_t prefix_context, 218 casing_suffix_context_t suffix_context, 219 const char *iso639_language, 220 uninorm_t nf, 221 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 222 extern uint32_t * 223 u32_ct_toupper (const uint32_t *s, size_t n, 224 casing_prefix_context_t prefix_context, 225 casing_suffix_context_t suffix_context, 226 const char *iso639_language, 227 uninorm_t nf, 228 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 229 230 /* Return the lowercase mapping of a string that is surrounded by a prefix 231 and a suffix. */ 232 extern uint8_t * 233 u8_ct_tolower (const uint8_t *s, size_t n, 234 casing_prefix_context_t prefix_context, 235 casing_suffix_context_t suffix_context, 236 const char *iso639_language, 237 uninorm_t nf, 238 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 239 extern uint16_t * 240 u16_ct_tolower (const uint16_t *s, size_t n, 241 casing_prefix_context_t prefix_context, 242 casing_suffix_context_t suffix_context, 243 const char *iso639_language, 244 uninorm_t nf, 245 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 246 extern uint32_t * 247 u32_ct_tolower (const uint32_t *s, size_t n, 248 casing_prefix_context_t prefix_context, 249 casing_suffix_context_t suffix_context, 250 const char *iso639_language, 251 uninorm_t nf, 252 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 253 254 /* Return the titlecase mapping of a string that is surrounded by a prefix 255 and a suffix. */ 256 extern uint8_t * 257 u8_ct_totitle (const uint8_t *s, size_t n, 258 casing_prefix_context_t prefix_context, 259 casing_suffix_context_t suffix_context, 260 const char *iso639_language, 261 uninorm_t nf, 262 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 263 extern uint16_t * 264 u16_ct_totitle (const uint16_t *s, size_t n, 265 casing_prefix_context_t prefix_context, 266 casing_suffix_context_t suffix_context, 267 const char *iso639_language, 268 uninorm_t nf, 269 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 270 extern uint32_t * 271 u32_ct_totitle (const uint32_t *s, size_t n, 272 casing_prefix_context_t prefix_context, 273 casing_suffix_context_t suffix_context, 274 const char *iso639_language, 275 uninorm_t nf, 276 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 277 278 /* Return the case folded string. 279 Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent 280 to comparing S1 and S2 with uN_casecmp(). 281 The nf argument identifies the normalization form to apply after the 282 case-mapping. It can also be NULL, for no normalization. */ 283 extern uint8_t * 284 u8_casefold (const uint8_t *s, size_t n, const char *iso639_language, 285 uninorm_t nf, 286 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 287 extern uint16_t * 288 u16_casefold (const uint16_t *s, size_t n, const char *iso639_language, 289 uninorm_t nf, 290 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 291 extern uint32_t * 292 u32_casefold (const uint32_t *s, size_t n, const char *iso639_language, 293 uninorm_t nf, 294 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 295 /* Likewise, for a string that is surrounded by a prefix and a suffix. */ 296 extern uint8_t * 297 u8_ct_casefold (const uint8_t *s, size_t n, 298 casing_prefix_context_t prefix_context, 299 casing_suffix_context_t suffix_context, 300 const char *iso639_language, 301 uninorm_t nf, 302 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp); 303 extern uint16_t * 304 u16_ct_casefold (const uint16_t *s, size_t n, 305 casing_prefix_context_t prefix_context, 306 casing_suffix_context_t suffix_context, 307 const char *iso639_language, 308 uninorm_t nf, 309 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp); 310 extern uint32_t * 311 u32_ct_casefold (const uint32_t *s, size_t n, 312 casing_prefix_context_t prefix_context, 313 casing_suffix_context_t suffix_context, 314 const char *iso639_language, 315 uninorm_t nf, 316 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp); 317 318 /* Compare S1 and S2, ignoring differences in case and normalization. 319 The nf argument identifies the normalization form to apply after the 320 case-mapping. It can also be NULL, for no normalization. 321 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and 322 return 0. Upon failure, return -1 with errno set. */ 323 extern int 324 u8_casecmp (const uint8_t *s1, size_t n1, 325 const uint8_t *s2, size_t n2, 326 const char *iso639_language, uninorm_t nf, int *resultp); 327 extern int 328 u16_casecmp (const uint16_t *s1, size_t n1, 329 const uint16_t *s2, size_t n2, 330 const char *iso639_language, uninorm_t nf, int *resultp); 331 extern int 332 u32_casecmp (const uint32_t *s1, size_t n1, 333 const uint32_t *s2, size_t n2, 334 const char *iso639_language, uninorm_t nf, int *resultp); 335 extern int 336 ulc_casecmp (const char *s1, size_t n1, 337 const char *s2, size_t n2, 338 const char *iso639_language, uninorm_t nf, int *resultp); 339 340 /* Convert the string S of length N to a NUL-terminated byte sequence, in such 341 a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib 342 function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll(). 343 NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */ 344 extern char * 345 u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language, 346 uninorm_t nf, 347 char *_UC_RESTRICT resultbuf, size_t *lengthp); 348 extern char * 349 u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language, 350 uninorm_t nf, 351 char *_UC_RESTRICT resultbuf, size_t *lengthp); 352 extern char * 353 u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language, 354 uninorm_t nf, 355 char *_UC_RESTRICT resultbuf, size_t *lengthp); 356 extern char * 357 ulc_casexfrm (const char *s, size_t n, const char *iso639_language, 358 uninorm_t nf, 359 char *_UC_RESTRICT resultbuf, size_t *lengthp); 360 361 /* Compare S1 and S2, ignoring differences in case and normalization, using the 362 collation rules of the current locale. 363 The nf argument identifies the normalization form to apply after the 364 case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC. It can also 365 be NULL, for no normalization. 366 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and 367 return 0. Upon failure, return -1 with errno set. */ 368 extern int 369 u8_casecoll (const uint8_t *s1, size_t n1, 370 const uint8_t *s2, size_t n2, 371 const char *iso639_language, uninorm_t nf, int *resultp); 372 extern int 373 u16_casecoll (const uint16_t *s1, size_t n1, 374 const uint16_t *s2, size_t n2, 375 const char *iso639_language, uninorm_t nf, int *resultp); 376 extern int 377 u32_casecoll (const uint32_t *s1, size_t n1, 378 const uint32_t *s2, size_t n2, 379 const char *iso639_language, uninorm_t nf, int *resultp); 380 extern int 381 ulc_casecoll (const char *s1, size_t n1, 382 const char *s2, size_t n2, 383 const char *iso639_language, uninorm_t nf, int *resultp); 384 385 386 /* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false 387 otherwise, and return 0. Upon failure, return -1 with errno set. */ 388 extern int 389 u8_is_uppercase (const uint8_t *s, size_t n, 390 const char *iso639_language, 391 bool *resultp); 392 extern int 393 u16_is_uppercase (const uint16_t *s, size_t n, 394 const char *iso639_language, 395 bool *resultp); 396 extern int 397 u32_is_uppercase (const uint32_t *s, size_t n, 398 const char *iso639_language, 399 bool *resultp); 400 401 /* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false 402 otherwise, and return 0. Upon failure, return -1 with errno set. */ 403 extern int 404 u8_is_lowercase (const uint8_t *s, size_t n, 405 const char *iso639_language, 406 bool *resultp); 407 extern int 408 u16_is_lowercase (const uint16_t *s, size_t n, 409 const char *iso639_language, 410 bool *resultp); 411 extern int 412 u32_is_lowercase (const uint32_t *s, size_t n, 413 const char *iso639_language, 414 bool *resultp); 415 416 /* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false 417 otherwise, and return 0. Upon failure, return -1 with errno set. */ 418 extern int 419 u8_is_titlecase (const uint8_t *s, size_t n, 420 const char *iso639_language, 421 bool *resultp); 422 extern int 423 u16_is_titlecase (const uint16_t *s, size_t n, 424 const char *iso639_language, 425 bool *resultp); 426 extern int 427 u32_is_titlecase (const uint32_t *s, size_t n, 428 const char *iso639_language, 429 bool *resultp); 430 431 /* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to 432 false otherwise, and return 0. Upon failure, return -1 with errno set. */ 433 extern int 434 u8_is_casefolded (const uint8_t *s, size_t n, 435 const char *iso639_language, 436 bool *resultp); 437 extern int 438 u16_is_casefolded (const uint16_t *s, size_t n, 439 const char *iso639_language, 440 bool *resultp); 441 extern int 442 u32_is_casefolded (const uint32_t *s, size_t n, 443 const char *iso639_language, 444 bool *resultp); 445 446 /* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to 447 either upper case or lower case or title case is not a no-op. 448 Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping, 449 under the lower case mapping, and under the title case mapping; in other 450 words, when NFD(S) consists entirely of caseless characters. 451 Upon failure, return -1 with errno set. */ 452 extern int 453 u8_is_cased (const uint8_t *s, size_t n, 454 const char *iso639_language, 455 bool *resultp); 456 extern int 457 u16_is_cased (const uint16_t *s, size_t n, 458 const char *iso639_language, 459 bool *resultp); 460 extern int 461 u32_is_cased (const uint32_t *s, size_t n, 462 const char *iso639_language, 463 bool *resultp); 464 465 466 /* ========================================================================= */ 467 468 #ifdef __cplusplus 469 } 470 #endif 471 472 #endif /* _UNICASE_H */