gnunet-android

GNUnet for Android
Log | Files | Refs | README

unicase.h (20860B)


      1 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
      2 /* Unicode character case mappings.
      3    Copyright (C) 2002, 2009-2024 Free Software Foundation, Inc.
      4 
      5    This file is free software: you can redistribute it and/or modify
      6    it under the terms of the GNU Lesser General Public License as
      7    published by the Free Software Foundation; either version 2.1 of the
      8    License, or (at your option) any later version.
      9 
     10    This file is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU Lesser General Public License for more details.
     14 
     15    You should have received a copy of the GNU Lesser General Public License
     16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
     17 
     18 #ifndef _UNICASE_H
     19 #define _UNICASE_H
     20 
     21 #include "unitypes.h"
     22 
     23 /* Get bool.  */
     24 #include <unistring/stdbool.h>
     25 
     26 /* Get size_t.  */
     27 #include <stddef.h>
     28 
     29 /* Get uninorm_t.  */
     30 #include "uninorm.h"
     31 
     32 #if 1
     33 # include <unistring/woe32dll.h>
     34 #else
     35 # define LIBUNISTRING_DLL_VARIABLE
     36 #endif
     37 
     38 #ifdef __cplusplus
     39 extern "C" {
     40 #endif
     41 
     42 /* ========================================================================= */
     43 
     44 /* Character case mappings.
     45    These mappings are locale and context independent.
     46    WARNING! These functions are not sufficient for languages such as German.
     47    Better use the functions below that treat an entire string at once and are
     48    language aware.  */
     49 
     50 /* Return the uppercase mapping of the Unicode character UC.
           This is a single-character mapping that is independent of locale and
           context; for language-aware results use the string functions
           (uN_toupper) instead.  */
     51 extern ucs4_t
     52        uc_toupper (ucs4_t uc)
     53        _UC_ATTRIBUTE_CONST;
     54 
     55 /* Return the lowercase mapping of the Unicode character UC.
           This is a single-character mapping that is independent of locale and
           context; for language-aware results use the string functions
           (uN_tolower) instead.  */
     56 extern ucs4_t
     57        uc_tolower (ucs4_t uc)
     58        _UC_ATTRIBUTE_CONST;
     59 
     60 /* Return the titlecase mapping of the Unicode character UC.
           This is a single-character mapping that is independent of locale and
           context; for language-aware results use the string functions
           (uN_totitle) instead.  */
     61 extern ucs4_t
     62        uc_totitle (ucs4_t uc)
     63        _UC_ATTRIBUTE_CONST;
     64 
     65 /* ========================================================================= */
     66 
     67 /* String case mappings.  */
     68 
     69 /* These functions are locale dependent.  The iso639_language argument
     70    identifies the language (e.g. "tr" for Turkish).  NULL means to use
     71    locale independent case mappings.  */
     72 
     73 /* Return the ISO 639 language code of the current locale.
     74    Return "" if it is unknown, or in the "C" locale.
           The returned code is the kind of value that the iso639_language
           argument of the string functions in this header expects (e.g. "tr").  */
     75 extern const char *
     76        uc_locale_language (void)
     77        _UC_ATTRIBUTE_PURE;
     78 
     79 /* Conventions:
     80 
     81    All functions prefixed with u8_ operate on UTF-8 encoded strings.
     82    Their unit is an uint8_t (1 byte).
     83 
     84    All functions prefixed with u16_ operate on UTF-16 encoded strings.
     85    Their unit is an uint16_t (a 2-byte word).
     86 
     87    All functions prefixed with u32_ operate on UCS-4 encoded strings.
     88    Their unit is an uint32_t (a 4-byte word).
     89 
     90    All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
     91    n units.
     92 
     93    Functions returning a string result take a (resultbuf, lengthp) argument
     94    pair.  If resultbuf is not NULL and the result fits into *lengthp units,
     95    it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
     96    allocated string is returned.  In both cases, *lengthp is set to the
     97    length (number of units) of the returned string.  In case of error,
     98    NULL is returned and errno is set.  */
     99 
    100 /* Return the uppercase mapping of a string.
    101    The nf argument identifies the normalization form to apply after the
    102    case-mapping.  It can also be NULL, for no normalization.
           S[0..N-1] is the input.  ISO639_LANGUAGE names the language (e.g. "tr");
           NULL selects locale independent case mappings.  RESULTBUF and LENGTHP
           follow the conventions stated earlier in this header: the result is
           stored in RESULTBUF if that is non-NULL and *LENGTHP units suffice,
           otherwise in a freshly allocated string; *LENGTHP receives the length
           of the result in units.  On error NULL is returned and errno is set.  */
    103 extern uint8_t *
    104        u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
    105                    uninorm_t nf,
    106                    uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    107 extern uint16_t *
    108        u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
    109                     uninorm_t nf,
    110                     uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    111 extern uint32_t *
    112        u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
    113                     uninorm_t nf,
    114                     uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    115 
    116 /* Return the lowercase mapping of a string.
    117    The nf argument identifies the normalization form to apply after the
    118    case-mapping.  It can also be NULL, for no normalization.
           S[0..N-1] is the input.  ISO639_LANGUAGE names the language (e.g. "tr");
           NULL selects locale independent case mappings.  RESULTBUF and LENGTHP
           follow the conventions stated earlier in this header: the result is
           stored in RESULTBUF if that is non-NULL and *LENGTHP units suffice,
           otherwise in a freshly allocated string; *LENGTHP receives the length
           of the result in units.  On error NULL is returned and errno is set.  */
    119 extern uint8_t *
    120        u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
    121                    uninorm_t nf,
    122                    uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    123 extern uint16_t *
    124        u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
    125                     uninorm_t nf,
    126                     uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    127 extern uint32_t *
    128        u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
    129                     uninorm_t nf,
    130                     uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    131 
    132 /* Return the titlecase mapping of a string.
    133    The nf argument identifies the normalization form to apply after the
    134    case-mapping.  It can also be NULL, for no normalization.
           S[0..N-1] is the input.  ISO639_LANGUAGE names the language (e.g. "tr");
           NULL selects locale independent case mappings.  RESULTBUF and LENGTHP
           follow the conventions stated earlier in this header: the result is
           stored in RESULTBUF if that is non-NULL and *LENGTHP units suffice,
           otherwise in a freshly allocated string; *LENGTHP receives the length
           of the result in units.  On error NULL is returned and errno is set.  */
    135 extern uint8_t *
    136        u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
    137                    uninorm_t nf,
    138                    uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    139 extern uint16_t *
    140        u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
    141                     uninorm_t nf,
    142                     uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    143 extern uint32_t *
    144        u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
    145                     uninorm_t nf,
    146                     uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    147 
    148 /* The case-mapping context given by a prefix string.
           Values of this type are produced by the uN_casing_prefix_context and
           uN_casing_prefixes_context functions (or taken from
           unicase_empty_prefix_context) and are passed by value to the uN_ct_*
           functions.  */
    149 typedef struct casing_prefix_context
    150         {
    151           /* These fields are private, undocumented.
                     Callers should treat the structure as opaque.  */
    152           uint32_t last_char_except_ignorable;
    153           uint32_t last_char_normal_or_above;
    154         }
    155         casing_prefix_context_t;
    156 /* The case-mapping context of the empty prefix string.
           Intended as the prefix_context argument of the uN_ct_* functions when
           nothing precedes the string being case-mapped.  */
    157 extern LIBUNISTRING_DLL_VARIABLE const casing_prefix_context_t unicase_empty_prefix_context;
    158 /* Return the case-mapping context of a given prefix string.
           S[0..N-1] is the prefix, i.e. the text that precedes the string to be
           case-mapped.  The context is returned by value.  */
    159 extern casing_prefix_context_t
    160        u8_casing_prefix_context (const uint8_t *s, size_t n);
    161 extern casing_prefix_context_t
    162        u16_casing_prefix_context (const uint16_t *s, size_t n);
    163 extern casing_prefix_context_t
    164        u32_casing_prefix_context (const uint32_t *s, size_t n);
    165 /* Return the case-mapping context of the prefix concat(A, S), given the
    166    case-mapping context of the prefix A.
           S[0..N-1] is the text appended to A, and A_CONTEXT is the context
           already computed for A; A itself is not passed, so a prefix context
           can be built up piece by piece.  */
    167 extern casing_prefix_context_t
    168        u8_casing_prefixes_context (const uint8_t *s, size_t n,
    169                                    casing_prefix_context_t a_context);
    170 extern casing_prefix_context_t
    171        u16_casing_prefixes_context (const uint16_t *s, size_t n,
    172                                     casing_prefix_context_t a_context);
    173 extern casing_prefix_context_t
    174        u32_casing_prefixes_context (const uint32_t *s, size_t n,
    175                                     casing_prefix_context_t a_context);
    176 
    177 /* The case-mapping context given by a suffix string.
           Values of this type are produced by the uN_casing_suffix_context and
           uN_casing_suffixes_context functions (or taken from
           unicase_empty_suffix_context) and are passed by value to the uN_ct_*
           functions.  */
    178 typedef struct casing_suffix_context
    179         {
    180           /* These fields are private, undocumented.
                     Callers should treat the structure as opaque.  */
    181           uint32_t first_char_except_ignorable;
    182           uint32_t bits;
    183         }
    184         casing_suffix_context_t;
    185 /* The case-mapping context of the empty suffix string.
           Intended as the suffix_context argument of the uN_ct_* functions when
           nothing follows the string being case-mapped.  */
    186 extern LIBUNISTRING_DLL_VARIABLE const casing_suffix_context_t unicase_empty_suffix_context;
    187 /* Return the case-mapping context of a given suffix string.
           S[0..N-1] is the suffix, i.e. the text that follows the string to be
           case-mapped.  The context is returned by value.  */
    188 extern casing_suffix_context_t
    189        u8_casing_suffix_context (const uint8_t *s, size_t n);
    190 extern casing_suffix_context_t
    191        u16_casing_suffix_context (const uint16_t *s, size_t n);
    192 extern casing_suffix_context_t
    193        u32_casing_suffix_context (const uint32_t *s, size_t n);
    194 /* Return the case-mapping context of the suffix concat(S, A), given the
    195    case-mapping context of the suffix A.
           S[0..N-1] is the text placed in front of A, and A_CONTEXT is the
           context already computed for A; A itself is not passed, so a suffix
           context can be built up piece by piece.  */
    196 extern casing_suffix_context_t
    197        u8_casing_suffixes_context (const uint8_t *s, size_t n,
    198                                    casing_suffix_context_t a_context);
    199 extern casing_suffix_context_t
    200        u16_casing_suffixes_context (const uint16_t *s, size_t n,
    201                                     casing_suffix_context_t a_context);
    202 extern casing_suffix_context_t
    203        u32_casing_suffixes_context (const uint32_t *s, size_t n,
    204                                     casing_suffix_context_t a_context);
    205 
    206 /* Return the uppercase mapping of a string that is surrounded by a prefix
    207    and a suffix.
           S[0..N-1] is the string to map.  PREFIX_CONTEXT and SUFFIX_CONTEXT are
           the case-mapping contexts of the text before and after S.
           ISO639_LANGUAGE names the language (e.g. "tr"); NULL selects locale
           independent case mappings.  NF is presumably the normalization form
           applied after the case-mapping, NULL for none, as for uN_toupper
           (not stated for this function -- confirm).  RESULTBUF and LENGTHP follow
           the conventions stated earlier in this header; on error NULL is
           returned and errno is set.  */
    208 extern uint8_t *
    209        u8_ct_toupper (const uint8_t *s, size_t n,
    210                       casing_prefix_context_t prefix_context,
    211                       casing_suffix_context_t suffix_context,
    212                       const char *iso639_language,
    213                       uninorm_t nf,
    214                       uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    215 extern uint16_t *
    216        u16_ct_toupper (const uint16_t *s, size_t n,
    217                       casing_prefix_context_t prefix_context,
    218                       casing_suffix_context_t suffix_context,
    219                       const char *iso639_language,
    220                       uninorm_t nf,
    221                       uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    222 extern uint32_t *
    223        u32_ct_toupper (const uint32_t *s, size_t n,
    224                       casing_prefix_context_t prefix_context,
    225                       casing_suffix_context_t suffix_context,
    226                       const char *iso639_language,
    227                       uninorm_t nf,
    228                       uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    229 
    230 /* Return the lowercase mapping of a string that is surrounded by a prefix
    231    and a suffix.
           S[0..N-1] is the string to map.  PREFIX_CONTEXT and SUFFIX_CONTEXT are
           the case-mapping contexts of the text before and after S.
           ISO639_LANGUAGE names the language (e.g. "tr"); NULL selects locale
           independent case mappings.  NF is presumably the normalization form
           applied after the case-mapping, NULL for none, as for uN_tolower
           (not stated for this function -- confirm).  RESULTBUF and LENGTHP follow
           the conventions stated earlier in this header; on error NULL is
           returned and errno is set.  */
    232 extern uint8_t *
    233        u8_ct_tolower (const uint8_t *s, size_t n,
    234                       casing_prefix_context_t prefix_context,
    235                       casing_suffix_context_t suffix_context,
    236                       const char *iso639_language,
    237                       uninorm_t nf,
    238                       uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    239 extern uint16_t *
    240        u16_ct_tolower (const uint16_t *s, size_t n,
    241                       casing_prefix_context_t prefix_context,
    242                       casing_suffix_context_t suffix_context,
    243                       const char *iso639_language,
    244                       uninorm_t nf,
    245                       uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    246 extern uint32_t *
    247        u32_ct_tolower (const uint32_t *s, size_t n,
    248                       casing_prefix_context_t prefix_context,
    249                       casing_suffix_context_t suffix_context,
    250                       const char *iso639_language,
    251                       uninorm_t nf,
    252                       uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    253 
    254 /* Return the titlecase mapping of a string that is surrounded by a prefix
    255    and a suffix.
           S[0..N-1] is the string to map.  PREFIX_CONTEXT and SUFFIX_CONTEXT are
           the case-mapping contexts of the text before and after S.
           ISO639_LANGUAGE names the language (e.g. "tr"); NULL selects locale
           independent case mappings.  NF is presumably the normalization form
           applied after the case-mapping, NULL for none, as for uN_totitle
           (not stated for this function -- confirm).  RESULTBUF and LENGTHP follow
           the conventions stated earlier in this header; on error NULL is
           returned and errno is set.  */
    256 extern uint8_t *
    257        u8_ct_totitle (const uint8_t *s, size_t n,
    258                       casing_prefix_context_t prefix_context,
    259                       casing_suffix_context_t suffix_context,
    260                       const char *iso639_language,
    261                       uninorm_t nf,
    262                       uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    263 extern uint16_t *
    264        u16_ct_totitle (const uint16_t *s, size_t n,
    265                       casing_prefix_context_t prefix_context,
    266                       casing_suffix_context_t suffix_context,
    267                       const char *iso639_language,
    268                       uninorm_t nf,
    269                       uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    270 extern uint32_t *
    271        u32_ct_totitle (const uint32_t *s, size_t n,
    272                       casing_prefix_context_t prefix_context,
    273                       casing_suffix_context_t suffix_context,
    274                       const char *iso639_language,
    275                       uninorm_t nf,
    276                       uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    277 
    278 /* Return the case folded string.
    279    Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
    280    to comparing S1 and S2 with uN_casecmp().
    281    The nf argument identifies the normalization form to apply after the
    282    case-mapping.  It can also be NULL, for no normalization.
           S[0..N-1] is the input.  ISO639_LANGUAGE names the language (e.g. "tr");
           NULL selects locale independent case mappings.  RESULTBUF and LENGTHP
           follow the conventions stated earlier in this header: the result is
           stored in RESULTBUF if that is non-NULL and *LENGTHP units suffice,
           otherwise in a freshly allocated string; *LENGTHP receives the length
           of the result in units.  On error NULL is returned and errno is set.  */
    283 extern uint8_t *
    284        u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
    285                     uninorm_t nf,
    286                     uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    287 extern uint16_t *
    288        u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
    289                      uninorm_t nf,
    290                      uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    291 extern uint32_t *
    292        u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
    293                      uninorm_t nf,
    294                      uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    295 /* Likewise, for a string that is surrounded by a prefix and a suffix.
           That is: return the case folded form of S[0..N-1], where
           PREFIX_CONTEXT and SUFFIX_CONTEXT are the case-mapping contexts of the
           text before and after S.  The remaining arguments are as for
           uN_casefold.  */
    296 extern uint8_t *
    297        u8_ct_casefold (const uint8_t *s, size_t n,
    298                        casing_prefix_context_t prefix_context,
    299                        casing_suffix_context_t suffix_context,
    300                        const char *iso639_language,
    301                        uninorm_t nf,
    302                        uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    303 extern uint16_t *
    304        u16_ct_casefold (const uint16_t *s, size_t n,
    305                         casing_prefix_context_t prefix_context,
    306                         casing_suffix_context_t suffix_context,
    307                         const char *iso639_language,
    308                         uninorm_t nf,
    309                         uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    310 extern uint32_t *
    311        u32_ct_casefold (const uint32_t *s, size_t n,
    312                         casing_prefix_context_t prefix_context,
    313                         casing_suffix_context_t suffix_context,
    314                         const char *iso639_language,
    315                         uninorm_t nf,
    316                         uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
    317 
    318 /* Compare S1 and S2, ignoring differences in case and normalization.
    319    The nf argument identifies the normalization form to apply after the
    320    case-mapping.  It can also be NULL, for no normalization.
    321    If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
    322    return 0.  Upon failure, return -1 with errno set.
           S1[0..N1-1] and S2[0..N2-1] are the two strings.  ISO639_LANGUAGE names
           the language (e.g. "tr"); NULL selects locale independent case
           mappings.  Note that the function's return value only reports
           success or failure; the ordering is delivered through *RESULTP.
           The ulc_ variant takes plain char strings instead of uintN_t units
           (presumably in the locale encoding -- confirm).  */
    323 extern int
    324        u8_casecmp (const uint8_t *s1, size_t n1,
    325                    const uint8_t *s2, size_t n2,
    326                    const char *iso639_language, uninorm_t nf, int *resultp);
    327 extern int
    328        u16_casecmp (const uint16_t *s1, size_t n1,
    329                     const uint16_t *s2, size_t n2,
    330                     const char *iso639_language, uninorm_t nf, int *resultp);
    331 extern int
    332        u32_casecmp (const uint32_t *s1, size_t n1,
    333                     const uint32_t *s2, size_t n2,
    334                     const char *iso639_language, uninorm_t nf, int *resultp);
    335 extern int
    336        ulc_casecmp (const char *s1, size_t n1,
    337                     const char *s2, size_t n2,
    338                     const char *iso639_language, uninorm_t nf, int *resultp);
    339 
    340 /* Convert the string S of length N to a NUL-terminated byte sequence, in such
    341    a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
    342    function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
    343    NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization.
           ISO639_LANGUAGE names the language (e.g. "tr"); NULL selects locale
           independent case mappings.  The result is a char string regardless of
           the unit type of S.  RESULTBUF and LENGTHP follow the conventions
           stated earlier in this header: the result is stored in RESULTBUF if
           that is non-NULL and large enough, otherwise in a freshly allocated
           string, and *LENGTHP receives its length.  On error NULL is returned
           and errno is set.
           The ulc_ variant takes a plain char string as input (presumably in
           the locale encoding -- confirm).  */
    344 extern char *
    345        u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
    346                     uninorm_t nf,
    347                     char *_UC_RESTRICT resultbuf, size_t *lengthp);
    348 extern char *
    349        u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
    350                      uninorm_t nf,
    351                      char *_UC_RESTRICT resultbuf, size_t *lengthp);
    352 extern char *
    353        u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
    354                      uninorm_t nf,
    355                      char *_UC_RESTRICT resultbuf, size_t *lengthp);
    356 extern char *
    357        ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
    358                      uninorm_t nf,
    359                      char *_UC_RESTRICT resultbuf, size_t *lengthp);
    360 
    361 /* Compare S1 and S2, ignoring differences in case and normalization, using the
    362    collation rules of the current locale.
    363    The nf argument identifies the normalization form to apply after the
    364    case-mapping.  It must be either UNINORM_NFC or UNINORM_NFKC.  It can also
    365    be NULL, for no normalization.
    366    If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
    367    return 0.  Upon failure, return -1 with errno set.
           S1[0..N1-1] and S2[0..N2-1] are the two strings.  ISO639_LANGUAGE names
           the language (e.g. "tr"); NULL selects locale independent case
           mappings.  Note that the function's return value only reports
           success or failure; the ordering is delivered through *RESULTP.
           The ulc_ variant takes plain char strings instead of uintN_t units
           (presumably in the locale encoding -- confirm).  */
    368 extern int
    369        u8_casecoll (const uint8_t *s1, size_t n1,
    370                     const uint8_t *s2, size_t n2,
    371                     const char *iso639_language, uninorm_t nf, int *resultp);
    372 extern int
    373        u16_casecoll (const uint16_t *s1, size_t n1,
    374                      const uint16_t *s2, size_t n2,
    375                      const char *iso639_language, uninorm_t nf, int *resultp);
    376 extern int
    377        u32_casecoll (const uint32_t *s1, size_t n1,
    378                      const uint32_t *s2, size_t n2,
    379                      const char *iso639_language, uninorm_t nf, int *resultp);
    380 extern int
    381        ulc_casecoll (const char *s1, size_t n1,
    382                      const char *s2, size_t n2,
    383                      const char *iso639_language, uninorm_t nf, int *resultp);
    384 
    385 
    386 /* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
    387    otherwise, and return 0.  Upon failure, return -1 with errno set.
           S[0..N-1] is the string to test.  ISO639_LANGUAGE names the language
           (e.g. "tr"); NULL selects locale independent case mappings.  The
           function's return value only reports success or failure; the answer
           is delivered through *RESULTP.  */
    388 extern int
    389        u8_is_uppercase (const uint8_t *s, size_t n,
    390                         const char *iso639_language,
    391                         bool *resultp);
    392 extern int
    393        u16_is_uppercase (const uint16_t *s, size_t n,
    394                          const char *iso639_language,
    395                          bool *resultp);
    396 extern int
    397        u32_is_uppercase (const uint32_t *s, size_t n,
    398                          const char *iso639_language,
    399                          bool *resultp);
    400 
    401 /* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
    402    otherwise, and return 0.  Upon failure, return -1 with errno set.
           S[0..N-1] is the string to test.  ISO639_LANGUAGE names the language
           (e.g. "tr"); NULL selects locale independent case mappings.  The
           function's return value only reports success or failure; the answer
           is delivered through *RESULTP.  */
    403 extern int
    404        u8_is_lowercase (const uint8_t *s, size_t n,
    405                         const char *iso639_language,
    406                         bool *resultp);
    407 extern int
    408        u16_is_lowercase (const uint16_t *s, size_t n,
    409                          const char *iso639_language,
    410                          bool *resultp);
    411 extern int
    412        u32_is_lowercase (const uint32_t *s, size_t n,
    413                          const char *iso639_language,
    414                          bool *resultp);
    415 
    416 /* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
    417    otherwise, and return 0.  Upon failure, return -1 with errno set.
           S[0..N-1] is the string to test.  ISO639_LANGUAGE names the language
           (e.g. "tr"); NULL selects locale independent case mappings.  The
           function's return value only reports success or failure; the answer
           is delivered through *RESULTP.  */
    418 extern int
    419        u8_is_titlecase (const uint8_t *s, size_t n,
    420                         const char *iso639_language,
    421                         bool *resultp);
    422 extern int
    423        u16_is_titlecase (const uint16_t *s, size_t n,
    424                          const char *iso639_language,
    425                          bool *resultp);
    426 extern int
    427        u32_is_titlecase (const uint32_t *s, size_t n,
    428                          const char *iso639_language,
    429                          bool *resultp);
    430 
    431 /* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
    432    false otherwise, and return 0.  Upon failure, return -1 with errno set.
           S[0..N-1] is the string to test.  ISO639_LANGUAGE names the language
           (e.g. "tr"); NULL selects locale independent case mappings.  The
           function's return value only reports success or failure; the answer
           is delivered through *RESULTP.  */
    433 extern int
    434        u8_is_casefolded (const uint8_t *s, size_t n,
    435                          const char *iso639_language,
    436                          bool *resultp);
    437 extern int
    438        u16_is_casefolded (const uint16_t *s, size_t n,
    439                           const char *iso639_language,
    440                           bool *resultp);
    441 extern int
    442        u32_is_casefolded (const uint32_t *s, size_t n,
    443                           const char *iso639_language,
    444                           bool *resultp);
    445 
    446 /* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
    447    either upper case or lower case or title case is not a no-op.
    448    Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
    449    under the lower case mapping, and under the title case mapping; in other
    450    words, when NFD(S) consists entirely of caseless characters.
    451    Upon failure, return -1 with errno set.
           NOTE(review): the return value on success is not stated here; it is
           presumably 0, as documented for uN_is_uppercase, uN_is_lowercase,
           uN_is_titlecase and uN_is_casefolded -- confirm.
           S[0..N-1] is the string to test.  ISO639_LANGUAGE names the language
           (e.g. "tr"); NULL selects locale independent case mappings.  */
    452 extern int
    453        u8_is_cased (const uint8_t *s, size_t n,
    454                     const char *iso639_language,
    455                     bool *resultp);
    456 extern int
    457        u16_is_cased (const uint16_t *s, size_t n,
    458                      const char *iso639_language,
    459                      bool *resultp);
    460 extern int
    461        u32_is_cased (const uint32_t *s, size_t n,
    462                      const char *iso639_language,
    463                      bool *resultp);
    464 
    465 
    466 /* ========================================================================= */
    467 
    468 #ifdef __cplusplus
    469 }
    470 #endif
    471 
    472 #endif /* _UNICASE_H */