gnunet-android

GNUnet for Android
Log | Files | Refs | README

unigbrk.h (5526B)


      1 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
      2 /* Grapheme cluster breaks in Unicode strings.
      3    Copyright (C) 2010-2024 Free Software Foundation, Inc.
      4    Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
      5 
      6    This file is free software.
      7    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
      8    You can redistribute it and/or modify it under either
      9      - the terms of the GNU Lesser General Public License as published
     10        by the Free Software Foundation, either version 3, or (at your
     11        option) any later version, or
     12      - the terms of the GNU General Public License as published by the
     13        Free Software Foundation; either version 2, or (at your option)
     14        any later version, or
     15      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
     16 
     17    This file is distributed in the hope that it will be useful,
     18    but WITHOUT ANY WARRANTY; without even the implied warranty of
     19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20    Lesser General Public License and the GNU General Public License
     21    for more details.
     22 
     23    You should have received a copy of the GNU Lesser General Public
     24    License and of the GNU General Public License along with this
     25    program.  If not, see <https://www.gnu.org/licenses/>.  */
     26 
     27 #ifndef _UNIGBRK_H
     28 #define _UNIGBRK_H
     29 
     30 /* Get bool.  */
     31 #include <unistring/stdbool.h>
     32 
     33 /* Get size_t. */
     34 #include <stddef.h>
     35 
     36 #include "unitypes.h"
     37 
     38 #ifdef __cplusplus
     39 extern "C" {
     40 #endif
     41 
     42 /* ========================================================================= */
     43 
     44 /* Property defined in Unicode Standard Annex #29, section "Grapheme Cluster
     45    Boundaries"
     46    <https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries> */
     47 
     48 /* Possible values of the Grapheme_Cluster_Break property, as returned by
     49    uc_graphemeclusterbreak_property().  May be extended in the future.  */
     50 enum
     51 {
     52   GBP_OTHER        = 0,
     53   GBP_CR           = 1,
     54   GBP_LF           = 2,
     55   GBP_CONTROL      = 3,
     56   GBP_EXTEND       = 4,
     57   GBP_PREPEND      = 5,
     58   GBP_SPACINGMARK  = 6,
     59   GBP_L            = 7,
     60   GBP_V            = 8,
     61   GBP_T            = 9,
     62   GBP_LV           = 10,
     63   GBP_LVT          = 11,
     64   GBP_RI           = 12,
     65   GBP_ZWJ          = 13,
     66   GBP_EB           = 14, /* obsolete */
     67   GBP_EM           = 15, /* obsolete */
     68   GBP_GAZ          = 16, /* obsolete */
     69   GBP_EBG          = 17  /* obsolete */
     70 };
     71 
     72 /* Return the Grapheme_Cluster_Break property (a GBP_* value) of character UC. */
     73 extern int
     74        uc_graphemeclusterbreak_property (ucs4_t uc)
     75        _UC_ATTRIBUTE_CONST;
     76 
     77 /* ========================================================================= */
     78 
     79 /* Grapheme cluster breaks.  */
     80 
     81 /* Return true if there is a grapheme cluster boundary between Unicode code
     82    points A and B.  A "grapheme cluster" is an approximation to a
     83    user-perceived character, which sometimes corresponds to multiple code
     84    points.  For example, an English letter followed by an acute accent can be
     85    expressed as two consecutive Unicode code points, but it is perceived by the
     86    user as only a single character and therefore constitutes a single grapheme
     87    cluster.
     88 
     89    Implements the extended (not legacy) grapheme cluster rules, because UAX #29
     90    indicates that they are preferred.
     91 
     92    Note: This function does not handle syllables in Indic scripts or emoji
     93    correctly, because it does not look at the characters before A and after B.
     94 
     95    Pass A == 0 to indicate the start of the text, or B == 0 to indicate the
     96    end of the text. */
     97 extern bool
     98        uc_is_grapheme_break (ucs4_t a, ucs4_t b)
     99        _UC_ATTRIBUTE_CONST;
    100 
    101 /* Return the start of the next grapheme cluster following S, or NULL if the
    102    end of the string (presumably delimited by END; confirm) has been reached.
    103    Note: These functions do not handle syllables in Indic scripts or emoji
    104    correctly, because they do not consider the characters before S. */
    105 extern const uint8_t *
    106        u8_grapheme_next (const uint8_t *s, const uint8_t *end)
    107        _UC_ATTRIBUTE_PURE;
    108 extern const uint16_t *
    109        u16_grapheme_next (const uint16_t *s, const uint16_t *end)
    110        _UC_ATTRIBUTE_PURE;
    111 extern const uint32_t *
    112        u32_grapheme_next (const uint32_t *s, const uint32_t *end)
    113        _UC_ATTRIBUTE_PURE;
    114 
    115 /* Return the start of the previous grapheme cluster before S, or NULL if the
    116    start of the string (presumably given by START; confirm) has been reached.
    117    Note: These functions do not handle syllables in Indic scripts or emoji
    118    correctly, because they do not consider the characters at or after S. */
    119 extern const uint8_t *
    120        u8_grapheme_prev (const uint8_t *s, const uint8_t *start)
    121        _UC_ATTRIBUTE_PURE;
    122 extern const uint16_t *
    123        u16_grapheme_prev (const uint16_t *s, const uint16_t *start)
    124        _UC_ATTRIBUTE_PURE;
    125 extern const uint32_t *
    126        u32_grapheme_prev (const uint32_t *s, const uint32_t *start)
    127        _UC_ATTRIBUTE_PURE;
    128 
    129 /* Determine the grapheme cluster boundaries in s[0..n-1], and store the result
    130    at p[0..n-1].
    131    p[i] = 1 means that a new grapheme cluster begins at s[i].
    132    p[i] = 0 means that s[i-1] and s[i] are part of the same grapheme cluster.
    133    p[0] will always be 1.  */
    134 extern void
    135        u8_grapheme_breaks (const uint8_t *s, size_t n, char *p);
    136 extern void
    137        u16_grapheme_breaks (const uint16_t *s, size_t n, char *p);
    138 extern void
    139        u32_grapheme_breaks (const uint32_t *s, size_t n, char *p);
    140 extern void
    141        ulc_grapheme_breaks (const char *s, size_t n, char *p);
    142 extern void
    143        uc_grapheme_breaks (const ucs4_t *s, size_t n, char *p);
    144 
    145 /* ========================================================================= */
    146 
    147 #ifdef __cplusplus
    148 }
    149 #endif
    150 
    151 
    152 #endif /* _UNIGBRK_H */