uniwbrk.h (3257B)
1 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 2 /* Word breaks in Unicode strings. 3 Copyright (C) 2001-2003, 2005-2024 Free Software Foundation, Inc. 4 Written by Bruno Haible <bruno@clisp.org>, 2009. 5 6 This file is free software. 7 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 8 You can redistribute it and/or modify it under either 9 - the terms of the GNU Lesser General Public License as published 10 by the Free Software Foundation, either version 3, or (at your 11 option) any later version, or 12 - the terms of the GNU General Public License as published by the 13 Free Software Foundation; either version 2, or (at your option) 14 any later version, or 15 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 16 17 This file is distributed in the hope that it will be useful, 18 but WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 Lesser General Public License and the GNU General Public License 21 for more details. 22 23 You should have received a copy of the GNU Lesser General Public 24 License and of the GNU General Public License along with this 25 program. If not, see <https://www.gnu.org/licenses/>. */ 26 27 #ifndef _UNIWBRK_H 28 #define _UNIWBRK_H 29 30 /* Get size_t. */ 31 #include <stddef.h> 32 33 #include "unitypes.h" 34 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 /* ========================================================================= */ 41 42 /* Property defined in Unicode Standard Annex #29, section "Word Boundaries" 43 <https://www.unicode.org/reports/tr29/#Word_Boundaries> */ 44 45 /* Possible values of the Word_Break property. 46 This enumeration may be extended in the future. */ 47 enum 48 { 49 WBP_OTHER = 0, 50 WBP_CR = 11, 51 WBP_LF = 12, 52 WBP_NEWLINE = 10, 53 WBP_EXTEND = 8, 54 WBP_FORMAT = 9, 55 WBP_KATAKANA = 1, 56 WBP_ALETTER = 2, 57 WBP_MIDNUMLET = 3, 58 WBP_MIDLETTER = 4, 59 WBP_MIDNUM = 5, 60 WBP_NUMERIC = 6, 61 WBP_EXTENDNUMLET = 7, 62 WBP_RI = 13, 63 WBP_DQ = 14, 64 WBP_SQ = 15, 65 WBP_HL = 16, 66 WBP_ZWJ = 17, 67 WBP_EB = 18, /* obsolete */ 68 WBP_EM = 19, /* obsolete */ 69 WBP_GAZ = 20, /* obsolete */ 70 WBP_EBG = 21, /* obsolete */ 71 WBP_WSS = 22 72 }; 73 74 /* Return the Word_Break property of a Unicode character. */ 75 extern int 76 uc_wordbreak_property (ucs4_t uc) 77 _UC_ATTRIBUTE_CONST; 78 79 /* ========================================================================= */ 80 81 /* Word breaks. */ 82 83 /* Determine the word break points in S, and store the result at p[0..n-1]. 84 p[i] = 1 means that there is a word boundary between s[i-1] and s[i]. 85 p[i] = 0 means that s[i-1] and s[i] must not be separated. 86 */ 87 extern void 88 u8_wordbreaks (const uint8_t *s, size_t n, char *p); 89 extern void 90 u16_wordbreaks (const uint16_t *s, size_t n, char *p); 91 extern void 92 u32_wordbreaks (const uint32_t *s, size_t n, char *p); 93 extern void 94 ulc_wordbreaks (const char *s, size_t n, char *_UC_RESTRICT p); 95 96 /* ========================================================================= */ 97 98 #ifdef __cplusplus 99 } 100 #endif 101 102 103 #endif /* _UNIWBRK_H */