/* UTF-8 helper declarations: conversion between UTF-8 byte strings and
 * UCS-4 (u_int32_t) code points, plus character / byte-offset navigation.
 */
#ifndef S_UTF8_H
#define S_UTF8_H
/*--moo--*/
/* Fallback integer types used by the declarations in this header.
 *
 * u_int32_t - defined as `unsigned int` unless a macro of that name exists.
 * UCS4      - code point type; defined as u_int32_t unless already a macro.
 *
 * NOTE(review): the preprocessor test only sees macros.  If the platform
 * provides u_int32_t as a typedef, the macro below is still defined and
 * takes precedence over it from this point on.
 * NOTE(review): `unsigned int` is assumed to be at least 32 bits wide;
 * nothing here verifies that.
 */
#if !defined(u_int32_t)
# define u_int32_t unsigned int
#endif
#if !defined(UCS4)
# define UCS4 u_int32_t
#endif
/* UTF8_SUPPORT_FULL_UCS4
 * Compile-time switch for supporting the full potential range of UCS-4
 * codepoints (in anticipation of a future UTF-8 standard).  It selects
 * which per-character byte limits are defined below.
 *
 * The switch is unconditionally undefined here, so a definition supplied
 * before this point (for example on the compiler command line) is
 * discarded.  To opt in, replace the #undef with:
 *   #define UTF8_SUPPORT_FULL_UCS4 1
 */
#undef UTF8_SUPPORT_FULL_UCS4
/* UTF8_MAXBYTES
 * maximum number of bytes required to represent a single character in
 * UTF-8: 6 with UTF8_SUPPORT_FULL_UCS4, otherwise 4.
 *
 * UTF8_MAXBYTES1
 * the same limit including a NUL terminator: 7 with
 * UTF8_SUPPORT_FULL_UCS4, otherwise 5.
 *
 * Each macro is defined only when it is not already defined.
 * NOTE(review): the two are guarded independently, so predefining only
 * UTF8_MAXBYTES leaves UTF8_MAXBYTES1 at its default and the
 * "UTF8_MAXBYTES + 1" relationship is not enforced.
 */
#if !defined(UTF8_MAXBYTES)
# if defined(UTF8_SUPPORT_FULL_UCS4)
#  define UTF8_MAXBYTES 6
# else
#  define UTF8_MAXBYTES 4
# endif
#endif
#if !defined(UTF8_MAXBYTES1)
# if defined(UTF8_SUPPORT_FULL_UCS4)
#  define UTF8_MAXBYTES1 7
# else
#  define UTF8_MAXBYTES1 5
# endif
#endif
/*--/moo--*/
/* isutf(c): non-zero when byte c can start a UTF-8 sequence.
 *
 * Only the two most significant bits of the low byte are examined: the
 * result is 0 when they are `10` (a continuation byte, 0x80..0xBF) and 1
 * for every other value.  No further validation of c is performed.
 * The argument is evaluated exactly once.
 */
#define isutf(c) ((((c) & 0xC0) ^ 0x80) != 0)
/* convert UTF-8 data to wide character
 *
 * dest  - output array of u_int32_t code points
 * sz    - size of dest; NOTE(review): presumably counted in elements, not
 *         bytes -- confirm against the implementation
 * src   - UTF-8 input bytes (declared non-const; whether it is modified is
 *         not visible from this declaration)
 * srcsz - size of src; NOTE(review): presumably in bytes -- confirm, along
 *         with any special value for NUL-terminated input
 *
 * Returns an int; NOTE(review): presumably the number of characters
 * converted -- confirm against the implementation.
 */
int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz);
/* the opposite conversion: wide characters (u_int32_t code points) to UTF-8
 *
 * dest  - output buffer for UTF-8 bytes
 * sz    - size of dest; NOTE(review): presumably in bytes -- confirm
 * src   - input array of u_int32_t code points
 * srcsz - size of src; NOTE(review): presumably the number of code points
 *         -- confirm, along with any special value for terminated input
 *
 * Returns an int; NOTE(review): its meaning (count converted, bytes
 * written, ...) is not visible from this declaration -- confirm.
 */
int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz);
/* moo: get byte length of character number, or 0 if not supported
 *
 * ch - code point to measure
 *
 * Returns the number of bytes for ch, or 0 when ch is not supported.
 * NOTE(review): the supported range presumably follows
 * UTF8_SUPPORT_FULL_UCS4 (UTF8_MAXBYTES) -- confirm in the implementation.
 */
int u8_wc_nbytes(u_int32_t ch);
/* moo: compute required storage for UTF-8 encoding of 'ucs[0..size-1]'
 *
 * ucs  - array of u_int32_t code points
 * size - number of elements of ucs to consider
 *
 * Returns an int; NOTE(review): presumably a byte count -- confirm whether
 * it includes room for a NUL terminator and how unsupported code points
 * are counted.
 */
int u8_wcs_nbytes(u_int32_t *ucs, int size);
/* single character to UTF-8, no NUL termination
 *
 * dest - output buffer; NOTE(review): presumably needs room for up to
 *        UTF8_MAXBYTES bytes -- confirm
 * ch   - code point to encode
 *
 * Returns an int; NOTE(review): presumably the number of bytes written
 * -- confirm, including the result for unsupported code points.
 */
int u8_wc_toutf8(char *dest, u_int32_t ch);
/* moo: single character to UTF-8, with NUL termination
 *
 * dest - output buffer; NOTE(review): presumably needs room for up to
 *        UTF8_MAXBYTES1 bytes (encoding plus NUL) -- confirm
 * ch   - code point to encode
 *
 * Returns an int; NOTE(review): presumably the number of bytes written
 * -- confirm whether the terminating NUL is counted.
 */
int u8_wc_toutf8_nul(char *dest, u_int32_t ch);
/* character number to byte offset
 *
 * str     - UTF-8 string
 * charnum - character index to locate; NOTE(review): presumably zero-based
 *           -- confirm
 *
 * Returns the byte offset corresponding to charnum.
 * NOTE(review): behaviour when charnum is past the end of str is not
 * visible from this declaration.
 */
int u8_offset(char *str, int charnum);
/* byte offset to character number
 *
 * s      - UTF-8 string
 * offset - byte offset into s
 *
 * Returns the character number corresponding to offset.
 * NOTE(review): handling of an offset that falls inside a multi-byte
 * sequence is not visible from this declaration.
 */
int u8_charnum(char *s, int offset);
/* return next character, updating an index variable
 *
 * s - UTF-8 string
 * i - in/out index variable; NOTE(review): presumably a byte index into s
 *     that is advanced past the character returned -- confirm
 *
 * Returns the next character as a u_int32_t.
 */
u_int32_t u8_nextchar(char *s, int *i);
/* move to next character
 *
 * s - UTF-8 string
 * i - index variable to advance; NOTE(review): presumably a byte index
 *     into s, updated in place -- confirm
 */
void u8_inc(char *s, int *i);
/* move to previous character
 *
 * s - UTF-8 string
 * i - index variable to move back; NOTE(review): presumably a byte index
 *     into s, updated in place -- confirm, including behaviour at index 0
 */
void u8_dec(char *s, int *i);
/* moo: move pointer to next character
 *
 * sp - address of a char pointer into UTF-8 data; NOTE(review): presumably
 *      *sp is advanced in place to the start of the following character
 *      -- confirm
 */
void u8_inc_ptr(char **sp);
/* moo: move pointer to previous character
 *
 * sp - address of a char pointer into UTF-8 data; NOTE(review): presumably
 *      *sp is moved back in place to the start of the preceding character
 *      -- confirm; no lower bound is passed, so the caller presumably must
 *      guarantee one exists
 */
void u8_dec_ptr(char **sp);
/* returns length of next utf-8 sequence
 *
 * s - pointer to UTF-8 data
 *
 * NOTE(review): the length is presumably in bytes and derived from the
 * byte at s -- confirm, including the result for a continuation or
 * invalid lead byte.
 */
int u8_seqlen(char *s);
#endif /* S_UTF8_H */