/* s_utf8.h -- declarations for UTF-8 <-> wide-character helpers.
 * Repository path: pd/src/s_utf8.h
 * Source blob: 56c40d480b250924d95ed6abe150ac08b41cdcea
 */
#ifndef S_UTF8_H
#define S_UTF8_H

/*--moo--*/
/* Fallback name for the unsigned type used to hold one wide character.
 * The guard only notices a *macro* called u_int32_t; a typedef of the same
 * name (e.g. from a system header) is invisible to the preprocessor.
 * NOTE(review): assumes unsigned int is 32 bits wide -- not checked here.
 */
#if !defined(u_int32_t)
# define u_int32_t unsigned int
#endif

/* UCS4: alias for the wide-character type, overridable by defining UCS4
 * before this point. */
#if !defined(UCS4)
# define UCS4 u_int32_t
#endif

/* UTF8_SUPPORT_FULL_UCS4
 *  Define this to support the full potential range of UCS-4 codepoints
 *  (in anticipation of a future UTF-8 standard).  When it is defined,
 *  UTF8_MAXBYTES defaults to 6 instead of 4.
 *
 *  It is currently forced off: the #define below is commented out, and the
 *  #undef discards any definition made before this point.  To enable it,
 *  uncomment the #define and remove the #undef.
 */
/*#define UTF8_SUPPORT_FULL_UCS4 1*/
#undef UTF8_SUPPORT_FULL_UCS4

/* UTF8_MAXBYTES
 *   Maximum number of bytes needed to represent a single character in UTF-8:
 *   6 when UTF8_SUPPORT_FULL_UCS4 is defined, otherwise 4.
 *
 * UTF8_MAXBYTES1
 *   Maximum bytes per character including a NUL terminator; the defaults are
 *   UTF8_MAXBYTES + 1 (7 or 5).
 *
 * Each macro is guarded separately, so a value defined before this point is
 * kept as-is; overriding only one of them does not adjust the other.
 */
#if !defined(UTF8_MAXBYTES)
# if defined(UTF8_SUPPORT_FULL_UCS4)
#  define UTF8_MAXBYTES  6
# else
#  define UTF8_MAXBYTES  4
# endif
#endif

#if !defined(UTF8_MAXBYTES1)
# if defined(UTF8_SUPPORT_FULL_UCS4)
#  define UTF8_MAXBYTES1 7
# else
#  define UTF8_MAXBYTES1 5
# endif
#endif
/*--/moo--*/

/* isutf(c): nonzero when byte c can start a UTF-8 sequence, i.e. when its
 * top two bits are anything other than binary 10 (the continuation-byte
 * marker).  Plain ASCII bytes therefore also count as sequence starts.
 * The argument is evaluated exactly once. */
#define isutf(c) (((c) & 0xC0) != 0x80)

/* Convert UTF-8 data to wide characters (u_int32_t values).
 *   dest, sz   - presumably the output array and its capacity
 *   src, srcsz - presumably the UTF-8 input and its length
 * NOTE(review): the units of sz/srcsz (characters vs. bytes), whether the
 * output is terminated, and the meaning of the return value cannot be
 * determined from this header -- confirm against the definition.
 */
int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz);

/* The opposite conversion of u8_toucs: wide characters (u_int32_t values)
 * to UTF-8 data.
 *   dest, sz   - presumably the output byte buffer and its capacity
 *   src, srcsz - presumably the wide-character input and its length
 * NOTE(review): size units, termination of the output and the meaning of
 * the return value are not visible here -- confirm against the definition.
 */
int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz);

/* moo: get the byte length of character number 'ch', or 0 if 'ch' is not
 * supported.
 * NOTE(review): "byte length" presumably means the size of the UTF-8
 * encoding of ch, and "not supported" presumably means it would need more
 * than UTF8_MAXBYTES bytes -- confirm against the definition.
 */
int u8_wc_nbytes(u_int32_t ch);

/* moo: compute the storage required for the UTF-8 encoding of
 * 'ucs[0..size-1]'.
 * NOTE(review): whether the result includes room for a NUL terminator is
 * not visible from this header -- confirm against the definition.
 */
int u8_wcs_nbytes(u_int32_t *ucs, int size);

/* Encode the single character 'ch' as UTF-8 into 'dest', without NUL
 * termination.
 * NOTE(review): dest presumably needs room for up to UTF8_MAXBYTES bytes,
 * and the return value is presumably the number of bytes written --
 * confirm against the definition.
 */
int u8_wc_toutf8(char *dest, u_int32_t ch);

/* moo: encode the single character 'ch' as UTF-8 into 'dest', with NUL
 * termination.
 * NOTE(review): dest presumably needs room for up to UTF8_MAXBYTES1 bytes;
 * whether the return value counts the terminator is not visible here --
 * confirm against the definition.
 */
int u8_wc_toutf8_nul(char *dest, u_int32_t ch);

/* Map a character number to a byte offset within the UTF-8 string 'str'.
 * NOTE(review): behaviour when 'charnum' is past the end of the string is
 * not visible from this header -- confirm against the definition.
 */
int u8_offset(char *str, int charnum);

/* Map a byte offset within the UTF-8 string 's' to a character number
 * (the inverse of the character-number-to-byte-offset mapping).
 * NOTE(review): behaviour when 'offset' falls inside a multi-byte sequence
 * or past the end of the string is not visible here -- confirm.
 */
int u8_charnum(char *s, int offset);

/* Return the next character of 's', updating the index variable '*i'.
 * NOTE(review): '*i' is presumably a byte offset into 's' that is advanced
 * past the decoded character -- confirm against the definition.
 */
u_int32_t u8_nextchar(char *s, int *i);

/* Move the index '*i' to the next character of 's'.
 * NOTE(review): '*i' is presumably a byte offset; behaviour at the end of
 * the string is not visible here -- confirm against the definition.
 */
void u8_inc(char *s, int *i);

/* Move the index '*i' to the previous character of 's'.
 * NOTE(review): '*i' is presumably a byte offset; behaviour when it is
 * already at the start of the string is not visible here -- confirm.
 */
void u8_dec(char *s, int *i);

/* moo: move the pointer '*sp' to the next character (pointer-based
 * counterpart of u8_inc).
 * NOTE(review): no string bound is passed, so the caller is presumably
 * responsible for not advancing past the end -- confirm.
 */
void u8_inc_ptr(char **sp);

/* moo: move the pointer '*sp' to the previous character (pointer-based
 * counterpart of u8_dec).
 * NOTE(review): no string start is passed, so the caller is presumably
 * responsible for not stepping back before the beginning -- confirm.
 */
void u8_dec_ptr(char **sp);

/* Return the length of the next UTF-8 sequence, i.e. the one starting
 * at 's'.
 * NOTE(review): the length is presumably in bytes; the result for a byte
 * that is not a valid sequence start is not visible here -- confirm.
 */
int u8_seqlen(char *s);

#endif /* S_UTF8_H */