diff options
Diffstat (limited to 'pd/src/s_utf8.h')
-rw-r--r-- | pd/src/s_utf8.h | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/pd/src/s_utf8.h b/pd/src/s_utf8.h new file mode 100644 index 00000000..56c40d48 --- /dev/null +++ b/pd/src/s_utf8.h @@ -0,0 +1,88 @@ +#ifndef S_UTF8_H +#define S_UTF8_H + +/*--moo--*/ +#ifndef u_int32_t +# define u_int32_t unsigned int +#endif + +#ifndef UCS4 +# define UCS4 u_int32_t +#endif + +/* UTF8_SUPPORT_FULL_UCS4 + * define this to support the full potential range of UCS-4 codepoints + * (in anticipation of a future UTF-8 standard) + */ +/*#define UTF8_SUPPORT_FULL_UCS4 1*/ +#undef UTF8_SUPPORT_FULL_UCS4 + +/* UTF8_MAXBYTES + * maximum number of bytes required to represent a single character in UTF-8 + * + * UTF8_MAXBYTES1 = UTF8_MAXBYTES+1 + * maximum bytes per character including NUL terminator + */ +#ifdef UTF8_SUPPORT_FULL_UCS4 +# ifndef UTF8_MAXBYTES +# define UTF8_MAXBYTES 6 +# endif +# ifndef UTF8_MAXBYTES1 +# define UTF8_MAXBYTES1 7 +# endif +#else +# ifndef UTF8_MAXBYTES +# define UTF8_MAXBYTES 4 +# endif +# ifndef UTF8_MAXBYTES1 +# define UTF8_MAXBYTES1 5 +# endif +#endif +/*--/moo--*/ + +/* is c the start of a utf8 sequence? */ +#define isutf(c) (((c)&0xC0)!=0x80) + +/* convert UTF-8 data to wide character */ +int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz); + +/* the opposite conversion */ +int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz); + +/* moo: get byte length of character number, or 0 if not supported */ +int u8_wc_nbytes(u_int32_t ch); + +/* moo: compute required storage for UTF-8 encoding of 's[0..n-1]' */ +int u8_wcs_nbytes(u_int32_t *ucs, int size); + +/* single character to UTF-8, no NUL termination */ +int u8_wc_toutf8(char *dest, u_int32_t ch); + +/* moo: single character to UTF-8, with NUL termination */ +int u8_wc_toutf8_nul(char *dest, u_int32_t ch); + +/* character number to byte offset */ +int u8_offset(char *str, int charnum); + +/* byte offset to character number */ +int u8_charnum(char *s, int offset); + +/* return next character, updating an index variable */ +u_int32_t u8_nextchar(char *s, int *i); + +/* move to next character */ +void u8_inc(char *s, int *i); + +/* move to previous character */ +void u8_dec(char *s, int *i); + +/* moo: move pointer to next character */ +void u8_inc_ptr(char **sp); + +/* moo: move pointer to previous character */ +void u8_dec_ptr(char **sp); + +/* returns length of next utf-8 sequence */ +int u8_seqlen(char *s); + +#endif /* S_UTF8_H */ |