From 21c068f1916330e90f814bed461fe0821d1665ec Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Sun, 9 Oct 2011 16:36:37 +0000 Subject: checked in pd-0.43-0.src.tar.gz svn path=/trunk/; revision=15557 --- pd/src/s_utf8.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 pd/src/s_utf8.h (limited to 'pd/src/s_utf8.h') diff --git a/pd/src/s_utf8.h b/pd/src/s_utf8.h new file mode 100644 index 00000000..56c40d48 --- /dev/null +++ b/pd/src/s_utf8.h @@ -0,0 +1,88 @@ +#ifndef S_UTF8_H +#define S_UTF8_H + +/*--moo--*/ +#ifndef u_int32_t +# define u_int32_t unsigned int +#endif + +#ifndef UCS4 +# define UCS4 u_int32_t +#endif + +/* UTF8_SUPPORT_FULL_UCS4 + * define this to support the full potential range of UCS-4 codepoints + * (in anticipation of a future UTF-8 standard) + */ +/*#define UTF8_SUPPORT_FULL_UCS4 1*/ +#undef UTF8_SUPPORT_FULL_UCS4 + +/* UTF8_MAXBYTES + * maximum number of bytes required to represent a single character in UTF-8 + * + * UTF8_MAXBYTES1 = UTF8_MAXBYTES+1 + * maximum bytes per character including NUL terminator + */ +#ifdef UTF8_SUPPORT_FULL_UCS4 +# ifndef UTF8_MAXBYTES +# define UTF8_MAXBYTES 6 +# endif +# ifndef UTF8_MAXBYTES1 +# define UTF8_MAXBYTES1 7 +# endif +#else +# ifndef UTF8_MAXBYTES +# define UTF8_MAXBYTES 4 +# endif +# ifndef UTF8_MAXBYTES1 +# define UTF8_MAXBYTES1 5 +# endif +#endif +/*--/moo--*/ + +/* is c the start of a utf8 sequence? */ +#define isutf(c) (((c)&0xC0)!=0x80) + +/* convert UTF-8 data to wide character */ +int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz); + +/* the opposite conversion */ +int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz); + +/* moo: get byte length of character number, or 0 if not supported */ +int u8_wc_nbytes(u_int32_t ch); + +/* moo: compute required storage for UTF-8 encoding of 's[0..n-1]' */ +int u8_wcs_nbytes(u_int32_t *ucs, int size); + +/* single character to UTF-8, no NUL termination */ +int u8_wc_toutf8(char *dest, u_int32_t ch); + +/* moo: single character to UTF-8, with NUL termination */ +int u8_wc_toutf8_nul(char *dest, u_int32_t ch); + +/* character number to byte offset */ +int u8_offset(char *str, int charnum); + +/* byte offset to character number */ +int u8_charnum(char *s, int offset); + +/* return next character, updating an index variable */ +u_int32_t u8_nextchar(char *s, int *i); + +/* move to next character */ +void u8_inc(char *s, int *i); + +/* move to previous character */ +void u8_dec(char *s, int *i); + +/* moo: move pointer to next character */ +void u8_inc_ptr(char **sp); + +/* moo: move pointer to previous character */ +void u8_dec_ptr(char **sp); + +/* returns length of next utf-8 sequence */ +int u8_seqlen(char *s); + +#endif /* S_UTF8_H */ -- cgit v1.2.1