From a0697179b24fb4a882db214cacd5ab58716b4dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?IOhannes=20m=20zm=C3=B6lnig?= Date: Wed, 2 Dec 2009 10:40:38 +0000 Subject: hmm, it seems like i cleaned up the code sometime ago and forgot to commit svn path=/trunk/externals/iem/unicode/; revision=12818 --- utf82codenumber-help.pd | 76 +++++++++------ utf82codenumber.pd | 248 +++++++++++++++++++++++++++++++----------------- 2 files changed, 209 insertions(+), 115 deletions(-) diff --git a/utf82codenumber-help.pd b/utf82codenumber-help.pd index 293def1..21b7e03 100644 --- a/utf82codenumber-help.pd +++ b/utf82codenumber-help.pd @@ -1,22 +1,16 @@ -#N canvas 499 69 530 396 10; +#N canvas 1579 140 530 515 10; #X text 44 27 UTF-8 to Unicode code numbers; #X msg 130 58 64; #X floatatom 130 219 0 0 0 3 Unicode_code_number - -; -#X text 161 59 ASCII '@'; -#X text 284 136 Lydian letter A; #X msg 148 136 240 \, 144 \, 164 \, 160; -#X obj 130 162 utf82codenumber; -#X msg 144 83 211 \, 153; -#X text 209 85 schwa; #X msg 147 111 224 \, 188 \, 128; -#X text 248 112 tibetian om; #X text 47 299 this object converts bytestreams of UTF-8 encoded strings and outputs the according Unicode code number of the letters (as stream). ; #X text 51 344 you have to serialize UTF-8 lists as output by e.g. [binfile] and deserialize the Unicode code number lists for use by e.g. [text3d]; -#N canvas 37 117 450 356 example 0; +#N canvas 456 284 616 356 example 0; #N canvas 0 0 450 437 serialize 0; #X obj 131 56 inlet list; #X obj 149 252 list split 1; @@ -40,7 +34,7 @@ e.g. [text3d]; #X connect 6 0 9 0; #X connect 6 1 4 0; #X connect 6 2 8 0; -#X restore 109 103 pd serialize; +#X restore 109 164 pd serialize; #N canvas 0 0 450 300 deserialize 0; #X obj 84 82 inlet done; #X obj 186 81 inlet stream; @@ -61,27 +55,49 @@ e.g. [text3d]; #X connect 7 0 4 1; #X connect 7 1 6 1; #X connect 8 0 6 0; -#X restore 109 207 pd deserialize; -#X obj 149 158 utf82codenumber; -#X obj 109 230 list prepend set; -#X obj 109 253 list trim; -#X msg 109 80 64 211 153 224 188 128 240 144 164 160; -#X text 112 58 a multibyte UTF-8 encoded string; -#X text 107 300 a string of unicode code numbers; -#X msg 109 276; -#X obj 46 230 loadbang; +#X restore 109 237 pd deserialize; +#X obj 109 260 list prepend set; +#X obj 109 283 list trim; +#X text 112 8 a multibyte UTF-8 encoded string; +#X text 107 330 a string of unicode code numbers; +#X msg 109 306; +#X obj 46 260 loadbang; +#X obj 149 199 utf82codenumber; +#X msg 109 30 64 201 153 224 188 128 240 144 164 160; +#X msg 46 280 set; +#X msg 136 129 64 201 153 224 108 128 240 144 164 160; +#X text 134 74 erroneous string: byte#5 indicates new single-byte within +a multi-byte \; therefore character#3 (which depends on byte#5) will +be skipped; +#X obj 251 220 bng 15 250 50 0 empty empty empty 17 7 0 10 -262144 +-1 -1; +#X text 274 221 error; #X connect 0 0 1 0; -#X connect 0 1 2 0; +#X connect 0 1 8 0; #X connect 0 2 1 2; -#X connect 1 0 3 0; -#X connect 2 0 1 1; -#X connect 3 0 4 0; -#X connect 4 0 8 0; -#X connect 5 0 0 0; -#X connect 9 0 3 0; +#X connect 1 0 2 0; +#X connect 2 0 3 0; +#X connect 3 0 6 0; +#X connect 7 0 2 0; +#X connect 8 0 1 1; +#X connect 8 1 13 0; +#X connect 9 0 0 0; +#X connect 10 0 6 0; +#X connect 11 0 0 0; #X restore 330 228 pd example; -#X connect 1 0 6 0; -#X connect 5 0 6 0; -#X connect 6 0 2 0; -#X connect 7 0 6 0; -#X connect 9 0 6 0; +#X msg 144 83 201 \, 153; +#X text 209 85 schwa (U0259 = 601); +#X text 160 58 ASCII '@' (U0040 = 64); +#X text 248 112 tibetian om (U0F00 = 3840); +#X text 284 136 Lydian letter A (U10920 = 67872); +#X obj 130 162 utf82codenumber; +#X obj 232 182 print error; +#X text 52 401 if the bytestream is not a valid UTF-8 stream \, a message +will be send to the 2nd outlet indicating an error. the currently decoded +character will be skipped.; +#X connect 1 0 13 0; +#X connect 3 0 13 0; +#X connect 4 0 13 0; +#X connect 8 0 13 0; +#X connect 13 0 2 0; +#X connect 13 1 14 0; diff --git a/utf82codenumber.pd b/utf82codenumber.pd index a2a7372..d02aa5f 100644 --- a/utf82codenumber.pd +++ b/utf82codenumber.pd @@ -1,4 +1,4 @@ -#N canvas 14 0 649 526 10; +#N canvas 400 289 649 526 10; #X obj 131 172 inlet byte; #X obj 131 316 outlet byte; #N canvas 327 0 739 602 shifter 0; @@ -42,39 +42,34 @@ be stripped off beforehand.; #X restore 131 283 pd shifter; #X text 222 316 unicode code number; #X text 215 170 UTF-8 multibyte; -#N canvas 1194 0 977 874 stream2multibytelist 1; +#N canvas 272 0 977 874 stream2multibytelist 0; #X text 259 228 aux; -#X text 323 231 2byte; -#X text 386 233 3byte; -#X text 456 233 4byte; +#X text 393 231 2byte; +#X text 456 233 3byte; +#X text 526 233 4byte; #X text 180 232 1byte; #X obj 181 249 & 127; -#X obj 321 249 & 31; +#X obj 391 249 & 31; #X obj 251 249 & 63; -#X obj 390 249 & 15; -#X obj 460 249 & 7; +#X obj 460 249 & 15; +#X obj 530 249 & 7; #X obj 251 270 t f b; #X msg 213 292 1; -#X text 220 404 expectedbytes; -#X obj 181 346 i; -#X msg 238 323 0; -#X obj 204 591 t l l; +#X obj 248 581 t l l; #X obj 181 613 list prepend; -#X msg 353 294 2; -#X obj 181 318 t b f; +#X msg 423 294 2; #X obj 181 271 t f b; -#X obj 321 270 t f b; -#X obj 212 346 + 1; -#X obj 390 273 t f b; -#X msg 422 297 3; -#X obj 460 271 t f b; -#X msg 492 295 4; -#X obj 204 565 list prepend; +#X obj 391 270 t f b; +#X obj 460 273 t f b; +#X msg 492 297 3; +#X obj 530 271 t f b; +#X msg 562 295 4; +#X obj 248 555 list prepend; #X obj 181 207 moses 128; #X obj 251 207 moses 192; -#X obj 321 207 moses 224; -#X obj 390 207 moses 240; -#X obj 285 544 t b b; +#X obj 391 207 moses 224; +#X obj 460 207 moses 240; +#X obj 329 534 t b b; #X obj 181 689 outlet multibytelist; #X obj 181 154 inlet bytestream; #X obj 456 683 outlet error; @@ -82,78 +77,160 @@ be stripped off beforehand.; (aka "list"s) of characters.; #X text 63 69 header-bits (used for describing the multibyte-nature in the incoming stream) are stripped of the lists); -#X obj 227 373 t f b b; #X text 521 479 errors:; #X text 550 502 a continuation-byte at the beginning; #X text 550 518 a continuation-byte after the last byte; #X text 550 542 a start-byte within a sequence; -#X text 555 597 192-193: overlong encoding; +#X text 555 587 192-193: overlong encoding; #X text 555 618 245-253: restricted by RFC-3629; #X text 556 637 254-255: not defined; -#X obj 460 207 moses 245; -#X obj 548 238 t b; -#X text 583 243 5byte \, 6byte; -#X text 583 229 codepoint > 0x10FFFF; -#X text 583 259 undefined; -#X obj 524 614 t b; -#X obj 180 376 t f f; -#X obj 344 383 print current; -#X obj 181 460 select -1; +#X obj 530 207 moses 245; +#X obj 590 238 t b; +#X text 653 243 5byte \, 6byte; +#X text 653 229 codepoint > 0x10FFFF; +#X text 653 259 undefined; +#X obj 520 634 t b; #X obj 181 181 t f f; -#X obj 353 179 print newbyte; #X obj 181 643 t l b; -#X connect 5 0 19 0; -#X connect 6 0 20 0; +#N canvas 234 245 992 588 multibyte 0; +#X obj 70 66 inlet byte; +#X obj 313 61 inlet newlength; +#X obj 115 543 outlet byte|bang; +#X text 246 539 byte: append byte to list; +#X text 247 556 bang: list has finished; +#X obj 527 464 outlet error; +#X obj 527 175 select 0; +#X text 577 206 unexpected byte (continuation-byte at beginning); +#X obj 70 100 pack 0 0; +#X msg 132 100 0; +#X obj 70 258 select 0; +#X text 148 263 state: 0=expect new sequence; +#X text 199 280 1=expect continuation; +#X obj 70 196 == 0; +#X text 76 214 0=newbyte; +#X text 76 227 1==contbyte; +#X text 605 312 overlong; +#X obj 115 483 f; +#X obj 70 145 swap; +#X obj 70 168 t f f; +#X obj 70 279 f; +#X obj 70 308 select 0; +#X obj 70 347 i; +#X obj 99 347 - 1; +#X obj 70 390 select 1; +#X obj 70 413 t b b; +#X msg 185 350 0; +#X obj 121 328 t f b; +#X msg 153 349 1; +#X obj 185 372 t f; +#X obj 330 383 < 1; +#X obj 330 408 select 1; +#X obj 70 121 t l b l; +#X obj 549 317 t b b; +#X obj 527 213 t b b; +#X obj 70 369 t f b f; +#X obj 116 509 t a a; +#X obj 527 422 t b b; +#X text 79 30 now this is a bit overcomplicated....; +#X connect 0 0 8 0; +#X connect 1 0 8 1; +#X connect 6 0 34 0; +#X connect 8 0 32 0; +#X connect 9 0 8 1; +#X connect 10 0 20 0; +#X connect 10 1 34 0; +#X connect 13 0 10 0; +#X connect 17 0 36 0; +#X connect 18 0 19 0; +#X connect 18 1 17 1; +#X connect 19 0 13 0; +#X connect 19 1 20 1; +#X connect 20 0 21 0; +#X connect 21 0 22 0; +#X connect 21 1 27 0; +#X connect 22 0 23 0; +#X connect 22 0 35 0; +#X connect 23 0 22 1; +#X connect 24 0 25 0; +#X connect 24 1 30 0; +#X connect 25 0 36 0; +#X connect 25 1 26 0; +#X connect 26 0 29 0; +#X connect 27 0 22 0; +#X connect 27 1 28 0; +#X connect 28 0 29 0; +#X connect 29 0 10 1; +#X connect 30 0 31 0; +#X connect 31 0 33 0; +#X connect 32 0 18 0; +#X connect 32 1 9 0; +#X connect 33 0 37 0; +#X connect 34 0 37 0; +#X connect 35 0 24 0; +#X connect 35 1 17 0; +#X connect 36 0 2 0; +#X connect 37 0 5 0; +#X connect 37 1 26 0; +#X restore 181 424 pd multibyte; +#X obj 181 450 route bang; +#X obj 262 450 t b b; +#X obj 213 335 t f; +#X text 266 407 #expectedbytes; +#X text 233 371 byte; +#X obj 181 368 t f f; +#X obj 321 207 moses 194; +#X obj 521 582 t b; +#X connect 5 0 15 0; +#X connect 6 0 16 0; #X connect 7 0 10 0; -#X connect 8 0 22 0; -#X connect 9 0 24 0; -#X connect 10 0 18 0; -#X connect 11 0 37 0; -#X connect 13 0 21 0; -#X connect 13 0 51 0; -#X connect 14 0 21 0; -#X connect 15 0 16 1; -#X connect 15 1 26 1; -#X connect 16 0 56 0; -#X connect 17 0 37 0; -#X connect 18 0 13 0; -#X connect 18 1 26 0; -#X connect 19 0 18 0; -#X connect 19 1 11 0; -#X connect 20 0 18 0; -#X connect 20 1 17 0; -#X connect 21 0 13 1; -#X connect 22 0 18 0; +#X connect 8 0 17 0; +#X connect 9 0 19 0; +#X connect 10 0 53 0; +#X connect 11 0 50 0; +#X connect 12 0 13 1; +#X connect 12 1 21 1; +#X connect 13 0 46 0; +#X connect 14 0 50 0; +#X connect 15 0 53 0; +#X connect 15 1 11 0; +#X connect 16 0 53 0; +#X connect 16 1 14 0; +#X connect 17 0 53 0; +#X connect 17 1 18 0; +#X connect 18 0 50 0; +#X connect 19 0 53 0; +#X connect 19 1 20 0; +#X connect 20 0 50 0; +#X connect 21 0 12 0; +#X connect 22 0 5 0; #X connect 22 1 23 0; -#X connect 23 0 37 0; -#X connect 24 0 18 0; +#X connect 23 0 7 0; +#X connect 23 1 54 0; +#X connect 24 0 6 0; #X connect 24 1 25 0; -#X connect 25 0 37 0; -#X connect 26 0 15 0; -#X connect 27 0 5 0; -#X connect 27 1 28 0; -#X connect 28 0 7 0; -#X connect 28 1 29 0; -#X connect 29 0 6 0; -#X connect 29 1 30 0; -#X connect 30 0 8 0; -#X connect 30 1 45 0; -#X connect 31 0 26 1; -#X connect 31 1 16 1; -#X connect 33 0 54 0; -#X connect 37 0 53 1; -#X connect 37 1 14 0; -#X connect 37 2 31 0; -#X connect 45 0 9 0; -#X connect 45 1 46 0; -#X connect 46 0 50 0; -#X connect 50 0 34 0; -#X connect 51 0 53 0; -#X connect 51 1 52 0; -#X connect 53 0 16 0; -#X connect 54 0 27 0; -#X connect 54 1 55 0; -#X connect 56 0 32 0; +#X connect 25 0 8 0; +#X connect 25 1 39 0; +#X connect 26 0 21 1; +#X connect 26 1 13 1; +#X connect 28 0 45 0; +#X connect 39 0 9 0; +#X connect 39 1 40 0; +#X connect 40 0 44 0; +#X connect 44 0 29 0; +#X connect 45 0 22 0; +#X connect 46 0 27 0; +#X connect 46 1 26 0; +#X connect 47 0 48 0; +#X connect 47 1 49 0; +#X connect 48 0 13 0; +#X connect 48 1 21 0; +#X connect 49 0 29 0; +#X connect 49 1 26 0; +#X connect 50 0 47 1; +#X connect 53 0 47 0; +#X connect 54 0 55 0; +#X connect 54 1 24 0; +#X connect 55 0 29 0; #X restore 131 241 pd stream2multibytelist; #X text 47 43 converts a stream of UTF-8 bytes into a stream of Unicode code numbers; @@ -162,6 +239,7 @@ has been received; #X obj 400 306 outlet error; #X text 104 378 if the bytestream is invalid \, a message is sent to the output; +#X text 77 479 (c) 2009 IOhannes m zmölnig & Maira Sala; #X connect 0 0 5 0; #X connect 2 0 1 0; #X connect 5 0 2 0; -- cgit v1.2.1