aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: IOhannes m zmölnig <zmoelnig@users.sourceforge.net> 2009-12-02 10:40:38 +0000
committer: IOhannes m zmölnig <zmoelnig@users.sourceforge.net> 2009-12-02 10:40:38 +0000
commit: a0697179b24fb4a882db214cacd5ab58716b4dd5 (patch)
tree: 977968e166925316dae27ae75dc2b62d96ec4488
parent: fc292139d2d05b7c931ce3dc9c9c7aaace2bb84a (diff)
hmm, it seems like i cleaned up the code sometime ago and forgot to commit [refs: HEAD, svn2git-head, externals/iem/unicode]
svn path=/trunk/externals/iem/unicode/; revision=12818
-rw-r--r--  utf82codenumber-help.pd  76
-rw-r--r--  utf82codenumber.pd  248
2 files changed, 209 insertions, 115 deletions
diff --git a/utf82codenumber-help.pd b/utf82codenumber-help.pd
index 293def1..21b7e03 100644
--- a/utf82codenumber-help.pd
+++ b/utf82codenumber-help.pd
@@ -1,22 +1,16 @@
-#N canvas 499 69 530 396 10;
+#N canvas 1579 140 530 515 10;
#X text 44 27 UTF-8 to Unicode code numbers;
#X msg 130 58 64;
#X floatatom 130 219 0 0 0 3 Unicode_code_number - -;
-#X text 161 59 ASCII '@';
-#X text 284 136 Lydian letter A;
#X msg 148 136 240 \, 144 \, 164 \, 160;
-#X obj 130 162 utf82codenumber;
-#X msg 144 83 211 \, 153;
-#X text 209 85 schwa;
#X msg 147 111 224 \, 188 \, 128;
-#X text 248 112 tibetian om;
#X text 47 299 this object converts bytestreams of UTF-8 encoded strings
and outputs the according Unicode code number of the letters (as stream).
;
#X text 51 344 you have to serialize UTF-8 lists as output by e.g.
[binfile] and deserialize the Unicode code number lists for use by
e.g. [text3d];
-#N canvas 37 117 450 356 example 0;
+#N canvas 456 284 616 356 example 0;
#N canvas 0 0 450 437 serialize 0;
#X obj 131 56 inlet list;
#X obj 149 252 list split 1;
@@ -40,7 +34,7 @@ e.g. [text3d];
#X connect 6 0 9 0;
#X connect 6 1 4 0;
#X connect 6 2 8 0;
-#X restore 109 103 pd serialize;
+#X restore 109 164 pd serialize;
#N canvas 0 0 450 300 deserialize 0;
#X obj 84 82 inlet done;
#X obj 186 81 inlet stream;
@@ -61,27 +55,49 @@ e.g. [text3d];
#X connect 7 0 4 1;
#X connect 7 1 6 1;
#X connect 8 0 6 0;
-#X restore 109 207 pd deserialize;
-#X obj 149 158 utf82codenumber;
-#X obj 109 230 list prepend set;
-#X obj 109 253 list trim;
-#X msg 109 80 64 211 153 224 188 128 240 144 164 160;
-#X text 112 58 a multibyte UTF-8 encoded string;
-#X text 107 300 a string of unicode code numbers;
-#X msg 109 276;
-#X obj 46 230 loadbang;
+#X restore 109 237 pd deserialize;
+#X obj 109 260 list prepend set;
+#X obj 109 283 list trim;
+#X text 112 8 a multibyte UTF-8 encoded string;
+#X text 107 330 a string of unicode code numbers;
+#X msg 109 306;
+#X obj 46 260 loadbang;
+#X obj 149 199 utf82codenumber;
+#X msg 109 30 64 201 153 224 188 128 240 144 164 160;
+#X msg 46 280 set;
+#X msg 136 129 64 201 153 224 108 128 240 144 164 160;
+#X text 134 74 erroneous string: byte#5 indicates new single-byte within
+a multi-byte \; therefore character#3 (which depends on byte#5) will
+be skipped;
+#X obj 251 220 bng 15 250 50 0 empty empty empty 17 7 0 10 -262144
+-1 -1;
+#X text 274 221 error;
#X connect 0 0 1 0;
-#X connect 0 1 2 0;
+#X connect 0 1 8 0;
#X connect 0 2 1 2;
-#X connect 1 0 3 0;
-#X connect 2 0 1 1;
-#X connect 3 0 4 0;
-#X connect 4 0 8 0;
-#X connect 5 0 0 0;
-#X connect 9 0 3 0;
+#X connect 1 0 2 0;
+#X connect 2 0 3 0;
+#X connect 3 0 6 0;
+#X connect 7 0 2 0;
+#X connect 8 0 1 1;
+#X connect 8 1 13 0;
+#X connect 9 0 0 0;
+#X connect 10 0 6 0;
+#X connect 11 0 0 0;
#X restore 330 228 pd example;
-#X connect 1 0 6 0;
-#X connect 5 0 6 0;
-#X connect 6 0 2 0;
-#X connect 7 0 6 0;
-#X connect 9 0 6 0;
+#X msg 144 83 201 \, 153;
+#X text 209 85 schwa (U0259 = 601);
+#X text 160 58 ASCII '@' (U0040 = 64);
+#X text 248 112 Tibetan om (U0F00 = 3840);
+#X text 284 136 Lydian letter A (U10920 = 67872);
+#X obj 130 162 utf82codenumber;
+#X obj 232 182 print error;
+#X text 52 401 if the bytestream is not a valid UTF-8 stream \, a message
+will be sent to the 2nd outlet indicating an error. the currently decoded
+character will be skipped.;
+#X connect 1 0 13 0;
+#X connect 3 0 13 0;
+#X connect 4 0 13 0;
+#X connect 8 0 13 0;
+#X connect 13 0 2 0;
+#X connect 13 1 14 0;
diff --git a/utf82codenumber.pd b/utf82codenumber.pd
index a2a7372..d02aa5f 100644
--- a/utf82codenumber.pd
+++ b/utf82codenumber.pd
@@ -1,4 +1,4 @@
-#N canvas 14 0 649 526 10;
+#N canvas 400 289 649 526 10;
#X obj 131 172 inlet byte;
#X obj 131 316 outlet byte;
#N canvas 327 0 739 602 shifter 0;
@@ -42,39 +42,34 @@ be stripped off beforehand.;
#X restore 131 283 pd shifter;
#X text 222 316 unicode code number;
#X text 215 170 UTF-8 multibyte;
-#N canvas 1194 0 977 874 stream2multibytelist 1;
+#N canvas 272 0 977 874 stream2multibytelist 0;
#X text 259 228 aux;
-#X text 323 231 2byte;
-#X text 386 233 3byte;
-#X text 456 233 4byte;
+#X text 393 231 2byte;
+#X text 456 233 3byte;
+#X text 526 233 4byte;
#X text 180 232 1byte;
#X obj 181 249 & 127;
-#X obj 321 249 & 31;
+#X obj 391 249 & 31;
#X obj 251 249 & 63;
-#X obj 390 249 & 15;
-#X obj 460 249 & 7;
+#X obj 460 249 & 15;
+#X obj 530 249 & 7;
#X obj 251 270 t f b;
#X msg 213 292 1;
-#X text 220 404 expectedbytes;
-#X obj 181 346 i;
-#X msg 238 323 0;
-#X obj 204 591 t l l;
+#X obj 248 581 t l l;
#X obj 181 613 list prepend;
-#X msg 353 294 2;
-#X obj 181 318 t b f;
+#X msg 423 294 2;
#X obj 181 271 t f b;
-#X obj 321 270 t f b;
-#X obj 212 346 + 1;
-#X obj 390 273 t f b;
-#X msg 422 297 3;
-#X obj 460 271 t f b;
-#X msg 492 295 4;
-#X obj 204 565 list prepend;
+#X obj 391 270 t f b;
+#X obj 460 273 t f b;
+#X msg 492 297 3;
+#X obj 530 271 t f b;
+#X msg 562 295 4;
+#X obj 248 555 list prepend;
#X obj 181 207 moses 128;
#X obj 251 207 moses 192;
-#X obj 321 207 moses 224;
-#X obj 390 207 moses 240;
-#X obj 285 544 t b b;
+#X obj 391 207 moses 224;
+#X obj 460 207 moses 240;
+#X obj 329 534 t b b;
#X obj 181 689 outlet multibytelist;
#X obj 181 154 inlet bytestream;
#X obj 456 683 outlet error;
@@ -82,78 +77,160 @@ be stripped off beforehand.;
(aka "list"s) of characters.;
#X text 63 69 header-bits (used for describing the multibyte-nature
in the incoming stream) are stripped of the lists);
-#X obj 227 373 t f b b;
#X text 521 479 errors:;
#X text 550 502 a continuation-byte at the beginning;
#X text 550 518 a continuation-byte after the last byte;
#X text 550 542 a start-byte within a sequence;
-#X text 555 597 192-193: overlong encoding;
+#X text 555 587 192-193: overlong encoding;
#X text 555 618 245-253: restricted by RFC-3629;
#X text 556 637 254-255: not defined;
-#X obj 460 207 moses 245;
-#X obj 548 238 t b;
-#X text 583 243 5byte \, 6byte;
-#X text 583 229 codepoint > 0x10FFFF;
-#X text 583 259 undefined;
-#X obj 524 614 t b;
-#X obj 180 376 t f f;
-#X obj 344 383 print current;
-#X obj 181 460 select -1;
+#X obj 530 207 moses 245;
+#X obj 590 238 t b;
+#X text 653 243 5byte \, 6byte;
+#X text 653 229 codepoint > 0x10FFFF;
+#X text 653 259 undefined;
+#X obj 520 634 t b;
#X obj 181 181 t f f;
-#X obj 353 179 print newbyte;
#X obj 181 643 t l b;
-#X connect 5 0 19 0;
-#X connect 6 0 20 0;
+#N canvas 234 245 992 588 multibyte 0;
+#X obj 70 66 inlet byte;
+#X obj 313 61 inlet newlength;
+#X obj 115 543 outlet byte|bang;
+#X text 246 539 byte: append byte to list;
+#X text 247 556 bang: list has finished;
+#X obj 527 464 outlet error;
+#X obj 527 175 select 0;
+#X text 577 206 unexpected byte (continuation-byte at beginning);
+#X obj 70 100 pack 0 0;
+#X msg 132 100 0;
+#X obj 70 258 select 0;
+#X text 148 263 state: 0=expect new sequence;
+#X text 199 280 1=expect continuation;
+#X obj 70 196 == 0;
+#X text 76 214 0=newbyte;
+#X text 76 227 1==contbyte;
+#X text 605 312 overlong;
+#X obj 115 483 f;
+#X obj 70 145 swap;
+#X obj 70 168 t f f;
+#X obj 70 279 f;
+#X obj 70 308 select 0;
+#X obj 70 347 i;
+#X obj 99 347 - 1;
+#X obj 70 390 select 1;
+#X obj 70 413 t b b;
+#X msg 185 350 0;
+#X obj 121 328 t f b;
+#X msg 153 349 1;
+#X obj 185 372 t f;
+#X obj 330 383 < 1;
+#X obj 330 408 select 1;
+#X obj 70 121 t l b l;
+#X obj 549 317 t b b;
+#X obj 527 213 t b b;
+#X obj 70 369 t f b f;
+#X obj 116 509 t a a;
+#X obj 527 422 t b b;
+#X text 79 30 now this is a bit overcomplicated....;
+#X connect 0 0 8 0;
+#X connect 1 0 8 1;
+#X connect 6 0 34 0;
+#X connect 8 0 32 0;
+#X connect 9 0 8 1;
+#X connect 10 0 20 0;
+#X connect 10 1 34 0;
+#X connect 13 0 10 0;
+#X connect 17 0 36 0;
+#X connect 18 0 19 0;
+#X connect 18 1 17 1;
+#X connect 19 0 13 0;
+#X connect 19 1 20 1;
+#X connect 20 0 21 0;
+#X connect 21 0 22 0;
+#X connect 21 1 27 0;
+#X connect 22 0 23 0;
+#X connect 22 0 35 0;
+#X connect 23 0 22 1;
+#X connect 24 0 25 0;
+#X connect 24 1 30 0;
+#X connect 25 0 36 0;
+#X connect 25 1 26 0;
+#X connect 26 0 29 0;
+#X connect 27 0 22 0;
+#X connect 27 1 28 0;
+#X connect 28 0 29 0;
+#X connect 29 0 10 1;
+#X connect 30 0 31 0;
+#X connect 31 0 33 0;
+#X connect 32 0 18 0;
+#X connect 32 1 9 0;
+#X connect 33 0 37 0;
+#X connect 34 0 37 0;
+#X connect 35 0 24 0;
+#X connect 35 1 17 0;
+#X connect 36 0 2 0;
+#X connect 37 0 5 0;
+#X connect 37 1 26 0;
+#X restore 181 424 pd multibyte;
+#X obj 181 450 route bang;
+#X obj 262 450 t b b;
+#X obj 213 335 t f;
+#X text 266 407 #expectedbytes;
+#X text 233 371 byte;
+#X obj 181 368 t f f;
+#X obj 321 207 moses 194;
+#X obj 521 582 t b;
+#X connect 5 0 15 0;
+#X connect 6 0 16 0;
#X connect 7 0 10 0;
-#X connect 8 0 22 0;
-#X connect 9 0 24 0;
-#X connect 10 0 18 0;
-#X connect 11 0 37 0;
-#X connect 13 0 21 0;
-#X connect 13 0 51 0;
-#X connect 14 0 21 0;
-#X connect 15 0 16 1;
-#X connect 15 1 26 1;
-#X connect 16 0 56 0;
-#X connect 17 0 37 0;
-#X connect 18 0 13 0;
-#X connect 18 1 26 0;
-#X connect 19 0 18 0;
-#X connect 19 1 11 0;
-#X connect 20 0 18 0;
-#X connect 20 1 17 0;
-#X connect 21 0 13 1;
-#X connect 22 0 18 0;
+#X connect 8 0 17 0;
+#X connect 9 0 19 0;
+#X connect 10 0 53 0;
+#X connect 11 0 50 0;
+#X connect 12 0 13 1;
+#X connect 12 1 21 1;
+#X connect 13 0 46 0;
+#X connect 14 0 50 0;
+#X connect 15 0 53 0;
+#X connect 15 1 11 0;
+#X connect 16 0 53 0;
+#X connect 16 1 14 0;
+#X connect 17 0 53 0;
+#X connect 17 1 18 0;
+#X connect 18 0 50 0;
+#X connect 19 0 53 0;
+#X connect 19 1 20 0;
+#X connect 20 0 50 0;
+#X connect 21 0 12 0;
+#X connect 22 0 5 0;
#X connect 22 1 23 0;
-#X connect 23 0 37 0;
-#X connect 24 0 18 0;
+#X connect 23 0 7 0;
+#X connect 23 1 54 0;
+#X connect 24 0 6 0;
#X connect 24 1 25 0;
-#X connect 25 0 37 0;
-#X connect 26 0 15 0;
-#X connect 27 0 5 0;
-#X connect 27 1 28 0;
-#X connect 28 0 7 0;
-#X connect 28 1 29 0;
-#X connect 29 0 6 0;
-#X connect 29 1 30 0;
-#X connect 30 0 8 0;
-#X connect 30 1 45 0;
-#X connect 31 0 26 1;
-#X connect 31 1 16 1;
-#X connect 33 0 54 0;
-#X connect 37 0 53 1;
-#X connect 37 1 14 0;
-#X connect 37 2 31 0;
-#X connect 45 0 9 0;
-#X connect 45 1 46 0;
-#X connect 46 0 50 0;
-#X connect 50 0 34 0;
-#X connect 51 0 53 0;
-#X connect 51 1 52 0;
-#X connect 53 0 16 0;
-#X connect 54 0 27 0;
-#X connect 54 1 55 0;
-#X connect 56 0 32 0;
+#X connect 25 0 8 0;
+#X connect 25 1 39 0;
+#X connect 26 0 21 1;
+#X connect 26 1 13 1;
+#X connect 28 0 45 0;
+#X connect 39 0 9 0;
+#X connect 39 1 40 0;
+#X connect 40 0 44 0;
+#X connect 44 0 29 0;
+#X connect 45 0 22 0;
+#X connect 46 0 27 0;
+#X connect 46 1 26 0;
+#X connect 47 0 48 0;
+#X connect 47 1 49 0;
+#X connect 48 0 13 0;
+#X connect 48 1 21 0;
+#X connect 49 0 29 0;
+#X connect 49 1 26 0;
+#X connect 50 0 47 1;
+#X connect 53 0 47 0;
+#X connect 54 0 55 0;
+#X connect 54 1 24 0;
+#X connect 55 0 29 0;
#X restore 131 241 pd stream2multibytelist;
#X text 47 43 converts a stream of UTF-8 bytes into a stream of Unicode
code numbers;
@@ -162,6 +239,7 @@ has been received;
#X obj 400 306 outlet error;
#X text 104 378 if the bytestream is invalid \, a message is sent to
the output;
+#X text 77 479 (c) 2009 IOhannes m zmölnig & Maira Sala;
#X connect 0 0 5 0;
#X connect 2 0 1 0;
#X connect 5 0 2 0;