pix_recNN/pix_recNN-help.pd


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

#N canvas 871 74 498 783 10;
#X obj 36 327 gemwin;
#X msg 36 301 create \, 1;
#N canvas 75 72 765 790 pix2sig_stuff~ 0;
#X obj 120 35 gemhead;
#X obj 120 132 pix_texture;
#X obj 119 274 outlet~;
#X obj 139 185 square 4;
#X obj 139 163 separator;
#X obj 61 165 separator;
#X obj 120 101 pix_video;
#X msg 186 64 dimen 640 480;
#X obj 26 36 block~ 2048;
#X msg 186 38 dimen 320 240;
#X msg 76 535 getprecision;
#X msg 93 696 getlearnrate;
#X msg 65 671 learnrate 0.2;
#X msg 424 459 getneurons;
#X msg 404 206 train;
#X obj 31 227 inlet~;
#X msg 65 647 learnrate 0.05;
#X msg 381 708 getmemory;
#X msg 361 639 memory 0;
#X msg 361 660 memory 1;
#X obj 61 252 pix_recNN;
#X text 296 49 <- input dimension;
#X obj 78 226 r \$0-recNN;
#X obj 62 564 s \$0-recNN;
#X msg 76 498 precision \$1;
#X floatatom 76 481 5 0 0 0 - - -;
#X text 42 335 precision:;
#X text 53 358 1: means every pixel is used in calculation;
#X text 53 372 2: only every second pixel;
#X text 53 386 ...;
#X obj 62 411 loadbang;
#X msg 407 401 neurons 2048;
#X msg 407 422 neurons 64;
#X obj 407 492 s \$0-recNN;
#X text 403 336 neurons:;
#X text 416 357 nr. of neurons used in the calculation;
#X text 415 370 (_MUST_ be the same as the buffersize !!!);
#X text 43 615 learnrate:;
#X obj 65 725 s \$0-recNN;
#X msg 361 681 memory 3;
#X obj 361 741 s \$0-recNN;
#X text 343 543 memory:;
#X text 356 565 this determines how many values from the past the
recurrent net considers in the calculation;
#X text 357 604 (be careful with large values !!!);
#X msg 62 456 precision 1;
#X msg 62 436 precision 4;
#X obj 404 233 s \$0-recNN;
#X text 397 126 train:;
#X text 417 152 trains the neural net;
#X text 418 166 (the current video frame to;
#X text 425 178 the current audio block);
#X connect 0 0 6 0;
#X connect 1 0 4 0;
#X connect 1 0 5 0;
#X connect 4 0 3 0;
#X connect 5 0 20 0;
#X connect 6 0 1 0;
#X connect 7 0 6 0;
#X connect 9 0 6 0;
#X connect 10 0 23 0;
#X connect 11 0 38 0;
#X connect 12 0 38 0;
#X connect 13 0 33 0;
#X connect 14 0 46 0;
#X connect 15 0 20 0;
#X connect 16 0 38 0;
#X connect 17 0 40 0;
#X connect 18 0 40 0;
#X connect 19 0 40 0;
#X connect 20 1 2 0;
#X connect 22 0 20 0;
#X connect 24 0 23 0;
#X connect 25 0 24 0;
#X connect 30 0 45 0;
#X connect 31 0 33 0;
#X connect 32 0 33 0;
#X connect 39 0 40 0;
#X connect 44 0 23 0;
#X connect 45 0 23 0;
#X restore 89 542 pd pix2sig_stuff~;
#X msg 110 302 0 \, destroy;
#X obj 116 587 unsig~;
#X obj 206 432 osc~ 440;
#X obj 205 456 *~;
#X obj 237 456 tgl 15 0 empty empty empty 0 -6 0 8 -262144 -1 -1 0
1;
#X obj 207 496 sig~ 0;
#X floatatom 117 608 8 0 0 0 - - -;
#X text 25 23 pix_recNN:;
#X text 24 57 pix_recNN is an instrument/interface. This instrument
should be useful as a general experimental video interface to generate
audio. You can train the neural net by playing audio samples to specific
video frames in real-time. The main interest for me was not to train
the net exactly to reproduce these samples \, but to make experimental
sounds \, which are "between" all the trained samples.;
#X text 22 214 (but this version is unfinished - e.g. the training
algorithm must be tuned etc. - so it's only a very basic prototype...)
;
#X text 207 320 <- create gemwin;
#X obj 41 442 readsf~;
#X obj 41 401 openpanel;
#X msg 41 421 open \$1;
#X obj 41 380 bng 15 250 50 0 empty empty empty 0 -6 0 8 -262144 -1
-1;
#X text 67 379 <- load sample for training;
#X obj 122 417 tgl 25 0 empty empty empty 0 -6 0 8 -195568 -1 -1 0
1;
#X floatatom 206 414 5 0 0 0 - - -;
#X text 272 431 <- simple osc for training;
#X text 262 497 <- to train silence;
#X obj 85 463 bng 15 250 50 0 empty empty empty 0 -6 0 8 -262144 -1
-1;
#X text 216 541 <- audio/video work;
#X obj 90 684 dac~;
#X obj 90 659 *~;
#X obj 118 659 dbtorms;
#X floatatom 118 641 5 0 0 0 - - -;
#X text 168 638 <- outvol in dB;
#X text 22 170 pix_recNN uses a 2 layer recurrent neural net (for more
detailed info look at the source code.);
#X text 119 737 Georg Holzmann <grh@mur.at> \, 2004;
#X connect 1 0 0 0;
#X connect 2 0 4 0;
#X connect 2 0 26 0;
#X connect 3 0 0 0;
#X connect 4 0 9 0;
#X connect 5 0 6 0;
#X connect 6 0 2 0;
#X connect 7 0 6 1;
#X connect 8 0 2 0;
#X connect 14 0 2 0;
#X connect 14 1 23 0;
#X connect 15 0 16 0;
#X connect 16 0 14 0;
#X connect 17 0 15 0;
#X connect 19 0 14 0;
#X connect 20 0 5 0;
#X connect 26 0 25 0;
#X connect 26 0 25 1;
#X connect 27 0 26 1;
#X connect 28 0 27 0;