aboutsummaryrefslogtreecommitdiff
path: root/pix_recNN/pix_recNN.h
blob: 944ebd335e76bfb13b2a83dda0189b41bd6b0903 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/////////////////////////////////////////////////////////////////////////////
//
//   GEM - Graphics Environment for Multimedia
//
//   pix_recNN~
//   Calculates an audio signal out of a video frame
//   with a recurrent neural network
//
//   (see RecurrentNeuralNet.h for more info)
//
//   header file
//
//   Copyright (c) 2005 Georg Holzmann <grh@gmx.at>
//   (and of course lots of other developers for PD and GEM)
//
//   For information on usage and redistribution, and for a DISCLAIMER OF ALL
//   WARRANTIES, see the file, "GEM.LICENSE.TERMS" in this distribution.
//
/////////////////////////////////////////////////////////////////////////////


#ifndef _INCLUDE_PIX_RECNN_H__
#define _INCLUDE_PIX_RECNN_H__

#include <string>
#include <sstream>
#include <fstream>
#include "Base/GemPixObj.h"
#include "NNet.h"
#include "RecurrentNeuron.h"


/* Names made visible at file scope.
 * Only `string` is used unqualified by the declarations in this header;
 * the stream names and `endl` are presumably needed by the
 * implementation file -- TODO confirm.
 * NOTE(review): because this is a header, every file that includes it
 * also receives these names, including everything in namespace
 * TheBrain.
 */
using std::string;
using std::endl;
using std::ifstream;
using std::ofstream;
using std::istringstream;

using namespace TheBrain;


/*-----------------------------------------------------------------
 *  CLASS
 * pix_recNN~
 *
 * calculates an audio signal out of a video frame with
 * a recurrent neural network
 *
 * KEYWORDS
 * pix audio
 *
 * DESCRIPTION
 * 1 signal-outlet
 */
/* GEM pix object that turns a video frame into an audio signal by
 * feeding the image through a recurrent neural network.
 * It derives from GemPixObj and has one signal outlet.
 * Only declarations live here; all behaviour is defined in the
 * implementation file.
 */
class GEM_EXTERN pix_recNN : public GemPixObj
{
  /* GEM/PD class boilerplate; the macro is defined outside this file.
   */
  CPPEXTERN_HEADER(pix_recNN, GemPixObj)

 public:

  /* Constructor
   * Takes three float creation arguments.
   * NOTE(review): what arg0, arg1 and arg2 stand for is not visible in
   * this header -- check the implementation file.
   */
  pix_recNN(t_floatarg arg0, t_floatarg arg1, t_floatarg arg2);

 protected:

  /* Destructor
   * (protected, so instances cannot be deleted directly by outside code)
   */
  virtual ~pix_recNN();


  //-----------------------------------
  /* Image STUFF:
   */

  /* Description of the current image, kept as separate fields instead
   * of a whole pixBlock (a former `pixBlock m_pixBlock;` member is no
   * longer declared).
   * NOTE(review): presumably m_data_ is the pixel buffer, m_xsize_ /
   * m_ysize_ the dimensions, m_csize_ the number of channels and
   * m_format_ the pixel format -- confirm in the implementation file.
   */
  unsigned char *m_data_;
  int            m_xsize_;
  int            m_ysize_;
  int            m_csize_;
  int            m_format_;

  /* Precision (subsampling step) of the image:
   * 1 means every pixel is taken for the calculation,
   * 2 every second pixel, 3 every third, ...
   */
  int precision_;

  /* Temporary float storage used during the calculation.
   * NOTE(review): dimensions and ownership of this float** are not
   * visible in this header.
   */
  float **temp_pix_;

  /* processImage
   * Receives the current frame as an imageStruct.
   */
  virtual void processImage(imageStruct &image);


  //-----------------------------------
  /* Neural Network STUFF:
   */

  /* The neural net, built from RecurrentNeuron for both template
   * arguments (size: buffsize).
   * NOTE(review): held as a raw pointer; who allocates and frees it is
   * not visible in this header.
   */
  NNet<RecurrentNeuron,RecurrentNeuron> *net_;

  /* Training mode flag
   * (will only be on for one audio buffer)
   */
  bool train_on_;

  /* The number of neurons, which should be
   * THE SAME as the audio buffer size
   */
  int neuron_nr_;

  /* memory determines how many results from the past
   * are used to calculate an output value
   * (0 means only the result from the current frame).
   * NOTE(review): the previous comment continued "2 also from the last
   * frame, etc."; presumably 1 was meant there -- confirm.
   */
  int memory_;


  //-----------------------------------
  /* Audio STUFF:
   */

  /* The outlet (the class has one signal outlet)
   */
  t_outlet *out0_;

  /* DSP perform routine; static so it can be handed over as a plain
   * function pointer.
   * NOTE(review): the layout of the argument vector w is defined in the
   * implementation file.
   */
  static t_int* perform(t_int* w);

  /* DSP-Message
   * Per-object handler for the DSP message; receives the signal vector
   * array sp.
   */
  virtual void dspMess(void *data, t_signal** sp);


  //-----------------------------------
  /* File IO:
   */

  /* Saves the contents of the current net to the file `filename`.
   * (the string is passed by value)
   */
  virtual void saveNet(string filename);

  /* Loads the parameters of the net from the file `filename`.
   * (the string is passed by value)
   */
  virtual void loadNet(string filename);

 private:

  /* A helper to build a new net.
   * (declared virtual although it is private)
   */
  virtual void buildNewNet();

  //-----------------------------------
  /* Static members
   * (interface to the PD world).
   * NOTE(review): `data` is presumably the pointer to the object the
   * message was sent to -- confirm in the implementation file.
   */

  /* Set/get the precision of the image calculation
   * (see precision_)
   */
  static void setPrecision(void *data, t_floatarg precision);
  static void getPrecision(void *data);

  /* Method to train the network
   * NOTE(review): presumably switches train_on_ on -- confirm.
   */
  static void setTrainOn(void *data);

  /* Changes/queries the number of neurons
   * (which should be the same as the audio buffer)
   * ATTENTION: a new net will be initialized
   */
  static void setNeurons(void *data, t_floatarg neurons);
  static void getNeurons(void *data);

  /* Changes/queries the memory of the net (see memory_).
   * The previous comment said "changes the nblock size", which does not
   * match the name of these methods.
   * ATTENTION: a new net will be initialized
   */
  static void setMemory(void *data, t_floatarg memory);
  static void getMemory(void *data);

  /* Sets/queries the learn rate of the net
   */
  static void setLearnrate(void *data, t_floatarg learn_rate);
  static void getLearnrate(void *data);

  /* DSP callback
   * Static entry point with the same parameters as dspMess.
   */
  static void dspMessCallback(void* data, t_signal** sp);

  /* File IO:
   * Static entry points taking the file name as a PD symbol
   * (counterparts of saveNet / loadNet).
   */
  static void saveToFile(void *data, t_symbol *filename);
  static void loadFromFile(void *data, t_symbol *filename);
};

#endif  // for header file