desiredata/src/m_simd.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

/* Definitions for SIMD functionality; added by T.Grill */
#ifndef __M_SIMD_H
#define __M_SIMD_H

/* general vector functions */
/* NOTE(review): only the prototypes are visible in this header, so the
   per-function notes below are inferred from names and signatures and must
   be confirmed against the implementations.  This header does not declare
   t_float itself; whatever declares it has to be included first.
   The _8 suffix presumably means n is expected to be a multiple of 8 -- TODO confirm. */
/* presumably clears n floats starting at dst -- confirm */
void zerovec_8(t_float *dst,int n);
/* presumably stores the value v into n floats starting at dst -- confirm */
void setvec_8(t_float *dst,t_float v,int n);
/* presumably copies n floats from src to dst; src is not written through (const) */
void copyvec_8(t_float *dst,const t_float *src,int n);
/* presumably adds n floats of src onto dst in place; src is not written through (const) */
void addvec_8(t_float *dst,const t_float *src,int n);
/* NOTE(review): the "test" variants share the signatures of copyvec_8/addvec_8;
   what they test for cannot be determined from this header -- confirm. */
void testcopyvec_8(t_float *dst,const t_float *src,int n);
void testaddvec_8(t_float *dst,const t_float *src,int n);

/* Alignment, in bytes, tested by SIMD_CHKALIGN: 128 bits / 8 bits per byte = 16.
   SIMD_CHKALIGN masks the address with (SIMD_BYTEALIGN-1), so this value has to
   stay a power of 2 for that test to be meaningful. */
#define SIMD_BYTEALIGN (128/8)

/* how many floats do we calculate in the loop of a SIMD codelet? */
/* SIMD_CHKCNT masks the count with (SIMD_BLOCK-1); that mask trick is the
   reason for the power-of-2 requirement noted on the definition. */
#define SIMD_BLOCK 16  /* must be a power of 2 */

//#if defined(__GNUC__)  && (defined(_X86_) || defined(__i386__) || defined(__i586__) || defined(__i686__))
/* Prototypes of the SSE codelets.  They are only declared when the build
   defines SIMD_SSE; the commented-out line above is the earlier
   compiler/architecture auto-detection that the explicit flag replaced.
   NOTE(review): the implementations are not visible from this header, so
   argument meanings (e.g. what the t_int *w vectors contain) must be taken
   from the defining source file.  t_int and t_float are not declared here. */
#ifdef SIMD_SSE /* Intel SSE with GNU C */ /* ought to add this in configure.in */
t_int *sigwrap_perf_simd(t_int *w);
void line_tilde_slope_simd(t_float* out, t_int n, t_float* value, t_float* slopes, t_float* slopestep);
/* NOTE(review): returns plain float while operating on t_float buffers --
   confirm this is intended should t_float ever differ from float. */
float env_tilde_accum_simd(t_float* in, t_float* hp, t_int n);
t_int* sigsqrt_perf_simd(t_int *w);
t_int* sigrsqrt_perf_simd(t_int *w);
/* NOTE(review): same float-vs-t_float remark as env_tilde_accum_simd. */
float peakvec_simd(t_float* vec, t_int n, t_float cur_max);
#endif

//#if defined(__GNUC__) && defined(__POWERPC__) && defined(__ALTIVEC__)
/* The AltiVec declarations live in their own header, pulled in only when the
   build defines SIMD_ALTIVEC; the commented-out line above is the earlier
   compiler/architecture auto-detection. */
#ifdef SIMD_ALTIVEC /* Altivec with GNU C  ( -faltivec must be given as a compiler option! ) */ /* ought to add this in configure.in */
    #include "m_simd_ve_gcc.h"
#endif

#ifdef DONTUSESIMD
    /* This is used when there's no implementation of SIMD code for the current platform and/or compiler */
    /* Every *_simd name is aliased, by macro, to the routine named on its right,
       so callers can keep using the *_simd spelling unconditionally.  The target
       routines are not declared in this header.  Each alias is defined exactly
       once: a second definition only compiles while it stays token-identical
       to the first, which makes duplicates a maintenance trap. */
    /* These are the functions that can be coded for SIMD */
    #define zero_perf_simd          zero_perf8
    #define copy_perf_simd          copy_perf8
    #define sig_tilde_perf_simd     sig_tilde_perf8
    #define sigwrap_perf_simd       sigwrap_perform
    #define line_tilde_slope_simd   line_tilde_slope
    #define env_tilde_accum_simd    env_tilde_accum_8 /* it's a bad place to set that here since there's no public prototype */
    #define plus_perf_simd          plus_perf8
    #define scalarplus_perf_simd    scalarplus_perf8
    #define minus_perf_simd         minus_perf8
    #define scalarminus_perf_simd   scalarminus_perf8
    #define times_perf_simd         times_perf8
    #define scalartimes_perf_simd   scalartimes_perf8
    #define sqr_perf_simd           sqr_perf8
    #define over_perf_simd          over_perf8
    #define scalarover_perf_simd    scalarover_perf8
    #define min_perf_simd           min_perf8
    #define scalarmin_perf_simd     scalarmin_perf8
    #define max_perf_simd           max_perf8
    #define scalarmax_perf_simd     scalarmax_perf8
    #define clip_perf_simd          clip_perform  /* SIMD not implemented */
    #define sigsqrt_perf_simd       sigsqrt_perform  /* SIMD not implemented */
    #define sigrsqrt_perf_simd      sigrsqrt_perform /* SIMD not implemented */
    #define peakvec_simd            peakvec
    /* #define sum_vecsimd                sumvec_8 */
#endif

/* Nonzero when n is a whole multiple of SIMD_BLOCK (mask test, hence the
   power-of-2 requirement on SIMD_BLOCK). */
#define SIMD_CHKCNT(n) ( !((n) & (SIMD_BLOCK-1)) )
/* Nonzero when ptr lies on a SIMD_BYTEALIGN-byte boundary. */
#define SIMD_CHKALIGN(ptr) ( !((size_t)(ptr) & (SIMD_BYTEALIGN-1)) )
/* Combined gates for 1, 2 or 3 buffers: the count test comes first, then each
   pointer in argument order, and simd_runtime_check() is only reached when all
   of those passed (&& short-circuits).
   NOTE(review): simd_runtime_check() is not declared in this header -- it has
   to be visible at every point where these macros are used. */
#define SIMD_CHECK1(n,ptr1) \
    ( SIMD_CHKCNT(n) \
      && SIMD_CHKALIGN(ptr1) \
      && simd_runtime_check() )
#define SIMD_CHECK2(n,ptr1,ptr2) \
    ( SIMD_CHKCNT(n) \
      && SIMD_CHKALIGN(ptr1) \
      && SIMD_CHKALIGN(ptr2) \
      && simd_runtime_check() )
#define SIMD_CHECK3(n,ptr1,ptr2,ptr3) \
    ( SIMD_CHKCNT(n) \
      && SIMD_CHKALIGN(ptr1) \
      && SIMD_CHKALIGN(ptr2) \
      && SIMD_CHKALIGN(ptr3) \
      && simd_runtime_check() )

/* T.Grill - alignment of signal vectors, expressed in bits (must be a multiple of 8!) */
/* Taken from SIMD_BYTEALIGN (bytes * 8) when that macro is defined; otherwise
   the fixed fallback of 128 bits is used. */
#ifndef SIMD_BYTEALIGN
    #define VECTORALIGNMENT 128
#else
    #define VECTORALIGNMENT (SIMD_BYTEALIGN * 8)
#endif

#endif /* __M_SIMD_H */