aboutsummaryrefslogtreecommitdiff
path: root/system/image/pdp_imageproc_mmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'system/image/pdp_imageproc_mmx.c')
-rw-r--r--system/image/pdp_imageproc_mmx.c594
1 files changed, 594 insertions, 0 deletions
diff --git a/system/image/pdp_imageproc_mmx.c b/system/image/pdp_imageproc_mmx.c
new file mode 100644
index 0000000..67f6e77
--- /dev/null
+++ b/system/image/pdp_imageproc_mmx.c
@@ -0,0 +1,594 @@
+/*
+ * Pure Data Packet. c wrapper for mmx image processing routines.
+ * Copyright (c) by Tom Schouten <pdp@zzz.kotnet.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+/* this is a c wrapper around platform specific (mmx) code */
+#include <stdlib.h>
+#include <math.h>
+#include "pdp_mmx.h"
+#include "pdp_imageproc.h"
+
+/* pdp memory alloc/dealloc prototype */
+void *pdp_alloc(int size);
+void pdp_dealloc(void *);
+
+// utility stuff
+inline static s16 float2fixed(float f)
+{
+ if (f > 1) f = 1;
+ if (f < -1) f = -1;
+ f *= 0x7fff;
+ return (s16)f;
+}
+
+inline static void setvec(s16 *v, float f)
+{
+ s16 a = float2fixed(f);
+ v[0] = a;
+ v[1] = a;
+ v[2] = a;
+ v[3] = a;
+}
+
+
+
+// add two images
+void pdp_imageproc_add_process(void *x, u32 width, u32 height, s16 *image, s16 *image2)
+{
+ unsigned int totalnbpixels = width * height;
+ pixel_add_s16(image, image2, totalnbpixels>>2);
+}
+
+// mul two images
+void pdp_imageproc_mul_process(void *x, u32 width, u32 height, s16 *image, s16 *image2)
+{
+ unsigned int totalnbpixels = width * height;
+ pixel_mul_s16(image, image2, totalnbpixels>>2);
+}
+
+
+// mix 2 images
+#define MIX_SIZE 8*sizeof(s16)
+void *pdp_imageproc_mix_new(void){return pdp_alloc(MIX_SIZE);}
+int pdp_imageproc_mix_nb_stackwords(void){return PDP_IMAGEPROC_NB_STACKWORDS(MIX_SIZE);}
+void pdp_imageproc_mix_delete(void *x) {pdp_dealloc (x);}
+void pdp_imageproc_mix_setleftgain(void *x, float gain){setvec((s16 *)x, gain);}
+void pdp_imageproc_mix_setrightgain(void *x, float gain){setvec((s16 *)x + 4, gain);}
+void pdp_imageproc_mix_process(void *x, u32 width, u32 height, s16 *image, s16 *image2)
+{
+ s16 *d = (s16 *)x;
+ unsigned int totalnbpixels = width * height;
+ pixel_mix_s16(image, image2, totalnbpixels>>2, d, d+4);
+}
+
+
+// random mix 2 images
+#define RANDMIX_SIZE 8*sizeof(s16)
+void *pdp_imageproc_randmix_new(void){return pdp_alloc(RANDMIX_SIZE);}
+int pdp_imageproc_randmix_nb_stackwords(void) {return PDP_IMAGEPROC_NB_STACKWORDS(RANDMIX_SIZE);}
+void pdp_imageproc_randmix_delete(void *x) {pdp_dealloc (x);}
+void pdp_imageproc_randmix_setthreshold(void *x, float threshold){setvec((s16 *)x, 2*threshold-1);}
+void pdp_imageproc_randmix_setseed(void *x, float seed)
+{
+ s16 *d = (s16 *)x;
+ srandom((u32)seed);
+ d[4] = (s16)random();
+ d[5] = (s16)random();
+ d[6] = (s16)random();
+ d[7] = (s16)random();
+
+}
+void pdp_imageproc_randmix_process(void *x, u32 width, u32 height, s16 *image, s16 *image2)
+{
+ s16 *d = (s16 *)x;
+ unsigned int totalnbpixels = width * height;
+ pixel_randmix_s16(image, image2, totalnbpixels>>2, d+4, d);
+}
+
+
+// 3x1 or 1x3 in place convolution
+// orientation
+typedef struct
+{
+ s16 min1[4];
+ s16 zero[4];
+ s16 plus1[4];
+ s16 border[4];
+ u32 orientation;
+ u32 nbpasses;
+} t_conv;
+void *pdp_imageproc_conv_new(void){return(pdp_alloc(sizeof(t_conv)));}
+void pdp_imageproc_conv_delete(void *x){pdp_dealloc(x);}
+void pdp_imageproc_conv_setmin1(void *x, float val){setvec(((t_conv *)x)->min1, val);}
+void pdp_imageproc_conv_setzero(void *x, float val){setvec(((t_conv *)x)->zero, val);}
+void pdp_imageproc_conv_setplus1(void *x, float val){setvec(((t_conv *)x)->plus1, val);}
+void pdp_imageproc_conv_setbordercolor(void *x, float val){setvec(((t_conv *)x)->border, val);}
+void pdp_imageproc_conv_setorientation(void *x, u32 val){((t_conv *)x)->orientation = val;}
+void pdp_imageproc_conv_setnbpasses(void *x, u32 val){((t_conv *)x)->nbpasses = val;}
+void pdp_imageproc_conv_process(void *x, u32 width, u32 height, s16 *image)
+
+{
+ t_conv *d = (t_conv *)x;
+
+ u32 orientation = d->orientation;
+ u32 nbp = d->nbpasses;
+ u32 i,j;
+
+ if (orientation == PDP_IMAGEPROC_CONV_HORIZONTAL)
+ {
+ for(i=0; i<width*height; i+=width)
+ for (j=0; j<nbp; j++)
+ pixel_conv_hor_s16(image+i, width>>2, d->border, d->min1);
+ }
+
+ else
+ {
+ for (j=0; j<nbp; j++)
+ for(i=0; i<width; i +=4) pixel_conv_ver_s16(image+i, height, width, d->border, d->min1);
+ }
+
+
+
+}
+
+// apply a gain to an image
+void *pdp_imageproc_gain_new(void){return(pdp_alloc(8*sizeof(s16)));}
+void pdp_imageproc_gain_delete(void *x){pdp_dealloc(x);}
+void pdp_imageproc_gain_setgain(void *x, float gain)
+{
+ /* convert float to s16 + shift */
+ s16 *d = (s16 *)x;
+ s16 g;
+ int i;
+ float sign;
+ int shift = 0;
+
+ sign = (gain < 0) ? -1 : 1;
+ gain *= sign;
+
+ /* max shift = 16 */
+ for(i=0; i<=16; i++){
+ if (gain < 0x4000){
+ gain *= 2;
+ shift++;
+ }
+ else break;
+ }
+
+ gain *= sign;
+ g = (s16) gain;
+
+ //g = 0x4000;
+ //shift = 14;
+
+ d[0]=g;
+ d[1]=g;
+ d[2]=g;
+ d[3]=g;
+ d[4]=(s16)shift;
+ d[5]=0;
+ d[6]=0;
+ d[7]=0;
+}
+void pdp_imageproc_gain_process(void *x, u32 width, u32 height, s16 *image)
+{
+ s16 *d = (s16 *)x;
+ pixel_gain_s16(image, (width*height)>>2, d, (u64 *)(d+4));
+}
+
+// colour rotation for 2 colour planes
+void *pdp_imageproc_crot2d_new(void){return pdp_alloc(16*sizeof(s16));}
+void pdp_imageproc_crot2d_delete(void *x){pdp_dealloc(x);}
+void pdp_imageproc_crot2d_setmatrix(void *x, float *matrix)
+{
+ s16 *d = (s16 *)x;
+ setvec(d, matrix[0]);
+ setvec(d+4, matrix[1]);
+ setvec(d+8, matrix[2]);
+ setvec(d+12, matrix[3]);
+}
+void pdp_imageproc_crot2d_process(void *x, s16 *image, u32 width, u32 height)
+{
+ s16 *d = (s16 *)x;
+ pixel_crot2d_s16(image, width*height >> 2, d);
+}
+
+// biquad and biquad time
+typedef struct
+{
+ s16 ma1[4];
+ s16 ma2[4];
+ s16 b0[4];
+ s16 b1[4];
+ s16 b2[4];
+ s16 u0[4];
+ s16 u1[4];
+ s16 u0_save[4];
+ s16 u1_save[4];
+ u32 nbpasses;
+ u32 direction;
+} t_bq;
+
+void *pdp_imageproc_bq_new(void){return pdp_alloc(sizeof(t_bq));}
+void pdp_imageproc_bq_delete(void *x){pdp_dealloc(x);}
+void pdp_imageproc_bq_setcoef(void *x, float *coef) // a0,-a1,-a2,b0,b1,b2,u0,u1
+{
+ t_bq *d = (t_bq *)x;
+ float ia0 = 1.0f / coef[0];
+
+ /* all coefs are s1.14 fixed point */
+ /* representing values -2 < x < 2 */
+ /* so scale down before using the ordinary s0.15 float->fixed routine */
+
+ ia0 *= 0.5f;
+
+ // coef
+ setvec(d->ma1, ia0*coef[1]);
+ setvec(d->ma2, ia0*coef[2]);
+ setvec(d->b0, ia0*coef[3]);
+ setvec(d->b1, ia0*coef[4]);
+ setvec(d->b2, ia0*coef[5]);
+
+ // state to reset too
+ setvec(d->u0_save, coef[6]);
+ setvec(d->u1_save, coef[7]);
+
+}
+void pdp_imageproc_bq_setnbpasses(void *x, u32 nbpasses){((t_bq *)x)->nbpasses = nbpasses;}
+void pdp_imageproc_bq_setdirection(void *x, u32 direction){((t_bq *)x)->direction = direction;}
+void pdp_imageproc_bq_process(void *x, u32 width, u32 height, s16* image);
+
+
+void pdp_imageproc_bqt_process(void *x, u32 width, u32 height, s16 *image, s16 *state0, s16 *state1)
+{
+ s16 *d = (s16 *)x;
+ pixel_biquad_time_s16(image, state0, state1, d, (width*height)>>2);
+}
+
+void pdp_imageproc_bq_process(void *x, u32 width, u32 height, s16 *image)
+{
+ t_bq *d = (t_bq *)x;
+ s16 *c = d->ma1; /* coefs */
+ s16 *s = d->u0; /* state */
+ u32 direction = d->direction;
+ u32 nbp = d->nbpasses;
+ unsigned int i,j;
+
+
+
+ /* VERTICAL */
+
+ if ((direction & PDP_IMAGEPROC_BIQUAD_TOP2BOTTOM)
+ && (direction & PDP_IMAGEPROC_BIQUAD_BOTTOM2TOP)){
+
+ for(i=0; i<width; i +=4){
+ for (j=0; j<nbp; j++){
+ pixel_biquad_vertb_s16(image+i, height>>2, width, c, s);
+ pixel_biquad_verbt_s16(image+i, height>>2, width, c, s);
+ }
+ }
+ }
+
+ else if (direction & PDP_IMAGEPROC_BIQUAD_TOP2BOTTOM){
+ for(i=0; i<width; i +=4){
+ for (j=0; j<nbp; j++){
+ pixel_biquad_vertb_s16(image+i, height>>2, width, c, s);
+ }
+ }
+ }
+
+ else if (direction & PDP_IMAGEPROC_BIQUAD_BOTTOM2TOP){
+ for(i=0; i<width; i +=4){
+ for (j=0; j<nbp; j++){
+ pixel_biquad_verbt_s16(image+i, height>>2, width, c, s);
+ }
+ }
+ }
+
+ /* HORIZONTAL */
+
+ if ((direction & PDP_IMAGEPROC_BIQUAD_LEFT2RIGHT)
+ && (direction & PDP_IMAGEPROC_BIQUAD_RIGHT2LEFT)){
+
+ for(i=0; i<(width*height); i +=(width<<2)){
+ for (j=0; j<nbp; j++){
+ pixel_biquad_horlr_s16(image+i, width>>2, width, c, s);
+ pixel_biquad_horrl_s16(image+i, width>>2, width, c, s);
+ }
+ }
+ }
+
+ else if (direction & PDP_IMAGEPROC_BIQUAD_LEFT2RIGHT){
+ for(i=0; i<(width*height); i +=(width<<2)){
+ for (j=0; j<nbp; j++){
+ pixel_biquad_horlr_s16(image+i, width>>2, width, c, s);
+ }
+ }
+ }
+
+ else if (direction & PDP_IMAGEPROC_BIQUAD_RIGHT2LEFT){
+ for(i=0; i<(width*height); i +=(width<<2)){
+ for (j=0; j<nbp; j++){
+ pixel_biquad_horrl_s16(image+i, width>>2, width, c, s);
+ }
+ }
+ }
+
+}
+
+// produce a random image
+// note: random number generator can be platform specific
+// however, it should be seeded. (same seed produces the same result)
+void *pdp_imageproc_random_new(void){return pdp_alloc(4*sizeof(s16));}
+void pdp_imageproc_random_delete(void *x){pdp_dealloc(x);}
+void pdp_imageproc_random_setseed(void *x, float seed)
+{
+ s16 *d = (s16 *)x;
+ srandom((u32)seed);
+ d[0] = (s16)random();
+ d[1] = (s16)random();
+ d[2] = (s16)random();
+ d[3] = (s16)random();
+
+}
+void pdp_imageproc_random_process(void *x, u32 width, u32 height, s16 *image)
+{
+ s16 *d = (s16 *)x;
+ unsigned int totalnbpixels = width * height;
+ pixel_rand_s16(image, totalnbpixels>>2, d);
+}
+
+
+/* resampling stuff
+ this is quite a zoo of data structures
+ the major point is this: the resampler mmx code is shared for all resampling code
+ it uses data specified in t_resample_cbrd (Cooked Bilinear Resampler Data)
+
+ then the there are several feeder algorithms. one is the linear mapper. it's
+ data is specified in t_resample_clrd (Cooked Linear Remapper Data)
+
+ for each feeder algorithm, there are several high level algorithms. like zoom,
+ rotate, ...
+*/
+
+typedef struct
+{
+ u32 lineoffset;
+ s16 *image;
+ u32 width;
+ u32 height;
+
+} t_resample_id; // Image Data
+
+/* initialize image meta data (dimensions + location) */
+static void pdp_imageproc_resample_init_id(t_resample_id *x, u32 offset, s16* image, u32 w, u32 h)
+{
+ x->lineoffset = offset;
+ x->image = image;
+ x->width = w;
+ x->height = h;
+}
+
+// mmx resampling source image resampling data + coefs
+typedef struct
+{
+ // vector data for resampling routine (resampling computation)
+ u8 reserved[0x60]; //internal data
+ s16 *address[2]; //64 bit splatted offset address
+ s16 twowidthm1[4]; //64 bit splatted 2*(width-1)
+ s16 twoheightm1[4]; //64 bit splatted 2*(height-1)
+ s16 lineoffset[4]; //64 bit splatted line offset in pixels
+
+} t_resample_cid; // Cooked Image Data
+
+/* convert image meta data into a cooked format used by the resampler routine */
+static void pdp_imageproc_resample_init_cid(t_resample_cid *r, t_resample_id *i)
+{
+ u32 twowm1 = (i->width-1)<<1;
+ u32 twohm1 = (i->height-1)<<1;
+ r->address[0] = i->image;
+ r->address[1] = i->image;
+ r->twowidthm1[0] = twowm1;
+ r->twowidthm1[1] = twowm1;
+ r->twowidthm1[2] = twowm1;
+ r->twowidthm1[3] = twowm1;
+ r->twoheightm1[0] = twohm1;
+ r->twoheightm1[1] = twohm1;
+ r->twoheightm1[2] = twohm1;
+ r->twoheightm1[3] = twohm1;
+ r->lineoffset[0] = i->lineoffset;
+ r->lineoffset[1] = i->lineoffset;
+ r->lineoffset[2] = i->lineoffset;
+ r->lineoffset[3] = i->lineoffset;
+}
+
+// linear mapping data struct (zoom, scale, rotate, shear, ...)
+typedef struct
+{
+ s32 rowstatex[2]; // row state x coord
+ s32 rowstatey[2]; // row state y coord
+ s32 colstatex[2]; // column state x coord
+ s32 colstatey[2]; // column state y coord
+ s32 rowincx[2]; // row inc vector x coord
+ s32 rowincy[2]; // row inc vector y coord
+ s32 colincx[2]; // column inc vector x coord
+ s32 colincy[2]; // column inc vector y coord
+} t_resample_clmd; // Cooked Linear Mapping Data
+
+/* convert incremental linear remapping vectors to internal cooked format */
+static void pdp_imageproc_resample_cookedlinmap_init(t_resample_clmd *l, s32 sx, s32 sy, s32 rix, s32 riy, s32 cix, s32 ciy)
+{
+ l->colstatex[0] = l->rowstatex[0] = sx;
+ l->colstatex[1] = l->rowstatex[1] = sx + rix;
+ l->colstatey[0] = l->rowstatey[0] = sy;
+ l->colstatey[1] = l->rowstatey[1] = sy + riy;
+ l->rowincx[0] = rix << 1;
+ l->rowincx[1] = rix << 1;
+ l->rowincy[0] = riy << 1;
+ l->rowincy[1] = riy << 1;
+ l->colincx[0] = cix;
+ l->colincx[1] = cix;
+ l->colincy[0] = ciy;
+ l->colincy[1] = ciy;
+}
+
+
+/* this struct contains all the data necessary for
+ bilin interpolation from src -> dst image
+ (src can be == dst) */
+typedef struct
+{
+ t_resample_cid csrc; //cooked src image meta data for bilinear interpolator
+ t_resample_id src; //src image meta
+ t_resample_id dst; //dst image meta
+} t_resample_cbrd; //Bilinear Resampler Data
+
+
+/* this struct contains high level zoom parameters,
+ all image relative */
+typedef struct
+{
+ float centerx;
+ float centery;
+ float zoomx;
+ float zoomy;
+ float angle;
+} t_resample_zrd;
+
+
+/* convert floating point center and zoom data to incremental linear remapping vectors */
+static void pdp_imageproc_resample_clmd_init_from_id_zrd(t_resample_clmd *l, t_resample_id *i, t_resample_zrd *z)
+{
+ double izx = 1.0f / (z->zoomx);
+ double izy = 1.0f / (z->zoomy);
+ double scale = (double)0xffffffff;
+ double scalew = scale / ((double)(i->width - 1));
+ double scaleh = scale / ((double)(i->height - 1));
+ double cx = ((double)z->centerx) * ((double)(i->width - 1));
+ double cy = ((double)z->centery) * ((double)(i->height - 1));
+ double angle = z->angle * (-M_PI / 180.0);
+ double c = cos(angle);
+ double s = sin(angle);
+
+ /* affine x, y mappings in screen coordinates */
+ double mapx(double x, double y){return cx + izx * ( c * (x-cx) + s * (y-cy));}
+ double mapy(double x, double y){return cy + izy * (-s * (x-cx) + c * (y-cy));}
+
+ u32 tl_x = (u32)(scalew * mapx(0,0));
+ u32 tl_y = (u32)(scaleh * mapy(0,0));
+
+
+ u32 row_inc_x = (u32)(scalew * (mapx(1,0)-mapx(0,0)));
+ u32 row_inc_y = (u32)(scaleh * (mapy(1,0)-mapy(0,0)));
+ u32 col_inc_x = (u32)(scalew * (mapx(0,1)-mapx(0,0)));
+ u32 col_inc_y = (u32)(scaleh * (mapy(0,1)-mapy(0,0)));
+
+
+ pdp_imageproc_resample_cookedlinmap_init(l, tl_x, tl_y, row_inc_x, row_inc_y, col_inc_x, col_inc_y);
+}
+
+/* this struct contains all data for the zoom object */
+typedef struct
+{
+ t_resample_cbrd cbrd; // Bilinear Resampler Data
+ t_resample_clmd clmd; // Cooked Linear Mapping data
+ t_resample_zrd zrd; // Zoom / Rotate Data
+} t_resample_zoom_rotate;
+
+// zoom + rotate
+void *pdp_imageproc_resample_affinemap_new(void)
+{
+ t_resample_zoom_rotate *z = (t_resample_zoom_rotate *)pdp_alloc(sizeof(t_resample_zoom_rotate));
+ z->zrd.centerx = 0.5;
+ z->zrd.centery = 0.5;
+ z->zrd.zoomx = 1.0;
+ z->zrd.zoomy = 1.0;
+ z->zrd.angle = 0.0f;
+ return (void *)z;
+}
+void pdp_imageproc_resample_affinemap_delete(void *x){pdp_dealloc(x);}
+void pdp_imageproc_resample_affinemap_setcenterx(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.centerx = f;}
+void pdp_imageproc_resample_affinemap_setcentery(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.centery = f;}
+void pdp_imageproc_resample_affinemap_setzoomx(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.zoomx = f;}
+void pdp_imageproc_resample_affinemap_setzoomy(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.zoomy = f;}
+void pdp_imageproc_resample_affinemap_setangle(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.angle = f;}
+void pdp_imageproc_resample_affinemap_process(void *x, u32 width, u32 height, s16 *srcimage, s16 *dstimage)
+{
+ t_resample_zoom_rotate *z = (t_resample_zoom_rotate *)x;
+
+ /* setup resampler image meta data */
+ pdp_imageproc_resample_init_id(&(z->cbrd.src), width, srcimage, width, height);
+ pdp_imageproc_resample_init_id(&(z->cbrd.dst), width, dstimage, width, height);
+ pdp_imageproc_resample_init_cid(&(z->cbrd.csrc),&(z->cbrd.src));
+
+ /* setup linmap data from zoom_rotate parameters */
+ pdp_imageproc_resample_clmd_init_from_id_zrd(&(z->clmd), &(z->cbrd.src), &(z->zrd));
+
+
+ /* call assembler routine */
+ pixel_resample_linmap_s16(z);
+}
+
+
+
+// polynomials
+
+
+typedef struct
+{
+ u32 order;
+ u32 nbpasses;
+ s16 coefs[0];
+} t_cheby;
+
+void *pdp_imageproc_cheby_new(int order)
+{
+ t_cheby *z;
+ int i;
+ if (order < 2) order = 2;
+ z = (t_cheby *)pdp_alloc(sizeof(t_cheby) + (order + 1) * sizeof(s16[4]));
+ z->order = order;
+ setvec(z->coefs + 0*4, 0);
+ setvec(z->coefs + 1*4, 0.25);
+ for (i=2; i<=order; i++) setvec(z->coefs + i*4, 0);
+
+ return z;
+}
+void pdp_imageproc_cheby_delete(void *x){pdp_dealloc(x);}
+void pdp_imageproc_cheby_setcoef(void *x, u32 n, float f)
+{
+ t_cheby *z = (t_cheby *)x;
+ if (n <= z->order){
+ setvec(z->coefs + n*4, f * 0.25); // coefs are in s2.13 format
+ }
+}
+void pdp_imageproc_cheby_setnbpasses(void *x, u32 n){((t_cheby *)x)->nbpasses = n;}
+
+void pdp_imageproc_cheby_process(void *x, u32 width, u32 height, s16 *image)
+{
+ t_cheby *z = (t_cheby *)x;
+ u32 iterations = z->nbpasses;
+ u32 i,j;
+ for (j=0; j < (height*width); j += width)
+ for (i=0; i<iterations; i++)
+ pixel_cheby_s16_3plus(image+j, width>>2, z->order+1, z->coefs);
+
+ //pixel_cheby_s16_3plus(image, (width*height)>>2, z->order+1, z->coefs);
+}