From 7da1d644ff98078ad2a78d940ec991abff440b00 Mon Sep 17 00:00:00 2001
From: Tom Schouten <doelie@users.sourceforge.net>
Date: Wed, 5 Feb 2003 06:05:39 +0000
Subject: pdp 0.8.3

svn path=/trunk/externals/pdp/; revision=382
---
 system/mmx/Makefile             |   5 +-
 system/mmx/pixel_cheby_s16.s    |  90 ++++++++++++
 system/mmx/pixel_resample_s16.s | 314 ++++++++++++++++++++++++++++++++++++++++
 system/pdp.c                    |   8 +
 system/pdp_control.c            |   4 +-
 system/pdp_imageproc_mmx.c      | 235 ++++++++++++++++++++++++++++++
 system/pdp_imageproc_portable.c | 171 ++++++++++++++++++++++
 system/pdp_queue.c              |   5 +-
 system/pdp_resample.c           |   8 +-
 9 files changed, 834 insertions(+), 6 deletions(-)
 create mode 100644 system/mmx/pixel_cheby_s16.s
 create mode 100644 system/mmx/pixel_resample_s16.s

(limited to 'system')

diff --git a/system/mmx/Makefile b/system/mmx/Makefile
index 0f8f836..51e5052 100644
--- a/system/mmx/Makefile
+++ b/system/mmx/Makefile
@@ -14,7 +14,9 @@ pixel_biquad_s16.o \
 pixel_ca_s1.o \
 pixel_rand_s16.o \
 pixel_crot_s16.o \
-pixel_gain_s16.o
+pixel_gain_s16.o \
+pixel_resample_s16.o \
+pixel_cheby_s16.o
 
 all:	$(OBJ)
 
@@ -27,3 +29,4 @@ clean:
 	rm -f pdp_mmx.a
 	rm -f pdp_mmx_test
 
+
diff --git a/system/mmx/pixel_cheby_s16.s b/system/mmx/pixel_cheby_s16.s
new file mode 100644
index 0000000..2afe9e2
--- /dev/null
+++ b/system/mmx/pixel_cheby_s16.s
@@ -0,0 +1,90 @@
+#    Pure Data Packet mmx routine.
+#    Copyright (c) by Tom Schouten <pdp@zzz.kotnet.org>
+# 
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+# 
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+# 
+#    You should have received a copy of the GNU General Public License
+#    along with this program; if not, write to the Free Software
+#    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+.globl pixel_cheby_s16_3plus
+.type  pixel_cheby_s16_3plus,@function
+
+# void pixel_cheby_s16_3plus(short int *buf, int nb_4pixel_vectors, int order_plus_1, short int *coefs)
+
+
+# coefs are s2.13 fixed point (-4->4)
+pixel_cheby_s16_3plus:
+	pushl %ebp
+	movl %esp, %ebp
+	push %esi
+	push %edi
+	push %edx
+
+	movl 8(%ebp),  %esi	# esi = buf: pixels are read and overwritten in place
+	movl 12(%ebp), %ecx	# ecx = vector count (outer loop is bottom-tested: must be > 0)
+	movl 16(%ebp), %eax	# eax = order+1 (inner loop is bottom-tested: must be >= 3)
+
+	shll $3, %eax		# eax = (order+1) * 8 = size of the coef table in bytes
+	movl 20(%ebp), %edx	# edx = coefs
+	addl %eax, %edx		# edx = address one past the last coef vector
+	
+#	jmp skip
+	
+	.align 16
+	.loop_cheby:	
+
+	movl 20(%ebp), %edi	# edi = coefs (rewound for every pixel vector)
+	movq (%esi), %mm0	# load 4 pixels from memory (mm0 = x)
+	pcmpeqw %mm2, %mm2	# mm2 = 0xffff in every word
+	movq %mm0, %mm1		# mm1 (T_n-1) <- x
+	psrlw $1, %mm2		# mm2 (T_n-2) <- 0x7fff (~ 1)
+	
+
+	movq (%edi), %mm4	# mm4 (acc) == a0
+	psraw $1, %mm4		# mm4 == a0/2
+	movq %mm0, %mm5		# mm5 (intermediate)
+	pmulhw 8(%edi), %mm5	# mm5 == (x * a1)/2
+	paddsw %mm5, %mm4	# acc = (a0 + a1 x)/2
+	addl $16, %edi		# edi -> a2
+
+	.loop_cheby_inner:	
+	movq %mm1, %mm3		# mm3 == T_n-1
+	psraw $2, %mm2		# mm2 == T_n-2 / 4
+	pmulhw %mm0, %mm3	# mm3 == (2 x T_n-1) / 4
+	psubsw %mm2, %mm3	# mm3 == (2 x T_n-1 - T_n-2) / 4
+	paddsw %mm3, %mm3
+	paddsw %mm3, %mm3	# mm3 == T_n
+	movq %mm1, %mm2		# mm2 <- T_n-1 (the next pass's T_n-2)
+	movq %mm3, %mm1		# mm1 <- T_n   (the next pass's T_n-1)
+	pmulhw (%edi), %mm3	# mm3 = a_n * T_n / 2
+	paddsw %mm3, %mm4	# accumulate
+	addl $8, %edi		# edi -> next coef vector
+	cmpl %edx, %edi		# all coefs consumed?
+	jne .loop_cheby_inner
+	
+	paddsw %mm4, %mm4	# compensate for 0.125 factor
+	paddsw %mm4, %mm4	# (three saturating doublings = x8)
+	paddsw %mm4, %mm4
+	movq %mm4, (%esi)	# store result in memory
+	addl $8, %esi		# increment source/dest pointer
+	decl %ecx		# one vector done
+	jnz .loop_cheby		# loop
+
+skip:	
+	emms
+
+	pop %edx
+	pop %edi
+	pop %esi
+	leave
+	ret
+	
diff --git a/system/mmx/pixel_resample_s16.s b/system/mmx/pixel_resample_s16.s
new file mode 100644
index 0000000..3959f9c
--- /dev/null
+++ b/system/mmx/pixel_resample_s16.s
@@ -0,0 +1,314 @@
+	
+
+#interpolation data:
+#* 4 vectors: neighbourhood for samples (TL, TR, BL, BR)
+#* 2 vectors: fractional part (unsigned)
+#* 2 vectors: addresses of pixel blocks
+
+#coord conversion data:
+#1 vector: 32bit splatted address	
+#1 vector: 16bit splatted w-1
+#1 vector: 16bit splatted h-1
+#1 vector: 16bit splatted w (reuse w-1 with add?)
+#1 dword:  32 bit line offset
+
+#coord generation data:	several vectors for parameter update stuff..
+
+#coordinate systems: 16 bit virtual coordinates (signed, center relative)
+#* 2 vectors: virtual coordinates
+#(possibly an intermediate step + conversion to 16 bit virtual)
+
+
+#step 1:	generate virtual coords
+
+		
+#step 2:	virtual coords -> block adresses + fractional adresses
+#* mulhigh: real coords (x,y) (center relative)
+#* add center -> unsigned (top left relative)
+#* mullow: fractional part (x_frac, y_frac)
+#* mulhigh, mullow, pack 32bit: y_offset
+#* pack 32bit: x_offset
+#* add, shift, add start address: real addresses
+	
+
+#step3:		data fetch using generated addresses: 
+#		this step would be much simpler in 4x16bit rgba. life's a bitch..
+
+#step4:		bilinear interpolation
+
+#step5:		store
+
+
+
+		# this can be simplified by doing 32 bit unaligned moves
+		# and vector unpacking on the data
+
+	
+
+		# cooked image data structure
+		# pixel environment temp storage
+		TL1 = 0x00
+		TL2 = 0x02
+		TL3 = 0x04
+		TL4 = 0x06
+		TR1 = 0x08
+		TR2 = 0x0A
+		TR3 = 0x0C
+		TR4 = 0x0E
+		BL1 = 0x10
+		BL2 = 0x12
+		BL3 = 0x14
+		BL4 = 0x16
+		BR1 = 0x18
+		BR2 = 0x1A
+		BR3 = 0x1C
+		BR4 = 0x1E
+		# addresses of pixel blocks
+		ADDRESS1  = 0x20
+		ADDRESS2  = 0x24
+		ADDRESS3  = 0x28
+		ADDRESS4  = 0x2C
+
+		# second env + address buffer (testing:	 not used)
+		SECONDBUFFER = 0x30
+	
+		# 32bit splatted bitmap address
+		V2PLANEADDRESS = 0x60
+		# 16bit splatted image constants
+		V4TWOWIDTHM1 = 0x68
+		V4TWOHEIGHTM1 = 0x70
+		V4LINEOFFSET = 0x78
+		# data struct size
+		RESAMPLEDATASIZE = 0x80
+	
+	
+
+		# interpolation routine
+		# input:	%mm0, %mm1 4 x 16bit unsigned top left relative virtual x and y coordinates
+		#		%esi: temp & algo data structure
+
+getpixelsbilin:	psrlw $1, %mm0			# convert to range 0->0x7fff [0,0.5[
+		psrlw $1, %mm1
+		movq %mm0, %mm2
+		movq %mm1, %mm3
+		movq V4TWOWIDTHM1(%esi), %mm4	# 2 * (width - 1)
+		movq V4TWOHEIGHTM1(%esi), %mm5	# 2 * (height - 1)
+		pmulhw %mm5, %mm3		# mm3 == y coord (topleft relative)
+		pmulhw %mm4, %mm2		# mm2 == x coord (topleft relative)
+		pmullw %mm5, %mm1		# mm1 == y frac (unsigned)
+		pmullw %mm4, %mm0		# mm0 == x frac (unsigned)
+
+		movq %mm3, %mm5			# copy y coord
+		pmullw V4LINEOFFSET(%esi), %mm3	# low part of line offset
+		pmulhw V4LINEOFFSET(%esi), %mm5	# high part of line offset
+
+		movq %mm2, %mm7			# copy x coord vector
+		pxor %mm4, %mm4			# zero, used to widen x to 32 bit
+		punpcklwd %mm4, %mm2		# low part in %mm2
+		punpckhwd %mm4, %mm7		# high part in %mm7
+	
+		movq %mm3, %mm6			# copy
+		punpcklwd %mm5, %mm3		# unpack low part in %mm3
+		punpckhwd %mm5, %mm6		# high part in %mm6
+
+		paddd %mm2, %mm3		# x + y * lineoffset, pixels 1 and 2
+		paddd %mm7, %mm6		# x + y * lineoffset, pixels 3 and 4
+		pslld $1, %mm3			# pixel index -> byte offset (16 bit pixels)
+		pslld $1, %mm6
+
+		paddd V2PLANEADDRESS(%esi), %mm3	# add pixel plane address
+		paddd V2PLANEADDRESS(%esi), %mm6
+
+		movq %mm3, ADDRESS1(%esi)	# store addresses (ADDRESS1, ADDRESS2)
+		movq %mm6, ADDRESS3(%esi)	# store addresses (ADDRESS3, ADDRESS4)
+
+		pcmpeqw %mm2, %mm2		# all ones
+		movq %mm0, %mm4			# copy x frac
+		movq %mm1, %mm5			# copy y frac
+		pxor %mm2, %mm4			# compute complement (approx 1 - frac)
+		pxor %mm2, %mm5
+
+		psrlw $1, %mm0			# shift right: 0.5 * (frac x)
+		psrlw $1, %mm1			# shift right: 0.5 * (frac y)
+		psrlw $1, %mm4			# shift right: 0.5 * (1 - frac x)
+		psrlw $1, %mm5			# shift right: 0.5 * (1 - frac y)
+
+		movq %mm0, %mm2			# copy of frac x
+		movq %mm4, %mm3			# copy of (1-frac x)
+						# fetch data
+
+		#jmp skipfetch			# seems the fetch is the real killer. try to optimize this
+						# using 32 bit accesses & shifts
+
+						# the src image data struct is padded to the cooked data struct
+		movl RESAMPLEDATASIZE(%esi), %edi	# edi = dword right after the cooked struct (source LINEOFFSET)
+		shll $1, %edi			# line offset in bytes (16 bit pixels)
+
+		movl ADDRESS1(%esi), %ecx 	# ecx, edx = top-left addresses of pixels 1 and 2
+		movl ADDRESS2(%esi), %edx
+	
+		movw (%ecx), %ax		# top left samples
+		movw (%edx), %bx		# note: %bx is clobbered (the caller saves %ebx)
+		movw %ax, TL1(%esi)
+		movw %bx, TL2(%esi)
+		movw 2(%ecx), %ax		# top right samples (next pixel)
+		movw 2(%edx), %bx
+		movw %ax, TR1(%esi)
+		movw %bx, TR2(%esi)
+
+		addl %edi, %ecx			# one line down
+		addl %edi, %edx
+
+		movw (%ecx), %ax		# bottom left samples
+		movw (%edx), %bx
+		movw %ax, BL1(%esi)
+		movw %bx, BL2(%esi)
+		movw 2(%ecx), %ax		# bottom right samples
+		movw 2(%edx), %bx
+		movw %ax, BR1(%esi)
+		movw %bx, BR2(%esi)
+
+		
+		movl ADDRESS3(%esi), %ecx 	# same fetch pattern for pixels 3 and 4
+		movl ADDRESS4(%esi), %edx
+
+
+		movw (%ecx), %ax
+		movw (%edx), %bx
+		movw %ax, TL3(%esi)
+		movw %bx, TL4(%esi)
+		movw 2(%ecx), %ax
+		movw 2(%edx), %bx
+		movw %ax, TR3(%esi)
+		movw %bx, TR4(%esi)
+	
+		addl %edi, %ecx			# one line down
+		addl %edi, %edx
+
+		movw (%ecx), %ax
+		movw (%edx), %bx
+		movw %ax, BL3(%esi)
+		movw %bx, BL4(%esi)
+		movw 2(%ecx), %ax
+		movw 2(%edx), %bx
+		movw %ax, BR3(%esi)
+		movw %bx, BR4(%esi)
+
+	
+skipfetch:	
+		pmulhw TL1(%esi), %mm4		# bilin interpolation: TL * (1 - frac x)
+		pmulhw TR1(%esi), %mm0		# TR * frac x
+		pmulhw BL1(%esi), %mm3		# BL * (1 - frac x)
+		pmulhw BR1(%esi), %mm2		# BR * frac x
+
+
+		paddw %mm4, %mm0		# mm0 = top line, blended in x
+		paddw %mm3, %mm2		# mm2 = bottom line, blended in x
+
+		pmulhw %mm5, %mm0		# top * (1 - frac y)
+		pmulhw %mm1, %mm2		# bottom * frac y
+
+		paddw %mm2, %mm0		# mm0 = 4 interpolated pixels
+		psllw $2, %mm0			# compensate for gain reduction
+
+		ret
+
+
+		// linear mapping data struct
+		ROWSTATEX = 0x0
+		ROWSTATEY = 0x8
+		COLSTATEX = 0x10
+		COLSTATEY = 0x18
+		ROWINCX = 0x20		
+		ROWINCY = 0x28
+		COLINCX = 0x30		
+		COLINCY = 0x38
+
+		// image data struct
+		LINEOFFSET = 0x0
+		IMAGEADDRESS = 0x4
+		WIDTH = 0x8
+		HEIGHT = 0xC
+		IMAGEDATASIZE = 0x10
+		
+
+
+# pixel_resample_linmap_s16(void *x)		
+.globl pixel_resample_linmap_s16
+.type  pixel_resample_linmap_s16,@function
+
+		SOURCEIMAGE = RESAMPLEDATASIZE
+		DESTIMAGE = SOURCEIMAGE + IMAGEDATASIZE
+		LINMAPDATA = DESTIMAGE + IMAGEDATASIZE
+	
+pixel_resample_linmap_s16:	
+		pushl %ebp
+		movl %esp, %ebp
+		pushl %esi
+		pushl %edi
+		pushl %ebx				# getpixelsbilin uses %bx as scratch
+
+
+		movl 8(%ebp),  %esi			# get data struct
+		movl DESTIMAGE+HEIGHT(%esi), %edx	# image height
+		movl DESTIMAGE+IMAGEADDRESS(%esi), %edi # dest image address
+		movl DESTIMAGE+WIDTH(%esi), %ecx	# image width
+		shrl $2, %ecx				# vector count (loops are bottom-tested: needs width >= 4, height >= 1)
+		.align 16
+	
+linmap_looprow:						# next 4-pixel vector inside the current row
+		movq LINMAPDATA+ROWSTATEX(%esi), %mm0	# get current coordinates
+		movq LINMAPDATA+ROWSTATEY(%esi), %mm1
+
+linmap_loopcol:						# entry for a new row: mm0/mm1 already hold its start
+		movq %mm0, %mm4				# copy
+		movq %mm1, %mm5
+		paddd LINMAPDATA+ROWINCX(%esi), %mm4	# increment
+		paddd LINMAPDATA+ROWINCY(%esi), %mm5
+		movq %mm4, %mm6				# copy
+		movq %mm5, %mm7	
+		paddd LINMAPDATA+ROWINCX(%esi), %mm6	# increment
+		paddd LINMAPDATA+ROWINCY(%esi), %mm7
+		movq %mm6, LINMAPDATA+ROWSTATEX(%esi)	# store next state
+		movq %mm7, LINMAPDATA+ROWSTATEY(%esi) 
+
+		psrad $16, %mm0				# keep the upper 16 bits of each 32 bit coordinate
+		psrad $16, %mm1
+		psrad $16, %mm4
+		psrad $16, %mm5
+		packssdw %mm4, %mm0			# pack new coordinates
+		packssdw %mm5, %mm1
+	
+		push %ecx				# getpixelsbilin overwrites ecx, edx and edi
+		push %edx
+		push %edi
+	
+		call getpixelsbilin			# do interpolation
+
+		pop %edi
+		pop %edx
+		pop %ecx
+		movq %mm0, (%edi)			# store 4 pixels
+		addl $0x8, %edi				# point to next 4 pixels
+		decl %ecx				# one more vector of this row done
+		jnz linmap_looprow
+
+		movq LINMAPDATA+COLSTATEX(%esi), %mm0	# get column state vector
+		movq LINMAPDATA+COLSTATEY(%esi), %mm1
+		movl DESTIMAGE+WIDTH(%esi), %ecx	# image width
+		shrl $2, %ecx				# vector count
+		paddd LINMAPDATA+COLINCX(%esi), %mm0	# increment
+		paddd LINMAPDATA+COLINCY(%esi), %mm1
+		movq %mm0, LINMAPDATA+COLSTATEX(%esi)	# store
+		movq %mm1, LINMAPDATA+COLSTATEY(%esi)
+		decl %edx				# one more row done
+		jnz linmap_loopcol
+		
+		emms
+		popl %ebx
+		popl %edi
+		popl %esi
+		leave
+		ret
+
+
diff --git a/system/pdp.c b/system/pdp.c
index 8651971..e3c311b 100644
--- a/system/pdp.c
+++ b/system/pdp.c
@@ -62,6 +62,10 @@ void pdp_chrot_setup(void);
 void pdp_scope_setup(void);
 void pdp_scale_setup(void);
 void pdp_zoom_setup(void);
+void pdp_scan_setup(void);
+void pdp_sdl_setup(void);
+void pdp_cheby_setup(void);
+
 
 
 /* library setup routine */
@@ -107,6 +111,10 @@ void pdp_setup(void){
     pdp_scope_setup();
     pdp_scale_setup();
     pdp_zoom_setup();
+    pdp_scan_setup();
+    pdp_sdl_setup();
+    pdp_cheby_setup();
+
 
 }
 
diff --git a/system/pdp_control.c b/system/pdp_control.c
index a7ee0c7..1053f6d 100644
--- a/system/pdp_control.c
+++ b/system/pdp_control.c
@@ -64,11 +64,11 @@ static void pdp_control_thread(t_pdp_control *x, t_floatarg f)
     int t = (int)f;
 
     if (t){
-	post("pdp_control: switching on processing in thread");
+	post("pdp_control: pdp is now using its own processing thread");
 	pdp_queue_use_thread(1);
     }
     else {
-	post("pdp_control: switching off processing in thread");
+	post("pdp_control: pdp is now using the main pd thread");
 	pdp_queue_use_thread(0);
     }
 }
diff --git a/system/pdp_imageproc_mmx.c b/system/pdp_imageproc_mmx.c
index 2f32c3f..4c347c6 100644
--- a/system/pdp_imageproc_mmx.c
+++ b/system/pdp_imageproc_mmx.c
@@ -21,8 +21,10 @@
 
 /* this is a c wrapper around platform specific (mmx) code */
 #include <stdlib.h>
+#include <math.h>
 #include "pdp_mmx.h"
 #include "pdp_imageproc.h"
+#include "m_pd.h"
 
 // utility stuff
 inline static s16 float2fixed(float f)
@@ -317,3 +319,236 @@ void pdp_imageproc_random_process(void *x, s16 *image, u32 width, u32 height)
 }
 
 
+/* resampling stuff
+   this is quite a zoo of data structures
+   the major point is this: the resampler mmx code is shared for all resampling code
+   it uses data specified in t_resample_cbrd (Cooked Bilinear Resampler Data)
+
+   then there are several feeder algorithms. one is the linear mapper. its
+   data is specified in t_resample_clmd (Cooked Linear Mapping Data)
+
+   for each feeder algorithm, there are several high level algorithms. like zoom,
+   rotate, ... 
+*/
+
+typedef struct
+{
+    u32 lineoffset;
+    s16 *image;
+    u32 width;
+    u32 height;
+    
+} t_resample_id; // Image Data
+
+/* initialize image meta data (dimensions + location) */
+static void pdp_imageproc_resample_init_id(t_resample_id *x, u32 offset, s16* image, u32 w, u32 h)
+{
+    x->lineoffset = offset;
+    x->image = image;
+    x->width = w;
+    x->height = h;
+}
+
+// mmx resampling source image resampling data + coefs
+typedef struct
+{
+    // vector data for resampling routine (resampling computation)
+    u8  reserved[0x60];  //internal data
+    s16 *address[2];     //64 bit splatted offset address
+    s16 twowidthm1[4];   //64 bit splatted 2*(width-1)
+    s16 twoheightm1[4];  //64 bit splatted 2*(height-1)
+    s16 lineoffset[4];   //64 bit splatted line offset in pixels
+
+} t_resample_cid; // Cooked Image Data
+
+/* convert image meta data into a cooked format used by the resampler routine */
+static void pdp_imageproc_resample_init_cid(t_resample_cid *r, t_resample_id *i)
+{
+    u32 twowm1 = (i->width-1)<<1;   // 2*(width-1); narrowed to s16 when stored below
+    u32 twohm1 = (i->height-1)<<1;  // 2*(height-1); narrowed to s16 when stored below
+    r->address[0] = i->image;       // same plane address in both slots; the asm reads them as one
+    r->address[1] = i->image;       // 64 bit vector, so this presumably assumes 32 bit pointers
+    r->twowidthm1[0] = twowm1;      // splat: one copy per 16 bit lane
+    r->twowidthm1[1] = twowm1;
+    r->twowidthm1[2] = twowm1;
+    r->twowidthm1[3] = twowm1;
+    r->twoheightm1[0] = twohm1;
+    r->twoheightm1[1] = twohm1;
+    r->twoheightm1[2] = twohm1;
+    r->twoheightm1[3] = twohm1;
+    r->lineoffset[0] = i->lineoffset;  // u32 narrowed to s16 here as well
+    r->lineoffset[1] = i->lineoffset;
+    r->lineoffset[2] = i->lineoffset;
+    r->lineoffset[3] = i->lineoffset;
+}
+
+// linear mapping data struct (zoom, scale, rotate, shear, ...)
+typedef struct
+{
+    s32 rowstatex[2]; // row state x coord
+    s32 rowstatey[2]; // row state y coord
+    s32 colstatex[2]; // column state x coord
+    s32 colstatey[2]; // column state y coord
+    s32 rowincx[2];   // row inc vector x coord
+    s32 rowincy[2];   // row inc vector y coord
+    s32 colincx[2];   // column inc vector x coord
+    s32 colincy[2];   // column inc vector y coord
+} t_resample_clmd; // Cooked Linear Mapping Data
+
+/* convert incremental linear remapping vectors to internal cooked format */
+static void pdp_imageproc_resample_cookedlinmap_init(t_resample_clmd *l, s32 sx, s32 sy, s32 rix, s32 riy, s32 cix, s32 ciy)
+{
+    l->colstatex[0] = l->rowstatex[0] = sx;        // slot 0: first pixel of the pair
+    l->colstatex[1] = l->rowstatex[1] = sx + rix;  // slot 1: its right-hand neighbour
+    l->colstatey[0] = l->rowstatey[0] = sy;
+    l->colstatey[1] = l->rowstatey[1] = sy + riy;
+    l->rowincx[0] = rix << 1;                      // a state vector holds two pixels,
+    l->rowincx[1] = rix << 1;                      // so one row step advances by 2 * inc
+    l->rowincy[0] = riy << 1;
+    l->rowincy[1] = riy << 1;
+    l->colincx[0] = cix;                           // column step applies equally to both slots
+    l->colincx[1] = cix;
+    l->colincy[0] = ciy;
+    l->colincy[1] = ciy;
+}
+
+
+/* this struct contains all the data necessary for
+   bilin interpolation from src -> dst image
+   (src can be == dst) */
+typedef struct
+{
+    t_resample_cid csrc;     //cooked src image meta data for bilinear interpolator
+    t_resample_id src;       //src image meta
+    t_resample_id dst;       //dst image meta
+} t_resample_cbrd;            //Bilinear Resampler Data
+
+
+/* this struct contains high level zoom parameters,
+   all image relative */
+typedef struct
+{
+    float centerx;
+    float centery;
+    float zoomx;
+    float zoomy;
+    float angle;
+} t_resample_zrd;
+
+
+/* convert floating point center and zoom data to incremental linear remapping vectors */
+static void pdp_imageproc_resample_clmd_init_from_id_zrd(t_resample_clmd *l, t_resample_id *i, t_resample_zrd *z)
+{
+    double izx = 1.0f / (z->zoomx);                    // inverse zoom (zoomx == 0 is not guarded here)
+    double izy = 1.0f / (z->zoomy);                    // inverse zoom (zoomy == 0 is not guarded here)
+    double scale = (double)0xffffffff;                 // full range of a 32 bit virtual coordinate
+    double scalew = scale / ((double)(i->width - 1));  // pixel x -> virtual x
+    double scaleh = scale / ((double)(i->height - 1)); // pixel y -> virtual y
+    double cx = ((double)z->centerx) * ((double)(i->width - 1));   // center in pixels
+    double cy = ((double)z->centery) * ((double)(i->height - 1));
+    double angle = z->angle * (-M_PI / 180.0);         // degrees -> radians, sign flipped
+    double c = cos(angle);
+    double s = sin(angle);
+
+    /* affine x, y mappings in screen coordinates */
+    double mapx(double x, double y){return cx + izx * ( c * (x-cx) + s * (y-cy));}
+    double mapy(double x, double y){return cy + izy * (-s * (x-cx) + c * (y-cy));}
+    /* double -> u32 is undefined for negative or >= 2^32 values (C99 6.3.1.4); go via long long so they wrap mod 2^32 */
+    u32 tl_x = (u32)(long long)(scalew * mapx(0,0));
+    u32 tl_y = (u32)(long long)(scaleh * mapy(0,0));
+
+    /* per-pixel steps along a row and down a column; negative for mirrored or rotated mappings */
+    u32 row_inc_x = (u32)(long long)(scalew * (mapx(1,0)-mapx(0,0)));
+    u32 row_inc_y = (u32)(long long)(scaleh * (mapy(1,0)-mapy(0,0)));
+    u32 col_inc_x = (u32)(long long)(scalew * (mapx(0,1)-mapx(0,0)));
+    u32 col_inc_y = (u32)(long long)(scaleh * (mapy(0,1)-mapy(0,0)));
+
+
+    pdp_imageproc_resample_cookedlinmap_init(l, tl_x, tl_y, row_inc_x, row_inc_y, col_inc_x, col_inc_y);
+}
+
+/* this struct contains all data for the zoom object */
+typedef struct
+{
+    t_resample_cbrd cbrd;      // Bilinear Resampler Data
+    t_resample_clmd clmd;      // Cooked Linear Mapping data
+    t_resample_zrd   zrd;      // Zoom / Rotate Data
+} t_resample_zoom_rotate;
+
+// zoom + rotate
+void *pdp_imageproc_resample_affinemap_new(void)
+{
+    t_resample_zoom_rotate *z = (t_resample_zoom_rotate *)malloc(sizeof(t_resample_zoom_rotate));
+    z->zrd.centerx = 0.5;
+    z->zrd.centery = 0.5;
+    z->zrd.zoomx = 1.0;
+    z->zrd.zoomy = 1.0;
+    z->zrd.angle = 0.0f;
+    return (void *)z;
+}
+void pdp_imageproc_resample_affinemap_delete(void *x){free(x);}
+void pdp_imageproc_resample_affinemap_setcenterx(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.centerx = f;}
+void pdp_imageproc_resample_affinemap_setcentery(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.centery = f;}
+void pdp_imageproc_resample_affinemap_setzoomx(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.zoomx = f;}
+void pdp_imageproc_resample_affinemap_setzoomy(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.zoomy = f;}
+void pdp_imageproc_resample_affinemap_setangle(void *x, float f){((t_resample_zoom_rotate *)x)->zrd.angle = f;}
+void pdp_imageproc_resample_affinemap_process(void *x, s16 *srcimage, s16 *dstimage, u32 width, u32 height)
+{
+    t_resample_zoom_rotate *z = (t_resample_zoom_rotate *)x;
+
+    /* setup resampler image meta data (line offset == width for both images) */
+    pdp_imageproc_resample_init_id(&(z->cbrd.src), width, srcimage, width, height);
+    pdp_imageproc_resample_init_id(&(z->cbrd.dst), width, dstimage, width, height);
+    pdp_imageproc_resample_init_cid(&(z->cbrd.csrc),&(z->cbrd.src)); 
+
+    /* setup linmap data from zoom_rotate parameters */
+    pdp_imageproc_resample_clmd_init_from_id_zrd(&(z->clmd), &(z->cbrd.src), &(z->zrd));
+
+
+    /* call assembler routine; it gets the whole struct and finds its members by fixed byte offsets */
+    pixel_resample_linmap_s16(z);   
+}
+
+
+
+// polynomials
+
+
+typedef struct
+{
+    u32 order;
+    u8 pad[4];
+    s16 coefs[0];
+} t_cheby;
+
+void *pdp_imageproc_cheby_new(int order)
+{
+    t_cheby *z;
+    int i;
+    if (order < 2) order = 2;  // pixel_cheby_s16_3plus needs at least 3 coef vectors (order+1 >= 3)
+    z = (t_cheby *)malloc(sizeof(t_cheby) + (order + 1) * sizeof(s16[4]));  // one 4 x s16 vector per coef; result is not checked
+    z->order = order;
+    setvec(z->coefs + 0*4, 0);     // a0 = 0
+    setvec(z->coefs + 1*4, 0.25);  // a1: 0.25 is what setcoef stores for f == 1
+    for (i=2; i<=order; i++)  setvec(z->coefs + i*4, 0);  // higher order coefs start at 0
+
+    return z;
+}
+void pdp_imageproc_cheby_delete(void *x){free(x);}
+void pdp_imageproc_cheby_setcoef(void *x, u32 n, float f)
+{
+    t_cheby *z = (t_cheby *)x;
+    if (n <= z->order){
+	setvec(z->coefs + n*4, f * 0.25); // coefs are in s2.13 format
+    }
+}
+void pdp_imageproc_cheby_process(void *x, s16 *image, u32 width, u32 height, u32 iterations)
+{
+    t_cheby *z = (t_cheby *)x;
+    u32 i,j;
+    if (!(width>>2)) return;  // the mmx loop is bottom-tested: a vector count of 0 would wrap and overrun the image
+    for (j=0; j < (height*width); j += width)  // j = offset of the current line
+	for (i=0; i<iterations; i++)           // all passes over one line before moving to the next
+	    pixel_cheby_s16_3plus(image+j, width>>2, z->order+1, z->coefs);  // width>>2 vectors of 4 pixels
+    //pixel_cheby_s16_3plus(image, (width*height)>>2, z->order+1, z->coefs);
+}
diff --git a/system/pdp_imageproc_portable.c b/system/pdp_imageproc_portable.c
index 60062d6..6feddd3 100644
--- a/system/pdp_imageproc_portable.c
+++ b/system/pdp_imageproc_portable.c
@@ -21,6 +21,7 @@
 
 
 #include <stdlib.h>
+#include <math.h>
 #include "pdp_imageproc.h"
 
 // utility stuff
@@ -490,3 +491,173 @@ void pdp_imageproc_random_process(void *x, s16 *image, u32 width, u32 height)
     
 }
 
+
+
+/* resampling code */
+// zoom + rotate
+
+/* bilinear resampling core routine */
+/* virtual coordinates are the lowest 16 bits in virt_x and virt_y*/
+static inline s32 pdp_resample_bilin(s16 *image, s32 width, s32 height, s32 virt_x, s32 virt_y)
+{
+
+    s32 fp_x, fp_y, frac_x, frac_y, f, offset, r_1, r_2;
+
+    //virt_x &= 0xffff;
+    //virt_y &= 0xffff;
+
+    fp_x = virt_x * (width - 1);   // pixel x position, 16 fractional bits
+    fp_y = virt_y * (height - 1);  // pixel y position, 16 fractional bits
+
+    frac_x = fp_x & (0xffff);      // distance from the left neighbour
+    frac_y = fp_y & (0xffff);      // distance from the top neighbour
+
+    offset = (fp_x >> 16) + (fp_y >> 16) * width;  // index of the top left neighbour
+    image += offset;
+
+    f = 0x10000 - frac_x;          // weight of the left neighbour
+
+    r_1 = ((f * (s32)(image[0])  +  frac_x * (s32)(image[1])))>>16;  // top line, blended in x
+
+    image += width;                // one line down
+
+    r_2 = ((f * (s32)(image[0])  +  frac_x * (s32)(image[1])))>>16;  // bottom line, blended in x
+
+    f = 0x10000 - frac_y;          // weight of the top line
+
+    return ((f * r_1 + frac_y * r_2)>>16);  // blend the two lines in y
+    
+}
+
+typedef struct
+{
+    float centerx;
+    float centery;
+    float zoomx;
+    float zoomy;
+    float angle;
+} t_affine_map;
+
+
+void *pdp_imageproc_resample_affinemap_new(void)
+{
+
+    t_affine_map *a  = (t_affine_map *)malloc(sizeof(t_affine_map));
+    a->centerx = 0.5;
+    a->centery = 0.5;
+    a->zoomx = 1.0;
+    a->zoomy = 1.0;
+    a->angle = 0.0f;
+    return (void *)a;
+}
+void pdp_imageproc_resample_affinemap_delete(void *x){free(x);}
+void pdp_imageproc_resample_affinemap_setcenterx(void *x, float f){((t_affine_map *)x)->centerx = f;}
+void pdp_imageproc_resample_affinemap_setcentery(void *x, float f){((t_affine_map *)x)->centery = f;}
+void pdp_imageproc_resample_affinemap_setzoomx(void *x, float f){((t_affine_map *)x)->zoomx = f;}
+void pdp_imageproc_resample_affinemap_setzoomy(void *x, float f){((t_affine_map *)x)->zoomy = f;}
+void pdp_imageproc_resample_affinemap_setangle(void *x, float f){((t_affine_map *)x)->angle = f;}
+void pdp_imageproc_resample_affinemap_process(void *x, s16 *src_image, s16 *dst_image, u32 width, u32 height)
+{
+    t_affine_map *a = (t_affine_map *)x;
+    double izx = 1.0f / (a->zoomx);                 // inverse zoom (zoomx == 0 is not guarded here)
+    double izy = 1.0f / (a->zoomy);                 // inverse zoom (zoomy == 0 is not guarded here)
+    double scale = (double)0xffffffff;              // full range of a 32 bit virtual coordinate
+    double scalew = scale / ((double)(width - 1));  // pixel x -> virtual x
+    double scaleh = scale / ((double)(height - 1)); // pixel y -> virtual y
+    double cx = ((double)a->centerx) * ((double)(width - 1));   // center in pixels
+    double cy = ((double)a->centery) * ((double)(height - 1));
+    double angle = a->angle * (-M_PI / 180.0);      // degrees -> radians, sign flipped
+    double c = cos(angle);
+    double s = sin(angle);
+
+    /* affine x, y mappings in screen coordinates */
+    double mapx(double x, double y){return cx + izx * ( c * (x-cx) + s * (y-cy));}
+    double mapy(double x, double y){return cy + izy * (-s * (x-cx) + c * (y-cy));}
+    /* double -> u32 is undefined for negative or >= 2^32 values (C99 6.3.1.4); go via long long so they wrap mod 2^32 */
+    u32 colstate_x = (u32)(long long)(scalew * mapx(0,0));
+    u32 colstate_y = (u32)(long long)(scaleh * mapy(0,0));
+    u32 rowstate_x = colstate_x;
+    u32 rowstate_y = colstate_y;
+
+    u32 row_inc_x = (u32)(long long)(scalew * (mapx(1,0)-mapx(0,0)));
+    u32 row_inc_y = (u32)(long long)(scaleh * (mapy(1,0)-mapy(0,0)));
+    u32 col_inc_x = (u32)(long long)(scalew * (mapx(0,1)-mapx(0,0)));
+    u32 col_inc_y = (u32)(long long)(scaleh * (mapy(0,1)-mapy(0,0)));
+
+    u32 i,j;
+
+    for (j=0; j<height; j++){
+	for (i=0; i<width; i++){
+	    *dst_image++ = pdp_resample_bilin(src_image, width, height, rowstate_x>>16, rowstate_y>>16);  // upper 16 bits = virtual coord
+	    rowstate_x += row_inc_x;  // unsigned add wraps, giving modular (tiled) addressing
+	    rowstate_y += row_inc_y;
+	}
+	colstate_x += col_inc_x;      // step to the start of the next destination line
+	colstate_y += col_inc_y;
+	rowstate_x = colstate_x;
+	rowstate_y = colstate_y;
+    }
+
+}
+
+
+
+
+
+// polynomials
+
+
+
+
+typedef struct
+{
+    u32 order;
+    s32 coefs[0];
+} t_cheby;
+
+void *pdp_imageproc_cheby_new(int order)
+{
+    t_cheby *z;
+    int i;
+    if (order < 2) order = 2;
+    z = (t_cheby *)malloc(sizeof(t_cheby) + (order + 1) * sizeof(s32));
+    z->order = order;
+    z->coefs[0] = 0;
+    z->coefs[1] = 0x7fff;
+    for (i=2; i<=order; i++) z->coefs[i] = 0;
+    return z;
+}
+void pdp_imageproc_cheby_delete(void *x){free(x);}
+void pdp_imageproc_cheby_setcoef(void *x, u32 n, float f)
+{
+
+    t_cheby *z = (t_cheby *)x;
+    if (n <= z->order){
+	z->coefs[n] = (s32)(f * 32767.0f); // coefs are in s16.15 format
+    }
+
+}
+void pdp_imageproc_cheby_process(void *x, s16 *image, u32 width, u32 height, u32 iterations)
+{
+
+    t_cheby *z = (t_cheby *)x;
+    u32 i,j,k;
+    s32 *c = z->coefs;
+    for (j=0; j < (height*width); j++){       /* every pixel on its own */
+	s32 acc = (s32)image[j];
+	for (i=0; i<iterations; i++){         /* feed the result back in; acc is not clamped between passes */
+	    s32 T2 = 0x7fff; /* 1 */
+	    s32 T1 = acc;    /* x */
+	    s32 t;
+	    s32 in = acc;
+	    acc = c[0] + ((in*c[1])>>15);     /* first two terms: c0 + c1 x */
+	    for (k=2; k<=z->order; k++){
+		t = ((T1*in)>>14) - T2; /* T_n = 2 x T_n-1 - T_n-2 */
+		T2 = T1;
+		T1 = t;
+		acc += ((c[k] * t)>>15);      /* add c_k * T_k */
+	    }
+	}
+	image[j] = (s16)(CLAMP16(acc));       /* clamp once, after all passes */
+    }
+}
diff --git a/system/pdp_queue.c b/system/pdp_queue.c
index 2932728..fe3748e 100644
--- a/system/pdp_queue.c
+++ b/system/pdp_queue.c
@@ -297,7 +297,7 @@ void pdp_queue_setup(void)
   curr = 0;
   q = getbytes(PDP_QUEUE_SIZE * sizeof(*q));
 
-  /* use threads by default */
+  /* enable threads */
   use_thread = 1;
 
   /* setup synchro stuff */
@@ -324,6 +324,9 @@ void pdp_queue_setup(void)
 
 
 
+  /* set default disable/enable thread here */
+  pdp_queue_use_thread(0);
+
 }
 
 
diff --git a/system/pdp_resample.c b/system/pdp_resample.c
index 2b5a9de..12bc639 100644
--- a/system/pdp_resample.c
+++ b/system/pdp_resample.c
@@ -31,7 +31,10 @@ i.e. 16 bit virtual coordinates: easy modular addressing
 
 */
 
-s32 pdp_resample_bilin(s16 *image, s32 width, s32 height, s32 virt_x, s32 virt_y)
+
+/* code in this file should go out to be replaced by code in pdp_imageproc */
+
+static s32 pdp_resample_bilin(s16 *image, s32 width, s32 height, s32 virt_x, s32 virt_y)
 {
 
     s32 fp_x, fp_y, frac_x, frac_y, f, offset, r_1, r_2;
@@ -106,6 +109,7 @@ void pdp_resample_scale_nn(s16 *src_image, s16 *dst_image, s32 src_w, s32 src_h,
 
 }
 
+/* USE pdp_resample_affinemap
 void pdp_resample_zoom_tiled_bilin(s16 *src_image, s16 *dst_image, s32 w, s32 h, 
 				   float zoom_x, float zoom_y, float center_x_relative, float center_y_relative)
 {
@@ -132,4 +136,4 @@ void pdp_resample_zoom_tiled_bilin(s16 *src_image, s16 *dst_image, s32 w, s32 h,
     }
 
 }
-
+*/
-- 
cgit v1.2.1