From 9b8745d5250c9d0b60c9aa5a77f58a3fcddf1076 Mon Sep 17 00:00:00 2001 From: Tom Schouten Date: Tue, 21 Jan 2003 10:27:33 +0000 Subject: This commit was generated by cvs2svn to compensate for changes in r352, which included commits to RCS files with non-trunk default branches. svn path=/trunk/externals/pdp/; revision=353 --- system/Makefile | 20 ++ system/Makefile.linux | 2 + system/Makefile.linux_mmx | 4 + system/mmx/Makefile | 29 +++ system/mmx/pdp_mmx_test.c | 62 +++++ system/mmx/pixel_add_s16.s | 55 +++++ system/mmx/pixel_affine_s16.s | 59 +++++ system/mmx/pixel_biquad_dirI_s16.s | 361 +++++++++++++++++++++++++++ system/mmx/pixel_biquad_s16.s | 451 ++++++++++++++++++++++++++++++++++ system/mmx/pixel_ca_s1.s | 189 ++++++++++++++ system/mmx/pixel_cascade_s16.s | 330 +++++++++++++++++++++++++ system/mmx/pixel_conv_hor_s16.s | 134 ++++++++++ system/mmx/pixel_conv_ver_s16.s | 128 ++++++++++ system/mmx/pixel_crot_s16.s | 153 ++++++++++++ system/mmx/pixel_gain.s | 83 +++++++ system/mmx/pixel_gain_s16.s | 71 ++++++ system/mmx/pixel_mix_s16.s | 68 +++++ system/mmx/pixel_mul_s16.s | 56 +++++ system/mmx/pixel_pack_s16u8.s | 126 ++++++++++ system/mmx/pixel_rand_s16.s | 76 ++++++ system/mmx/pixel_randmix_s16.s | 91 +++++++ system/mmx/pixel_s1.s | 201 +++++++++++++++ system/mmx/pixel_unpack_u8s16.s | 113 +++++++++ system/pdp.c | 115 +++++++++ system/pdp_comm.c | 119 +++++++++ system/pdp_control.c | 162 ++++++++++++ system/pdp_imageproc_mmx.c | 319 ++++++++++++++++++++++++ system/pdp_imageproc_portable.c | 492 +++++++++++++++++++++++++++++++++++++ system/pdp_llconv.c | 293 ++++++++++++++++++++++ system/pdp_llconv_mmx.c | 55 +++++ system/pdp_llconv_portable.c | 81 ++++++ system/pdp_packet.c | 239 ++++++++++++++++++ system/pdp_queue.c | 337 +++++++++++++++++++++++++ system/pdp_resample.c | 135 ++++++++++ system/pdp_type.c | 143 +++++++++++ system/pdp_ut.c | 195 +++++++++++++++ 36 files changed, 5547 insertions(+) create mode 100644 system/Makefile create mode 100644 system/Makefile.linux create mode 100644 system/Makefile.linux_mmx create mode 100644 system/mmx/Makefile create mode 100644 system/mmx/pdp_mmx_test.c create mode 100644 system/mmx/pixel_add_s16.s create mode 100644 system/mmx/pixel_affine_s16.s create mode 100644 system/mmx/pixel_biquad_dirI_s16.s create mode 100644 system/mmx/pixel_biquad_s16.s create mode 100644 system/mmx/pixel_ca_s1.s create mode 100644 system/mmx/pixel_cascade_s16.s create mode 100644 system/mmx/pixel_conv_hor_s16.s create mode 100644 system/mmx/pixel_conv_ver_s16.s create mode 100644 system/mmx/pixel_crot_s16.s create mode 100644 system/mmx/pixel_gain.s create mode 100644 system/mmx/pixel_gain_s16.s create mode 100644 system/mmx/pixel_mix_s16.s create mode 100644 system/mmx/pixel_mul_s16.s create mode 100644 system/mmx/pixel_pack_s16u8.s create mode 100644 system/mmx/pixel_rand_s16.s create mode 100644 system/mmx/pixel_randmix_s16.s create mode 100644 system/mmx/pixel_s1.s create mode 100644 system/mmx/pixel_unpack_u8s16.s create mode 100644 system/pdp.c create mode 100644 system/pdp_comm.c create mode 100644 system/pdp_control.c create mode 100644 system/pdp_imageproc_mmx.c create mode 100644 system/pdp_imageproc_portable.c create mode 100644 system/pdp_llconv.c create mode 100644 system/pdp_llconv_mmx.c create mode 100644 system/pdp_llconv_portable.c create mode 100644 system/pdp_packet.c create mode 100644 system/pdp_queue.c create mode 100644 system/pdp_resample.c create mode 100644 system/pdp_type.c create mode 100644 system/pdp_ut.c (limited to 'system') diff --git a/system/Makefile b/system/Makefile new file mode 100644 index 0000000..acdb944 --- /dev/null +++ b/system/Makefile @@ -0,0 +1,20 @@ +target: all_objects + +include ../Makefile.config +include Makefile.$(PDP_TARGET) + + + +OBJECTS = pdp.o pdp_ut.o pdp_packet.o pdp_type.o pdp_queue.o pdp_comm.o \ + pdp_control.o pdp_llconv.o pdp_resample.o + +pdp_main_clean: + rm -f pdp.o + +all_objects: pdp_main_clean $(OBJECTS) platform_targets + +clean: + rm -f *~ + rm -f *.o + make -C mmx clean + diff --git a/system/Makefile.linux b/system/Makefile.linux new file mode 100644 index 0000000..96660f7 --- /dev/null +++ b/system/Makefile.linux @@ -0,0 +1,2 @@ +platform_targets: pdp_imageproc_portable.o pdp_llconv_portable.o + diff --git a/system/Makefile.linux_mmx b/system/Makefile.linux_mmx new file mode 100644 index 0000000..a646e5e --- /dev/null +++ b/system/Makefile.linux_mmx @@ -0,0 +1,4 @@ +platform_subtree: + make -C mmx + +platform_targets: pdp_imageproc_mmx.o pdp_llconv_mmx.o platform_subtree diff --git a/system/mmx/Makefile b/system/mmx/Makefile new file mode 100644 index 0000000..0f8f836 --- /dev/null +++ b/system/mmx/Makefile @@ -0,0 +1,29 @@ +include ../../Makefile.config + +OBJ = \ +pixel_pack_s16u8.o \ +pixel_unpack_u8s16.o \ +pixel_add_s16.o \ +pixel_mul_s16.o \ +pixel_mix_s16.o \ +pixel_randmix_s16.o \ +pixel_conv_hor_s16.o \ +pixel_conv_ver_s16.o \ +pixel_affine_s16.o \ +pixel_biquad_s16.o \ +pixel_ca_s1.o \ +pixel_rand_s16.o \ +pixel_crot_s16.o \ +pixel_gain_s16.o + +all: $(OBJ) + +test: pdp_mmx_test.o $(OBJ) + gcc -o pdp_mmx_test pdp_mmx_test.o $(OBJ) -g + +clean: + rm -f *.o + rm -f *~ + rm -f pdp_mmx.a + rm -f pdp_mmx_test + diff --git a/system/mmx/pdp_mmx_test.c b/system/mmx/pdp_mmx_test.c new file mode 100644 index 0000000..e93539f --- /dev/null +++ b/system/mmx/pdp_mmx_test.c @@ -0,0 +1,62 @@ +#include "pdp_mmx.h" + +#define FP(x) ((short int)(((float)(x) * 2 * 256.0f))) + +#define nbp 256 + + short int a1[4] = {0x0100,0x0100,0x0100,0x0100}; + short int a2[4] = {0x0100,0x0100,0x0100,0x0100}; + short int b0[4] = {0x0100,0x0100,0x0100,0x0100}; + short int b1[4] = {0x0100,0x0100,0x0100,0x0100}; + short int b2[4] = {0x0100,0x0100,0x0100,0x0100}; + + short int u1[4] = {0x0100,0x0100,0x0100,0x0100}; + short int u2[4] = {0x0100,0x0100,0x0100,0x0100}; + + short int x0[4] = {0x0100,0x0100,0x0100,0x0100}; + short int x1[4] = {0x0100,0x0100,0x0100,0x0100}; + short int x2[4] = {0x0100,0x0100,0x0100,0x0100}; + short int x3[4] = {0x0100,0x0100,0x0100,0x0100}; + +void print_pixel(unsigned int i) +{ + if (i) printf("x "); + else printf(". "); +} + +void print_line(void) +{ + printf("\n"); +} + +void print_square(unsigned char *c) +{ + int i,j; + + for(j=7; j>=0; j--){ + for(i=0; i<8; i++) print_pixel(c[j] & (1<<(7-i))); + printf("\n"); + } + +} + +main() +{ + + unsigned char src[16]={1,2,3,4,5,6,7,8,-1,-2,-3,-4,-5,-6,-7,-8}; + unsigned char dst[8]; + + + print_square(src); + print_line(); + print_square(src+8); + print_line(); + + pixel_test_s1(dst,src,1,1); + + print_square(dst); + print_line(); + + + +} diff --git a/system/mmx/pixel_add_s16.s b/system/mmx/pixel_add_s16.s new file mode 100644 index 0000000..8d4c7df --- /dev/null +++ b/system/mmx/pixel_add_s16.s @@ -0,0 +1,55 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_add_s16 +.type pixel_add_s16,@function + +# simple add +# void pixel_add_s16(int *left, int *right, int nb_4pixel_vectors) + +pixel_add_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 8(%ebp), %edi # left array + movl 12(%ebp), %esi # right array + movl 16(%ebp), %ecx # pixel count + + + .align 16 + .loop_mix: + +# prefetch 128(%esi) + movq (%esi), %mm1 # load right 4 pixels from memory + movq (%edi), %mm0 # load 4 left pixels from memory + paddsw %mm1, %mm0 # mix + movq %mm0, (%edi) + addl $8, %esi + addl $8, %edi + decl %ecx + jnz .loop_mix # loop + + emms + + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_affine_s16.s b/system/mmx/pixel_affine_s16.s new file mode 100644 index 0000000..b357de3 --- /dev/null +++ b/system/mmx/pixel_affine_s16.s @@ -0,0 +1,59 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_affine_s16 +.type pixel_affine_s16,@function + +# void pixel_affine_s16(int *buf, int nb_8pixel_vectors, short int gain[4], short int offset[4]) + +pixel_affine_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 20(%ebp), %edi + movq (%edi), %mm6 # get offset vector + + movl 16(%ebp), %edi + movq (%edi), %mm7 # get gain vector + + movl 8(%ebp), %esi # input array + movl 12(%ebp), %ecx # pixel count + + + .align 16 + .loop_affine: + +# prefetch 128(%esi) + movq (%esi), %mm0 # load 4 pixels from memory + pmulhw %mm7, %mm0 # apply gain (s).15 fixed point + psllw $1, %mm0 # apply correction shift + paddsw %mm6, %mm0 # add offset + movq %mm0, (%esi) # store result in memory + + addl $8, %esi # increment source pointer + decl %ecx + jnz .loop_affine # loop + + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_biquad_dirI_s16.s b/system/mmx/pixel_biquad_dirI_s16.s new file mode 100644 index 0000000..1729502 --- /dev/null +++ b/system/mmx/pixel_biquad_dirI_s16.s @@ -0,0 +1,361 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + + + # TODO MOVE TO DIRECT FORM II + # y[k] = b0 * x[k] + u1[k-1] + # u1[k] = b1 * x[k] + u2[k-1] - a1 * y[k] + # u2[k] = b2 * x[k] - a2 * y[k] + + # input in register: + # %mm0-mm3: input 4x4 pixels {x0 x1 x2 x3} + # %esi: coef memory (a1, a2, b0, b1, b2) + # %edi: state memory (u1, u2) + + + # return in register: + # %mm0-mm4: 4x4 pixels result + + + .biquad_4x4_pixels: + .align 16 + # prescale + movq -8(%esi), %mm4 + pmulhw %mm4, %mm0 + pmulhw %mm4, %mm1 + pmulhw %mm4, %mm2 + pmulhw %mm4, %mm3 + psllw $1, %mm0 + psllw $1, %mm1 + psllw $1, %mm2 + psllw $1, %mm3 + + + # first vector + movq 0(%edi), %mm4 # mm4 <- u[-1] + movq 8(%edi), %mm5 # mm5 <- u[-2] + movq %mm4, %mm6 + movq %mm5, %mm7 + + pmulhw 0(%esi), %mm6 # multiply by a1 + pmulhw 8(%esi), %mm7 # multiply by a2 + + paddsw %mm6, %mm0 # accumulate + paddsw %mm7, %mm0 # accumulate + paddsw %mm0, %mm0 # scale by 2 (since all fixed point muls are x*y/2) + + movq %mm0, %mm6 # mm6 <- u[0] + movq %mm4, %mm7 # mm7 <- u[-1] + pmulhw 16(%esi), %mm0 # multiply by b0 + pmulhw 24(%esi), %mm4 # multiply by b1 + pmulhw 32(%esi), %mm5 # multiply by b2 + + paddsw %mm4, %mm0 # accumulate + paddsw %mm5, %mm0 # accumulate + + # mm0 is result 0 + + # second vector + movq %mm6, %mm4 # mm4 <- u[0] + movq %mm7, %mm5 # mm5 <- u[-1] + + pmulhw 0(%esi), %mm6 # multiply by a1 + pmulhw 8(%esi), %mm7 # multiply by a2 + + paddsw %mm6, %mm1 # accumulate + paddsw %mm7, %mm1 # accumulate + paddsw %mm1, %mm1 # scale by 2 + + + movq %mm1, %mm6 # mm6 <- u[1] + movq %mm4, %mm7 # mm7 <- u[0] + pmulhw 16(%esi), %mm1 # multiply by b0 + pmulhw 24(%esi), %mm4 # multiply by b1 + pmulhw 32(%esi), %mm5 # multiply by b2 + + paddsw %mm4, %mm1 # accumulate + paddsw %mm5, %mm1 # accumulate + + # mm1 is result 1 + + # third vector + movq %mm6, %mm4 # mm4 <- u[1] + movq %mm7, %mm5 # mm5 <- u[0] + + pmulhw 0(%esi), %mm6 # multiply by a1 + pmulhw 8(%esi), %mm7 # multiply by a2 + + paddsw %mm6, %mm2 # accumulate + paddsw %mm7, %mm2 # accumulate + paddsw %mm2, %mm2 # scale by 2 + + + movq %mm2, %mm6 # mm6 <- u[2] + movq %mm4, %mm7 # mm7 <- u[1] + pmulhw 16(%esi), %mm2 # multiply by b0 + pmulhw 24(%esi), %mm4 # multiply by b1 + pmulhw 32(%esi), %mm5 # multiply by b2 + + paddsw %mm4, %mm2 # accumulate + paddsw %mm5, %mm2 # accumulate + + # mm2 is result 2 + + # fourth vector + movq %mm6, %mm4 # mm4 <- u[2] + movq %mm7, %mm5 # mm5 <- u[1] + + pmulhw 0(%esi), %mm6 # multiply by a1 + pmulhw 8(%esi), %mm7 # multiply by a2 + + paddsw %mm6, %mm3 # accumulate + paddsw %mm7, %mm3 # accumulate + paddsw %mm3, %mm3 # scale by 2 + + + movq %mm3, 0(%edi) # store u[3] + movq %mm4, 8(%edi) # store u[2] + pmulhw 16(%esi), %mm3 # multiply by b0 + pmulhw 24(%esi), %mm4 # multiply by b1 + pmulhw 32(%esi), %mm5 # multiply by b2 + + paddsw %mm4, %mm3 # accumulate + paddsw %mm5, %mm3 # accumulate + + # mm3 is result 3 + + ret + + + # in order to use the 4 line parallel biquad routine on horizontal + # lines, we need to reorder (rotate or transpose) the matrix, since + # images are scanline encoded, and we want to work in parallell + # on 4 lines. + # + # since the 4 lines are independent, it doesnt matter in which order + # the the vector elements are present. + # + # this allows us to use the same routine for left->right and right->left + # processing. + # + # some comments on the non-abelean group of square isometries consisting of + # (I) identity + # (H) horizontal axis mirror + # (V) vertical axis mirror + # (T) transpose (diagonal axis mirror) + # (A) antitranspose (antidiagonal axis mirror) + # (R1) 90deg anticlockwize rotation + # (R2) 180deg rotation + # (R3) 90deg clockwize rotation + # + # + # we basicly have two options: (R1,R3) or (T,A) + # we opt for T and A because they are self inverting, which improves locality + # + # use antitranspose for right to left an transpose + # for left to right (little endian) + + + # antitranspose 4x4 + + # input + # %mm3 == {d0 d1 d2 d3} + # %mm2 == {c0 c1 c2 c3} + # %mm1 == {b0 b1 b2 b3} + # %mm0 == {a0 a1 a2 a3} + + # output + # %mm3 == {a3 b3 c3 d3} + # %mm2 == {a2 b2 c2 d2} + # %mm1 == {a1 b1 c1 d1} + # %mm0 == {a0 b0 c0 d0} + + + .antitranspose_4x4: + .align 16 + movq %mm3, %mm4 + punpcklwd %mm1, %mm4 # mm4 <- {b2 d2 b3 d3} + movq %mm3, %mm5 + punpckhwd %mm1, %mm5 # mm5 <- {b0 d0 b1 d1} + + movq %mm2, %mm6 + punpcklwd %mm0, %mm6 # mm6 <- {a2 c2 a3 c3} + movq %mm2, %mm7 + punpckhwd %mm0, %mm7 # mm7 <- {a0 c0 a1 c1} + + movq %mm4, %mm3 + punpcklwd %mm6, %mm3 # mm3 <- {a3 b3 c3 d3} + movq %mm4, %mm2 + punpckhwd %mm6, %mm2 # mm2 <- {a2 b2 c2 d2} + + movq %mm5, %mm1 + punpcklwd %mm7, %mm1 # mm1 <- {a1 b1 c1 d1} + movq %mm5, %mm0 + punpckhwd %mm7, %mm0 # mm0 <- {a0 b0 c0 d0} + + ret + + + + # transpose 4x4 + + # input + # %mm3 == {d3 d2 d1 d0} + # %mm2 == {c3 c2 c1 c0} + # %mm1 == {b3 b2 b1 b0} + # %mm0 == {a3 a2 a1 a0} + + # output + # %mm3 == {d3 c3 b3 a3} + # %mm2 == {d2 c2 b2 a2} + # %mm1 == {d1 c1 b1 a1} + # %mm0 == {d0 c0 b0 a0} + + + .transpose_4x4: + .align 16 + movq %mm0, %mm4 + punpcklwd %mm2, %mm4 # mm4 <- {c1 a1 c0 a0} + movq %mm0, %mm5 + punpckhwd %mm2, %mm5 # mm5 <- {c3 a3 c2 a2} + + movq %mm1, %mm6 + punpcklwd %mm3, %mm6 # mm6 <- {d1 b1 d0 b0} + movq %mm1, %mm7 + punpckhwd %mm3, %mm7 # mm7 <- {d3 b3 d2 b2} + + movq %mm4, %mm0 + punpcklwd %mm6, %mm0 # mm0 <- {d0 c0 b0 a0} + movq %mm4, %mm1 + punpckhwd %mm6, %mm1 # mm1 <- {d1 c1 b1 a1} + + movq %mm5, %mm2 + punpcklwd %mm7, %mm2 # mm2 <- {d2 c2 b2 a2} + movq %mm5, %mm3 + punpckhwd %mm7, %mm3 # mm3 <- {d3 c3 b3 a3} + + ret + + +.globl pixel_biquad_vertb_s16 +.type pixel_biquad_vertb_s16,@function + + +# pixel_biquad_vertbr_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + + +pixel_biquad_vertb_s16: + + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %edx # line with + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + movl %edx, %eax + shll $1, %eax + addl %edx, %eax # eax = 3 * edx + + .align 16 + .biquad_vertb_line_loop: + movq (%ebx), %mm0 + movq (%ebx,%edx,1), %mm1 + movq (%ebx,%edx,2), %mm2 + movq (%ebx,%eax,1), %mm3 + call .biquad_4x4_pixels + movq %mm0, (%ebx) + movq %mm1, (%ebx,%edx,1) + movq %mm2, (%ebx,%edx,2) + movq %mm3, (%ebx,%eax,1) + addl %edx, %ebx + addl %eax, %ebx + decl %ecx + jnz .biquad_vertb_line_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + +.globl pixel_biquad_horlr_s16 +.type pixel_biquad_horlr_s16,@function + + +# pixel_biquad_hor_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + + +pixel_biquad_horlr_s16: + + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %edx # line with + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + movl %edx, %eax + shll $1, %eax + addl %edx, %eax # eax = 3 * edx + + .align 16 + .biquad_horlr_line_loop: + movq (%ebx), %mm0 + movq (%ebx,%edx,1), %mm1 + movq (%ebx,%edx,2), %mm2 + movq (%ebx,%eax,1), %mm3 + call .transpose_4x4 + call .biquad_4x4_pixels + call .transpose_4x4 + movq %mm0, (%ebx) + movq %mm1, (%ebx,%edx,1) + movq %mm2, (%ebx,%edx,2) + movq %mm3, (%ebx,%eax,1) + addl $8, %ebx + decl %ecx + jnz .biquad_horlr_line_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + + + diff --git a/system/mmx/pixel_biquad_s16.s b/system/mmx/pixel_biquad_s16.s new file mode 100644 index 0000000..844b041 --- /dev/null +++ b/system/mmx/pixel_biquad_s16.s @@ -0,0 +1,451 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + + + # DIRECT FORM II BIQUAD + # + # y[k] = b0 * x[k] + u1[k-1] + # u1[k] = b1 * x[k] + u2[k-1] - a1 * y[k] + # u2[k] = b2 * x[k] - a2 * y[k] + # MACRO: df2 + # + # computes a direct form 2 biquad + # does not use {mm0-mm3}\ + # + # input: == input + # %mm4 == state 1 + # %mm5 == state 2 + # (%esi) == biquad coefs (-a1 -a2 b0 b1 b2) in s1.14 + # output: == output + # %mm4 == state 1 + # %mm5 == state 2 + + .macro df2 reg + movq \reg, %mm6 # mm6 == x[k] + movq \reg, %mm7 # mm7 == x[k] + pmulhw 16(%esi), %mm6 # mm6 == x[k] * b0 + pmulhw 24(%esi), %mm7 # mm7 == x[k] * b1 + paddw %mm4, %mm6 # mm6 == x[k] * b0 + u1[k-1] == y[k] + paddw %mm5, %mm7 # mm7 == x[k] * b1 + u2[k-1] + paddsw %mm6, %mm6 # compensate for mul = x*y/4 (coefs are s1.14 fixed point) + paddsw %mm6, %mm6 # paddsw ensures saturation + movq \reg, %mm5 # mm5 == x[k] + movq %mm6, %mm4 # mm4 == y[k] + movq %mm6, \reg # reg == y[k] -------------------- + pmulhw 0(%esi), %mm4 # mm4 == y[k] * (-a1) + pmulhw 8(%esi), %mm6 # mm6 == y[k] * (-a2) + pmulhw 32(%esi), %mm5 # mm5 == x[k] * b2 + paddw %mm7, %mm4 # mm4 == u1[k] -------------------- + paddw %mm6, %mm5 # mm5 == u2[k] -------------------- + .endm + + + # input in register: + # %mm0-mm3: input 4x4 pixels {x0 x1 x2 x3} + # %esi: coef memory (-a1, -a2, b0, b1, b2) in s1.14 + # %edi: state memory (u1, u2) + + # return in register: + # %mm0-mm4: 4x4 pixels result + + + + + .macro biquad_4x4_pixels + .align 16 + movq 0(%edi), %mm4 # get state + movq 8(%edi), %mm5 + df2 %mm0 # compute 4 biquads + df2 %mm1 + df2 %mm2 + df2 %mm3 + movq %mm4, 0(%edi) # store state + movq %mm5, 8(%edi) + .endm + + + + # in order to use the 4 line parallel biquad routine on horizontal + # lines, we need to reorder (rotate or transpose) the matrix, since + # images are scanline encoded, and we want to work in parallell + # on 4 lines. + # + # since the 4 lines are independent, it doesnt matter in which order + # the the vector elements are present. + # + # this allows us to use the same routine for left->right and right->left + # processing. + # + # some comments on the non-abelean group of square isometries consisting of + # (I) identity + # (H) horizontal axis mirror + # (V) vertical axis mirror + # (T) transpose (diagonal axis mirror) + # (A) antitranspose (antidiagonal axis mirror) + # (R1) 90deg anticlockwize rotation + # (R2) 180deg rotation + # (R3) 90deg clockwize rotation + # + # + # we basicly have two options: (R1,R3) or (T,A) + # we opt for T and A because they are self inverting, which improves locality + # + # use antitranspose for right to left an transpose + # for left to right (little endian) + + + # antitranspose 4x4 + + # input + # %mm3 == {d0 d1 d2 d3} + # %mm2 == {c0 c1 c2 c3} + # %mm1 == {b0 b1 b2 b3} + # %mm0 == {a0 a1 a2 a3} + + # output + # %mm3 == {a3 b3 c3 d3} + # %mm2 == {a2 b2 c2 d2} + # %mm1 == {a1 b1 c1 d1} + # %mm0 == {a0 b0 c0 d0} + + + .macro antitranspose_4x4: + movq %mm3, %mm4 + punpcklwd %mm1, %mm4 # mm4 <- {b2 d2 b3 d3} + movq %mm3, %mm5 + punpckhwd %mm1, %mm5 # mm5 <- {b0 d0 b1 d1} + + movq %mm2, %mm6 + punpcklwd %mm0, %mm6 # mm6 <- {a2 c2 a3 c3} + movq %mm2, %mm7 + punpckhwd %mm0, %mm7 # mm7 <- {a0 c0 a1 c1} + + movq %mm4, %mm3 + punpcklwd %mm6, %mm3 # mm3 <- {a3 b3 c3 d3} + movq %mm4, %mm2 + punpckhwd %mm6, %mm2 # mm2 <- {a2 b2 c2 d2} + + movq %mm5, %mm1 + punpcklwd %mm7, %mm1 # mm1 <- {a1 b1 c1 d1} + movq %mm5, %mm0 + punpckhwd %mm7, %mm0 # mm0 <- {a0 b0 c0 d0} + + .endm + + + # transpose 4x4 + + # input + # %mm3 == {d3 d2 d1 d0} + # %mm2 == {c3 c2 c1 c0} + # %mm1 == {b3 b2 b1 b0} + # %mm0 == {a3 a2 a1 a0} + + # output + # %mm3 == {d3 c3 b3 a3} + # %mm2 == {d2 c2 b2 a2} + # %mm1 == {d1 c1 b1 a1} + # %mm0 == {d0 c0 b0 a0} + + + .macro transpose_4x4: + movq %mm0, %mm4 + punpcklwd %mm2, %mm4 # mm4 <- {c1 a1 c0 a0} + movq %mm0, %mm5 + punpckhwd %mm2, %mm5 # mm5 <- {c3 a3 c2 a2} + + movq %mm1, %mm6 + punpcklwd %mm3, %mm6 # mm6 <- {d1 b1 d0 b0} + movq %mm1, %mm7 + punpckhwd %mm3, %mm7 # mm7 <- {d3 b3 d2 b2} + + movq %mm4, %mm0 + punpcklwd %mm6, %mm0 # mm0 <- {d0 c0 b0 a0} + movq %mm4, %mm1 + punpckhwd %mm6, %mm1 # mm1 <- {d1 c1 b1 a1} + + movq %mm5, %mm2 + punpcklwd %mm7, %mm2 # mm2 <- {d2 c2 b2 a2} + movq %mm5, %mm3 + punpckhwd %mm7, %mm3 # mm3 <- {d3 c3 b3 a3} + + .endm + +.globl pixel_biquad_vertb_s16 +.type pixel_biquad_vertb_s16,@function + + +# pixel_biquad_vertbr_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + + +pixel_biquad_vertb_s16: + + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %edx # line with + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + movl %edx, %eax + shll $1, %eax + addl %edx, %eax # eax = 3 * edx + + .align 16 + .biquad_vertb_line_loop: + movq (%ebx), %mm0 + movq (%ebx,%edx,1), %mm1 + movq (%ebx,%edx,2), %mm2 + movq (%ebx,%eax,1), %mm3 + biquad_4x4_pixels + movq %mm0, (%ebx) + movq %mm1, (%ebx,%edx,1) + movq %mm2, (%ebx,%edx,2) + movq %mm3, (%ebx,%eax,1) + addl %edx, %ebx + addl %eax, %ebx + decl %ecx + jnz .biquad_vertb_line_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret +.globl pixel_biquad_verbt_s16 +.type pixel_biquad_verbt_s16,@function + + +# pixel_biquad_vertbt_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + + +pixel_biquad_verbt_s16: + + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %eax # line with + + shll $3, %eax # 4 line byte spacing + decl %ecx + mul %ecx + incl %ecx + addl %eax, %ebx # ebx points to last pixblock + + movl 16(%ebp), %edx # line with + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + movl %edx, %eax + shll $1, %eax + addl %edx, %eax # eax = 3 * edx + + .align 16 + .biquad_verbt_line_loop: + movq (%ebx), %mm3 + movq (%ebx,%edx,1), %mm2 + movq (%ebx,%edx,2), %mm1 + movq (%ebx,%eax,1), %mm0 + biquad_4x4_pixels + movq %mm3, (%ebx) + movq %mm2, (%ebx,%edx,1) + movq %mm1, (%ebx,%edx,2) + movq %mm0, (%ebx,%eax,1) + subl %edx, %ebx + subl %eax, %ebx + decl %ecx + jnz .biquad_verbt_line_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + +.globl pixel_biquad_horlr_s16 +.type pixel_biquad_horlr_s16,@function +# pixel_biquad_hor_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + +pixel_biquad_horlr_s16: + + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %edx # line with + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + movl %edx, %eax + shll $1, %eax + addl %edx, %eax # eax = 3 * edx + + .align 16 + .biquad_horlr_line_loop: + movq (%ebx), %mm0 + movq (%ebx,%edx,1), %mm1 + movq (%ebx,%edx,2), %mm2 + movq (%ebx,%eax,1), %mm3 + transpose_4x4 + biquad_4x4_pixels + transpose_4x4 + movq %mm0, (%ebx) + movq %mm1, (%ebx,%edx,1) + movq %mm2, (%ebx,%edx,2) + movq %mm3, (%ebx,%eax,1) + addl $8, %ebx + decl %ecx + jnz .biquad_horlr_line_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + + +.globl pixel_biquad_horrl_s16 +.type pixel_biquad_horrl_s16,@function +# pixel_biquad_horrl_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + +pixel_biquad_horrl_s16: + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %edx # line with + + + movl %ecx, %eax + decl %eax + shll $3, %eax + addl %eax, %ebx # ebx points to last pixblock + + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + movl %edx, %eax + shll $1, %eax + addl %edx, %eax # eax = 3 * edx + + .align 16 + .biquad_horrl_line_loop: + movq (%ebx), %mm0 + movq (%ebx,%edx,1), %mm1 + movq (%ebx,%edx,2), %mm2 + movq (%ebx,%eax,1), %mm3 + antitranspose_4x4 + biquad_4x4_pixels + antitranspose_4x4 + movq %mm0, (%ebx) + movq %mm1, (%ebx,%edx,1) + movq %mm2, (%ebx,%edx,2) + movq %mm3, (%ebx,%eax,1) + subl $8, %ebx + decl %ecx + jnz .biquad_horrl_line_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + + +.globl pixel_biquad_time_s16 +.type pixel_biquad_time_s16,@function +# pixel_biquad_time_s16(short int *pixel_array, short int *s1, short int *s2, short int *coefs, int nb_4_pix_vectors) + +pixel_biquad_time_s16: + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %edx # state 1 array + movl 16(%ebp), %edi # state 2 array + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %ecx # nb of 4 pixel vectors + + + .align 16 + .biquad_time_loop: + movq (%ebx), %mm0 # get input + movq (%edx), %mm4 # get state 1 + movq (%edi), %mm5 # get state 2 + df2 %mm0 # compute direct form 2 + movq %mm0, (%ebx) # write output + movq %mm5, (%edi) # write state 2 + movq %mm4, (%edx) # write state 1 + addl $8, %ebx + addl $8, %edi + addl $8, %edx + decl %ecx + jnz .biquad_time_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + + diff --git a/system/mmx/pixel_ca_s1.s b/system/mmx/pixel_ca_s1.s new file mode 100644 index 0000000..d9c730f --- /dev/null +++ b/system/mmx/pixel_ca_s1.s @@ -0,0 +1,189 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + + # this file contains assembler routines for 2D 1 bit cellular automata + # processing. it is organized around a feeder kernel and a + # stack based bit processor (virtual forth machine) + # + # the feeder kernel is responsable for loading/storing CA cells + # from/to memory. data in memory is organized as a scanline + # encoded toroidial bitplane (lsb = left). to simplify the kernel, the top + # left corner of the rectangular grid of pixels will shift down + # every processing step. + # + # the stack machine has the following architecture: + # CA stack: %esi, TOS: %mm0 (32x2 pixels. lsw = top row) + # CA horizon: %mm4-%mm7 (64x4 pixels. %mm4 = top row) + # + # the stack size / organization is not known to the stack machine. + # it can be thought of as operating on a 3x3 cell neightbourhood. + # the only purpose of forth program is to determine the CA local update rule. + # + # the machine is supposed to be very minimal. no looping control. + # no adressing modes. no conditional code (hey, this is an experiment!) + # so recursion is not allowed (no way to stop it) + # there are 9 words to load the cell neigbourhood on the stack. + # the rest is just logic and stack manips. + + + # this file contains pure asm macros. it is to be included before assembly + # after scaforth.pl has processed the .scaf file + + + # *************************** CA CELL ACCESS MACROS ***************************** + # fetchTL - fetchBR + + # shift / load rectangle macros: + + # shift rectangle horizontal + # result is in reg1 + .macro shift reg1 reg2 count + psllq $(32+\count), \reg1 + psrlq $(32-\count), \reg2 + psrlq $32, \reg1 + psllq $32, \reg2 + por \reg2, \reg1 + .endm + + .macro ldtop reg1 reg2 + movq %mm4, \reg1 + movq %mm5, \reg2 + .endm + + .macro ldcenter reg1 reg2 + movq %mm5, \reg1 + movq %mm6, \reg2 + .endm + + .macro ldbottom reg1 reg2 + movq %mm6, \reg1 + movq %mm7, \reg2 + .endm + + + # fetch from top row + + # fetch the top left square + .macro fetchTL + ldtop %mm0, %mm1 + shift %mm0, %mm1, -1 + .endm + + # fetch the top mid square + .macro fetchTM + ldtop %mm0, %mm1 + shift %mm0, %mm1, 0 + .endm + + # fetch the top right square + .macro fetchTR + ldtop %mm0, %mm1 + shift %mm0, %mm1, 1 + .endm + + + + # fetch from center row + + # fetch the mid left square + .macro fetchML + ldcenter %mm0, %mm1 + shift %mm0, %mm1, -1 + .endm + + # fetch the mid mid square + .macro fetchMM + ldcenter %mm0, %mm1 + shift %mm0, %mm1, 0 + .endm + + # fetch the mid right square + .macro fetchMR + ldcenter %mm0, %mm1 + shift %mm0, %mm1, 1 + .endm + + + + + + # fetch from bottom row + + # fetch the bottom left square + .macro fetchBL + ldbottom %mm0, %mm1 + shift %mm0, %mm1, -1 + .endm + + # fetch the bottom mid square + .macro fetchBM + ldbottom %mm0, %mm1 + shift %mm0, %mm1, 0 + .endm + + # fetch the bottom right square + .macro fetchBR + ldbottom %mm0, %mm1 + shift %mm0, %mm1, 1 + .endm + + + + # *************************** CA STACK MANIP MACROS ***************************** + # dup drop dropdup swap nip dropover + + .macro dup + lea -8(%esi), %esi + movq %mm0, (%esi) + .endm + + .macro drop + movq (%esi), %mm0 + lea 8(%esi), %esi + .endm + + .macro dropdup + movq (%esi), %mm0 + .endm + + .macro swap + movq (%esi), %mm1 + movq %mm0, (%esi) + movq %mm1, %mm0 + .endm + + .macro nip + lea 8(%esi), %esi + .endm + + .macro dropover + movq 8(%esi), %mm0 + .endm + + + # *************************** CA BOOLEAN LOGIC MACROS ***************************** + # overxor + + .macro overxor + pxor (%esi), %mm0 + .endm + + + + + diff --git a/system/mmx/pixel_cascade_s16.s b/system/mmx/pixel_cascade_s16.s new file mode 100644 index 0000000..bf88d08 --- /dev/null +++ b/system/mmx/pixel_cascade_s16.s @@ -0,0 +1,330 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + + + # TODO: COUPLED CASCADE SECOND ORDER SECTION + # + # s1[k] = ar * s1[k-1] + ai * s2[k-1] + x[k] + # s2[k] = ar * s2[k-1] - ai * s1[k-1] + # y[k] = c0 * x[k] + c1 * s1[k-1] + c2 * s2[k-1] + + + # MACRO: df2 + # + # computes a coupled cascade + # + # input: %mm0 == input + # %mm1 == state 1 + # %mm2 == state 2 + # (%esi) == cascade coefs (ar ai c0 c1 c2) in s0.15 + # output: %mm0 == output + # %mm1 == state 1 + # %mm2 == state 2 + + + .macro coupled + pmovq %mm1, %mm3 # mm3 == s1[k-1] + pmovq %mm1, %mm4 # mm4 == s1[k-1] + pmovq %mm2, %mm5 # mm5 == s2[k-1] + pmovq %mm2, %mm6 # mm5 == s2[k-1] + pmulhw (%esi), %mm1 # mm1 == s1[k-1] * ar + pmulhw 8(%esi), %mm3 # mm3 == s1[k-1] * ai + pmulhw 24(%esi), %mm4 # mm4 == s1[k-1] * c1 + pmulhw (%esi), %mm2 # mm2 == s2[k-1] * ar + pmulhw 8(%esi), %mm5 # mm5 == s2[k-1] * ai + pmulhw 32(%esi), %mm6 # mm6 == s2[k-1] * c2 + paddw %mm5, %mm1 # mm1 == s1[k-1] * ar + s2[k-1] * ai + psubw %mm3, %mm2 # mm2 == s2[k-1] * ar - s1[k-1] * ai == s2[k] + paddw %mm0, %mm1 # mm1 == s1[k] + pmulhw 16(%esi), %mm0 # mm0 == x[k] * c0 + paddw %mm6, %mm4 # mm4 == s1[k-1] * c1 + s2[k-1] * c2 + paddw %mm4, %mm0 # mm0 == y[k] + .endm + + + + + # in order to use the 4 line parallel cascade routine on horizontal + # lines, we need to reorder (rotate or transpose) the matrix, since + # images are scanline encoded, and we want to work in parallell + # on 4 lines. + # + # since the 4 lines are independent, it doesnt matter in which order + # the the vector elements are present. + # + # this allows us to use the same routine for left->right and right->left + # processing. + # + # some comments on the non-abelean group of square isometries consisting of + # (I) identity + # (H) horizontal axis mirror + # (V) vertical axis mirror + # (T) transpose (diagonal axis mirror) + # (A) antitranspose (antidiagonal axis mirror) + # (R1) 90deg anticlockwize rotation + # (R2) 180deg rotation + # (R3) 90deg clockwize rotation + # + # + # we basicly have two options: (R1,R3) or (T,A) + # we opt for T and A because they are self inverting, which improves locality + # + # use antitranspose for right to left an transpose + # for left to right (little endian) + + + # antitranspose 4x4 + + # input + # %mm3 == {d0 d1 d2 d3} + # %mm2 == {c0 c1 c2 c3} + # %mm1 == {b0 b1 b2 b3} + # %mm0 == {a0 a1 a2 a3} + + # output + # %mm3 == {a3 b3 c3 d3} + # %mm2 == {a2 b2 c2 d2} + # %mm1 == {a1 b1 c1 d1} + # %mm0 == {a0 b0 c0 d0} + + + .macro antitranspose_4x4: + movq %mm3, %mm4 + punpcklwd %mm1, %mm4 # mm4 <- {b2 d2 b3 d3} + movq %mm3, %mm5 + punpckhwd %mm1, %mm5 # mm5 <- {b0 d0 b1 d1} + + movq %mm2, %mm6 + punpcklwd %mm0, %mm6 # mm6 <- {a2 c2 a3 c3} + movq %mm2, %mm7 + punpckhwd %mm0, %mm7 # mm7 <- {a0 c0 a1 c1} + + movq %mm4, %mm3 + punpcklwd %mm6, %mm3 # mm3 <- {a3 b3 c3 d3} + movq %mm4, %mm2 + punpckhwd %mm6, %mm2 # mm2 <- {a2 b2 c2 d2} + + movq %mm5, %mm1 + punpcklwd %mm7, %mm1 # mm1 <- {a1 b1 c1 d1} + movq %mm5, %mm0 + punpckhwd %mm7, %mm0 # mm0 <- {a0 b0 c0 d0} + + .endm + + + # transpose 4x4 + + # input + # %mm3 == {d3 d2 d1 d0} + # %mm2 == {c3 c2 c1 c0} + # %mm1 == {b3 b2 b1 b0} + # %mm0 == {a3 a2 a1 a0} + + # output + # %mm3 == {d3 c3 b3 a3} + # %mm2 == {d2 c2 b2 a2} + # %mm1 == {d1 c1 b1 a1} + # %mm0 == {d0 c0 b0 a0} + + + .macro transpose_4x4: + movq %mm0, %mm4 + punpcklwd %mm2, %mm4 # mm4 <- {c1 a1 c0 a0} + movq %mm0, %mm5 + punpckhwd %mm2, %mm5 # mm5 <- {c3 a3 c2 a2} + + movq %mm1, %mm6 + punpcklwd %mm3, %mm6 # mm6 <- {d1 b1 d0 b0} + movq %mm1, %mm7 + punpckhwd %mm3, %mm7 # mm7 <- {d3 b3 d2 b2} + + movq %mm4, %mm0 + punpcklwd %mm6, %mm0 # mm0 <- {d0 c0 b0 a0} + movq %mm4, %mm1 + punpckhwd %mm6, %mm1 # mm1 <- {d1 c1 b1 a1} + + movq %mm5, %mm2 + punpcklwd %mm7, %mm2 # mm2 <- {d2 c2 b2 a2} + movq %mm5, %mm3 + punpckhwd %mm7, %mm3 # mm3 <- {d3 c3 b3 a3} + + .endm + +.globl pixel_cascade_vertb_s16 +.type pixel_cascade_vertb_s16,@function + + +# pixel_cascade_vertbr_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + + +pixel_cascade_vertb_s16: + + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %edx # line with + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + subl %edx, %ebx + + movq 0(%edi), %mm1 # s1[k-1] + movq 8(%edi), %mm2 # s2[k-1] + .align 16 + .cascade_vertb_line_loop: + + movq (%ebx,%edx,1), %mm3 + movq %mm3, %mm0 + addl %edx, %ebx + coupled + movq %mm0, (%ebx) + + movq (%ebx,%edx,1), %mm3 + movq %mm3, %mm0 + addl %edx, %ebx + coupled + movq %mm0, (%ebx) + + movq (%ebx,%edx,1), %mm3 + movq %mm3, %mm0 + addl %edx, %ebx + coupled + movq %mm0, (%ebx) + + movq (%ebx,%edx,1), %mm3 + movq %mm3, %mm0 + addl %edx, %ebx + coupled + movq %mm0, (%ebx) + + decl %ecx + jnz .cascade_vertb_line_loop + + movq %mm1, 0(%edi) # s1[k-1] + movq %mm2, 8(%edi) # s2[k-1] + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + +.globl pixel_cascade_horlr_s16 +.type pixel_cascade_horlr_s16,@function + + +# pixel_cascade_hor_s16(char *pixel_array, int nb_rows, int linewidth, short int coef[20], short int state[8]) + + +pixel_cascade_horlr_s16: + + + pushl %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + + movl 8(%ebp), %ebx # pixel array offset + movl 12(%ebp), %ecx # nb of 4x4 pixblocks + movl 16(%ebp), %edx # line with + + movl 20(%ebp), %esi # coefs + movl 24(%ebp), %edi # state + + shll $1, %edx # short int addressing + movl %edx, %eax + shll $1, %eax + addl %edx, %eax # eax = 3 * edx + + + .align 16 + .cascade_horlr_line_loop: + movq (%edi), %mm1 + movq 8(%edi), %mm2 + + movq (%ebx), %mm0 + movq (%ebx,%edx,1), %mm1 + movq (%ebx,%edx,2), %mm2 + movq (%ebx,%eax,1), %mm3 + + transpose_4x4 + + movq %mm1, (%ebx,%edx,1) + movq %mm2, (%ebx,%edx,2) + movq %mm3, (%ebx,%eax,1) + + coupled + + movq %mm0, (%ebx) + movq (%ebx,%edx,1), %mm3 + movq %mm3, %mm0 + + coupled + + movq %mm0, (%ebx, %edx,1) + movq (%ebx,%edx,2), %mm3 + movq %mm3, %mm0 + + coupled + + movq %mm0, (%ebx, %edx,2) + movq (%ebx,%eax,1), %mm3 + movq %mm3, %mm0 + + coupled + + movq %mm1, 0(%edi) # s1[k-1] + movq %mm2, 8(%edi) # s2[k-1] + + movq %mm0, %mm3 + movq (%ebx), %mm0 + movq (%ebx,%edx,1), %mm1 + movq (%ebx,%edx,2), %mm2 + + transpose_4x4 + + movq %mm0, (%ebx) + movq %mm1, (%ebx,%edx,1) + movq %mm2, (%ebx,%edx,2) + movq %mm3, (%ebx,%eax,1) + + addl $8, %ebx + decl %ecx + jnz .cascade_horlr_line_loop + + emms + + pop %edi + pop %esi + pop %ebx + leave + ret + + + diff --git a/system/mmx/pixel_conv_hor_s16.s b/system/mmx/pixel_conv_hor_s16.s new file mode 100644 index 0000000..e90a692 --- /dev/null +++ b/system/mmx/pixel_conv_hor_s16.s @@ -0,0 +1,134 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + # intermediate function + + # input in register: + # %mm0: left 4 pixels + # %mm1: middle 4 pixels + # %mm2: right 4 pixels + + # %mm5: left 4 pixel masks + # %mm6: middle 4 pixel masks + # %mm7: right 4 pixel masks + + # return in register: + # %mm0: middle 4 pixels result + + + .conv_hor_4_pixels: + .align 16 + + # compute quadruplet + + # get left pixels + psrlq $48, %mm0 # shift word 3 to byte 0 + movq %mm1, %mm4 + psllq $16, %mm4 # shift word 0,1,2 to 1,2,3 + por %mm4, %mm0 # combine + pmulhw %mm5, %mm0 + psllw $1, %mm0 + + + # get middle pixels + movq %mm1, %mm4 + pmulhw %mm6, %mm4 + psllw $1, %mm4 + paddsw %mm4, %mm0 + + + # get right pixels + movq %mm2, %mm3 + psllq $48, %mm3 # shift word 0 to word 3 + movq %mm1, %mm4 + psrlq $16, %mm4 # shift word 1,2,3 to 0,1,2 + por %mm4, %mm3 # combine + pmulhw %mm7, %mm3 + psllw $1, %mm3 + paddsw %mm3, %mm0 # accumulate + + ret + +.globl pixel_conv_hor_s16 +.type pixel_conv_hor_s16,@function + + +# pixel_conv_hor_s16(short int *pixel_array, int nb_4_pixel_vectors, short int border[4], short int mask[12]) +# horizontal unsigned pixel conv (1/4 1/2 1/4) not tested +# NOT TESTED + + +pixel_conv_hor_s16: + + + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 8(%ebp), %esi # pixel array offset + movl 12(%ebp), %ecx # nb of 8 pixel vectors in a row (at least 2) + + movl 20(%ebp), %edi # mask vector + movq (%edi), %mm5 + movq 8(%edi), %mm6 + movq 16(%edi), %mm7 + + movl 16(%ebp), %edi # boundary pixel vector + + + + movq (%edi), %mm0 # init regs (left edge, so mm0 is zero) + movq (%esi), %mm1 + movq 8(%esi), %mm2 + + decl %ecx # loop has 2 terminator stubs + decl %ecx # todo: handle if ecx < 3 + + jmp .conv_line_loop + + + .align 16 + .conv_line_loop: + call .conv_hor_4_pixels # compute conv + movq %mm0, (%esi) # store result + movq %mm1, %mm0 # mm0 <- prev (%esi) + movq %mm2, %mm1 # mm1 <- 8(%esi) + movq 16(%esi), %mm2 # mm2 <- 16(%esi) + + addl $8, %esi # increase pointer + decl %ecx + jnz .conv_line_loop + + call .conv_hor_4_pixels # compute conv + movq %mm0, (%esi) # store result + movq %mm1, %mm0 # mm0 <- prev (%esi) + movq %mm2, %mm1 # mm1 <- 8(%esi) + movq (%edi), %mm2 # mm2 <- border + + call .conv_hor_4_pixels # compute last vector + movq %mm0, 8(%esi) # store it + + emms + + pop %edi + pop %esi + leave + ret + + + diff --git a/system/mmx/pixel_conv_ver_s16.s b/system/mmx/pixel_conv_ver_s16.s new file mode 100644 index 0000000..ae2456f --- /dev/null +++ b/system/mmx/pixel_conv_ver_s16.s @@ -0,0 +1,128 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +#TODO: fix out of bound acces in conv_ver and conv_hor + + # intermediate function + + # input in register: + # %mm0: top 4 pixels + # %mm1: middle 4 pixels + # %mm2: bottom 4 pixels + + # %mm5: top 4 pixel mask + # %mm6: middle 4 pixel mask + # %mm7: bottom 4 pixel mask + + # return in register: + # %mm0: middle 4 pixels result + + + .conv_ver_4_pixels: + .align 16 + + # compute quadruplet + + # get top pixel + pmulhw %mm5, %mm0 + psllw $1, %mm0 + + # get middle pixel + movq %mm1, %mm4 + pmulhw %mm6, %mm4 + psllw $1, %mm4 + paddsw %mm4, %mm0 + + # get bottom pixel + movq %mm2, %mm3 + pmulhw %mm7, %mm3 + psllw $1, %mm3 # mm3 <- mm3/4 + paddsw %mm3, %mm0 + + ret + +.globl pixel_conv_ver_s16 +.type pixel_conv_ver_s16,@function + + +# pixel_conv_ver_s16(short int *pixel_array, int nb_4_pixel_vectors, int row_byte_size, short int border[4]) +# horizontal unsigned pixel conv (1/4 1/2 1/4) not tested +# NOT TESTED + + +pixel_conv_ver_s16: + + + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 8(%ebp), %esi # pixel array offset + movl 12(%ebp), %ecx # nb of 4 pixel vectors in a row (at least 2) + movl 16(%ebp), %edx # rowsize in bytes + + movl 24(%ebp), %edi # mask vector + movq (%edi), %mm5 + movq 8(%edi), %mm6 + movq 16(%edi), %mm7 + + movl 20(%ebp), %edi # edge vector + + + shll $1, %edx + decl %ecx # loop has a terminator stub + decl %ecx # loop has another terminator stub + + + movq (%edi), %mm0 # init regs (left edge, so mm0 is zero) + movq (%esi), %mm1 + movq (%esi,%edx,1), %mm2 + jmp .conv_line_loop + + + .align 16 + .conv_line_loop: + call .conv_ver_4_pixels # compute conv + movq %mm0, (%esi) # store result + movq %mm1, %mm0 # mm0 <- prev (%esi) + movq %mm2, %mm1 # mm1 <- (%esi,%edx,1) + movq (%esi,%edx,2), %mm2 # mm2 <- (%esi,%edx,2) + + addl %edx, %esi # increase pointer + decl %ecx + jnz .conv_line_loop + + call .conv_ver_4_pixels # compute conv + movq %mm0, (%esi) # store result + movq %mm1, %mm0 # mm0 <- prev (%esi) + movq %mm2, %mm1 # mm1 <- (%esi,%edx,1) + movq (%edi), %mm2 # clear invalid edge vector + + addl %edx, %esi # increase pointer + call .conv_ver_4_pixels # compute last vector + movq %mm0, (%esi) # store it + + emms + + pop %edi + pop %esi + leave + ret + + + diff --git a/system/mmx/pixel_crot_s16.s b/system/mmx/pixel_crot_s16.s new file mode 100644 index 0000000..2427869 --- /dev/null +++ b/system/mmx/pixel_crot_s16.s @@ -0,0 +1,153 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_crot3d_s16 +.type pixel_crot3d_s16,@function + + +# 3 dimensional colour space rotation +# 3x3 matrix is column encoded, each coefficient is a 4x16 bit fixed point vector + +# void pixel_crot3d_s16(int *buf, int nb_4pixel_vectors_per_plane, short int *matrix) + +pixel_crot3d_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + + movl 8(%ebp), %esi # input array + movl 12(%ebp), %ecx # pixel count + movl 16(%ebp), %edi # rotation matrix + movl %ecx, %edx + shll $3, %edx # %edx = plane spacing + + + .align 16 + .loop_crot3d: + + movq (%esi), %mm0 # get 1st component + movq (%esi,%edx,1), %mm6 # get 2nd component + movq (%esi,%edx,2), %mm7 # get 3rd component + + movq %mm0, %mm1 # copy 1st component + movq %mm0, %mm2 + + pmulhw (%edi), %mm0 # mul first column + pmulhw 8(%edi), %mm1 + pmulhw 16(%edi), %mm2 + + movq %mm6, %mm5 # copy 2nd component + movq %mm6, %mm3 + + pmulhw 24(%edi), %mm6 # mul second column + pmulhw 32(%edi), %mm5 + pmulhw 40(%edi), %mm3 + + paddsw %mm6, %mm0 # accumulate + paddsw %mm5, %mm1 + paddsw %mm3, %mm2 + + movq %mm7, %mm4 # copy 3rd component + movq %mm7, %mm6 + + pmulhw 48(%edi), %mm4 # mul third column + pmulhw 56(%edi), %mm6 + pmulhw 64(%edi), %mm7 + + paddsw %mm4, %mm0 # accumulate + paddsw %mm6, %mm1 + paddsw %mm7, %mm2 + + paddsw %mm0, %mm0 # double (fixed point normalization) + paddsw %mm1, %mm1 + paddsw %mm2, %mm2 + + movq %mm0, (%esi) # store + movq %mm1, (%esi, %edx, 1) + movq %mm2, (%esi, %edx, 2) + + addl $8, %esi # increment source pointer + decl %ecx + jnz .loop_crot3d # loop + + emms + + pop %edi + pop %esi + leave + ret + + +.globl pixel_crot2d_s16 +.type pixel_crot2d_s16,@function + +# 2 dimensional colour space rotation +# 2x2 matrix is column encoded, each coefficient is a 4x16 bit fixed point vector + +# void pixel_crot2d_s16(int *buf, int nb_4pixel_vectors_per_plane, short int *matrix) + +pixel_crot2d_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + + movl 8(%ebp), %esi # input array + movl 12(%ebp), %ecx # pixel count + movl 16(%ebp), %edi # rotation matrix + movl %ecx, %edx + shll $3, %edx # %edx = plane spacing + + + .align 16 + .loop_crot2d: + + movq (%esi), %mm0 # get 1st component + movq (%esi,%edx,1), %mm2 # get 2nd component + + movq %mm0, %mm1 # copy 1st component + movq %mm2, %mm3 # copy 2nd component + + pmulhw (%edi), %mm0 # mul first column + pmulhw 8(%edi), %mm1 + + pmulhw 16(%edi), %mm2 # mul second column + pmulhw 24(%edi), %mm3 + + paddsw %mm2, %mm0 # accumulate + paddsw %mm3, %mm1 + + paddsw %mm0, %mm0 # fixed point gain correction + paddsw %mm1, %mm1 + + movq %mm0, (%esi) # store + movq %mm1, (%esi, %edx, 1) + + addl $8, %esi # increment source pointer + decl %ecx + jnz .loop_crot2d # loop + + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_gain.s b/system/mmx/pixel_gain.s new file mode 100644 index 0000000..5cd5057 --- /dev/null +++ b/system/mmx/pixel_gain.s @@ -0,0 +1,83 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_gain +.type pixel_gain,@function + +# mmx rgba pixel gain +# void asmtest(char *pixelarray, int32 nbpixels, int *rgba_gain) +# gains are 7.9 fixed point for rgba + +pixel_gain: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 8(%ebp), %esi # pixel array offset + movl 12(%ebp), %ecx # nb of elements + movl 16(%ebp), %edi # int16[4] array of gains + + prefetch (%esi) + + emms + sarl $2, %ecx # process 4 pixels per loop iteration + jz .exit + movq (%edi), %mm7 # read gain array from memory + jmp .loop_gain + + .align 16 + .loop_gain: + + prefetch 128(%esi) + movq (%esi), %mm5 # load pixel 1-2 from memory + movq 8(%esi), %mm6 # load pixel 3-4 from memory + pxor %mm0, %mm0 # zero mm0 - mm3 + pxor %mm1, %mm1 + pxor %mm2, %mm2 + pxor %mm3, %mm3 + punpcklbw %mm5, %mm0 # unpack 1st pixel into 8.8 bit ints + punpckhbw %mm5, %mm1 # unpack 2nd + punpcklbw %mm6, %mm2 # unpack 3rd + punpckhbw %mm6, %mm3 # unpack 4th + psrlw $0x1, %mm0 # shift right to clear sign bit 9.7 + psrlw $0x1, %mm1 + psrlw $0x1, %mm2 + psrlw $0x1, %mm3 + + pmulhw %mm7, %mm0 # multiply 1st pixel 9.7 * 7.9 -> 16.0 + pmulhw %mm7, %mm1 # multiply 2nd + pmulhw %mm7, %mm2 # multiply 3rd + pmulhw %mm7, %mm3 # multiply 4th + + packuswb %mm1, %mm0 # pack & saturate to 8bit vector + movq %mm0, (%esi) # store result in memory + packuswb %mm3, %mm2 # pack & saturate to 8bit vector + movq %mm2, 8(%esi) # store result in memory + + addl $16, %esi # increment source pointer + decl %ecx + jnz .loop_gain # loop + + .exit: + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_gain_s16.s b/system/mmx/pixel_gain_s16.s new file mode 100644 index 0000000..adcfdf5 --- /dev/null +++ b/system/mmx/pixel_gain_s16.s @@ -0,0 +1,71 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_gain_s16 +.type pixel_gain_s16,@function + +# gain is integer, shift count is down +# void pixel_gain_s16(int *buf, int nb_8pixel_vectors, short int gain[4], unsigned long long *shift) + +pixel_gain_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 20(%ebp), %edi + movq (%edi), %mm6 # get shift vector + + movl 16(%ebp), %edi + movq (%edi), %mm7 # get gain vector + + movl 8(%ebp), %esi # input array + movl 12(%ebp), %ecx # pixel count + + + .align 16 + .loop_gain: + + movq (%esi), %mm0 # load 4 pixels from memory + movq %mm0, %mm1 + pmulhw %mm7, %mm1 # apply gain (s15.0) fixed point, high word + pmullw %mm7, %mm0 # low word + + movq %mm0, %mm2 # copy + movq %mm1, %mm3 + + punpcklwd %mm1, %mm0 # unpack lsw components + punpckhwd %mm3, %mm2 # unpack msw components + + psrad %mm6, %mm0 # apply signed shift + psrad %mm6, %mm2 + + packssdw %mm2, %mm0 # pack result & saturate + movq %mm0, (%esi) # store result + + + addl $8, %esi # increment source pointer + decl %ecx + jnz .loop_gain # loop + + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_mix_s16.s b/system/mmx/pixel_mix_s16.s new file mode 100644 index 0000000..9bf41eb --- /dev/null +++ b/system/mmx/pixel_mix_s16.s @@ -0,0 +1,68 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_mix_s16 +.type pixel_mix_s16,@function + +# mmx rgba pixel gain +# void pixel_mix_s16(int *left, int *right, int nb_4pixel_vectors, +# short int gain_left[4], short int gain_right[4]) + +pixel_mix_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 20(%ebp), %edi # int16[4] array of gains + movq (%edi), %mm6 # get left gain array + + movl 24(%ebp), %edi # int16[4] array of gains + movq (%edi), %mm7 # get right gain array + + movl 8(%ebp), %edi # left array + movl 12(%ebp), %esi # right array + movl 16(%ebp), %ecx # pixel count + + + .align 16 + .loop_mix: + +# prefetch 128(%esi) + movq (%esi), %mm1 # load right 4 pixels from memory + pmulhw %mm7, %mm1 # apply right gain + movq (%edi), %mm0 # load 4 left pixels from memory + pmulhw %mm6, %mm0 # apply left gain +# pslaw $1, %mm1 # shift left ((s).15 x (s).15 -> (s0).14)) +# pslaw $1, %mm0 + paddsw %mm0, %mm0 # no shift left arithmic, so use add instead + paddsw %mm1, %mm1 + paddsw %mm1, %mm0 # mix + movq %mm0, (%edi) + addl $8, %esi + addl $8, %edi + decl %ecx + jnz .loop_mix # loop + + emms + + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_mul_s16.s b/system/mmx/pixel_mul_s16.s new file mode 100644 index 0000000..240a024 --- /dev/null +++ b/system/mmx/pixel_mul_s16.s @@ -0,0 +1,56 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_mul_s16 +.type pixel_mul_s16,@function + +# simple add +# void pixel_mul_s16(int *left, int *right, int nb_4pixel_vectors) + +pixel_mul_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 8(%ebp), %edi # left array + movl 12(%ebp), %esi # right array + movl 16(%ebp), %ecx # pixel count + + + .align 16 + .loop_mix: + +# prefetch 128(%esi) + movq (%esi), %mm1 # load right 4 pixels from memory + movq (%edi), %mm0 # load 4 left pixels from memory + pmulhw %mm1, %mm0 # mul + psllw $1, %mm0 # fixed point shift correction + movq %mm0, (%edi) + addl $8, %esi + addl $8, %edi + decl %ecx + jnz .loop_mix # loop + + emms + + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_pack_s16u8.s b/system/mmx/pixel_pack_s16u8.s new file mode 100644 index 0000000..57df702 --- /dev/null +++ b/system/mmx/pixel_pack_s16u8.s @@ -0,0 +1,126 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_pack_s16u8_y +.type pixel_pack_s16u8_y,@function + +# mmx rgba pixel gain +# void pixel_pack_s16u8_y(int *input, int *output, int nb_8pixel_vectors) + +pixel_pack_s16u8_y: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + +# movl 20(%ebp), %edi # int16[4] array of gains +# movq (%edi), %mm7 # get gain array +# psllw $1, %mm7 # adjust for shifted sign bit + + movl 8(%ebp), %esi # input array + movl 12(%ebp), %edi # output array + movl 16(%ebp), %ecx # pixel count + + pxor %mm6, %mm6 + + .align 16 + .loop_pack_y: + +# prefetch 128(%esi) + movq (%esi), %mm0 # load 4 pixels from memory +# pmulhw %mm7, %mm0 # apply gain + movq 8(%esi), %mm1 # load 4 pixels from memory +# pmulhw %mm7, %mm1 # apply gain + +# movq %mm0, %mm2 +# pcmpgtw %mm6, %mm2 # mm2 > 0 ? 0xffff : 0 +# pand %mm2, %mm0 + +# movq %mm1, %mm3 +# pcmpgtw %mm6, %mm3 # mm3 > 0 ? 0xffff : 0 +# pand %mm3, %mm1 + +# psllw $1, %mm0 # shift out sign bit +# psllw $1, %mm1 # shift out sign bit + + psraw $7, %mm0 # shift to lsb + psraw $7, %mm1 # shift to lsb + + packuswb %mm1, %mm0 # pack & saturate to 8bit vector + movq %mm0, (%edi) # store result in memory + + addl $16, %esi # increment source pointer + addl $8, %edi # increment dest pointer + decl %ecx + jnz .loop_pack_y # loop + + emms + + pop %edi + pop %esi + leave + ret + +.globl pixel_pack_s16u8_uv +.type pixel_pack_s16u8_uv,@function + +pixel_pack_s16u8_uv: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + +# movl 20(%ebp), %edi # int16[4] array of gains +# movq (%edi), %mm7 # get gain array + movl 8(%ebp), %esi # pixel array offset + movl 12(%ebp), %edi # nb of elements + movl 16(%ebp), %ecx # pixel count + + pcmpeqw %mm6, %mm6 + psllw $15, %mm6 + movq %mm6, %mm5 + psrlw $8, %mm5 + por %mm5, %mm6 # mm6 <- 8 times 0x80 + + .align 16 + .loop_pack_uv: + +# prefetch 128(%esi) + movq (%esi), %mm0 # load 4 pixels from memory +# pmulhw %mm7, %mm0 # apply gain + movq 8(%esi), %mm1 # load 4 pixels from memory +# pmulhw %mm7, %mm1 # apply gain + + psraw $8, %mm0 # shift to msb + psraw $8, %mm1 + + packsswb %mm1, %mm0 # pack & saturate to 8bit vector + pxor %mm6, %mm0 # flip sign bits + movq %mm0, (%edi) # store result in memory + + addl $16, %esi # increment source pointer + addl $8, %edi # increment dest pointer + decl %ecx + jnz .loop_pack_uv # loop + + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_rand_s16.s b/system/mmx/pixel_rand_s16.s new file mode 100644 index 0000000..649400b --- /dev/null +++ b/system/mmx/pixel_rand_s16.s @@ -0,0 +1,76 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_rand_s16 +.type pixel_rand_s16,@function + +# mmx rgba pixel gain +# void pixel_rand_s16(int *dst, nb_4pixel_vectors, short int random_seed[4]) + +pixel_rand_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 16(%ebp), %esi # int16[4] array of random seeds + movl 8(%ebp), %edi # dst array + movl 12(%ebp), %ecx # pixel count + + movq (%esi), %mm6 + + + pcmpeqw %mm3, %mm3 + psrlw $15, %mm3 # get bit mask 4 times 0x0001 + + .align 16 + .loop_rand: + +# prefetch 128(%esi) + + + movq %mm6, %mm4 # get random vector + psrlw $15, %mm4 # get first component + movq %mm6, %mm5 + psrlw $14, %mm5 # get second component + pxor %mm5, %mm4 + movq %mm6, %mm5 + psrlw $12, %mm5 # get third component + pxor %mm5, %mm4 + movq %mm6, %mm5 + psrlw $3, %mm5 # get forth component + pxor %mm5, %mm4 + + psllw $1, %mm6 # shift left original random vector + pand %mm3, %mm4 # isolate new bit + por %mm4, %mm6 # combine into new random vector + + movq %mm6, (%edi) + addl $8, %edi + decl %ecx + jnz .loop_rand # loop + + + movq %mm6, (%esi) # store random seeds + + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_randmix_s16.s b/system/mmx/pixel_randmix_s16.s new file mode 100644 index 0000000..44e1702 --- /dev/null +++ b/system/mmx/pixel_randmix_s16.s @@ -0,0 +1,91 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_randmix_s16 +.type pixel_randmix_s16,@function + +# mmx rgba pixel gain +# void pixel_randmix_s16(int *left, int *right, int nb_4pixel_vectors, short int random_seed[4], short int threshold[4]) + +pixel_randmix_s16: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 20(%ebp), %edi # int16[4] array of random seeds + movq (%edi), %mm6 + + movl 24(%ebp), %edi # int16[4] array of thresholds + movq (%edi), %mm7 + + movl 8(%ebp), %edi # left array + movl 12(%ebp), %esi # right array + movl 16(%ebp), %ecx # pixel count + + pcmpeqw %mm3, %mm3 + psrlw $15, %mm3 # get bit mask 4 times 0x0001 + + .align 16 + .loop_randmix: + +# prefetch 128(%esi) + movq (%esi), %mm1 # load right 4 pixels from memory + movq (%edi), %mm0 # load 4 left pixels from memory + + movq %mm6, %mm2 # get random vector + pcmpgtw %mm7, %mm2 # compare random vector with threshold + movq %mm2, %mm5 + + pand %mm0, %mm2 # get left array's components + pandn %mm1, %mm5 # get right array's components + por %mm2, %mm5 + + movq %mm5, (%edi) # store pixels + + movq %mm6, %mm4 # get random vector + psrlw $15, %mm4 # get first component + movq %mm6, %mm5 + psrlw $14, %mm5 # get second component + pxor %mm5, %mm4 + movq %mm6, %mm5 + psrlw $12, %mm5 # get third component + pxor %mm5, %mm4 + movq %mm6, %mm5 + psrlw $3, %mm5 # get forth component + pxor %mm5, %mm4 + + psllw $1, %mm6 # shift left original random vector + pand %mm3, %mm4 # isolate new bit + por %mm4, %mm6 # combine into new random vector + + addl $8, %esi + addl $8, %edi + decl %ecx + jnz .loop_randmix # loop + + + movl 20(%ebp), %edi # int16[4] array of random seeds + movq %mm6, (%edi) # store random seeds + + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_s1.s b/system/mmx/pixel_s1.s new file mode 100644 index 0000000..d6bc5ca --- /dev/null +++ b/system/mmx/pixel_s1.s @@ -0,0 +1,201 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + + # this file contains ops for binary image processing + # 8x8 bit tile encoded + # low byte = bottom row + # low bit = right column + # %mm7 = scratch reg for all macros + + + # ************ load mask ******************* + # compute bit masks for rows and columns + # %mm7: scratch reg + + # load mask top + .macro ldmt count reg + pcmpeqb \reg, \reg + psllq $(64-(\count<<3)), \reg + .endm + + # load mask bottom + .macro ldmb count reg + pcmpeqb \reg, \reg + psrlq $(64-(\count<<3)), \reg + .endm + + # load mask top and bottom + .macro ldmtb count regt regb + ldmb \count, \regb + ldmt \count, \regt + .endm + + # load mask right + .macro ldmr count reg + pcmpeqb %mm7, %mm7 + psrlw $(16-\count), %mm7 + movq %mm7, \reg + psllq $8, %mm7 + por %mm7, \reg + .endm + + # load mask left + .macro ldml count reg + pcmpeqb %mm7, %mm7 + psllw $(16-\count), %mm7 + movq %mm7, \reg + psrlq $8, %mm7 + por %mm7, \reg + .endm + + # load mask left and right + .macro ldmlr count regl regr + pcmpeqb %mm7, %mm7 + psllw $(16-\count), %mm7 + movq %mm7, \regl + psrlq $8, %mm7 + por %mm7, \regl + movq \regl, \regr + psrlq $(8-\count), \regr + .endm + + # ************* shift square ********** + # shifts a square in reg, fills with zeros + + # shift square top + .macro sst count reg + psllq $(\count<<3), \reg + .endm + + # shift square bottom + .macro ssb count reg + psrlq $(\count<<3), \reg + .endm + + # not tested + # shift square left + .macro ssl count reg + movq \reg, %mm7 + pcmpeqb \reg, \reg + psllw $(16-\count), \reg + psrlw $8, \reg + pandn %mm7, \reg + psllw $(\count), \reg + .endm + + # shift square right + .macro ssr count reg + movq \reg, %mm7 + pcmpeqb \reg, \reg + psrlw $(16-\count), \reg + psllw $8, \reg + pandn %mm7, \reg + psrlw $(\count), \reg + .endm + + + # ********** combine square ************* + # combines 2 squares + + # combine right + .macro csr count regr reg + ssl \count, \reg + ssr (8-\count), \regr + por \regr, \reg + .endm + + # combine left + .macro csl count regl reg + ssr \count, \reg + ssl (8-\count), \regl + por \regl, \reg + .endm + + # combine top + .macro cst count regt reg + ssb \count, \reg + sst (8-\count), \regt + por \regt, \reg + .endm + + + # combine bottom + .macro csb count regb reg + sst \count, \reg + ssb (8-\count), \regb + por \regb, \reg + .endm + + + # ********** load combine square ************* + # loads combined square using mask + + # load combined square left + # mask should be count bits set right (i.e. 0x01) + .macro lcsml count mask source sourcel dstreg + movq \mask, \dstreg + movq \mask, %mm7 + pandn \source, \dstreg + pand \sourcel, %mm7 + psrlq $(\count), \dstreg + psllq $(8-\count), %mm7 + por %mm7, \dstreg + .endm + + + +.globl pixel_test_s1 +.type pixel_test_s1,@function + +# simple add +# void pixel_add_s16(void *dest, void *source, int nb_squares, int spacing) + + + + # + + +pixel_test_s1: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + + movl 8(%ebp), %edi # dest + movl 12(%ebp), %esi # source + movl 16(%ebp), %ecx # count + movl 20(%ebp), %edx # row distance + + ldmr 1, %mm6 + lcsml 1, %mm6, (%esi), 8(%esi), %mm0 + movq %mm0, (%edi) + + +# movq (%esi), %mm0 +# movq 8(%esi), %mm1 +# csl 4, %mm1, %mm0 +# movq %mm0, (%edi) + + emms + + + pop %edi + pop %esi + leave + ret + diff --git a/system/mmx/pixel_unpack_u8s16.s b/system/mmx/pixel_unpack_u8s16.s new file mode 100644 index 0000000..0fc14c2 --- /dev/null +++ b/system/mmx/pixel_unpack_u8s16.s @@ -0,0 +1,113 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +.globl pixel_unpack_u8s16_y +.type pixel_unpack_u8s16_y,@function + +# mmx rgba pixel gain +# void pixel_unpack_u8s16_y(char *input, char *output, int32 nb_pixels_div8) + +pixel_unpack_u8s16_y: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + +# movl 20(%ebp), %edi # int16[4] array of gains +# movq (%edi), %mm7 # get gain array + + movl 8(%ebp), %esi # input uint8 pixel array + movl 12(%ebp), %edi # output sint16 pixel array + movl 16(%ebp), %ecx # nb of elements div 8 + + + .align 16 + .loop_unpack_y: + + movq (%esi), %mm5 # load 8 pixels from memory + pxor %mm0, %mm0 # zero mm0 - mm3 + pxor %mm1, %mm1 + punpcklbw %mm5, %mm0 # unpack 1st 4 pixels + punpckhbw %mm5, %mm1 # unpack 2nd 4 pixles + psrlw $0x1, %mm0 # shift right to clear sign bit 9.7 + psrlw $0x1, %mm1 +# pmulhw %mm7, %mm0 # apply gain +# pmulhw %mm7, %mm1 +# paddsw %mm0, %mm0 # correct factor 2 +# paddsw %mm1, %mm1 + movq %mm0, (%edi) # store + movq %mm1, 8(%edi) + + addl $8, %esi # increment source pointer + addl $16, %edi # increment dest pointer + decl %ecx + jnz .loop_unpack_y # loop + + emms + + pop %edi + pop %esi + leave + ret + +.globl pixel_unpack_u8s16_uv +.type pixel_unpack_u8s16_uv,@function +pixel_unpack_u8s16_uv: + pushl %ebp + movl %esp, %ebp + push %esi + push %edi + +# movl 20(%ebp), %edi # int16[4] array of gains +# movq (%edi), %mm7 # get gain array + + movl 8(%ebp), %esi # input uint8 pixel array + movl 12(%ebp), %edi # output sint16 pixel array + movl 16(%ebp), %ecx # nb of elements div 8 + + pcmpeqw %mm6, %mm6 + psllw $15, %mm6 + + .align 16 + .loop_unpack_uv: + + movq (%esi), %mm5 # load 8 pixels from memory + pxor %mm0, %mm0 # zero mm0 - mm3 + pxor %mm1, %mm1 + punpcklbw %mm5, %mm0 # unpack 1st 4 pixels + punpckhbw %mm5, %mm1 # unpack 2nd 4 pixles + pxor %mm6, %mm0 # flip sign bit (Cr and Cb are ofset by 128) + pxor %mm6, %mm1 +# pmulhw %mm7, %mm0 # apply gain +# pmulhw %mm7, %mm1 +# paddsw %mm0, %mm0 # correct factor 2 +# paddsw %mm1, %mm1 + movq %mm0, (%edi) # store + movq %mm1, 8(%edi) + + addl $8, %esi # increment source pointer + addl $16, %edi # increment dest pointer + decl %ecx + jnz .loop_unpack_uv # loop + + emms + + pop %edi + pop %esi + leave + ret + diff --git a/system/pdp.c b/system/pdp.c new file mode 100644 index 0000000..8651971 --- /dev/null +++ b/system/pdp.c @@ -0,0 +1,115 @@ +/* + * Pure Data Packet system implementation: setup code + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#include "pdp.h" +#include + +/* all symbols are C style */ +#ifdef __cplusplus +extern "C" +{ +#endif + + + +/* module setup declarations (all C-style) */ + +/* pdp system / internal stuff */ +void pdp_packet_setup(void); +void pdp_ut_setup(void); +void pdp_queue_setup(void); +void pdp_control_setup(void); + +/* pdp modules */ +void pdp_xv_setup(void); +void pdp_add_setup(void); +void pdp_mul_setup(void); +void pdp_mix_setup(void); +void pdp_randmix_setup(void); +void pdp_qt_setup(void); +void pdp_v4l_setup(void); +void pdp_reg_setup(void); +void pdp_conv_setup(void); +void pdp_bq_setup(void); +void pdp_affine_setup(void); +void pdp_del_setup(void); +void pdp_snap_setup(void); +void pdp_trigger_setup(void); +void pdp_route_setup(void); +void pdp_noise_setup(void); +void pdp_gradient_setup(void); +void pdp_gain_setup(void); +void pdp_grey_setup(void); +void pdp_chrot_setup(void); +void pdp_scope_setup(void); +void pdp_scale_setup(void); +void pdp_zoom_setup(void); + + +/* library setup routine */ +void pdp_setup(void){ + + /* babble */ + post ("PDP: pure data packet"); + +#ifdef PDP_VERSION + fprintf(stderr, "PDP: version " PDP_VERSION "\n"); +#endif + + + /* setup pdp system */ + pdp_packet_setup(); + pdp_queue_setup(); + pdp_control_setup(); + + /* setup utility toolkit */ + pdp_ut_setup(); + + /* setup pdp modules*/ + pdp_add_setup(); + pdp_mul_setup(); + pdp_mix_setup(); + pdp_randmix_setup(); + pdp_xv_setup(); + pdp_qt_setup(); + pdp_v4l_setup(); + pdp_reg_setup(); + pdp_conv_setup(); + pdp_bq_setup(); + pdp_affine_setup(); + pdp_del_setup(); + pdp_snap_setup(); + pdp_trigger_setup(); + pdp_route_setup(); + pdp_noise_setup(); + pdp_gradient_setup(); + pdp_gain_setup(); + pdp_grey_setup(); + pdp_chrot_setup(); + pdp_scope_setup(); + pdp_scale_setup(); + pdp_zoom_setup(); + +} + +#ifdef __cplusplus +} +#endif diff --git a/system/pdp_comm.c b/system/pdp_comm.c new file mode 100644 index 0000000..80cbaaa --- /dev/null +++ b/system/pdp_comm.c @@ -0,0 +1,119 @@ +/* + * Pure Data Packet system implementation. + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* this file contains misc communication methods */ + + +#include "pdp.h" +#include "pdp_internals.h" +#include + +/* all symbols are C style */ +#ifdef __cplusplus +extern "C" +{ +#endif + + +/************** packet management and communication convenience functions ************/ + +/* send a packet to an outlet */ +void outlet_pdp(t_outlet *out, int packetid) +{ + t_atom atom[2]; + t_symbol *s = gensym("pdp"); + t_symbol *rro = gensym("register_ro"); + t_symbol *rrw = gensym("register_rw"); + t_symbol *proc = gensym("process"); + + + SETFLOAT(atom+1, (float)packetid); + + SETSYMBOL(atom+0, rro); + outlet_anything(out, s, 2, atom); + + SETSYMBOL(atom+0, rrw); + outlet_anything(out, s, 2, atom); + + /* this is not really necessary, it can be triggered by the rw message */ + SETSYMBOL(atom+0, proc); + outlet_anything(out, s, 1, atom); + +} + + +/* unregister a packet and send it to an outlet */ +void +pdp_pass_if_valid(t_outlet *outlet, int *packet) +{ + if (-1 != *packet){ + pdp_packet_mark_unused(*packet); + outlet_pdp(outlet, *packet); + *packet = -1; + } +} + +void +pdp_replace_if_valid(int *dpacket, int *spacket) +{ + if (-1 != *spacket){ + pdp_packet_mark_unused(*dpacket); + *dpacket = *spacket; + *spacket = -1; + } + +} + + +int +pdp_packet_copy_ro_or_drop(int *dpacket, int spacket) +{ + int drop = 0; + if (*dpacket == -1) *dpacket = pdp_packet_copy_ro(spacket); + else { + /* send a notification there is a dropped packet */ + pdp_control_notify_drop(spacket); + drop = 1; + } + return drop; +} + + +int +pdp_packet_copy_rw_or_drop(int *dpacket, int spacket) +{ + int drop = 0; + if (*dpacket == -1) *dpacket = pdp_packet_copy_rw(spacket); + else { + /* send a notification there is a dropped packet */ + pdp_control_notify_drop(spacket); + drop = 1; + } + return drop; +} + + + + + + +#ifdef __cplusplus +} +#endif diff --git a/system/pdp_control.c b/system/pdp_control.c new file mode 100644 index 0000000..a7ee0c7 --- /dev/null +++ b/system/pdp_control.c @@ -0,0 +1,162 @@ +/* + * Pure Data Packet system implementation: control object + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +/* this is an actual pd class that is used for communication with the + pdp framework */ + +#include "pdp.h" +#include "pdp_internals.h" +#include + +/* all symbols are C style */ +#ifdef __cplusplus +extern "C" +{ +#endif + + + +static long dropped_packets; + +static t_class* pdp_control_class; + + +/* pdp control instance data */ + +struct pdp_control_struct; +typedef struct pdp_control_struct +{ + t_object x_obj; + t_outlet *x_outlet0; + struct pdp_control_struct *x_next; + +} t_pdp_control; + +typedef void (t_pdp_control_method_notify)(t_pdp_control *x); + + +static t_pdp_control *pdp_control_list; + +static void pdp_control_info(t_pdp_control *x) +{ +} + +static void pdp_control_thread(t_pdp_control *x, t_floatarg f) +{ + int t = (int)f; + + if (t){ + post("pdp_control: switching on processing in thread"); + pdp_queue_use_thread(1); + } + else { + post("pdp_control: switching off processing in thread"); + pdp_queue_use_thread(0); + } +} + + +static void pdp_control_send_drop_message(t_pdp_control *x) +{ + t_atom atom[1]; + t_symbol *s = gensym("pdp_drop"); + + SETFLOAT(atom+0, (float)dropped_packets); + outlet_anything(x->x_outlet0, s, 1, atom); +} + + +static void pdp_control_free(t_pdp_control *x) +{ + /* remove from linked list */ + t_pdp_control *curr = pdp_control_list; + if (pdp_control_list == x) pdp_control_list = x->x_next; + else while (curr){ + if (curr->x_next == x) { + curr->x_next = x->x_next; + break; + } + else { + curr = curr->x_next; + } + + } +} + + +static void *pdp_control_new(void) +{ + t_pdp_control *x = (t_pdp_control *)pd_new(pdp_control_class); + x->x_outlet0 = outlet_new(&x->x_obj, &s_anything); + + /* add to list */ + x->x_next = pdp_control_list; + pdp_control_list = x; + return x; +} + +/************************* class methods ***************************************/ + + +void pdp_control_setup(void) +{ + + pdp_control_list = 0; + dropped_packets = 0; + + /* setup pd class data */ + pdp_control_class = class_new(gensym("pdp_control"), (t_newmethod)pdp_control_new, + (t_method)pdp_control_free, sizeof(t_pdp_control), 0, A_NULL); + + + class_addmethod(pdp_control_class, (t_method)pdp_control_info, gensym("info"), A_NULL); + class_addmethod(pdp_control_class, (t_method)pdp_control_thread, gensym("thread"), A_DEFFLOAT, A_NULL); +} + + + +void pdp_control_notify_broadcast(t_pdp_control_method_notify *notify) +{ + t_pdp_control *curr = pdp_control_list; + while (curr){ + (*notify)(curr); + curr = curr->x_next; + } +} + + + +/************************* notify class methods *************************/ + +void pdp_control_notify_drop(int packet) +{ + dropped_packets++; + + /* send drop notify to controller class instances */ + pdp_control_notify_broadcast(pdp_control_send_drop_message); + //post("dropped packet"); +} + + + +#ifdef __cplusplus +} +#endif diff --git a/system/pdp_imageproc_mmx.c b/system/pdp_imageproc_mmx.c new file mode 100644 index 0000000..2f32c3f --- /dev/null +++ b/system/pdp_imageproc_mmx.c @@ -0,0 +1,319 @@ +/* + * Pure Data Packet. c wrapper for mmx image processing routines. + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +/* this is a c wrapper around platform specific (mmx) code */ +#include +#include "pdp_mmx.h" +#include "pdp_imageproc.h" + +// utility stuff +inline static s16 float2fixed(float f) +{ + if (f > 1) f = 1; + if (f < -1) f = -1; + f *= 0x7fff; + return (s16)f; +} + +inline static void setvec(s16 *v, float f) +{ + s16 a = float2fixed(f); + v[0] = a; + v[1] = a; + v[2] = a; + v[3] = a; +} + + + +// add two images +void pdp_imageproc_add_process(s16 *image, s16 *image2, u32 width, u32 height) +{ + unsigned int totalnbpixels = width * height; + pixel_add_s16(image, image2, totalnbpixels>>2); +} + +// mul two images +void pdp_imageproc_mul_process(s16 *image, s16 *image2, u32 width, u32 height) +{ + unsigned int totalnbpixels = width * height; + pixel_mul_s16(image, image2, totalnbpixels>>2); +} + +// mix 2 images +void *pdp_imageproc_mix_new(void){return malloc(8*sizeof(s16));} +void pdp_imageproc_mix_delete(void *x) {free (x);} +void pdp_imageproc_mix_setleftgain(void *x, float gain){setvec((s16 *)x, gain);} +void pdp_imageproc_mix_setrightgain(void *x, float gain){setvec((s16 *)x + 4, gain);} +void pdp_imageproc_mix_process(void *x, s16 *image, s16 *image2, u32 width, u32 height) +{ + s16 *d = (s16 *)x; + unsigned int totalnbpixels = width * height; + pixel_mix_s16(image, image2, totalnbpixels>>2, d, d+4); +} + + +// random mix 2 images +void *pdp_imageproc_randmix_new(void){return malloc(8*sizeof(s16));} +void pdp_imageproc_randmix_delete(void *x) {free (x);} +void pdp_imageproc_randmix_setthreshold(void *x, float threshold){setvec((s16 *)x, 2*threshold-1);} +void pdp_imageproc_randmix_setseed(void *x, float seed) +{ + s16 *d = (s16 *)x; + srandom((u32)seed); + d[4] = (s16)random(); + d[5] = (s16)random(); + d[6] = (s16)random(); + d[7] = (s16)random(); + +} +void pdp_imageproc_randmix_process(void *x, s16 *image, s16 *image2, u32 width, u32 height) +{ + s16 *d = (s16 *)x; + unsigned int totalnbpixels = width * height; + pixel_randmix_s16(image, image2, totalnbpixels>>2, d+4, d); +} + +// affine transformation (applies gain + adds offset) +void *pdp_imageproc_affine_new(void){return malloc(8*sizeof(s16));} +void pdp_imageproc_affine_delete(void *x){free(x);} +void pdp_imageproc_affine_setgain(void *x, float gain){setvec((s16 *)x, gain);} +void pdp_imageproc_affine_setoffset(void *x, float offset){setvec((s16 *)x+4, offset);} +void pdp_imageproc_affine_process(void *x, s16 *image, u32 width, u32 height) +{ + s16 *d = (s16 *)x; + pixel_affine_s16(image, (width*height)>>2, d, d+4); +} + +// 3x1 or 1x3 in place convolution +// orientation +void *pdp_imageproc_conv_new(void){return(malloc(16*sizeof(s16)));} +void pdp_imageproc_conv_delete(void *x){free(x);} +void pdp_imageproc_conv_setmin1(void *x, float val){setvec((s16 *)x, val);} +void pdp_imageproc_conv_setzero(void *x, float val){setvec((s16 *)x+4, val);} +void pdp_imageproc_conv_setplus1(void *x, float val){setvec((s16 *)x+8, val);} +void pdp_imageproc_conv_setbordercolor(void *x, float val){setvec((s16 *)x+12, val);} +void pdp_imageproc_conv_process(void *x, s16 *image, u32 width, u32 height, u32 orientation, u32 nbp) +{ + s16 *d = (s16 *)x; + u32 i,j; + + if (orientation == PDP_IMAGEPROC_CONV_HORIZONTAL) + { + for(i=0; i>2, d+12, d); + } + + else + { + for (j=0; j>2, d, (u64 *)(d+4)); +} + +// colour rotation for 2 colour planes +void *pdp_imageproc_crot2d_new(void){return malloc(16*sizeof(s16));} +void pdp_imageproc_crot2d_delete(void *x){free(x);} +void pdp_imageproc_crot2d_setmatrix(void *x, float *matrix) +{ + s16 *d = (s16 *)x; + setvec(d, matrix[0]); + setvec(d+4, matrix[1]); + setvec(d+8, matrix[2]); + setvec(d+12, matrix[3]); +} +void pdp_imageproc_crot2d_process(void *x, s16 *image, u32 width, u32 height) +{ + s16 *d = (s16 *)x; + pixel_crot2d_s16(image, width*height >> 2, d); +} + +// biquad and biquad time +void *pdp_imageproc_bq_new(void){return malloc((5+2+2)*4*sizeof(s16));}//5xcoef, 2xstate, 2xsavestate +void pdp_imageproc_bq_delete(void *x){free(x);} +void pdp_imageproc_bq_setcoef(void *x, float *coef) // a0,-a1,-a2,b0,b1,b2,u0,u1 +{ + s16 *d = (s16 *)x; + float ia0 = 1.0f / coef[0]; + + /* all coefs are s1.14 fixed point */ + /* representing values -2 < x < 2 */ + /* so scale down before using the ordinary s0.15 float->fixed routine */ + + ia0 *= 0.5f; + + // coef + setvec(d, ia0*coef[1]); + setvec(d+4, ia0*coef[2]); + setvec(d+8, ia0*coef[3]); + setvec(d+12, ia0*coef[4]); + setvec(d+16, ia0*coef[5]); + + // state to reset too + setvec(d+28, coef[6]); + setvec(d+32, coef[7]); + +} +void pdp_imageproc_bqt_process(void *x, s16 *image, s16 *state0, s16 *state1, u32 width, u32 height) +{ + s16 *d = (s16 *)x; + pixel_biquad_time_s16(image, state0, state1, d, (width*height)>>2); +} + +void pdp_imageproc_bq_process(void *x, s16 *image, u32 width, u32 height, u32 direction, u32 nbp) +{ + s16 *d = (s16 *)x; + unsigned int i,j; + + + + /* VERTICAL */ + + if ((direction & PDP_IMAGEPROC_BIQUAD_TOP2BOTTOM) + && (direction & PDP_IMAGEPROC_BIQUAD_BOTTOM2TOP)){ + + for(i=0; i>2, width, d, d + (5*4)); + pixel_biquad_verbt_s16(image+i, height>>2, width, d, d + (5*4)); + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_TOP2BOTTOM){ + for(i=0; i>2, width, d, d + (5*4)); + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_BOTTOM2TOP){ + for(i=0; i>2, width, d, d + (5*4)); + } + } + } + + /* HORIZONTAL */ + + if ((direction & PDP_IMAGEPROC_BIQUAD_LEFT2RIGHT) + && (direction & PDP_IMAGEPROC_BIQUAD_RIGHT2LEFT)){ + + for(i=0; i<(width*height); i +=(width<<2)){ + for (j=0; j>2, width, d, d + (5*4)); + pixel_biquad_horrl_s16(image+i, width>>2, width, d, d + (5*4)); + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_LEFT2RIGHT){ + for(i=0; i<(width*height); i +=(width<<2)){ + for (j=0; j>2, width, d, d + (5*4)); + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_RIGHT2LEFT){ + for(i=0; i<(width*height); i +=(width<<2)){ + for (j=0; j>2, width, d, d + (5*4)); + } + } + } + +} + +// produce a random image +// note: random number generator can be platform specific +// however, it should be seeded. (same seed produces the same result) +void *pdp_imageproc_random_new(void){return malloc(4*sizeof(s16));} +void pdp_imageproc_random_delete(void *x){free(x);} +void pdp_imageproc_random_setseed(void *x, float seed) +{ + s16 *d = (s16 *)x; + srandom((u32)seed); + d[0] = (s16)random(); + d[1] = (s16)random(); + d[2] = (s16)random(); + d[3] = (s16)random(); + +} +void pdp_imageproc_random_process(void *x, s16 *image, u32 width, u32 height) +{ + s16 *d = (s16 *)x; + unsigned int totalnbpixels = width * height; + pixel_rand_s16(image, totalnbpixels>>2, d); +} + + diff --git a/system/pdp_imageproc_portable.c b/system/pdp_imageproc_portable.c new file mode 100644 index 0000000..60062d6 --- /dev/null +++ b/system/pdp_imageproc_portable.c @@ -0,0 +1,492 @@ +/* + * Pure Data Packet. portable image processing routines. + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + + +#include +#include "pdp_imageproc.h" + +// utility stuff +inline static s32 float2fixed(float f) +{ + if (f > 1) f = 1; + if (f < -1) f = -1; + f *= 0x7fff; + return (s32)f; +} + + + +#define CLAMP16(x) (((x) > 0x7fff) ? 0x7fff : (((x) < -0x7fff) ? -0x7fff : (x))) + +// add two images +void pdp_imageproc_add_process(s16 *image, s16 *image2, u32 width, u32 height) +{ + int a, b; + unsigned int i; + for (i=0; i>15); + } + +} + +// mix 2 images +void *pdp_imageproc_mix_new(void){return malloc(2*sizeof(s32));} +void pdp_imageproc_mix_delete(void *x) {free (x);} +void pdp_imageproc_mix_setleftgain(void *x, float gain) +{ + s32 *d = (s32 *)x; + d[0] = float2fixed(gain); +} +void pdp_imageproc_mix_setrightgain(void *x, float gain) +{ + s32 *d = (s32 *)x; + d[1] = float2fixed(gain); +} +void pdp_imageproc_mix_process(void *x, s16 *image, s16 *image2, u32 width, u32 height) +{ + s32 *d = (s32 *)x; + u32 i; + s32 a,b; + + for(i=0; i> 15; + image[i] = (s16)CLAMP16(a); + } + +} + + +// random mix 2 images +void *pdp_imageproc_randmix_new(void){return malloc(2*sizeof(s32));;} +void pdp_imageproc_randmix_delete(void *x) {free(x);} +void pdp_imageproc_randmix_setthreshold(void *x, float threshold) +{ + s32 *d = (s32 *)x; + if (threshold > 1.0f) threshold = 1.0f; + if (threshold < 0.0f) threshold = 0.0f; + d[0] = float2fixed(threshold); +} +void pdp_imageproc_randmix_setseed(void *x, float seed) +{ + s32 *d = (s32 *)x; + d[1] = float2fixed(seed); +} +void pdp_imageproc_randmix_process(void *x, s16 *image, s16 *image2, u32 width, u32 height) +{ + s32 *d = (s32 *)x; + u32 i; + s16 r; + srandom((u32)d[1]); + + + for(i=0; i> 15; + a += d[1]; + image[i] = (s16)CLAMP16(a); + } +} + +// 3x1 or 1x3 in place convolution +// orientation +void *pdp_imageproc_conv_new(void){return(malloc(4*sizeof(s32)));} +void pdp_imageproc_conv_delete(void *x){free(x);} +void pdp_imageproc_conv_setmin1(void *x, float val) +{ + s32 *d = (s32 *)x; + d[0] = float2fixed(val); +} +void pdp_imageproc_conv_setzero(void *x, float val) +{ + s32 *d = (s32 *)x; + d[1] = float2fixed(val); +} +void pdp_imageproc_conv_setplus1(void *x, float val) +{ + s32 *d = (s32 *)x; + d[2] = float2fixed(val); +} +void pdp_imageproc_conv_setbordercolor(void *x, float val) +{ + s32 *d = (s32 *)x; + d[3] = float2fixed(val); +} + +static inline void pdp_imageproc_conv_scanline(void *x, s16 *data, u32 count, s32 stride) +{ + s32 *d = (s32 *)x; + s32 a,b,c,r; + u32 i; + + a = d[3]; //border + b = data[0]; + c = data[stride]; + + for(i = 0; i < count-2; i++){ + r = a*d[0] + b*d[1] + c*d[2]; + a = data[0]; + b = data[stride]; + c = data[stride<<1]; + data[0] = (s16)CLAMP16(r>>15); + data += stride; + } + r = a*d[0] + b*d[1] + c*d[2]; + a = data[0]; + b = data[stride]; + c = d[3]; //border + data[0] = (s16)CLAMP16(r>>15); + r = a*d[0] + b*d[1] + c*d[2]; + data[stride] = (s16)CLAMP16(r>>15); + +} + +void pdp_imageproc_conv_process(void *x, s16 *image, u32 width, u32 height, u32 orientation, u32 nbp) +{ + s32 *d = (s32 *)x; + u32 i, j; + + if (orientation == PDP_IMAGEPROC_CONV_HORIZONTAL){ + for(i=0; i> d[1])); + } +} + +// colour rotation for 2 colour planes +void *pdp_imageproc_crot2d_new(void){return malloc(4*sizeof(s32));} +void pdp_imageproc_crot2d_delete(void *x){free(x);} +void pdp_imageproc_crot2d_setmatrix(void *x, float *matrix) +{ + s32 *d = (s32 *)x; + d[0] = float2fixed(matrix[0]); + d[1] = float2fixed(matrix[1]); + d[2] = float2fixed(matrix[2]); + d[3] = float2fixed(matrix[3]); + +} +void pdp_imageproc_crot2d_process(void *x, s16 *image, u32 width, u32 height) +{ + s32 *d = (s32 *)x; + u32 i,j; + s32 a1,a2,c1,c2; + + for(i=0, j=width*height; i>= 15; + a2 >>= 15; + + image[i] = (s16)CLAMP16(a1); + image[j] = (s16)CLAMP16(a2); + } +} + +// biquad and biquad time +void *pdp_imageproc_bq_new(void){return malloc((5+2+2)*sizeof(s32));}//5xcoef, 2xstate, 2xsavestate +void pdp_imageproc_bq_delete(void *x){free(x);} +void pdp_imageproc_bq_setcoef(void *x, float *coef) // a0,-a1,-a2,b0,b1,b2,u0,u1 +{ + s32 *d = (s32 *)x; + float ia0 = 1.0f / coef[0]; + + /* all coefs are s1.14 fixed point */ + /* representing values -2 < x < 2 */ + /* so scale down before using the ordinary s0.15 float->fixed routine */ + + ia0 *= 0.5f; + + // coef + d[0] = float2fixed(ia0*coef[1]); // -a1 + d[1] = float2fixed(ia0*coef[2]); // -a2 + d[2] = float2fixed(ia0*coef[3]); // b0 + d[3] = float2fixed(ia0*coef[4]); // b1 + d[4] = float2fixed(ia0*coef[5]); // b2 + + + // state to reset too + d[5] = float2fixed(coef[6]); + d[6] = float2fixed(coef[7]); + +} + +#define A1 d[0] +#define A2 d[1] +#define B0 d[2] +#define B1 d[3] +#define B2 d[4] +/* + # DIRECT FORM II BIQUAD (from pixel_biquad_s16.s) + # + # y[k] = b0 * x[k] + u1[k-1] + # u1[k] = b1 * x[k] + u2[k-1] - a1 * y[k] + # u2[k] = b2 * x[k] - a2 * y[k] +*/ + +/* remark A1 and A2 are already negated) */ + + +static inline void pdp_imageproc_bq_scanline(void *x, s16 *data, u32 count, s32 stride) +{ + + s32 *d = (s32 *)x; + s32 u1,u2, xx, yy; + + u32 i; + + u1 = d[7]; + u2 = d[8]; + + for(i = 0; i < count; i++){ + + xx = (s32)data[0]; + + yy = ((B0 * xx)>>14) + u1; + u1 = ((B1 * xx)>>14) + u2 + ((A1 * yy)>>14); + u2 = ((B2 * xx)>>14) + ((A2 * yy)>>14); + + data[0] = (s16)CLAMP16(yy); + + data += stride; + + } + + d[7] = u1; + d[8] = u2; + +} + +void pdp_imageproc_bqt_process(void *x, s16 *image, s16 *state1, s16 *state2, u32 width, u32 height) +{ + s32 *d = (s32 *)x; + u32 i; + s32 u1, u2, xx, yy; + + for (i=0; i>14) + u1; + u1 = ((B1 * xx)>>14) + u2 + ((A1 * yy)>>14); + u2 = ((B2 * xx)>>14) + ((A2 * yy)>>14); + + image[i] = (s16)CLAMP16(yy); + state1[i] = (s16)CLAMP16(u1); + state2[i] = (s16)CLAMP16(u2); + } + + +} + +void pdp_imageproc_bq_process(void *x, s16 *data, u32 width, u32 height, u32 direction, u32 nbp) +{ + s32 *d = (s32 *)x; + unsigned int i,j, offset; + + /* VERTICAL */ + offset = (height-1)*width; + + if ((direction & PDP_IMAGEPROC_BIQUAD_TOP2BOTTOM) + && (direction & PDP_IMAGEPROC_BIQUAD_BOTTOM2TOP)){ + + for(i=0; iB + pdp_imageproc_bq_scanline(x, data+offset+i, height, -width); //B->T + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_TOP2BOTTOM){ + for(i=0; iB + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_BOTTOM2TOP){ + for(i=0; iT + } + } + } + + /* HORIZONTAL */ + + offset = width-1; + if ((direction & PDP_IMAGEPROC_BIQUAD_LEFT2RIGHT) + && (direction & PDP_IMAGEPROC_BIQUAD_RIGHT2LEFT)){ + + for(i=0; i<(width*height); i += width){ + for (j=0; jR + pdp_imageproc_bq_scanline(x, data+offset+i, width, -1); //R->L + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_LEFT2RIGHT){ + for(i=0; i<(width*height); i += width){ + for (j=0; jR + } + } + } + + else if (direction & PDP_IMAGEPROC_BIQUAD_RIGHT2LEFT){ + for(i=0; i<(width*height); i += width){ + for (j=0; jL + + } + } + } + +} + +// produce a random image +// note: random number generator can be platform specific +// however, it should be seeded. (same seed produces the same result) +void *pdp_imageproc_random_new(void){return malloc(sizeof(s32));} +void pdp_imageproc_random_delete(void *x){free(x);} +void pdp_imageproc_random_setseed(void *x, float seed) +{ + s32 *d = (s32 *)x; + d[0] = float2fixed(seed); +} +void pdp_imageproc_random_process(void *x, s16 *image, u32 width, u32 height) +{ + s32 *d = (s32 *)x; + u32 i; + srandom((u32)d[0]); + for (i=0; i + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* this file contains low level image conversion code + nominated as "the ugliest part of pdp" + some code is mmx, most is not. */ + +#include "pdp_llconv.h" +#include "pdp_mmx.h" + + +/* all symbols are C style */ +#ifdef __cplusplus +extern "C" +{ +#endif + + +#define CLAMP(x) (((x)<0) ? 0 : ((x>255)? 255 : (x))) +#define CLAMP16(x) (((x)<-0x7fff) ? -0x7fff : ((x>0x7fff) ? 0x7fff : (x))) +#define FP(x) ((int)(((float)(x)) * 256.0f)) + + +/* some prototypes for functions defined elsewhere */ +void llconv_yvu_planar_s16u8(short int *src, unsigned char *dst, unsigned int nbpixels); +void llconv_yuv_planar_u8s16(unsigned char* source, short int *dest, int nbpixels); +void llconv_grey_s16u8(short int *src, unsigned char *dst, unsigned int nbpixels); +void llconv_yvu_planar_u8s16(unsigned char* source, short int *dest, int nbpixels); + + +static inline int rgb2y(int r, int g, int b){return (FP(0.257) * r) + (FP(0.504) * g) + (FP(0.098) * b) + FP(16);} +static inline int rgb2v(int r, int g, int b){return (FP(0.439) * r) - (FP(0.368) * g) - (FP(0.071) * b) + FP(128);} +static inline int rgb2u(int r, int g, int b){return -(FP(0.148) * r) - (FP(0.291) * g) + (FP(0.439) * b) + FP(128);} + + +/* "standard" 8 bit conversion routine */ +static void llconv_rgb2yvu(unsigned char* src, unsigned char* dst, int nbpixels) +{ + int r,g,b,y,v,u,i; + for (i=0; i>8); + dst[1] = CLAMP(v>>8); + dst[2] = CLAMP(u>>8); + + src += 3; + dst += 3; + } +} + + + +/* 8 bit rgb to 16 bit planar subsampled yvu */ +static void llconv_rgb2yvu_planar16sub(unsigned char* src, short int* dst, int w, int h) +{ + int r,g,b,y,v,u,i,j,k; + int size = w*h; + + int voffset = size; + int uoffset = size + (size>>2); + + + int loffset = w * 3; + + k=0; + for (j=0; j> 1); + + b = src[k+3]; + g = src[k+4]; + r = src[k+5]; + + y = (FP(0.257) * r) + (FP(0.504) * g) + (FP(0.098) * b) + FP(16); + v += (FP(0.439) * r) - (FP(0.368) * g) - (FP(0.071) * b); + u += -(FP(0.148) * r) - (FP(0.291) * g) + (FP(0.439) * b); + + dst[i+j+1] = CLAMP16(y >> 1); + + + + b = src[loffset + k]; + g = src[loffset + k+1]; + r = src[loffset + k+2]; + + y = (FP(0.257) * r) + (FP(0.504) * g) + (FP(0.098) * b) + FP(16); + v = (FP(0.439) * r) - (FP(0.368) * g) - (FP(0.071) * b); + u = -(FP(0.148) * r) - (FP(0.291) * g) + (FP(0.439) * b); + + dst[w+i+j] = CLAMP16(y >> 1); + + b = src[loffset + k+3]; + g = src[loffset + k+4]; + r = src[loffset + k+5]; + + k += 6; + + y = (FP(0.257) * r) + (FP(0.504) * g) + (FP(0.098) * b) + FP(16); + v += (FP(0.439) * r) - (FP(0.368) * g) - (FP(0.071) * b); + u += -(FP(0.148) * r) - (FP(0.291) * g) + (FP(0.439) * b); + + dst[w+i+j+1] = CLAMP16(y >> 1); + + dst[uoffset+ (i>>1) + (j>>2)] = (CLAMP16(u >> 1)); + dst[voffset+ (i>>1) + (j>>2)] = (CLAMP16(v >> 1)); + } + } +} + +/* these seem to be pretty slow */ + +static void llconv_yvu2rgb(unsigned char* src, unsigned char* dst, int nbpixels) +{ + int r,g,b,y,v,u,i; + for (i=0; i>8); + dst[1] = CLAMP(g>>8); + dst[2] = CLAMP(b>>8); + + src += 3; + dst += 3; + } +} + + + +/* convert yvu to yuyv */ +static void llconv_yvu2yuyv(unsigned char *src, unsigned char *dst, unsigned int nbpixels) +{ + unsigned int y1, y2, u, v, i; + + for (i = 0; i < nbpixels/2; i++){ + + y1 = src[0]; + y2 = src[3]; + v = (src[1] + src[4]) >> 1; + u = (src[2] + src[5]) >> 1; + dst[0] = y1; + dst[1] = u; + dst[2] = y2; + dst[3] = v; + + src += 6; + dst += 4; + + } + +} + + + +/* convert yuvu packed 8 bit unsigned to yv12 planar 16bit signed */ +static void llconv_yuyv_packed_u8s16(unsigned char* ucsource, short int *sidest, unsigned int w, unsigned int h) +{ + unsigned int i, j; + unsigned int *source = (unsigned int *)ucsource; + + unsigned int *dest = (unsigned int *)sidest; + unsigned int uoffset = (w*h)>>1; + unsigned int voffset = (w*h + ((w*h) >> 2)) >> 1; + + for(j=0; j < (h*w)>>1; j +=(w)){ + for(i=0; i< (w>>1); i+=2){ + unsigned int y,u,v; + unsigned int v00, v01, v10, v11; + v00 = source[i+j]; + v01 = source[i+j+1]; + v10 = source[i+j+(w>>1)]; + v11 = source[i+j+(w>>1)+1]; + + // save luma + dest[i+j] = ((v00 & 0x00ff00ff) << 7); + dest[i+j+1] = ((v01 & 0x00ff00ff) << 7); + dest[i+j+(w>>1)] = ((v10 & 0x00ff00ff) << 7); + dest[i+j+(w>>1)+1] = ((v11 & 0x00ff00ff) << 7); + + // compute chroma + + // mask out luma & shift right + v00 = (v00 & 0xff00ff00)>>1; + v01 = (v01 & 0xff00ff00)>>1; + v10 = (v10 & 0xff00ff00)>>1; + v11 = (v11 & 0xff00ff00)>>1; + + // average 2 scan lines + v00 += v10; + v01 += v11; + + // combine + v = (v01 << 16) | (v00 & 0x0000ffff); + u = (v01 & 0xffff0000) | (v00 >> 16); + + // flip sign bits for u,v + u ^= 0x80008000; + v ^= 0x80008000; + + // save chroma + dest[uoffset + (i>>1) + (j>>2)] = u; + dest[voffset + (i>>1) + (j>>2)] = v; + } + } + + +} + +#define CONVERT(x,y) ((x) + ((y)<<16)) + +void pdp_llconv(void *src, int stype, void *dst, int dtype, int w, int h) +{ + int conversion = CONVERT(stype, dtype); + void *tmpbuf; + + switch(CONVERT(stype, dtype)){ + + case CONVERT( RIF_YVU__P411_U8, RIF_YVU__P411_S16 ): + llconv_yvu_planar_u8s16((unsigned char*)src, (short int *)dst, w*h); + break; + + case CONVERT( RIF_YUV__P411_U8, RIF_YVU__P411_S16 ): + llconv_yuv_planar_u8s16((unsigned char*)src, (short int *)dst, w*h); + break; + + case CONVERT( RIF_YUYV_P____U8, RIF_YVU__P411_S16 ): + llconv_yuyv_packed_u8s16((unsigned char*)src, (short int *)dst, w, h); + break; + + case CONVERT( RIF_RGB__P____U8, RIF_YVU__P411_S16 ): + llconv_rgb2yvu_planar16sub((unsigned char*) src, (short int*) dst, w, h); + break; + + case CONVERT( RIF_YVU__P411_S16, RIF_YVU__P411_U8 ): + llconv_yvu_planar_s16u8((short int*)src, (unsigned char*)dst, w*h); + break; + + case CONVERT( RIF_GREY______S16, RIF_GREY______U8 ): + llconv_grey_s16u8((short int*)src, (unsigned char*)dst, w*h); + break; + default: + post("pdp_llconv: WARNING: no conversion routine defined for (%d)->(%d)", stype, dtype); + + } + +} + + +#ifdef __cplusplus +} +#endif diff --git a/system/pdp_llconv_mmx.c b/system/pdp_llconv_mmx.c new file mode 100644 index 0000000..8070bac --- /dev/null +++ b/system/pdp_llconv_mmx.c @@ -0,0 +1,55 @@ + +/* + * Pure Data Packet system implementation. : wrapper for mmx low level format conversion code + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#include "pdp_mmx.h" + + + +/* convert greyscale 8 bit unsigned to 16bit signed */ +void llconv_grey_s16u8(short int *src, unsigned char *dst, unsigned int nbpixels) +{ + pixel_pack_s16u8_y(src, dst, nbpixels>>3); +} + +/* convert yvu planar 411 16 bit signed to 8 bit unsigned */ +void llconv_yvu_planar_s16u8(short int *src, unsigned char *dst, unsigned int nbpixels) +{ + pixel_pack_s16u8_y(src, dst, nbpixels>>3); + pixel_pack_s16u8_uv(src + nbpixels, dst + nbpixels, nbpixels>>4); +} + + +/* convert yvu planar 411 8 bit unsigned to yv12 planar 16bit signed */ +void llconv_yvu_planar_u8s16(unsigned char* source, short int *dest, int nbpixels) +{ + pixel_unpack_u8s16_y(source, dest, nbpixels>>3); + pixel_unpack_u8s16_uv(&source[nbpixels], &dest[nbpixels], nbpixels>>4); +} + +/* convert yuv planar 411 8 bit unsigned to yv12 planar 16bit signed */ +void llconv_yuv_planar_u8s16(unsigned char* source, short int *dest, int nbpixels) +{ + pixel_unpack_u8s16_y(source, dest, nbpixels>>3); + pixel_unpack_u8s16_uv(&source[nbpixels], &dest[nbpixels + (nbpixels>>2)], nbpixels>>5); + pixel_unpack_u8s16_uv(&source[nbpixels + (nbpixels>>2)], &dest[nbpixels], nbpixels>>5); +} + diff --git a/system/pdp_llconv_portable.c b/system/pdp_llconv_portable.c new file mode 100644 index 0000000..de65ef5 --- /dev/null +++ b/system/pdp_llconv_portable.c @@ -0,0 +1,81 @@ + +/* + * Pure Data Packet system implementation. : portable low level format conversion code + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#define CLAMP(x) (((x)<0) ? 0 : ((x>255)? 255 : (x))) +#define FP(x) ((int)(((float)(x)) * 256.0f)) + +void pixel_unpack_portable_u8s16_y(unsigned char *src ,short int *dst, unsigned int nbpixels) +{ + unsigned int i; + for (i=0; i>7)); +} + +void pixel_pack_portable_s16u8_uv(short int *src, unsigned char *dst, unsigned int nbpixels) +{ + unsigned int i; + for (i=0; i>8); +} + + +/* convert greyscale 8 bit unsigned to 16bit signed */ +void llconv_grey_s16u8(short int *src, unsigned char *dst, unsigned int nbpixels) +{ + pixel_pack_portable_s16u8_y(src, dst, nbpixels); +} + +/* convert yvu planar 411 16 bit signed to 8 bit unsigned */ +void llconv_yvu_planar_s16u8(short int *src, unsigned char *dst, unsigned int nbpixels) +{ + pixel_pack_portable_s16u8_y(src, dst, nbpixels); + pixel_pack_portable_s16u8_uv(src + nbpixels, dst + nbpixels, nbpixels>>1); + +} + + +/* convert yvu planar 411 8 bit unsigned to yv12 planar 16bit signed */ +void llconv_yvu_planar_u8s16(unsigned char* source, short int *dest, int nbpixels) +{ + pixel_unpack_portable_u8s16_y(source, dest, nbpixels); + pixel_unpack_portable_u8s16_uv(&source[nbpixels], &dest[nbpixels], nbpixels>>1); +} + +/* convert yuv planar 411 8 bit unsigned to yv12 planar 16bit signed */ +void llconv_yuv_planar_u8s16(unsigned char* source, short int *dest, int nbpixels) +{ + pixel_unpack_portable_u8s16_y(source, dest, nbpixels); + pixel_unpack_portable_u8s16_uv(&source[nbpixels], &dest[nbpixels + (nbpixels>>2)], nbpixels>>2); + pixel_unpack_portable_u8s16_uv(&source[nbpixels + (nbpixels>>2)], &dest[nbpixels], nbpixels>>2); +} + + diff --git a/system/pdp_packet.c b/system/pdp_packet.c new file mode 100644 index 0000000..0c0b2c2 --- /dev/null +++ b/system/pdp_packet.c @@ -0,0 +1,239 @@ +/* + * Pure Data Packet system implementation: + * code for allocation/deallocation/copying/... + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ +#include "pdp.h" +#include + +/* all symbols are C style */ +#ifdef __cplusplus +extern "C" +{ +#endif + +/* this needs to be able to grow dynamically, think about it later */ +#define PDP_OBJECT_ARRAY_SIZE 1024 +static t_pdp* pdp_stack[PDP_OBJECT_ARRAY_SIZE]; + + +/* some global vars */ +static t_symbol* pdp_sym_register_rw; +static t_symbol* pdp_sym_register_ro; +static t_symbol* pdp_sym_process; + + +/* setup methods */ + +void +pdp_packet_setup(void) +{ + bzero(pdp_stack, PDP_OBJECT_ARRAY_SIZE * sizeof(t_pdp *)); + pdp_sym_register_rw = gensym("register_rw"); + pdp_sym_register_ro = gensym("register_ro"); + pdp_sym_process = gensym("process"); +} + +void +pdp_packet_destroy(void) +{ + int i = 0; + /* dealloc all the data in object stack */ + while ((i < PDP_OBJECT_ARRAY_SIZE) && (pdp_stack[i])) free(pdp_stack[i++]); +} + + +/* private pdp_mem methods */ + + +/* public object manips */ + + +/* alloc method: alloc time is linear in the number of used packets */ +/* think about a better (tree) method when this number grows large */ +int +pdp_packet_new(unsigned int datatype, unsigned int datasize /*without header*/) +{ + unsigned int totalsize = datasize + PDP_HEADER_SIZE; + int i = 0; + unsigned int align; + t_pdp* p; + for (i=0; i < PDP_OBJECT_ARRAY_SIZE; i++){ + p = pdp_stack[i]; + /* check if we can reuse this one if it is already allocated */ + if (p) { + /* remark: if p->size >= totalsize we can give away the packet */ + /* but that would lead to unefficient use if we have a lot of packets */ + /* of different sizes */ + if ((p->users == 0) && (p->size == totalsize) && (p->type == datatype)){ + //post("pdp_new_object: can reuse %d", i); + p->users = 1; + return i; + } + else{ + //post("pdp_new_object: can't reuse %d, (%d users)", i, p->users); + //post("size:%d, newsize:%d, type:%d, newtype:%d", p->size, totalsize, p->type, datatype); + } + } + /* allocate new one */ + else { + p = (t_pdp *)malloc(totalsize); + align = ((unsigned int)p) & (PDP_ALIGN - 1); + if (align) post("pdp_new_object: warning data misaligned by %x", align); + pdp_stack[i] = p; + p->type = datatype; + p->size = totalsize; + p->users = 1; + //post("pdp_new_object: allocating new (%d)", i); + return i; + } + } + post("pdp_new_object: WARNING: out of memory"); + + return -1; + +} + + +t_pdp* +pdp_packet_header(int handle) +{ + if ((handle >= 0) && (handle < PDP_OBJECT_ARRAY_SIZE)) return pdp_stack[handle]; + else return 0; +} + +void* +pdp_packet_data(int handle) +{ + if ((handle >= 0) && (handle < PDP_OBJECT_ARRAY_SIZE)) + return (char *)(pdp_stack[handle]) + PDP_HEADER_SIZE; + else return 0; +} + + + +int +pdp_packet_copy_ro(int handle) +{ + int out_handle; + + t_pdp* p; + if ((handle >= 0) + && (handle < PDP_OBJECT_ARRAY_SIZE) + && (p = pdp_stack[handle])){ + /* increase the number of users and return */ + p->users++; + out_handle = handle; + } + else out_handle = -1; + + //post("pdp_copy_ro: outhandle:%d", out_handle); + + return out_handle; +} + +int +pdp_packet_copy_rw(int handle) +{ + int out_handle; + + t_pdp* p; + if ((handle >= 0) + && (handle < PDP_OBJECT_ARRAY_SIZE) + && (p = pdp_stack[handle])){ + /* if there are other users, copy the object otherwize return the same handle */ + if (p->users){ + int new_handle = pdp_packet_new(p->type, p->size - PDP_HEADER_SIZE); + t_pdp* new_p = pdp_packet_header(new_handle); + memcpy(new_p, p, p->size); + new_p->users = 1; + out_handle = new_handle; + } + else { + p->users++; + out_handle = handle; + } + //post("pdp_copy_rw: inhandle:%d outhandle:%d", handle, out_handle); + + } + else out_handle = -1; + + return out_handle; +} + +int +pdp_packet_clone_rw(int handle) +{ + int out_handle; + + t_pdp* p; + if ((handle >= 0) + && (handle < PDP_OBJECT_ARRAY_SIZE) + && (p = pdp_stack[handle])){ + + /* clone the packet header, don't copy the data */ + int new_handle = pdp_packet_new(p->type, p->size - PDP_HEADER_SIZE); + t_pdp* new_p = pdp_packet_header(new_handle); + memcpy(new_p, p, PDP_HEADER_SIZE); + new_p->users = 1; + out_handle = new_handle; + } + + else out_handle = -1; + + return out_handle; +} + +void +pdp_packet_mark_unused(int handle) +{ + t_pdp* p; + if ((handle >= 0) && (handle < PDP_OBJECT_ARRAY_SIZE)){ + if (p = pdp_stack[handle]) { + if (p->users) { + p->users--; + //post("pdp_mark_unused: handle %d, users left %d", handle, p->users); + } + else { + post("pdp_mark_unused: WARNING: handle %d has zero users (duplicate pdp_mark_unused ?)", handle); + } + } + else { + post("pdp_mark_unused: WARNING: invalid handle %d: no associated object", handle); + } + } + + else { + /* -1 is the only invalid handle that doesn't trigger a warning */ + if (handle != -1) post("pdp_mark_unused: WARNING: invalid handle %d: out of bound", handle); + } + + +} + +/* remark. if an owner frees a rw copy, he can still pass it along to clients. +the first copy instruction revives the object. maybe free should not be called free but unregister. +as long as no new_object method is called, or no copy on another object is performed, +the "undead" copy can be revived. this smells a bit, i know...*/ + + + + +#ifdef __cplusplus +} +#endif diff --git a/system/pdp_queue.c b/system/pdp_queue.c new file mode 100644 index 0000000..2932728 --- /dev/null +++ b/system/pdp_queue.c @@ -0,0 +1,337 @@ +/* + * Pure Data Packet - processor queue module. + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + + +/* + this is a the processor queue pdp system module + it receives tasks from objects that are schedules to + be computed in another thread. the object is signalled back + when the task is completed. + + this is not a standard pd class. it is a sigleton class + using a standard pd clock to poll for compleded methods on + every scheduler run. this is a hack to do thread synchronization + in a thread unsafe pd. + + */ + +#include "pdp.h" +#include +#include +#include + + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define PDP_QUEUE_SIZE 1024 +#define PDP_QUEUE_DELTIME 1.0f; + + + + +/********************* pdp process queue data *********************/ + +typedef void (*t_pdpmethod)(void *client); + +/* the process queue data record */ +typedef struct process_queue_struct +{ + void *x_owner; /* the object we are dealing with */ + t_pdpmethod x_process; /* the process method */ + t_pdpmethod x_callback; /* the function to be called when finished */ + int *x_queue_id; /* place to store the queue id for task */ +} t_process_queue; + + + +/* clock members */ +static t_clock *pdp_clock; +static double deltime; + +/* some bookkeeping vars */ +static long long ticks; +static long long packets; + +/* queue members */ +static t_process_queue *q; /* queue */ +static int mask; +static int head; /* last entry in queue + 1 */ +static int tail; /* first entry in queque */ +static int curr; /* the object currently processed in other thread */ + +/* pthread vars */ +static pthread_mutex_t mut; +static pthread_cond_t cond_dataready; +static pthread_cond_t cond_processingdone; +static pthread_t thread_id; + +/* synchro pipes */ +static int pipe_fd[2]; + +/* toggle for thread usage */ +static int use_thread; + + + +/* the methods */ +void pdp_queue_wait() +{ + //post("pdp_pq_wait: waiting for pdp_queue_thread to finish processing"); + pthread_mutex_lock(&mut); + while(((curr - head) & mask) != 0){ + + pthread_cond_wait(&cond_processingdone, &mut); + } + pthread_mutex_unlock(&mut); + //post("pdp_pq_wait: pdp_queue_thread has finished processing"); + +} +void pdp_queue_finish(int index) +{ + + if (-1 == index) { + //post("pdp_pq_remove: index == -1"); + return; + } + /* wait for processing thread to finish*/ + pdp_queue_wait(); + + /* invalidate callback at index */ + q[index & mask].x_callback = 0; + q[index & mask].x_queue_id = 0; + +} + +static void pdp_queue_signal_processor(void) +{ + + pthread_mutex_lock(&mut); + //post("signalling process thread"); + pthread_cond_signal(&cond_dataready); + pthread_mutex_unlock(&mut); + //post("signalling process thread done"); + +} + +static void pdp_queue_wait_for_feeder(void) +{ + + + /* only use locking when there is no data */ + if(((curr - head) & mask) == 0){ + pthread_mutex_lock(&mut); + + /* signal processing done */ + //post("pdp_queue_thread: signalling processing is done"); + pthread_cond_signal(&cond_processingdone); + + /* wait until there is an item in the queue */ + while(((curr - head) & mask) == 0){ + //post("waiting for feeder"); + pthread_cond_wait(&cond_dataready, &mut); + //post("waiting for feeder done"); + } + + pthread_mutex_unlock(&mut); + + } +} + +void pdp_queue_add(void *owner, void *process, void *callback, int *queue_id) +{ + int i; + + /* if processing is in not in thread, just call the funcs */ + if (!use_thread){ + //post("pdp_queue_add: calling processing routine directly"); + *queue_id = -1; + ((t_pdpmethod) process)(owner); + ((t_pdpmethod) callback)(owner); + return; + } + + + + /* schedule method in thread queue */ + if (1 == ((tail - head) & mask)) { + post("pdp_queue_add: WARNING: processing queue is full.\n"); + post("pdp_queue_add: WARNING: skipping process method, calling callback directly.\n"); + *queue_id = -1; + ((t_pdpmethod) callback)(owner); + } + + + + i = head & mask; + q[i].x_owner = owner; + q[i].x_process = process; + q[i].x_callback = callback; + q[i].x_queue_id = queue_id; + *queue_id = i; + //post("pdp_queue_add: added method to queue, index %d", i); + + + // increase the packet count + packets++; + + // move head forward + head++; + + pdp_queue_signal_processor(); + +} + + +/* processing thread */ +static void *pdp_queue_thread(void *dummy) +{ + while(1){ + + + /* wait until there is data available */ + pdp_queue_wait_for_feeder(); + + + //post("pdp_queue_thread: processing %d", curr); + + + /* call the process routine */ + (q[curr & mask].x_process)(q[curr & mask].x_owner); + + /* advance */ + curr++; + + + } +} + + +/* call back all the callbacks */ +static void pdp_queue_callback (void) +{ + + /* call callbacks for finished packets */ + while(0 != ((curr - tail) & mask)) + { + int i = tail & mask; + /* invalidate queue id */ + if(q[i].x_queue_id) *q[i].x_queue_id = -1; + /* call callback */ + if(q[i].x_callback) (q[i].x_callback)(q[i].x_owner); + //else post("pdp_pq_tick: callback %d is disabled",i ); + tail++; + } + +} + +/* the clock method */ +static void pdp_queue_tick (void) +{ + /* do work */ + //if (!(ticks % 1000)) post("pdp tick %d", ticks); + + if (!use_thread) return; + + /* call callbacks */ + pdp_queue_callback(); + + /* increase counter */ + ticks++; + + /* set clock for next update */ + clock_delay(pdp_clock, deltime); +} + + +void pdp_queue_use_thread(int t) +{ + /* if thread usage is being disabled, + wait for thread to finish processing first */ + if (t == 0) { + pdp_queue_wait(); + use_thread = 0; + pdp_queue_callback(); + clock_unset(pdp_clock); + } + else { + clock_unset(pdp_clock); + clock_delay(pdp_clock, deltime); + use_thread = 1; + } + +} + +void pdp_queue_setup(void) +{ + pthread_attr_t attr; + + /* setup pdp queue processor object */ + ticks = 0; + deltime = PDP_QUEUE_DELTIME; + + /* setup queue data */ + mask = PDP_QUEUE_SIZE - 1; + head = 0; + tail = 0; + curr = 0; + q = getbytes(PDP_QUEUE_SIZE * sizeof(*q)); + + /* use threads by default */ + use_thread = 1; + + /* setup synchro stuff */ + pthread_mutex_init(&mut, NULL); + pthread_cond_init(&cond_dataready, NULL); + pthread_cond_init(&cond_processingdone, NULL); + + + /* allocate the clock */ + pdp_clock = clock_new(0, (t_method)pdp_queue_tick); + + /* set the clock */ + clock_delay(pdp_clock, 0); + + /* start processing thread */ + + /* glibc doc says SCHED_OTHER is default, + but it seems not to be when initiated from a RT thread + so we explicitly set it here */ + pthread_attr_init (&attr); + //pthread_attr_setschedpolicy(&attr, SCHED_FIFO); + pthread_attr_setschedpolicy(&attr, SCHED_OTHER); + pthread_create(&thread_id, &attr, pdp_queue_thread, (void *)0); + + + +} + + + + + + + +#ifdef __cplusplus +} +#endif diff --git a/system/pdp_resample.c b/system/pdp_resample.c new file mode 100644 index 0000000..2b5a9de --- /dev/null +++ b/system/pdp_resample.c @@ -0,0 +1,135 @@ +/* + * Pure Data Packet system file. - image resampling routines + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#include "pdp_resample.h" +#include "pdp.h" + +/* + +efficient bilinear resampling ?? +performance: how to eliminate divides? -> virtual coordinates 2^k x 2^k (conf. opengl) + +i.e. 16 bit virtual coordinates: easy modular addressing + +*/ + +s32 pdp_resample_bilin(s16 *image, s32 width, s32 height, s32 virt_x, s32 virt_y) +{ + + s32 fp_x, fp_y, frac_x, frac_y, f, offset, r_1, r_2; + + virt_x &= 0xffff; + virt_y &= 0xffff; + + fp_x = virt_x * (width - 1); + fp_y = virt_y * (height - 1); + + frac_x = fp_x & (0xffff); + frac_y = fp_y & (0xffff); + + offset = (fp_x >> 16) + (fp_y >> 16) * width; + image += offset; + + f = 0x10000 - frac_x; + + r_1 = ((f * (s32)(image[0]) + frac_x * (s32)(image[1])))>>16; + + image += width; + + r_2 = ((f * (s32)(image[0]) + frac_x * (s32)(image[1])))>>16; + + f = 0x10000 - frac_y; + + return ((f * r_1 + frac_y * r_2)>>16); + +} + + +void pdp_resample_scale_bilin(s16 *src_image, s16 *dst_image, s32 src_w, s32 src_h, s32 dst_w, s32 dst_h) +{ + s32 i,j; + s32 virt_x=0; + s32 virt_y=0; /* virtual coordinates in 30 bit */ + s32 scale_x = 0x40000000 / dst_w; + s32 scale_y = 0x40000000 / dst_h; + + for (j=0; j>14, virt_y>>14); + virt_x += scale_x; + } + virt_x = 0; + virt_y += scale_y; + } + +} + +void pdp_resample_scale_nn(s16 *src_image, s16 *dst_image, s32 src_w, s32 src_h, s32 dst_w, s32 dst_h) +{ + s32 i,j; + s32 x=0; + s32 y=0; + s32 frac_x=0; + s32 frac_y=0; + s32 scale_x = (src_w << 20 ) / dst_w; + s32 scale_y = (src_h << 20 ) / dst_h; + + for (j=0; j> 20; + } + x = 0; + frac_x = 0; + frac_y += scale_y; + y = (frac_y >> 20) * src_w; + } + +} + +void pdp_resample_zoom_tiled_bilin(s16 *src_image, s16 *dst_image, s32 w, s32 h, + float zoom_x, float zoom_y, float center_x_relative, float center_y_relative) +{ + float izx = 1.0f / zoom_x; + float izy = 1.0f / zoom_y; + s32 scale_x = (s32)((float)0x100000 * izx / (float)w); + s32 scale_y = (s32)((float)0x100000 * izy / (float)h); + + s32 top_virt_x = (s32)((1.0f - izx) * (float)0x100000 * center_x_relative); + s32 top_virt_y = (s32)((1.0f - izy) * (float)0x100000 * center_y_relative); + + s32 virt_x = top_virt_x; + s32 virt_y = top_virt_y; + + s32 i,j; + + for (j=0; j>4, virt_y>>4); + virt_x += scale_x; + } + virt_x = top_virt_x; + virt_y += scale_y; + } + +} + diff --git a/system/pdp_type.c b/system/pdp_type.c new file mode 100644 index 0000000..b23b9cd --- /dev/null +++ b/system/pdp_type.c @@ -0,0 +1,143 @@ +/* + * Pure Data Packet system implementation. : code for handling different packet types + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ +#include "pdp.h" +#include + +/* all symbols are C style */ +#ifdef __cplusplus +extern "C" +{ +#endif + + +/****************** packet type checking methods ********************/ + + +/* check if two packets are allocated and of the same type */ +int pdp_type_compat(int packet0, int packet1) +{ + + t_pdp *header0 = pdp_packet_header(packet0); + t_pdp *header1 = pdp_packet_header(packet1); + + if (!(header1)){ + //post("pdp_type_compat: invalid header packet1"); + return 0; + } + if (!(header0)){ + //post("pdp_type_compat: invalid header packet 0"); + return 0; + } + if (header0->type != header1->type){ + //post("pdp_type_compat: types do not match"); + return 0; + } + + return 1; +} + +/* check if two image packets are allocated and of the same type */ +int pdp_type_compat_image(int packet0, int packet1) +{ + t_pdp *header0 = pdp_packet_header(packet0); + t_pdp *header1 = pdp_packet_header(packet1); + + + if (!(pdp_type_compat(packet0, packet1))) return 0; + if (header0->type != PDP_IMAGE){ + //post("pdp_type_compat_image: not a PDP_IMAGE"); + return 0; + } + if (header0->info.image.encoding != header1->info.image.encoding){ + //post("pdp_type_compat_image: encodings differ"); + return 0; + } + if (header0->info.image.width != header1->info.image.width){ + //post("pdp_type_compat_image: image withs differ"); + return 0; + } + if (header0->info.image.height != header1->info.image.height){ + //post("pdp_type_compat_image: image heights differ"); + return 0; + } + return 1; +} + +/* check if packet is a valid image packet */ +int pdp_type_isvalid_image(int packet) +{ + t_pdp *header = pdp_packet_header(packet); + if (!header) return 0; + if (PDP_IMAGE != header->type) return 0; + if ((PDP_IMAGE_YV12 != header->info.image.encoding) + && (PDP_IMAGE_GREY != header->info.image.encoding)) return 0; + + return 1; + +} + + + +int pdp_packet_new_image_yv12(u32 w, u32 h) +{ + t_pdp *header; + int packet; + + + u32 size = w*h; + u32 totalnbpixels = size + (size >> 1); + u32 packet_size = totalnbpixels << 1; + + packet = pdp_packet_new(PDP_IMAGE, packet_size); + header = pdp_packet_header(packet); + + header->info.image.encoding = PDP_IMAGE_YV12; + header->info.image.width = w; + header->info.image.height = h; + + return packet; +} + +int pdp_packet_new_image_grey(u32 w, u32 h) +{ + t_pdp *header; + int packet; + + + u32 size = w*h; + u32 totalnbpixels = size; + u32 packet_size = totalnbpixels << 1; + + packet = pdp_packet_new(PDP_IMAGE, packet_size); + header = pdp_packet_header(packet); + + header->info.image.encoding = PDP_IMAGE_GREY; + header->info.image.width = w; + header->info.image.height = h; + + return packet; +} + + + + +#ifdef __cplusplus +} +#endif diff --git a/system/pdp_ut.c b/system/pdp_ut.c new file mode 100644 index 0000000..83b4cb0 --- /dev/null +++ b/system/pdp_ut.c @@ -0,0 +1,195 @@ +/* + * Pure Data Packet - Utility toolkit objects. + * Copyright (c) by Tom Schouten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +/* This file contains some small utility pd objects that make working with + pdp objects a lot easier. Mainly as glue to be used in the abstractions + in the distro. */ + +#include "pdp.h" +#include + +/* this object does an add, scale, clip operation */ + +t_class *pdp_ut_addscaleclip_class; + +typedef struct pdp_ut_addscaleclip_struct +{ + t_object x_obj; + t_outlet *x_outlet0; + t_float x_min; + t_float x_max; + t_float x_offset; + t_float x_scale; +} t_pdp_ut_addscaleclip; + + +static void pdp_ut_addscaleclip_float(t_pdp_ut_addscaleclip *x, t_floatarg f) +{ + f += x->x_offset; + f *= x->x_scale; + f = (f < x->x_min) ? x->x_min : f; + f = (f > x->x_max) ? x->x_max : f; + outlet_float(x->x_outlet0, f); +} + +static void pdp_ut_addscaleclip_free(t_pdp_ut_addscaleclip *x){} + +void *pdp_ut_addscaleclip_new(t_floatarg offset, t_floatarg scale, t_floatarg min, t_floatarg max) +{ + t_pdp_ut_addscaleclip *x = (t_pdp_ut_addscaleclip *)pd_new(pdp_ut_addscaleclip_class); + x->x_outlet0 = outlet_new(&x->x_obj, &s_float); + x->x_offset = offset; + x->x_scale = scale; + x->x_min = min; + x->x_max = max; + return (void *)x; +} + +void pdp_ut_addscaleclip_setup(void) +{ + pdp_ut_addscaleclip_class = class_new(gensym("pdp_ut_addscaleclip"), (t_newmethod)pdp_ut_addscaleclip_new, + (t_method)pdp_ut_addscaleclip_free, sizeof(t_pdp_ut_addscaleclip), 0, + A_FLOAT, A_FLOAT, A_FLOAT, A_FLOAT, A_NULL); + class_addfloat(pdp_ut_addscaleclip_class, pdp_ut_addscaleclip_float); +} + + +/* pdp_ut_logmap does a logarithmic parameter mapping [0->1] x -> min(max/min)^x max an add, scale, clip operation */ +/* pdp_ut_logmap_comp does x -> min(max/min)^(1-x) */ +/* pdp_ut_linmap dos x -> min + (max - min * x */ + +t_class *pdp_ut_linmap_class; +t_class *pdp_ut_logmap_class; +t_class *pdp_ut_logmap_comp_class; + +typedef struct pdp_ut_map_struct +{ + t_object x_obj; + t_outlet *x_outlet0; + t_float x_min; + t_float x_max; +} t_pdp_ut_map; + + +static void pdp_ut_logmap_float(t_pdp_ut_map *x, t_floatarg f) +{ + f = (f < 0.0f) ? 0.0f : f; + f = (f > 1.0f) ? 1.0f : f; + + f = x->x_min * pow((x->x_max / x->x_min), f); + + outlet_float(x->x_outlet0, f); +} + +static void pdp_ut_linmap_float(t_pdp_ut_map *x, t_floatarg f) +{ + f = (f < 0.0f) ? 0.0f : f; + f = (f > 1.0f) ? 1.0f : f; + + f = x->x_min + ((x->x_max - x->x_min) * f); + + outlet_float(x->x_outlet0, f); +} + +static void pdp_ut_logmap_comp_float(t_pdp_ut_map *x, t_floatarg f) +{ + f = (f < 0.0f) ? 0.0f : f; + f = (f > 1.0f) ? 1.0f : f; + + f = x->x_min * pow((x->x_max / x->x_min), (1.0f - f)); + + outlet_float(x->x_outlet0, f); +} + +static void pdp_ut_map_free(t_pdp_ut_map *x){} + + +void pdp_ut_map_init(t_pdp_ut_map *x, t_floatarg min, t_floatarg max) +{ + x->x_outlet0 = outlet_new(&x->x_obj, &s_float); + x->x_min = min; + x->x_max = max; +} + +void *pdp_ut_logmap_new(t_floatarg min, t_floatarg max) +{ + t_pdp_ut_map *x = (t_pdp_ut_map *)pd_new(pdp_ut_logmap_class); + pdp_ut_map_init(x, min, max); + return (void *)x; +} + +void *pdp_ut_linmap_new(t_floatarg min, t_floatarg max) +{ + t_pdp_ut_map *x = (t_pdp_ut_map *)pd_new(pdp_ut_linmap_class); + pdp_ut_map_init(x, min, max); + return (void *)x; +} + +void *pdp_ut_logmap_comp_new(t_floatarg min, t_floatarg max) +{ + t_pdp_ut_map *x = (t_pdp_ut_map *)pd_new(pdp_ut_logmap_comp_class); + pdp_ut_map_init(x, min, max); + return (void *)x; +} + +void pdp_ut_logmap_setup(void) +{ + pdp_ut_logmap_class = class_new(gensym("pdp_ut_logmap"), (t_newmethod)pdp_ut_logmap_new, + (t_method)pdp_ut_map_free, sizeof(t_pdp_ut_map), 0, + A_FLOAT, A_FLOAT, A_NULL); + class_addfloat(pdp_ut_logmap_class, pdp_ut_logmap_float); +} + +void pdp_ut_logmap_comp_setup(void) +{ + pdp_ut_logmap_comp_class = class_new(gensym("pdp_ut_logmap_comp"), (t_newmethod)pdp_ut_logmap_comp_new, + (t_method)pdp_ut_map_free, sizeof(t_pdp_ut_map), 0, + A_FLOAT, A_FLOAT, A_NULL); + class_addfloat(pdp_ut_logmap_comp_class, pdp_ut_logmap_comp_float); +} + +void pdp_ut_linmap_setup(void) +{ + pdp_ut_linmap_class = class_new(gensym("pdp_ut_linmap"), (t_newmethod)pdp_ut_linmap_new, + (t_method)pdp_ut_map_free, sizeof(t_pdp_ut_map), 0, + A_FLOAT, A_FLOAT, A_NULL); + class_addfloat(pdp_ut_linmap_class, pdp_ut_linmap_float); +} + + + +#ifdef __cplusplus +extern "C" +{ +#endif + +void pdp_ut_setup(void) +{ + pdp_ut_addscaleclip_setup(); + pdp_ut_logmap_setup(); + pdp_ut_logmap_comp_setup(); + pdp_ut_linmap_setup(); +} + + +#ifdef __cplusplus +} +#endif -- cgit v1.2.1