From 37b6643df2df7d784a31ca73f7bb90dc109c2401 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Thu, 15 Dec 2005 07:26:47 +0000 Subject: removing PDP source (except debian files) before import of PDP 0.12.4 svn path=/trunk/externals/pdp/; revision=4217 --- system/mmx/pixel_resample_s16.s | 314 ---------------------------------------- 1 file changed, 314 deletions(-) delete mode 100644 system/mmx/pixel_resample_s16.s (limited to 'system/mmx/pixel_resample_s16.s') diff --git a/system/mmx/pixel_resample_s16.s b/system/mmx/pixel_resample_s16.s deleted file mode 100644 index 3959f9c..0000000 --- a/system/mmx/pixel_resample_s16.s +++ /dev/null @@ -1,314 +0,0 @@ - - -#interpolation data: -#* 4 vectors: neighbourhood for samples (TL, TR, BL, BR) -#* 2 vectors: fractional part (unsigned) -#* 2 vectors: addresses of pixel blocks - -#coord conversion data: -#1 vector: 32bit splatted address -#1 vector: 16bit splatted w-1 -#1 vector: 16bit splatted h-1 -#1 vector: 16bit splatted w (reuse w-1 with add?) -#1 dword: 32 bit line offset - -#coord generation data: several vectors for parameter update stuff.. - -#coordinate systems: 16 bit virtual coordinates (signed, center relative) -#* 2 vectors: virtual coordinates -#(possibly an intermediate step + conversion to 16 bit virtual) - - -#step 1: generate virtual coords - - -#step 2: virtual coords -> block addresses + fractional addresses -#* mulhigh: real coords (x,y) (center relative) -#* add center -> unsigned (top left relative) -#* mullow: fractional part (x_frac, y_frac) -#* mulhigh, mullow, pack 32bit: y_offset -#* pack 32bit: x_offset -#* add, shift, add start address: real addresses - - -#step3: data fetch using generated addresses: -# this step would be much simpler in 4x16bit rgba. life's a bitch.. 
#step4: bilinear interpolation

#step5: store


        # this can be simplified by doing 32 bit unaligned moves
        # and vector unpacking on the data


        # cooked image data structure (byte offsets, used relative to %esi)

        # pixel environment temp storage: one 8 byte vector per corner
        # (TL, TR, BL, BR), one 16 bit lane per pixel 1..4
        .equ    TL1, 0x00
        .equ    TL2, 0x02
        .equ    TL3, 0x04
        .equ    TL4, 0x06
        .equ    TR1, 0x08
        .equ    TR2, 0x0A
        .equ    TR3, 0x0C
        .equ    TR4, 0x0E
        .equ    BL1, 0x10
        .equ    BL2, 0x12
        .equ    BL3, 0x14
        .equ    BL4, 0x16
        .equ    BR1, 0x18
        .equ    BR2, 0x1A
        .equ    BR3, 0x1C
        .equ    BR4, 0x1E

        # addresses of pixel blocks (one 32 bit address per pixel)
        .equ    ADDRESS1, 0x20
        .equ    ADDRESS2, 0x24
        .equ    ADDRESS3, 0x28
        .equ    ADDRESS4, 0x2C

        # second env + address buffer (testing: not used)
        .equ    SECONDBUFFER, 0x30

        # 32bit splatted bitmap address
        .equ    V2PLANEADDRESS, 0x60

        # 16bit splatted image constants
        .equ    V4TWOWIDTHM1, 0x68
        .equ    V4TWOHEIGHTM1, 0x70
        .equ    V4LINEOFFSET, 0x78

        # data struct size
        .equ    RESAMPLEDATASIZE, 0x80


        # FETCH_PAIR p, q
        # Copies the 2x2 neighbourhoods of pixels p and q (1..4) into the
        # TL/TR/BL/BR scratch vectors, one 16 bit word at a time.
        #   in:       ADDRESS<p>/ADDRESS<q> hold the top left word addresses,
        #             %edi = distance in bytes to the next image line
        #   clobbers: %ax, %bx, %ecx, %edx
        .macro  FETCH_PAIR p, q
        movl    ADDRESS\p(%esi), %ecx
        movl    ADDRESS\q(%esi), %edx

        movw    (%ecx), %ax                     # top left
        movw    (%edx), %bx
        movw    %ax, TL\p(%esi)
        movw    %bx, TL\q(%esi)
        movw    2(%ecx), %ax                    # top right: next 16 bit word
        movw    2(%edx), %bx
        movw    %ax, TR\p(%esi)
        movw    %bx, TR\q(%esi)

        addl    %edi, %ecx                      # step down one line
        addl    %edi, %edx

        movw    (%ecx), %ax                     # bottom left
        movw    (%edx), %bx
        movw    %ax, BL\p(%esi)
        movw    %bx, BL\q(%esi)
        movw    2(%ecx), %ax                    # bottom right
        movw    2(%edx), %bx
        movw    %ax, BR\p(%esi)
        movw    %bx, BR\q(%esi)
        .endm


        # interpolation routine
        # input:    %mm0, %mm1  4 x 16bit unsigned top left relative virtual
        #                       x and y coordinates
        #           %esi        temp & algo data structure (offsets above)
        # output:   %mm0        4 x 16bit interpolated pixels
        # clobbers: %mm1-%mm7, %ax, %bx, %ecx, %edx, %edi
        # writes:   ADDRESS1..ADDRESS4 and the TL/TR/BL/BR scratch vectors

getpixelsbilin:
        psrlw   $1, %mm0                        # convert to range 0->0x7fff [0,0.5[
        movq    %mm0, %mm2
        psrlw   $1, %mm1
        movq    %mm1, %mm3

        movq    V4TWOWIDTHM1(%esi), %mm4        # 2 * (width - 1)
        movq    V4TWOHEIGHTM1(%esi), %mm5       # 2 * (height - 1)
        pmulhw  %mm4, %mm2                      # mm2 == x coord (topleft relative)
        pmullw  %mm4, %mm0                      # mm0 == x frac (unsigned)
        pmulhw  %mm5, %mm3                      # mm3 == y coord (topleft relative)
        pmullw  %mm5, %mm1                      # mm1 == y frac (unsigned)

        # 32 bit product y * line offset, as separate low and high words
        movq    %mm3, %mm5                      # copy y coord
        pmullw  V4LINEOFFSET(%esi), %mm3        # low part of line offset
        pmulhw  V4LINEOFFSET(%esi), %mm5        # high part of line offset

        # zero extend the x coords to 32 bit
        movq    %mm2, %mm7                      # copy x coord vector
        pxor    %mm4, %mm4
        punpcklwd %mm4, %mm2                    # pixels 1,2 in %mm2
        punpckhwd %mm4, %mm7                    # pixels 3,4 in %mm7

        # interleave the product halves into 32 bit line offsets
        movq    %mm3, %mm6                      # copy
        punpcklwd %mm5, %mm3                    # pixels 1,2 in %mm3
        punpckhwd %mm5, %mm6                    # pixels 3,4 in %mm6

        # address = plane address + 2 * (y * line offset + x)
        paddd   %mm2, %mm3
        pslld   $1, %mm3                        # convert to word addresses
        paddd   V2PLANEADDRESS(%esi), %mm3      # add pixel plane address
        movq    %mm3, ADDRESS1(%esi)            # fills ADDRESS1 and ADDRESS2

        paddd   %mm7, %mm6
        pslld   $1, %mm6
        paddd   V2PLANEADDRESS(%esi), %mm6
        movq    %mm6, ADDRESS3(%esi)            # fills ADDRESS3 and ADDRESS4

        # interpolation weights, all halved so they stay positive for pmulhw
        pcmpeqw %mm2, %mm2                      # all ones
        movq    %mm0, %mm4                      # copy x frac
        pxor    %mm2, %mm4                      # complement (approx negative)
        psrlw   $1, %mm4                        # 0.5 * (1 - frac x)
        psrlw   $1, %mm0                        # 0.5 * (frac x)
        movq    %mm1, %mm5                      # copy y frac
        pxor    %mm2, %mm5
        psrlw   $1, %mm5                        # 0.5 * (1 - frac y)
        psrlw   $1, %mm1                        # 0.5 * (frac y)

        movq    %mm0, %mm2                      # copy of frac x (bottom row)
        movq    %mm4, %mm3                      # copy of (1 - frac x) (bottom row)

        # fetch data

        #jmp skipfetch  # seems the fetch is the real killer. try to optimize this
                        # using 32 bit accesses & shifts

        # the src image data struct is appended to the cooked data struct,
        # so its first dword sits at offset RESAMPLEDATASIZE
        movl    RESAMPLEDATASIZE(%esi), %edi
        shll    $1, %edi                        # doubled, like the word addresses above

        FETCH_PAIR 1, 2
        FETCH_PAIR 3, 4

skipfetch:
        # bilinear interpolation: blend horizontally, then vertically
        pmulhw  TL1(%esi), %mm4                 # top row
        pmulhw  TR1(%esi), %mm0
        paddw   %mm4, %mm0
        pmulhw  %mm5, %mm0                      # * 0.5 * (1 - frac y)

        pmulhw  BL1(%esi), %mm3                 # bottom row
        pmulhw  BR1(%esi), %mm2
        paddw   %mm3, %mm2
        pmulhw  %mm1, %mm2                      # * 0.5 * (frac y)

        paddw   %mm2, %mm0
        psllw   $2, %mm0                        # compensate for gain reduction

        ret


        # linear mapping data
# struct
        # Field offsets of the linear mapping data, used relative to
        # LINMAPDATA. Each field is accessed as one 8 byte vector.
        ROWSTATEX       = 0x0
        ROWSTATEY       = 0x8
        COLSTATEX       = 0x10
        COLSTATEY       = 0x18
        ROWINCX         = 0x20
        ROWINCY         = 0x28
        COLINCX         = 0x30
        COLINCY         = 0x38

        # image data struct
        LINEOFFSET      = 0x0
        IMAGEADDRESS    = 0x4
        WIDTH           = 0x8
        HEIGHT          = 0xC
        IMAGEDATASIZE   = 0x10



# pixel_resample_linmap_s16(void *x)
#
# Fills the destination image by calling getpixelsbilin once per group of
# 4 destination pixels, stepping the source coordinates linearly.
#
# in:      8(%ebp) after the prologue = pointer to one data block laid out as
#            [resample scratch data][source image][dest image][linmap data]
#          (see SOURCEIMAGE / DESTIMAGE / LINMAPDATA below)
# out:     no return value is set up
# saves:   %ebp, %esi, %edi, %ebx; MMX state is cleared with emms
# updates: ROWSTATEX/Y and COLSTATEX/Y inside the data block
#
# The per-row vector count is WIDTH / 4 rounded down. If that count or
# HEIGHT is zero the routine returns without writing anything. Without this
# check the decl/jnz counters would wrap around and write far past the
# destination buffer.
# NOTE(review): destination pixels beyond the last full group of 4 in a row
# are never written. Presumably callers pass a WIDTH that is a multiple
# of 4. Confirm.
.globl pixel_resample_linmap_s16
.type pixel_resample_linmap_s16,@function

        SOURCEIMAGE     = RESAMPLEDATASIZE
        DESTIMAGE       = SOURCEIMAGE + IMAGEDATASIZE
        LINMAPDATA      = DESTIMAGE + IMAGEDATASIZE

pixel_resample_linmap_s16:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx


        movl    8(%ebp), %esi                           # get data struct
        movl    DESTIMAGE+HEIGHT(%esi), %edx            # image height
        movl    DESTIMAGE+IMAGEADDRESS(%esi), %edi      # dest image address
        movl    DESTIMAGE+WIDTH(%esi), %ecx             # image width
        shrl    $2, %ecx                                # vector count
        jz      linmap_done                             # fewer than 4 pixels per row
        testl   %edx, %edx
        jz      linmap_done                             # no rows
        .align 16

        # inner loop: one iteration per 4 pixels along the current row
linmap_looprow:
        movq    LINMAPDATA+ROWSTATEX(%esi), %mm0        # get current coordinates
        movq    LINMAPDATA+ROWSTATEY(%esi), %mm1

        # the outer loop enters here with the new row start in %mm0/%mm1
linmap_loopcol:
        movq    %mm0, %mm4                              # copy
        movq    %mm1, %mm5
        paddd   LINMAPDATA+ROWINCX(%esi), %mm4          # increment
        paddd   LINMAPDATA+ROWINCY(%esi), %mm5
        movq    %mm4, %mm6                              # copy
        movq    %mm5, %mm7
        paddd   LINMAPDATA+ROWINCX(%esi), %mm6          # increment
        paddd   LINMAPDATA+ROWINCY(%esi), %mm7
        movq    %mm6, LINMAPDATA+ROWSTATEX(%esi)        # store next state
        movq    %mm7, LINMAPDATA+ROWSTATEY(%esi)

        psrad   $16, %mm0                               # keep the upper 16 bits
        psrad   $16, %mm1
        psrad   $16, %mm4
        psrad   $16, %mm5
        packssdw %mm4, %mm0                             # pack new coordinates
        packssdw %mm5, %mm1

        pushl   %ecx                                    # getpixelsbilin uses these as scratch
        pushl   %edx
        pushl   %edi

        call    getpixelsbilin                          # do interpolation

        popl    %edi
        popl    %edx
        popl    %ecx
        movq    %mm0, (%edi)                            # store 4 pixels
        addl    $0x8, %edi                              # point to next 4 pixels
        decl    %ecx                                    # one vector less in this row
        jnz     linmap_looprow

        # end of row: advance the column state and start the next row from it
        movq    LINMAPDATA+COLSTATEX(%esi), %mm0        # get column state vector
        movq    LINMAPDATA+COLSTATEY(%esi), %mm1
        movl    DESTIMAGE+WIDTH(%esi), %ecx             # image width
        shrl    $2, %ecx                                # vector count
        paddd   LINMAPDATA+COLINCX(%esi), %mm0          # increment
        paddd   LINMAPDATA+COLINCY(%esi), %mm1
        movq    %mm0, LINMAPDATA+COLSTATEX(%esi)        # store
        movq    %mm1, LINMAPDATA+COLSTATEY(%esi)
        decl    %edx                                    # one row less to go
        jnz     linmap_loopcol

linmap_done:
        emms
        popl    %ebx
        popl    %edi
        popl    %esi
        leave
        ret

.size pixel_resample_linmap_s16, .-pixel_resample_linmap_s16

# -- cgit v1.2.1