#    Pure Data Packet mmx routine.
#    Copyright (c) by Tom Schouten <tom@zwizwa.be>
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

	# this file contains ops for binary image processing
	# 8x8 bit tile encoded
	# low byte = bottom row
	# low bit = right column
	# %mm7 = scratch reg for all macros
	#
	# Syntax: GNU as, AT&T operand order (source first, destination last).
	# A tile is one 64 bit MMX register: 8 bytes, one byte per row.
	# NOTE: macros documented as clobbering %mm7 must not be handed %mm7
	# as one of their register operands.


	# ************ load mask *******************
	# compute bit masks for rows and columns
	# %mm7: scratch reg

	# load mask top
	# reg <- mask with the top "count" rows (high bytes) all ones.
	# Only reg is written.
	.macro ldmt count reg
	pcmpeqb \reg, \reg
	psllq $(64-(\count<<3)), \reg
	.endm

	# load mask bottom
	# reg <- mask with the bottom "count" rows (low bytes) all ones.
	# Only reg is written.
	.macro ldmb count reg
	pcmpeqb \reg, \reg
	psrlq $(64-(\count<<3)), \reg
	.endm

	# load mask top and bottom
	# regt <- top mask, regb <- bottom mask, both "count" rows high.
	.macro ldmtb count regt regb
	ldmb \count, \regb
	ldmt \count, \regt
	.endm

	# load mask right
	# reg <- mask with the low "count" bits of every byte set.
	# The word shift builds the pattern in the low byte of each word,
	# the 8 bit quadword shift copies it into the high byte of each word.
	# Clobbers %mm7.
	.macro ldmr count reg
	pcmpeqb %mm7, %mm7
	psrlw $(16-\count), %mm7
	movq %mm7, \reg
	psllq $8, %mm7
	por %mm7, \reg
	.endm

	# load mask left
	# reg <- mask with the high "count" bits of every byte set.
	# Mirror image of ldmr: pattern is built in the high byte of each
	# word and then copied down into the low byte.
	# Clobbers %mm7.
	.macro ldml count reg
	pcmpeqb %mm7, %mm7
	psllw $(16-\count), %mm7
	movq %mm7, \reg
	psrlq $8, %mm7
	por %mm7, \reg
	.endm

	# load mask left and right
	# regl <- left mask (high "count" bits of every byte),
	# regr <- right mask (low "count" bits of every byte).
	# The right mask is derived from the left one by moving the set
	# bits down inside each byte.
	# Clobbers %mm7.
	.macro ldmlr count regl regr
	pcmpeqb %mm7, %mm7
	psllw $(16-\count), %mm7
	movq %mm7, \regl
	psrlq $8, %mm7
	por %mm7, \regl
	movq \regl, \regr
	psrlq $(8-\count), \regr
	.endm


	# ************* shift square **********
	# shifts a square in reg, fills with zeros

	# shift square top
	# moves every row "count" rows towards the top (high bytes)
	.macro sst count reg
	psllq $(\count<<3), \reg
	.endm

	# shift square bottom
	# moves every row "count" rows towards the bottom (low bytes)
	.macro ssb count reg
	psrlq $(\count<<3), \reg
	.endm

	# not tested
	# shift square left
	# Shifts every byte of reg left by "count" bits.
	# There is no per byte MMX shift, so the high "count" bits of the
	# low byte of each word are cleared first; the word shift then
	# cannot carry them into the neighbouring byte.
	# Clobbers %mm7 (holds the copy of the input).
	.macro ssl count reg
	movq \reg, %mm7
	pcmpeqb \reg, \reg
	psllw $(16-\count), \reg
	psrlw $8, \reg
	pandn %mm7, \reg
	psllw $(\count), \reg
	.endm

	# shift square right
	# Shifts every byte of reg right by "count" bits.
	# The low "count" bits of the high byte of each word are cleared
	# first so the word shift cannot carry them into the low byte.
	# Clobbers %mm7 (holds the copy of the input).
	.macro ssr count reg
	movq \reg, %mm7
	pcmpeqb \reg, \reg
	psrlw $(16-\count), \reg
	psllw $8, \reg
	pandn %mm7, \reg
	psrlw $(\count), \reg
	.endm


	# ********** combine square *************
	# combines 2 squares
	# In all four macros the result is left in reg, the neighbour
	# register is modified as well, and %mm7 is clobbered by the
	# column variants (csr, csl).

	# combine right
	# reg shifted left by "count" columns, the freed columns are
	# filled from regr
	.macro csr count regr reg
	ssl \count, \reg
	ssr (8-\count), \regr
	por \regr, \reg
	.endm

	# combine left
	# reg shifted right by "count" columns, the freed columns are
	# filled from regl
	.macro csl count regl reg
	ssr \count, \reg
	ssl (8-\count), \regl
	por \regl, \reg
	.endm

	# combine top
	# reg shifted down by "count" rows, the freed rows are filled
	# from regt
	.macro cst count regt reg
	ssb \count, \reg
	sst (8-\count), \regt
	por \regt, \reg
	.endm

	# combine bottom
	# reg shifted up by "count" rows, the freed rows are filled
	# from regb
	.macro csb count regb reg
	sst \count, \reg
	ssb (8-\count), \regb
	por \regb, \reg
	.endm


	# ********** load combine square *************
	# loads combined square using mask

	# load combined square left
	# mask should be count bits set right (i.e. 0x01)
	#
	# dstreg <- source shifted right by "count" columns, with the
	# freed left columns taken from the right "count" columns of
	# sourcel.  source and sourcel are used as pandn / pand operands.
	# Masking before the quadword shifts keeps bits from leaking
	# between rows.
	# Clobbers %mm7.
	.macro lcsml count mask source sourcel dstreg
	movq \mask, \dstreg
	movq \mask, %mm7
	pandn \source, \dstreg
	pand \sourcel, %mm7
	psrlq $(\count), \dstreg
	psllq $(8-\count), %mm7
	por %mm7, \dstreg
	.endm


	.text

.globl pixel_test_s1
.type  pixel_test_s1,@function

# test routine for the lcsml macro
# void pixel_test_s1(void *dest, void *source, int nb_squares, int spacing)
#
# Arguments are read from the stack relative to %ebp:
#    8(%ebp)  dest        one 8 byte tile is written here
#   12(%ebp)  source      16 bytes are read: the tile at (source) and the
#                         tile at 8(source), used as the left neighbour
#   16(%ebp)  nb_squares  accepted but not used by this routine
#   20(%ebp)  spacing     accepted but not used by this routine
#
# Writes to dest the tile at source shifted right by one column, with
# the freed left column filled from the right column of the tile at
# 8(source).
#
# %esi and %edi are saved and restored.  %mm0, %mm6 and %mm7 are
# overwritten; emms is issued before returning.

pixel_test_s1:
	pushl %ebp
	movl %esp, %ebp
	push %esi
	push %edi

	movl 8(%ebp), %edi	# dest
	movl 12(%ebp), %esi	# source

	ldmr 1, %mm6		# mask: right column, 0x01 in every byte
	lcsml 1, %mm6, (%esi), 8(%esi), %mm0
	movq %mm0, (%edi)

	# alternative under test, kept disabled:
	# movq (%esi), %mm0
	# movq 8(%esi), %mm1
	# csl 4, %mm1, %mm0
	# movq %mm0, (%edi)

	emms			# leave MMX state before returning to the caller

	pop %edi
	pop %esi
	leave
	ret

.size  pixel_test_s1, .-pixel_test_s1