From 9b8745d5250c9d0b60c9aa5a77f58a3fcddf1076 Mon Sep 17 00:00:00 2001 From: Tom Schouten Date: Tue, 21 Jan 2003 10:27:33 +0000 Subject: This commit was generated by cvs2svn to compensate for changes in r352, which included commits to RCS files with non-trunk default branches. svn path=/trunk/externals/pdp/; revision=353 --- system/mmx/pixel_s1.s | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 system/mmx/pixel_s1.s (limited to 'system/mmx/pixel_s1.s') diff --git a/system/mmx/pixel_s1.s b/system/mmx/pixel_s1.s new file mode 100644 index 0000000..d6bc5ca --- /dev/null +++ b/system/mmx/pixel_s1.s @@ -0,0 +1,201 @@ +# Pure Data Packet mmx routine. +# Copyright (c) by Tom Schouten +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
#

	# This file contains ops for binary image processing.
	# Tiles are 8x8 bits, held in one 64 bit mmx register:
	#   low byte = bottom row
	#   low bit  = right column
	#
	# %mm7 = scratch reg for all macros that need one
	# (ldmr, ldml, ldmlr, ssl, ssr, csr, csl, lcsml).
	# Do not pass %mm7 as an operand to those macros.
	#
	# Every count argument is folded into an immediate shift count,
	# so it has to be an assemble-time constant.


	# ************ load mask *******************
	# compute bit masks for rows and columns
	# %mm7: scratch reg (column masks only)

	# load mask top
	# reg = all ones in the count top rows (high bytes), zero below
	.macro ldmt count reg
	pcmpeqb \reg, \reg			# reg = all ones
	psllq $(64-(\count<<3)), \reg		# drop everything but the high bytes
	.endm

	# load mask bottom
	# reg = all ones in the count bottom rows (low bytes), zero above
	.macro ldmb count reg
	pcmpeqb \reg, \reg			# reg = all ones
	psrlq $(64-(\count<<3)), \reg		# drop everything but the low bytes
	.endm

	# load mask top and bottom
	# regt = top mask (see ldmt), regb = bottom mask (see ldmb)
	.macro ldmtb count regt regb
	ldmb \count, \regb
	ldmt \count, \regt
	.endm

	# load mask right
	# reg = the count low bits set in every byte (right columns)
	# clobbers %mm7
	.macro ldmr count reg
	pcmpeqb %mm7, %mm7			# all ones
	psrlw $(16-\count), %mm7		# count low bits of every word
	movq %mm7, \reg				# covers the even bytes
	psllq $8, %mm7				# same bits, now in the odd bytes
	por %mm7, \reg
	.endm

	# load mask left
	# reg = the count high bits set in every byte (left columns)
	# clobbers %mm7
	.macro ldml count reg
	pcmpeqb %mm7, %mm7			# all ones
	psllw $(16-\count), %mm7		# count high bits of every word
	movq %mm7, \reg				# covers the odd bytes
	psrlq $8, %mm7				# same bits, now in the even bytes
	por %mm7, \reg
	.endm

	# load mask left and right
	# regl = left mask (see ldml), regr = right mask (see ldmr)
	# clobbers %mm7
	.macro ldmlr count regl regr
	pcmpeqb %mm7, %mm7
	psllw $(16-\count), %mm7
	movq %mm7, \regl
	psrlq $8, %mm7
	por %mm7, \regl
	movq \regl, \regr
	psrlq $(8-\count), \regr		# move the set bits to the low end of each byte
	.endm

	# ************* shift square **********
	# shifts a square in reg, fills with zeros

	# shift square top
	# moves all rows count rows up (towards the high byte)
	.macro sst count reg
	psllq $(\count<<3), \reg
	.endm

	# shift square bottom
	# moves all rows count rows down (towards the low byte)
	.macro ssb count reg
	psrlq $(\count<<3), \reg
	.endm

	# not tested
	# shift square left
	# moves all columns count bits towards the high bit of each byte
	# clobbers %mm7
	.macro ssl count reg
	movq \reg, %mm7				# keep the input
	pcmpeqb \reg, \reg
	psllw $(16-\count), \reg
	psrlw $8, \reg				# mask = count high bits of the even bytes
	pandn %mm7, \reg			# clear them so they cannot spill into the odd bytes
	psllw $(\count), \reg
	.endm

	# shift square right
	# moves all columns count bits towards the low bit of each byte
	# clobbers %mm7
	.macro ssr count reg
	movq \reg, %mm7				# keep the input
	pcmpeqb \reg, \reg
	psrlw $(16-\count), \reg
	psllw $8, \reg				# mask = count low bits of the odd bytes
	pandn %mm7, \reg			# clear them so they cannot spill into the even bytes
	psrlw $(\count), \reg
	.endm


	# ********** combine square *************
	# combines 2 squares
	# the result is left in reg, the neighbour register is destroyed

	# combine right
	# reg moves count columns left, the freed right columns are
	# filled with the count leftmost columns of regr
	# clobbers %mm7
	.macro csr count regr reg
	ssl \count, \reg
	ssr (8-\count), \regr
	por \regr, \reg
	.endm

	# combine left
	# reg moves count columns right, the freed left columns are
	# filled with the count rightmost columns of regl
	# clobbers %mm7
	.macro csl count regl reg
	ssr \count, \reg
	ssl (8-\count), \regl
	por \regl, \reg
	.endm

	# combine top
	# reg moves count rows down, the freed top rows are
	# filled with the count bottom rows of regt
	.macro cst count regt reg
	ssb \count, \reg
	sst (8-\count), \regt
	por \regt, \reg
	.endm


	# combine bottom
	# reg moves count rows up, the freed bottom rows are
	# filled with the count top rows of regb
	.macro csb count regb reg
	sst \count, \reg
	ssb (8-\count), \regb
	por \regb, \reg
	.endm


	# ********** load combine square *************
	# loads combined square using mask

	# load combined square left
	# mask should be count bits set right (i.e. 0x01)
	# (this is the mask ldmr builds for the same count)
	#
	# dstreg = ((source and not mask) >> count)
	#        or ((sourcel and mask) << (8 - count))
	#
	# source and sourcel are only read, so they may be memory operands
	# clobbers %mm7
	.macro lcsml count mask source sourcel dstreg
	movq \mask, \dstreg
	movq \mask, %mm7
	pandn \source, \dstreg			# source without its right columns
	pand \sourcel, %mm7			# right columns of the left neighbour
	psrlq $(\count), \dstreg
	psllq $(8-\count), %mm7
	por %mm7, \dstreg
	.endm



.globl pixel_test_s1
.type pixel_test_s1,@function

# test routine for the lcsml macro
# void pixel_test_s1(void *dest, void *source, int nb_squares, int spacing)
# (argument list as in the comment this routine was originally given,
#  which still named pixel_add_s16)
#
# Takes the tile at source, with the tile at source+8 acting as its left
# neighbour, moves it one column to the right while pulling in the
# rightmost column of the neighbour, and stores the 8 byte result at dest.
#
# nb_squares and spacing are accepted but not used.
# Arguments are read from the stack relative to %ebp.
# %esi and %edi are saved and restored.
# %mm0, %mm6 and %mm7 are overwritten; emms is executed before returning.


pixel_test_s1:
	pushl %ebp
	movl %esp, %ebp
	push %esi
	push %edi

	movl 8(%ebp),  %edi	# dest
	movl 12(%ebp), %esi	# source
	# 16(%ebp) (count) and 20(%ebp) (row distance) are not needed here

	# %mm6 = 0x01 in every byte: the rightmost column
	ldmr 1, %mm6
	lcsml 1, %mm6, (%esi), 8(%esi), %mm0
	movq %mm0, (%edi)


# alternative test, exercising csl instead of lcsml:
#	movq (%esi), %mm0
#	movq 8(%esi), %mm1
#	csl 4, %mm1, %mm0
#	movq %mm0, (%edi)

	emms


	pop %edi
	pop %esi
	leave
	ret

	.size pixel_test_s1, .-pixel_test_s1

# -- cgit v1.2.1