aboutsummaryrefslogtreecommitdiff
path: root/system/mmx/pixel_s1.s
diff options
context:
space:
mode:
Diffstat (limited to 'system/mmx/pixel_s1.s')
-rw-r--r--system/mmx/pixel_s1.s201
1 files changed, 201 insertions, 0 deletions
diff --git a/system/mmx/pixel_s1.s b/system/mmx/pixel_s1.s
new file mode 100644
index 0000000..d6bc5ca
--- /dev/null
+++ b/system/mmx/pixel_s1.s
@@ -0,0 +1,201 @@
+# Pure Data Packet mmx routine.
+# Copyright (c) by Tom Schouten <pdp@zzz.kotnet.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+ # this file contains ops for binary image processing
+ # 8x8 bit tile encoded
+ # low byte = bottom row
+ # low bit = right column
+ # %mm7 = scratch reg for all macros
+
+
+ # ************ load mask *******************
+ # compute bit masks for rows and columns
+ # %mm7: scratch reg
+
+ # load mask top
+ .macro ldmt count reg
+ pcmpeqb \reg, \reg
+ psllq $(64-(\count<<3)), \reg
+ .endm
+
+ # load mask bottom
+ .macro ldmb count reg
+ pcmpeqb \reg, \reg
+ psrlq $(64-(\count<<3)), \reg
+ .endm
+
+ # load mask top and bottom
+ .macro ldmtb count regt regb
+ ldmb \count, \regb
+ ldmt \count, \regt
+ .endm
+
+ # load mask right
+ .macro ldmr count reg
+ pcmpeqb %mm7, %mm7
+ psrlw $(16-\count), %mm7
+ movq %mm7, \reg
+ psllq $8, %mm7
+ por %mm7, \reg
+ .endm
+
+ # load mask left
+ .macro ldml count reg
+ pcmpeqb %mm7, %mm7
+ psllw $(16-\count), %mm7
+ movq %mm7, \reg
+ psrlq $8, %mm7
+ por %mm7, \reg
+ .endm
+
+ # load mask left and right
+ .macro ldmlr count regl regr
+ pcmpeqb %mm7, %mm7
+ psllw $(16-\count), %mm7
+ movq %mm7, \regl
+ psrlq $8, %mm7
+ por %mm7, \regl
+ movq \regl, \regr
+ psrlq $(8-\count), \regr
+ .endm
+
+ # ************* shift square **********
+ # shifts a square in reg, fills with zeros
+
+ # shift square top
+ .macro sst count reg
+ psllq $(\count<<3), \reg
+ .endm
+
+ # shift square bottom
+ .macro ssb count reg
+ psrlq $(\count<<3), \reg
+ .endm
+
+ # not tested
+ # shift square left
+ .macro ssl count reg
+ movq \reg, %mm7
+ pcmpeqb \reg, \reg
+ psllw $(16-\count), \reg
+ psrlw $8, \reg
+ pandn %mm7, \reg
+ psllw $(\count), \reg
+ .endm
+
+ # shift square right
+ .macro ssr count reg
+ movq \reg, %mm7
+ pcmpeqb \reg, \reg
+ psrlw $(16-\count), \reg
+ psllw $8, \reg
+ pandn %mm7, \reg
+ psrlw $(\count), \reg
+ .endm
+
+
+ # ********** combine square *************
+ # combines 2 squares
+
+ # combine right
+ .macro csr count regr reg
+ ssl \count, \reg
+ ssr (8-\count), \regr
+ por \regr, \reg
+ .endm
+
+ # combine left
+ .macro csl count regl reg
+ ssr \count, \reg
+ ssl (8-\count), \regl
+ por \regl, \reg
+ .endm
+
+ # combine top
+ .macro cst count regt reg
+ ssb \count, \reg
+ sst (8-\count), \regt
+ por \regt, \reg
+ .endm
+
+
+ # combine bottom
+ .macro csb count regb reg
+ sst \count, \reg
+ ssb (8-\count), \regb
+ por \regb, \reg
+ .endm
+
+
+ # ********** load combine square *************
+ # loads combined square using mask
+
+ # load combined square left
+ # mask should be count bits set right (i.e. 0x01)
+ .macro lcsml count mask source sourcel dstreg
+ movq \mask, \dstreg
+ movq \mask, %mm7
+ pandn \source, \dstreg
+ pand \sourcel, %mm7
+ psrlq $(\count), \dstreg
+ psllq $(8-\count), %mm7
+ por %mm7, \dstreg
+ .endm
+
+
+
+.globl pixel_test_s1
+.type pixel_test_s1,@function
+
+# simple add
+# void pixel_add_s16(void *dest, void *source, int nb_squares, int spacing)
+
+
+
+ #
+
+
+pixel_test_s1:
+ pushl %ebp
+ movl %esp, %ebp
+ push %esi
+ push %edi
+
+ movl 8(%ebp), %edi # dest
+ movl 12(%ebp), %esi # source
+ movl 16(%ebp), %ecx # count
+ movl 20(%ebp), %edx # row distance
+
+ ldmr 1, %mm6
+ lcsml 1, %mm6, (%esi), 8(%esi), %mm0
+ movq %mm0, (%edi)
+
+
+# movq (%esi), %mm0
+# movq 8(%esi), %mm1
+# csl 4, %mm1, %mm0
+# movq %mm0, (%edi)
+
+ emms
+
+
+ pop %edi
+ pop %esi
+ leave
+ ret
+