1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
# Pure Data Packet mmx routine.
# Copyright (c) by Tom Schouten <pdp@zzz.kotnet.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# this file contains ops for binary image processing
# 8x8 bit tile encoded
# low byte = bottom row
# low bit = right column
# %mm7 = scratch reg for all macros
# ************ load mask *******************
# compute bit masks for rows and columns
# %mm7: scratch reg
# ldmt count, reg -- load mask top
#   reg   <- 64-bit mask whose top \count bytes (rows) are 0xff and whose
#            remaining low bytes are zero
#   count    assemble-time constant, substituted into the shift immediate
#   Uses no scratch register.
.macro ldmt count, reg
        pcmpeqd \reg, \reg                      # reg = all ones
        psllq   $(64-(\count*8)), \reg          # drop everything below the top rows
.endm
# ldmb count, reg -- load mask bottom
#   reg   <- 64-bit mask whose bottom \count bytes (rows) are 0xff and whose
#            remaining high bytes are zero
#   count    assemble-time constant, substituted into the shift immediate
#   Uses no scratch register.
.macro ldmb count, reg
        pcmpeqd \reg, \reg                      # reg = all ones
        psrlq   $(64-(\count*8)), \reg          # drop everything above the bottom rows
.endm
# ldmtb count, regt, regb -- load mask top and bottom
#   regb  <- bottom mask of \count rows (see ldmb)
#   regt  <- top mask of \count rows    (see ldmt)
#   The bottom mask is built first, then the top mask.
.macro ldmtb count, regt, regb
        ldmb    \count, \regb
        ldmt    \count, \regt
.endm
# ldmr count, reg -- load mask right
#   reg   <- mask with the low \count bits (right columns) of every byte set,
#            for count in 0..8
#   count    assemble-time constant
#   Clobbers %mm7 (left holding the intermediate word mask shifted up a byte).
.macro ldmr count, reg
        pcmpeqw %mm7, %mm7                      # mm7 = all ones
        psrlw   $(16-\count), %mm7              # low count bits of each 16-bit word
        movq    %mm7, \reg                      # covers the even bytes
        psllq   $8, %mm7                        # same pattern moved to the odd bytes
        por     %mm7, \reg                      # merge: every byte carries the mask
.endm
# ldml count, reg -- load mask left
#   reg   <- mask with the high \count bits (left columns) of every byte set,
#            for count in 0..8
#   count    assemble-time constant
#   Clobbers %mm7 (left holding the intermediate word mask shifted down a byte).
.macro ldml count, reg
        pcmpeqw %mm7, %mm7                      # mm7 = all ones
        psllw   $(16-\count), %mm7              # high count bits of each 16-bit word
        movq    %mm7, \reg                      # covers the odd bytes
        psrlq   $8, %mm7                        # same pattern moved to the even bytes
        por     %mm7, \reg                      # merge: every byte carries the mask
.endm
# ldmlr count, regl, regr -- load mask left and right
#   regl  <- mask with the high \count bits of every byte set
#   regr  <- mask with the low  \count bits of every byte set
#   count    assemble-time constant, 0..8
#   Clobbers %mm7.
.macro ldmlr count, regl, regr
        pcmpeqw %mm7, %mm7                      # mm7 = all ones
        psllw   $(16-\count), %mm7              # high count bits of each 16-bit word
        movq    %mm7, \regl                     # odd bytes of the left mask
        psrlq   $8, %mm7
        por     %mm7, \regl                     # left mask complete
        movq    \regl, \regr
        psrlq   $(8-\count), \regr              # slide the set bits to the low end of each byte;
                                                # only zero bits cross a byte boundary
.endm
# ************* shift square **********
# shifts a square in reg, fills with zeros
# sst count, reg -- shift square top
#   Moves the tile in reg \count rows (bytes) towards the top; the vacated
#   bottom rows are zero.
#   count  assemble-time constant
.macro sst count, reg
        psllq   $(\count*8), \reg               # one row = one byte = 8 bits
.endm
# ssb count, reg -- shift square bottom
#   Moves the tile in reg \count rows (bytes) towards the bottom; the vacated
#   top rows are zero.
#   count  assemble-time constant
.macro ssb count, reg
        psrlq   $(\count*8), \reg               # one row = one byte = 8 bits
.endm
# not tested
# ssl count, reg -- shift square left
#   Shifts every byte (row) of reg left by \count bits with zero fill; no bit
#   is carried from one byte into its neighbour.
#   count  assemble-time constant, 0..8
#   Clobbers %mm7 (receives the original value of reg), so reg must not be %mm7.
.macro ssl count, reg
        movq    \reg, %mm7                      # keep the input
        pcmpeqw \reg, \reg                      # reg = all ones
        psllw   $(16-\count), \reg              # top count bits of each 16-bit word ...
        psrlw   $8, \reg                        # ... relocated to the top of each low byte
        pandn   %mm7, \reg                      # reg = input with those bits cleared
        psllw   $(\count), \reg                 # word shift; nothing left to spill upward
.endm
# ssr count, reg -- shift square right
#   Shifts every byte (row) of reg right by \count bits with zero fill; no bit
#   is carried from one byte into its neighbour.
#   count  assemble-time constant, 0..8
#   Clobbers %mm7 (receives the original value of reg), so reg must not be %mm7.
.macro ssr count, reg
        movq    \reg, %mm7                      # keep the input
        pcmpeqw \reg, \reg                      # reg = all ones
        psrlw   $(16-\count), \reg              # low count bits of each 16-bit word ...
        psllw   $8, \reg                        # ... relocated to the bottom of each high byte
        pandn   %mm7, \reg                      # reg = input with those bits cleared
        psrlw   $(\count), \reg                 # word shift; nothing left to spill downward
.endm
# ********** combine square *************
# combines 2 squares
# csr count, regr, reg -- combine right
#   reg  <- (reg shifted left by \count columns) OR
#           (regr shifted right by 8-\count columns)
#   i.e. the leftmost \count columns of regr enter reg from the right.
#   Both reg and regr are modified; %mm7 is clobbered by ssl/ssr.
.macro csr count, regr, reg
        ssl     \count, \reg
        ssr     (8-\count), \regr
        por     \regr, \reg
.endm
# csl count, regl, reg -- combine left
#   reg  <- (reg shifted right by \count columns) OR
#           (regl shifted left by 8-\count columns)
#   i.e. the rightmost \count columns of regl enter reg from the left.
#   Both reg and regl are modified; %mm7 is clobbered by ssr/ssl.
.macro csl count, regl, reg
        ssr     \count, \reg
        ssl     (8-\count), \regl
        por     \regl, \reg
.endm
# cst count, regt, reg -- combine top
#   reg  <- (reg shifted \count rows towards the bottom) OR
#           (regt shifted 8-\count rows towards the top)
#   i.e. the bottom \count rows of regt enter reg from the top.
#   Both reg and regt are modified.
.macro cst count, regt, reg
        ssb     \count, \reg
        sst     (8-\count), \regt
        por     \regt, \reg
.endm
# csb count, regb, reg -- combine bottom
#   reg  <- (reg shifted \count rows towards the top) OR
#           (regb shifted 8-\count rows towards the bottom)
#   i.e. the top \count rows of regb enter reg from the bottom.
#   Both reg and regb are modified.
.macro csb count, regb, reg
        sst     \count, \reg
        ssb     (8-\count), \regb
        por     \regb, \reg
.endm
# ********** load combine square *************
# loads combined square using mask
# lcsml count, mask, source, sourcel, dstreg -- load combined square left
#   dstreg <- ((source  & ~mask) >> \count) | ((sourcel & mask) << (8-\count))
#   mask     must have the low \count bits of every byte set (e.g. 0x01 per
#            byte for count = 1); with such a mask the 64-bit shifts never
#            move a set bit across a byte boundary
#   source, sourcel  operands accepted by pandn/pand (the caller in this file
#            passes memory operands)
#   Clobbers %mm7, so dstreg must not be %mm7.
.macro lcsml count, mask, source, sourcel, dstreg
        movq    \mask, \dstreg
        movq    \mask, %mm7
        pandn   \source, \dstreg                # dstreg = source without its low count columns
        pand    \sourcel, %mm7                  # mm7    = low count columns of sourcel
        psrlq   $(\count), \dstreg              # make room on the left of each row
        psllq   $(8-\count), %mm7               # move the neighbour's columns to the left edge
        por     %mm7, \dstreg
.endm
.globl pixel_test_s1
.type pixel_test_s1,@function
# pixel_test_s1 -- test routine for the tile macros above
#
# void pixel_test_s1(void *dest, void *source, int nb_squares, int spacing)
#   (32-bit code, arguments read from the stack relative to %ebp;
#    %eax is not set, so a void return is assumed)
#
#   dest        8(%ebp)   receives one 8-byte result tile
#   source     12(%ebp)   two 8-byte tiles are read: (source) and 8(source)
#   nb_squares 16(%ebp)   accepted for signature compatibility, not used
#   spacing    20(%ebp)   accepted for signature compatibility, not used
#
# Writes lcsml(1) of the tile at (source), with the tile at 8(source) as the
# left-hand input, to (dest).
# Preserves %esi and %edi; uses %mm0, %mm6 and %mm7 and executes emms before
# returning.
pixel_test_s1:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %edi

        movl    8(%ebp), %edi                   # dest
        movl    12(%ebp), %esi                  # source

        ldmr    1, %mm6                         # mm6 = 0x01 in every byte
        lcsml   1, %mm6, (%esi), 8(%esi), %mm0
        movq    %mm0, (%edi)

        # Alternative test kept from the original (disabled):
        #   movq (%esi), %mm0
        #   movq 8(%esi), %mm1
        #   csl 4, %mm1, %mm0
        #   movq %mm0, (%edi)

        emms                                    # leave the FPU usable for the caller
        popl    %edi
        popl    %esi
        leave
        ret
.size pixel_test_s1, .-pixel_test_s1
|