1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
# Pure Data Packet mmx routine.
# Copyright (c) by Tom Schouten <tom@zwizwa.be>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
.globl pixel_crot3d_s16
.type pixel_crot3d_s16,@function
# 3 dimensional colour space rotation
# 3x3 matrix is column encoded, each coefficient is a 4x16 bit fixed point vector
# void pixel_crot3d_s16(int *buf, int nb_4pixel_vectors_per_plane, short int *matrix)
pixel_crot3d_s16:
pushl %ebp
movl %esp, %ebp
push %esi
push %edi
movl 8(%ebp), %esi # input array
movl 12(%ebp), %ecx # pixel count
movl 16(%ebp), %edi # rotation matrix
movl %ecx, %edx
shll $3, %edx # %edx = plane spacing
.align 16
.loop_crot3d:
movq (%esi), %mm0 # get 1st component
movq (%esi,%edx,1), %mm6 # get 2nd component
movq (%esi,%edx,2), %mm7 # get 3rd component
movq %mm0, %mm1 # copy 1st component
movq %mm0, %mm2
pmulhw (%edi), %mm0 # mul first column
pmulhw 8(%edi), %mm1
pmulhw 16(%edi), %mm2
movq %mm6, %mm5 # copy 2nd component
movq %mm6, %mm3
pmulhw 24(%edi), %mm6 # mul second column
pmulhw 32(%edi), %mm5
pmulhw 40(%edi), %mm3
paddsw %mm6, %mm0 # accumulate
paddsw %mm5, %mm1
paddsw %mm3, %mm2
movq %mm7, %mm4 # copy 3rd component
movq %mm7, %mm6
pmulhw 48(%edi), %mm4 # mul third column
pmulhw 56(%edi), %mm6
pmulhw 64(%edi), %mm7
paddsw %mm4, %mm0 # accumulate
paddsw %mm6, %mm1
paddsw %mm7, %mm2
paddsw %mm0, %mm0 # double (fixed point normalization)
paddsw %mm1, %mm1
paddsw %mm2, %mm2
movq %mm0, (%esi) # store
movq %mm1, (%esi, %edx, 1)
movq %mm2, (%esi, %edx, 2)
addl $8, %esi # increment source pointer
decl %ecx
jnz .loop_crot3d # loop
emms
pop %edi
pop %esi
leave
ret
.globl pixel_crot2d_s16
.type pixel_crot2d_s16,@function
# 2 dimensional colour space rotation
# 2x2 matrix is column encoded, each coefficient is a 4x16 bit fixed point vector
# void pixel_crot2d_s16(int *buf, int nb_4pixel_vectors_per_plane, short int *matrix)
pixel_crot2d_s16:
pushl %ebp
movl %esp, %ebp
push %esi
push %edi
movl 8(%ebp), %esi # input array
movl 12(%ebp), %ecx # pixel count
movl 16(%ebp), %edi # rotation matrix
movl %ecx, %edx
shll $3, %edx # %edx = plane spacing
.align 16
.loop_crot2d:
movq (%esi), %mm0 # get 1st component
movq (%esi,%edx,1), %mm2 # get 2nd component
movq %mm0, %mm1 # copy 1st component
movq %mm2, %mm3 # copy 2nd component
pmulhw (%edi), %mm0 # mul first column
pmulhw 8(%edi), %mm1
pmulhw 16(%edi), %mm2 # mul second column
pmulhw 24(%edi), %mm3
paddsw %mm2, %mm0 # accumulate
paddsw %mm3, %mm1
paddsw %mm0, %mm0 # fixed point gain correction
paddsw %mm1, %mm1
movq %mm0, (%esi) # store
movq %mm1, (%esi, %edx, 1)
addl $8, %esi # increment source pointer
decl %ecx
jnz .loop_crot2d # loop
emms
pop %edi
pop %esi
leave
ret
|