aboutsummaryrefslogtreecommitdiff
path: root/externals/gridflow/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'externals/gridflow/cpu')
-rw-r--r--externals/gridflow/cpu/mmx.rb225
1 files changed, 225 insertions, 0 deletions
diff --git a/externals/gridflow/cpu/mmx.rb b/externals/gridflow/cpu/mmx.rb
new file mode 100644
index 00000000..1a3b15d3
--- /dev/null
+++ b/externals/gridflow/cpu/mmx.rb
@@ -0,0 +1,225 @@
+=begin
+ $Id: mmx.rb,v 1.1 2005-10-04 02:02:14 matju Exp $
+
+ GridFlow
+ Copyright (c) 2001,2002,2003,2004 by Mathieu Bouchard
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ See file ../COPYING for further information on licensing terms.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+=end
+
+STDOUT.reopen ARGV[0], "w"
+$loader = File.open ARGV[1], "w"
+$count = 0
+$lines = 0
+
+puts "; generated by/for GridFlow 0.8.0"
+$loader.puts "#include \"../base/grid.h.fcs\"\nextern \"C\" {"
+
+# this class is not really used yet (only self.make)
+class AsmFunction
+ def initialize(name)
+ @name = name
+ @label_count = 1
+ end
+ def self.make(name)
+ puts "", "GLOBAL #{name}", "#{name}:"
+ puts "push ebp", "mov ebp,esp", "push esi", "push edi"
+ yield AsmFunction.new(name)
+ puts "pop edi", "pop esi", "leave", "ret", ""
+ end
+ def make_until(*ops)
+ a = @label_count
+ b = @label_count+1
+ @label_count+=2
+ ops[-1]<<" #{@name}_#{b}"
+ puts "#{@name}_#{a}: ", *ops
+ yield
+ puts "jmp #{@name}_#{a}"
+ puts "#{@name}_#{b}:"
+ end
+end
+
+$sizeof = {
+ :uint8 => 1,
+ :int16 => 2,
+ :int32 => 4,
+ :int64 => 8,
+ :float32 => 4,
+ :float64 => 8,
+}
+
+$accum = {
+ :uint8 => "al",
+ :int16 => "ax",
+ :int32 => "eax",
+}
+
+$asm_type = {
+ :uint8 => "byte",
+ :int16 => "word",
+ :int32 => "dword",
+ :int64 => "qword",
+}
+
+# in the following, the opcode "_" means no such thing seems available.
+# also >> for x86 ought to be shr in the uint8 case.
+# btw, i got all of the MMX information from the NASM manual, Appendix B.
+$opcodes = {
+# [--GF--|--x86--|--mmx-et-al----------------------------------------]
+# [ | |-uint8-|-int16-|-int32-|-int64-|-float32-|-float64-]
+ :add => %w[ + add paddb paddw paddd paddq ],
+ :sub => %w[ - sub psubb psubw psubd psubq ],
+ :and => %w[ & and pand pand pand pand ],
+ :xor => %w[ ^ xor pxor pxor pxor pxor ],
+ :or => %w[ | or por por por por ],
+# :max => %w[ max _ pmaxub pmaxsw _ _ ], # not plain MMX !!! (req.Katmai)
+# :min => %w[ min _ pminub pminsw _ _ ], # not plain MMX !!! (req.Katmai)
+# :eq => %w[ == _ pcmpeqb pcmpeqw pcmpeqd _ ],
+# :gt => %w[ > _ pcmpgtb pcmpgtw pcmpgtd _ ],
+# :shl => %w[ << shl _ psllw pslld psllq ], # noncommutative
+# :shr => %w[ >> sar _ psraw psrad _ ], # noncommutative
+# :clipadd => %w[ clip+ _ paddusb paddsw _ _ ], # future use
+# :clipsub => %w[ clip- _ psubusb psubsw _ _ ], # future use
+# :andnot => %w[ &not _ pandn pandn pandn pandn ], # not planned
+}
+
+$opcodes.each {|k,op|
+ op.map! {|x| if x=="_" then nil else x end }
+ STDERR.puts op.inspect
+}
+
+$decls = ""
+$install = ""
+
+def make_fun_map(op,type)
+ s="mmx_#{type}_map_#{op}"
+ size = $sizeof[type]
+ accum = $accum[type]
+ sym = $opcodes[op][0]
+ opcode = $opcodes[op][1]
+ mopcode = $opcodes[op][size+(size<4 ? 1 : 0)]
+ return if not mopcode
+ AsmFunction.make(s) {|a|
+ puts "mov ecx,[ebp+8]", "mov esi,[ebp+12]", "mov eax,[ebp+16]"
+ puts "mov dx,ax", "shl eax,8", "mov al,dl" if size==1
+ puts "mov edx,eax", "shl eax,16", "mov ax,dx" if size<=2
+ puts "push eax", "push eax", "movq mm7,[esp]", "add esp,8"
+ foo = proc {|n|
+ a.make_until("cmp ecx,#{8/size*n}","jb near") {
+ 0.step(n,4) {|k|
+ nn=[n-k,4].min
+ o=(0..3).map{|x| 8*(x+k) }
+ for i in 0...nn do puts "movq mm#{i},[esi+#{o[i]}]" end
+ for i in 0...nn do puts "#{mopcode} mm#{i},mm7" end
+ for i in 0...nn do puts "movq [esi+#{o[i]}],mm#{i}" end
+ }
+ puts "lea esi,[esi+#{8*n}]", "lea ecx,[ecx-#{8/size*n}]"
+ }
+ }
+ foo.call 4
+ foo.call 1
+ a.make_until("test ecx,ecx", "jz") {
+ puts "#{opcode} #{$asm_type[type]} [esi],#{accum}", "lea esi,[esi+#{size}]"
+ puts "dec ecx"
+ }
+ puts "emms"
+ }
+ $decls << "void #{s}(int,#{type}*,#{type});\n"
+ $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))"
+ $install << "->on_#{type}.op_map = #{s};\n"
+ $count += 1
+end
+
+def make_fun_zip(op,type)
+s="mmx_#{type}_zip_#{op}"
+ size = $sizeof[type]
+ accum = $accum[type]
+ sym = $opcodes[op][0]
+ opcode = $opcodes[op][1]
+ mopcode = $opcodes[op][size+(size<4 ? 1 : 0)]
+ return if not mopcode
+ AsmFunction.make(s) {|a|
+ puts "mov ecx,[ebp+8]", "mov edi,[ebp+12]",
+ "mov esi,[ebp+16]"#, "mov ebx,[ebp+20]"
+ foo = proc {|n|
+ a.make_until("cmp ecx,#{8/size*n}","jb near") {
+ 0.step(n,4) {|k|
+ nn=[n-k,4].min
+ o=(0..3).map{|x| 8*(x+k) }
+ for i in 0...nn do puts "movq mm#{i},[edi+#{o[i]}]" end
+ for i in 0...nn do puts "movq mm#{i+4},[esi+#{o[i]}]" end
+ for i in 0...nn do puts "#{mopcode} mm#{i},mm#{i+4}" end
+ for i in 0...nn do puts "movq [edi+#{o[i]}],mm#{i}" end
+ }
+ #for i in 0...n do puts "movq [ebx+#{8*i}],mm#{i}" end
+ puts "lea edi,[edi+#{8*n}]"
+ puts "lea esi,[esi+#{8*n}]"
+ #puts "lea ebx,[ebx+#{8*n}]"
+ puts "lea ecx,[ecx-#{8/size*n}]"
+ }
+ }
+ foo.call 4
+ foo.call 1
+ a.make_until("test ecx,ecx", "jz") {
+ # requires commutativity ??? fails with shl, shr
+ puts "mov #{accum},[esi]"
+ puts "#{opcode} #{$asm_type[type]} [edi],#{accum}"
+ #puts "mov #{accum},[edi]"
+ #puts "#{opcode} #{accum},[esi]"
+ #puts "mov [ebx],#{accum}"
+ puts "lea edi,[edi+#{size}]"
+ puts "lea esi,[esi+#{size}]"
+ #puts "lea ebx,[ebx+#{size}]"
+ puts "dec ecx"
+ }
+ puts "emms"
+ }
+ #$decls << "void #{s}(int,#{type}*,#{type}*,#{type}*);\n"
+ $decls << "void #{s}(int,#{type}*,#{type}*);\n"
+ $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))"
+ $install << "->on_#{type}.op_zip = #{s};\n"
+ $count += 1
+end
+
+for op in $opcodes.keys do
+ for type in [:uint8, :int16#, :int32
+ ] do
+ make_fun_map(op,type)
+ make_fun_zip(op,type)
+ end
+end
+
+$loader.puts $decls
+$loader.puts %`
+}; /* extern */
+#include <stdlib.h>
+void startup_mmx_loader () {/*bogus*/}
+void startup_mmx () {
+ if (getenv("NO_MMX")) return;
+ if (EVAL(\"GridFlow.bridge_name\")!=Qnil) gfpost(\"startup_cpu: using MMX optimisations\");
+ #{$install}
+}`
+
+STDERR.puts "automatically generated #{$count} MMX asm functions"
+
+=begin notes:
+CPUID has a bit for detecting MMX
+PACKSSDW PACKSSWB PACKUSWB = saturation-casting
+PCMPxx: Compare Packed Integers
+PMULHW, PMULLW: Multiply Packed _signed_ 16-bit Integers, and Store
+PUNPCKxxx: Unpack and Interleave Data
+=end