diff options
Diffstat (limited to 'externals/gridflow/cpu')
-rw-r--r-- | externals/gridflow/cpu/mmx.rb | 225 |
1 files changed, 225 insertions, 0 deletions
=begin
	$Id: mmx.rb,v 1.1 2005-10-04 02:02:14 matju Exp $

	GridFlow
	Copyright (c) 2001,2002,2003,2004 by Mathieu Bouchard

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	See file ../COPYING for further informations on licensing terms.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
=end

# Code generator: writes assembly source for MMX "map" and "zip" routines
# to the file named by ARGV[0] (STDOUT is redirected there, so every plain
# `puts` below emits assembly) and a C/C++ loader stub (declarations plus
# a startup_mmx() function that installs the routines) to the file named
# by ARGV[1].
#
# Usage: ruby mmx.rb ASM_OUTPUT LOADER_OUTPUT

# Fail with a readable message instead of a TypeError from STDOUT.reopen(nil).
abort "usage: ruby #{$0} ASM_OUTPUT LOADER_OUTPUT" if ARGV.size < 2

STDOUT.reopen ARGV[0], "w"
$loader = File.open ARGV[1], "w"
$count = 0   # number of asm functions generated so far
$lines = 0

puts "; generated by/for GridFlow 0.8.0"
$loader.puts "#include \"../base/grid.h.fcs\"\nextern \"C\" {"

# Emits the framing of one assembly function and hands out unique labels
# for loops inside it.
# this class is not really used yet (only self.make)
class AsmFunction
  # name: symbol name of the function; also used as the prefix of the
  # numbered local labels produced by make_until.
  def initialize(name)
    @name = name
    @label_count = 1
  end

  # Prints the GLOBAL directive, the label and the prologue (saving ebp,
  # esi, edi), yields a fresh AsmFunction so the block can emit the body,
  # then prints the matching epilogue.
  def self.make(name)
    puts "", "GLOBAL #{name}", "#{name}:"
    puts "push ebp", "mov ebp,esp", "push esi", "push edi"
    yield AsmFunction.new(name)
    puts "pop edi", "pop esi", "leave", "ret", ""
  end

  # Emits a loop that runs until the exit condition holds:
  #
  #   <name>_<a>:
  #     ops[0..-2]              (the test)
  #     ops[-1] <name>_<b>      (conditional jump out of the loop)
  #     ...whatever the block prints...
  #     jmp <name>_<a>
  #   <name>_<b>:
  #
  # ops: assembly lines; the last one must be a jump mnemonic without its
  # target, which is supplied here.  The caller's strings are left
  # untouched (the exit label is appended to a copy).
  # Raises ArgumentError when no ops are given.
  def make_until(*ops)
    raise ArgumentError, "make_until needs at least a jump opcode" if ops.empty?
    top  = "#{@name}_#{@label_count}"
    done = "#{@name}_#{@label_count+1}"
    @label_count += 2
    puts "#{top}: "
    # a bare `puts` would print an empty line, hence the guard
    puts(*ops[0...-1]) if ops.size > 1
    puts "#{ops[-1]} #{done}"
    yield
    puts "jmp #{top}"
    puts "#{done}:"
  end
end

# element size in bytes, per GridFlow number type
$sizeof = {
  :uint8 => 1,
  :int16 => 2,
  :int32 => 4,
  :int64 => 8,
  :float32 => 4,
  :float64 => 8,
}

# x86 accumulator register wide enough for one element of each type
$accum = {
  :uint8 => "al",
  :int16 => "ax",
  :int32 => "eax",
}

# assembler operand-size keyword for each type
$asm_type = {
  :uint8 => "byte",
  :int16 => "word",
  :int32 => "dword",
  :int64 => "qword",
}

# column of $opcodes rows that holds the MMX opcode for each type.
# Types without an entry here get no MMX routine: the float columns are
# not filled in, and :int64 has no scalar accumulator in $accum.
$mmx_column = {
  :uint8 => 2,
  :int16 => 3,
  :int32 => 4,
}

# in the following, the opcode "_" means no such thing seems available.
# also >> for x86 ought to be shr in the uint8 case.
# btw, i got all of the MMX information from the NASM manual, Appendix B.
$opcodes = {
#              [--GF--|--x86--|--mmx-et-al----------------------------------------]
#              [      |       |-uint8-|-int16-|-int32-|-int64-|-float32-|-float64-]
  :add     => %w[ +     add     paddb   paddw   paddd   paddq ],
  :sub     => %w[ -     sub     psubb   psubw   psubd   psubq ],
  :and     => %w[ &     and     pand    pand    pand    pand  ],
  :xor     => %w[ ^     xor     pxor    pxor    pxor    pxor  ],
  :or      => %w[ |     or      por     por     por     por   ],
# :max     => %w[ max   _       pmaxub  pmaxsw  _       _     ], # not plain MMX !!! (req.Katmai)
# :min     => %w[ min   _       pminub  pminsw  _       _     ], # not plain MMX !!! (req.Katmai)
# :eq      => %w[ ==    _       pcmpeqb pcmpeqw pcmpeqd _     ],
# :gt      => %w[ >     _       pcmpgtb pcmpgtw pcmpgtd _     ],
# :shl     => %w[ <<    shl     _       psllw   pslld   psllq ], # noncommutative
# :shr     => %w[ >>    sar     _       psraw   psrad   _     ], # noncommutative
# :clipadd => %w[ clip+ _       paddusb paddsw  _       _     ], # future use
# :clipsub => %w[ clip- _       psubusb psubsw  _       _     ], # future use
# :andnot  => %w[ ¬     _       pandn   pandn   pandn   pandn ], # not planned
}

# turn the "_" placeholders into nil and dump each row for debugging
$opcodes.each {|k,op|
  op.map! {|x| if x=="_" then nil else x end }
  STDERR.puts op.inspect
}

$decls = "".dup     # C prototypes of the generated functions
$install = "".dup   # C statements that register them with the Numop table

# Returns the MMX opcode of +op+ for +type+, or nil when there is none.
def mmx_opcode_for(op,type)
  column = $mmx_column[type]
  column && $opcodes[op][column]
end

# Generates mmx_<type>_map_<op>: applies +op+ between every element of an
# array and one scalar, in place.  The generated code reads its arguments
# from [ebp+8] (element count), [ebp+12] (array pointer) and [ebp+16]
# (scalar), matching the C prototype appended to $decls.
# Does nothing when the type has no MMX opcode for +op+.
def make_fun_map(op,type)
  s = "mmx_#{type}_map_#{op}"
  size = $sizeof[type]
  accum = $accum[type]
  sym = $opcodes[op][0]
  opcode = $opcodes[op][1]
  mopcode = mmx_opcode_for(op,type)
  return if not mopcode
  AsmFunction.make(s) {|a|
    puts "mov ecx,[ebp+8]", "mov esi,[ebp+12]", "mov eax,[ebp+16]"
    # replicate the scalar across all of eax, then across mm7 via the stack
    puts "mov dx,ax", "shl eax,8", "mov al,dl" if size==1
    puts "mov edx,eax", "shl eax,16", "mov ax,dx" if size<=2
    puts "push eax", "push eax", "movq mm7,[esp]", "add esp,8"
    # loop handling n quadwords (8*n bytes) per pass, at most 4 registers
    # at a time; exits once fewer than that many elements remain
    unrolled = proc {|n|
      a.make_until("cmp ecx,#{8/size*n}","jb near") {
        0.step(n-1,4) {|k|
          nn = [n-k,4].min
          o = (0..3).map {|x| 8*(x+k) }
          nn.times {|i| puts "movq mm#{i},[esi+#{o[i]}]" }
          nn.times {|i| puts "#{mopcode} mm#{i},mm7" }
          nn.times {|i| puts "movq [esi+#{o[i]}],mm#{i}" }
        }
        puts "lea esi,[esi+#{8*n}]", "lea ecx,[ecx-#{8/size*n}]"
      }
    }
    unrolled.call 4
    unrolled.call 1
    # leftover elements, one at a time with the plain x86 opcode
    a.make_until("test ecx,ecx", "jz") {
      puts "#{opcode} #{$asm_type[type]} [esi],#{accum}", "lea esi,[esi+#{size}]"
      puts "dec ecx"
    }
    puts "emms"
  }
  $decls << "void #{s}(int,#{type}*,#{type});\n"
  $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))"
  $install << "->on_#{type}.op_map = #{s};\n"
  $count += 1
end

# Generates mmx_<type>_zip_<op>: applies +op+ element-wise between two
# arrays, storing the result in the first one.  The generated code reads
# its arguments from [ebp+8] (element count), [ebp+12] (destination/left
# array) and [ebp+16] (right array), matching the C prototype appended to
# $decls.  The commented-out lines are a three-pointer variant with a
# separate output array in ebx.
# Does nothing when the type has no MMX opcode for +op+.
def make_fun_zip(op,type)
  s = "mmx_#{type}_zip_#{op}"
  size = $sizeof[type]
  accum = $accum[type]
  sym = $opcodes[op][0]
  opcode = $opcodes[op][1]
  mopcode = mmx_opcode_for(op,type)
  return if not mopcode
  AsmFunction.make(s) {|a|
    puts "mov ecx,[ebp+8]", "mov edi,[ebp+12]",
      "mov esi,[ebp+16]"#, "mov ebx,[ebp+20]"
    # loop handling n quadwords per pass: mm0..mm3 hold the left operands,
    # mm4..mm7 the right operands
    unrolled = proc {|n|
      a.make_until("cmp ecx,#{8/size*n}","jb near") {
        0.step(n-1,4) {|k|
          nn = [n-k,4].min
          o = (0..3).map {|x| 8*(x+k) }
          nn.times {|i| puts "movq mm#{i},[edi+#{o[i]}]" }
          nn.times {|i| puts "movq mm#{i+4},[esi+#{o[i]}]" }
          nn.times {|i| puts "#{mopcode} mm#{i},mm#{i+4}" }
          nn.times {|i| puts "movq [edi+#{o[i]}],mm#{i}" }
        }
        #for i in 0...n do puts "movq [ebx+#{8*i}],mm#{i}" end
        puts "lea edi,[edi+#{8*n}]"
        puts "lea esi,[esi+#{8*n}]"
        #puts "lea ebx,[ebx+#{8*n}]"
        puts "lea ecx,[ecx-#{8/size*n}]"
      }
    }
    unrolled.call 4
    unrolled.call 1
    # leftover elements, one at a time with the plain x86 opcode
    a.make_until("test ecx,ecx", "jz") {
      # requires commutativity ??? fails with shl, shr
      puts "mov #{accum},[esi]"
      puts "#{opcode} #{$asm_type[type]} [edi],#{accum}"
      #puts "mov #{accum},[edi]"
      #puts "#{opcode} #{accum},[esi]"
      #puts "mov [ebx],#{accum}"
      puts "lea edi,[edi+#{size}]"
      puts "lea esi,[esi+#{size}]"
      #puts "lea ebx,[ebx+#{size}]"
      puts "dec ecx"
    }
    puts "emms"
  }
  #$decls << "void #{s}(int,#{type}*,#{type}*,#{type}*);\n"
  $decls << "void #{s}(int,#{type}*,#{type}*);\n"
  $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))"
  $install << "->on_#{type}.op_zip = #{s};\n"
  $count += 1
end

# one map and one zip routine per (operator, type) pair
$opcodes.keys.each {|op|
  [:uint8, :int16#, :int32
  ].each {|type|
    make_fun_map(op,type)
    make_fun_zip(op,type)
  }
}

$loader.puts $decls
$loader.puts %`
}; /* extern */
#include <stdlib.h>
void startup_mmx_loader () {/*bogus*/}
void startup_mmx () {
	if (getenv("NO_MMX")) return;
	if (EVAL(\"GridFlow.bridge_name\")!=Qnil) gfpost(\"startup_cpu: using MMX optimisations\");
	#{$install}
}`
# flush the loader stub explicitly rather than relying on interpreter exit
$loader.close

STDERR.puts "automatically generated #{$count} MMX asm functions"

=begin notes:
CPUID has a bit for detecting MMX
PACKSSDW PACKSSWB PACKUSWB = saturation-casting
PCMPxx: Compare Packed Integers
PMULHW, PMULLW: Multiply Packed _unsigned_ 16-bit Integers, and Store
PUNPCKxxx: Unpack and Interleave Data
=end