diff options
Diffstat (limited to 'externals/gridflow/cpu')
-rw-r--r-- | externals/gridflow/cpu/mmx.rb | 225 |
1 files changed, 0 insertions, 225 deletions
# Recovered from a deletion diff:
#   diff --git a/externals/gridflow/cpu/mmx.rb b/externals/gridflow/cpu/mmx.rb
#   deleted file mode 100644
#   index 1a3b15d3..00000000
#   --- a/externals/gridflow/cpu/mmx.rb
#   +++ /dev/null
#   @@ -1,225 +0,0 @@
=begin
	$Id: mmx.rb,v 1.1 2005-10-04 02:02:14 matju Exp $

	GridFlow
	Copyright (c) 2001,2002,2003,2004 by Mathieu Bouchard

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	See file ../COPYING for further informations on licensing terms.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
=end

# Code generator.
#
# Usage: ruby mmx.rb ASM_OUTPUT LOADER_OUTPUT
#
# * ASM_OUTPUT receives assembly text (everything written with +puts+, because
#   STDOUT is reopened onto that file).
# * LOADER_OUTPUT receives C/C++ text: one prototype per generated function and
#   a +startup_mmx+ function that stores each function pointer into the
#   matching Numop entry.
# * STDERR receives the opcode table dump and the final function count.

# Fail with a readable message instead of a TypeError from STDOUT.reopen(nil).
abort "usage: ruby #{$0} ASM_OUTPUT LOADER_OUTPUT" if ARGV.size < 2

STDOUT.reopen ARGV[0], "w"
$loader = File.open(ARGV[1], "w")
$count = 0
$lines = 0 # never read anywhere in this script; kept for compatibility

puts "; generated by/for GridFlow 0.8.0"
$loader.puts "#include \"../base/grid.h.fcs\"\nextern \"C\" {"

# Emits the text of one assembly function on standard output.
# this class is not really used yet (only self.make)
class AsmFunction
  # name:: symbol name of the function; also the prefix of its local labels.
  def initialize(name)
    @name = name
    @label_count = 1
  end

  # Prints the GLOBAL directive, the entry label and the prologue
  # (saves ebp, esi, edi), yields a new AsmFunction for the body, then
  # prints the epilogue.
  def self.make(name)
    puts "", "GLOBAL #{name}", "#{name}:"
    puts "push ebp", "mov ebp,esp", "push esi", "push edi"
    yield AsmFunction.new(name)
    puts "pop edi", "pop esi", "leave", "ret", ""
  end

  # Emits a loop: a top label, the given instructions, the block's output,
  # a jump back to the top label, and an exit label.
  #
  # ops:: instruction strings; the LAST one must be a conditional jump
  #       mnemonic (e.g. "jz"), to which the exit label is appended.
  #
  # Raises ArgumentError when no instruction is given.
  def make_until(*ops)
    raise ArgumentError, "make_until needs at least a jump instruction" if ops.empty?
    top = @label_count
    done = @label_count + 1
    @label_count += 2
    # Build a new string rather than appending to the caller's object:
    # the arguments are usually string literals, which may be frozen.
    exit_jump = "#{ops[-1]} #{@name}_#{done}"
    puts "#{@name}_#{top}: ", *(ops[0...-1] + [exit_jump])
    yield
    puts "jmp #{@name}_#{top}"
    puts "#{@name}_#{done}:"
  end
end

# Size in bytes of one element of each number type.
$sizeof = {
  :uint8 => 1,
  :int16 => 2,
  :int32 => 4,
  :int64 => 8,
  :float32 => 4,
  :float64 => 8,
}

# x86 accumulator register wide enough for one element.
$accum = {
  :uint8 => "al",
  :int16 => "ax",
  :int32 => "eax",
}

# Operand-size keyword used for memory operands.
$asm_type = {
  :uint8 => "byte",
  :int16 => "word",
  :int32 => "dword",
  :int64 => "qword",
}

# in the following, the opcode "_" means no such thing seems available.
# also >> for x86 ought to be shr in the uint8 case.
# btw, i got all of the MMX information from the NASM manual, Appendix B.
$opcodes = {
#            [--GF--|--x86--|--mmx-et-al----------------------------------------]
#            [      |       |-uint8-|-int16-|-int32-|-int64-|-float32-|-float64-]
  :add => %w[ + add paddb paddw paddd paddq ],
  :sub => %w[ - sub psubb psubw psubd psubq ],
  :and => %w[ & and pand pand pand pand ],
  :xor => %w[ ^ xor pxor pxor pxor pxor ],
  :or  => %w[ | or por por por por ],
# :max => %w[ max _ pmaxub pmaxsw _ _ ], # not plain MMX !!! (req.Katmai)
# :min => %w[ min _ pminub pminsw _ _ ], # not plain MMX !!! (req.Katmai)
# :eq => %w[ == _ pcmpeqb pcmpeqw pcmpeqd _ ],
# :gt => %w[ > _ pcmpgtb pcmpgtw pcmpgtd _ ],
# :shl => %w[ << shl _ psllw pslld psllq ], # noncommutative
# :shr => %w[ >> sar _ psraw psrad _ ], # noncommutative
# :clipadd => %w[ clip+ _ paddusb paddsw _ _ ], # future use
# :clipsub => %w[ clip- _ psubusb psubsw _ _ ], # future use
# :andnot => %w[ ¬ _ pandn pandn pandn pandn ], # not planned
}

# Replace the "_" placeholders by nil and dump each row on STDERR.
$opcodes.each {|_name, row|
  row.map! {|x| x == "_" ? nil : x }
  STDERR.puts row.inspect
}

$decls = ""
$install = ""

# Generates "mmx_<type>_map_<op>": applies one scalar to every element of an
# array, in place. Prototype written to the loader: void f(int, type*, type).
#
# op::   key of $opcodes
# type:: key of $sizeof / $accum / $asm_type
#
# Returns nil without emitting anything when the table has no MMX opcode in
# the selected column; otherwise returns the updated $count.
def make_fun_map(op,type)
  s = "mmx_#{type}_map_#{op}"
  size = $sizeof[type]
  accum = $accum[type]
  sym = $opcodes[op][0]
  opcode = $opcodes[op][1]
  # Column lookup as in the original: 1 -> 2, 2 -> 3, 4 -> 4, 8 -> 8.
  mopcode = $opcodes[op][size+(size<4 ? 1 : 0)]
  return unless mopcode
  per_quad = 8 / size # elements held by one 8-byte MMX register
  AsmFunction.make(s) {|a|
    # ecx = 1st argument (count), esi = 2nd (pointer), eax = 3rd (scalar).
    puts "mov ecx,[ebp+8]", "mov esi,[ebp+12]", "mov eax,[ebp+16]"
    # Replicate the scalar: byte -> word, word -> dword, then two dwords
    # pushed on the stack are loaded as one quadword into mm7.
    puts "mov dx,ax", "shl eax,8", "mov al,dl" if size==1
    puts "mov edx,eax", "shl eax,16", "mov ax,dx" if size<=2
    puts "push eax", "push eax", "movq mm7,[esp]", "add esp,8"
    # Loop handling n quadwords per iteration, at most 4 registers at a time.
    unrolled = proc {|n|
      a.make_until("cmp ecx,#{per_quad*n}","jb near") {
        0.step(n - 1, 4) {|k|
          regs = 0...[n-k,4].min
          regs.each {|i| puts "movq mm#{i},[esi+#{8*(k+i)}]" }
          regs.each {|i| puts "#{mopcode} mm#{i},mm7" }
          regs.each {|i| puts "movq [esi+#{8*(k+i)}],mm#{i}" }
        }
        puts "lea esi,[esi+#{8*n}]", "lea ecx,[ecx-#{per_quad*n}]"
      }
    }
    unrolled.call 4
    unrolled.call 1
    # Remaining elements, one at a time with the plain x86 opcode.
    a.make_until("test ecx,ecx", "jz") {
      puts "#{opcode} #{$asm_type[type]} [esi],#{accum}", "lea esi,[esi+#{size}]"
      puts "dec ecx"
    }
    puts "emms"
  }
  $decls << "void #{s}(int,#{type}*,#{type});\n"
  $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))"
  $install << "->on_#{type}.op_map = #{s};\n"
  $count += 1
end

# Generates "mmx_<type>_zip_<op>": combines two arrays element by element,
# storing the result in the first one. Prototype written to the loader:
# void f(int, type*, type*).
#
# op::   key of $opcodes
# type:: key of $sizeof / $accum / $asm_type
#
# Returns nil without emitting anything when the table has no MMX opcode in
# the selected column; otherwise returns the updated $count.
def make_fun_zip(op,type)
  s = "mmx_#{type}_zip_#{op}"
  size = $sizeof[type]
  accum = $accum[type]
  sym = $opcodes[op][0]
  opcode = $opcodes[op][1]
  # Column lookup as in the original: 1 -> 2, 2 -> 3, 4 -> 4, 8 -> 8.
  mopcode = $opcodes[op][size+(size<4 ? 1 : 0)]
  return unless mopcode
  per_quad = 8 / size # elements held by one 8-byte MMX register
  AsmFunction.make(s) {|a|
    # ecx = 1st argument (count), edi = 2nd (pointer), esi = 3rd (pointer).
    puts "mov ecx,[ebp+8]", "mov edi,[ebp+12]",
         "mov esi,[ebp+16]"
    #, "mov ebx,[ebp+20]"
    # Loop handling n quadwords per iteration, at most 4 register pairs at a time.
    unrolled = proc {|n|
      a.make_until("cmp ecx,#{per_quad*n}","jb near") {
        0.step(n - 1, 4) {|k|
          regs = 0...[n-k,4].min
          regs.each {|i| puts "movq mm#{i},[edi+#{8*(k+i)}]" }
          regs.each {|i| puts "movq mm#{i+4},[esi+#{8*(k+i)}]" }
          regs.each {|i| puts "#{mopcode} mm#{i},mm#{i+4}" }
          regs.each {|i| puts "movq [edi+#{8*(k+i)}],mm#{i}" }
        }
        #for i in 0...n do puts "movq [ebx+#{8*i}],mm#{i}" end
        puts "lea edi,[edi+#{8*n}]"
        puts "lea esi,[esi+#{8*n}]"
        #puts "lea ebx,[ebx+#{8*n}]"
        puts "lea ecx,[ecx-#{per_quad*n}]"
      }
    }
    unrolled.call 4
    unrolled.call 1
    # Remaining elements, one at a time with the plain x86 opcode.
    a.make_until("test ecx,ecx", "jz") {
      # requires commutativity ??? fails with shl, shr
      puts "mov #{accum},[esi]"
      puts "#{opcode} #{$asm_type[type]} [edi],#{accum}"
      #puts "mov #{accum},[edi]"
      #puts "#{opcode} #{accum},[esi]"
      #puts "mov [ebx],#{accum}"
      puts "lea edi,[edi+#{size}]"
      puts "lea esi,[esi+#{size}]"
      #puts "lea ebx,[ebx+#{size}]"
      puts "dec ecx"
    }
    puts "emms"
  }
  #$decls << "void #{s}(int,#{type}*,#{type}*,#{type}*);\n"
  $decls << "void #{s}(int,#{type}*,#{type}*);\n"
  $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))"
  $install << "->on_#{type}.op_zip = #{s};\n"
  $count += 1
end

# One map and one zip function per enabled opcode and element type.
$opcodes.keys.each {|op|
  [:uint8, :int16].each {|type| # :int32 is disabled in the original
    make_fun_map(op,type)
    make_fun_zip(op,type)
  }
}

$loader.puts $decls
$loader.puts %`
}; /* extern */
#include <stdlib.h>
void startup_mmx_loader () {/*bogus*/}
void startup_mmx () {
	if (getenv("NO_MMX")) return;
	if (EVAL(\"GridFlow.bridge_name\")!=Qnil) gfpost(\"startup_cpu: using MMX optimisations\");
	#{$install}
}`
# Flush and release the loader file explicitly instead of relying on exit.
$loader.close

STDERR.puts "automatically generated #{$count} MMX asm functions"

=begin notes:
CPUID has a bit for detecting MMX
PACKSSDW PACKSSWB PACKUSWB = saturation-casting
PCMPxx: Compare Packed Integers
PMULHW, PMULLW: Multiply Packed _unsigned_ 16-bit Integers, and Store
PUNPCKxxx: Unpack and Interleave Data
=end