
    xKgg                          d dl Z d dlmZ d dlmZmZ ddlmZ d Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zddddd d!d"d#d$d%d&d'd(d)d*Zd+ Z G d, d-e       Z!d. Z"y)/    N)ir)cgutilstargetconfig   )nvvmc                 B   dt        |      z   dz   }t        j                  t        j                  |      t        j                  t        j                  |            t        j                  |      t        j                  |      f      }t        j                  | ||      S )N___numba_atomic_i	_cas_hack)strr   FunctionTypeIntTypePointerTyper   get_or_insert_function)lmodisizefnamefntys       X/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/nvvmutils.pydeclare_atomic_cas_intr      sw    #e*,{:E??2::e,NN2::e+<=JJu-JJu-/0D ))$e<<    c                 P    | j                  |||dd      }| j                  |d      S )N	monotonicr   )cmpxchgextract_value)builderr   r   ptrcmpvalouts          r   atomic_cmpxchgr       s+    
//#sCk
BC  a((r   c                     d}t        j                  t        j                         t        j                  t        j                         d      t        j                         f      }t	        j
                  | ||      S )Nz#llvm.nvvm.atomic.load.add.f32.p0f32r   r   r   	FloatTyper   r   r   r   r   r   s      r   declare_atomic_add_float32r%      sR    1E??2<<>NN2<<>1=r||~NPD))$e<<r   c                 ^   t        j                         j                         }|j                  dk\  rd}nd}t	        j
                  t	        j                         t	        j                  t	        j                               t	        j                         f      }t        j                  | ||      S )N)   r   z#llvm.nvvm.atomic.load.add.f64.p0f64___numba_atomic_double_add)
r   ConfigStacktopcompute_capabilityr   r   
DoubleTyper   r   r   )r   flagsr   r   s       r   declare_atomic_add_float64r.      sz    $$&**,E6)5,??2==?NN2==?;R]]_MOD))$e<<r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_float_subr"   r$   s      r   declare_atomic_sub_float32r1   '   P    'E??2<<>NN2<<>:BLLNKMD))$e<<r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_double_subr   r   r,   r   r   r   r$   s      r   declare_atomic_sub_float64r6   .   P    (E??2==?NN2==?;R]]_MOD))$e<<r   c                     d}t        j                  t        j                  d      t        j                  t        j                  d            t        j                  d      f      }t	        j
                  | ||      S )Nz"llvm.nvvm.atomic.load.inc.32.p0i32    r   r   r   r   r   r   r$   s      r   declare_atomic_inc_int32r;   5   V    0E??2::b>NN2::b>:BJJrNKMD))$e<<r   c                     d}t        j                  t        j                  d      t        j                  t        j                  d            t        j                  d      f      }t	        j
                  | ||      S )N___numba_atomic_u64_inc@   r:   r$   s      r   declare_atomic_inc_int64r@   <   V    %E??2::b>NN2::b>:BJJrNKMD))$e<<r   c                     d}t        j                  t        j                  d      t        j                  t        j                  d            t        j                  d      f      }t	        j
                  | ||      S )Nz"llvm.nvvm.atomic.load.dec.32.p0i32r9   r:   r$   s      r   declare_atomic_dec_int32rC   C   r<   r   c                     d}t        j                  t        j                  d      t        j                  t        j                  d            t        j                  d      f      }t	        j
                  | ||      S )N___numba_atomic_u64_decr?   r:   r$   s      r   declare_atomic_dec_int64rF   J   rA   r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_float_maxr"   r$   s      r   declare_atomic_max_float32rI   Q   r2   r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_double_maxr5   r$   s      r   declare_atomic_max_float64rL   X   r7   r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_float_minr"   r$   s      r   declare_atomic_min_float32rO   _   r2   r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_double_minr5   r$   s      r   declare_atomic_min_float64rR   f   r7   r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_float_nanmaxr"   r$   s      r   declare_atomic_nanmax_float32rU   m   P    *E??2<<>NN2<<>:BLLNKMD))$e<<r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_double_nanmaxr5   r$   s      r   declare_atomic_nanmax_float64rY   t   P    +E??2==?NN2==?;R]]_MOD))$e<<r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_float_nanminr"   r$   s      r   declare_atomic_nanmin_float32r]   {   rV   r   c                     d}t        j                  t        j                         t        j                  t        j                               t        j                         f      }t	        j
                  | ||      S )N___numba_atomic_double_nanminr5   r$   s      r   declare_atomic_nanmin_float64r`      rZ   r   c                     d}t        j                  t        j                  d      t        j                  d      f      }t        j                  | ||      S )NcudaCGGetIntrinsicHandler?   r9   r   r   r   r   r   r$   s      r    declare_cudaCGGetIntrinsicHandlerd      sB    &E??2::b>JJrN,.D))$e<<r   c                     d}t        j                  t        j                  d      t        j                  d      t        j                  d      f      }t        j                  | ||      S )NcudaCGSynchronizer9   r?   rc   r$   s      r   declare_cudaCGSynchronizerg      sL    E??2::b>JJrNBJJrN;=D))$e<<r   c                    | j                   j                  j                  }t        j                  |j                  d      dz         }t        j                  ||j                  dt        j                        }d|_
        d|_        ||_        | j                  |t        j                  t        j                   d            d      S )	Nzutf-8    _str)name	addrspaceinternalT   generic)basic_blockfunctionmoduler   make_bytearrayencodeadd_global_variabletyper   ADDRSPACE_CONSTANTlinkageglobal_constantinitializeraddrspacecastr   r   r   )r   valuer   cvalgls        r   declare_stringr      s    ''..D!!%,,w"7'"ABD		$	$T4996/3/F/F
HBBJBBN  R^^BJJqM%BINNr   c                     t        j                  t        j                  d            }t        j                  t        j                  d      ||g      }t	        j
                  | |d      }|S )Nrn   r9   vprintf)r   r   r   r   r   r   )r   	voidptrty	vprintftyr   s       r   declare_vprintr      sP    rzz!}-I 

2I0FGI,,T9iHGNr   zllvm.nvvm.read.ptx.sreg.tid.xzllvm.nvvm.read.ptx.sreg.tid.yzllvm.nvvm.read.ptx.sreg.tid.zzllvm.nvvm.read.ptx.sreg.ntid.xzllvm.nvvm.read.ptx.sreg.ntid.yzllvm.nvvm.read.ptx.sreg.ntid.zzllvm.nvvm.read.ptx.sreg.ctaid.xzllvm.nvvm.read.ptx.sreg.ctaid.yzllvm.nvvm.read.ptx.sreg.ctaid.zz llvm.nvvm.read.ptx.sreg.nctaid.xz llvm.nvvm.read.ptx.sreg.nctaid.yz llvm.nvvm.read.ptx.sreg.nctaid.zz llvm.nvvm.read.ptx.sreg.warpsizezllvm.nvvm.read.ptx.sreg.laneid)ztid.xztid.yztid.zzntid.xzntid.yzntid.zzctaid.xzctaid.yzctaid.zznctaid.xznctaid.yznctaid.zwarpsizelaneidc                     | j                   }t        j                  t        j                  d      d      }t	        j
                  ||t        |         }| j                  |d      S )Nr9    )rr   r   r   r   r   r   SREG_MAPPINGcall)r   rk   rr   r   fns        r   	call_sregr      sM    ^^F??2::b>2.D		'	'l46H	IB<<Br   c                   0    e Zd Zd Zd Zd Zd Zd Zd Zy)SRegBuilderc                     || _         y N)r   )selfr   s     r   __init__zSRegBuilder.__init__   s	    r   c                 4    t        | j                  d|z        S )Nztid.%sr   r   r   xyzs     r   tidzSRegBuilder.tid   s    x#~66r   c                 4    t        | j                  d|z        S )Nzctaid.%sr   r   s     r   ctaidzSRegBuilder.ctaid   s    zC'788r   c                 4    t        | j                  d|z        S )Nzntid.%sr   r   s     r   ntidzSRegBuilder.ntid   s    y377r   c                 4    t        | j                  d|z        S )Nz	nctaid.%sr   r   s     r   nctaidzSRegBuilder.nctaid   s    {S'899r   c                    t        j                  d      }| j                  j                  | j	                  |      |      }| j                  j                  | j                  |      |      }| j                  j                  | j                  |      |      }| j                  j                  | j                  j                  ||      |      }|S )Nr?   )	r   r   r   sextr   r   r   addmul)r   r   i64r   r   r   ress          r   getdimzSRegBuilder.getdim   s    jjnlls3||  35""4::c?C8llt||//f=sC
r   N)	__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r   r      s     798:r   r   c                     t        |       fddD        }t        t        j                  |d |            }|dk(  r|d   S |S )Nc              3   @   K   | ]  }j                  |        y wr   )r   ).0r   sregs     r   	<genexpr>z get_global_id.<locals>.<genexpr>   s     	,es$++c
es   r   r   r   )r   list	itertoolsislice)r   dimitseqr   s       @r   get_global_idr      sE    wD	,e	,B
yD#.
/C
ax1v
r   )#r   llvmliter   
numba.corer   r   cudadrvr   r   r    r%   r.   r1   r6   r;   r@   rC   rF   rI   rL   rO   rR   rU   rY   r]   r`   rd   rg   r   r   r   r   objectr   r   r   r   r   <module>r      s      , =)
==================	O -,,...0002222.%, & 2r   