
    xKgZ                     &   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ  e       Zej8                  Zej:                  Zej<                  Z ee        G d	 d
e      Ze G d de             Z e G d de             Z!e G d de             Z"e G d de             Z#e G d de             Z$e G d de             Z%e G d de             Z&e G d de             Z'e G d de             Z(e G d de             Z)e G d d e             Z*e G d! d"e             Z+e G d# d$e             Z,e G d% d&e             Z-e G d' d(e             Z.e G d) d*e             Z/e G d+ d,e             Z0e G d- d.e             Z1e G d/ d0e             Z2e G d1 d2e             Z3e G d3 d4e             Z4d5 Z5d6 Z6d7 Z7 ee8       G d8 d9e             Z9d: Z:d; Z;d< Z<d= Z= e7ej|                  j~                        Z@ e=e j                        ZB e=e j                        ZD e7ej|                  j                        ZF e=e j                        ZH e=e j                        ZJ e7ej|                  j                        ZL e=e j                        ZN e=e j                        ZP e7ej|                  j                        ZR e7ej|                  j                        ZT e5ej|                  j                        ZV e6e j                        ZX e5ej|                  j                        ZZ e6e[      Z\ e:ej|                  j                        Z^ e<e j                          e:ej|                  j                        Za e<e j                          e:ej|                  j                        Zd e<e j                          e:ej|                  j                        Zg e<e j                          e:ej|                  j                        Zj e<e j                          e:ej|                  j                        Zm e<e j                          e=e j                          e=e j                         d> Zqd? Zr eqd@      Zs eqdA      Zt eqdB      Zu eqdC      Zv eqdD      Zw eqdE      Zx eqdF      Zy eqdG      Zz eqdH      Z{ eqdI      Z| eqdJ      Z} eqdK      Z~ eqdL      Z eqdM      Z eqdN      Z erdO      ZdP Zej                  ej
                  ej                  ej                  ej                  ej                  fZej                  ej                  ej                  ej                  fZej                  ej                  fZ eej                  j                  e      Z eej                  j                  e      Z eej                  j                   e      Z eej                  j$                  e      Z eej                  j(                  e      Z eej                  j,                  e      Z eej                  j0                  e      Z eej                  j4                  e      Z eej                  j8                  e      Z eej                  j<                  e      Z eej                  j@                  e      Z eej                  jD                  e      Ze G dQ dRe             Ze G dS dTe             Ze G dU dVe             Ze G dW dXe             Ze G dY dZe             Ze G d[ d\e             Ze G d] d^e             Ze G d_ d`e             Ze G da dbe             Ze G dc dde             Z ee ej\                  e             eD ]  Z eee        e	D ]  Z eee        eD ]  Z eee        e
D ]  Zedev s eee        y)f    N)types)parse_dtypeparse_shaperegister_number_classesregister_numpy_ufunctrigonometric_functionscomparison_functionsmath_operationsbit_twiddling_functions)AttributeTemplateConcreteTemplateAbstractTemplateCallableTemplate	signatureRegistrydim3)
Conversion)cuda) declare_device_function_templatec                       e Zd Zd Zy)Cuda_array_declc                     d }|S )Nc           	         t        | t        j                        rt        | t        j                        s_y t        | t        j                  t        j
                  f      r3t        | D cg c]  }t        |t        j                          c}      ry y t        |       }t        |      }||t        j                  ||d      S y y c c}w )NC)dtypendimlayout)

isinstancer   IntegerIntegerLiteralTupleUniTupleanyr   r   Array)shaper   sr   nb_dtypes        W/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/cudadecl.pytyperz&Cuda_array_decl.generic.<locals>.typer   s     %/!%)=)=>EEKK#@A!&(!&A 'q%*>*>??!&( )u%D"5)H#(8{{SII )9#(s   )"C selfr*   s     r)   genericzCuda_array_decl.generic   s    	J&     N__name__
__module____qualname__r.   r+   r/   r)   r   r      s    r/   r   c                   8    e Zd Zej                  j
                  Zy)Cuda_shared_arrayN)r1   r2   r3   r   sharedarraykeyr+   r/   r)   r5   r5   2   s    
++

Cr/   r5   c                   8    e Zd Zej                  j
                  Zy)Cuda_local_arrayN)r1   r2   r3   r   localr7   r8   r+   r/   r)   r:   r:   7   s    
**

Cr/   r:   c                   >    e Zd Zej                  j
                  Zd Zy)Cuda_const_array_likec                     d }|S )Nc                     | S Nr+   )ndarrays    r)   r*   z,Cuda_const_array_like.generic.<locals>.typerA   s    Nr/   r+   r,   s     r)   r.   zCuda_const_array_like.generic@   s    	r/   N)r1   r2   r3   r   const
array_liker8   r.   r+   r/   r)   r=   r=   <   s    
**

Cr/   r=   c                   J    e Zd Zej                  Z eej                        gZ	y)Cuda_threadfence_deviceN)
r1   r2   r3   r   threadfencer8   r   r   nonecasesr+   r/   r)   rE   rE   F   s    


Cuzz"#Er/   rE   c                   J    e Zd Zej                  Z eej                        gZ	y)Cuda_threadfence_blockN)
r1   r2   r3   r   threadfence_blockr8   r   r   rG   rH   r+   r/   r)   rJ   rJ   L   s    

 
 Cuzz"#Er/   rJ   c                   J    e Zd Zej                  Z eej                        gZ	y)Cuda_threadfence_systemN)
r1   r2   r3   r   threadfence_systemr8   r   r   rG   rH   r+   r/   r)   rM   rM   R   s    

!
!Cuzz"#Er/   rM   c                       e Zd Zej                  Z eej                         eej                  ej                        gZ
y)Cuda_syncwarpN)r1   r2   r3   r   syncwarpr8   r   r   rG   i4rH   r+   r/   r)   rP   rP   X   s-    
--Cuzz"Iejj%(($CDEr/   rP   c                   H   e Zd Zej                  Z e ej                  ej                  ej                  f      ej                  ej                  ej                  ej                  ej                         e ej                  ej                  ej                  f      ej                  ej                  ej                  ej                  ej                         e ej                  ej                  ej                  f      ej                  ej                  ej                  ej                  ej                         e ej                  ej                  ej                  f      ej                  ej                  ej                  ej                  ej                        gZy)Cuda_shfl_sync_intrinsicN)r1   r2   r3   r   shfl_sync_intrinsicr8   r   r   r"   rR   b1i8f4f8rH   r+   r/   r)   rT   rT   ^   s   

"
"C+%++uxx23((EHHehh%((	D+%++uxx23((EHHehh%((	D+%++uxx23((EHHehh%((	D+%++uxx23((EHHehh%((	D	Er/   rT   c                       e Zd Zej                  Z e ej                  ej                  ej                  f      ej                  ej                  ej                        gZy)Cuda_vote_sync_intrinsicN)r1   r2   r3   r   vote_sync_intrinsicr8   r   r   r"   rR   rV   rH   r+   r/   r)   r[   r[   m   sI    

"
"C{u{{EHHehh#78xx5885 6Er/   r[   c                   `   e Zd Zej                  Z eej                  ej                  ej                         eej                  ej                  ej                         eej                  ej                  ej                         eej                  ej                  ej                        gZy)Cuda_match_any_syncN)r1   r2   r3   r   match_any_syncr8   r   r   rR   rW   rX   rY   rH   r+   r/   r)   r^   r^   t   st    


C%((EHHehh/%((EHHehh/%((EHHehh/%((EHHehh/	Er/   r^   c            	       @   e Zd Zej                  Z e ej                  ej                  ej                  f      ej                  ej                         e ej                  ej                  ej                  f      ej                  ej                         e ej                  ej                  ej                  f      ej                  ej                         e ej                  ej                  ej                  f      ej                  ej                        gZy)Cuda_match_all_syncN)r1   r2   r3   r   match_all_syncr8   r   r   r"   rR   rV   rW   rX   rY   rH   r+   r/   r)   ra   ra      s    


C+%++uxx23UXXuxxH+%++uxx23UXXuxxH+%++uxx23UXXuxxH+%++uxx23UXXuxxH	Er/   ra   c                   J    e Zd Zej                  Z eej                        gZ	y)Cuda_activemaskN)
r1   r2   r3   r   
activemaskr8   r   r   uint32rH   r+   r/   r)   rd   rd      s    
//Cu||$%Er/   rd   c                   J    e Zd Zej                  Z eej                        gZ	y)Cuda_lanemask_ltN)
r1   r2   r3   r   lanemask_ltr8   r   r   rf   rH   r+   r/   r)   rh   rh      s    


Cu||$%Er/   rh   c                      e Zd ZdZej
                  Z eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                   ej                         gZy)	Cuda_popcz
    Supported types from `llvm.popc`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r1   r2   r3   __doc__r   popcr8   r   r   int8int16int32int64uint8uint16rf   uint64rH   r+   r/   r)   rk   rk      s     ))C%**ejj)%++u{{+%++u{{+%++u{{+%++u{{+%,,-%,,-%,,-	Er/   rk   c                       e Zd ZdZej
                  Z eej                  ej                  ej                  ej                         eej                  ej                  ej                  ej                        gZy)Cuda_fmaz
    Supported types from `llvm.fma`
    [here](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#standard-c-library-intrinics)
    N)r1   r2   r3   rl   r   fmar8   r   r   float32float64rH   r+   r/   r)   rv   rv      sU     ((C%--u}}M%--u}}MEr/   rv   c                       e Zd Zej                  j
                  Z eej                  ej                  ej                  ej                        gZ
y)	Cuda_hfmaN)r1   r2   r3   r   fp16hfmar8   r   r   float16rH   r+   r/   r)   r{   r{      s4    
))..C%--u}}MEr/   r{   c                       e Zd Zej                  Z eej                  ej                         eej                  ej                        gZ
y)	Cuda_cbrtN)r1   r2   r3   r   cbrtr8   r   r   rx   ry   rH   r+   r/   r)   r   r      s8     ))C%--/%--/Er/   r   c                       e Zd Zej                  Z eej                  ej                         eej                  ej                        gZ
y)	Cuda_brevN)r1   r2   r3   r   brevr8   r   r   rf   rt   rH   r+   r/   r)   r   r      s6    
))C%,,-%,,-Er/   r   c                      e Zd ZdZej
                  Z eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                   ej                         gZy)Cuda_clzz
    Supported types from `llvm.ctlz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r1   r2   r3   rl   r   clzr8   r   r   rn   ro   rp   rq   rr   rs   rf   rt   rH   r+   r/   r)   r   r      s     ((C%**ejj)%++u{{+%++u{{+%++u{{+%++u{{+%,,-%,,-%,,-	Er/   r   c                      e Zd ZdZej
                  Z eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         eej                  ej                         gZy)Cuda_ffsz
    Supported types from `llvm.cttz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r1   r2   r3   rl   r   ffsr8   r   r   rf   rn   ro   rp   rq   rr   rs   rt   rH   r+   r/   r)   r   r      s     ((C%,,

+%,,,%,,,%,,,%,,,%,,-%,,-%,,-	Er/   r   c                   *    e Zd Zej                  Zd Zy)	Cuda_selpc                 :   |rJ |\  }}}t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  t         j                  t         j                  f}||k7  s||vry t        ||||      S r@   )
r   ry   rx   ro   rs   rp   rf   rq   rt   r   )r-   argskwstestabsupported_typess          r)   r.   zCuda_selp.generic   su    w
a !==%-- ;; ;; ;;6
 6Qo-D!Q''r/   N)r1   r2   r3   r   selpr8   r.   r+   r/   r)   r   r      s    
))C(r/   r   c                 <     t          G  fddt                     }|S )Nc                   P    e Zd ZW  Z eej                  ej                        gZy)'_genfp16_unary.<locals>.Cuda_fp16_unaryNr1   r2   r3   r8   r   r   r~   rH   l_keys   r)   Cuda_fp16_unaryr     s    5==%--89r/   r   registerr   r   r   s   ` r)   _genfp16_unaryr     s%    :* : : r/   c                 F     t                G  fddt                     }|S )Nc                       e Zd ZW  Zd Zy)0_genfp16_unary_operator.<locals>.Cuda_fp16_unaryc                     |rJ t        |      dk(  r?|d   t        j                  k(  r(t        t        j                  t        j                        S y y )N   r   )lenr   r~   r   )r-   r   r   s      r)   r.   z8_genfp16_unary_operator.<locals>.Cuda_fp16_unary.generic  s?    N74yA~$q'U]]": >> #;~r/   Nr1   r2   r3   r8   r.   r   s   r)   r   r     s    	?r/   r   register_globalr   r   s   ` r)   _genfp16_unary_operatorr     s*    U?* ? ? r/   c                 <     t          G  fddt                     }|S )Nc                   f    e Zd ZW  Z eej                  ej                  ej                        gZy))_genfp16_binary.<locals>.Cuda_fp16_binaryNr   r   s   r)   Cuda_fp16_binaryr   #  s%    5==%--GHr/   r   r   )r   r   s   ` r)   _genfp16_binaryr   "  s(    I+ I I r/   c                       e Zd Zd Zy)Floatc                 R    |rJ |\  }|t         j                  k(  rt        ||      S y r@   )r   r~   r   )r-   r   r   args       r)   r.   zFloat.generic.  s.    w%--S#&&  r/   Nr0   r+   r/   r)   r   r   +  s    'r/   r   c                 <     t          G  fddt                     }|S )Nc                   f    e Zd ZW  Z eej                  ej                  ej                        gZy)1_genfp16_binary_comparison.<locals>.Cuda_fp16_cmpN)	r1   r2   r3   r8   r   r   rV   r~   rH   r   s   r)   Cuda_fp16_cmpr   8  s)     ehhu}}=
r/   r   r   )r   r   s   ` r)   _genfp16_binary_comparisonr   7  s%    
( 
 
 r/   c                 J     t                G  fddt                     }|S )Nc                       e Zd ZW  ZfdZy)1_fp16_binary_operator.<locals>.Cuda_fp16_operatorc                     |rJ t        |      dk(  r|d   t        j                  k(  s|d   t        j                  k(  r|d   t        j                  k(  r#| j                  j	                  |d   |d         }n"| j                  j	                  |d   |d         }|t
        j                  k(  s&|t
        j                  k(  s|t
        j                  k(  r)t        t        j                  t        j                        S y y y )N   r   r   )
r   r   r~   contextcan_convertr   exactpromotesafer   )r-   r   r   convertiblerettys       r)   r.   z9_fp16_binary_operator.<locals>.Cuda_fp16_operator.genericT  s    N74yA~!W-aEMM1IGu}},"&,,":":47DG"LK"&,,":":47DG"LK  :#3#33:#5#55:??2$UEMM5==II 3! 2J r/   Nr   )r   r   s   r)   Cuda_fp16_operatorr   P  s    	Jr/   r   r   )r   r   r   s   `` r)   _fp16_binary_operatorr   O  s-    UJ- J J4 r/   c                 6    t        | t        j                        S r@   )r   r   rV   ops    r)   _genfp16_comparison_operatorr   n  s     UXX..r/   c                 6    t        | t        j                        S r@   )r   r   r~   r   s    r)   _genfp16_binary_operatorr   r  s     U]]33r/   c                     t        d|  t        j                  t        j                  f      }t        j                  |      S N__numba_wrapper_r   r   r~   Functionfnamedecls     r)   _resolve_wrapped_unaryr     s8    +.>ug,F,1MM-2]],<>D >>$r/   c                     t        d|  t        j                  t        j                  t        j                  f      }t        j                  |      S r   r   r   s     r)   _resolve_wrapped_binaryr     s?    +.>ug,F,1MM-2]]EMM,KMD >>$r/   hsinhcoshloghlog10hlog2hexphexp10hexp2hsqrthrsqrthfloorhceilhrcphrinthtrunchdivc                 @     t          G  fddt                     }|S )Nc                       e Zd ZW  ZfdZy)_gen.<locals>.Cuda_atomicc                    |rJ |\  }}}|j                   vry |j                  dk(  r0t        |j                   |t        j                  |j                         S |j                  dkD  r"t        |j                   |||j                         S y Nr   )r   r   r   r   intp)r-   r   r   aryidxvalr   s         r)   r.   z!_gen.<locals>.Cuda_atomic.generic  su    N7 MCcyy/xx1} CSYYGGA Ccii@@ r/   Nr   )r   r   s   r)   Cuda_atomicr     s    
	Ar/   r   )r   r   )r   r   r   s   `` r)   _genr     s(    A& A A r/   c                   >    e Zd Zej                  j
                  Zd Zy)Cuda_atomic_compare_and_swapc                 |    |rJ |\  }}}|j                   }|t        v r|j                  dk(  rt        ||||      S y y r   )r   integer_numba_typesr   r   )r-   r   r   r   oldr   dtys          r)   r.   z$Cuda_atomic_compare_and_swap.generic  sI    wS#ii%%#((a-S#sC00 +8%r/   N)r1   r2   r3   r   atomiccompare_and_swapr8   r.   r+   r/   r)   r   r     s    
++
&
&C1r/   r   c                   >    e Zd Zej                  j
                  Zd Zy)Cuda_atomic_casc                     |rJ |\  }}}}|j                   }|t        vry |j                  dk(  rt        ||t        j
                  ||      S |j                  dkD  rt        |||||      S y r   )r   r   r   r   r   r   )r-   r   r   r   r   r   r   r   s           r)   r.   zCuda_atomic_cas.generic  sq    w!S#sii))88q=S#uzz3<<XX\S#sC55 r/   N)r1   r2   r3   r   r   casr8   r.   r+   r/   r)   r   r     s    
++//C6r/   r   c                   `    e Zd Zej                  Z eej                  ej                        gZ
y)Cuda_nanosleepN)r1   r2   r3   r   	nanosleepr8   r   r   voidrf   rH   r+   r/   r)   r   r     s"    
..Cuzz5<<01Er/   r   c                   "    e Zd ZeZd Zd Zd Zy)
Dim3_attrsc                 "    t         j                  S r@   r   rp   r-   mods     r)   	resolve_xzDim3_attrs.resolve_x
      {{r/   c                 "    t         j                  S r@   r  r  s     r)   	resolve_yzDim3_attrs.resolve_y  r	  r/   c                 "    t         j                  S r@   r  r  s     r)   	resolve_zzDim3_attrs.resolve_z  r	  r/   N)r1   r2   r3   r   r8   r  r  r  r+   r/   r)   r  r    s    
Cr/   r  c                   J    e Zd Z ej                  ej                        Zd Zy)CudaSharedModuleTemplatec                 4    t        j                  t              S r@   )r   r   r5   r  s     r)   resolve_arrayz&CudaSharedModuleTemplate.resolve_array  s    ~~/00r/   N)	r1   r2   r3   r   Moduler   r6   r8   r  r+   r/   r)   r  r    s    
%,,t{{
#C1r/   r  c                   J    e Zd Z ej                  ej                        Zd Zy)CudaConstModuleTemplatec                 4    t        j                  t              S r@   )r   r   r=   r  s     r)   resolve_array_likez*CudaConstModuleTemplate.resolve_array_like   s    ~~344r/   N)	r1   r2   r3   r   r  r   rB   r8   r  r+   r/   r)   r  r    s    
%,,tzz
"C5r/   r  c                   J    e Zd Z ej                  ej                        Zd Zy)CudaLocalModuleTemplatec                 4    t        j                  t              S r@   )r   r   r:   r  s     r)   r  z%CudaLocalModuleTemplate.resolve_array(      ~~.//r/   N)	r1   r2   r3   r   r  r   r;   r8   r  r+   r/   r)   r  r  $  s    
%,,tzz
"C0r/   r  c                       e Zd Z ej                  ej                        Zd Zd Z	d Z
d Zd Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zy)CudaAtomicTemplatec                 4    t        j                  t              S r@   )r   r   Cuda_atomic_addr  s     r)   resolve_addzCudaAtomicTemplate.resolve_add0      ~~o..r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_subr  s     r)   resolve_subzCudaAtomicTemplate.resolve_sub3  r   r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_andr  s     r)   resolve_and_zCudaAtomicTemplate.resolve_and_6  r   r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_orr  s     r)   resolve_or_zCudaAtomicTemplate.resolve_or_9      ~~n--r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_xorr  s     r)   resolve_xorzCudaAtomicTemplate.resolve_xor<  r   r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_incr  s     r)   resolve_inczCudaAtomicTemplate.resolve_inc?  r   r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_decr  s     r)   resolve_deczCudaAtomicTemplate.resolve_decB  r   r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_exchr  s     r)   resolve_exchzCudaAtomicTemplate.resolve_exchE  r  r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_maxr  s     r)   resolve_maxzCudaAtomicTemplate.resolve_maxH  r   r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_minr  s     r)   resolve_minzCudaAtomicTemplate.resolve_minK  r   r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_nanminr  s     r)   resolve_nanminz!CudaAtomicTemplate.resolve_nanminN      ~~011r/   c                 4    t        j                  t              S r@   )r   r   Cuda_atomic_nanmaxr  s     r)   resolve_nanmaxz!CudaAtomicTemplate.resolve_nanmaxQ  r@  r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_compare_and_swapz+CudaAtomicTemplate.resolve_compare_and_swapT  s    ~~:;;r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_caszCudaAtomicTemplate.resolve_casW  r   r/   N)r1   r2   r3   r   r  r   r   r8   r  r#  r&  r)  r-  r0  r3  r6  r9  r<  r?  rC  rE  rG  r+   r/   r)   r  r  ,  s\    
%,,t{{
#C///.///0//22</r/   r  c                       e Zd Z ej                  ej                        Zd Zd Z	d Z
d Zd Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$d Z%y) CudaFp16Templatec                 4    t        j                  t              S r@   )r   r   	Cuda_haddr  s     r)   resolve_haddzCudaFp16Template.resolve_hadd_      ~~i((r/   c                 4    t        j                  t              S r@   )r   r   	Cuda_hsubr  s     r)   resolve_hsubzCudaFp16Template.resolve_hsubb  rM  r/   c                 4    t        j                  t              S r@   )r   r   	Cuda_hmulr  s     r)   resolve_hmulzCudaFp16Template.resolve_hmule  rM  r/   c                     t         S r@   )hdiv_devicer  s     r)   resolve_hdivzCudaFp16Template.resolve_hdivh      r/   c                 4    t        j                  t              S r@   )r   r   	Cuda_hnegr  s     r)   resolve_hnegzCudaFp16Template.resolve_hnegk  rM  r/   c                 4    t        j                  t              S r@   )r   r   	Cuda_habsr  s     r)   resolve_habszCudaFp16Template.resolve_habsn  rM  r/   c                 4    t        j                  t              S r@   )r   r   r{   r  s     r)   resolve_hfmazCudaFp16Template.resolve_hfmaq  rM  r/   c                     t         S r@   )hsin_devicer  s     r)   resolve_hsinzCudaFp16Template.resolve_hsint  rW  r/   c                     t         S r@   )hcos_devicer  s     r)   resolve_hcoszCudaFp16Template.resolve_hcosw  rW  r/   c                     t         S r@   )hlog_devicer  s     r)   resolve_hlogzCudaFp16Template.resolve_hlogz  rW  r/   c                     t         S r@   )hlog10_devicer  s     r)   resolve_hlog10zCudaFp16Template.resolve_hlog10}      r/   c                     t         S r@   )hlog2_devicer  s     r)   resolve_hlog2zCudaFp16Template.resolve_hlog2      r/   c                     t         S r@   )hexp_devicer  s     r)   resolve_hexpzCudaFp16Template.resolve_hexp  rW  r/   c                     t         S r@   )hexp10_devicer  s     r)   resolve_hexp10zCudaFp16Template.resolve_hexp10  rl  r/   c                     t         S r@   )hexp2_devicer  s     r)   resolve_hexp2zCudaFp16Template.resolve_hexp2  rp  r/   c                     t         S r@   )hfloor_devicer  s     r)   resolve_hfloorzCudaFp16Template.resolve_hfloor  rl  r/   c                     t         S r@   )hceil_devicer  s     r)   resolve_hceilzCudaFp16Template.resolve_hceil  rp  r/   c                     t         S r@   )hsqrt_devicer  s     r)   resolve_hsqrtzCudaFp16Template.resolve_hsqrt  rp  r/   c                     t         S r@   )hrsqrt_devicer  s     r)   resolve_hrsqrtzCudaFp16Template.resolve_hrsqrt  rl  r/   c                     t         S r@   )hrcp_devicer  s     r)   resolve_hrcpzCudaFp16Template.resolve_hrcp  rW  r/   c                     t         S r@   )hrint_devicer  s     r)   resolve_hrintzCudaFp16Template.resolve_hrint  rp  r/   c                     t         S r@   )htrunc_devicer  s     r)   resolve_htrunczCudaFp16Template.resolve_htrunc  rl  r/   c                 4    t        j                  t              S r@   )r   r   Cuda_heqr  s     r)   resolve_heqzCudaFp16Template.resolve_heq      ~~h''r/   c                 4    t        j                  t              S r@   )r   r   Cuda_hner  s     r)   resolve_hnezCudaFp16Template.resolve_hne  r  r/   c                 4    t        j                  t              S r@   )r   r   Cuda_hger  s     r)   resolve_hgezCudaFp16Template.resolve_hge  r  r/   c                 4    t        j                  t              S r@   )r   r   Cuda_hgtr  s     r)   resolve_hgtzCudaFp16Template.resolve_hgt  r  r/   c                 4    t        j                  t              S r@   )r   r   Cuda_hler  s     r)   resolve_hlezCudaFp16Template.resolve_hle  r  r/   c                 4    t        j                  t              S r@   )r   r   Cuda_hltr  s     r)   resolve_hltzCudaFp16Template.resolve_hlt  r  r/   c                 4    t        j                  t              S r@   )r   r   	Cuda_hmaxr  s     r)   resolve_hmaxzCudaFp16Template.resolve_hmax  rM  r/   c                 4    t        j                  t              S r@   )r   r   	Cuda_hminr  s     r)   resolve_hminzCudaFp16Template.resolve_hmin  rM  r/   N)&r1   r2   r3   r   r  r   r|   r8   rL  rP  rS  rV  rZ  r]  r_  rb  re  rh  rk  ro  rs  rv  ry  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r+   r/   r)   rI  rI  [  s    
%,,tyy
!C))))))(((((())r/   rI  c                       e Zd Z ej                  e      Zd Zd Zd Z	d Z
d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#y)CudaModuleTemplatec                 H    t        j                  t        j                        S r@   )r   r  r   cgr  s     r)   
resolve_cgzCudaModuleTemplate.resolve_cg  s    ||DGG$$r/   c                     t         S r@   r   r  s     r)   resolve_threadIdxz$CudaModuleTemplate.resolve_threadIdx      r/   c                     t         S r@   r   r  s     r)   resolve_blockIdxz#CudaModuleTemplate.resolve_blockIdx  r  r/   c                     t         S r@   r   r  s     r)   resolve_blockDimz#CudaModuleTemplate.resolve_blockDim  r  r/   c                     t         S r@   r   r  s     r)   resolve_gridDimz"CudaModuleTemplate.resolve_gridDim  r  r/   c                 "    t         j                  S r@   r  r  s     r)   resolve_laneidz!CudaModuleTemplate.resolve_laneid  r	  r/   c                 H    t        j                  t        j                        S r@   )r   r  r   r6   r  s     r)   resolve_sharedz!CudaModuleTemplate.resolve_shared      ||DKK((r/   c                 4    t        j                  t              S r@   )r   r   rk   r  s     r)   resolve_popczCudaModuleTemplate.resolve_popc  rM  r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_brevzCudaModuleTemplate.resolve_brev  rM  r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_clzzCudaModuleTemplate.resolve_clz  r  r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_ffszCudaModuleTemplate.resolve_ffs  r  r/   c                 4    t        j                  t              S r@   )r   r   rv   r  s     r)   resolve_fmazCudaModuleTemplate.resolve_fma  r  r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_cbrtzCudaModuleTemplate.resolve_cbrt  rM  r/   c                 4    t        j                  t              S r@   )r   r   rE   r  s     r)   resolve_threadfencez&CudaModuleTemplate.resolve_threadfence      ~~566r/   c                 4    t        j                  t              S r@   )r   r   rJ   r  s     r)   resolve_threadfence_blockz,CudaModuleTemplate.resolve_threadfence_block  s    ~~455r/   c                 4    t        j                  t              S r@   )r   r   rM   r  s     r)   resolve_threadfence_systemz-CudaModuleTemplate.resolve_threadfence_system  r  r/   c                 4    t        j                  t              S r@   )r   r   rP   r  s     r)   resolve_syncwarpz#CudaModuleTemplate.resolve_syncwarp  s    ~~m,,r/   c                 4    t        j                  t              S r@   )r   r   rT   r  s     r)   resolve_shfl_sync_intrinsicz.CudaModuleTemplate.resolve_shfl_sync_intrinsic      ~~677r/   c                 4    t        j                  t              S r@   )r   r   r[   r  s     r)   resolve_vote_sync_intrinsicz.CudaModuleTemplate.resolve_vote_sync_intrinsic  r  r/   c                 4    t        j                  t              S r@   )r   r   r^   r  s     r)   resolve_match_any_syncz)CudaModuleTemplate.resolve_match_any_sync      ~~122r/   c                 4    t        j                  t              S r@   )r   r   ra   r  s     r)   resolve_match_all_syncz)CudaModuleTemplate.resolve_match_all_sync  r  r/   c                 4    t        j                  t              S r@   )r   r   rd   r  s     r)   resolve_activemaskz%CudaModuleTemplate.resolve_activemask  r   r/   c                 4    t        j                  t              S r@   )r   r   rh   r  s     r)   resolve_lanemask_ltz&CudaModuleTemplate.resolve_lanemask_lt   r  r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_selpzCudaModuleTemplate.resolve_selp  rM  r/   c                 4    t        j                  t              S r@   )r   r   r   r  s     r)   resolve_nanosleepz$CudaModuleTemplate.resolve_nanosleep  r*  r/   c                 H    t        j                  t        j                        S r@   )r   r  r   r   r  s     r)   resolve_atomicz!CudaModuleTemplate.resolve_atomic	  r  r/   c                 H    t        j                  t        j                        S r@   )r   r  r   r|   r  s     r)   resolve_fp16zCudaModuleTemplate.resolve_fp16  s    ||DII&&r/   c                 H    t        j                  t        j                        S r@   )r   r  r   rB   r  s     r)   resolve_constz CudaModuleTemplate.resolve_const      ||DJJ''r/   c                 H    t        j                  t        j                        S r@   )r   r  r   r;   r  s     r)   resolve_localz CudaModuleTemplate.resolve_local  r  r/   N)$r1   r2   r3   r   r  r   r8   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r+   r/   r)   r  r    s    
%,,t
C%)))((()767-8833/0).)'((r/   r  )loglog2log10)operator
numba.corer   numba.core.typing.npydeclr   r   r   r   r   r	   r
   r   numba.core.typing.templatesr   r   r   r   r   r   numba.cuda.typesr   numba.core.typeconvr   numbar   numba.cuda.compilerr   registryr   register_attrr   r   r5   r:   r=   rE   rJ   rM   rP   rT   r[   r^   ra   rd   rh   rk   rv   r{   r   r   r   r   r   r   r   r   floatr   r   r   r   r   r|   haddrK  addCuda_addiadd	Cuda_iaddhsubrO  subCuda_subisub	Cuda_isubhmulrR  mulCuda_mulimul	Cuda_imulhmaxr  hminr  hnegrY  negCuda_neghabsr\  absCuda_absheqr  eqhner  nehger  gehgtr  gthler  lehltr  lttruedivitruedivr   r   ra  rd  rg  rj  rn  rr  ru  rx  r  r  r{  r~  r  r  r  rU  r   ry   rx   rp   rf   rq   rt   all_numba_typesr   unsigned_int_numba_typesr   r  r"  maxr8  minr;  nanmaxrB  nanminr>  and_r%  or_r(  xorr,  incr/  decr2  exchr5  r   r   r   r  r  r  r  r  rI  r  r  funcr+   r/   r)   <module>r+     ss    @ @ @> > " *  @:&&**  (& 0 
  
 
  
 
,  
 
$. $ 
$
 
$- $ 
$
 
$. $ 
$
 
E$ E 
E
 
/  
 
6/ 6 
6 
*  
 
*  
 
&& & 
&
 
&' & 
&
 
   
$ 
	 	 
	 
   
 
   
 
   
 
  
$ 
  
$ 
(  ( 
((
 ' ' '0>/4 DIINN+	#HLL1$X]]3	DIINN+	#HLL1$X]]3	DIINN+	#HLL1$X]]3	DIINN+	DIINN+	499>>*	"8<<0499>>*	"3'%diimm4 X[[ )%diimm4 X[[ )%diimm4 X[[ )%diimm4 X[[ )%diimm4 X[[ )%diimm4 X[[ ) )) * ** +   %V,$V,$V,&x0%g.$V,&x0%g.%g.&x0&x0%g.$V,%g.&x0%f-& ==%--;;;;. {{ELL{{ELL2  "LL%,,7 t{{8t{{8t{{8t{{8$++,,o> $++,,o> t{{'')<=dkkoo':;t{{(;<t{{(@At{{(@A((*=>  
	1#3 	1 
	1 
6& 6 
6" 
2% 2 
2 
" 
 
 10 1 1 5/ 5 5 0/ 0 0 +/* +/ +/\ [)( [) [)| X(* X( X(v lell4( )
 $D/ $ !D/ ! $D/ $ D''T?3 r/   