
    xKgm0                        d Z ddlmZ ddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ  G d d	e      Z G d
 d      Z G d d      Z G d de      Z G d de      Z G d de      Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z ej(                         Z  G d de      Z! G d de      Z" G d de      Z#ed        Z$y)zf
Implements the cuda module as called from within an executing kernel
(@cuda.jit-decorated function).
    )contextmanagerN)types)numpy_support   )vector_typesc                   (    e Zd ZdZd Zd Zd Zd Zy)Dim3z;
    Used to implement thread/block indices/dimensions
    c                 .    || _         || _        || _        y Nxyz)selfr   r   r   s       b/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/simulator/kernelapi.py__init__zDim3.__init__   s        c                 V    d| j                   d| j                  d| j                  dS )N(, )r   r   s    r   __str__zDim3.__str__   s    !%88r   c                 V    d| j                   d| j                  d| j                  dS )NzDim3(r   r   r   r   s    r   __repr__zDim3.__repr__   s    %)VVTVVTVV<<r   c              #   `   K   | j                    | j                   | j                   y wr   r   r   s    r   __iter__zDim3.__iter__!   s!     ffffffs   ,.N)__name__
__module____qualname____doc__r   r   r   r    r   r   r	   r	      s    
9=r   r	   c                       e Zd ZdZd Zy)	GridGroupz+
    Used to implement the grid group.
    c                 H    t        j                         j                          y r   	threadingcurrent_threadsyncthreadsr   s    r   synczGridGroup.sync,   s     	  "..0r   N)r   r   r    r!   r*   r"   r   r   r$   r$   '   s    1r   r$   c                       e Zd ZdZd Zy)
FakeCUDACgz!
    CUDA Cooperative Groups
    c                     t               S r   )r$   r   s    r   	this_gridzFakeCUDACg.this_grid7   s
    {r   N)r   r   r    r!   r.   r"   r   r   r,   r,   3   s    r   r,   c                       e Zd ZdZd Zy)FakeCUDALocalz
    CUDA Local arrays
    c                     t        |t        j                        rt        j                  |      }t        j                  ||      S r   )
isinstancer   Typer   as_dtypenpempty)r   shapedtypes      r   arrayzFakeCUDALocal.array?   s1    eUZZ(!**51Exxu%%r   N)r   r   r    r!   r9   r"   r   r   r0   r0   ;   s    &r   r0   c                       e Zd ZdZd Zy)FakeCUDAConstz
    CUDA Const arrays
    c                     |S r   r"   )r   arys     r   
array_likezFakeCUDAConst.array_likeI   s    
r   N)r   r   r    r!   r>   r"   r   r   r;   r;   E   s    r   r;   c                       e Zd ZdZd Zd Zy)FakeCUDAShareda  
    CUDA Shared arrays.

    Limitations: assumes that only one call to cuda.shared.array is on a line,
    and that that line is only executed once per thread. i.e.::

        a = cuda.shared.array(...); b = cuda.shared.array(...)

    will erroneously alias a and b, and::

        for i in range(10):
            sharedarrs[i] = cuda.shared.array(...)

    will alias all arrays created at that point (though it is not certain that
    this would be supported by Numba anyway).
    c                 t    i | _         || _        t        j                  |t        j                        | _        y N)r8   )_allocations_dynshared_sizer5   zerosbyte
_dynshared)r   dynshared_sizes     r   r   zFakeCUDAShared.__init___   s(    -((>Ar   c                    t        |t        j                        rt        j                  |      }|dk(  rE| j
                  |j                  z  }t        j                  | j                  j                  ||      S t        j                  t        j                               }|d   dd }| j                  j!                  |      }|%t        j"                  ||      }|| j                  |<   |S )Nr   )r8   count   )r2   r   r3   r   r4   rD   itemsizer5   
frombufferrG   data	tracebackextract_stacksys	_getframerC   getr6   )r   r7   r8   rJ   stackcallerress          r   r9   zFakeCUDAShared.arrayd   s    eUZZ(!**51E A: ((ENN:E==!5!5U%PP
 ''8r1Q##F+;((5%(C(+Df%
r   N)r   r   r    r!   r   r9   r"   r   r   r@   r@   M   s    "B
r   r@   c                   `    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zy)FakeCUDAAtomicc                 b    t         5  ||   }||xx   |z  cc<   d d d        |S # 1 sw Y   S xY wr   )addlockr   r9   indexvalolds        r   addzFakeCUDAAtomic.add   5    ,C%LCL  
  
   $.c                 b    t         5  ||   }||xx   |z  cc<   d d d        |S # 1 sw Y   S xY wr   )sublockr\   s        r   subzFakeCUDAAtomic.sub   ra   rb   c                 b    t         5  ||   }||xx   |z  cc<   d d d        |S # 1 sw Y   S xY wr   )andlockr\   s        r   and_zFakeCUDAAtomic.and_   ra   rb   c                 b    t         5  ||   }||xx   |z  cc<   d d d        |S # 1 sw Y   S xY wr   )orlockr\   s        r   or_zFakeCUDAAtomic.or_   s5    ,C%LCL  
  
rb   c                 b    t         5  ||   }||xx   |z  cc<   d d d        |S # 1 sw Y   S xY wr   )xorlockr\   s        r   xorzFakeCUDAAtomic.xor   ra   rb   c                 x    t         5  ||   }||k\  rd||<   n||xx   dz  cc<   d d d        |S # 1 sw Y   S xY wNr   r   )inclockr\   s        r   inczFakeCUDAAtomic.inc   sE    ,Ccz ee!  
  
s   /9c                     t         5  ||   }|dk(  s||kD  r|||<   n||xx   dz  cc<   d d d        |S # 1 sw Y   S xY wrp   )declockr\   s        r   deczFakeCUDAAtomic.dec   sK    ,CqcCi"ee!  
  
   #4>c                 R    t         5  ||   }|||<   d d d        |S # 1 sw Y   S xY wr   )exchlockr\   s        r   exchzFakeCUDAAtomic.exch   s0    ,CE%L  
  
s   &c                 f    t         5  ||   }t        ||      ||<   d d d        |S # 1 sw Y   S xY wr   )maxlockmaxr\   s        r   r|   zFakeCUDAAtomic.max   6    ,CsC=E%L  
  
   &0c                 f    t         5  ||   }t        ||      ||<   d d d        |S # 1 sw Y   S xY wr   )minlockminr\   s        r   r   zFakeCUDAAtomic.min   r}   r~   c                     t         5  ||   }t        j                  ||   |g      ||<   d d d        |S # 1 sw Y   S xY wr   )r{   r5   nanmaxr\   s        r   r   zFakeCUDAAtomic.nanmax   B    ,C99eElC%89E%L  
  
rv   c                     t         5  ||   }t        j                  ||   |g      ||<   d d d        |S # 1 sw Y   S xY wr   )r   r5   nanminr\   s        r   r   zFakeCUDAAtomic.nanmin   r   rv   c                 z    t         5  d|j                  z  }||   }||k(  r|||<   |cd d d        S # 1 sw Y   y xY w)N)r   )compare_and_swaplockndim)r   r9   r_   r^   r]   loadeds         r   compare_and_swapzFakeCUDAAtomic.compare_and_swap   s;    !5::%E5\F}"e "!!s    1:c                 \    t         5  ||   }||k(  r|||<   |cd d d        S # 1 sw Y   y xY wr   )caslock)r   r9   r]   r_   r^   r   s         r   caszFakeCUDAAtomic.cas   s+    5\F}"e	 WWs   "+N)r   r   r    r`   re   rh   rk   rn   rr   ru   ry   r|   r   r   r   r   r   r"   r   r   rY   rY      sH    r   rY   c                       e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z y) FakeCUDAFp16c                     ||z   S r   r"   r   abs      r   haddzFakeCUDAFp16.hadd       1ur   c                     ||z
  S r   r"   r   s      r   hsubzFakeCUDAFp16.hsub   r   r   c                     ||z  S r   r"   r   s      r   hmulzFakeCUDAFp16.hmul   r   r   c                     ||z  S r   r"   r   s      r   hdivzFakeCUDAFp16.hdiv   r   r   c                     ||z  |z   S r   r"   r   r   r   cs       r   hfmazFakeCUDAFp16.hfma       1uqyr   c                     | S r   r"   r   r   s     r   hnegzFakeCUDAFp16.hneg   s	    r	r   c                     t        |      S r   )absr   s     r   habszFakeCUDAFp16.habs   s    1vr   c                 L    t        j                  |t         j                        S rB   )r5   sinfloat16r   r   s     r   hsinzFakeCUDAFp16.hsin       vvarzz**r   c                 L    t        j                  |t         j                        S rB   )r5   cosr   r   s     r   hcoszFakeCUDAFp16.hcos  r   r   c                 L    t        j                  |t         j                        S rB   )r5   logr   r   s     r   hlogzFakeCUDAFp16.hlog  r   r   c                 L    t        j                  |t         j                        S rB   )r5   log2r   r   s     r   hlog2zFakeCUDAFp16.hlog2      wwq

++r   c                 L    t        j                  |t         j                        S rB   )r5   log10r   r   s     r   hlog10zFakeCUDAFp16.hlog10      xx,,r   c                 L    t        j                  |t         j                        S rB   )r5   expr   r   s     r   hexpzFakeCUDAFp16.hexp  r   r   c                 L    t        j                  |t         j                        S rB   )r5   exp2r   r   s     r   hexp2zFakeCUDAFp16.hexp2  r   r   c                 2    t        j                  d|z        S )N
   r5   r   r   s     r   hexp10zFakeCUDAFp16.hexp10  s    zz"'""r   c                 L    t        j                  |t         j                        S rB   )r5   sqrtr   r   s     r   hsqrtzFakeCUDAFp16.hsqrt  r   r   c                 2    t        j                  |dz        S )Ng      r   r   s     r   hrsqrtzFakeCUDAFp16.hrsqrt  s    zz!t)$$r   c                 L    t        j                  |t         j                        S rB   r5   ceilr   r   s     r   hceilzFakeCUDAFp16.hceil  r   r   c                 L    t        j                  |t         j                        S rB   r   r   s     r   hfloorzFakeCUDAFp16.hfloor   r   r   c                 L    t        j                  |t         j                        S rB   )r5   
reciprocalr   r   s     r   hrcpzFakeCUDAFp16.hrcp#  s    }}Qbjj11r   c                 L    t        j                  |t         j                        S rB   )r5   truncr   r   s     r   htrunczFakeCUDAFp16.htrunc&  r   r   c                 L    t        j                  |t         j                        S rB   )r5   rintr   r   s     r   hrintzFakeCUDAFp16.hrint)  r   r   c                     ||k(  S r   r"   r   s      r   heqzFakeCUDAFp16.heq,      Avr   c                     ||k7  S r   r"   r   s      r   hnezFakeCUDAFp16.hne/  r   r   c                     ||k\  S r   r"   r   s      r   hgezFakeCUDAFp16.hge2  r   r   c                     ||kD  S r   r"   r   s      r   hgtzFakeCUDAFp16.hgt5  r   r   c                     ||k  S r   r"   r   s      r   hlezFakeCUDAFp16.hle8  r   r   c                     ||k  S r   r"   r   s      r   hltzFakeCUDAFp16.hlt;  r   r   c                     t        ||      S r   )r|   r   s      r   hmaxzFakeCUDAFp16.hmax>      1ayr   c                     t        ||      S r   )r   r   s      r   hminzFakeCUDAFp16.hminA  r   r   N)!r   r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   r   r   r   r      s    +++,-+,#,%,,2-,r   r   c                      e Zd ZdZd Zed        Zed        Zed        Zed        Z	ed        Z
ed        Zed	        Zed
        Zed        Zed        Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)FakeCUDAModulea7  
    An instance of this class will be injected into the __globals__ for an
    executing function in order to implement calls to cuda.*. This will fail to
    work correctly if the user code does::

        from numba import cuda as something_else

    In other words, the CUDA module must be called cuda.
    c                 |   t        | | _        t        | | _        t               | _        t               | _        t        |      | _        t               | _
        t               | _        t               | _        t        j                          D ]0  \  }}t#        | ||       |j$                  D ]  }t#        | ||        2 y r   )r	   gridDimblockDimr,   _cgr0   _localr@   _sharedr;   _constrY   _atomicr   _fp16r   itemssetattraliases)r   grid_dim	block_dimrH   namesvtyaliass          r   r   zFakeCUDAModule.__init__P  s    Xi(<#o%n5#o%'!^
 ',,.JD$D$%eT* & /r   c                     | j                   S r   )r   r   s    r   cgzFakeCUDAModule.cgc  s    xxr   c                     | j                   S r   )r   r   s    r   localzFakeCUDAModule.localg      {{r   c                     | j                   S r   )r   r   s    r   sharedzFakeCUDAModule.sharedk      ||r   c                     | j                   S r   )r   r   s    r   constzFakeCUDAModule.consto  r  r   c                     | j                   S r   )r   r   s    r   atomiczFakeCUDAModule.atomics  r  r   c                     | j                   S r   )r   r   s    r   fp16zFakeCUDAModule.fp16w  s    zzr   c                 >    t        j                         j                  S r   )r'   r(   	threadIdxr   s    r   r  zFakeCUDAModule.threadIdx{  s    '')333r   c                 >    t        j                         j                  S r   )r'   r(   blockIdxr   s    r   r  zFakeCUDAModule.blockIdx  s    '')222r   c                      yN    r"   r   s    r   warpsizezFakeCUDAModule.warpsize  s    r   c                 D    t        j                         j                  dz  S r  )r'   r(   	thread_idr   s    r   laneidzFakeCUDAModule.laneid  s    '')33b88r   c                 H    t        j                         j                          y r   r&   r   s    r   r)   zFakeCUDAModule.syncthreads  s      "..0r   c                      y r   r"   r   s    r   threadfencezFakeCUDAModule.threadfence      r   c                      y r   r"   r   s    r   threadfence_blockz FakeCUDAModule.threadfence_block  r  r   c                      y r   r"   r   s    r   threadfence_systemz!FakeCUDAModule.threadfence_system  r  r   c                 H    t        j                         j                  |      S r   )r'   r(   syncthreads_countr   r^   s     r   r!  z FakeCUDAModule.syncthreads_count  s    '');;C@@r   c                 H    t        j                         j                  |      S r   )r'   r(   syncthreads_andr"  s     r   r$  zFakeCUDAModule.syncthreads_and  s    '')99#>>r   c                 H    t        j                         j                  |      S r   )r'   r(   syncthreads_orr"  s     r   r&  zFakeCUDAModule.syncthreads_or  s    '')88==r   c                 6    t        |      j                  d      S )N1)binrJ   r"  s     r   popczFakeCUDAModule.popc  s    3x~~c""r   c                     ||z  |z   S r   r"   r   s       r   fmazFakeCUDAModule.fma  r   r   c                     |dz  S )NgUUUUUU?r"   r   s     r   cbrtzFakeCUDAModule.cbrt  s    U|r   c                 D    t        dj                  |      d d d   d      S )N{:032b}rL   )intformatr"  s     r   brevzFakeCUDAModule.brev  s#    9##C(2.22r   c                 p    dj                  |      }t        |      t        |j                  d            z
  S )Nr0  0)r3  lenlstrip)r   r^   ss      r   clzzFakeCUDAModule.clz  s.    S!1vAHHSM***r   c                     dj                  |      }t        |      t        |j                  d            z
  dz   dz  }|S )Nr0  r6  r   !   )r3  r7  rstrip)r   r^   r9  rs       r   ffszFakeCUDAModule.ffs  s>     S!Vc!((3-((1,2r   c                     |r|S |S r   r"   r   s       r   selpzFakeCUDAModule.selp  s    q1r   c                 ~   | j                   }| j                  }| j                  }|j                  |j                  z  |j                  z   }|dk(  r|S |j                  |j                  z  |j                  z   }|dk(  r||fS |j
                  |j
                  z  |j
                  z   }|dk(  r|||fS t        d|z        )Nr   rL      z*Global ID has 1-3 dimensions. %d requested)r   r  r  r   r   r   RuntimeError)r   nbdimbidtidr   r   r   s           r   gridzFakeCUDAModule.grid  s    }}mmnnEEDFFNSUU"6HEEDFFNSUU"6q6MEEDFFNSUU"6q!9G!KLLr   c                    | j                   }| j                  }|j                  |j                  z  }|dk(  r|S |j                  |j                  z  }|dk(  r||fS |j                  |j                  z  }|dk(  r|||fS t        d|z        )Nr   rL   rC  z,Global grid has 1-3 dimensions. %d requested)r   r   r   r   r   rD  )r   rE  rF  gdimr   r   r   s          r   gridsizezFakeCUDAModule.gridsize  s    }}||FFTVVO6HFFTVVO6q6MFFTVVO6q!9IAMNNr   N) r   r   r    r!   r   propertyr   r  r  r  r
  r  r  r  r  r  r)   r  r  r  r!  r$  r&  r*  r,  r.  r4  r:  r?  rA  rI  rL  r"   r   r   r   r   E  s'   +&             4 4 3 3   9 91A?>#3+M Or   r   c              #   6  K   ddl m | j                  }t        fd|j	                         D              }t        fd|j	                         D              }|j                  |       	 d  |j                  |       y # |j                  |       w xY ww)Nr   )cudac              3   6   K   | ]  \  }}|u s||f  y wr   r"   ).0kvrO  s      r   	<genexpr>z&swapped_cuda_module.<locals>.<genexpr>  s"     A#341aqDyA#3s   	c              3   ,   K   | ]  \  }}|f  y wr   r"   )rQ  rR  rS  fake_cuda_modules      r   rT  z&swapped_cuda_module.<locals>.<genexpr>  s     ?,$!Q$%,s   )numbarO  __globals__dictr   update)fnrV  fn_globsorigreplrO  s    `   @r   swapped_cuda_moduler_    sn     ~~HA8>>#3AAD?$**,??DOOD 	s   A(B-B 1BBB)%r!   
contextlibr   rR   r'   rP   
numba.corer   numpyr5   numba.npr   r   objectr	   r$   r,   r0   r;   r@   Lockr[   rd   rg   rj   rm   r{   r   r   r   rq   rt   rx   rY   r   r   r_  r"   r   r   <module>rf     sW  
 & 
     " &6 *	1 	1 &F &F ,V ,^ )..

)..

)..
		
)..

)..

)..
%y~~' 
)..

)..

)..
9>>\V \~Y6 YxXOV XOv  r   