
    xKga)                        d dl mZ d dlZd dlZd dlZd dlZddlmZm	Z	 ddl
mZmZmZ ddlmZ ddlmZmZ 	 daed	        Zd
 Z G d d      Z G d de      Z G d de      Z G d dej6                        Z G d de      Zy)    )contextmanagerN   )FakeCUDAArrayFakeWithinKernelCUDAArray)Dim3FakeCUDAModuleswapped_cuda_module   )normalize_kernel_dimensions)wrap_argArgHintc              #   F   K   t         J d       | a 	 d da y# da w xY ww)z*
    Push the current kernel context.
    Nz)concurrent simulated kernel not supported_kernel_context)mods    _/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/simulator/kernel.py_push_kernel_contextr      s1      "O$OO"O$s   ! !!c                      t         S )zT
    Get the current kernel context. This is usually done by a device function.
    r        r   _get_kernel_contextr   $   s
     r   c                       e Zd ZdZd Zy)FakeOverloadzE
    Used only to provide the max_cooperative_grid_blocks method
    c                      y)Nr   r   )selfblockdims     r   max_cooperative_grid_blocksz(FakeOverload.max_cooperative_grid_blocks/   s     r   N)__name__
__module____qualname____doc__r   r   r   r   r   r   +   s    r   r   c                       e Zd Zd Zy)FakeOverloadDictc                     t               S N)r   )r   keys     r   __getitem__zFakeOverloadDict.__getitem__6   s     ~r   N)r   r   r    r'   r   r   r   r#   r#   5   s    r   r#   c                   ^    e Zd ZdZdg dfdZd Zd Zd Zd ZddZ	e
d	        Ze
d
        Zy)FakeCUDAKernelz(
    Wraps a @cuda.jit-ed function.
    Fc                     || _         || _        || _        || _        t	        |      | _        d | _        d | _        d| _        d| _	        t        j                  | |       y )Nr   )fn_device	_fastmath_debuglist
extensionsgrid_dim	block_dimstreamdynshared_size	functoolsupdate_wrapper)r   r+   devicefastmathr0   debugs         r   __init__zFakeCUDAKernel.__init__A   sY    !z*   r*r   c           	           j                   r7t         j                  t                     5    j                  | cd d d        S t	         j
                   j                        \  }}t        || j                        }t        |      5  g  fd}|D cg c]
  } ||       }}t         j                  |      5  t        j                  | D ]7  }t         j                  || j                        }	 |	j                  |g|  9 	 d d d        D ]	  }
 |
         	 d d d        y # 1 sw Y   xY wc c}w # 1 sw Y   2xY w# 1 sw Y   y xY w)Nc                    t        j                  fdj                  d | f      \  }} t        | t        j
                        r*| j                  dkD  rt        |       j                        }nJt        | t              r| j                        }n(t        | t        j                        rt        |       }n| }t        |t              rt        |      S |S )Nc                 *     |j                   | ddS )Nr   )r3   retr)prepare_args)ty_val	extensionr>   s     r   <lambda>z;FakeCUDAKernel.__call__.<locals>.fake_arg.<locals>.<lambda>b   s    .Di.D.D !/#r   r   )r5   reducer0   
isinstancenpndarrayndimr   	to_devicer   voidr   r   )arg_retr>   r   s      r   fake_argz)FakeCUDAKernel.__call__.<locals>.fake_arg_   s    "))# OO3K3 c2::.388a<"3-11$7CW----CRWW-',CCc=14S99
r   )r,   r	   r+   r   r   r1   r2   r   r4   r   rE   ndindexBlockManagerr.   run)r   argsr1   r2   fake_cuda_modulerM   rJ   	fake_args
grid_pointbmwbr>   s   `          @r   __call__zFakeCUDAKernel.__call__O   s&   <<$TWW.A.CDtww~ ED :$--:>..J) *(I*.*=*=?!"23 D. 377$3#$I7$TWW.>?"$**h"7J%dggxDKKPBBFF:2	2 #8 @  G 43 EDJ 8??; 43sB   D-E
D9'E
?AD>E
-D69E
>E	E

Ec                 f    t        |d d  \  | _        | _        t        |      dk(  r
|d   | _        | S )Nr
         )r   r1   r2   lenr4   )r   configurations     r   r'   zFakeCUDAKernel.__getitem__   s?    'r):; 	&t~ }""/"2Dr   c                      y r%   r   r   s    r   bindzFakeCUDAKernel.bind   s    r   c                     | S r%   r   )r   rQ   s     r   
specializezFakeCUDAKernel.specialize   s    r   c                 :    |dk  rt        d|z        | |d||f   S )Nr   z0Can't create ForAll with negative task count: %sr   )
ValueError)r   ntaskstpbr3   	sharedmems        r   forallzFakeCUDAKernel.forall   s5    A:O%& ' 'FAvy011r   c                     t               S r%   )r#   r^   s    r   	overloadszFakeCUDAKernel.overloads   s    !!r   c                     | j                   S r%   )r+   r^   s    r   py_funczFakeCUDAKernel.py_func   s    wwr   N)r   r   r   )r   r   r    r!   r:   rW   r'   r_   ra   rg   propertyri   rk   r   r   r   r)   r)   <   sW     -2b +/b2 " "  r   r)   c                   J     e Zd ZdZ fdZ fdZd Zd Zd Zd Z	d Z
 xZS )	BlockThreadzG
    Manages the execution of a function for a single CUDA thread.
    c                     |rfd}|}n}t         t        |   |       t        j                         | _        d| _        || _        t        | | _	        t        | | _
        d | _        d| _        d| _        || _        t        | j                  j                   }| j                  j                   |j                   | j                  j"                  |j"                  | j                  j$                  z  z   z  z   | _        y )Nc                  B    t        j                  d        | i | y )Nraise)divide)rE   seterr)rQ   kwargsfs     r   debug_wrapperz+BlockThread.__init__.<locals>.debug_wrapper   s    		)4"6"r   )targetFT)superrn   r:   	threadingEventsyncthreads_eventsyncthreads_blocked_managerr   blockIdx	threadIdx	exceptiondaemonabortr9   
_block_dimxyz	thread_id)
r   ru   managerr~   r   r9   rv   rw   blockDim	__class__s
    `       r   r:   zBlockThread.__init__   s    # #FFk4))8!*!2#( hy)

112))XZZ4>>;K;K;C::;?>>;K;K<L<L .M Nr   c                 V   	 t         t        |           y # t        $ r}dt	        | j
                        z  }dt	        | j                        z  }t        |      dk(  r|d|}n
|d|d|}t        j                         d   } t        |      |      |f| _        Y d }~y d }~ww xY w)Nztid=%szctaid=%s  z: r
   )rx   rn   rP   	Exceptionr/   r   r~   strsysexc_infotyper   )r   etidctaidmsgtbr   s         r   rP   zBlockThread.run   s    	0+t(* 
	0T$..11Cdmm!44E1v|!$e,%(%3"B &d1gclB/DNN
	0s    	B(A?B##B(c                     | j                   rt        d      d| _        | j                  j	                          | j                  j                          | j                   rt        d      y )Nz"abort flag set on syncthreads callTz#abort flag set on syncthreads clear)r   RuntimeErrorr|   r{   waitclearr^   s    r   syncthreadszBlockThread.syncthreads   sY    ::CDD#' ##%$$&::DEE r   c                 L   | j                   j                  | j                   j                  | j                   j                  f}|| j                  j
                  |<   | j                          t        j                  | j                  j
                        }| j                          |S r%   )	r   r   r   r   r}   block_stater   rE   count_nonzero)r   valueidxcounts       r   syncthreads_countzBlockThread.syncthreads_count   sw    nn 0 0$..2B2BB).!!#&  !:!:;r   c                 T   | j                   j                  | j                   j                  | j                   j                  f}|| j                  j
                  |<   | j                          t        j                  | j                  j
                        }| j                          |rdS dS Nr   r   )	r   r   r   r   r}   r   r   rE   allr   r   r   tests       r   syncthreads_andzBlockThread.syncthreads_and   }    nn 0 0$..2B2BB).!!#&vvdmm//0qar   c                 T   | j                   j                  | j                   j                  | j                   j                  f}|| j                  j
                  |<   | j                          t        j                  | j                  j
                        }| j                          |rdS dS r   )	r   r   r   r   r}   r   r   rE   anyr   s       r   syncthreads_orzBlockThread.syncthreads_or   r   r   c                 <    d| j                   d| j                  dS )Nz
Thread <<<z, z>>>)r~   r   r^   s    r   __str__zBlockThread.__str__   s    (,t~~FFr   )r   r   r    r!   r:   rP   r   r   r   r   r   __classcell__)r   s   @r   rn   rn      s.    N00
F  Gr   rn   c                       e Zd ZdZd Zd Zy)rO   a  
    Manages the execution of a thread block.

    When run() is called, all threads are started. Each thread executes until it
    hits syncthreads(), at which point it sets its own syncthreads_blocked to
    True so that the BlockManager knows it is blocked. It then waits on its
    syncthreads_event.

    The BlockManager polls threads to determine if they are blocked in
    syncthreads(). If it finds a blocked thread, it adds it to the set of
    blocked threads. When all threads are blocked, it unblocks all the threads.
    The thread are unblocked by setting their syncthreads_blocked back to False
    and setting their syncthreads_event.

    The polling continues until no threads are alive, when execution is
    complete.
    c                     || _         || _        || _        || _        t	        j
                  |t        j                        | _        y )N)dtype)	_grid_dimr   _fr.   rE   zerosbool_r   )r   ru   r1   r2   r9   s        r   r:   zBlockManager.__init__  s5    !#88IRXX>r   c                     t               }t               }t               }t        j                   j                   D ]S  } fd}t	        | || j
                        }|j                          |j                  |       |j                  |       U |r|D ]  }|j                  r|j                  |       !|j                  s.|D ]*  }	d|	_
        d|	_        |	j                  j                          , |j                  d   j                  |j                  d          ||k(  r2|D ]#  }d|_        |j                  j                          % t               }t        |D cg c]  }|j                         s| c}      }|r|D ]9  }|j                  s|j                  d   j                  |j                  d          y c c}w )Nc                  $     j                      y r%   )r   )rQ   r   s   r   rw   z BlockManager.run.<locals>.target  s    r   TFr   r   )setrE   rN   r   rn   r.   startaddr|   r   r   r{   with_tracebackis_alive)
r   rT   rQ   threadslivethreadsblockedthreadsblock_pointrw   tt_others
   ` `       r   rP   zBlockManager.run  s~   %e::t7KFD*k4;;OAGGIKKNOOA 8  (("&&q)[[ $+(,6;311557 $+
 ++a.77AGG ! n,'A,1A)''++- ( "%;H;a!**,;HIK' , A{{kk!n33AKKNCC  Is    G6GN)r   r   r    r!   r:   rP   r   r   r   rO   rO      s    "?(Dr   rO   )
contextlibr   r5   r   ry   numpyrE   cudadrv.devicearrayr   r   	kernelapir   r   r	   errorsr   rQ   r   r   r   r   r   r   dictr#   objectr)   Threadrn   rO   r   r   r   <module>r      s    %  
   I @ @ 0 $
  
 
 t _V _HPG)"" PGfAD6 ADr   