
    xKg                         d dl Zd dlmZmZmZ d dlmZmZm	Z	 d dl
mZ d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Z G d de	      Zedk(  r ej8                          yy)    N)cudaint32float32)skip_on_cudasimunittestCUDATestCase)ENABLE_CUDASIMc                 `    t        j                  d      }t        j                          || |<   y N   )r   gridsyncthreadsaryis     e/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/tests/cudapy/test_sync.pyuseless_syncthreadsr      s$    		!ACF    c                 `    t        j                  d      }t        j                          || |<   y r   r   r   syncwarpr   s     r   useless_syncwarpr      s!    		!AMMOCFr   c                 b    t        j                  d      }t        j                  d       || |<   y )Nr     r   r   s     r   useless_syncwarp_with_maskr      s$    		!AMM&CFr   c                 (   t         j                  j                  dt              }t        j                  d      }|||<   t        j
                          |dk  r&||   ||dz      z   ||<   t        j
                  d       |dk  r&||   ||dz      z   ||<   t        j
                  d       |dk  r&||   ||dz      z   ||<   t        j
                  d       |d	k  r&||   ||d	z      z   ||<   t        j
                  d
       |dk(  r|d   |d   z   | d<   y y )N    r      r                     r   )r   sharedarrayr   r   r   )ressmr   s      r   coop_syncwarpr)      s   			2u	%B		!ABqEMMO2v11r6
"1f1u11q5	!1d1u11q5	!1c1u11q5	!1cAvAAA r   c                     d}t         j                  j                  |t              }t        j                  d      }|dk(  rt        |      D ]  }|||<   	 t        j                          ||   | |<   y )Nd   r   r   )r   r%   r&   r   r   ranger   )r   Nr(   r   js        r   simple_smemr/   4   sa    A			1e	$B		!AAvqABqE UCFr   c                     t        j                  d      \  }}t         j                  j                  dt              }|dz   |dz   z  |||f<   t        j
                          |||f   | ||f<   y )Nr#   
      r   r   r   r%   r&   r   r   )r   r   r.   r(   s       r   coop_smem2dr5   ?   sd    99Q<DAq			8W	-BA!a% Bq!tH1a4C1Ir   c                     t        j                  d      }t         j                  j                  dt              }|dz  ||<   t        j
                          ||   | |<   y )Nr   r   r#   r4   )r   r   r(   s      r   dyn_shared_memoryr7   G   sK    		!A			1g	&BEBqEUCFr   c                 `    | dxx   dz  cc<   t        j                          | dxx   dz  cc<   y Nr   {   iA  )r   threadfencer   s    r   use_threadfencer=   O   s(    FcMFFcMFr   c                 `    | dxx   dz  cc<   t        j                          | dxx   dz  cc<   y r9   )r   threadfence_blockr<   s    r   use_threadfence_blockr@   U   s(    FcMFFcMFr   c                 `    | dxx   dz  cc<   t        j                          | dxx   dz  cc<   y r9   )r   threadfence_systemr<   s    r   use_threadfence_systemrC   [   s(    FcMFFcMFr   c                 d    t        j                  d      }t        j                  | |         ||<   y r   )r   r   syncthreads_countary_inary_outr   s      r   use_syncthreads_countrI   a   s'    		!A''q	2GAJr   c                 d    t        j                  d      }t        j                  | |         ||<   y r   )r   r   syncthreads_andrF   s      r   use_syncthreads_andrL   f   s'    		!A%%fQi0GAJr   c                 d    t        j                  d      }t        j                  | |         ||<   y r   )r   r   syncthreads_orrF   s      r   use_syncthreads_orrO   k   s'    		!A$$VAY/GAJr   c                 R    t         ryt        j                         j                  | k\  S )NT)r	   r   get_current_devicecompute_capability)ccs    r   _safe_cc_checkrT   p   s"    &&(;;rAAr   c                   H   e Zd Zd Zd Z ed      d        Z ed       ej                   e	d      d      d               Z
 ed       ej                   e	d      d      d               Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)TestCudaSyncc                 ,    t        j                  d      |      }d}t        j                  |t        j                        }t        j
                  |t        j                        } |d|f   |       t        j                  j                  ||       y )Nvoid(int32[::1])r2   dtyper   )r   jitnpemptyr   arangetestingassert_equal)selfkernelcompilednelemr   exps         r   _test_uselesszTestCudaSync._test_uselessx   sl    /488./7hhuBHH-iiRXX.E3


S)r   c                 .    | j                  t               y N)rf   r   ra   s    r   test_useless_syncthreadsz%TestCudaSync.test_useless_syncthreads   s    ./r   z#syncwarp not implemented on cudasimc                 .    | j                  t               y rh   )rf   r   ri   s    r   test_useless_syncwarpz"TestCudaSync.test_useless_syncwarp   s    +,r   )   r   z'Partial masks require CC 7.0 or greaterc                 .    | j                  t               y rh   )rf   r   ri   s    r   test_useless_syncwarp_with_maskz,TestCudaSync.test_useless_syncwarp_with_mask   s     	56r   c                     d}d}d} t        j                  d      t              }t        j                  dt        j
                        } |||f   |       t        j                  j                  ||d          y )Ni  r   r   rX   rY   r   )r   r[   r)   r\   zerosr   r_   r`   )ra   expectednthreadsnblocksrc   r'   s         r   test_coop_syncwarpzTestCudaSync.test_coop_syncwarp   si     /488./>hhq)#("#C(


#a&1r   c           	      >    t        j                  d      t              }d}t        j                  |t        j
                        } |d|f   |       | j                  t        j                  |t        j                  |t        j
                        k(               y )NrX   r+   rY   r   )	r   r[   r/   r\   r]   r   
assertTrueallr^   )ra   rc   rd   r   s       r   test_simple_smemzTestCudaSync.test_simple_smem   sm    /488./<hhuBHH-E3sbiiRXX&FFGHr   c                     t        j                  d      t              }d}t        j                  |t        j
                        } |d|f   |       t        j                  |      }t        |j                  d         D ]/  }t        |j                  d         D ]  }|dz   |dz   z  |||f<    1 | j                  t        j                  ||             y )Nzvoid(float32[:,::1])r1   rY   r   r   )r   r[   r5   r\   r]   r   
empty_liker,   shaperw   allclose)ra   rc   r|   r   re   r   r.   s          r   test_coop_smem2dzTestCudaSync.test_coop_smem2d   s    348823K@hhuBJJ/E3mmC syy|$A399Q<(Uq1u-AqD	 ) % 	C-.r   c           
      v    t        j                  d      t              }d}t        j                  |t        j
                        } |d|d|j                  dz  f   |       | j                  t        j                  |dt        j                  |j                  t        j                        z  k(               y )Nzvoid(float32[::1])2   rY   r   r   r!   r#   )r   r[   r7   r\   r]   r   sizerw   rx   r^   r   )ra   rc   r|   r   s       r   test_dyn_shared_memoryz#TestCudaSync.test_dyn_shared_memory   s    1488012CDhhuBJJ/+E1chhl*+C0sa"))CHHBHH*M&MMNOr   c                 0   t         d d  f} t        j                  |      t              }t	        j
                  dt        j                         } |d   |       | j                  d|d          t        s"| j                  d|j                  |             y y )Nr2   rY   r   r     r   z
membar.gl;)
r   r   r[   r=   r\   rq   assertEqualr	   assertIninspect_asmra   sigrc   r   s       r   test_threadfence_codegenz%TestCudaSync.test_threadfence_codegen   sx    Qxk 488C=1hhr*sCF+MM,(<(<S(AB r   c                 0   t         d d  f} t        j                  |      t              }t	        j
                  dt        j                         } |d   |       | j                  d|d          t        s"| j                  d|j                  |             y y )Nr2   rY   r   r   r   zmembar.cta;)
r   r   r[   r@   r\   rq   r   r	   r   r   r   s       r   test_threadfence_block_codegenz+TestCudaSync.test_threadfence_block_codegen   sy    Qxk 488C=!67hhr*sCF+MM-)=)=c)BC r   c                 0   t         d d  f} t        j                  |      t              }t	        j
                  dt        j                         } |d   |       | j                  d|d          t        s"| j                  d|j                  |             y y )Nr2   rY   r   r   r   zmembar.sys;)
r   r   r[   rC   r\   rq   r   r	   r   r   r   s       r   test_threadfence_system_codegenz,TestCudaSync.test_threadfence_system_codegen   sy    Qxk 488C=!78hhr*sCF+MM-)=)=c)BC r   c                 (   t        j                  t              }t        j                  d|      }t        j
                  dt        j                        }d|d<   d|d<    |d   ||       | j                  t        j                  |dk(               y )NH   rY   r      *   )r   r   F   )	r   r[   rI   r\   onesrq   r   rw   rx   )ra   in_dtyperc   rG   rH   s        r   _test_syncthreads_countz$TestCudaSync._test_syncthreads_count   sp    88128,((2RXX.r
r
(w"}-.r   c                 B    | j                  t        j                         y rh   )r   r\   r   ri   s    r   test_syncthreads_countz#TestCudaSync.test_syncthreads_count       $$RXX.r   c                 B    | j                  t        j                         y rh   )r   r\   int16ri   s    r   test_syncthreads_count_upcastz*TestCudaSync.test_syncthreads_count_upcast   r   r   c                 B    | j                  t        j                         y rh   )r   r\   int64ri   s    r   test_syncthreads_count_downcastz,TestCudaSync.test_syncthreads_count_downcast   r   r   c                    t        j                  t              }d}t        j                  ||      }t        j
                  |t        j                        } |d|f   ||       | j                  t        j                  |dk(               d|d<    |d|f   ||       | j                  t        j                  |dk(               y Nr+   rY   r   r   r   )	r   r[   rL   r\   r   rq   r   rw   rx   ra   r   rc   rd   rG   rH   s         r   _test_syncthreads_andz"TestCudaSync._test_syncthreads_and   s    88/0h/((51E67+w!|,-r
E67+w!|,-r   c                 B    | j                  t        j                         y rh   )r   r\   r   ri   s    r   test_syncthreads_andz!TestCudaSync.test_syncthreads_and       ""288,r   c                 B    | j                  t        j                         y rh   )r   r\   r   ri   s    r   test_syncthreads_and_upcastz(TestCudaSync.test_syncthreads_and_upcast   r   r   c                 B    | j                  t        j                         y rh   )r   r\   r   ri   s    r   test_syncthreads_and_downcastz*TestCudaSync.test_syncthreads_and_downcast   r   r   c                    t        j                  t              }d}t        j                  ||      }t        j                  |t        j
                        } |d|f   ||       | j                  t        j                  |dk(               d|d<    |d|f   ||       | j                  t        j                  |dk(               y r   )r   r[   rO   r\   rq   r   rw   rx   r   s         r   _test_syncthreads_orz!TestCudaSync._test_syncthreads_or   s    88./%x0((51E67+w!|,-r
E67+w!|,-r   c                 B    | j                  t        j                         y rh   )r   r\   r   ri   s    r   test_syncthreads_orz TestCudaSync.test_syncthreads_or      !!"((+r   c                 B    | j                  t        j                         y rh   )r   r\   r   ri   s    r   test_syncthreads_or_upcastz'TestCudaSync.test_syncthreads_or_upcast  r   r   c                 B    | j                  t        j                         y rh   )r   r\   r   ri   s    r   test_syncthreads_or_downcastz)TestCudaSync.test_syncthreads_or_downcast
  r   r   N)__name__
__module____qualname__rf   rj   r   rl   r   
skipUnlessrT   ro   ru   ry   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   rV   rV   w   s    *0 :;- <- :;X/BD7D <7 :;X/BD
2D <
2I	/PCDD////	.---	.,,,r   rV   __main__)numpyr\   numbar   r   r   numba.cuda.testingr   r   r   numba.core.configr	   r   r   r   r)   r/   r5   r7   r=   r@   rC   rI   rL   rO   rT   rV   r   mainr   r   r   <module>r      s     & & F F ,63
1
0
BT,< T,n zHMMO r   