
    xKg$                         d dl Zd dlmZmZmZ d dlmZmZ d dl	m
Z
 e
j                  rd\  ZZnd\  ZZeez  ZeefZ G d de      Zed	k(  r ej$                          yy)
    N)cudafloat32void)unittestCUDATestCase)config)      )2       c                       e Zd Zd Zy)TestCudaMatMulc                    t        j                  t        t        d d d d df   t        d d d d df   t        d d d d df               d        }t        j
                  j                  d       t	        j                  t        j
                  j                  t        t        f      t        j                        }t	        j                  t        j
                  j                  t        t        f      t        j                        }t	        j                  |      }t        j                         }|j                         5  t        j                  ||      }t        j                  ||      }t        j                  ||      } |t        t        ft        t        f|f   |||       |j                  ||       d d d        t	        j                   ||      }	t        j"                  j%                  ||	d       y # 1 sw Y   BxY w)N   c                    t         j                  j                  t        t              }t         j                  j                  t
        t
        ft              }t         j                  j                  }t         j                  j                  }t         j                  j                  }t         j                  j                  }t         j                  j                  }	t         j                  j                  }
|||	z  z   }|||
z  z   }t	        d      }t        t              D ]  }|t        k  r5|t        k  r,| |||t
        z  z   f   |||f<   |||t
        z  z   |f   |||f<   t        j                          |t        k  r/|t        k  r&t        t
              D ]  }||||f   |||f   z  z  } t        j                           |t        k  r|t        k  r||||f<   y y y )N)shapedtyper   )r   sharedarraySM_SIZEr   tpb	threadIdxxyblockIdxblockDimrangebpgnsyncthreads)ABCsAsBtxtybxbybwbhr   r   accijs                   g/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/tests/cudapy/test_matmul.pycu_square_matrix_mulz6TestCudaMatMul.test_func.<locals>.cu_square_matrix_mul   s{   """@B""#s7"CB!!B!!BBBBBR"WAR"WA!*C3Zq5QU!"1b1s7l?!3Br2vJ!"2C<?!3Br2vJ  "q5QU"3Zr"a%y2ae944 (   "   1uQ!Q$ u    *   )r   gh㈵>)rtol)r   jitr   r   nprandomseedr   r   
empty_likestreamauto_synchronize	to_devicer   r   copy_to_hostdottestingassert_allclose)
selfr0   r!   r"   r#   r9   dAdBdCCanss
             r/   	test_funczTestCudaMatMul.test_func   sk   	$wq#A#v3Q3CaCI	J	 
K	> 			rHHRYY%%q!f-RZZ@HHRYY%%q!f-RZZ@MM!$$&6*B6*B6*B@ #sc3Z!?@RLOOAv& ' vva| 	

""1d"6 '&s   A9G;;HN)__name__
__module____qualname__rE    r1   r/   r   r      s    37r1   r   __main__)numpyr5   numbar   r   r   numba.cuda.testingr   r   
numba.corer   ENABLE_CUDASIMr   r   r   r   r   rF   mainrI   r1   r/   <module>rQ      sj     % % 5  
HCHC#I*57\ 57p zHMMO r1   