
    xKgy                     "   d Z ddlZddlZddlZddlZddlmZ ddlZddl	Z	ddl	m
Z
 ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ 	   e ed      d      Z!d Z#d Z$d Z% G d de
jL                        Z' G d de'      Z(e!d        Z) G d de'      Z* G d de+      Z, G d de'ejZ                        Z. G d d e'ejZ                        Z/d)d!Z0d)d"Z1d# Z2d$ Z3d%Z4d& Z5d*d'Z6d( Z7y# e"$ r d Z!Y w xY w)+z
A CUDA ND Array is recognized by checking the __cuda_memory__ attribute
on the object.  If it exists and evaluate to True, it must define shape,
strides, dtype and size attributes similar to a NumPy ndarray.
    N)c_void_p)_devicearray)devices)driver)typesconfig)to_fixed_tuple)numpy_version)
dummyarray)numpy_support)prepare_shape_strides_dtype)NumbaPerformanceWarning)warn	lru_cachec                     | S N )funcs    b/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/cudadrv/devicearray.pyr   r       s        c                     t        | dd      S )z$Check if an object is a CUDA ndarray__cuda_ndarray__F)getattrobjs    r   is_cuda_ndarrayr   $   s    3*E22r   c                      t                 fd} |dt                |dt                |dt        j                          |dt               y)z,Verify the CUDA ndarray interface for an objc                     t        |       st        |       t        t        |       |      st        | d|      y )Nz must be of type )hasattrAttributeError
isinstancer   )attrtypr   s     r   requires_attrz4verify_cuda_ndarray_interface.<locals>.requires_attr-   s>    sD! &&'#t,c2 D#!FGG 3r   shapestridesdtypesizeN)require_cuda_ndarraytuplenpr'   int)r   r$   s   ` r   verify_cuda_ndarray_interfacer-   )   s?    H '5!)U#'288$&#r   c                 0    t        |       st        d      y)z9Raises ValueError is is_cuda_ndarray(obj) evaluates Falsezrequire an cuda ndarray objectN)r   
ValueErrorr   s    r   r)   r)   9   s    39::  r   c                       e Zd ZdZdZdZddZed        ZddZ	ed        Z
ddZd	 Zed
        Zed        Zej                   dd       Zej                   dd       ZddZd Zd ZddZd Zed        Zy)DeviceNDArrayBasez$A on GPU NDArray representation
    TNc                 :   t        |t              r|f}t        |t              r|f}t        j                  |      }t	        |      | _        t	        |      | j
                  k7  rt        d      t        j                  j                  d|||j                        | _        t        |      | _        t        |      | _        || _        t        t        j                   t"        j$                  | j                  d            | _        | j&                  dkD  r|rt)        j*                  | j                  | j                  | j                  j                        | _        t/        j0                         j3                  | j,                        }nt)        j4                  |      | _        nlt(        j6                  r t(        j8                  j;                  d      }nt=        d      }t)        j>                  t/        j0                         |d      }d| _        || _         || _!        y)a5  
        Args
        ----

        shape
            array shape.
        strides
            array strides.
        dtype
            data type as np.dtype coercible object.
        stream
            cuda stream.
        gpu_data
            user provided device memory for the ndarray data buffer
        zstrides not match ndimr      N)contextpointerr(   )"r!   r,   r+   r'   lenndimr/   r   Array	from_descitemsize_dummyr*   r%   r&   	functoolsreduceoperatormulr(   _drivermemory_size_from_info
alloc_sizer   get_contextmemallocdevice_memory_sizeUSE_NV_BINDINGbindingCUdeviceptrr   MemoryPointergpu_datastream)selfr%   r&   r'   rK   rJ   nulls          r   __init__zDeviceNDArrayBase.__init__E   s     eS!HEgs#jGJ	w<499$566 &&00E716A5\
W~
	((tzz1EF	99q=")"?"?JJdjj.A.A#C"..099$//J")"<"<X"F %%2215{,,W5H5H5J59CHDO r   c                    t         j                  r%| j                  t        | j                        }n2d}n/| j                  j                  | j                  j                  }nd}t        | j                        t        |       rd nt        | j                        |df| j                  j                  | j                  dk7  rt        | j                        ddS d ddS )Nr   F   )r%   r&   datatypestrrK   version)r@   rF   device_ctypes_pointerr,   valuer*   r%   is_contiguousr&   r'   strrK   )rL   ptrs     r   __cuda_array_interface__z*DeviceNDArrayBase.__cuda_array_interface__x   s    !!))5$445))//;0066 4::&,T2tdll8K%Lzz~~*.++*:c$++&
 	

 AE
 	
r   c                 >    t        j                   |       }||_        |S )zBind a CUDA stream to this object so that all subsequent operation
        on this array defaults to the given stream.
        )copyrK   )rL   rK   clones      r   bindzDeviceNDArrayBase.bind   s     		$r   c                 "    | j                         S r   	transposerL   s    r   TzDeviceNDArrayBase.T   s    ~~r   c                 &   |r,t        |      t        t        | j                              k(  r| S | j                  dk7  rd}t        |      |8t	        |      t	        t        | j                              k7  rt        d|      ddlm}  ||       S )N   z2transposing a non-2D DeviceNDArray isn't supportedzinvalid axes list r   r_   )r*   ranger7   NotImplementedErrorsetr/   numba.cuda.kernels.transposer`   )rL   axesmsgr`   s       r   r`   zDeviceNDArrayBase.transpose   sx    E$K5tyy)9#::KYY!^FC%c**#d)s53C/D"Dt=>>>T?"r   c                 "    |s| j                   S |S r   rK   )rL   rK   s     r   _default_streamz!DeviceNDArrayBase._default_stream   s    "(t{{4f4r   c                     d| j                   v }| j                  d   r|sd}n| j                  d   r|sd}nd}t        j                  | j                        }t        j                  || j                  |      S )n
        Magic attribute expected by Numba to get the numba type that
        represents this object.
        r   C_CONTIGUOUSCF_CONTIGUOUSFA)r&   flagsr   
from_dtyper'   r   r8   r7   )rL   	broadcastlayoutr'   s       r   _numba_type_zDeviceNDArrayBase._numba_type_   sh    ( %	::n%iFZZ'	FF((4{{5$))V44r   c                     | j                   :t        j                  rt        j                  j	                  d      S t        d      S | j                   j                  S )z:Returns the ctypes pointer to the GPU data buffer
        r   )rJ   r@   rF   rG   rH   r   rT   ra   s    r   rT   z'DeviceNDArrayBase.device_ctypes_pointer   sF     == %%22155{"==666r   c                    |j                   dk(  ryt        |        | j                  |      }t        |       t        |      }}t	        j
                  |      r;t        |       t        ||       t	        j                  | || j                  |       yt        j                  ||j                  d   rdnddt        dk  r|j                  d	    nd
      }t        ||       t	        j                  | || j                  |       y)zCopy `ary` to `self`.

        If `ary` is a CUDA memory, perform a device-to-device transfer.
        Otherwise, perform a a host-to-device transfer.
        r   Nrl   rp   rq   rs   Trd   r   	WRITEABLE)ordersubokr[   )r(   sentry_contiguousrm   
array_corer@   is_device_memorycheck_array_compatibilitydevice_to_devicerB   r+   arrayru   r
   host_to_device)rL   aryrK   	self_coreary_cores        r   copy_to_devicez DeviceNDArrayBase.copy_to_device   s     88q=$%%f-(.
38	##C(c"%i:$$T3O xx&__^<c# 6) #..55/35H &i:""44??*02r   c                    t        d | j                  D              r&d}t        |j                  | j                              | j                  dk\  sJ d       | j                  |      }|0t        j                  | j                  t        j                        }nt        | |       |}| j                  dk7  r#t        j                  || | j                  |       |t| j                  dk(  r.t        j                  | j                  | j                  |      }|S t        j                  | j                  | j                  | j                  |      }|S )	a^  Copy ``self`` to ``ary`` or create a new Numpy ndarray
        if ``ary`` is ``None``.

        If a CUDA ``stream`` is given, then the transfer will be made
        asynchronously as part as the given stream.  Otherwise, the transfer is
        synchronous: the function returns after the copy is finished.

        Always returns the host array.

        Example::

            import numpy as np
            from numba import cuda

            arr = np.arange(1000)
            d_arr = cuda.to_device(arr)

            my_kernel[100, 100](d_arr)

            result_array = d_arr.copy_to_host()
        c              3   &   K   | ]	  }|d k    ywr   Nr   ).0ss     r   	<genexpr>z1DeviceNDArrayBase.copy_to_host.<locals>.<genexpr>  s     +lq1uls   z2D->H copy not implemented for negative strides: {}r   zNegative memory sizer%   r'   rl   )r%   r'   buffer)r%   r'   r&   r   )anyr&   rf   formatrB   rm   r+   emptybyter   r@   device_to_hostr(   ndarrayr%   r'   )rL   r   rK   rj   hostarys        r   copy_to_hostzDeviceNDArrayBase.copy_to_host   s   . +dll++FC%cjj&>??!#;%;;#%%f-;hhT__BGGDG%dC0G??a""7D$//*02 ;yyA~**4::TZZ,35
  **4::TZZ-1\\'Kr   c              #   T  K   | j                  |      }| j                  dk7  rt        d      | j                  d   | j                  j
                  k7  rt        d      t        t        j                  t        | j                        |z              }| j                  }| j                  j
                  }t        |      D ]d  }||z  }t        ||z   | j                        }||z
  f}	| j                  j                  ||z  ||z        }
t        |	|| j                  ||
       f yw)zSplit the array into equal partition of the `section` size.
        If the array cannot be equally divided, the last section will be
        smaller.
        r3   zonly support 1d arrayr   zonly support unit strider'   rK   rJ   N)rm   r7   r/   r&   r'   r:   r,   mathceilfloatr(   re   minrJ   viewDeviceNDArray)rL   sectionrK   nsectr&   r:   ibeginendr%   rJ   s              r   splitzDeviceNDArrayBase.split#  s     
 %%f-99>455<<?djj111788DIIeDII.89:,,::&&uAKEegotyy1C5[NE}}))%(*:C(NKHwdjj)13 3 s   D&D(c                     | j                   S )zEReturns a device memory object that is used as the argument.
        )rJ   ra   s    r   as_cuda_argzDeviceNDArrayBase.as_cuda_arg8  s     }}r   c                     t        j                         j                  | j                        }t	        | j
                  | j                  | j                        }t        ||      S )z
        Returns a *IpcArrayHandle* object that is safe to serialize and transfer
        to another process to share the local allocation.

        Note: this feature is only available on Linux.
        )r%   r&   r'   )
ipc_handle
array_desc)	r   rC   get_ipc_handlerJ   dictr%   r&   r'   IpcArrayHandle)rL   ipchdescs      r   r   z DeviceNDArrayBase.get_ipc_handle=  sH     ""$33DMMB$**dll$**M$??r   c                     | j                   j                  |      \  }}t        |j                  |j                  | j
                  | j                  |      | j                        S )a(  
        Remove axes of size one from the array shape.

        Parameters
        ----------
        axis : None or int or tuple of ints, optional
            Subset of dimensions to remove. A `ValueError` is raised if an axis
            with size greater than one is selected. If `None`, all axes with
            size one are removed.
        stream : cuda stream or 0, optional
            Default stream for the returned view of the array.

        Returns
        -------
        DeviceNDArray
            Squeezed view into the array.

        )axisr%   r&   r'   rK   rJ   )r;   squeezer   r%   r&   r'   rm   rJ   )rL   r   rK   	new_dummy_s        r   r   zDeviceNDArrayBase.squeezeH  sX    & {{***5	1//%%**''/]]
 	
r   c                    t        j                  |      }t        | j                        }t        | j                        }| j                  j
                  |j
                  k7  rp| j                         st        d      t        |d   | j                  j
                  z  |j
                        \  |d<   }|dk7  rt        d      |j
                  |d<   t        |||| j                  | j                        S )zeReturns a new object by reinterpretting the dtype without making a
        copy of the data.
        zHTo change to a dtype of a different size, the array must be C-contiguousr   zuWhen changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.r   )r+   r'   listr%   r&   r:   is_c_contiguousr/   divmodr   rK   rJ   )rL   r'   r%   r&   rems        r   r   zDeviceNDArrayBase.viewd  s     TZZ t||$::%..0'') 6 
 $b	DJJ///NE"Is
 ax 6   ..GBK;;]]
 	
r   c                 H    | j                   j                  | j                  z  S r   )r'   r:   r(   ra   s    r   nbyteszDeviceNDArrayBase.nbytes  s    
 zz""TYY..r   r   r   r   Nr   )__name__
__module____qualname____doc____cuda_memory__r   rN   propertyrY   r]   rb   r`   rm   ry   rT   r   require_contextr   r   r   r   r   r   r   r   r   r   r   r1   r1   ?   s    O1f 
 
*    
#5 5 5< 	7 	7 2 2> , ,\3*
	@
8#
J / /r   r1   c                        e Zd ZdZd fd	Zed        Zed        Zej                  d        Z
ej                  dd       ZddZej                  d        Zej                  dd	       Zdd
Z xZS )DeviceRecordz
    An on-GPU record type
    c                 <    d}d}t         t        |   |||||       y Nr   )superr   rN   )rL   r'   rK   rJ   r%   r&   	__class__s         r   rN   zDeviceRecord.__init__  s'    lD*5'5&+3	5r   c                 @    t        | j                  j                        S z
        For `numpy.ndarray` compatibility. Ideally this would return a
        `np.core.multiarray.flagsobj`, but that needs to be constructed
        with an existing `numpy.ndarray` (as the C- and F- contiguous flags
        aren't writeable).
        r   r;   ru   ra   s    r   ru   zDeviceRecord.flags       DKK%%&&r   c                 @    t        j                  | j                        S )ro   )r   rv   r'   ra   s    r   ry   zDeviceRecord._numba_type_  s     ''

33r   c                 $    | j                  |      S r   _do_getitemrL   items     r   __getitem__zDeviceRecord.__getitem__      %%r   c                 &    | j                  ||      S z0Do `__getitem__(item)` with CUDA stream
        r   rL   r   rK   s      r   getitemzDeviceRecord.getitem       f--r   c                    | j                  |      }| j                  j                  |   \  }}| j                  j	                  |      }|j
                  dk(  rY|j                  t        |||      S t        j                  d|      }t        j                  |||j                  |       |d   S t        |j
                  d |j                  d   d      \  }}}	t        |||	||      S )	Nr   r   r3   r'   dstsrcr(   rK   r   rq   r%   r&   r'   rJ   rK   )rm   r'   fieldsrJ   r   r%   namesr   r+   r   r@   r   r:   r   subdtyper   )
rL   r   rK   r#   offsetnewdatar   r%   r&   r'   s
             r   r   zDeviceRecord._do_getitem  s    %%f-jj''-V--$$V,99?yy$##f-46 6 ((1C0&&7,/LL.46 1: ,CII,0,/LLOSB "E7E !ug',w(.0 0r   c                 &    | j                  ||      S r   _do_setitemrL   keyrU   s      r   __setitem__zDeviceRecord.__setitem__      U++r   c                 *    | j                  |||      S z6Do `__setitem__(key, value)` with CUDA stream
        rl   r   rL   r   rU   rK   s       r   setitemzDeviceRecord.setitem       U6::r   c                    | j                  |      }| }|r$t        j                         }|j                         }| j                  j
                  |   \  }}| j                  j                  |      } t        |       |||      }	t        |	j                  j                  |      |      \  }
}t        j                  |	|
|
j                  j                  |       |r|j                          y y )Nr   rl   )rm   r   rC   get_default_streamr'   r   rJ   r   typeauto_devicer@   r   r:   synchronize)rL   r   rU   rK   synchronousctxr#   r   r   lhsrhsr   s               r   r   zDeviceRecord._do_setitem  s    %%f-
 !j%%'C++-F jj'',V--$$V,d4js6GD SYY^^E26BQ 	  c399+=+=vF  r   r   r   )r   r   r   r   rN   r   ru   ry   r   r   r   r   r   r   r   r   __classcell__)r   s   @r   r   r     s    5 ' ' 4 4 & & . .
00 , , ; ;
!r   r   c                 r     ddl m  dk(  rj                  d        }|S j                   fd       }|S )z
    A separate method so we don't need to compile code every assignment (!).

    :param ndim: We need to have static array sizes for cuda.local.array, so
        bake in the number of dimensions into the kernel
    r   )cudac                     |d   | d<   y r   r   )r   r   s     r   kernelz_assign_kernel.<locals>.kernel  s    "gCGr   c                    j                  d      }d}t        | j                        D ]  }|| j                  |   z  } ||k\  ry j                  j                  dft        j                        }t        dz
  dd      D ]U  }|| j                  |   z  |d|f<   || j                  |   z  |j                  |   dkD  z  |d|f<   || j                  |   z  }W |t        |d            | t        |d         <   y )Nr3   rd   r   r   r   )	gridre   r7   r%   localr   r   int64r	   )r   r   location
n_elementsr   idxr  r7   s         r   r  z_assign_kernel.<locals>.kernel	  s   99Q<
sxxA#))A,&J !z!  jjd)++   taxR(A 399Q</C1I!CIIaL0SYYq\A5EFC1I1%H )
 -0s1vt0L,MN3q64()r   )numbar  jit)r7   r  r  s   ` @r   _assign_kernelr    sI     qy		 
		XXN N. Mr   c                       e Zd ZdZd Zed        Zd ZddZd Z	d Z
dd	Zej                  d
        Zej                  dd       ZddZej                  d        Zej                  dd       ZddZy)r   z
    An on-GPU array type
    c                 .    | j                   j                  S )zA
        Return true if the array is Fortran-contiguous.
        )r;   is_f_contigra   s    r   is_f_contiguouszDeviceNDArray.is_f_contiguous(       {{&&&r   c                 @    t        | j                  j                        S r   r   ra   s    r   ru   zDeviceNDArray.flags.  r   r   c                 .    | j                   j                  S )z;
        Return true if the array is C-contiguous.
        )r;   is_c_contigra   s    r   r   zDeviceNDArray.is_c_contiguous8  r  r   Nc                     |r| j                         j                  |      S | j                         j                         S )zE
        :return: an `numpy.ndarray`, so copies to the host.
        )r   	__array__)rL   r'   s     r   r  zDeviceNDArray.__array__>  s9     $$&0077$$&0022r   c                      | j                   d   S r   )r%   ra   s    r   __len__zDeviceNDArray.__len__G  s    zz!}r   c                    t        |      dk(  rt        |d   t        t        f      r|d   }t	        |       }|| j
                  k(  r4 || j
                  | j                  | j                  | j                        S  | j                  j                  |i |\  }}|| j                  j                  gk(  r4 ||j
                  |j                  | j                  | j                        S t        d      )z
        Reshape the array without changing its contents, similarly to
        :meth:`numpy.ndarray.reshape`. Example::

            d_arr = d_arr.reshape(20, 50, order='F')
        r3   r   )r%   r&   r'   rJ   operation requires copying)r6   r!   r*   r   r   r%   r&   r'   rJ   r;   reshapeextentrf   )rL   newshapekwsclsnewarrextentss         r   r  zDeviceNDArray.reshapeJ  s     x=A*Xa[5$-"H{H4jtzz!TZZ!ZZ$--A A .$++--x?3?t{{))**V\\6>>!ZZ$--A A &&BCCr   c                 ,   | j                  |      }t        |       }| j                  j                  |      \  }}|| j                  j                  gk(  r5 ||j
                  |j                  | j                  | j                  |      S t        d      )z
        Flattens a contiguous array without changing its contents, similar to
        :meth:`numpy.ndarray.ravel`. If the array is not contiguous, raises an
        exception.
        )r~   r   r  )
rm   r   r;   ravelr  r%   r&   r'   rJ   rf   )rL   r~   rK   r"  r#  r$  s         r   r&  zDeviceNDArray.ravelb  s     %%f-4j++++%+8t{{))**V\\6>>!ZZ$--$& &
 &&BCCr   c                 $    | j                  |      S r   r   r   s     r   r   zDeviceNDArray.__getitem__t  r   r   c                 &    | j                  ||      S r   r   r   s      r   r   zDeviceNDArray.getitemx  r   r   c                    | j                  |      }| j                  j                  |      }t        |j	                               }t        |       }t        |      dk(  r | j                  j                  |d    }|j                  s| j                  j                  t        | j                  ||      S t        j                  d| j                        }t        j                   ||| j                  j"                  |       |d   S  ||j$                  |j&                  | j                  ||      S  | j                  j                  |j(                   } ||j$                  |j&                  | j                  ||      S )Nr3   r   r   r   r   r   )rm   r;   r   r   iter_contiguous_extentr   r6   rJ   r   is_arrayr'   r   r   r+   r   r@   r   r:   r%   r&   r  )rL   r   rK   arrr$  r"  r   r   s           r   r   zDeviceNDArray._do_getitem~  sC   %%f-kk%%d+s11344jw<1(dmm(('!*5G<<::##/'djj18: : !hhq

;G**wG040D0D28: qz!CKK!%gfN N )dmm((#**5GSYY!ZZ'&J Jr   c                 &    | j                  ||      S r   r   r   s      r   r   zDeviceNDArray.__setitem__  r   r   c                 *    | j                  |||      S r   r   r   s       r   r   zDeviceNDArray.setitem  r   r   c                    | j                  |      }| }|r$t        j                         }|j                         }| j                  j                  |      } | j                  j                  |j                   }t        |t        j                        rd}d}	n|j                  }|j                  }	 t        |       ||	| j                  ||      }
t!        ||d      \  }}|j"                  |
j"                  kD  r&t%        d|j"                  d|
j"                  d      t'        j(                  |
j"                  t&        j*                        }|j                  ||
j"                  |j"                  z
  d   |j,                  | }t/        t1        |
j                  |j                              D ]$  \  }\  }}|d	k7  s||k7  st%        d
|||fz         t3        j4                  t6        j8                  |
j                  d	      } t;        |
j"                        j=                  ||      |
|       |r|j?                          y y )Nr   r   T)rK   user_explicitzCan't assign z-D array to z-D selfr   r3   zCCan't copy sequence with size %d to array axis %d with dimension %drl   ) rm   r   rC   r   r;   r   rJ   r   r  r!   r   Elementr%   r&   r   r'   r   r7   r/   r+   onesr	  r  	enumeratezipr<   r=   r>   r?   r  forallr   )rL   r   rU   rK   r   r   r,  r   r%   r&   r   r   r   	rhs_shaper   lrr  s                     r   r   zDeviceNDArray._do_setitem  s   %%f-
 !j%%'C++-F kk%%c*$$--$$cjj1c:--.EGIIEkkGd4j** U6FQ88chh   GGCHHBHH5	*-))	#((SXX%&'ckk9%"3syy#))#<=IAv1Av!q&  "=ABAqz"J K K > %%hllCIIqA
Bsxx ''
6'B3L  r   r   )rq   r   r   )r   r   r   r   r  r   ru   r   r  r  r  r&  r   r   r   r   r   r   r   r   r   r   r   r   r   $  s    ' ' ''3D0D$ & & . .
J: , , ; ;
5!r   r   c                   .    e Zd ZdZd Zd Zd Zd Zd Zy)r   a"  
    An IPC array handle that can be serialized and transfer to another process
    in the same machine for share a GPU allocation.

    On the destination process, use the *.open()* method to creates a new
    *DeviceNDArray* object that shares the allocation from the original process.
    To release the resources, call the *.close()* method.  After that, the
    destination can no longer use the shared array object.  (Note: the
    underlying weakref to the resource is now dead.)

    This object implements the context-manager interface that calls the
    *.open()* and *.close()* method automatically::

        with the_ipc_array_handle as ipc_array:
            # use ipc_array here as a normal gpu array object
            some_code(ipc_array)
        # ipc_array is dead at this point
    c                      || _         || _        y r   )_array_desc_ipc_handle)rL   r   r   s      r   rN   zIpcArrayHandle.__init__  s    %%r   c                     | j                   j                  t        j                               }t	        dd|i| j
                  S )z
        Returns a new *DeviceNDArray* that shares the allocation from the
        original process.  Must not be used on the original process.
        rJ   r   )r<  openr   rC   r   r;  )rL   dptrs     r   r>  zIpcArrayHandle.open  s<    
 $$W%8%8%:;?d?d.>.>??r   c                 8    | j                   j                          y)z5
        Closes the IPC handle to the array.
        N)r<  closera   s    r   rA  zIpcArrayHandle.close  s     	 r   c                 "    | j                         S r   )r>  ra   s    r   	__enter__zIpcArrayHandle.__enter__  s    yy{r   c                 $    | j                          y r   )rA  )rL   r   rU   	tracebacks       r   __exit__zIpcArrayHandle.__exit__  s    

r   N)	r   r   r   r   rN   r>  rA  rC  rF  r   r   r   r   r     s!    $&@!r   r   c                       e Zd ZdZddZy)MappedNDArrayz4
    A host array that uses CUDA mapped memory.
    c                      || _         || _        y r   rJ   rK   rL   rJ   rK   s      r   device_setupzMappedNDArray.device_setup       r   Nr   r   r   r   r   rL  r   r   r   rH  rH  	      r   rH  c                       e Zd ZdZddZy)ManagedNDArrayz5
    A host array that uses CUDA managed memory.
    c                      || _         || _        y r   rJ  rK  s      r   rL  zManagedNDArray.device_setup  rM  r   Nr   rN  r   r   r   rQ  rQ    rO  r   rQ  c                 ^    t        | j                  | j                  | j                  ||      S )z/Create a DeviceNDArray object that is like ary.rK   rJ   )r   r%   r&   r'   )r   rK   rJ   s      r   from_array_likerU    s&    CKK6"*, ,r   c                 2    t        | j                  ||      S )z.Create a DeviceRecord object that is like rec.rT  )r   r'   )recrK   rJ   s      r   from_record_likerX  #  s    		&8DDr   c                     | j                   r| j                  s| S g }| j                   D ]#  }|j                  |dk(  rdn
t        d             % | t	        |         S )aG  
    Extract the repeated core of a broadcast array.

    Broadcast arrays are by definition non-contiguous due to repeated
    dimensions, i.e., dimensions with stride 0. In order to ascertain memory
    contiguity and copy the underlying data from such arrays, we must create
    a view without the repeated dimensions.

    r   N)r&   r(   appendslicer*   )r   
core_indexstrides      r   r   r   (  sS     ;;chh
J++v{!d< uZ !!r   c                     | j                   j                  }t        t        | j                        t        | j
                              D ]  \  }}|dkD  s|dk7  s||k7  r y||z  } y)z
    Returns True iff `ary` is C-style contiguous while ignoring
    broadcasted and 1-sized dimensions.
    As opposed to array_core(), it does not call require_context(),
    which can be quite expensive.
    r3   r   FT)r'   r:   r4  reversedr%   r&   )r   r(   r%   r]  s       r   rV   rV   :  s`     99DXcii0(3;;2GHv191v~EMD	 I
 r   zArray contains non-contiguous buffer and cannot be transferred as a single memory region. Please ensure contiguous buffer with numpy .ascontiguousarray()c                 v    t        |       }|j                  d   s|j                  d   st        t              y y )Nrp   rr   )r   ru   r/   errmsg_contiguous_buffer)r   cores     r   r   r   P  s7    c?D::n%djj.H122 /I%r   c                 *   t        j                  |       r| dfS t        | d      r!t        j                  j                  |       dfS t        | t        j                        rt        | |      }n;t        j                  | t        dk  rdndd      } t        |        t        | |      }|ret        j                  rB|s@t        | t               s0t        | t        j"                        rd}t%        t'        |             |j)                  | |       |dfS )	z
    Create a DeviceRecord or DeviceArray like obj and optionally copy data from
    host to device. If obj already represents device memory, it is returned and
    no copy is made.
    FrY   rl   r|   NT)r[   r   zGHost array used in CUDA kernel will incur copy overhead to/from device.)r@   r   r   r  r  as_cuda_arrayr!   r+   voidrX  r   r
   r   rU  r   CUDA_WARN_ON_IMPLICIT_COPYr   r   r   r   r   )r   rK   r[   r0  devobjrj   s         r   r   r   V  s     $Ez	0	1zz'',e33c277#%c&9F ((+f4U$C c"$S8F00%#C7#C4;C056!!#f!5t|r   c                    | j                         |j                         }}| j                  |j                  k7  r%t        d| j                  d|j                        |j                  |j                  k7  r%t	        d| j                  d|j                        | j
                  r?|j                  |j                  k7  r%t	        d| j                  d|j                        y y )Nzincompatible dtype: z vs. zincompatible shape: zincompatible strides: )r   r'   	TypeErrorr%   r/   r(   r&   )ary1ary2ary1sqary2sqs       r   r   r   }  s    \\^T\\^FFzzTZZTZZ1 2 	2||v||#**djj2 3 	3 yyV^^v~~5,,6 7 	7 6yr   r   )r   TF)8r   r   r<   r>   r[   ctypesr   numpyr+   r  r   numba.cuda.cudadrvr   r   r@   
numba.corer   r   numba.np.unsafe.ndarrayr	   numba.np.numpy_supportr
   
numba.miscr   numba.npr   numba.cuda.api_utilr   numba.core.errorsr   warningsr   r   r   r    r   r-   r)   DeviceArrayr1   r   r  r   objectr   r   rH  rQ  rU  rX  r   rV   ra  r   r   r   r   r   r   <module>r{     s*           & 0 $ 2 0 ! " ; 5 /	;/5I3
 ;O/00 O/d
d!$ d!N ( (Vv!% v!r)V )X%rzz &

 ,E
"$ 3 3$N7  s   &D DD