
    xKgA                     f   d dl Z d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d
dlmZ d dlmZmZmZ d dl m!Z!  G d dej&                        Z" e jF                  de jH                        Z% G d de      Z& G d de      Z' G d de      Z(y)    N)cached_property)ir)cgutilsconfig	debuginfoitanium_manglertypestypingutils)
Dispatcher)BaseContext)BaseCallConvMinimalCallConv)	cmathdecl)	datamodel   )nvvm)codegen	nvvmutilsufuncs)cuda_data_managerc                   $     e Zd Zd Z fdZ xZS )CUDATypingContextc                    ddl m}m}m}m} ddlm}m} | j                  |j                         | j                  |j                         | j                  |j                         | j                  t        j                         | j                  |j                         | j                  |j                         | j                  |j                         y )Nr   )cudadeclcudamathlibdevicedeclvector_typesr   )enumdecl
cffi_utils) r   r   r   r   numba.core.typingr   r    install_registryregistryr   typing_registry)selfr   r   r   r   r   r    s          U/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/numba/cuda/target.pyload_additional_registriesz,CUDATypingContext.load_additional_registries   s    EE:h//0j112h//0i001m445h//0l::;    c                    ddl m} t        |t              rt        ||      s	 |j                  }t        t        | ;  |      S # t
        $ r |j                  st        d      |j                  j                         }d|d<   |j                  dd      |d<   |j                  dd      |d<    ||j                  |      }||_        |}Y w xY w)	Nr   )CUDADispatcherz<using cpu function on device but its compilation is disabledTdevicedebugFopt)numba.cuda.dispatcherr+   
isinstancer   _CUDATypingContext__dispatcherAttributeError_can_compile
ValueErrortargetoptionscopygetpy_funcsuperr   resolve_value_type)r&   valr+   r5   disp	__class__s        r'   r:   z$CUDATypingContext.resolve_value_type#   s    8sJ'3/&&  &@EE " ''$ &G H H # 1 1 6 6 8*.h')6):):7E)Jg&'4'8'8'Ee$%ckk=A $( s   A BCC)__name__
__module____qualname__r(   r:   __classcell__r=   s   @r'   r   r      s    
<F Fr)   r   z	[^a-z0-9]c                        e Zd ZdZdZd fd	Zed        Zed        Zd Z	d Z
d Zd Zed	        Zed
        Zed        ZddddZ	 ddZd Zd Zd Zd Zd Zd Z xZS )CUDATargetContextTc                 v    t         |   ||       t        j                  t        j
                        | _        y N)r9   __init__r   chainr   default_managerdata_model_manager)r&   	typingctxtargetr=   s      r'   rG   zCUDATargetContext.__init__G   s.    F+"3"9"9%%#
r)   c                 "    t         j                  S rF   )r   	DIBuilderr&   s    r'   rN   zCUDATargetContext.DIBuilderM   s    """r)   c                      y)NF rO   s    r'   enable_boundscheckz$CUDATargetContext.enable_boundscheckQ   s     r)   c                 8    | j                   j                  |      S rF   )_internal_codegen_create_empty_module)r&   names     r'   create_modulezCUDATargetContext.create_moduleW   s    %%::4@@r)   c                 F    t        j                  d      | _        d | _        y )Nznumba.cuda.jit)r   JITCUDACodegenrT   _target_datarO   s    r'   initzCUDATargetContext.initZ   s    !(!7!78H!I r)   c                    ddl m}m}m} ddl m}m}m} ddl m}m} ddl m	}	 ddl
m}
 ddlm} ddlm} d	d
lm}m}m}m}m} ddlm} | j/                  |j0                         | j/                  |
j0                         | j/                  |j0                         | j/                  |j0                         | j/                  |	j0                         | j/                  |j0                         | j/                  |j2                         y )Nr   )numberstupleobjslicing)rangeobj	iteratorsenumimpl)unicodecharseq)	cmathimpl)cffiimpl)arrayobj)
npdatetimer   )cudaimpl	printimpllibdeviceimplmathimplr   )ndarray)numba.cpythonr]   r^   r_   r`   ra   rb   rc   rd   re   
numba.miscrf   numba.nprg   rh   r!   ri   rj   rk   rl   r   numba.np.unsaferm   r#   r$   impl_registry)r&   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   r   rm   s                      r'   r(   z,CUDATargetContext.load_additional_registries^   s     	=<??2+'%'	
 	
 	,h//0h//0i001m445i001h//0l889r)   c                     | j                   S rF   )rT   rO   s    r'   r   zCUDATargetContext.codegenv   s    %%%r)   c                     | j                   6t        j                  t        j                         j
                        | _         | j                   S rF   )rZ   llcreate_target_datar   NVVMdata_layoutrO   s    r'   target_datazCUDATargetContext.target_datay   s9    $ " 5 5diik6M6M ND   r)   c                 |    ddl m} d}t        |D cg c]  }t        j                  |      |f c}      }|S c c}w )z
        Some CUDA intrinsics are at the module level, but cannot be treated as
        constants, because they are loaded from a special register in the PTX.
        These include threadIdx, blockDim, etc.
        r   cuda)	threadIdxblockDimblockIdxgridDimlaneidwarpsize)numbar|   tupler	   Module)r&   r|   	nonconstsncnonconsts_with_mods        r'   nonconst_module_attrsz'CUDATargetContext.nonconst_module_attrs   sM     	!	".7$9.7 &+\\$%7$<.7$9 :!!$9s   9c                     t        |       S rF   )CUDACallConvrO   s    r'   	call_convzCUDATargetContext.call_conv   s    D!!r)   rQ   Nabi_tagsuidc                4    t        j                  ||||      S )Nr   )r   mangle)r&   rV   argtypesr   r   s        r'   manglerzCUDATargetContext.mangler   s    %%dHx*-/ 	/r)   c	           	          t        j                  |j                  d      }	| j                         j	                  |j
                   d|	||      }
|
j                  |       | j                  |
||	||||      }|
|fS )a  
        Adapt a code library ``codelib`` with the numba compiled CUDA kernel
        with name ``fname`` and arguments ``argtypes`` for NVVM.
        A new library is created with a wrapper function that can be used as
        the kernel entry point for the given kernel.

        Returns the new code library and the wrapper function.

        Parameters:

        codelib:       The CodeLibrary containing the device function to wrap
                       in a kernel call.
        fndesc:        The FunctionDescriptor of the source function.
        debug:         Whether to compile with debug.
        lineinfo:      Whether to emit line info.
        nvvm_options:  Dict of NVVM options used when compiling the new library.
        filename:      The source filename that the function is contained in.
        linenum:       The source line that the function is on.
        max_registers: The max_registers argument for the code library.
        cudapyns_kernel_)
entry_namenvvm_optionsmax_registers)r   prepend_namespacellvm_func_namer   create_libraryrV   add_linking_librarygenerate_kernel_wrapper)r&   codelibfndescr-   lineinfor   filenamelinenumr   kernel_namelibrarywrappers               r'   prepare_cuda_kernelz%CUDATargetContext.prepare_cuda_kernel   s    . &77!!h
 ,,.//7<<.0I;F=I>K 0 M 	##G,..w/4h/68 r)   c                   !" |j                   }| j                  |      }	t        |	j                        }
t	        j
                  t	        j                         |
      }| j                  d      "t	        j
                  t	        j                  d      | j                  j                  t        j                        g|
z         }t	        j                  "||j                        }t        j                   |j"                  d      }t	        j                  "||      !t	        j$                  !j'                  d            }|s|rO|xr | }| j)                  "|| |      } |j*                  !||j,                  ||        |j.                  ||       !"fd} |d      }g }g }d	D ]6  }|j1                   |d
|z               |j1                   |d|z               8 |	j3                  |!j,                        }| j                  j5                  ||t        j6                  ||      \  }}|ret9        j:                  ||j<                        5  |j?                          ddd       |jA                  |jC                  |jD                              5  t	        jF                  |jH                  jJ                  d      }|jM                  |||jN                  dd      }|jQ                  |d      }tS        jT                  |      }|jA                  |      5  tW        d	|      D ](  \  }}|jY                  |      } |j[                  | |       * tW        d	|      D ](  \  }}|j]                  |      } |j[                  | |       * 	 ddd       ddd       |j?                          t_        j`                  !       |jc                  "       |s|r jd                          |je                          tf        jh                  rtk        jl                  |"       |jo                  !j"                        S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)z
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped is described by ``fndesc``.
        The wrapper function is returned.
        zcuda.kernel.wrapper    r   r   r!   )modulefilepathcgctxdirectives_onlyc                     j                   | z   }t        j                  t        j                  d      |      }t        j
                  |j                  j                  d       |_        |S )Nr   )	rV   r   add_global_variabler   IntTypeConstanttypepointeeinitializer)postfixrV   gvwrapfnwrapper_modules      r'   define_error_gvzBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gv   sO    ;;(D,,^RZZ^-13B[[$?BNIr)   __errcode__xyzz	__tid%s__z__ctaid%s__N	monotonicr   )8r   get_arg_packerlistargument_typesr   FunctionTypeVoidTyperW   r   r   get_return_typer	   pyobjectFunctionr   r   r   rV   	IRBuilderappend_basic_blockrN   mark_subprogramargsmark_locationappendfrom_argumentscall_functionvoidr   	if_likelyis_okret_voidif_thennot_is_python_excr   r   r   cmpxchgcodeextract_valuer   SRegBuilderziptidstorectaidr   set_cuda_kerneladd_ir_modulefinalizer   	DUMP_LLVMr   	dump_llvmget_function)#r&   r   r   r   r-   r   r   r   r   arginfoargtyswrapfntyfntyfuncprefixedbuilderr   r   r   gv_excgv_tidgv_ctaidicallargsstatus_oldxchgchangedsregdimptrr;   r   r   s#                                    @@r'   r   z)CUDATargetContext.generate_kernel_wrapper   s    ??%%h/g,,-??2;;=&9++,ABrzz"~ $ > >u~~ NO!' () {{>41F1FG"44TYY8L^Xx@,,v88<=H&4u9On08-17F ' HI &I%%V[[(G $I##GW5	 !/AMM/+/:;OOOMA,=>?  ))'6;;?NN00T5::x;	 ""7FLL9  " : f.B.B!CDkk&++"5"5t<
 vsFKK'2KA!//a8 !,,W5__W-%(%7	S"hhsmc3/ &8 &)%9	S"jjoc3/ &: . E, 	V$n-HI OOFN3##FKK00I :9  .- EDs2   P/BQA/P<Q/P9<Q	QQc           	         |j                   }t        |j                  d            D cg c]"  }| j                  t        j
                  |      $ }}t        j                  t        j                  d      t        |            }t        j                  ||      }t        j                  }	t        j                  ||j                  d|	      }
d|
_        d|
_        ||
_        | j'                  |j(                        }| j+                  |      }d|d	z
  j-                         z  |
_        t        j0                  t        j                  d            }|j3                  |
|d
      } | j5                  |      | |      }|j6                  D cg c]"  }| j                  t        j8                  |      $ }}|j:                  D cg c]"  }| j                  t        j8                  |      $ }}| j=                  ||j?                  ||j@                  j                        |||jB                  |jD                  d       |jG                         S c c}w c c}w c c}w )i
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        A)order   _cudapy_cmem	addrspaceinternalT   r   genericN)datashapestridesitemsizeparentmeminfo)$r   itertobytesget_constantr	   byter   	ArrayTyper   lenr   r   ADDRSPACE_CONSTANTr   r   r   linkageglobal_constantr   get_data_typedtypeget_abi_sizeof
bit_lengthalignPointerTypeaddrspacecast
make_arrayr  intpr  populate_arraybitcastr  r  r  	_getvalue)r&   r   arytyarrlmodr   	constvals
constarytyconstaryr   r   lldtyper  ptrtygenptraryskshapekstridess                      r'   make_constant_arrayz%CUDATargetContext.make_constant_array  s    ~~ #++C+01
1 ejj!,1 	 
 \\"**Q-Y@
;;z95++	((x}}n3<>
!! $$U[[1##G,..00 rzz!}-&&r5)< %dooe$T73<?IIFIq$##EJJ2IF>AkkJkD%%ejj!4kJCgoofchhmm&L"($,%(\\#**$(	 	 	* }}C
2 GJs   'I'I'Ic                    t        j                  |j                  d      dz         }dj                  dt	        j
                  |      g      }|j                  j                  |      }|Ft        j                  ||j                  |t        j                        }d|_        d|_        ||_        |j                  j                  j                   }|j#                  |j%                  t        j                              S )r   zutf-8    $__conststring__r   r   T)r   make_bytearrayencodejoinr   mangle_identifierglobalsr7   r   r   r   r  r  r  r   r   elementr  
as_pointer)r&   modstringtextrV   r   chartys          r'   insert_const_stringz%CUDATargetContext.insert_const_string@  s    
 %%fmmG&<w&FGxx*(::6BD E [[__T":,,S$))T7;7N7NPB#BJ!%B!BN ((zz&++D,C,CDEEr)   c                     |j                   }| j                  ||      }t        j                  t        j                  d            }|j                  ||d      S )z
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        r   r  )r   r;  r   r  r   r  )r&   r   r8  r   r   	charptrtys         r'   insert_string_const_addrspacez/CUDATargetContext.insert_string_const_addrspaceV  sJ     ~~%%dF3NN2::a=1	$$RI>>r)   c                      y)zRun O1 function passes
        NrQ   r&   r   s     r'   optimize_functionz#CUDATargetContext.optimize_functionb  s     	r)   c                 ,    t        j                  |      S rF   )r   get_ufunc_info)r&   	ufunc_keys     r'   rC  z CUDATargetContext.get_ufunc_infoo  s    $$Y//r)   r{   rF   )r>   r?   r@   implement_powi_as_math_callstrict_alignmentrG   propertyrN   rR   rW   r[   r(   r   ry   r   r   r   r   r   r   r+  r;  r>  rA  rC  rA   rB   s   @r'   rD   rD   C   s    "&
 # #  
A!:0& ! !
 " " " " 35$ / +/" HZ1x)VF,
?0r)   rD   c                       e Zd Zy)r   N)r>   r?   r@   rQ   r)   r'   r   r   s  s    r)   r   c                   N    e Zd ZdZd Zd Z	 	 ddZd Zd ZddZ	d	 Z
d
 Zd Zy)CUDACABICallConvz
    Calling convention aimed at matching the CUDA C/C++ ABI. The implemented
    function signature is:

        <Python return type> (<Python arguments>)

    Exceptions are unsupported in this convention.
    c                      y rF   rQ   )r&   r   s     r'   _make_call_helperz"CUDACABICallConv._make_call_helper  s     r)   c                 $    |j                  |      S rF   )ret)r&   r   retvals      r'   return_valuezCUDACABICallConv.return_value  s    {{6""r)   Nc                     d}t        |      )Nz7Python exceptions are unsupported in the CUDA C/C++ ABINotImplementedError)r&   r   excexc_argsloc	func_namemsgs          r'   return_user_excz CUDACABICallConv.return_user_exc  s    G!#&&r)   c                     d}t        |      )Nz2Return status is unsupported in the CUDA C/C++ ABIrR  )r&   r   r   rX  s       r'   return_status_propagatez(CUDACABICallConv.return_status_propagate  s    B!#&&r)   c                     | j                  |      }t        |j                        }t        j                  | j                  |      |      }|S )zM
        Get the LLVM IR Function type for *restype* and *argtypes*.
        )_get_arg_packerr   r   r   r   r   )r&   restyper   r   r   s        r'   get_function_typez"CUDACABICallConv.get_function_type  sD     &&x0../t33G<hGr)   c                     |rJ | j                  |      }|j                  | j                  |      |D cg c]  }d|z   	 c}       yc c}w )zA
        Set names and attributes of function arguments.
        zarg.N)r]  assign_namesget_arguments)r&   fnr   fe_argtypesnoaliasr   as          r'   decorate_functionz"CUDACABICallConv.decorate_function  sP     {&&{3T//3267$Qfqj$7	97s   A	
c                     |j                   S )z@
        Get the Python-level arguments of LLVM *func*.
        )r   r@  s     r'   rb  zCUDACABICallConv.get_arguments  s     yyr)   c                     | j                  |      }|j                  ||      }|j                  ||      }d}	| j                  j	                  |||      }
|	|
fS )z3
        Call the Numba-compiled *callee*.
        N)r]  as_argumentscallcontextget_returned_value)r&   r   calleerestyr   r   r   realargsr   r   outs              r'   r   zCUDACABICallConv.call_function  s_     &&v.''6||FH- ll--gudCs{r)   c                 P    | j                   j                  |   j                         S rF   )rl  rJ   r   )r&   tys     r'   r   z CUDACABICallConv.get_return_type  s     ||..r2BBDDr)   )NNN)F)r>   r?   r@   __doc__rL  rP  rY  r[  r_  rg  rb  r   r   rQ   r)   r'   rJ  rJ  w  s?    
# @D"&'
'9Er)   rJ  ))re	functoolsr   llvmlite.bindingbindingru   llvmliter   
numba.corer   r   r   r   r	   r
   r   numba.core.dispatcherr   numba.core.baser   numba.core.callconvr   r   r"   r   r   cudadrvr   
numba.cudar   r   r   numba.cuda.modelsr   r   compileIVALID_CHARSrD   r   rJ  rQ   r)   r'   <module>r     s    	 %  ' ' ' , ' = '    1 1 /$F** $FT bjjrtt,m0 m0`		? 	AE| AEr)   