
    tKg?                         d Z g dZddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z ddlmZmZmZmZ  eg d	      dd
fdZddZ eg d	      dfdZ	 	 ddZg d	dfdZg d	ddfdZddZddZddZddZy)zB
Additional statistics functions with support for masked arrays.

)
compare_medians_mshdquantileshdmedianhdquantiles_sdidealfourthsmedian_cihsmjcimquantiles_cimjrshtrimmed_mean_ci    N)float64ndarray)MaskedArray   )_mstats_basic)normbetatbinom)g      ?      ?g      ?Fc                 v   d }t        j                  | dt              } t        j                  t        j
                  |            }|| j                  dk(  r || ||      }n@| j                  dkD  rt        d| j                  z        t        j                  ||| ||      }t        j                  |d      S )a$  
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of probabilities at which to compute the quantiles.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    See Also
    --------
    hdquantiles_sd

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats.mstats import hdquantiles
    >>>
    >>> # Sample data
    >>> data = np.array([1.2, 2.5, 3.7, 4.0, 5.1, 6.3, 7.0, 8.2, 9.4])
    >>>
    >>> # Probabilities at which to compute quantiles
    >>> probabilities = [0.25, 0.5, 0.75]
    >>>
    >>> # Compute Harrell-Davis quantile estimates
    >>> quantile_estimates = hdquantiles(data, prob=probabilities)
    >>>
    >>> # Display the quantile estimates
    >>> for i, quantile in enumerate(probabilities):
    ...     print(f"{int(quantile * 100)}th percentile: {quantile_estimates[i]}")
    25th percentile: 3.1505820231763066 # may vary
    50th percentile: 5.194344084883956
    75th percentile: 7.430626414674935

    c                    t        j                  t        j                  | j                         j	                  t
                          }|j                  }t        j                  dt        |      ft              }|dk  rt         j                  |_        |r|S |d   S t        j                  |dz         t        |      z  }t        j                  }t!        |      D ]g  \  }}	 |||dz   |	z  |dz   d|	z
  z        }
|
dd |
dd z
  }t        j"                  ||      }||d|f<   t        j"                  |||z
  dz        |d|f<   i |d   |d|dk(  f<   |d   |d|dk(  f<   |r$t         j                  x|d|dk(  f<   |d|dk(  f<   |S |d   S )zGComputes the HD quantiles for a 1D array. Returns nan for invalid data.   r   r   N)npsqueezesort
compressedviewr   sizeemptylenr   nanflatarangefloatr   cdf	enumeratedot)dataprobvarxsortednhdvbetacdfip_wwhd_means                ^/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/scipy/stats/_mstats_extras.py_hd_1Dzhdquantiles.<locals>._hd_1DP   s   **RWWT__%6%;%;G%DEFLLXXqTmW-q5ffBG	a5LIIacNU1X%((t_EQqQqS!GacAaC[1B12CR AffQ(GBqsGffQ1 45BqsG % #1:1dai<"2;1dai<24&&8Bq$!)|r!TQY,/I!u    Fcopydtyper   r   DArray 'data' must be at most two dimensional, but got data.ndim = %dr;   )
maarrayr   r   
atleast_1dasarrayndim
ValueErrorapply_along_axisfix_invalid)r*   r+   axisr,   r8   r3   results          r7   r   r      s    h< 88DuG4D
bjj&'A$))q.a%99q= 68<		B C C$$VT4C@>>&u--r9   c                 B    t        | dg||      }|j                         S )a9  
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdmedian : MaskedArray
        The median values.  If ``var=True``, the variance is returned inside
        the masked array.  E.g. for a 1-D array the shape change from (1,) to
        (2,).

    r   )rG   r,   )r   r   )r*   rG   r,   rH   s       r7   r   r   |   s#    , se$C8F>>r9   c                 p   d }t        j                  | dt              } t        j                  t        j
                  |            }|
 || |      }n?| j                  dkD  rt        d| j                  z        t        j                  ||| |      }t        j                  |d      j                         S )a  
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    c                    t        j                  | j                               }t        |      }t        j                  t        |      t
              }|dk  rt         j                  |_        t        j                  |      t        |dz
        z  }t        j                  }t        |      D ]  \  }} ||||z  |d|z
  z        }	|	dd |	dd z
  }
t        j                  |      }t        j                  |
|dd z        |dd |ddxxx t        j                  |
ddd   |ddd   z        ddd   z  ccc t        j                  |j!                         |dz
  z        ||<    |S )z%Computes the std error for 1D arrays.r   r   Nr   r   )r   r   r   r"   r!   r   r#   r$   r%   r&   r   r'   r(   
zeros_likecumsumsqrtr,   )r*   r+   r-   r.   hdsdvvr1   r2   r3   r4   r5   mx_s               r7   _hdsd_1Dz hdquantiles_sd.<locals>._hdsd_1D   s;   ''$//+,LxxD	7+q5DIYYq\E!A#J&((t_EQqQqS!QqS'*B12CR A --(CiiGCRL 01CGH		!DbD'GEQrEN":;DbDAAHggcggi1q512DG % r9   Fr:   r   r=   r>   )r?   r@   r   r   rA   rB   rC   rD   rE   rF   ravel)r*   r+   rG   rR   r3   rH   s         r7   r   r      s    02 88DuG4D
bjj&'A$"99q= 68<		B C C$$XtT1=>>&u-3355r9   皙?c                 \   t        j                  | d      } t        j                  | |||      }|j	                  |      }t        j
                  | |||      }|j                  |      dz
  }t        j                  d|dz  z
  |      }	t        j                  ||	|z  z
  ||	|z  z   f      S )a  
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    Fr>   )limits	inclusiverG   r          @)
r?   r@   mstatstrimrmeantrimmed_stdecountr   ppfr   )
r*   rV   rW   alpharG   trimmedtmeantstdedftppfs
             r7   r   r      s    T 88Du%Dll4)$OGLLEFYDQE	t	q	 B5558BD88UT%Z'tEz)9:;;r9   c                    d }t        j                  | d      } | j                  dkD  rt        d| j                  z        t	        j
                  t	        j                  |            }|	 || |      S t        j                  ||| |      S )a  
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    c                 |   t        j                  | j                               } | j                  }t        j                  |      |z  dz   j                  t              }t        j                  }t        j                  t        |      t              }t        j                  d|dz   t              |z  }|d|z  z
  }t        |      D ]s  \  }}	 |||	dz
  ||	z
         |||	dz
  ||	z
        z
  }
t        j                  |
|       }t        j                  |
| dz        }t        j                  ||dz  z
        ||<   u |S )Nr   r   )r<   g      ?r   )r   r   r   r    r@   astypeintr   r'   r!   r"   r   r%   r(   r)   rN   )r*   r3   r.   r+   r1   mjxyr2   mWC1C2s                r7   _mjci_1Dzmjci.<locals>._mjci_1D  s   wwt()IIa#%--c2((XXc$i)IIa!7+a/1Ht_EQq!A#ac"WQqs1Q3%77A$B$'"BGGBQJ'BqE	 %
 	r9   Fr>   r   r=   )r?   r@   rC   rD   r   rA   rB   rE   )r*   r+   rG   rp   r3   s        r7   r   r     s       88Du%Dyy1} 248II> ? 	? 	bjj&'Aa  ""8T4;;r9   c                     t        |d|z
        }t        j                  d|dz  z
        }t        j                  | |dd|      }t        | ||      }|||z  z
  |||z  z   fS )a  
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    Returns
    -------
    ci_lower : ndarray
        The lower boundaries of the confidence interval.  Of the same length as
        `prob`.
    ci_upper : ndarray
        The upper boundaries of the confidence interval.  Of the same length as
        `prob`.

    r   rX   r   )alphapbetaprG   rG   )minr   r^   rY   
mquantilesr   )r*   r+   r_   rG   zxqsmjs          r7   r	   r	   5  sk    6 q5y!EU2XA			4aqt	DB
tT
%CSL"q3w,''r9   c                     d }t        j                  | d      } | || |      }|S | j                  dkD  rt        d| j                  z        t        j                  ||| |      }|S )aA  
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval.

    c                    t        j                  | j                               } t        |       }t	        |d|z
        }t        t        j                  |dz  |d            }t        j                  ||z
  |d      t        j                  |dz
  |d      z
  }|d|z
  k  r:|dz  }t        j                  ||z
  |d      t        j                  |dz
  |d      z
  }t        j                  ||z
  dz
  |d      t        j                  ||d      z
  }|dz
  |z   ||z
  z  }||z
  |z  t        ||d|z  z
  |z  z         z  }|| |   z  d|z
  | |dz
     z  z   || ||z
  dz
     z  d|z
  | ||z
     z  z   f}|S )Nr   rX   r   r   )
r   r   r   r"   ru   rh   r   _ppfr'   r&   )	r*   r_   r.   kgkgkkIlambdlimss	            r7   _cihs_1Dzmedian_cihs.<locals>._cihs_1Dn  sw   wwt()IE1U7#

58Q,-YYqs1S!EIIac!C$88%<FA1Q3q%		!A#a(<<Bii!Aa$uyy1S'99!Ve^b3h'1	E!q1uai-00d1g5$qs) 33d1Q3q5k!QuWd1Q3i$779r9   Fr>   r   r=   )r?   r@   rC   rD   rE   )r*   r_   rG   r   rH   s        r7   r   r   W  sz    . 88Du%D$& M 99q= 68<		B C C$$XtT5AMr9   c                 X   t        j                  | |      t        j                  ||      }}t        j                  | |      t        j                  ||      }}t	        j
                  ||z
        t        j                  |dz  |dz  z         z  }dt        j                  |      z
  S )a"  
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    Examples
    --------

    >>> from scipy import stats
    >>> a = [1, 2, 3, 4, 5, 6, 7]
    >>> b = [8, 9, 10, 11, 12, 13, 14]
    >>> stats.mstats.compare_medians_ms(a, b, axis=None)
    1.0693225866553746e-05

    The function is vectorized to compute along a given axis.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> x = rng.random(size=(3, 7))
    >>> y = rng.random(size=(3, 8))
    >>> stats.mstats.compare_medians_ms(x, y, axis=1)
    array([0.36908985, 0.36092538, 0.2765313 ])

    References
    ----------
    .. [1] McKean, Joseph W., and Ronald M. Schrader. "A comparison of methods
       for studentizing the sample median." Communications in
       Statistics-Simulation and Computation 13.6 (1984): 751-773.

    rt   r   r   )	r?   medianrY   stde_medianr   absrN   r   r'   )group_1group_2rG   med_1med_2std_1std_2rm   s           r7   r   r     s    d iiT2BIIg44PEU((t<((t< U
uu}q5!8(; <<Atxx{?r9   c                     d }t        j                  | |      j                  t              } | ||       S t        j                  |||       S )aC  
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    c                 0   | j                         }t        |      }|dk  r t        j                  t        j                  gS t	        |dz  dz   d      \  }}t        |      }d|z
  ||dz
     z  |||   z  z   }||z
  }d|z
  ||   z  |||dz
     z  z   }||gS )N   g      @g?r   )r   r"   r   r#   divmodrh   )r*   rj   r.   jhqlor}   qups           r7   _idfzidealfourths.<locals>._idf  s    OOFq5FF266?"qte|A&1FsAacFlQqtV#EsAaDj1QqsV8#Szr9   rt   )r?   r   r   r   rE   )r*   rG   r   s      r7   r   r     sH    ,
 774d#((5DDz""4t44r9   c                    t        j                  | d      } || }n(t        j                  t        j                  |            }| j
                  dk7  rt        d      | j                         }t        | d      }d|d   |d	   z
  z  |d
z  z  }| dddf   |dddf   |z   k  j                  d	      }| dddf   |dddf   |z
  k  j                  d	      }||z
  d|z  |z  z  S )a  
    Evaluates Rosenblatt's shifted histogram estimators for each data point.

    Rosenblatt's estimator is a centered finite-difference approximation to the
    derivative of the empirical cumulative distribution function.

    Parameters
    ----------
    data : sequence
        Input data, should be 1-D. Masked values are ignored.
    points : sequence or None, optional
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    Fr>   Nr   z#The input array should be 1D only !rt   g333333?r   r   皙?rX   )
r?   r@   r   rA   rB   rC   AttributeErrorr]   r   sum)r*   pointsr.   rr   nhinlos          r7   r
   r
     s      88Du%D~rzz&12yyA~BCC

AT%AquQqTzQY&A$<6$q&>A--
2
21
5C$<&a.1,,
1
1!
4CG1Qr9   )r   F))r   r   )TTrT   N)rT   N)N)__doc____all__numpyr   r   r   numpy.mar?   r    r   rY   scipy.stats.distributionsr   r   r   r   listr   r   r   r   r   r	   r   r   r   r
    r9   r7   <module>r      s     "    % : :  -De ].@4 #<0t <6~ 7B%)0<f $$ *<Z  0t$ (D0f6r%5P r9   