
    {KgZX                         d Z ddlZddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ d Zd Zd Zd ZddZdddZd Zd Zd Zd Zd Zd ZddZ d dZ!d Z"d Z#d Z$d Z%y)!zBA collection of utilities to work with sparse matrices and arrays.    N)LinearOperator   )_sparse_min_max_sparse_nan_min_max)_check_sample_weight   )csc_mean_variance_axis0)csr_mean_variance_axis0)incr_mean_variance_axis0c                 z    t        j                  |       r| j                  n
t        |       }d|z  }t	        |      )z2Raises a TypeError if X is not a CSR or CSC matrixz,Expected a CSR or CSC sparse matrix, got %s.)spissparseformattype	TypeError)X
input_typeerrs      ]/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/utils/sparsefuncs.py_raise_typeerrorr      s/    [[^aJ
8:
EC
C.    c                 (    | dvrt        d| z        y )N)r   r   z8Unknown axis value: %d. Use 0 for rows, or 1 for columns)
ValueErroraxiss    r   _raise_error_wrong_axisr       s$    6FM
 	
 r   c                     |j                   d   | j                   d   k(  sJ | xj                  |j                  | j                  d      z  c_        y)a  Inplace column scaling of a CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features.
        It should be of CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_csr_column_scale(csr, scale)
    >>> csr.todense()
    matrix([[16,  3,  4],
            [ 0,  0, 10],
            [ 0,  0,  0],
            [ 0,  0,  0]])
    r   r   clip)modeN)shapedatatakeindicesr   scales     r   inplace_csr_column_scaler&   '   sB    J ;;q>QWWQZ'''FFejjj00Fr   c                     |j                   d   | j                   d   k(  sJ | xj                  t        j                  |t        j                  | j
                              z  c_        y)a  Inplace row scaling of a CSR matrix.

    Scale each sample of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR format.

    scale : ndarray of float of shape (n_samples,)
        Array of precomputed sample-wise values to use for scaling.
    r   N)r    r!   nprepeatdiffindptrr$   s     r   inplace_csr_row_scaler,   P   sH     ;;q>QWWQZ'''FFbiirwwqxx011Fr   c                 l   t        |       t        j                  |       r:| j                  dk(  r+|dk(  rt	        | ||      S t        | j                  ||      S t        j                  |       r:| j                  dk(  r+|dk(  rt        | ||      S t	        | j                  ||      S t        |        y)a{  Compute mean and variance along an axis on a CSR or CSC matrix.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It can be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    return_sum_weights : bool, default=False
        If True, returns the sum of weights seen for each feature
        if `axis=0` or each sample if `axis=1`.

        .. versionadded:: 0.24

    Returns
    -------

    means : ndarray of shape (n_features,), dtype=floating
        Feature-wise means.

    variances : ndarray of shape (n_features,), dtype=floating
        Feature-wise variances.

    sum_weights : ndarray of shape (n_features,), dtype=floating
        Returned if `return_sum_weights` is `True`.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.mean_variance_axis(csr, axis=0)
    (array([2.  , 0.25, 1.75]), array([12.    ,  0.1875,  4.1875]))
    csrr   )weightsreturn_sum_weightscscN)r   r   r   r   _csr_mean_var_axis0_csc_mean_var_axis0Tr   )r   r   r/   r0   s       r   mean_variance_axisr5   b   s    l D!	{{1~!((e+19&77I  'W9K  
QAHH-19&77I  'W9K  	r   )r/   c                   t        |       t        j                  |       r| j                  dv st	        |        t        j                  |      dk(  r,t        j                  |j                  ||j                        }t        j                  |      t        j                  |      cxk(  r"t        j                  |      k(  st        d       t        d      |dk(  rWt        j                  |      | j                  d   k7  rt        d| j                  d    dt        j                  |       d      t        j                  |      | j                  d   k7  r2t        d	| j                  d    dt        j                  |       d      |dk(  r| j                  n| } |t        || | j                        }t        | ||||
      S )a  Compute incremental mean and variance along an axis on a CSR or CSC matrix.

    last_mean, last_var are the statistics computed at the last step by this
    function. Both must be initialized to 0-arrays of the proper size, i.e.
    the number of features in X. last_n is the number of samples encountered
    until now.

    Parameters
    ----------
    X : CSR or CSC sparse matrix of shape (n_samples, n_features)
        Input data.

    axis : {0, 1}
        Axis along which the axis should be computed.

    last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of means to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of variances to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_n : float or ndarray of shape (n_features,) or (n_samples,),             dtype=floating
        Sum of the weights seen so far, excluding the current weights
        If not float, it should be of shape (n_features,) if
        axis=0 or (n_samples,) if axis=1. If float it corresponds to
        having same weights for all samples (or features).

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    Returns
    -------
    means : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise means if axis = 0 or
        sample-wise means if axis = 1.

    variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise variances if axis = 0 or
        sample-wise variances if axis = 1.

    n : ndarray of shape (n_features,) or (n_samples,), dtype=integral
        Updated number of seen samples per feature if axis=0
        or number of seen features per sample if axis=1.

        If weights is not None, n is a sum of the weights of the seen
        samples or features instead of the actual number of seen
        samples or features.

    Notes
    -----
    NaNs are ignored in the algorithm.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.incr_mean_variance_axis(
    ...     csr, axis=0, last_mean=np.zeros(3), last_var=np.zeros(3), last_n=2
    ... )
    (array([1.3..., 0.1..., 1.1...]), array([8.8..., 0.1..., 3.4...]),
    array([6., 6., 6.]))
    )r1   r.   r   )dtypez8last_mean, last_var, last_n do not have the same shapes.r   zHIf axis=1, then last_mean, last_n, last_var should be of size n_samples z (Got z).zIIf axis=0, then last_mean, last_n, last_var should be of size n_features )	last_meanlast_varlast_nr/   )r   r   r   r   r   r(   sizefullr    r7   r   r4   r   _incr_mean_var_axis0)r   r   r8   r9   r:   r/   s         r   incr_mean_variance_axisr>      s   b D!KKNqxx>9	wwv!&	HGGI"''("3FrwwvFSTT GSTTqy779+""#''!*VBGGI4F3GrK 
 779+##$771:,fRWWY5G4HL 
 qyaA&wA	Y&' r   c                     t        j                  |       r&| j                  dk(  rt        | j                  |       yt        j                  |       r| j                  dk(  rt        | |       yt        |        y)a  Inplace column scaling of a CSC/CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features. It should be
        of CSC or CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_column_scale(csr, scale)
    >>> csr.todense()
    matrix([[16,  3,  4],
            [ 0,  0, 10],
            [ 0,  0,  0],
            [ 0,  0,  0]])
    r1   r.   N)r   r   r   r,   r4   r&   r   r$   s     r   inplace_column_scaler@   #  sQ    J 
{{1~!((e+acc5)	QAHH- E*r   c                     t        j                  |       r&| j                  dk(  rt        | j                  |       yt        j                  |       r| j                  dk(  rt        | |       yt        |        y)a  Inplace row scaling of a CSR or CSC matrix.

    Scale each row of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR or CSC format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed sample-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 4, 5])
    >>> indices = np.array([0, 1, 2, 3, 3])
    >>> data = np.array([8, 1, 2, 5, 6])
    >>> scale = np.array([2, 3, 4, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 0, 0],
            [0, 0, 2, 0],
            [0, 0, 0, 5],
            [0, 0, 0, 6]])
    >>> sparsefuncs.inplace_row_scale(csr, scale)
    >>> csr.todense()
     matrix([[16,  2,  0,  0],
             [ 0,  0,  6,  0],
             [ 0,  0,  0, 20],
             [ 0,  0,  0, 30]])
    r1   r.   N)r   r   r   r&   r4   r,   r   r$   s     r   inplace_row_scalerB   P  sQ    H 
{{1~!((e+ e,	QAHH-a'r   c                 0   ||fD ]'  }t        |t        j                        st        d       |dk  r|| j                  d   z  }|dk  r|| j                  d   z  }| j
                  |k(  }|| j
                  | j
                  |k(  <   || j
                  |<   y)aK  Swap two rows of a CSC matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
     m and n should be valid integersr   N)
isinstancer(   ndarrayr   r    r#   )r   mntm_masks        r   inplace_swap_row_cscrK   |  s     Va$>??  	1u	QWWQZ1u	QWWQZYY!^F !AIIaii1nAIIfr   c           	      B   ||fD ]'  }t        |t        j                        st        d       |dk  r|| j                  d   z  }|dk  r|| j                  d   z  }||kD  r||}}| j
                  }||   }||dz      }||   }||dz      }||z
  }	||z
  }
|	|
k7  rE| j
                  |dz   |xxx |
|	z
  z  ccc ||
z   | j
                  |dz   <   ||	z
  | j
                  |<   t        j                  | j                  d| | j                  || | j                  || | j                  || | j                  |d g      | _        t        j                  | j                  d| | j                  || | j                  || | j                  || | j                  |d g      | _        y)aK  Swap two rows of a CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSR format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    rD   r   r   r   N)	rE   r(   rF   r   r    r+   concatenater#   r!   )r   rG   rH   rI   r+   m_startm_stopn_startn_stopnz_mnz_ns              r   inplace_swap_row_csrrT     s    Va$>??  	1u	QWWQZ1u	QWWQZ 	1u!1XXFQiGAE]FQiGAE]FGDGDt|	Qtd{*!D.QtmIIhwIIgf%IIfW%IIgf%IIfg	
AI ^^FF8GFF76"FF6'"FF76"FF67O	
AFr   c                     t        j                  |       r| j                  dk(  rt        | ||       yt        j                  |       r| j                  dk(  rt	        | ||       yt        |        y)a  
    Swap two rows of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of CSR or
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 3, 3])
    >>> indices = np.array([0, 2, 2])
    >>> data = np.array([8, 2, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 0, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_swap_row(csr, 0, 1)
    >>> csr.todense()
    matrix([[0, 0, 5],
            [8, 0, 2],
            [0, 0, 0],
            [0, 0, 0]])
    r1   r.   N)r   r   r   rK   rT   r   r   rG   rH   s      r   inplace_swap_rowrW     sQ    J 
{{1~!((e+Q1%	QAHH-Q1%r   c                 >   |dk  r|| j                   d   z  }|dk  r|| j                   d   z  }t        j                  |       r| j                  dk(  rt	        | ||       yt        j                  |       r| j                  dk(  rt        | ||       yt        |        y)a  
    Swap two columns of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two columns are to be swapped. It should be of
        CSR or CSC format.

    m : int
        Index of the column of X to be swapped.

    n : int
        Index of the column of X to be swapped.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 3, 3])
    >>> indices = np.array([0, 2, 2])
    >>> data = np.array([8, 2, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 0, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_swap_column(csr, 0, 1)
    >>> csr.todense()
    matrix([[0, 8, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    r   r   r1   r.   N)r    r   r   r   rT   rK   r   rV   s      r   inplace_swap_columnrY     s    J 	1u	QWWQZ1u	QWWQZ	{{1~!((e+Q1%	QAHH-Q1%r   c                     t        j                  |       r*| j                  dv r|rt        | |      S t	        | |      S t        |        y)a  Compute minimum and maximum along an axis on a CSR or CSC matrix.

     Optionally ignore NaN values.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    ignore_nan : bool, default=False
        Ignore or passing through NaN values.

        .. versionadded:: 0.20

    Returns
    -------

    mins : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise minima.

    maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise maxima.
    )r.   r1   r   N)r   r   r   r   r   r   )r   r   
ignore_nans      r   min_max_axisr\   6  s?    6 
{{1~!((n4&qt44"1400r   c                    |dk(  rd}n;|dk(  rd}n3| j                   dk7  r$t        dj                  | j                               |A|| j                  S t        j                  t        j
                  | j                        |      S |dk(  r7t        j
                  | j                        }||j                  d      S ||z  S |dk(  r|.t        j                  | j                  | j                  d         S t        j                  |t        j
                  | j                              }t        j                  | j                  | j                  d   |	      S t        d
j                  |            )a  A variant of X.getnnz() with extension to weighting on axis 0.

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_labels)
        Input data. It should be of CSR format.

    axis : {0, 1}, default=None
        The axis on which the data is aggregated.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.

    Returns
    -------
    nnz : int, float, ndarray of shape (n_samples,) or ndarray of shape (n_features,)
        Number of non-zero values in the array along a given axis. Otherwise,
        the total number of non-zero values in the array is returned.
    r   r   r.   z#Expected CSR sparse format, got {0}intp)	minlength)ra   r/   zUnsupported axis: {0})r   r   nnzr(   dotr*   r+   astypebincountr#   r    r)   r   )r   r   sample_weightoutr/   s        r   count_nonzerorh   Z  s-   , rz		
U	=DDQXXNOO | 55L66"''!((+];;	ggahh ::f%%]""	 ;;qyyAGGAJ??iirwwqxx/@AG;;qyyAGGAJPP077=>>r   c                    t        |       |z   }|st        j                  S t        j                  | dk        }t	        |d      \  }}| j                          |rt        || ||      S t        |dz
  | ||      t        || ||      z   dz  S )zCompute the median of data with n_zeros additional zeros.

    This function is used to support sparse matrices; it modifies data
    in-place.
    r   r   r   g       @)lenr(   nanrh   divmodsort_get_elem_at_rank)r!   n_zerosn_elems
n_negativemiddleis_odds         r   _get_medianrt     s     $i'!Gvv!!$(+JGQ'NFFIIK z7CC 	&1*dJ@
FD*g
>	? r   c                 8    | |k  r||    S | |z
  |k  ry|| |z
     S )z@Find the value in data augmented with n_zeros for the given rankr    )rankr!   rq   ro   s       r   rn   rn     s3    jDzj7"wr   c                    t        j                  |       r| j                  dk(  st        d| j                  z        | j                  }| j
                  \  }}t        j                  |      }t        t        |dd |dd             D ]H  \  }\  }}t        j                  | j                  ||       }||j                  z
  }	t        ||	      ||<   J |S )aC  Find the median across axis 0 of a CSC matrix.

    It is equivalent to doing np.median(X, axis=0).

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSC format.

    Returns
    -------
    median : ndarray of shape (n_features,)
        Median.
    r1   z%Expected matrix of CSC format, got %sNr^   r   )r   r   r   r   r+   r    r(   zeros	enumeratezipcopyr!   r;   rt   )
r   r+   	n_samples
n_featuresmedianf_indstartendr!   nzs
             r   csc_median_axis_0r     s     KKNqxx50?!((JKKXXFGGIzXXj!F(VCR[&*)EF|swwqvveC()"#D"-u	  G Mr   c                      dddf    j                   t         fd fdfdfd j                   j                        S )aA  Create an implicitly offset linear operator.

    This is used by PCA on sparse data to avoid densifying the whole data
    matrix.

    Params
    ------
        X : sparse matrix of shape (n_samples, n_features)
        offset : ndarray of shape (n_features,)

    Returns
    -------
    centered : LinearOperator
    Nc                     | z  | z  z
  S Nrv   xr   offsets    r   <lambda>z)_implicit_column_offset.<locals>.<lambda>      Q!+r   c                     | z  | z  z
  S r   rv   r   s    r   r   z)_implicit_column_offset.<locals>.<lambda>  r   r   c                 6    | z  | j                         z  z
  S r   )sumr   XTr   s    r   r   z)_implicit_column_offset.<locals>.<lambda>  s    "q&FQUUW$45r   c                 \    | z  j                   | j                  d      d d d f   z  z
  S )Nr   r   )r4   r   r   s    r   r   z)_implicit_column_offset.<locals>.<lambda>  s*    "q&688aeeemD!G.D#DDr   )matvecmatmatrmatvecrmatmatr7   r    )r4   r   r7   r    )r   r   r   s   ``@r   _implicit_column_offsetr     sB     D!G_F	
B++5Dgggg r   )NF)F)NN)&__doc__numpyr(   scipy.sparsesparser   scipy.sparse.linalgr   utils.fixesr   r   utils.validationr   sparsefuncs_fastr	   r3   r
   r2   r   r=   r   r   r&   r,   r5   r>   r@   rB   rK   rT   rW   rY   r\   rh   rt   rn   r   r   rv   r   r   <module>r      s    H   . > 3

&1R2$K\ NR pf*Z)X:<~*Z.b!H3?l, >r   