
    {Kg                          d Z ddlmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ  G d de
e	      Zy)z!
Nearest Centroid Classification
    )RealN)sparse   )BaseEstimatorClassifierMixin_fit_context)pairwise_distances_argmin)LabelEncoder)Interval
StrOptions)check_classification_targets)csc_median_axis_0)check_is_fittedc                       e Zd ZU dZ eddh      g eeddd      dgdZee	d	<   ddd
dZ
 ed      d        Zd Zy)NearestCentroida	  Nearest centroid classifier.

    Each class is represented by its centroid, with test samples classified to
    the class with the nearest centroid.

    Read more in the :ref:`User Guide <nearest_centroid_classifier>`.

    Parameters
    ----------
    metric : {"euclidean", "manhattan"}, default="euclidean"
        Metric to use for distance computation.

        If `metric="euclidean"`, the centroid for the samples corresponding to each
        class is the arithmetic mean, which minimizes the sum of squared L1 distances.
        If `metric="manhattan"`, the centroid is the feature-wise median, which
        minimizes the sum of L1 distances.

        .. versionchanged:: 1.5
            All metrics but `"euclidean"` and `"manhattan"` were deprecated and
            now raise an error.

        .. versionchanged:: 0.19
            `metric='precomputed'` was deprecated and now raises an error

    shrink_threshold : float, default=None
        Threshold for shrinking centroids to remove features.

    Attributes
    ----------
    centroids_ : array-like of shape (n_classes, n_features)
        Centroid of each class.

    classes_ : array of shape (n_classes,)
        The unique classes labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    KNeighborsClassifier : Nearest neighbors classifier.

    Notes
    -----
    When used for text classification with tf-idf vectors, this classifier is
    also known as the Rocchio classifier.

    References
    ----------
    Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of
    multiple cancer types by shrunken centroids of gene expression. Proceedings
    of the National Academy of Sciences of the United States of America,
    99(10), 6567-6572. The National Academy of Sciences.

    Examples
    --------
    >>> from sklearn.neighbors import NearestCentroid
    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> clf = NearestCentroid()
    >>> clf.fit(X, y)
    NearestCentroid()
    >>> print(clf.predict([[-0.8, -1]]))
    [1]

    For a more detailed example see:
    :ref:`sphx_glr_auto_examples_neighbors_plot_nearest_centroid.py`
    	manhattan	euclideanr   Nneither)closedmetricshrink_threshold_parameter_constraints)r   c                     || _         || _        y )Nr   )selfr   r   s      g/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/neighbors/_nearest_centroid.py__init__zNearestCentroid.__init__l   s     0    T)prefer_skip_nested_validationc                    | j                   dk(  r| j                  ||dg      \  }}n| j                  ||ddg      \  }}t        j                  |      }|r| j                  rt        d      t        |       |j                  \  }}t               }|j                  |      }|j                  x| _
        }|j                  }	|	dk  rt        d|	z        t        j                  |	|ft        j                        | _        t        j                   |	      }
t#        |	      D ]  }||k(  }t        j$                  |      |
|<   |rt        j&                  |      d	   }| j                   dk(  rF|s(t        j(                  ||   d	
      | j                  |<   st+        ||         | j                  |<   ||   j-                  d	
      | j                  |<    | j                  rrt        j.                  t        j0                  |d	
      d	k(        rt        d      t        j,                  |d	
      }t        j2                  d|
z  d|z  z
        }|| j                  |   z
  dz  }|j%                  d	
      }t        j2                  |||	z
  z        }|t        j(                  |      z  }|j5                  t7        |      d      }||z  }| j                  |z
  |z  }t        j8                  |      }t        j:                  |      | j                  z
  }t        j<                  |d	d|       ||z  }||z  }|t        j>                  ddf   |z   | _        | S )a0  
        Fit the NearestCentroid model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.
            Note that centroid shrinking cannot be used with sparse matrices.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Fitted estimator.
        r   csc)accept_sparsecsrz2threshold shrinking not supported for sparse inputr   z>The number of classes has to be greater than one; got %d class)dtyper   )axisz2All features have zero variance. Division by zero.g      ?   N)out) r   _validate_dataspissparser   
ValueErrorr   shaper
   fit_transformclasses_sizenpemptyfloat64
centroids_zerosrangesumwheremedianr   meanallptpsqrtreshapelensignabsclipnewaxis)r   Xyis_X_sparse	n_samples
n_featuresley_indclasses	n_classesnk	cur_classcenter_maskdataset_centroid_mvariancesmmms	deviationsignsmsds                         r   fitzNearestCentroid.fitp   s   * ;;+%&&q!E7&CDAq&&q!E5>&JDAqkk!n400QRR$Q' !	:^  #"$++-LL	q=P  ((Iz#:"**MXXi y)I9,KFF;/ByM hh{3A6{{k)"131[>PQ1RDOOI.1B1[>1RDOOI.-.{^-@-@a-@-H	* *   vvbffQQ'1,- !UVV " 2 rcIo67ADOOE22q8H|||+HI	$9:;A1A3q61%BaB+<<BI GGI&Ey)D,A,AAIGGIq$I6Iy.C/

A>DDOr   c                     t        |        | j                  |dd      }| j                  t        || j                  | j
                           S )a}  Perform classification on an array of test vectors `X`.

        The predicted class `C` for each sample in `X` is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Test samples.

        Returns
        -------
        C : ndarray of shape (n_samples,)
            The predicted classes.
        r#   F)r"   reset)r   )r   r(   r.   r	   r3   r   )r   rC   s     r   predictzNearestCentroid.predict   sH     	eD}}%aM
 	
r   )r   )__name__
__module____qualname____doc__r   r   r   r   dict__annotations__r   r   rX   r[    r   r   r   r      sj    L^ {K89:%dAtIFM$D 
1t 1 5T 6Tl
r   r   )r_   numbersr   numpyr0   scipyr   r)   baser   r   r   metrics.pairwiser	   preprocessingr
   utils._param_validationr   r   utils.multiclassr   utils.sparsefuncsr   utils.validationr   r   rb   r   r   <module>rm      s=       ? ? 8 ( : ; 1 .C
o} C
r   