
    {Kg_                         d Z ddlZddlmZmZ ddlZddlmZ	 ddl
mZmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZm Z   G d deee      Z! G d deee      Z"y)z6Dummy estimators that implement simple rules of thumb.    N)IntegralReal   )BaseEstimatorClassifierMixinMultiOutputMixinRegressorMixin_fit_context)check_random_state)Interval
StrOptions)class_distribution)_random_choice_csc)_weighted_percentile)_check_sample_weight_num_samplescheck_arraycheck_consistent_lengthcheck_is_fittedc                        e Zd ZU dZ eh d      gdgeeddgdZee	d<   ddddd	Z
 ed
      dd       Zd Zd Zd Zd Zd fd	Z xZS )DummyClassifiera]  DummyClassifier makes predictions that ignore the input features.

    This classifier serves as a simple baseline to compare against other more
    complex classifiers.

    The specific behavior of the baseline is selected with the `strategy`
    parameter.

    All strategies make predictions that ignore the input feature values passed
    as the `X` argument to `fit` and `predict`. The predictions, however,
    typically depend on values observed in the `y` parameter passed to `fit`.

    Note that the "stratified" and "uniform" strategies lead to
    non-deterministic predictions that can be rendered deterministic by setting
    the `random_state` parameter if needed. The other strategies are naturally
    deterministic and, once fit, always return the same constant prediction
    for any value of `X`.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"most_frequent", "prior", "stratified", "uniform",             "constant"}, default="prior"
        Strategy to use to generate predictions.

        * "most_frequent": the `predict` method always returns the most
          frequent class label in the observed `y` argument passed to `fit`.
          The `predict_proba` method returns the matching one-hot encoded
          vector.
        * "prior": the `predict` method always returns the most frequent
          class label in the observed `y` argument passed to `fit` (like
          "most_frequent"). ``predict_proba`` always returns the empirical
          class distribution of `y` also known as the empirical class prior
          distribution.
        * "stratified": the `predict_proba` method randomly samples one-hot
          vectors from a multinomial distribution parametrized by the empirical
          class prior probabilities.
          The `predict` method returns the class label which got probability
          one in the one-hot vector of `predict_proba`.
          Each sampled row of both methods is therefore independent and
          identically distributed.
        * "uniform": generates predictions uniformly at random from the list
          of unique classes observed in `y`, i.e. each class has equal
          probability.
        * "constant": always predicts a constant label that is provided by
          the user. This is useful for metrics that evaluate a non-majority
          class.

          .. versionchanged:: 0.24
             The default value of `strategy` has changed to "prior" in version
             0.24.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness to generate the predictions when
        ``strategy='stratified'`` or ``strategy='uniform'``.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    constant : int or str or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,) or list of such arrays
        Unique class labels observed in `y`. For multi-output classification
        problems, this attribute is a list of arrays as each output has an
        independent set of possible classes.

    n_classes_ : int or list of int
        Number of label for each output.

    class_prior_ : ndarray of shape (n_classes,) or list of such arrays
        Frequency of each class observed in `y`. For multioutput classification
        problems, this is computed independently for each output.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has
        feature names that are all strings.

    n_outputs_ : int
        Number of outputs.

    sparse_output_ : bool
        True if the array returned from predict is to be in sparse CSC format.
        Is automatically set to True if the input `y` is passed in sparse
        format.

    See Also
    --------
    DummyRegressor : Regressor that makes predictions using simple rules.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyClassifier
    >>> X = np.array([-1, 1, 1, 1])
    >>> y = np.array([0, 1, 1, 1])
    >>> dummy_clf = DummyClassifier(strategy="most_frequent")
    >>> dummy_clf.fit(X, y)
    DummyClassifier(strategy='most_frequent')
    >>> dummy_clf.predict(X)
    array([1, 1, 1, 1])
    >>> dummy_clf.score(X, y)
    0.75
    >   prioruniformconstant
stratifiedmost_frequentrandom_state
array-likeNstrategyr   r   _parameter_constraintsr   c                .    || _         || _        || _        y Nr   )selfr    r   r   s       Q/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/dummy.py__init__zDummyClassifier.__init__   s     (     Tprefer_skip_nested_validationc                 <   | j                  |d       | j                  | _        | j                  dk(  r?t        j                  |      r*|j                         }t        j                  dt               t        j                  |      | _	        | j                  s*t        j                  |      }t        j                  |      }|j                  dk(  rt        j                  |d      }|j                  d   | _        t#        ||       |t%        ||      }| j                  dk(  r~| j&                  t)        d      t        j                  t        j                  | j&                        d      j                  d	   | j                   k7  rt)        d
| j                   z        t+        ||      \  | _        | _        | _        | j                  dk(  r~t3        | j                         D ]f  t5        fd| j,                     D              r&dj7                  | j&                  | j,                     j9                               }t)        |       | j                   dk(  r<| j.                  d	   | _        | j,                  d	   | _        | j0                  d	   | _        | S )a  Fit the baseline classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
            Returns the instance itself.
        Fcast_to_ndarrayr   zA local copy of the target data has been converted to a numpy array. Predicting on sparse target data with the uniform strategy would not save memory and would be slower.r   r   r   MConstant target value has to be specified when the constant strategy is used.r   0Constant target value should have shape (%d, 1).c              3   4   K   | ]  }   d    |k(    yw)r   N ).0cr   ks     r%   	<genexpr>z&DummyClassifier.fit.<locals>.<genexpr>   s!     I8H18A;q>Q.8Hs   zrThe constant target value must be present in the training data. You provided constant={}. Possible values are: {}.)_validate_datar    	_strategyspissparsetoarraywarningswarnUserWarningsparse_output_npasarray
atleast_1dndimreshapeshape
n_outputs_r   r   r   
ValueErrorr   classes_
n_classes_class_prior_rangeanyformattolist)r$   Xysample_weighterr_msgr   r5   s        @@r%   fitzDummyClassifier.fit   s(   ( 	Au5>>Y&2;;q>		AMM+
  !kk!n""

1Aa A66Q;

1g&A''!*1%$0BM>>Z'}}$ : 
 ::bmmDMM&BGL>>!$7$J//* 
 ?Q}?
;): >>Z'4??+Ia8HII3396 MM4==+;+B+B+D4  %W-- , ??a"ooa0DO MM!,DM $ 1 1! 4Dr'   c           	      &   t        |        t        |      }t        | j                        }| j                  }| j
                  }| j                  }| j                  }| j                  dk(  r|g}|g}|g}|g}| j                  dk(  r#| j                  |      }| j                  dk(  r|g}| j                  rd}	| j                  dv r2|D 
cg c]&  }
t        j                  |
j                         g      ( }}
n^| j                  dk(  r|}	nL| j                  dk(  rt        d      | j                  dk(  r#|D cg c]  }t        j                  |g       }}t!        |||	| j                        }|S | j                  dv rRt        j"                  t%        | j                        D cg c]  }||   ||   j                             c}|dg      }n| j                  dk(  rZt        j&                  t%        | j                        D cg c]  }||   |   j                  d          c}      j(                  }n| j                  dk(  r\t%        | j                        D cg c]  }||   |j+                  ||   |	           }}t        j&                  |      j(                  }n1| j                  dk(  r"t        j"                  | j                  |df      }| j                  dk(  rt        j,                        }S c c}
w c c}w c c}w c c}w c c}w )
a;  Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.
        r   r   N)r   r   r   zCSparse target prediction is not supported with the uniform strategyr   axissize)r   r   r   r   rI   rH   rJ   r   rF   r8   predict_probar?   r@   arrayargmaxrG   r   tilerK   vstackTrandintravel)r$   rO   	n_samplesrsrI   rH   rJ   r   proba
class_probcpr4   rP   r5   rets                  r%   predictzDummyClassifier.predict   s    	 !O	 1 12__
==((==??a$J zH(>L zH>>\)&&q)E!#J~~!;;>JKlBHHbiik]3lK</)
9, : 
 :-3;<8aBHHaSM8<"9h
DDUDUVA@ = ~~!;;GG "'t!7!7A !LO$:$:$<=!7 N </II "'t!7!7A !E!HOOO$;<!7
 !  9, #4??33 QK

:a=y
 IJ3   IIcN$$:-GGDMMIq>:!#HHQK] L =s   	+K:6K?# L"L	#Lc                 P   t        |        t        |      }t        | j                        }| j                  }| j
                  }| j                  }| j                  }| j                  dk(  r|g}|g}|g}|g}g }t        | j                        D ]  }	| j                  dk(  rH||	   j                         }
t        j                  |||	   ft        j                        }d|dd|
f<   n| j                  dk(  rt        j                  |df      ||	   z  }n| j                  dk(  r7|j!                  d||	   |      }|j#                  t        j                        }n| j                  d	k(  r3t        j                  |||	   ft        j                        }|||	   z  }n`| j                  d
k(  rQt        j$                  ||	   ||	   k(        }
t        j                  |||	   ft        j                        }d|dd|
f<   |j'                          | j                  dk(  r|d   }|S )a  
        Return probability estimates for the test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the probability of the sample for each class in
            the model, where classes are ordered arithmetically, for each
            output.
        r   r   dtype      ?Nr   r   rW   r   r   r   )r   r   r   r   rI   rH   rJ   r   rF   rK   r8   r[   r@   zerosfloat64onesmultinomialastypewhereappend)r$   rO   ra   rb   rI   rH   rJ   r   Pr5   indouts               r%   rY   zDummyClassifier.predict_probaT  s     	 !O	 1 12__
==((==??a$J zH(>L zHt'A~~0"1o,,.hh	:a=9L!AsF7*ggy!n-Q?</nnQQinHjj,9,ggy*Q-8

Kz!}$:-hhx{hqk9:hh	:a=9L!AsFHHSM+ (. ??a!Ar'   c                     | j                  |      }| j                  dk(  rt        j                  |      S |D cg c]  }t        j                  |       c}S c c}w )a  
        Return log probability estimates for the test vectors X.

        Parameters
        ----------
        X : {array-like, object with finite length or shape}
            Training data.

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the log probability of the sample for each class in
            the model, where classes are ordered arithmetically for each
            output.
        r   )rY   rF   r@   log)r$   rO   rc   ps       r%   predict_log_probaz!DummyClassifier.predict_log_proba  sN      ""1%??a66%= ',-u!BFF1Iu---s   Ac                     ddddddS )NTzfails for the predict method)check_methods_subset_invariance%check_methods_sample_order_invariance)
poor_scoreno_validation_xfail_checksr2   r$   s    r%   
_more_tagszDummyClassifier._more_tags  s    !3Q9W
 	
r'   c                 l    |!t        j                  t        |      df      }t        |   |||      S )ak  Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since DummyClassifier
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) w.r.t. y.
        r   rE   r@   rl   lensuperscorer$   rO   rP   rQ   	__class__s       r%   r   zDummyClassifier.score  s3    2 9A{+Aw}Q=11r'   r#   )__name__
__module____qualname____doc__r   r   strr!   dict__annotations__r&   r
   rS   rg   rY   ry   r   r   __classcell__r   s   @r%   r   r   #   s    of VW
 ((sL$7$D  $+ !
 5X 6XtUn=~.,
2 2r'   r   c                        e Zd ZU dZ eh d      g eeddd      dg eeddd      d	dgd
Zee	d<   dddddZ
 ed      dd       ZddZd Zd fd	Z xZS )DummyRegressora  Regressor that makes predictions using simple rules.

    This regressor is useful as a simple baseline to compare with other
    (real) regressors. Do not use it for real problems.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"mean", "median", "quantile", "constant"}, default="mean"
        Strategy to use to generate predictions.

        * "mean": always predicts the mean of the training set
        * "median": always predicts the median of the training set
        * "quantile": always predicts a specified quantile of the training set,
          provided with the quantile parameter.
        * "constant": always predicts a constant value that is provided by
          the user.

    constant : int or float or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    quantile : float in [0.0, 1.0], default=None
        The quantile to predict using the "quantile" strategy. A quantile of
        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the
        maximum.

    Attributes
    ----------
    constant_ : ndarray of shape (1, n_outputs)
        Mean or median or quantile of the training targets or constant value
        given by the user.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has
        feature names that are all strings.

    n_outputs_ : int
        Number of outputs.

    See Also
    --------
    DummyClassifier: Classifier that makes predictions using simple rules.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyRegressor
    >>> X = np.array([1.0, 2.0, 3.0, 4.0])
    >>> y = np.array([2.0, 3.0, 5.0, 10.0])
    >>> dummy_regr = DummyRegressor(strategy="mean")
    >>> dummy_regr.fit(X, y)
    DummyRegressor()
    >>> dummy_regr.predict(X)
    array([5., 5., 5., 5.])
    >>> dummy_regr.score(X, y)
    0.0
    >   meanmedianr   quantileg        rk   both)closedNneitherr   )r    r   r   r!   r   r    r   r   c                .    || _         || _        || _        y r#   r   )r$   r    r   r   s       r%   r&   zDummyRegressor.__init__  s       r'   Tr(   c                    | j                  |d       t        |dd      }t        |      dk(  rt        d      |j                  dk(  rt        j                  |d      }|j                  d   | _        t        |||       |t        ||      }| j                  d
k(  rt        j                  |d|      | _        n| j                  dk(  r]|t        j                  |d      | _        ngt        | j                        D cg c]  }t!        |d	d	|f   |d       c}| _        n*| j                  dk(  r| j"                  t        d      | j"                  dz  }|t        j$                  |d|      | _        nt        | j                        D cg c]  }t!        |d	d	|f   ||       c}| _        n| j                  dk(  r| j&                  t)        d      t        | j&                  g ddd      | _        | j                  dk7  rD| j                  j                  d   |j                  d   k7  rt        d|j                  d   z        t        j                  | j                  d      | _        | S c c}w c c}w )a  Fit the random regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
            Fitted estimator.
        Fr+   rP   )	ensure_2d
input_namer   zy must not be empty.r   r-   Nr   )rV   weightsr   rU   g      I@)
percentiler   z^When using `strategy='quantile', you have to specify the desired quantile in the range [0, 1].g      Y@)rV   qr   r/   )csrcsccoo)accept_sparser   ensure_min_samplesr0   )r   r.   )r7   r   r   rG   rC   r@   rD   rE   rF   r   r   r    average	constant_r   rK   r   r   r   r   	TypeError)r$   rO   rP   rQ   r5   r   s         r%   rS   zDummyRegressor.fit"  sL   ( 	Au5Us;q6Q;34466Q;

1g&A''!*1m4$0BM==F"ZZ=IDN]]h&$!#11!5 #4??3"3 )1a4-DQ3"
 ]]j(}}$ 4  .J$!#qqJ!G #4??3"3 )1a4-JW3"
 ]]j(}}$: 
 )3#$	DN !#(<(<Q(?1771:(M FQRS  DNNG<Q""s   I:I?c                    t        |        t        |      }t        j                  || j                  f| j
                  t        j                  | j
                        j                        }t        j                  || j                  f      }| j                  dk(  r*t        j                  |      }t        j                  |      }|r||fS |S )a  Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        return_std : bool, default=False
            Whether to return the standard deviation of posterior prediction.
            All zeros in this case.

            .. versionadded:: 0.20

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.

        y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Standard deviation of predictive distribution of query points.
        ri   r   )
r   r   r@   fullrF   r   rZ   rj   rl   r`   )r$   rO   
return_stdra   rP   y_stds         r%   rg   zDummyRegressor.predictv  s    , 	 O	GG(NN((4>>*00

 )T__56??aAHHUOE'5z.Q.r'   c                     dddS )NT)r}   r~   r2   r   s    r%   r   zDummyRegressor._more_tags  s    "T::r'   c                 l    |!t        j                  t        |      df      }t        |   |||      S )a  Return the coefficient of determination R^2 of the prediction.

        The coefficient R^2 is defined as `(1 - u/v)`, where `u` is the
        residual sum of squares `((y_true - y_pred) ** 2).sum()` and `v` is the
        total sum of squares `((y_true - y_true.mean()) ** 2).sum()`. The best
        possible score is 1.0 and it can be negative (because the model can be
        arbitrarily worse). A constant model that always predicts the expected
        value of y, disregarding the input features, would get a R^2 score of
        0.0.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since `DummyRegressor`
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            R^2 of `self.predict(X)` w.r.t. y.
        r   r   r   r   s       r%   r   zDummyRegressor.score  s3    : 9A{+Aw}Q=11r'   r#   )F)r   r   r   r   r   r   r   r!   r   r   r&   r
   rS   rg   r   r   r   r   s   @r%   r   r     s    ?D   JKLdCV<dCT4i8
$D  $*D4 !
 5Q 6Qf$/L;2 2r'   r   )#r   r<   numbersr   r   numpyr@   scipy.sparsesparser9   baser   r   r   r	   r
   utilsr   utils._param_validationr   r   utils.multiclassr   utils.randomr   utils.statsr   utils.validationr   r   r   r   r   r   r   r2   r'   r%   <module>r      sb    <  "    & 9 0 , - k2& k2\m2%~} m2r'   