
    {Kg0              	          d Z ddlZddlmZmZ ddlZddlZddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZmZ ddlmZmZ  e
       Z e	       Z d Z!ejD                  jG                  deez         d        Z$d Z%d Z&d Z'd Z(d Z)ejD                  jG                  dddg      d        Z*d Z+d Z,d Z-d Z.d Z/ ed ed,i ddi       ejD                  jG                  d!d"d#g      d$               Z0 ed ed,i dd%i       ejD                  jG                  d!d"d#g      d&               Z1d' Z2ejD                  jG                  d(e      d)        Z3d* Z4ejD                  jG                  deez         d+        Z5y)-zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)Mockpatch)load_diabetes	load_irismake_classification)IsolationForest)_average_path_length)roc_auc_score)ParameterGridtrain_test_split)check_random_state)assert_allcloseassert_array_almost_equalassert_array_equalignore_warnings)CSC_CONTAINERSCSR_CONTAINERSc                 <   t        j                  ddgddgg      }t        j                  ddgddgg      }t        dgg dddgd      }t               5  |D ]-  }t	        dd	| i|j                  |      j                  |       / 	 d
d
d
       y
# 1 sw Y   y
xY w)z6Check Isolation Forest for various parameter settings.r            )      ?      ?r   TF)n_estimatorsmax_samples	bootstraprandom_stateN )nparrayr   r   r   fitpredict)global_random_seedX_trainX_testgridparamss        g/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforestr)   "   s    hhAA'(GXX1v1v&'F]$PUWD 
	FF);FvFJJgfo  
		s   3BBsparse_containerc                 ~   t        |       }t        t        j                  dd |      \  }}t	        ddgddgd      } ||      } ||      }|D ]j  }t        dd	| d
|j                  |      }	|	j                  |      }
t        dd	| d
|j                  |      }|j                  |      }t        |
|       l y)z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   r   )	r   r   diabetesdatar   r   r!   r"   r   )r#   r*   rngr$   r%   r&   X_train_sparseX_test_sparser'   sparse_classifiersparse_resultsdense_classifierdense_resultss                r(   test_iforest_sparser8   2   s     /
0C&x}}Sb'9LOGV#s4-PQD%g.N$V,M+ 
*<
@F

#n
 	 +22=A + 
*<
@F

#g, 	 )008>=9     c                     t         j                  } d}t        j                  t        |      5  t        d      j                  |        ddd       t        j                         5  t        j                  dt               t        d      j                  |        ddd       t        j                         5  t        j                  dt               t        t        j                  d            j                  |        ddd       t        j                  t              5  t               j                  |       j                  | ddd	df          ddd       y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   vxY w# 1 sw Y   yxY w)
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   Nerrorautor   r   )irisr0   pytestwarnsUserWarningr   r!   warningscatch_warningssimplefilterr   int64raises
ValueErrorr"   )Xwarn_msgs     r(   test_iforest_errorrN   L   s	   		A
 EH	k	2D)--a0 
3		 	 	"g{3F+//2 
# 
	 	 	"g{3BHHQK044Q7 
#
 
z	"a ((1ab52 
#	" 
3	2	"	" 
#	"
 
#	"s0   E&6E*8A	E6"2FE'*E36E?Fc            
         t         j                  } t               j                  |       }|j                  D ]O  }|j
                  t        t        j                  t        j                  | j                  d                     k(  rOJ  y)zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)rB   r0   r   r!   estimators_	max_depthintr   ceillog2shape)rL   clfests      r(   test_recalculate_max_depthrX   b   s]    		A




"C}}BGGBGGAGGAJ,?$@ AAAA r9   c                     t         j                  } t               j                  |       }|j                  | j
                  d   k(  sJ t        d      }d}t        j                  t        |      5  |j                  |        d d d        |j                  | j
                  d   k(  sJ t        d      j                  |       }|j                  d| j
                  d   z  k(  sJ y # 1 sw Y   dxY w)Nr   i  r?   r;   r<   g?)	rB   r0   r   r!   max_samples_rU   rC   rD   rE   )rL   rV   rM   s      r(   test_max_samples_attributer[   j   s    		A




"Cqwwqz)))
c
*CDH	k	2
 
3qwwqz)))
c
*
.
.q
1CsQWWQZ//// 
3	2s   1C&&C/c                    t        |       }t        t        j                  |      \  }}t	        d|       j                  |      }|j                  d       |j                  |      }|j                  d       |j                  |      }t        ||       t	        d|       j                  |      }|j                  |      }t        ||       y)zCheck parallel regression.r-   r   )n_jobsr   r   )r]   r   N)	r   r   r/   r0   r   r!   
set_paramsr"   r   )r#   r1   r$   r%   ensembley1y2y3s           r(    test_iforest_parallel_regressionrc   y   s    
/
0C&x}}3GOGVa6HIMMgVHq!			&	!Bq!			&	!Bb"%a6HIMMgVH			&	!Bb"%r9   c                    t        |       }d|j                  dd      z  }|j                  t        j                  |dz   |dz
  f            }|dd }|j                  ddd	      }t        j                  |dd |f      }t        j                  d
gdz  dgdz  z         }t        d|      j                  |      }|j                  |       }t        ||      dkD  sJ y)z#Test Isolation Forest performs wellg333333?iX  r   Nr>   r   )   r   )lowhighsizer   rf   d   )r   r   g\(\?)r   randnpermutationr   vstackuniformr    r   r!   decision_functionr
   )	r#   r1   rL   r$   
X_outliersr%   y_testrV   y_preds	            r(   test_iforest_performancers      s     /
0CciiQA		1q5!a%.12AhG !(;JYY$%*-.FXXqcCi1#)+,F c
<
@
@
IC ##F++F (4///r9   contamination      ?rA   c           	      N   ddgddgddgddgddgddgddgddgg}t        || 	      }|j                  |       |j                  |       }|j                  |      }t	        j
                  |dd        t	        j                  |d d       kD  sJ t        |d
dgz  ddgz  z          y )Nre   r   r         	   r   rt      )r   r!   ro   r"   r   minmaxr   )rt   r#   rL   rV   decision_funcpreds         r(   test_iforest_worksr      s     bB8b"X1v1v1v1vAwOA '9
WCGGAJ**1--M;;q>D66-$%}Sb/A(BBBBtQ!WqB4x/0r9   c                      t         j                  } t               j                  |       }|j                  |j
                  k(  sJ y N)rB   r0   r   r!   rZ   _max_samples)rL   rV   s     r(   test_max_samples_consistencyr      s8    		A




"Cs/////r9   c                      t        d      } t        t        j                  d d t        j                  d d |       \  }}}}t        d      }|j                  ||       |j                  |       y )Nr   r,   r-   g?)max_features)r   r   r/   r0   targetr   r!   r"   )r1   r$   r%   y_trainrq   rV   s         r(    test_iforest_subsampled_featuresr      sb    
Q
C'7crHOOCR0s($GVWf s
+CGGGWKKr9   c                  p   dt        j                  d      t         j                  z   z  dz
  } dt        j                  d      t         j                  z   z  dz
  }t        t	        dg      dg       t        t	        dg      dg       t        t	        d	g      d
g       t        t	        dg      | g       t        t	        dg      |g       t        t	        t        j
                  g d            dd
| |g       t	        t        j                  d            }t        |t        j                  |             y )N       @g      @g?g     0@g}?r   g        r   r   r        )r   r   r   r   )	r   logeuler_gammar   r	   r    aranger   sort)
result_one
result_twoavg_path_lengths      r(    test_iforest_average_path_lengthr      s     sbnn45GJu67:MMJ(!-u5(!-u5(!-u5(!-
|<(#/*>RXXn56	c:z*
 +299Q<8O(@Ar9   c                     ddgddgddgg} t        d      j                  |       }t               j                  |       }t        |j                  ddgg      |j	                  ddgg      |j
                  z          t        |j                  ddgg      |j	                  ddgg      |j
                  z          t        |j                  ddgg      |j                  ddgg             y )Nr   r   皙?)rt   r   )r   r!   r   score_samplesro   offset_)r$   clf1clf2s      r(   test_score_samplesr      s    1v1v1v&G-11':D  )DS#J<(c
|,t||; S#J<(c
|,t||; S#J<($*<*<sCj\*Jr9   c                  <   t        d      } | j                  dd      }t        dd| d      }|j                  |       |j                  d   }|j                  d       |j                  |       t        |j                        dk(  sJ |j                  d   |u sJ y)	z/Test iterative addition of iTrees to an iForestr      r   r.   T)r   r   r   
warm_start)r   N)r   rk   r   r!   rP   r^   len)r1   rL   rV   tree_1s       r(   test_iforest_warm_startr      s     Q
C		"aA RcdC GGAJ__QFNNN#GGAJs2%%%??1'''r9   z*sklearn.ensemble._iforest.get_chunk_n_rowsreturn_valuer   )side_effectzcontamination, n_predict_calls)ru   r   )rA   r   c                 >    t        ||       | j                  |k(  sJ y r   r   
call_countmocked_get_chunkrt   n_predict_callsr#   s       r(   test_iforest_chunks_works1r     #     }&89&&/999r9   r.   c                 >    t        ||       | j                  |k(  sJ y r   r   r   s       r(   test_iforest_chunks_works2r     r   r9   c                     t        j                  d      } t               }|j                  |        t         j                  j                  d      }t        |j                  |       dk(        sJ t        |j                  |j                  dd            dk(        sJ t        |j                  | dz         dk(        sJ t        |j                  | dz
        dk(        sJ t        j                  |j                  dd      dd      } t               }|j                  |        t        |j                  |       dk(        sJ t        |j                  |j                  dd            dk(        sJ t        |j                  t        j                  d            dk(        sJ |j                  dd      } t               }|j                  |        t        |j                  |       dk(        sJ t        |j                  |j                  dd            dk(        sJ t        |j                  t        j                  d            dk(        sJ y)z=Test whether iforest predicts inliers when using uniform data)rj   r.   r   r   rj   r.   N)
r   onesr   r!   randomRandomStateallr"   rk   repeat)rL   iforestr1   s      r(   test_iforest_with_uniform_datar     s    		AGKKN
))


"Cwq!Q&'''wsyyb12a7888wq1u%*+++wq1u%*+++ 			#))Ar"C+AGKKNwq!Q&'''wsyyb12a7888wrwwy12a7888 			!RAGKKNwq!Q&'''wsyyb12a7888wrwwy12a7888r9   csc_containerc                 p    t        ddd      \  }} | |      }t        ddd      j                  |       y	)
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rj   r   	n_samples
n_featuresr   r.      r   )r   r   r]   N)r   r   r!   )r   rL   _s      r(   *test_iforest_with_n_jobs_does_not_segfaultr   =  s7     CaPDAqaAQ?CCAFr9   c                  t   t        j                  d      } t        j                  j	                  d      }| j                  |j                  d      dg      }t        dd      }t        j                         5  t        j                  dt               |j                  |       d	d	d	       y	# 1 sw Y   y	xY w)
zCheck that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    pandasr   ry   a)r0   columnsg?r|   r@   N)rC   importorskipr   r   r   	DataFramerk   r   rF   rG   rH   rE   r!   )pdr1   rL   models       r(   #test_iforest_preserve_feature_namesr   H  s     
		X	&B
))


"C
#))A,6A$?E		 	 	"g{3		! 
#	"	"s   9,B..B7c                 ,   t        ddd      \  }} | |      }|j                          d}t        d|d      j                  |      }|j	                  |      }|dk  j                         |j                  d   z  t        j                  |      k(  sJ y)	zCheck that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    r,   ry   r   r   r   r   )r   rt   r   N)	r   sort_indicesr   r!   ro   sumrU   rC   approx)r*   rL   r   rt   r   
X_decisions         r(   -test_iforest_sparse_input_float_contaminationr   Z  s     JDAqANNMm!	c!f  **1-JN!AGGAJ.&--2NNNNr9   r   )6__doc__rF   unittest.mockr   r   numpyr   rC   sklearn.datasetsr   r   r   sklearn.ensembler   sklearn.ensemble._iforestr	   sklearn.metricsr
   sklearn.model_selectionr   r   sklearn.utilsr   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   rB   r/   r)   markparametrizer8   rN   rX   r[   rc   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r9   r(   <module>r      s    %   J J , : ) C ,  ? {?  +^n-LM: N:23,B0&(00 4.91 :10B("(. 0+*+ 9I{;ST: U	
: 0,+, 9I{;ST: U	
:9D .9G :G$ +^n-LMO NOr9   