
    {KgF                        d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlZddlZddlZddlZddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$ dZ%dZ& G d d      Z' eed      Zd Z(e$ejR                  jU                  ddddidddfdddddddfdddidddfdddddddfdddid d!dfdd"ddd d!dfd#dd#id$d%d&fd'dd'id(d)dfd'd*d+id(d)dfd,dd,id!d-dfd,d*d.id!d-dfd/dd/id0d$dfg      ejR                  jU                  d1d2d3g      ejR                  jU                  d4dd5g      d6                             Z+e$ejR                  jU                  ddddidddfdddddddfdddidddfdddddddfdddid d!dfdd"ddd d!dfd#dd#id$d%d&fd'dd'id(d)dfd'd*d+id(d)dfd,dd,id!d-dfd,d*d.id!d-dfg      ejR                  jU                  d1d2d3g      d7                      Z,e$ejR                  jU                  dg d8      d9               Z-e$ejR                  jU                  d1d2d3g      d:               Z.e$ejR                  jU                  d1d2d3g      d;               Z/e$ejR                  jU                  d1d2d3g      ejR                  jU                  d<d=d=d>gg      d?                      Z0e$ejR                  jU                  dg d@      ejR                  jU                  d1d2d3g      dA                      Z1e$ejR                  jU                  dg dB      ejR                  jU                  d1d2d3g      dC                      Z2e$dD        Z3 ejh                  dEF      dG        Z5 ejh                  dEF      dH        Z6e$ejR                  jU                  dIg dJ      ejR                  jU                  d4dd5g      dK                      Z7ejR                  jU                  dLd1dMidNfdOdMidPfg      dQ        Z8ejR                  jU                  dRddSdTdSdSdTd5d3dTd5dSdTg      dU        Z9ejR                  ju                  dV      ejR                  jU                  dLd1d3idWfdOdidXfd3ddYdXfg      dZ               Z;e$ejR                  ju                  dV      ejR                  jU                  d[d\d]g      d^                      Z<e$d_        Z=ejR                  jU                  d4dd5g      d`        Z>ejR                  jU                  d4dd5g      da        Z?ejR                  jU                  d4dd5g      ejR                  jU                  d1d2d3g      db               Z@ejR                  jU                  d4dd5g      ejR                  jU                  dcdddiddeddfg      dg               ZAejR                  jU                  d4dd5g      ejR                  jU                  dhddd*deieBdifdddjdkgdleBdmfd/d/d5dneBdofdddpddqeBdrfdddpd5dqeBdrfdddsdleCdtfdddsdkgdleCdtfg      ejR                  jU                  d1d2d3g      du                      ZDejR                  jU                  dvdwddxdfeBdyfdwdzd{eBd|fdwdzdxdfeBd}fi eBd~fg      d        ZEejR                  jU                  d4dd5g      d        ZFejR                  jU                  d4dd5g      d        ZGejR                  jU                  d4dd5g      d        ZHd ZIejR                  jU                  d4dd5g      d        ZJejR                  jU                  ddd5g      d        ZKd ZLd ZMejR                  jU                  d4dd5g      d        ZNe$ejR                  jU                  dg d      d               ZOd ZPejR                  jU                  d4dd5g      ejR                  jU                  d1d      d               ZQd ZRd ZSd ZTy)zTest the openml loader.    N)partial)	resources)BytesIO)	HTTPError)config_context)fetch_openml)_OPENML_PREFIX_get_local_path_open_openml_url_retry_with_clean_cache)Bunch)check_pandas_support)SkipTestassert_allcloseassert_array_equalfails_if_pypyz"sklearn.datasets.tests.data.openmlTc                   8    e Zd Zd Zd	dZd Zd Zd Zd Zd Z	y)
_MockHTTPResponsec                      || _         || _        y N)datais_gzip)selfr   r   s      f/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/datasets/tests/test_openml.py__init__z_MockHTTPResponse.__init__(   s    	    c                 8    | j                   j                  |      S r   )r   read)r   amts     r   r   z_MockHTTPResponse.read,   s    yy~~c""r   c                 8    | j                   j                          y r   )r   closer   s    r   r!   z_MockHTTPResponse.close/   s    		r   c                 &    | j                   rddiS i S )NzContent-Encodinggzipr   r"   s    r   infoz_MockHTTPResponse.info2   s    <<&//	r   c                 ,    t        | j                        S r   )iterr   r"   s    r   __iter__z_MockHTTPResponse.__iter__7   s    DIIr   c                     | S r    r"   s    r   	__enter__z_MockHTTPResponse.__enter__:   s    r   c                      y)NFr+   )r   exc_typeexc_valexc_tbs       r   __exit__z_MockHTTPResponse.__exit__=   s    r   N))
__name__
__module____qualname__r   r   r!   r&   r)   r,   r1   r+   r   r   r   r   '   s%    #
r   r   )	data_homec                 <  	
 dddddt         j                  t        dz   d| z   
fd
fd			fd
	fd	fd
fdfd}t        r,| j	                  t
        j                  j                  d|       y y )Nz(https://api.openml.org/api/v1/json/data/z1https://api.openml.org/api/v1/json/data/features/zhttps://api.openml.org/data/v1/z-https://api.openml.org/api/v1/json/data/list/z.gz.id_c                    t        j                  dd| t        d      d        |z   z   }|j                  dd      j                  dd      j                  dd	      j                  d
d      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      S )Nz\W-zhttps://api.openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resublenreplace)urlsuffixoutputpath_suffixs      r   
_file_namez4_monkey_patch_webbased_functions.<locals>._file_nameV   s    FF5#s3'@#A#CDE 	 NN,f5W*F3W+V4W\5)W\5)W[%(WXt$W_e,WY%W^W-WY'	
r   c                 z   | j                  |      sJ  	| |      }t        j                  
      |z  }|j                  d      5 }|r0r.t	        |j                               }t        |d      cd d d        S  |d      }t	        |j                               }t        |d      cd d d        S # 1 sw Y   y xY w)NrbTF)
startswithr   filesopenr   r   r   )r@   has_gzip_headerexpected_prefixrA   data_file_namedata_file_pathffpdecompressed_frD   data_modulegzip_responseread_fns            r   _mock_urlopen_sharedz>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedl   s    ~~o...#C0"5F  &!=QVVX&(T2 '&
 ")D!1^0023(U3 '&&s   )B1:-B11B:c                      | |d      S N.jsonr@   rJ   rK   rA   r+   )r@   rJ   rT   url_prefix_data_descriptions     r   _mock_urlopen_data_descriptionzH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_description{   s    #+7	
 	
r   c                      | |d      S rV   r+   )r@   rJ   rT   url_prefix_data_featuress     r   _mock_urlopen_data_featureszE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_features       #+4	
 	
r   c                      | |d      S )Nz.arffrX   r+   )r@   rJ   rT   url_prefix_download_datas     r   _mock_urlopen_download_datazE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_data   r^   r   c                 t   | j                        sJ  	| d      }t        j                  
      |z  }|j                  d      5 } |d      }|j	                         j                  d      }t        j                  |      }d d d        dv rt        d ddd t                     |j                  d      5 }|r.t        |j	                               }t        |d      cd d d        S  |d      }t        |j	                               }t        |d	      cd d d        S # 1 sw Y   xY w# 1 sw Y   y xY w)
NrW   rF   zutf-8error  Simulated mock errorr@   codemsghdrsrO   TF)rG   r   rH   rI   r   decodejsonloadsr   r   r   )r@   rJ   rL   rM   rN   rP   	decoded_s	json_datarO   rD   rQ   rS   url_prefix_data_lists            r   _mock_urlopen_data_listzA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_list   s    ~~2333#C1"5F   &!$Q-N&++-44W=I

9-I ' is(>Tgi    &!QVVX&(T2 '&
 ")D!1^0023(U3 '& '& '&s   >D":'D.+-D."D+.D7c                 8   | j                         }| j                  d      dk(  }|j                        r	 ||      S |j                  
      r	 ||      S |j                        r	 ||      S |j                  	      r	 ||      S t        d|z        )NzAccept-encodingr$   zUnknown mocking URL pattern: %s)get_full_url
get_headerrG   
ValueError)requestargskwargsr@   rJ   rZ   r]   rp   ra   rY   r\   ro   r`   s        r   _mock_urlopenz7_monkey_patch_webbased_functions.<locals>._mock_urlopen   s    ""$!,,->?6I>>./*3@@^^45.sODD^^45.sODD^^781#GG>DEEr   urlopen)r$   rI   OPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextdata_idrR   rx   rD   rZ   r]   rp   ra   rT   rQ   rC   rS   rY   r\   ro   r`   s     ` @@@@@@@@@@@@@r    _monkey_patch_webbased_functionsr   H   s     #MR@JKiiG)C/Cy/AK
,4


42F F ((00)]K r   z9data_id, dataset_params, n_samples, n_features, n_targets=   r            iris)nameversion      &   anneal1        cpu鍞     H      _  
      r   zadult-census  M   MiceProtein  i  parser	liac-arffpandasrR   Fc                    t        j                  d      }t        | ||       t        d	dd|d|}	t	        |	j
                  d         |k(  sJ t        |	t              sJ t        |	j                  |j                        sJ |	j                  j                  |||z   fk(  sJ t        |	j                  |j                        sJ |	j                  j                  ||fk(  sJ |dk(  r>t        |	j                  |j                        sJ |	j                  j                  |fk(  sAJ t        |	j                  |j                        sJ |	j                  j                  ||fk(  sJ |	j                  J y)
zCheck the behaviour of `fetch_openml` with `as_frame=True`.

    Fetch by ID and/or name (depending if the file was previously cached).
    r   rR   TFas_framecacher   idr   Nr+   )pytestimportorskipr   r   intdetails
isinstancer   frame	DataFrameshaper   targetSeries
categories)
monkeypatchr   dataset_params	n_samples
n_features	n_targetsr   rR   pdbunchs
             r   test_fetch_openml_as_frame_truer      s`   R 
		X	&B$['W  	E u}}T"#w...eU###ekk2<<000;;J,B CCCCejj",,///::	:6666A~%,,		222||!!i\111%,,555||!!i%;;;;###r   c                 h   t        j                  d       t        | |d       t        d	dd|d|}t	        |j
                  d         |k(  sJ t        |t              sJ |j                  J t        |j                  t        j                        sJ |j                  j                  ||fk(  sJ t        |j                  t        j                        sJ |dk(  r|j                  j                  |fk(  sJ |j                  j                  ||fk(  sJ t        |j                  t              sJ y)
znCheck the behaviour of `fetch_openml` with `as_frame=False`.

    Fetch both by ID and/or name + version.
    r   Tr   Fr   r   Nr   r+   )r   r   r   r   r   r   r   r   r   r   npndarrayr   r   r   dict)r   r   r   r   r   r   r   r   s           r    test_fetch_openml_as_frame_falser     s    J !$['N  	E u}}T"#w...eU###;;ejj"**---::	:6666ellBJJ///A~||!!i\111||!!i%;;;;e&&---r   )r   r   r   c                   
 t        j                  d      t        | |d       t        |ddd      }t        |ddd      }|j                  |j                  c}

fd}|j                  |      }j                  j                  |
       |j                  |j                  c}j                  j                  |j                     
       fd}|j                  |      }	j                  j                  |	       y	)
z:Check the consistency of the LIAC-ARFF and pandas parsers.r   Tr   Fr   r   r   r   r   c                     | j                      }j                  j                  j                  |      r| j	                  |j
                        S | S r   )r   apitypesis_numeric_dtypeastypedtype)seriespandas_seriesdata_pandasr   s     r   convert_numerical_dtypeszFtest_fetch_openml_consistency_parser.<locals>.convert_numerical_dtypesg  sA    #FKK066<<((7==!4!455Mr   c                 D   | j                      }j                  j                  j                  |      r| j	                  |j
                        S t        |j
                  j                        r/| j                  j                  |j                  j                        S | S r   )r   r   r   r   r   r   r   CategoricalDtypecatrename_categoriesr   )r   r   frame_pandasr   s     r   (convert_numerical_and_categorical_dtypeszVtest_fetch_openml_consistency_parser.<locals>.convert_numerical_and_categorical_dtypes{  sz    $V[[166<<((7==!4!455++R-@-@A :://0A0A0L0LMMMr   N)
r   r   r   r   r   applytestingassert_frame_equalr   feature_names)r   r   
bunch_liacbunch_pandas	data_liacr   data_liac_with_fixed_dtypes
frame_liacr   frame_liac_with_fixed_dtypesr   r   r   s             @@@r   $test_fetch_openml_consistency_parserr   N  s     
		X	&B$['N	J  	L (__l.?.?I{ #,//2J"KJJ!!"={K  *//1C1CJ JJ!!,|/I/I"JKX $.#3#30$  JJ!!">Mr   c                 
   t        j                  d       d}t        | |d       t        |dd|      }t        |dd|      }t	        |j
                  |j
                         t        |j                  |j                         y)z^Check the equivalence of the dataset when using `as_frame=False` and
    `as_frame=True`.
    r   r   Tr   Fr   N)r   r   r   r   r   r   r   r   )r   r   r   bunch_as_frame_truebunch_as_frame_falses        r   -test_fetch_openml_equivalence_array_dataframer     s     !G$['N&	 (	 (--/B/G/GH+224G4N4NOr   c                 ,   t        j                  d      }|j                  j                  j                  }d}d}d}d} |g d      }t
        j                  gdz  }	g d}
d	}t        | |d
       t        |d
d|      }|j                  }|j                  }|j                  }t        ||j                        sJ t        j                  |j                  |	k(        sJ |j                   |k(  sJ t        j                  |j"                  |
k(        sJ t        j                  |j$                  |
k(        sJ |j&                  |gk(  sJ t        ||j(                        sJ |j*                  |k(  sJ |j                   |k(  sJ |j,                  |k(  sJ |j.                  j0                  sJ t        ||j                        sJ |j                   |k(  sJ t        j                  |j                  |	|gz   k(        sJ |j.                  j0                  sJ y)z>Check fetching on a numerical only dataset with string labels.r   r   r   r   )r   )r      )zIris-setosazIris-versicolorzIris-virginicar   )sepallength
sepalwidthpetallength
petalwidthclassTFr   N)r   r   r   r   r   r   float64r   r   r   r   r   r   r   alldtypesr   columnsr   target_namesr   r   r   index	is_unique)r   r   r   r   r   
data_shapetarget_shapeframe_shapetarget_dtypedata_dtypes
data_namestarget_namer   r   r   r   s                   r   test_fetch_openml_iris_pandasr     s    
		X	&Bvv||44GJLK#<L ::,"KKJK$['4@	E ::D\\FKKEdBLL)))66$++,---::###66$,,*,---66%%%3444+...fbii(((<<<'''<<<''';;+%%%<<!!!!eR\\***;;+%%%66%,,+">>???;;    r   target_columnr   r   c                 ,   t        j                  d      }d}t        | |d       t        |dd||      }t        |dd|      }|j                  j                  |j                  |j                         t        |t              r[|j                  j                  |j                  j                  |j                  |             |j                  j                  dk(  sJ y	|j                  j                  |k(  sJ |j                  j                  dk(  sJ y	)
z@Check that we can force the target to not be the default target.r   r   TF)r   r   r   r   r   r   )r      r   N)r   r   r   r   r   r   r   r   listassert_index_equalr   r   Indexr   r   r   )r   r   r   r   r   bunch_forcing_targetbunch_defaults          r   !test_fetch_openml_forcing_targetsr     s    
 
		X	&BG$['4@'# !	M JJ!!"6"<"<m>Q>QR-&


%% ''//-1H	
 $((..(:::#**//=@@@#((..(:::r   )r   r   r   r   r   c                    t        j                  d      }t        | |d       t        |ddd|      }t        |ddd|      \  }}|j                  j                  |j                  |       t        ||j                        r'|j                  j                  |j                  |       y|j                  j                  |j                  |       y)z>Check the behaviour of `return_X_y=True` when `as_frame=True`.r   Tr   Fr   r   r   
return_X_yr   N)r   r   r   r   r   r   r   r   r   assert_series_equalr   )r   r   r   r   r   Xys          r   .test_fetch_openml_equivalence_frame_return_X_yr    s    
 
		X	&B$['NE DAq JJ!!%**a0!RYY


&&u||Q7


%%ellA6r   )r   r   r   r   c                     t        j                  d       t        | |d       t        |ddd|      }t        |ddd|      \  }}t	        |j
                  |       t	        |j                  |       y)z?Check the behaviour of `return_X_y=True` when `as_frame=False`.r   Tr   Fr   N)r   r   r   r   r   r   r   )r   r   r   r   r  r  s         r   .test_fetch_openml_equivalence_array_return_X_yr  &  st    
 !$['NE DAq uzz1%u||Q'r   c                    t        j                  d       d}t        | |d       d}t        ||dd      }t        ||dd      }|j                  j
                  j                  dk(  sJ |j                  j
                  d	k(  sJ y
)z9Check the difference between liac-arff and pandas parser.r   r   Tr   Fr   r   rN   ON)r   r   r   r   r   r   kind)r   r   r   bunch_liac_arffr   s        r   $test_fetch_openml_difference_parsersr  C  s     !G$['N H"	O  	L %%**c111""c)))r   module)scopec                  2    g dg dg dg dg dg dg ddS )	z+Returns the columns names for each dataset.)r   r   r   r   r   )'familyzproduct-typesteelcarbonhardnesstemper_rolling	conditionformabilitystrength
non-ageingsurface-finishzsurface-qualityenamelabilitybcbfbtbw%2Fmeblmchromphoscbondmarviexptlferrocorrblue%2Fbright%2Fvarn%2Fcleanlustrejurofmspr   thickwidthr>   oilborepackingr   )vendorMYCTMMINMMAXCACHCHMINCHMAXr   )N Mean_Acc1298_Mean_Mem40_CentroidMean_Acc1298_Mean_Mem40_RolloffMean_Acc1298_Mean_Mem40_FluxMean_Acc1298_Mean_Mem40_MFCC_0Mean_Acc1298_Mean_Mem40_MFCC_1Mean_Acc1298_Mean_Mem40_MFCC_2Mean_Acc1298_Mean_Mem40_MFCC_3Mean_Acc1298_Mean_Mem40_MFCC_4Mean_Acc1298_Mean_Mem40_MFCC_5Mean_Acc1298_Mean_Mem40_MFCC_6Mean_Acc1298_Mean_Mem40_MFCC_7Mean_Acc1298_Mean_Mem40_MFCC_8Mean_Acc1298_Mean_Mem40_MFCC_9Mean_Acc1298_Mean_Mem40_MFCC_10Mean_Acc1298_Mean_Mem40_MFCC_11Mean_Acc1298_Mean_Mem40_MFCC_12Mean_Acc1298_Std_Mem40_CentroidMean_Acc1298_Std_Mem40_RolloffMean_Acc1298_Std_Mem40_FluxMean_Acc1298_Std_Mem40_MFCC_0Mean_Acc1298_Std_Mem40_MFCC_1Mean_Acc1298_Std_Mem40_MFCC_2Mean_Acc1298_Std_Mem40_MFCC_3Mean_Acc1298_Std_Mem40_MFCC_4Mean_Acc1298_Std_Mem40_MFCC_5Mean_Acc1298_Std_Mem40_MFCC_6Mean_Acc1298_Std_Mem40_MFCC_7Mean_Acc1298_Std_Mem40_MFCC_8Mean_Acc1298_Std_Mem40_MFCC_9Mean_Acc1298_Std_Mem40_MFCC_10Mean_Acc1298_Std_Mem40_MFCC_11Mean_Acc1298_Std_Mem40_MFCC_12Std_Acc1298_Mean_Mem40_CentroidStd_Acc1298_Mean_Mem40_RolloffStd_Acc1298_Mean_Mem40_FluxStd_Acc1298_Mean_Mem40_MFCC_0Std_Acc1298_Mean_Mem40_MFCC_1Std_Acc1298_Mean_Mem40_MFCC_2Std_Acc1298_Mean_Mem40_MFCC_3Std_Acc1298_Mean_Mem40_MFCC_4Std_Acc1298_Mean_Mem40_MFCC_5Std_Acc1298_Mean_Mem40_MFCC_6Std_Acc1298_Mean_Mem40_MFCC_7Std_Acc1298_Mean_Mem40_MFCC_8Std_Acc1298_Mean_Mem40_MFCC_9Std_Acc1298_Mean_Mem40_MFCC_10Std_Acc1298_Mean_Mem40_MFCC_11Std_Acc1298_Mean_Mem40_MFCC_12Std_Acc1298_Std_Mem40_CentroidStd_Acc1298_Std_Mem40_RolloffStd_Acc1298_Std_Mem40_FluxStd_Acc1298_Std_Mem40_MFCC_0Std_Acc1298_Std_Mem40_MFCC_1Std_Acc1298_Std_Mem40_MFCC_2Std_Acc1298_Std_Mem40_MFCC_3Std_Acc1298_Std_Mem40_MFCC_4Std_Acc1298_Std_Mem40_MFCC_5Std_Acc1298_Std_Mem40_MFCC_6Std_Acc1298_Std_Mem40_MFCC_7Std_Acc1298_Std_Mem40_MFCC_8Std_Acc1298_Std_Mem40_MFCC_9Std_Acc1298_Std_Mem40_MFCC_10Std_Acc1298_Std_Mem40_MFCC_11Std_Acc1298_Std_Mem40_MFCC_12BH_LowPeakAmpBH_LowPeakBPMBH_HighPeakAmpBH_HighPeakBPMBH_HighLowRatioBHSUM1BHSUM2BHSUM3zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)age	workclasszfnlwgt:z
education:zeducation-num:zmarital-status:zoccupation:zrelationship:zrace:zsex:zcapital-gain:zcapital-loss:zhours-per-week:znative-country:r   )NDYRK1A_NITSN1_NBDNF_NNR1_NNR2A_NpAKT_NpBRAF_N	pCAMKII_NpCREB_NpELK_NpERK_NpJNK_NPKCA_NpMEK_NpNR1_NpNR2A_NpNR2B_NpPKCAB_NpRSK_NAKT_NBRAF_NCAMKII_NCREB_NELK_NERK_NGSK3B_NJNK_NMEK_NTRKA_NRSK_NAPP_N
Bcatenin_NSOD1_NMTOR_NP38_NpMTOR_NDSCR1_NAMPKA_NNR2B_NpNUMB_NRAPTOR_NTIAM1_NpP70S6_NNUMB_NP70S6_NpGSK3B_NpPKCG_NCDK5_NS6_NADARB1_NAcetylH3K9_NRRP1_NBAX_NARC_NERBB4_NnNOS_NTau_NGFAP_NGluR3_NGluR4_NIL1B_NP3525_NpCASP9_NPSD95_NSNCA_NUbiquitin_NpGSK3B_Tyr216_NSHH_NBAD_NBCL2_NpS6_NpCFOS_NSYP_N	H3AcK18_NEGR1_NH3MeK4_NCaNA_Nr   )pclasssurvivedr   sexr  sibspparchticketfarecabinembarkedboatbody	home.destr   r   r   r   r   r   r   r+   r+   r   r   datasets_column_namesr  c  s9     P(
R SO
`
"O
`
{m mr   c                      i i ddddddddd	d
dddddddd
dddddddddddddd
ddddddddddd
dd
i i i ddiddddddd d!d"S )#Nr  r   r  	   r  r   r  r   r  r   r  r  r  r  r  r     r  r   r!  r"  r#  r$  )
r%  r&  r'  r(  r)  r*  r+  r,  r/  r1  r  r   i  r   i  i7  i  i4  )r  r  r  r  r  r  r  r  r+   r+   r   r   datasets_missing_valuesr  V  s0    
b
a
 
 1	

 "
 b
 R
 "
 "
 "
 q
 !
 
 R
 B
  R!
" R#
$ ,.7
: !}
G, ,r   zJdata_id, parser, expected_n_categories, expected_n_floats, expected_n_ints))r   r   r   r   r   )r   r   r   r   r   )r   r   !   r   r   )r   r   r  r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   E   r   )r   r   r  r   r   )r   r   r  r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   c	           	      8   t        j                  d      }	|	j                  j                  j                  }
t        | ||       t        |dd|      }|j                  }t        |j                  D cg c]  }t        ||
      s| c}      }t        |j                  D cg c]  }|j                  dk(  s| c}      }t        |j                  D cg c]  }|j                  dk(  s| c}      }||k(  sJ ||k(  sJ ||k(  sJ |j                  j                         ||   k(  sJ |j                         j                         j!                         }|j#                         D ]!  \  }}||   j%                  |d      }||k(  r!J  y	c c}w c c}w c c}w )
zYCheck that `fetch_openml` infer the right number of categories, integers, and
    floats.r   r   TFr   rN   ir   N)r   r   r   r   r   r   r   r   r>   r   r   r
  r   tolistisnasumto_dictitemsget)r   r   r   expected_n_categoriesexpected_n_floatsexpected_n_intsrR   r  r  r   r   r   r   r   n_categoriesn_floatsn_intsframe_feature_to_n_nanr   	n_missingexpected_missings                        r   !test_fetch_openml_types_inferencer    s   R 
		X	&Bvv||44$['W	E KKE!LLPL5Ju>N,OLPL u||I|euzzS7HE|IJHU\\G\EUZZ35F%\GHF0000((((_$$$==!%:7%CCCC"ZZ\--/7791779i27;??aH,,,, : 	QIGs$   2FF"F7FF*Fzparams, err_msgunknownz:The 'parser' parameter of fetch_openml must be a str amongr   z<The 'as_frame' parameter of fetch_openml must be an instancec                     d}t        | |d       t        j                  t        |      5  t	        dd|i| d d d        y # 1 sw Y   y xY w)Nr   Tmatchr   r+   )r   r   raisesrt   r   r   paramserr_msgr   s       r   &test_fetch_openml_validation_parameterr    s?     G$['4@	z	1/W// 
2	1	1s   AAr  auto)r   r   c                     d}	 t        d       t        d      # t        $ rP t        | |d       d}t	        j
                  t        |      5  t        d	d|i| ddd       Y y# 1 sw Y   Y yxY ww xY w)
z=Check that we raise the proper errors when we require pandas.r   !test_fetch_openml_requires_pandasz.This test requires pandas to not be installed.Tz:requires pandas to be installed. Alternatively, explicitlyr  r   Nr+   )r   r   ImportErrorr   r   r  r   )r   r  r   r  s       r   'test_fetch_openml_requires_pandas_errorr    sr     GI@A GHH  4(gtDN]];g633F3 7664s'    3A3A%A3%A/	*A3/A3z2ignore:Version 1 of dataset Australian is inactivez:Sparse ARFF datasets cannot be loaded with parser='pandas'z9Sparse ARFF datasets cannot be loaded with as_frame=True.)r   r   c                     t        j                  d       d}t        | |d       t        j                  t        |      5  t        d|dd| ddd       y# 1 sw Y   yxY w)	ztCheck that we raise the expected error for sparse ARFF datasets and
    a wrong set of incompatible parameters.
    r   $  Tr  F)r   r   Nr+   )r   r   r   r  rt   r   r  s       r   #test_fetch_openml_sparse_arff_errorr     sZ    , !G$['4@	z	1 	
	
 	
 
2	1	1s    AA!zdata_id, data_type)r   	dataframe)r  sparsec                     t        j                  d      }t        | |d       t        |dd      }|dk(  r|j                  nt
        j                  j                  }t        |j                  |      sJ y)z&Check the auto mode of `fetch_openml`.r   Tr  F)r   r   r   r  N)
r   r   r   r   r   scipyr  
csr_matrixr   r   )r   r   	data_typer   r   klasss         r   test_fetch_openml_auto_moder  $  s^     
		X	&B$['4@&FD%4BLL%,,:Q:QEdii'''r   c                    t        j                  d       d}t        | |d       d}t        j                  t        |      5  t        d      5  t        |ddd	
       ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)z[Check that we raise a warning regarding the working memory when using
    LIAC-ARFF parser.r   r   Tz*Could not adhere to working_memory config.r  gư>)working_memoryFr   r   N)r   r   r   warnsUserWarningr   r   )r   r   rh   s      r   :test_convert_arff_data_dataframe_warning_low_memory_pandasr  9  so     !G$['4@
6C	k	-40"	 1 
.	-00 
.	-s$   A<A0A<0A9	5A<<Bc                     d}d}t        | ||       t        j                  d      }t        j                  t
        |      5  t        |ddd       ddd       y# 1 sw Y   yxY w)	z\Check that a warning is raised when multiple versions exist and no version is
    requested.r   r   a;  Multiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1. Available versions:
- version 1, status: active
  url: https://www.openml.org/search?type=data&id=61
- version 3, status: active
  url: https://www.openml.org/search?type=data&id=969
r  Fr   )r   r   r   r   N)r   r<   escaper   r  r  r   )r   rR   r   	data_namerh   s        r   ,test_fetch_openml_iris_warn_multiple_versionr  M  s^     GI$['=I
))	BC 
k	-		
 
.	-	-s   AA$c                     d}d}d}d}t        | ||       t        ||ddd      }|j                  j                  ||fk(  sJ |j                  J y)z/Check that we can get a dataset without target.r   Nr   r   Fr   r   r   r   r   r   )r   r   r   r   r   )r   rR   r   r   expected_observationsexpected_featuresr   s          r   test_fetch_openml_no_targetr  h  sn     GM$['=I#D 99??46GHHHH;;r   c                 (   t        j                  d       d}t        | ||       t        |dd|      }|j                  j
                  d   }|j                  d   j                         j                         sJ t        |j                  g d       y	)
zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  r   FTr   r   r   r   r  )FEMALEMALE_N)
r   r   r   r   r   r   r  anyr   r   )r   rR   r   r   penguins	cat_dtypes         r   test_missing_values_pandasr%  |  s    
 !G$['W	H $$U+I==$$&**,,,y++-DEr   r     glass2)r   r   r   c                    d}t        | ||       d}t        j                  t        |      5  t	        ddddd|}ddd       j
                  j                  dk(  sJ |j                  d	   d
k(  sJ y# 1 sw Y   9xY w)z;Check that we raise a warning when the dataset is inactive.r&  z(Version 1 of dataset glass2 is inactive,r  Fr   )r   r   r   N)   r  r   40675r+   )r   r   r  r  r   r   r   r   )r   rR   r   r   rh   r'  s         r   test_fetch_openml_inactiver+    s     G$['=I
4C	k	- 
%
?M
 
. ;;(((>>$7*** 
.	-s   A55A>z"data_id, params, err_type, err_msgzNo active dataset glass2 foundr   r   )r   r   z1Can only handle homogeneous multi-target datasets)r   r   zOSTRING attributes are not supported for array representation. Try as_frame=Truer  )r   r   r   zTarget column 'family'	undefinedz(Could not find target_column='undefined'c                     t        | ||       |j                  dd      s|dk(  rt        j                  d       t        j                  ||      5  t        dd|d| d d d        y # 1 sw Y   y xY w)Nr   Tr   r  F)r   r   r+   )r   r  r   r   r  r   )r   rR   r   r  err_typer  r   s          r   test_fetch_openml_errorr/    s`    d %['=Izz*d#v'9H%	xw	/:5:6: 
0	/	/s   A))A2zparams, err_type, err_msgr2   r   zCThe 'version' parameter of fetch_openml must be an int in the rangenAmE)r   r   zCThe 'data_id' parameter of fetch_openml must be an int in the rangez6The 'version' parameter of fetch_openml must be an intzFNeither name nor data_id are provided. Please provide name or data_id.c                 r    t        j                  ||      5  t        di |  d d d        y # 1 sw Y   y xY w)Nr  r+   )r   r  r   )r  r.  r  s      r   )test_fetch_openml_raises_illegal_argumentr2    s)    4 
xw	/v 
0	/	/s   -6c                    d}d}d}t        | ||       d}|j                  |      }t        j                  t        |      5  t        ||ddd       d d d        d	}|j                  |      }t        j                  t        |      5  t        ||ddd       d d d        d}|j                  |      }t        j                  t        |      5  t        ||d
gddd       d d d        d	}|j                  |      }t        j                  t        |      5  t        ||d
gddd       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   kxY w# 1 sw Y   y xY w)Nr   z.target_column='{}' has flag is_row_identifier.z&target_column='{}' has flag is_ignore.MouseIDr  Fr   r  Genotyper   )r   formatr   r  r  r   )r   rR   r   expected_row_id_msgexpected_ignore_msg
target_colrh   s          r   test_warn_ignore_attributer:    sR   GJB$['=IJ

$
$Z
0C	k	-$	
 
. J

$
$Z
0C	k	-$	
 
. J

$
$Z
0C	k	-%w/	
 
. J

$
$Z
0C	k	-%w/	
 
.	-? 
.	- 
.	- 
.	- 
.	-s0   D5	EEE5D>E
EE"c                     d}t        | ||       d}t        j                  t        |      5  t	        |ddd       d d d        y # 1 sw Y   y xY w)Nr   zJOpenML registered a problem with the dataset. It might be unusable. Error:r  Fr   r  r   r   r  r  r   r   rR   r   rh   s       r   test_dataset_with_openml_errorr>  0  s@    G$['=I
VC	k	-WEE+V 
.	-	-   AAc                     d}t        | ||       d}t        j                  t        |      5  t	        |ddd       d d d        y # 1 sw Y   y xY w)Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:r  Fr   r  r<  r=  s       r    test_dataset_with_openml_warningrA  9  s@    G$['=I
RC	k	-WEE+V 
.	-	-r?  c                 h   t        j                  d       d}t        | |d       |dddd}t        di |}t        di |dddii}t	        d	 |j
                  d
   j                  j                  D              sJ t        d |j
                  d
   j                  j                  D              rJ y)zACheck that we can overwrite the default parameters of `read_csv`.r   6  Fr   rR   Tr   read_csv_kwargsskipinitialspacec              3   >   K   | ]  }|j                  d         yw NrG   .0r   s     r   	<genexpr>zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>U  s      'Vs'V   r   c              3   >   K   | ]  }|j                  d         ywrH  rJ  rK  s     r   rM  zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>X  s!      EC 	sErN  Nr+   )	r   r   r   r   r   r   r   r   r"  )r   r   common_paramsadult_without_spacesadult_with_spacess        r   3test_fetch_openml_overwrite_default_params_read_csvrS  B  s    
!G$['QVW 	M (8-8$ 
*<e)D  '8'>'>w'G'K'K'V'V     '--g6::EE    r   c                    d}t        | ||       t        j                  j                  j                  j                  |      }t        |j                  d            }t        ||      }t        ||      }t        j                  j                  |      sJ t        ||      }|j                         |j                         k(  sJ y )Nr   scikit_learn_data)r   r}   r~   r   
_DATA_FILEr6  strmkdirr   r
   ospathisfiler   )	r   rR   tmpdirr   openml_pathcache_directory	response1location	response2s	            r   test_open_openml_url_cacherb  b  s    G$['=I""**55<<WEK&,,':;<O o>I{O<H77>>(### o>I>>y~~////r   write_to_diskc                    d}t         j                  j                  j                  j	                  |      }t        |j                  d            }t        ||      fd}| j                  t         j                  j                  d|       t        j                  t        d      5  t        ||       d d d        t        j                  j                        rJ y # 1 sw Y   +xY w)Nr   rU  c                     r1t        d      5 }|j                  d       d d d        t        d      t        d      # 1 sw Y   t        d      xY w)Nw Invalid request)rI   writert   )ru   rv   rw   rN   r`  rc  s       r   rx   z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenz  sK    h$ %*++j*++ %*++s   A  Ary   rh  r  )r}   r~   r   rV  r6  rW  rX  r
   r|   r   r  rt   r   rY  rZ  exists)r   r\  rc  r   r]  r^  rx   r`  s     `    @r   'test_open_openml_url_unlinks_local_pathrk  s  s    G""**55<<WEK&,,':;<O{O<H, ((00)]K	z):	;o6 
< ww~~h'''' 
<	;s   *C!!C*c                 ,   d}t         j                  j                  j                  j	                  |      }t        | j                  d            }t        ||      t        j                  t        j                  j                               t        d      5 }|j                  d       d d d        t        ||      fd       }d}t        j                   t"        |      5   |       }d d d        dk(  sJ y # 1 sw Y   RxY w# 1 sw Y   xY w)	Nr   rU  rf  rg  c                  Z    t         j                  j                         rt        d      y)NzFile exist!r   )rY  rZ  rj  	Exception)r`  s   r   
_load_dataz/test_retry_with_clean_cache.<locals>._load_data  s#     77>>(#M**r   z!Invalid cache, redownloading filer  r   )r}   r~   r   rV  r6  rW  rX  r
   rY  makedirsrZ  dirnamerI   ri  r   r   r  RuntimeWarning)	r\  r   r]  r^  rN   ro  warn_msgresultr`  s	           @r   test_retry_with_clean_cacheru    s    G""**55<<WEK&,,':;<O{O<HKK)*	h		 
 [/: ; 3H	nH	5 
6Q;; 
	 
6	5s   C>&D
>D
Dc                 8   d}t         j                  j                  j                  j	                  |      }t        | j                  d            }t        ||      d        }d}t        j                  t        |      5   |        d d d        y # 1 sw Y   y xY w)Nr   rU  c                  2    t        d ddd t                     )Nrd   re   rf   r   r   r+   r   r   ro  z:test_retry_with_clean_cache_http_error.<locals>._load_data  s    3$:')
 	
r   re   r  )r}   r~   r   rV  r6  rW  rX  r   r   r  r   )r\  r   r]  r^  ro  	error_msgs         r   &test_retry_with_clean_cache_http_errorrz    s|    G""**55<<WEK&,,':;<O[/:
 ;

 'I	y		2 
3	2	2s   ?BBc                    d }d}t        |j                  d            }t        | ||       t        |d|ddd      \  }}| j	                  t
        j                  j                  d|       t        |d|ddd      \  }}	t        j                  j                  ||       t        j                  j                  ||	       y )	Nc                 :    t        d| j                         z        )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)rt   rr   ru   rv   rw   s      r   _mock_urlopen_raisez4test_fetch_openml_cache.<locals>._mock_urlopen_raise  s%     ")"6"6"89
 	
r   r   rU  TFr   )r   r   r6   r  r   r   ry   )rW  rX  r   r   r|   r}   r~   r   r   r   r   )
r   rR   r\  r~  r   r^  	X_fetched	y_fetchedX_cachedy_cacheds
             r   test_fetch_openml_cacher    s    
 G&,,':;<O$['=I'!Iy ((00)=PQ%!Hh JJ!!)X6JJ!!)X6r   zas_frame, parser))Tr   )Fr   )Tr   )Fr   c                 p   |s|dk(  rt        j                  d       d}t        | |d       t        dz   d| z   }d}t	        j
                  |      |z  }|dz  |j                  d      5 }	t        j                  |	d      }
t        |
j                               }d	|t        |      d
z
  <   ddd       t        j                  d      5 }|j                         ddd       t        j                  j                  j                   fd}| j#                  t        j                  j                  d|       t        j$                  t&              5 }t        j                  j)                  |d||       ddd       j+                  d      sJ y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   5xY w)z/Check that the checksum is working as expected.r   r   Tr8   r9   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffrF   %   r   Nwbc                     | j                         }|j                  d      r;t        d      5 }|j                         }d d d        t	        t              d      S  |       S # 1 sw Y   'xY w)Nzdata/v1/download/1666876rF   Tr%   )rr   endswithrI   r   r   r   )ru   rv   rw   r@   rN   corrupted_datacorrupt_copy_pathmocked_openml_urls         r   swap_file_mockz9test_fetch_openml_verify_checksum.<locals>.swap_file_mock  sa    ""$<<23'.!!" /$W^%<dKK$W--	 /.s   A&&A/ry   Fr  1666876)r   r   r   rz   r   rH   rI   r$   	bytearrayr   r>   GzipFileri  r}   r~   r   ry   r|   r  rt   r   r  )r   r   r   r\  r   r   original_data_moduleoriginal_data_file_nameoriginal_data_path	orig_file	orig_gzipr   modified_gzipr  excr  r  s                  @@r   !test_fetch_openml_verify_checksumr    s    6X%H%G$['4@ 3S8S	?J:")=>AXX!==		 	 	&)IIi.	)* SY] 
'
 
($	/=D! 
0  ((0088. ((00)^L 
z	"c%%58F 	& 	
 
#
 99Y= 
'	&
 
0	/( 
#	"s%   -AFF $F,F F),F5c                    d }| j                  t        j                  j                  d|       d}t	        j
                  t        t        j                  dt        |z    d            5 }t	        j                  t        d      5  t        |d d	       d d d        t        |      d
k(  sJ 	 d d d        y # 1 sw Y   #xY w# 1 sw Y   y xY w)Nc                 2    t        d ddd t                     )Ni  Simulated network errorrf   rx  r}  s      r   _mock_urlopen_network_errorzPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_error  s    3$=DWY
 	
r   ry   zinvalid-urlz+A network error occurred while downloading z. Retrying...r  r  r   )delayr   )r|   r}   r~   r   r   r  r  r<   r  r	   r  r   r   r>   )r   r  invalid_openml_urlrecords       r   +test_open_openml_url_retry_on_network_errorr    s    

   )-H '	ii!334MC

 
]]9,EF/Q? G6{a
 
 GF
 
s$   *CB7C7C 	<CC)r   r   c                     |dk(  rt        j                  d       d}t        | ||       t        j                  j                  |dd|      }|J |d   j                  dk(  sJ d|d	   vsJ y)
zCheck that we can load the "zoo" dataset.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14340
    r   >   Fr  Nr   )e      animalr   )r   r   r   r}   r~   r   r   )r   rR   r   r   datasets        r   &test_fetch_openml_with_ignored_featurer  (  s     H%G$['=I++uuV , G  6?  I---7?3333r   c                 F   t        j                  d      }d}t        | |d       dd|d}t        dddi|}t        dddi|}|j                  j                  |j                  |j                         |j                  j                  j                  d	      j                         rJ |j                  j                  j                  d	      j                         rJ t        ddd
d|}t        ddd
d|}|j                  j                  |j                  d   |j                  d          |j                  d   j                  j                  d	      j                         rJ |j                  d   j                  j                  d	      j                         rJ y)zCheck that we strip the single quotes when used as a string delimiter.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23381
    r   r   FrD  Tr   r   r   r   r   'r  )r   r   r   Nr+   )r   r   r   r   r   r  r   rW  rG   r"  r  r   )r   r   r   rP  mice_pandasmice_liac_arffs         r   test_fetch_openml_strip_quotesr  >  s    
		X	&BG$['QVW!%'JM@h@-@K!FFFNJJ"";#5#5~7L7LM!!%%00599;;;!!%%..s377999 XhhX-XK! (6CN JJ""'"N$8$8$A   )--88=AACCC  )--66s;??AAAAr   c                     t        j                  d      }d}t        | |d       dd|d}t        dddi|}t        dddi|}|j                  j                  |j                  d	   |j                  d	          y
)zCheck that we can strip leading whitespace in pandas parser.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25311
    r   rC  FrD  Tr  r   r   r   Nr+   )r   r   r   r   r   r  r   r   r   r   rP  adult_pandasadult_liac_arffs         r   $test_fetch_openml_leading_whitespacer  [  s     
		X	&BG$['QVW!%'JMAxA=AL"G+GGOJJ""7#_%:%:7%Cr   c                     t        j                  d      }d}t        | |d       dd|d}t        d
ddi|}t        d
ddi|}|j                  j                  |j                  |j                         y	)zCheck that we can handle escapechar and single/double quotechar.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25478
    r   iZ  FrD  Tr  r   r   Nr+   )r   r   r   r   r   r   r   r  s         r   &test_fetch_openml_quotechar_escapecharr  m  sv     
		X	&BG$['QVW!%'JMAxA=AL"G+GGOJJ!!,"4"4o6K6KLr   )U__doc__r$   rk   rY  r<   	functoolsr   	importlibr   ior   urllib.errorr   numpyr   r   scipy.sparser
  r}   r   sklearn.datasetsr   fetch_openml_origsklearn.datasets._openmlr	   r
   r   r   sklearn.utilsr   $sklearn.utils._optional_dependenciesr   sklearn.utils._testingr   r   r   r   rz   r{   r   r   markparametrizer   r   r   r   r   r   r  r  r  fixturer  r  r  r  r  filterwarningsr  r  r  r  r  r%  r+  rt   KeyErrorr/  r2  r:  r>  rA  rS  rb  rk  ru  rz  r  r  r  r  r  r  r  r+   r   r   <module>r     s
      	 	    "     " >    E  ?  < (D9tLz ? 
i_c1a(	f+S!Q7	
YNBA&	
X!,b"a8	y#Q*	u+S!Q7	E"BA.		4 "b!,	'R3	E"Ar1-	'B2	E"D"a0'0 K#:;4-8($ 9 <1 6($Z ? 
i_c1a(	f+S!Q7	
YNBA&	
X!,b"a8	y#Q*	u+S!Q7	E"BA.		4 "b!,	'R3	E"Ar1-	'B2#, K#:;$. <- 0$.R $56<N 7 <NB K#:;P < P8 K#:;,! < ,!b K#:;<,9V*WX; Y < ;B $=>K#:;7 < ? 7: $:;K#:;( < < (4 * *> ho  od h-  -d P4 4-8)- 95 8)-`  y!H	

 #J	
	00 V,v.h/f-	II PQ x H	

 G	

  T2G	
"
# R$
$ PQ( R (  & 4-8
 9
4 4-8 9& 4-8K#:;F < 9F( 4-8	E(q9
+ 9
+ 4-8(	"J0PQlG-DE?		
 51!	
 H$G$		
 H%H$		
 [96		
 k7-CD6		
G),Z K#:;; <[, 9^;  DY?Q	
 F+Q	
 FyAD	
 T	
!232
 4-8.
 9.
b 4-8W 9W 4-8W 9W@ 4-80 90  4-8( 9((.  4-87 97H +  + \ 8 4-8#:;4 < 94(B:$Mr   