
    {Kg                         d Z ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZmZmZ ddlmZ ddlmZ d Zd	 Zd
 Zd Zd Zd Zd Zy)zTest the 20news downloader, if the data is available,
or if specifically requested via environment variable
(e.g. for CI jobs).    )partial)patchN)check_as_framecheck_pandas_dependency_messagecheck_return_X_y	normalize)assert_allclose_dense_sparsec                     | dd      }|j                   j                  d      sJ  | d|j                  ddd   d      }|j                  |j                  dd  k(  sJ t        j                  |j
                        j                         d	d
gk(  sJ t        |j                        t        |j
                        k(  sJ t        |j                        t        |j                        k(  sJ |j                  d	   }|j                  |j
                  d	      }|j                  j                  |      }|j                  t        j                  |j
                  |k(        d	   d	      }||k(  sJ  | ddd      \  }}t        |      t        |j                        k(  sJ |j                  |j
                  j                  k(  sJ y )NallF)subsetshuffle.. _20newsgroups_dataset:)r   
categoriesr   r      T)r   r   
return_X_y)DESCR
startswithtarget_namesnpuniquetargettolistlen	filenamesdataindexwhereshape)	fetch_20newsgroups_fxtr   	data2catsentry1categorylabelentry2Xys	            f/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/datasets/tests/test_20news.pytest_20newsr,      s   !>D::  !<=== '!2!22b8!<eI
 !!T%6%6rs%;;;;99Y%%&--/Aq6999 y""#s9+;+;'<<<<y""#s9>>':::: ^^AF%%i&6&6q&9:H##H-EYYrxxu 45a8;<FV "$ODAqq6S^###77dkk'''''    c                      | d      }t        |d         t        |j                        k(  sJ t        |d         t        |j                        k(  sJ t        |d         t        |j                        k(  sJ y)zuChecks the length consistencies within the bunch

    This is a non-regression test for a bug present in 0.16.1.
    r   r   r   r   r   N)r   r   r   r   )r#   r   s     r+   test_20news_length_consistencyr0   5   sn     "/DtF|DII...tH~#dkk"2222tK !S%8888r-   c                     | d      }t        j                  |j                        r|j                  j                  dk(  sJ |j                  j                  dk(  sJ |j
                  j                  d   dk(  sJ |j                  j                  t        j                  k(  sJ |j                  j                  d      sJ  | d      }t        j                  |j                        r|j                  j                  dk(  sJ |j                  j                  d	k(  sJ |j
                  j                  d   d
k(  sJ |j                  j                  t        j                  k(  sJ |j                  j                  d      sJ t        | d      }t        ||        | d      }t        j                  |j                        r|j                  j                  dk(  sJ |j                  j                  dk(  sJ |j
                  j                  d   dk(  sJ |j                  j                  t        j                  k(  sJ |j                  j                  d      sJ y )Ntrainr/   csr)2,  ; r   r4   r   test)l  r5   r7   r   )I  r5   r8   )spissparser   formatr"   r   dtyper   float64r   r   r   r   )!fetch_20newsgroups_vectorized_fxtbunch
fetch_funcs      r+   test_20news_vectorizedrA   A   s   -W=E;;uzz"uzz'8'8E'AAA::...<<a E)))::rzz)));;!!"=>>> .V<E;;uzz"uzz'8'8E'AAA::~---<<a D(((::rzz)));;!!"=>>> :6JJUJ' .U;E;;uzz"uzz'8'8E'AAA::5555<<a L000::rzz)));;!!"=>>>r-   c                      | d      } | d      }|d   d d }|d   d d }t        |t        |             t        j                  t        j                  j                  |j                         d      d      sJ y )NFr   Tr   d   r   )axis)r
   r	   r   allcloselinalgnormtodense)r>   r)   X_X_norms       r+   test_20news_normalizationrK   _   so    )E:A	*T	:BZF	&	$3A 16;;ryy~~fnn&6Q~?CCCr-   c           	         t        j                  d      } | d      }t        ||        |j                  }|j                  dk(  sJ t        |j                  j                  D cg c]  }t        ||j                         c}      sJ dD ]  }||j                         v rJ  d|j                         v sJ |j                  j                  dk(  sJ y c c}w )NpandasTas_frame)r4   i< )beginner	beginners	beginning
beginningsbeginsbegleybegonecategory_class)pytestimportorskipr   framer"   r   r   dtypes
isinstanceSparseDtypekeysr   name)r>   pdr?   rZ   colexpected_features         r+   test_20news_as_framerc   i   s    			X	&B-t<E5;<KKE;;/)))5::;L;LM;LC
3/;LMNNN  5::</// uzz|+++<< 0000 Ns   %Cc                     t        |        y )N)r   )r>   hide_available_pandass     r+   test_as_frame_no_pandasrf      s    #$EFr-   c                    t        d      5 }t        d      5 }d|_        d|_        d}t        j                  t        |      5   | d       d d d        d d d        d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nzos.path.existszjoblib.loadT)r)   r*   zThe cached dataset located in)matchrN   )r   return_valuerX   raises
ValueError)r>   mock_is_exist	mock_loaderr_msgs       r+   test_outdated_picklero      so    		 M=!Y)-M&%/I"5Gz914@ : " 
!	  :9 "! 
!	 s:   A?,A3
A'A3A?'A0,A33A<	8A??B)__doc__	functoolsr   unittest.mockr   numpyr   rX   scipy.sparsesparser9   "sklearn.datasets.tests.test_commonr   r   r   sklearn.preprocessingr	   sklearn.utils._testingr
   r,   r0   rA   rK   rc   rf   ro    r-   r+   <module>rz      sT         
 , ?(@	9?<D12G	Ar-   