
    {Kg                     6   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlZd dlZd dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZmZmZmZ d dl m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1 dZ2dZ3e2e3z   Z4d Z5d Z6d Z7d Z8d Z9d Z:ejv                  jy                  deef      d        Z=d Z>d Z?d Z@d ZAd ZBd ZCd  ZDd! ZEd" ZFd# ZGd$ ZHd% ZId& ZJd' ZKd( ZLd) ZMd* ZNejv                  j                  e/d+,      d-        ZPd. ZQd/ ZRd0 ZSe+d1        ZTd2 ZUejv                  jy                  deef      d3        ZVd4 ZWd5 ZXd6 ZYd7 ZZe+d8        Z[ejv                  jy                  deef      d9        Z\d: Z]d; Z^d< Z_e+d=        Z`d> Zad? Zbejv                  jy                  d@ej                  ej                  ej                  g      dA        ZfdB ZgdC ZhdD ZidE ZjdF ZkdG ZldH Zme+dI        ZndJ ZodK ZpdL Zqejv                  jy                  deeef      dM        Zrejv                  jy                  dNej                  ej                  g      dO        Zuejv                  jy                  dP ee0e1            dQ        Zvejv                  jy                  dRej                  ej                  dSfej                  ej                  dSfej                  ej                  dTfej                  ej                  dTfg      dU        Zyejv                  jy                  dV edWX       edWX       edWX      g      dY        ZzdZ Z{e+d[        Z|e,ejv                  jy                  d\e1      d]               Z}e+ejv                  jy                  d^eeeg      d_               Z~ejv                  jy                  d^eeeg      ejv                  jy                  d`daedbfdceddfg      de               Zejv                  jy                  d^ee ej                  ee+f      g      ejv                  jy                  dgdh di g      ejv                  jy                  djdcdag      dk                      Zejv                  jy                  d^eeeg      dl        Zejv                  jy                  deeeg      ejv                  jy                  dmdndogdddpddqdrdsdtf	ddu ddpddqdvdsdtf	ddw ddpdxdydzdvd{f	ddd| dpdxd} d~dsdf	dddddd ddsdf	dg      d               Zejv                  jy                  deddddddgfee2ff      d        Zd Ze+d        Zejv                  jy                  d^eeeeg      d        Zejv                  jy                  d\e1      d        Zy)    N)defaultdict)Mapping)partial)StringIO)product)assert_array_almost_equalassert_array_equal)sparse)clone)ENGLISH_STOP_WORDSCountVectorizerHashingVectorizerTfidfTransformerTfidfVectorizerstrip_accents_asciistrip_accents_unicode
strip_tags)GridSearchCVcross_val_scoretrain_test_split)Pipeline)	LinearSVC)assert_allclose_dense_sparseassert_almost_equalfails_if_pypyskip_if_32bit)_IS_PYPY_IS_WASMCSC_CONTAINERSCSR_CONTAINERS)zthe pizza pizza beer copyrightzthe pizza burger beer copyrightz!the the pizza beer beer copyrightzthe burger beer beer copyrightzthe coke burger coke copyrightzthe coke burger burger)zthe salad celeri copyrightz)the salad salad sparkling water copyrightzthe the celeri celeri copyrightzthe tomato tomato salad waterz the tomato salad water copyrightc                 4    t        |       j                         S N)r   upperss    n/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/feature_extraction/tests/test_text.py	uppercaser'   9   s     #))++    c                 &    | j                  dd      S )N   ée)replacer$   s    r&   strip_eacuter-   =   s    99T3r(   c                 "    | j                         S r"   splitr$   s    r&   split_tokenizer1   A   s    779r(   c                     dgS )Nthe_ultimate_feature r$   s    r&   lazy_analyzer5   E   s    "##r(   c                     d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d	} d
}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d
}t        |       |k(  sJ y )N   àáâãäåçèéêëaaaaaaceeee   ìíîïñòóôõöùúûüýiiiinooooouuuuy   إu   ا   this is à testthis is a testu   öou   ̀́̂̃ u   ȫ)r   aexpecteds     r&   test_strip_accentsrC   I   s     AH #x///(A H #x/// 	AH #x/// 	AH #x/// 	AH #x/// 	#AH #x/// 	AH #x///r(   c                      d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ y )	Nr7   r8   r9   r:   r;   r?   r<   r=   )r   r@   s     r&   test_to_asciirE   m   sz     AHq!X---(A Hq!X--- 	AHq!X--- 	AHq!X---r(   
Vectorizerc                     | d      j                         }d}g d} ||      |k(  sJ d}g d} ||      |k(  sJ  | d      j                         }t        d	      }g d
} ||      |k(  sJ  | t              j                         }d}g d} ||      |k(  sJ  | t        d      j                         }d}g d} ||      |k(  sJ y )Nasciistrip_accents:   J'ai mangé du kangourou  ce midi, c'était pas très bon.)
aimangedu	kangouroucemidietaitpastresbonz0This is a test, really.

 I met Harry yesterday.)thisistestreallymetharry	yesterdayfile)input'This is a test with a file-like object!)rV   rW   rX   withr]   likeobjectpreprocessoru;   J'ai mangé du kangourou  ce midi,  c'était pas très bon.)
AIMANGEDU	KANGOUROUCEMIDIETAITPASTRESBON)	tokenizerrJ   )
zj'airM   rN   rO   rP   zmidi,zc'etaitrS   rT   zbon.)build_analyzerr   r'   r1   )rF   watextrB   s       r&   test_word_analyzer_unigramsrs      s    	'	*	9	9	;BGDH d8x?DLHd8x	&	!	0	0	2B=>DGHd8x 
	+	:	:	<BHDH d8x 
nG	D	S	S	UBGDH d8xr(   c                  b    t        ddd      j                         } d}g d} | |      |k(  sJ y )Nwordunicode      analyzerrJ   ngram_rangerK   )rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   zai mangezmange duzdu kangourouzkangourou cezce midiz
midi etaitz	etait paszpas tresztres bon)r   rp   )rq   rr   rB   s      r&   'test_word_analyzer_unigrams_and_bigramsr}      sA    	yf
n  HDH* d8xr(   c                  p   d} | j                  d      }t        dd      j                         }t        j                  t
              5   ||       d d d        t        ddd      j                         }t        j                  t
              5   ||       d d d        y # 1 sw Y   PxY w# 1 sw Y   y xY w)	NrK   zutf-8rw   rH   )r|   encodingchar      )r{   r|   r   )encoder   rp   pytestraisesUnicodeDecodeError)rr   
text_bytesrq   cas       r&   test_unicode_decode_errorr      s     HDW%J 
Vg	>	M	M	OB	)	*
: 
+ 
Vg
n  
)	*
: 
+	* 
+	* 
+	*s   	B 	B, B),B5c                  Z   t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ d
}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        ddd      j                         } t        d      }g d} | |      d d |k(  sJ y )Nr   rv   r   rz   u9   J'ai mangé du kangourou  ce midi, c'était pas très bon)zj'az'aizai zi mz ma   )zs tresz tres ztres bzres bozes bon1This 
	is a test, really.

 I met Harry yesterday)thihisis zs iz is)z yesteyesteresterdsterdaterdayr]   r^   r{   r|   r_   r   rp   r   cngarr   rB   s      r&   test_char_ngram_analyzerr      s    yfn 	 GD2H:bq>X%%%AH:bc?h&&&BD2H:bq>X%%%AH:bc?h&&&v6n 	 =>D2H:bq>X%%%r(   c                     t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        d
dd      j                         } t        d      }g d} | |      d d |k(  sJ y )Nchar_wbrv   r   rz   r   )z thr   r   r   z thir   )r   r   r   r   zerday r   r]   r   zA test with a file-like object!)z a z tetesestzst z tesr   r   r   s      r&   test_char_wb_ngram_analyzerr     s    )n 	 CD3H:bq>X%%%AH:bc?h&&&yfn 	 56D:H:bq>X%%%r(   c                     t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        d
dd      j                         }t        |      } ||       | |      k(  sJ y )Nru   rv   r   rz   r   )zthis is testzis test reallyztest really metr   )ztest really met harry yesterdayzthis is test really met harryz"is test really met harry yesterdayr]   r   r   )r   rr   rB   	cnga_filer]   s        r&   test_word_ngram_analyzerr     s    yfn 	 CDDH:bq>X%%%H
 :bc?h&&&v6n  D>DT?d4j(((r(   c                  B   ddd} t        | j                               }t        t        t        t        t        t              fD ]  } ||       }t        |      }|j                  t               t        |t              r|j                  | k(  sJ t        |j                        |k(  sJ |j                  t              }|j                  d   t!        |      k(  sJ  ||       }t        |      }|j#                  |      }t!        |      |j                  d   k(  rJ  y )Nr   rx   pizzabeer
vocabulary)setkeysdictlistiterr   r   intr   fitJUNK_FOOD_DOCS
isinstancer   vocabulary_	transformshapeleninverse_transform)vocabtermstypvvectXinvs          r&   &test_countvectorizer_custom_vocabularyr   6  s    #E

E dD'+s";<J!, a!##u,,,t''(E111NN>*wwqzSZ'''J!,$$Q'3x1771:%%% =r(   c                     ddg} t        dt        |       fdt               fg      }|j                  t              }t        |j                  d   j                        t        |       k(  sJ |j                  d   t        |       k(  sJ y )Nr   r   countr   tfidfrx   )
r   r   r   fit_transformALL_FOOD_DOCSr   named_stepsr   r   r   )what_we_likepiper   s      r&   /test_countvectorizer_custom_vocabulary_pipeliner   K  s    V$Lo>?&()	
D 	=)At(445\9JJJJ771:\****r(   c                      ddd} d}t        j                  t        |      5  t        |       }|j	                  dg       d d d        y # 1 sw Y   y xY w)Nr   r   z$Vocabulary contains repeated indicesmatchr   pasta_sizilianar   r   
ValueErrorr   r   )r   msgr   s      r&   7test_countvectorizer_custom_vocabulary_repeated_indicesr   X  sE    #E
0C	z	-%0#$% 
.	-	-s   AAc                      ddd} t        j                  t        d      5  t        |       }|j	                  dg       d d d        y # 1 sw Y   y xY w)Nrx   ry   r   zdoesn't contain indexr   r   pasta_verdurar   r   r   s     r&   0test_countvectorizer_custom_vocabulary_gap_indexr   `  sA    #E	z)@	A%0/"# 
B	A	As   A		Ac                     t               } | j                  d       | j                         t        k(  sJ | j                  d       t	        j
                  t              5  | j                          d d d        | j                  d       t	        j
                  t              5  | j                          d d d        g d}| j                  |       | j                         t        |      k(  sJ y # 1 sw Y   xY w# 1 sw Y   JxY w)Nenglish
stop_words_bad_str_stop__bad_unicode_stop_)someotherwords)r   
set_paramsget_stop_wordsr   r   r   r   r   )cvstoplists     r&   test_countvectorizer_stop_wordsr   g  s    		BMMYM'"4444MM-M.	z	"
 
#MM1M2	z	"
 
#)HMMXM&#h-/// 
#	" 
#	"s   !C3%C?3C<?Dc                  @   t        j                  t        d      5  t        g       } | j	                  dg       d d d        t        j                  t        d      5  t        dd      }|j	                  g d       d d d        y # 1 sw Y   NxY w# 1 sw Y   y xY w)	Nzempty vocabularyr   r   foo      ?r   )max_dfr   )zto be or not to bez
and me toozand so do your   )r   r   s     r&   %test_countvectorizer_empty_vocabularyr   v  sr    	z);	<"-% 
= 
z);	<39=	CD 
=	<	 
=	< 
=	<s   B!BBBc                      t               } | j                  t        d d       }| j                  t        dd        }|j                  d   |j                  d   k7  sJ y )Nr   rx   )r   r   r   r   )r   X1X2s      r&   test_fit_countvectorizer_twicer     sV    		B			-+	,B			-+	,B88A;"((1+%%%r(   c                      g d} d}t        |      }|j                  |        g d}|j                         }t        ||       y)zCheck `get_feature_names_out()` when a custom token pattern is passed.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12971
    z&This is the 1st document in my corpus.z This document is the 2nd sample.zAnd this is the 3rd one.zIs this the 4th document?z'[0-9]{1,3}(?:st|nd|rd|th)\s\b(\w{2,})\btoken_pattern)documentonesampleN)r   r   get_feature_names_outr	   )corpusr   
vectorizerrB   feature_names_outs        r&   )test_countvectorizer_custom_token_patternr     sG    
F ?M }=JV$,H"88:((3r(   c                      g d} d}d}t        |      }t        j                  t        |      5  |j	                  |        ddd       y# 1 sw Y   yxY w)zCheck that we raise an error if token pattern capture several groups.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12971
    r   z)([0-9]{1,3}(?:st|nd|rd|th))\s\b(\w{2,})\bz,More than 1 capturing group in token patternr   r   Nr   r   r   r   r   )r   r   err_msgr   s       r&   <test_countvectorizer_custom_token_pattern_with_several_groupr     sF    
F AM<G }=J	z	1v 
2	1	1s   AAc                  T   g d} d}t        d|       }t        j                  t        |      5  |j	                  |        d d d        t        j                         5  t        j                  dt               |j                  |        d d d        y # 1 sw Y   RxY w# 1 sw Y   y xY w)N)SampleUpperCase
VocabularyzyUpper case characters found in vocabulary while 'lowercase' is True. These entries will not be matched with any documentsT)	lowercaser   r   error)	r   r   warnsUserWarningr   warningscatch_warningssimplefilterr   )r   messager   s      r&   'test_countvectorizer_uppercase_in_vocabr	    s     ;J	)  !4JGJ	k	1z" 
2 
	 	 	"g{3Z( 
#	" 
2	1 
#	"s   B,BBB'c                      g dg dg dg} t        dd      j                  |       }g d}|j                  |      }t        ||       y)	z0Check get_feature_names_out for TfidfTransformerrx   rx   rx   rx   rx   r   rx   r   r   Tl2
smooth_idfnorm)rA   cbN)r   r   r   r	   )r   trfeature_names_inr   s       r&   %test_tf_transformer_feature_names_outr    sI    	Iy)A	T	5	9	9!	<B&001AB'):;r(   c                  v   g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ t	        |dz  j                  d	
      g d       g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ y )Nr  r  r  Tr  r  r   ry   rx   axisr   r   r   )r   r   toarrayallr   sumr   r  r   s      r&   test_tf_idf_smoothingr    s    	Iy)A	T	5BQ'')EQJ uaxnn!n4oF 
Iy)A	T	5BQ'')EQJr(   zcno floating point exceptions, see https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881reasonc                     g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ t	        |dz  j                  d	
      g d       g dg dg dg} t        dd      }d}t        j                  t        |      5  |j                  |       j                          d d d        y # 1 sw Y   y xY w)Nr  r  r  Fr  r  r   ry   rx   r  r  zdivide by zeror   )	r   r   r  r  r   r  r   r  RuntimeWarning)r   r  r   in_warning_messages       r&   test_tfidf_no_smoothingr%    s     
Iy)A	U	6BQ'')EQJ uaxnn!n4oF 
Iy)A	U	6B)	n,>	?
##% 
@	?	?s   # CCc                      dgdgdgg} t        ddd       }|j                  |       j                         }|d   dk(  sJ |d   |d   kD  sJ |d   |d   kD  sJ |d   dk  sJ |d   dk  sJ y )Nrx   ry   r   TF)sublinear_tfuse_idfr  r   )r   r   r  r  s      r&   test_sublinear_tfr)    s    
qcA3A	tU	FBQ'')E8q==8eAh8eAh8a<<8a<<r(   c                  .	   t        t        d d       } t        d   g}t        t              dz
  }t        d      }|j	                  |       }t        |d      r|j                         }|d|j                  d   f   dk(  sJ t        |j                  	      }||fD ]  }|j                  |      }t        |d      r|j                         }|j                  }|d|d
   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ d|vsJ d|vsJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  rJ  t        d      }	|	j                  |      j                  |      j                         }
t        |	j                        t        |j                        k(  sJ |
j                  |t        |j                        fk(  sJ |	j                        j                         }|j                  t        |      t        |j                        fk(  sJ t        dd      }|j                  |      j                  |      j                         }t        |d      rJ t        d      }t        j                  t               5  |j                  |       d d d        t#        t%        j&                  |d      dg|z         t        t        d d       } t)        d      }|j*                  |_        |j	                  |       j                         }|j,                  rJ t#        |
|       |j                  |      j                         }t#        ||       t        d 	      }t        j                  t               5  |j                  |        d d d        |j/                  dd       |j1                         }d}t3        |      } ||      }||k(  sJ |j/                  dd        t        j                  t               5  |j1                          d d d        d |_        t        j                  t               5  |j5                          d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   \xY w# 1 sw Y   y xY w)!Nrx         ?r   tocsrr   r   ry   r   saladtomatowaterthe	copyrightcokeburgerr   l1r  F)r  r(  idf_Tr(  r  r   rH   )rJ   r  rK   _gabbledegook_)rJ   rd   _invalid_analyzer_type_)r   r   r   r   r   hasattrr.  r   r   r   r   r  r8  r   r   r   r   r   npr  r   r   fixed_vocabulary_r   build_preprocessorr   rp   )
train_data	test_datan_trainv1counts_trainv2r   counts_testr   t1r   
tfidf_testt2tft3tvtfidf2tfidf_test2v3	processorrr   rB   results                          r&   test_vectorizerrR    s\   mCR()Jr"#I- 1$G 
	$B##J/L|W%#))+2>>'223q888 
BNN	3B "Xkk),;(%++-K]]
1j112a7771j223q8881j112a777 J&&& *,,, 1j001Q6661j223q8881j001Q6661j112a777/ 4 
t	$BFF< **<8@@BErww<3r~~....;;7C$78888 k*224JIBNN0CDDDD 
tU	3B			'	'	5	=	=	?Br6""" 
$	'B	z	"
\" 
# bffRa03%'/B mCR()J	d	#B		BIj)113F####eV, ,,y)113Kj+6 
D	)B	z	"
Z  
# MM5M9%%'IGD"4(Ht_Fv MM 0tMD	z	"
 
# .BM	z	"
 
#	"Q 
#	", 
#	" 
#	"
 
#	"s0   Q&Q3Q?R&Q03Q<?RRc                     d\  } }}}t        | |||      }|j                  t               |j                  j                  | k(  sJ |j                  j
                  |k(  sJ |j                  j                  |k(  sJ |j                  j                  |k(  sJ d|_        d|_        d|_        d|_        |j                  j                  | k(  sJ |j                  j
                  |k(  sJ |j                  j                  |k(  sJ |j                  j                  |k(  sJ |j                  t               |j                  j                  |j                  k(  sJ |j                  j
                  |j
                  k(  sJ |j                  j                  |j                  k(  sJ |j                  j                  |j                  k(  sJ y )N)r  FFF)r  r(  r  r'  r6  T)r   r   r   _tfidfr  r(  r  r'  )r  r(  r  r'  rL  s        r&   test_tfidf_vectorizer_settersrU  i  s   .G+D':|	7z
B FF>99>>T!!!99'''99:---99!!\111 BGBJBMBO99>>T!!!99'''99:---99!!\111FF>99>>RWW$$$99

***992==00099!!R__444r(   c                     t               } | j                  t              }|j                  }|j                  t        t              | j                  fk(  sJ |j                  | j                  k(  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ t        |j                  d         D ]9  }t        t        j                  j                  |d   j                  d      d       ; t        dd      } | j                  t              }|j                  t        t              | j                  fk(  sJ |j                  | j                  k(  sJ |j                  }||kD  sJ |d|z  k  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ t        |j                  d         D ]9  }t        t        j                  j                  |d   j                  d      d       ; y )	Nr+  r   rx   ry   r   rw   r6  )r|   r  )r   r   r   nnzr   r   
n_featuresdtyper=  mindatamaxranger   linalgr  )r   r   	token_nnzi
ngrams_nnzs        r&   test_hashing_vectorizerrb    s   A	M"AI77s=)1<<888877agg 66!&&>B66!&&>A66!&&>A66!&&>A 1771:BIINN1Q499a8#>  	f48A	M"A77s=)1<<888877agg J	!!!I%%% 66!&&>B66!&&>A 1771:BIINN1Q499a8#> r(   c                  4   t        d      } t        j                  t              5  | j	                          d d d        | j
                  rJ | j                  t              }|j                  \  }}t        | j                        |k(  sJ | j	                         }t        |t        j                        sJ |j                  t        k(  sJ t        |      |k(  sJ t!        g d|       t#        |      D ]%  \  }}|| j                  j%                  |      k(  r%J  g d}t        |      } | j	                         }t!        g d|       | j
                  sJ t#        |      D ]%  \  }}|| j                  j%                  |      k(  r%J  y # 1 sw Y   XxY w)Nr,  r-  	r   r5  celerir4  r   r/  	sparklingr0  r1  r   )r   r   r   r   r   r>  r   r   r   r   r   r   r=  ndarrayrY  rb   r	   	enumerateget)r   r   	n_samplesrX  feature_namesidxnamer   s           r&   test_feature_namesrn    s|   		$B 
z	"
  " 
##### 	'AGGIzr~~*,,,,,.MmRZZ000&(((}+++
	
 	 }-	Tbnn((.... .
E 
E	*B,,.M
	
 	 }-	Tbnn((.... . 
#	"s   FFc                 ~    h d} | dd      }|j                  t               t        |j                        |k(  sJ y )N>   r   r   r/  r5  g333333?   )r   max_features)r   r   r   r   )rF   expected_vocabularyr   s      r&   test_vectorizer_max_featuresrs    s<    > 3Q7JNN=!z%%&*====r(   c                     t        d      } t        d      }t        d       }| j                  t              j                  d      }|j                  t              j                  d      }|j                  t              j                  d      }| j	                         }|j	                         }|j	                         }d|j                         k(  sJ d|j                         k(  sJ d|j                         k(  sJ d|t        j                  |         k(  sJ d|t        j                  |         k(  sJ d|t        j                  |         k(  sJ y )Nrx   rq  r   r   r     r2  )r   r   r   r  r   r\  r=  argmax)	cv_1cv_3cv_Nonecounts_1counts_3counts_None
features_1
features_3features_Nones	            r&   "test_count_vectorizer_max_featuresr    s;    *D*D40G!!.15515=H!!.15515=H''7;;;CK++-J++-J113M !!!! Jryy23333Jryy23333M"))K"89999r(   c                  L   g d} t        dd      }|j                  |        d|j                  j                         v sJ t	        |j                  j                               dk(  sJ d|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ d	|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ y )
Nabcdeaeatr   r   r{   r   rA   r   r,  rp  rx   )r   r   r   r   r   r   rA  r   s     r&   test_vectorizer_max_dfr    s   %IF37DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r(   c                  L   g d} t        dd      }|j                  |        d|j                  j                         v sJ t	        |j                  j                               dk(  sJ d|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ d	|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ y )
Nr  r   rx   )r{   min_dfrA   r   ry   r  g?)r   r   r   r   r   r  r  s     r&   test_vectorizer_min_dfr  *  s   %IF15DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r(   c                     ddg} t        dd      }|j                  |       j                         }t        g d|j	                                t        g dg dg|       t        ddd	
      }|j                  |       j                         }t        g dg dg|       t        ddd	t
        j                        }|j                  |       }|j                  t
        j                  k(  sJ y )Naaabcabbder   r   r  )rA   r  r  dr+   )r   rx   rx   r   r   )rx   ry   r   rx   rx   T)r{   r   binary)rx   rx   rx   r   r   )rx   rx   r   rx   rx   )r{   r   r  rY  )r   r   r  r	   r   r=  float32rY  )rA  r   r   X_sparses       r&   test_count_binary_occurrencesr  <  s    '"IF37D9%--/A0$2L2L2NO91= F3tDD9%--/A91= F3t2::VD!!),H>>RZZ'''r(   c                     ddg} t        ddd       }|j                  |       }t        j                  |dd j                        dk(  sJ t        j                  |dd	 j                        d	k(  sJ |j
                  t        j                  k(  sJ t        ddd
d       }|j                  |       }t        j                  |j                        dk(  sJ |j
                  t        j                  k(  sJ t        ddd
d t        j                        }|j                  |       }|j
                  t        j                  k(  sJ y )Nr  r  Fr   )alternate_signr{   r  r   rx   r   ry   T)r{   r  r  r  )r{   r  r  r  rY  )r   r   r=  r\  r[  rY  float64)rA  r   r   s      r&   test_hashed_binary_occurrencesr  P  s    '"IEFNDy!A66!Aa&++!###66!Aa&++!###77bjj    dD 	y!A66!&&>Q77bjj    dRZZD 	y!A77bjj   r(   c                 ~   t         } |        }|j                  |      }|j                  |      }t        |t              sJ |j                         }t        ||      D ]g  \  }}t        j                  t        j                   ||                  }t        j                  t        j                  |            }t        ||       i t        j                  |      sJ |j                  dk(  sJ |j                         }	|j                  |	      }
t        ||
      D ]7  \  }}t        t        j                  |      t        j                  |             9 |j                         }|j                  |      }t        ||      D ]7  \  }}t        t        j                  |      t        j                  |             9 y )Ncsr)r   r   r   r   r   rp   zipr=  sortuniquer	   r
   issparseformatr  tocsc)rF   r[  r   transformed_datainversed_dataanalyzedocinversed_termsr   transformed_data2inversed_data2terms2transformed_data3inversed_data3terms3s                  r&   !test_vectorizer_inverse_transformr  k  sm    DJ!//5001ABMmT***'')G"47^		'#,/0>!:;5.1  8
 ??+,,,""e+++ )002112CDN]N;v2775>2776?; < )..0112CDN]N;v2775>2776?; <r(   c                     t         t        z   } dgt        t               z  dgt        t              z  z   }t        | |dd      \  }}}}t	        dt               fdt               fg      }dd	gd
d}t        ||dd      }|j                  ||      j                  |      }	t        |	|       |j                  dk(  sJ |j                  j                  d   }
|
j                  dk(  sJ y )Nr+  rx   g?r   	test_sizerandom_stater   svcrx   rx   rw   hingesquared_hinge)vect__ngram_range	svc__lossr   )n_jobsr   r   )r   NOTJUNK_FOOD_DOCSr   r   r   r   r   r   r   predictr	   best_score_best_estimator_r   r|   r[  targetr@  rA  target_traintarget_testpipeline
parametersgrid_searchpredbest_vectorizers              r&   -test_count_vectorizer_pipeline_grid_selectionr    s    --D TC''1#4E0F*FFF 8Hf!84J	< &/"34uik6JKLH %f-/J xA!DK ??:|4<<YGDt[)
 ""c)))!11==fEO&&&000r(   c                     t         t        z   } dgt        t               z  dgt        t              z  z   }t        | |dd      \  }}}}t	        dt               fdt               fg      }dd	gd
dd}t        ||d      }|j                  ||      j                  |      }	t        |	|       |j                  dk(  sJ |j                  j                  d   }
|
j                  dk(  sJ |
j                  dk(  sJ |
j                   rJ y )Nr+  rx   g?r   r  r   r  r  rw   )r6  r  r  )r  
vect__normr  )r  r   r  )r   r  r   r   r   r   r   r   r   r  r	   r  r  r   r|   r  r>  r  s              r&   'test_vectorizer_pipeline_grid_selectionr    s%   --D TC''1#4E0F*FFF 8Hf!84J	< &/"34uik6JKLH %f-"/J xA>K ??:|4<<YGDt[)
 ""c)))!11==fEO&&&0004'''00000r(   c                      t         t        z   } dgt        t               z  dgt        t              z  z   }t        dt	               fdt               fg      }t        || |d      }t        |g d       y )Nr+  rx   r   r  r   )r   r  )r   r  r   r   r   r   r   r	   )r[  r  r  	cv_scoress       r&   )test_vectorizer_pipeline_cross_validationr    sj    --D TC''1#4E0F*FFF&/"34uik6JKLH$1=Iy/2r(   c                     d} t               }|j                  | g      }|j                  dk(  sJ t        d d      }|j	                  | g      }|j                  dk(  sJ |j
                  |j
                  k(  sJ t        t        j                  |j                        t        j                  |j                               y )Nu   Машинное обучение — обширный подраздел искусственного интеллекта, изучающий методы построения алгоритмов, способных обучаться.)rx      F)r  r  )rx   i   )
r   r   r   r   r   rW  r	   r=  r  r[  )r   r   	X_countedX_hasheds       r&   test_vectorizer_unicoder    s    	1  D""H:.I??g%%%$u=D~~xj)H>>Z''' ==HLL((( rwwy~~.0FGr(   c                      ddg} t        |       }|j                  t              }|j                  t              }t	        |j                         |j                                |j                  sJ y )Nr   re  r   )r   r   r   r   r   r  r>  )r   r   X_1X_2s       r&   +test_tfidf_vectorizer_with_fixed_vocabularyr    sY    8$Jj1D


]
+C
..
'CckkmS[[];!!!!r(   c                     t               t        d      t        d      t        d      t               t        t              t        t              t        t              j	                  t
              t        t        	      j	                  t
              t               t        t              t               j	                  t
              g} | D ]  }t        j                  |      }t        j                  |      }t        |      |j                  k(  sJ |j                         |j                         k(  sJ t        rt        |t               rt!        |j#                  t
              |j#                  t
                      y )
Nr6  r7  T)r  rw   r|   rc   )r{   rI   )r   r   r   r5   r   r   r-   r   pickledumpsloadstype	__class__
get_paramsr   r   r   r   )	instancesorigr%   copys       r&   test_pickling_vectorizerr    s   t$&f-Z0.Z044^Dl377G.n-I LL||ADzT^^+++ DOO$5555
4):;("">2"">2 r(   factoryc                     t               } | |      }d}t        j                  t        j                  |            } ||      } ||      }||k(  sJ y)z_Tokenizers cannot be pickled
    https://github.com/scikit-learn/scikit-learn/issues/12833
    rK   N)r   r  r  r  )r  vecfunctionrr   roundtripped_functionrB   rQ  s          r&   test_pickling_built_processorsr  $  sS     
Cs|HGD"LLh)?@~H"4(FXr(   c                     t         j                  j                  d      } t        j                  g d      }t	        dd      D ]  }t        | j                  |dd            }t        |      }t        j                  t        j                  |            }|j                  t               |j                  t               t        |j                         |j                                 y Nr   rd  d   r   F)sizer,   r   )r=  randomRandomStatearrayr]  r   choicer   r  r  r  r   r   r	   r   )rngvocab_wordsx	vocab_setr   unpickled_cvs         r&   -test_countvectorizer_vocab_sets_when_picklingr  9  s     ))


"C((
	
K 1c]

;Q
FG		2||FLL$45
}'$$&(J(J(L	
 r(   c                     t         j                  j                  d      } t        j                  g d      }t	        dd      D ]  }t               }| j                  |dd      }t	        dd      D ]
  }||||   <    t        |      }t        j                  t        j                  |            }|j                  t               |j                  t               t        |j                         |j                                 y r  )r=  r  r  r  r]  r   r  r   r  r  r  r   r   r	   r   )r  r  r  
vocab_dictr   yr   r  s           r&   .test_countvectorizer_vocab_dicts_when_picklingr  U  s    
))


"C((
	
K 1c]V


;Q
>q!A#$JuQx  
3||FLL$45
}'$$&(J(J(L	
 r(   c                     t               j                  t              } t               j	                  |       }t        j                  |      }t        j                  |      }t        |      |j                  k(  sJ t        |j                  |       j                         |j                  |       j                                y r"   )r   r   r   r   r   r  r  r  r  r  r	   r  )r   r  r%   r  s       r&   test_pickling_transformerr  r  s    ''7A!!!$DTA<<?D:'''t))!,4468J8J18M8U8U8WXr(   c                  2   t               j                  t              } t               j	                  |       }t               }|j
                  |_        t        |j                  |       j                         |j                  |       j                                y r"   )	r   r   r   r   r   r8  r	   r   r  )r   r  r  s      r&   test_transformer_idf_setterr  {  si    ''7A!!!$DD		DIt~~a(002DNN14E4M4M4OPr(   c                     t        d      } | j                  t               t        | j                  d      }| j                  |_        t        |j                  t              j                         | j                  t              j                                t        | j                  d      }d}t        j                  t        |      5  | j                  |_        d d d        y # 1 sw Y   y xY w)NTr9  r   r(  Fz+`idf_` cannot be set when `user_idf=False`.r   )r   r   r   r   r8  r	   r   r  r   r   r   )r  r  r   s      r&   test_tfidf_vectorizer_setterr     s    4(DHH^d&6&6ED		DI~&..0~&..0
 d&6&6FD;G	z	1II	 
2	1	1s   C''C0c                  &   t        d      } | j                  t               t        | j                  d      }t	        | j
                        }dg|dz   z  }t        j                  t              5  t        |d|       d d d        y # 1 sw Y   y xY w)NTr9  r  r   rx   r8  )
r   r   r   r   r   r8  r   r   r   setattr)r   r  expected_idf_leninvalid_idfs       r&   %test_tfidfvectorizer_invalid_idf_attrr    sn    4(DHH^d&6&6ED499~%+a/0K	z	"fk* 
#	"	"s   0BBc                      g d} t        |       }t        j                  t              5  |j	                  g        d d d        y # 1 sw Y   y xY w)N)rA   r  r  rA   rA   r   r   r   s     r&   test_non_unique_vocabr    s4    %Ee,D	z	" 
#	"	"s   AAc                      d} t         }d }t        j                  ||       5   |        d d d        y # 1 sw Y   y xY w)Nz?np.nan is an invalid document, expected byte or unicode string.c                  \    t               } | j                  dt        j                  dg       y )Nhello worldhello hello)r   r   r=  nan)hvs    r&   funcz0test_hashingvectorizer_nan_in_docs.<locals>.func  s#     
-?@r(   r   )r   r   r   )r  	exceptionr  s      r&   "test_hashingvectorizer_nan_in_docsr    s6     PGIA 
y	0 
1	0	0s   4=c                  0   t        ddd       } | j                  sJ | j                  ddg      j                         }t	        |j                         g d       | j                  ddg      j                         }t	        |j                         g d       y )NTF)r  r(  r  r
  r  )rx   rx   rx   r   )r   r  r   r  r	   ravelr   )r   r   r   s      r&   test_tfidfvectorizer_binaryr    ss    tU>A88O8	67??AAqwwy,/	
m]3	4	<	<	>Brxxz<0r(   c                      t        d      } | j                  t               t        | j                  | j
                  j                         y )NTr9  )r   r   r   r   r8  rT  )r   s    r&   test_tfidfvectorizer_export_idfr    s0    4(DHH^dii)9)9:r(   c                      t        dg      } t        |       }| j                  t               |j                  t               |j                  | j                  k(  sJ y )Nr2  r   )r   r   r   r   r   )
vect_vocabvect_vocab_clones     r&   test_vectorizer_vocab_cloner    sM     UG4JZ(NN=!''':+A+AAAAr(   c                    d} |        }t        j                  t        |      5  |j                  d       d d d        t        j                  t        |      5  |j	                  d       d d d        |j	                  ddg       t        j                  t        |      5  |j                  d       d d d        y # 1 sw Y   xY w# 1 sw Y   ^xY w# 1 sw Y   y xY w)NzBIterable over raw text documents expected, string object received.r   zhello world!	some textzsome other text)r   r   r   r   r   r   )rF   r  r  s      r&   &test_vectorizer_string_object_as_inputr    s     SG
,C	z	1.) 
2 
z	1 
2GG[+,-	z	1n% 
2	1 
2	1 
2	1 
2	1s#   B=C	"C=C	CCX_dtypec                     t        j                  dd| d      }t               j                  |      }|j                  |j                  k(  sJ y N
    N  *   rY  r  )r
   randr   r   rY  )r  r   X_transs      r&   test_tfidf_transformer_typer&    s?    BW2>A ..q1G==AGG###r(   zcsc_container, csr_containerc                 $   t        j                  ddt        j                  d      } | |      } ||      }t	               j                  |      }t	               j                  |      }t        ||       |j                  |j                  k(  sJ y r  )r
   r$  r=  r  r   r   r   r  )csc_containercsr_containerr   X_cscX_csrX_trans_cscX_trans_csrs          r&   test_tfidf_transformer_sparser.    sz     	BRZZbAA!E!E"$2259K"$2259K k:!3!3333r(   z0vectorizer_dtype, output_dtype, warning_expectedTFc                    t        j                  g d      }t        |       }d}|r6t        j                  t
        |      5  |j                  |      }d d d        nHt        j                         5  t        j                  dt
               |j                  |      }d d d        j                  |k(  sJ y # 1 sw Y   xY w# 1 sw Y   'xY w)N)numpyscipysklearnrY  z'dtype' should be used.r   r  )r=  r  r   r   r  r  r   r  r  r  rY  )vectorizer_dtypeoutput_dtypewarning_expectedr   r   warning_msg_matchX_idfs          r&   test_tfidf_vectorizer_typer9    s     	./A '78J1\\+->?,,Q/E @? $$&!!';7,,Q/E ' ;;,&&& @? '&s   B82,C8CCr  )ry   rx   r  c                 T   | j                   }t        j                  d| d      }t        | t              rt
        rt        j                  d       t        j                  t        |      5  | j                  dg       d d d        t        j                  t        |      5  | j                  dg       d d d        t        | t              r7t        j                  t        |      5  | j                  dg       d d d        y y # 1 sw Y   xY w# 1 sw Y   ]xY w# 1 sw Y   y xY w)NzInvalid value for ngram_range=z/ lower boundary larger than the upper boundary.*HashingVectorizer is not supported on PyPyr   r   zgood news everyone)r|   reescaper   r   r   r   xfailr   r   r   r   r   )r  invalid_ranger  s      r&   $test_vectorizers_invalid_ngram_ranger@    s     OOMii
( 89 	9G #()hHI	z	1%&' 
2 
z	1/01 
2 #()]]:W5MM/01 65 * 
2	1 
2	1 65s$   -D#D)DDDD'c                     | j                         }| j                         }| j                         }| j                  |||      S r"   )r   build_tokenizerr?  _check_stop_words_consistency)	estimatorr   tokenize
preprocesss       r&   rC  rC  )  sA    ))+J((*H--/J22:z8TTr(   c                     d} d| z  }t               t               t               fD ]]  }|j                  g d       t	        j
                  t        |      5  |j                  dg       d d d        |`t        |      du r]J  t        j                         5  t        j                  dt               j                  dg       d d d        t              J |j                  g d	       t	        j
                  t        |      5  |j                  dg       d d d        y # 1 sw Y   xY w# 1 sw Y   mxY w# 1 sw Y   y xY w)
Nz\['and', 'll', 've'\]z}Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens %s not in stop_words.)you'veyouyou'llANDr   r   r
  Fr  )rH  rI  rJ  blahrK  )r   r   r   r   r   r  r  r   _stop_words_idrC  r  r  r  )lstrr  r  s      r&   'test_vectorizer_stop_words_inconsistentrO  0  s   #D	')-	. 
  !?#46G6IJ"DE\\+W5}o. 6 ,S1U::: K 
	 	 	"g{3=/* 
# )-555 NNHNI	k	1=/* 
2	1 65 
#	" 
2	1s$   D&-D2
D>&D/	2D;>Er)  c                 J    | dt         j                        }t         j                  }|j                  j                  |      |_        |j                  j                  |      |_        dddd}t               j                  ||      }||j                  j                  k(  sJ y)z
    Check that CountVectorizer._sort_features preserves the dtype of its sparse
    feature matrix.

    This test is skipped on 32bit platforms, see:
        https://github.com/scikit-learn/scikit-learn/pull/11295
    for more details.
    )r   r   r3  r   rx   ry   )zscikit-learnrW   zgreat!N)r=  int64indicesastypeindptrr   _sort_featuresrY  )r)  r   INDICES_DTYPEr   Xss        r&   7test_countvectorizer_sort_features_64bit_sparse_indicesrX  L  s     	fBHH-A HHM		  /AIxx}-AH"#1:J			)	)!Z	8BBJJ,,,,,r(   	Estimatorc                 .   ddig} |        }t        |      du sJ  | d dg      }t        |      dk(  sJ t        |      J |j                  |        G d d	|       } |dg
      }t        |      dk(  sJ  | d dg      }t        |      du sJ y )Nrr   r  Tc                     | d   S Nrr   r4   r  s    r&   <lambda>z?test_stop_word_validation_custom_preprocessor.<locals>.<lambda>p  s    1V9r(   and)rd   r   r  c                       e Zd Zd Zy)Ftest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimatorc                     d S )Nc                     | d   S r\  r4   r]  s    r&   r^  zktest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessor.<locals>.<lambda>x  s    QvYr(   r4   )selfs    r&   r?  zYtest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessorw  s    &&r(   N)__name__
__module____qualname__r?  r4   r(   r&   CustomEstimatorra  v  s    	'r(   rh  r   c                 J    t        j                  d      j                  |       S )Nz\w{1,})r<  compilefindallr  s    r&   r^  z?test_stop_word_validation_custom_preprocessor.<locals>.<lambda>~  s    bjj3;;C@r(   )ro   r   )rC  r   )rY  r[  r  rh  s       r&   -test_stop_word_validation_custom_preprocessorrm  f  s    
 [!"D
+C(-555
!4%
IC(-888(-555d') ' eW
-C(-888
@eWC )-555r(   zinput_type, err_type, err_msgfilenamer?   r]   z$'str' object has no attribute 'read'c                     t        | t              rt        rt        j                  d       dg}t        j
                  ||      5   | d |      j                  |       d d d        y # 1 sw Y   y xY w)Nr;  "this is text, not file or filenamer   c                 "    | j                         S r"   r/   r]  s    r&   r^  z.test_callable_analyzer_error.<locals>.<lambda>  s
    QWWYr(   r{   r^   )
issubclassr   r   r   r>  r   r   )rY  
input_typeerr_typer   r[  s        r&   test_callable_analyzer_errorrv    sW     )./HAB01D	xw	/.jAOOPTU 
0	/	/s   A**A3)marksr{   c                     t        | d      S )Nr)openrl  s    r&   r^  r^    s
    T#s^r(   c                 "    | j                         S r"   )readrl  s    r&   r^  r^    s
    r(   rt  c                     dg}t        j                  t        t        f      5   | ||      j	                  |       d d d        y # 1 sw Y   y xY w)Nrp  rr  )r   r   FileNotFoundErrorAttributeErrorr   )rY  r{   rt  r[  s       r&   &test_callable_analyzer_change_behaviorr    s?     11D	)>:	;8:6DDTJ 
<	;	;s   AAc                 8   d }t        |t              rt        rt        j                  d       | j                  d      }|j                  d       t        j                  t        d      5   ||d      j                  |g       d d d        y # 1 sw Y   y xY w)	Nc                     t        d      )Ntesting)	Exceptionrl  s    r&   r{   z6test_callable_analyzer_reraise_error.<locals>.analyzer  s    	""r(   r;  zfile.txtzsample content
r  r   r]   rr  )
rs  r   r   r   r>  joinwriter   r  r   )tmpdirrY  r{   fs       r&   $test_callable_analyzer_reraise_errorr    so    
# )./HABJAGG	y		2862@@!E 
3	2	2s   ,BBzjstop_words, tokenizer, preprocessor, ngram_range, token_pattern,analyzer, unused_name, ovrd_name, ovrd_msgrH  rJ  r  r   z'stop_words'
'analyzer'	!= 'word'c                 "    | j                         S r"   r/   r$   s    r&   r^  r^    
    aggir(   z'tokenizer'c                 "    | j                         S r"   r/   r$   s    r&   r^  r^    r  r(   \w+ru   'token_pattern'zis not Nonec                 "    | j                         S r"   r#   r$   s    r&   r^  r^    r  r(   c                 "    | j                         S r"   r  r$   s    r&   r^  r^    r  r(   z'preprocessor'zis callablerw   c                 "    | j                         S r"   r  r$   s    r&   r^  r^    r  r(   z'ngram_range')	NNNr  r  r   r  r  r  c
                     t         }
 |        }|j                  ||||||       d|d|d|	}t        j                  t        |      5  |j                  |
       d d d        y # 1 sw Y   y xY w)N)r   ro   rd   r|   r   r{   zThe parameter z will not be used since  r   )r   r   r   r  r  r   )rF   r   ro   rd   r|   r   r{   unused_name	ovrd_nameovrd_msgr@  r   r   s                r&   test_unused_parameters_warnr    sn    r  J<DOO!#   	C
 
k	- 
.	-	-s   A&&A/zVectorizer, Xrx   ry   )r   barr   )r   bazc                 l     |        }t        |d      rJ |j                  |       t        |d      rJ y )Nn_features_in_)r<  r   )rF   r   r   s      r&   test_n_features_inr  &  s<     Jz#3444NN1z#34444r(   c                      t        d      } | j                  ddg      j                  }| j                  ddg      j                  }||k(  sJ y )Nrx   ru  helloworld)r   r   r   )r  vocab1vocab2s      r&   )test_tie_breaking_sample_order_invariancer  5  sN     q
)CWWgw'(44FWWgw'(44FVr(   c                  j    t        dd      } | j                  dg      j                  }|d   dk\  sJ y )Ni@B )ry   r   )rX  r|   z22pcs efuturer   )r   r   rR  )hashingrR  s     r&   2test_nonnegative_hashing_vectorizer_result_indicesr  >  s9      7GG 12::G1:??r(   c                 .     |        }t        |d      rJ y)z0Check that vectorizers do not define set_output.
set_outputN)r<  )rY  r   s     r&   'test_vectorizers_do_not_have_set_outputr  F  s    
 +CsL))))r(   c                    t        j                  ddt        j                  d      } | |      }|j	                         }t               j                  |      }|j                  |d      }t        ||       ||usJ |j                  |d      }||u sJ t        j                  t              5  t        ||       ddd       y# 1 sw Y   yxY w)	zJCheck the behaviour of TfidfTransformer.transform with the copy parameter.r   r!  r"  r#  T)r  FN)r
   r$  r=  r  r  r   r   r   r   r   r   AssertionError)r)  r   r+  X_csr_originaltransformerX_transforms         r&   test_tfidf_transformer_copyr  O  s     	BRZZbAA!E ZZ\N"$((/K''D'9K 7e###''E':K%	~	&$UN; 
'	&	&s   0CC)r  r<  r  collectionsr   collections.abcr   	functoolsr   ior   	itertoolsr   r0  r=  r   numpy.testingr   r	   r1  r
   sklearn.baser   sklearn.feature_extraction.textr   r   r   r   r   r   r   r   sklearn.model_selectionr   r   r   sklearn.pipeliner   sklearn.svmr   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   r   r    r   r  r   r'   r-   r1   r5   rC   rE   markparametrizers   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r	  r  r  r>  r%  r)  rR  rU  rb  rn  rs  r  r  r  r  r  r  r  r  r  r  r  r  rp   r?  rB  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r&  r.  int32rQ  r9  r@  rC  rO  rX  rm  r~  r  rv  paramr  r  r  r  r  r  r  r  r4   r(   r&   <module>r     sk    	  # #      G  	 	 	 T S % !  S R  !22, $!0H.* 9J'KL:  M: z <&&4&().&*
+&$0E&4&&)*<  	M  &&&dN5: #? #?LD/N 'IJ> K>:4-$-$(( ! !4 'IJ< K<>!1H$1N
3 H H0"< &&**''


8
:YQ +  1;B ?O5FG&& RZZ$<=$ >$ "GNN$K44 6	2::t$	2::t$	RZZ'	RZZ'	'' 	f-F+F+22,U + +6 .9- : -0 /?4EF6 62 /?4EF #	&+	!GHVV &m< +-CD 
';<K =K /?4EFFF ?$5G 	5
 x 
	
 
	
 
	
 
	
 
	

	
qCITUIZ@ 	Qq11Q3GHI	.)55   /?4DFWX** .9< :<r(   