
    {Kg=                        d dl Z d dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d Zej6                  j9                  de      d        Zej6                  j9                  de      d        Zej6                  j9                  de      d        Zej6                  j9                  de      d        Z ej6                  j9                  de      d        Z!d Z"ej6                  j9                  dd      d        Z#d Z$d Z%eej6                  j9                  de      ej6                  j9                  dd      d                      Z&eej6                  j9                  de      d               Z'd Z(ej6                  j9                  dd      ej6                  j9                  de      d               Z)ej6                  j9                  dd      ej6                  j9                  de      d               Z*ej6                  j9                  de      d        Z+ej6                  j9                  de      d        Z,ej6                  j9                  de      d        Z-ej6                  j9                  de      d         Z.d! Z/d" Z0ej6                  j9                  d#g d$      ej6                  j9                  de      d%               Z1ej6                  j9                  de      d&        Z2ej6                  j9                  d'd(      d)        Z3ej6                  j9                  d'd(      d*        Z4y)+    N)StringIO)assert_array_equal)
block_diag)psi)LatentDirichletAllocation)_dirichlet_expectation_1d_dirichlet_expectation_2d)NotFittedError)assert_allcloseassert_almost_equalassert_array_almost_equal!if_safe_multiprocessing_with_blas)CSR_CONTAINERSc                 r    d}t        j                  d|t              }|g|z  }t        | } | |      }||fS )N   )r   r   )dtype)npfullintr   )csr_containern_componentsblockblocksXs        o/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/decomposition/tests/test_online_lda.py_build_sparse_arrayr      sF     LGGFL4EW|#FFAaA!    r   c                     t        |       \  }}d|z  }t        |||d      }t        |d      }|j                  |      }|j                  |      }t        ||       y )Ng      ?r   )r   doc_topic_priortopic_word_priorrandom_stater   r!   )r   r   fit_transformr   )r   r   r   priorlda_1lda_2topic_distr_1topic_distr_2s           r   test_lda_default_prior_paramsr)   $   sj     *-8OL!,E%!	E &<aPE''*M''*M}5r   c                 <   t         j                  j                  d      }t        |       \  }}t	        |dd|      }|j                  |       g d}|j                  D ]<  }t        |j                         dd  d d d         }t        t        |            |v r<J  y )Nr      batch)r   evaluate_everylearning_methodr!   )r   r+      )r         )         r   randomRandomStater   r   fitcomponents_setargsorttuplesortedr   rngr   r   ldacorrect_idx_grps	componenttop_idxs           r   test_lda_fit_batchrG   6   s     ))


"C)-8OL!
#!	C GGAJ8__	i'')"#.tt45VG_%)9999 %r   c                 >   t         j                  j                  d      }t        |       \  }}t	        |ddd|      }|j                  |       g d}|j                  D ]<  }t        |j                         dd  d d d         }t        t        |            |v r<J  y )	Nr         $@r+   online)r   learning_offsetr-   r.   r!   r/   r6   r7   r8   rA   s           r   test_lda_fit_onlinerL   J   s     ))


"C)-8OL!
#! C GGAJ8__	i'')"#.tt45VG_%)9999 %r   c                 \   t         j                  j                  d      }t        |       \  }}t	        |dd|      }t        d      D ]  }|j                  |        g d}|j                  D ]<  }t        |j                         dd  d d d         }t        t        |            |v r<J  y )	Nr   rI   d   r   rK   total_samplesr!   r   r/   r6   r7   r   r9   r:   r   r   rangepartial_fitr<   r=   r>   r?   r@   	r   rB   r   r   rC   irD   crF   s	            r   test_lda_partial_fitrW   _   s     ))


"C)-8OL!
#!	C 1X  9__aiik"#&tt,-VG_%)9999 r   c                 V   t         j                  j                  d      }t        |       \  }}t	        |d|      }|j                  |j                                g d}|j                  D ]<  }t        |j                         dd  d d d         }t        t        |            |v r<J  y )Nr   r,   r   r.   r!   r/   r6   r7   )r   r9   r:   r   r   r;   toarrayr<   r=   r>   r?   r@   rA   s           r   test_lda_dense_inputr[   t   s     ))


"C)-8OL!
#!7C GGAIIK8__	i'')"#.tt45VG_%)9999 %r   c                  T   t         j                  j                  d      } | j                  dd      }d}t	        ||       }|j                  |      }|dkD  j                         sJ t        t        j                  |d	      t        j                  |j                  d                y )
Nr   r2      
   sizer   r"   g        r+   axis)r   r9   r:   randintr   r#   anyr   sumonesshape)rB   r   r   rC   X_transs        r   test_lda_transformrj      s     ))


"CAH%AL
#C
PC"GcM   bffW15rwww}}Q?O7PQr   method)rJ   r,   c                     t         j                  j                  d      }|j                  dd      }t	        d| |      }|j                  |      }|j                  |      }t        ||d       y )Nr   r_   )2   r^   r`   r2   rY   r1   )r   r9   r:   rd   r   r#   	transformr   )rk   rB   r   rC   X_fitri   s         r   test_lda_fit_transformrp      sg     ))


"CBX&A
#SC a EmmAGeWa0r   c                      t        j                  dd      } t               }d}t        j                  t
        |      5  |j                  |        d d d        y # 1 sw Y   y xY w)N)r2   r_         z^Negative values in data passedmatch)r   r   r   pytestraises
ValueErrorr;   )r   rC   regexs      r   test_lda_negative_inputry      sD    
A
#
%C.E	z	/
 
0	/	/s   AA"c                     t         j                  j                  d      } | j                  dd      }t	               }d}t        j                  t        |      5  |j                  |       d d d        y # 1 sw Y   y xY w)Nr   r1   r]   r`   z}This LatentDirichletAllocation instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.rs   )	r   r9   r:   rd   r   ru   rv   r
   
perplexity)rB   r   rC   rx   s       r   test_lda_no_component_errorr|      sb    
))


"CAH%A
#
%C	 

 
~U	3q 
4	3	3s   A55A>c                 >   t        |      \  }}t        j                  j                  d      }t	        |d| d|      }|j                  |       g d}|j                  D ]<  }t        |j                         dd  d d d         }t        t        |            |v r<J  y )Nr   r0   r+   )r   n_jobsr.   r-   r!   r/   r6   r7   )r   r   r9   r:   r   r;   r<   r=   r>   r?   r@   )	rk   r   r   r   rB   rC   rD   rV   rF   s	            r   test_lda_multi_jobsr      s     *-8OL!
))


"C
#!C GGAJ8__aiik"#&tt,-VG_%)9999 r   c                 ^   t         j                  j                  d      }t        |       \  }}t	        |ddd|      }t        d      D ]  }|j                  |        g d}|j                  D ]<  }t        |j                         dd  d d d         }t        t        |            |v r<J  y )	Nr   r0         @   )r   r~   rK   rP   r!   r/   r6   r7   rQ   rT   s	            r   test_lda_partial_fit_multi_jobsr      s     ))


"C)-8OL!
#!C 1X  9__aiik"#&tt,-VG_%)9999 r   c                  x   t         j                  j                  d      } | j                  dd      }| j                  dd      }t         j                  j                  d|df      }t	        |dd| 	      }|j                  |       | j                  d|d
z   |f      }t        j                  t        d      5  |j                  ||       d d d        | j                  d||d
z   f      }t        j                  t        d      5  |j                  ||       d d d        y # 1 sw Y   XxY w# 1 sw Y   y xY w)Nr   r   r3   r_   r1   r`   r   r^   rO   r+   zNumber of samplesrs   zNumber of topics)
r   r9   r:   rd   r   r;   ru   rv   rw   _perplexity_precomp_distr)rB   r   	n_samplesr   rC   invalid_n_samplesinvalid_n_componentss          r   test_lda_preplexity_mismatchr      s   
))


"C;;q!$LAr"I
		!9b/2A
#!	C GGAJAY]L,IJ	z)=	>%%a):; 
? ;;q	<!;K/L;M	z)<	=%%a)=> 
>	=	 
?	> 
>	=s   :D$D0$D-0D9c                 X   t        |      \  }}t        |d| dd      }t        |d| dd      }|j                  |       |j                  |d      }|j                  |       |j                  |d      }||k\  sJ |j                  |d      }|j                  |d      }	||	k\  sJ y )	Nr+   rN   r   r   max_iterr.   rP   r!   r_   Fsub_samplingT)r   r   r;   r{   )
rk   r   r   r   r%   r&   perp_1perp_2perp_1_subsamplingperp_2_subsamplings
             r   test_lda_perplexityr      s    
 *-8OL!%!E &!E 
IIaLae4F	IIaLae4FV))!$)?))!$)?!3333r   c                     t        |      \  }}t        |d| dd      }t        |d| dd      }|j                  |       |j                  |      }|j                  |       |j                  |      }||k\  sJ y )Nr+   rN   r   r   r_   )r   r   r#   score)rk   r   r   r   r%   r&   score_1score_2s           r   test_lda_scorer     s    
 *-8OL!%!E &!E 
kk!nG	kk!nGgr   c                     t        |       \  }}t        |dddd      }|j                  |       |j                  |      }|j                  |j	                               }t        ||       y )Nr+   r,   rN   r   r   )r   r   r;   r{   rZ   r   )r   r   r   rC   r   r   s         r   test_perplexity_input_formatr   5  sd     *-8OL!
#!C GGAJ^^AF^^AIIK(F'r   c                 .   t        |       \  }}t        |dd      }|j                  |       |j                  |d      }|j	                  |      }t        j                  d|t        j                  |j                        z  z        }t        ||       y )Nr_   r   )r   r   r!   Fr   rr   )
r   r   r;   r{   r   r   exprf   datar   )r   r   r   rC   perplexity_1r   perplexity_2s          r   test_lda_score_perplexityr   G  s}     *-8OL!
#!BQC GGAJ>>!%>8LIIaLE66$%"&&."89:Ll3r   c                     t        |       \  }}t        |dddd      }|j                  |       |j                  }|j	                  |      }t        ||       y )Nr+   r,   r   )r   r   r.   r!   r-   )r   r   r;   bound_r{   r   )r   r   r   rC   perplexity1perplexity2s         r   test_lda_fit_perplexityr   V  s]     *-8OL!
#!C GGAJ **K ..#K[1r   c                 "   t        j                  d      }| | |      fD ]m  }t        d      j                  |      }t	        |j
                  j                  d      t        j                  |j
                  j                  d                o y)z+Test LDA on empty document (all-zero rows).)r2   r1   i  )r   r   rb   r+   N)	r   zerosr   r;   r   r<   rf   rg   rh   )r   Zr   rC   s       r   test_lda_empty_docsr   m  sr     	Aq!"'599!<OOQ'1F1Fq1I)J	
 #r   c                     t        j                  ddd      } t        j                  |       }t        | d|       t	        |t        j
                  t        |       t        t        j                  |             z
        d       | j                  dd      } t	        t        |       t        |       t        t        j                  | d	      d
d
t         j                  f         z
  dd       y
)z9Test Cython version of Dirichlet expectation calculation.ir_   i'  r   gҶOɃ;)atolrN   r+   rb   Ngdy=gA:)>)rtolr   )r   logspace
empty_liker   r   r   r   rf   reshaper	   newaxis)xexpectations     r   test_dirichlet_expectationr   x  s    
D"e$A--"KaK0KARVVAY(?!@uM			#sA!!$ARVVAA&q"**}566	r   c                    t        |      \  }}t        |dd| |d      }t               }t        j                  |c}	t        _        	 |j                  |       |	t        _        |j                         j                  d      }
|j                         j                  d      }||
k(  sJ ||k(  sJ y # |	t        _        w xY w)Nr   r,   r   )r   r   r.   verboser-   r!   
r{   )r   r   r   sysstdoutr;   getvaluecount)r   r-   expected_linesexpected_perplexitiesr   r   r   rC   outold_outn_linesn_perplexitys               r   check_verbosityr     s     *-8OL!
#!%C *C**cGSZ

lln""4(G<<>''5LW$$$ L000 
s   B0 0B=z;verbose,evaluate_every,expected_lines,expected_perplexities))Fr+   r   r   )Fr   r   r   )Tr   r   r   )Tr+   r   r   )Tr0   r   r+   c                 "    t        | ||||       y )N)r   )r   r-   r   r   r   s        r   test_verbosityr     s     1Fr   c                     t        |       \  }}t        |      j                  |      }|j                         }t	        t        |      D cg c]  }d| 	 c}|       yc c}w )z6Check feature names out for LatentDirichletAllocation.)r   latentdirichletallocationN)r   r   r;   get_feature_names_outr   rR   )r   r   r   rC   namesrU   s         r   test_lda_feature_names_outr     sc     *-8OL!
#
>
B
B1
EC%%'E272EF2EQ$QC	(2EFFs   A!r.   )r,   rJ   c                 2   t         j                  j                  d      }|j                  d      j	                  |d      }t        dd|       }|j                  |       |j                  j                  |k(  sJ |j                  j                  |k(  sJ y)	z2Check data type preservation of fitted attributes.r   r]   r`   F)copyr2   r   r!   r.   N)
r   r9   r:   uniformastyper   r;   r<   r   exp_dirichlet_component_)r.   global_dtyperB   r   rC   s        r   test_lda_dtype_matchr     s     ))


"C")),U)CA
#QC GGAJ??  L000''--===r   c                    t         j                  j                  |      }|j                  d      }|j	                  t         j
                        }t        d||       j                  |      }t        d||       j                  |      }t        |j                  |j                         t        |j                  |      |j                  |             y)z>Check numerical consistency between np.float32 and np.float64.r]   r`   r2   r   N)r   r9   r:   r   r   float32r   r;   r   r<   rn   )r.   global_random_seedrB   X64X32lda_64lda_32s          r   test_lda_numerical_consistencyr     s     ))

 2
3C
++8+
$C
**RZZ
 C&%7	c#h  '%7	c#h  F&&(:(:;F$$S)6+;+;C+@Ar   )5r   ior   numpyr   ru   numpy.testingr   scipy.linalgr   scipy.specialr   sklearn.decompositionr   &sklearn.decomposition._online_lda_fastr   r	   sklearn.exceptionsr
   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   markparametrizer)   rG   rL   rW   r[   rj   rp   ry   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   <module>r      s|   
    , #  ; .  / .96 :6" .9: ::& .9: ::( .9: ::( .9: :: 	R #67
1 8
1 #.9#67: 8 : #:& #.9: : #:(?. #67.94 : 84< #67.9 : 84 .9( :(" .94 :4 .92 :2, .9
 :
 12 A	 .9 :	 .9 : *,?@
> A
> *,?@B ABr   