
    {Kg>6                        d Z ddlZddlZddlZddlmZ ddlmZmZ ddl	m
Z
mZ ddlZddlZddlmZmZ ddlmZ dd	lmZmZmZ d
dlmZ d
dlmZmZmZmZ  eddd      Z eddd      Z  ejB                  e"      Z# e eh d      dge$ejJ                  dgdgdgdgdgdgdg eed
dd      g eeddd      gd
d      dddddddddd d
d!       Z&	 d$d"Z'd# Z(y)%zKDDCUP 99 dataset.

A classic dataset for anomaly detection.

The dataset page is available from UCI Machine Learning Repository

https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz

    N)GzipFile)IntegralReal)existsjoin   )Bunchcheck_random_state)shuffle)Interval
StrOptionsvalidate_params   )get_data_home)RemoteFileMetadata_convert_data_dataframe_fetch_remote
load_descrkddcup99_dataz.https://ndownloader.figshare.com/files/5976045@3b6c942aa0356c0ca35b7b595a26c89d343652c9db428893e7494f837b274292)filenameurlchecksumkddcup99_10_dataz.https://ndownloader.figshare.com/files/5976042@8045aca0d84e70e622d1148d7df782496f6333bf6eb979a1b0837c42a9fd9561>   SASFhttpsmtpbooleanrandom_stateleft)closedg        neither)
subset	data_homer   r!   	percent10download_if_missing
return_X_yas_frame	n_retriesdelayT)prefer_skip_nested_validationF         ?c        
         N   t        |      }t        |||||	      }
|
j                  }|
j                  }|
j                  }|
j
                  }| dk(  r|dk(  }t        j                  |      }||ddf   }||   }||ddf   }||   }|j                  d   }t        |      }|j                  d|d      }||   }||   }t        j                  ||f   }t        j                  ||f   }| dk(  s| d	k(  s| d
k(  r|dddf   dk(  }t        j                  ||ddf   ||ddf   f   }|dd |dd z   }||   }t        j                  |dddf   dz   j                  t        d            |dddf<   t        j                  |dddf   dz   j                  t        d            |dddf<   t        j                  |dddf   dz   j                  t        d            |dddf<   | d	k(  rO|dddf   dk(  }||   }||   }t        j                  |dddf   |dddf   |dddf   f   }|d   |d   |d   g}| d
k(  rO|dddf   dk(  }||   }||   }t        j                  |dddf   |dddf   |dddf   f   }|d   |d   |d   g}| dk(  rEt        j                  |dddf   |dddf   |dddf   |dddf   f   }|d   |d   |d   |d   g}|rt!        |||      \  }}t#        d      }d}|rt%        d||||      \  }}}|r||fS t'        ||||||      S )a  Load the kddcup99 dataset (classification).

    Download it if necessary.

    =================   ====================================
    Classes                                               23
    Samples total                                    4898431
    Dimensionality                                        41
    Features            discrete (int) or continuous (float)
    =================   ====================================

    Read more in the :ref:`User Guide <kddcup99_dataset>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    subset : {'SA', 'SF', 'http', 'smtp'}, default=None
        To return the corresponding classical subsets of kddcup 99.
        If None, return the entire kddcup 99 dataset.

    data_home : str or path-like, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

        .. versionadded:: 0.19

    shuffle : bool, default=False
        Whether to shuffle dataset.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for dataset shuffling and for
        selection of abnormal samples if `subset='SA'`. Pass an int for
        reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    percent10 : bool, default=True
        Whether to load only 10 percent of the data.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : bool, default=False
        If True, returns ``(data, target)`` instead of a Bunch object. See
        below for more information about the `data` and `target` object.

        .. versionadded:: 0.20

    as_frame : bool, default=False
        If `True`, returns a pandas Dataframe for the ``data`` and ``target``
        objects in the `Bunch` returned object; `Bunch` return object will also
        have a ``frame`` member.

        .. versionadded:: 0.24

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

        .. versionadded:: 1.5

    delay : float, default=1.0
        Number of seconds between retries.

        .. versionadded:: 1.5

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : {ndarray, dataframe} of shape (494021, 41)
            The data matrix to learn. If `as_frame=True`, `data` will be a
            pandas DataFrame.
        target : {ndarray, series} of shape (494021,)
            The regression target for each sample. If `as_frame=True`, `target`
            will be a pandas Series.
        frame : dataframe of shape (494021, 42)
            Only present when `as_frame=True`. Contains `data` and `target`.
        DESCR : str
            The full description of the dataset.
        feature_names : list
            The names of the dataset columns
        target_names: list
            The names of the target columns

    (data, target) : tuple if ``return_X_y`` is True
        A tuple of two ndarray. The first containing a 2D array of
        shape (n_samples, n_features) with each row representing one
        sample and each column representing the features. The second
        ndarray of shape (n_samples,) containing the target samples.

        .. versionadded:: 0.20
    r&   )r&   r'   r(   r+   r,   r   s   normal.Nr   i1  r   r   r      r      g?F)copy      r   s   https   smtp)r!   zkddcup99.rstfetch_kddcup99)datatargetframetarget_namesfeature_namesDESCR)r   _fetch_brute_kddcup99r8   r9   r<   r;   nplogical_notshaper
   randintr_c_logastypefloatshuffle_methodr   r   r	   )r%   r&   r   r!   r'   r(   r)   r*   r+   r,   kddcup99r8   r9   r<   r;   stnormal_samplesnormal_targetsabnormal_samplesabnormal_targetsn_samples_abnormalrfdescrr:   s                            ^/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/datasets/_kddcup99.pyr7   r7   3   s   t 	2I$/H ==D__F**M((L~j NN1ad1:!!9-33A6),7  $6=+A.+A.uu^%556~'778~6)Vv-=BK1uuT!SbS&\423</0%cr*]23-??VVT!Q$Z#-55e%5HIQT
VVT!Q$Z#-55e%5HIQT
VVT!Q$Z#-55e%5HIQT
VQT
g%A7DAYF55adT!Q$Zad;<D*1-}Q/?qAQRMVQT
g%A7DAYF55adT!Q$Zad;<D*1-}Q/?qAQRMT>55adT!Q$ZadT!Q$ZGHDa a a a 	M %dFNf'FE5dFM<
tV V|!#     c                    t        |       } d}|rt        | d|z         }t        }nt        | d|z         }t        }t        |d      }t        |d      }	t	        |      }
g dt
        fdd	d
dt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt
        fdt        fd t        fd!t        fd"t        fd#t        fd$t        fd%t        fd&t
        fd't
        fd(t        fd)t        fd*t        fd+t        fd,t        fd-t        fd.t        fd/t        fd0}|D cg c]  }|d1   	 }}|d2   }|d3d2 }|
r-	 t        j                  |      }t        j                  |	      }n|rt        |       t        j                  d6|j                  z         t!        ||||7       t#        j$                  |      }t        j'                  d8       t        ||j(                        }t+        |d9:      }g }|j-                         D ]B  }|j/                         }|j1                  |j3                  d;d<      j5                  d=             D |j7                          t        j'                  d>       t9        j:                  |       t#        j<                  |t>        ?      }tA        d@      D ]$  }|d3d3|f   jC                  ||         |d3d3|f<   & |d3d3d3d2f   }|d3d3d2f   }t        jD                  ||d1A       t        jD                  ||	d1A       nt        dB      tG        ||||gC      S c c}w # t        $ r}t        d4t        |       d5      |d3}~ww xY w)Da5  Load the kddcup99 dataset, downloading it if necessary.

    Parameters
    ----------
    data_home : str, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    percent10 : bool, default=True
        Whether to load only 10 percent of the data.

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

    delay : float, default=1.0
        Number of seconds between retries.

    Returns
    -------
    dataset : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : ndarray of shape (494021, 41)
            Each row corresponds to the 41 features in the dataset.
        target : ndarray of shape (494021,)
            Each value corresponds to one of the 21 attack types or to the
            label 'normal.'.
        feature_names : list
            The names of the dataset columns
        target_names: list
            The names of the target columns
        DESCR : str
            Description of the kddcup99 dataset.

    r1   z-py3kddcup99_10rI   samplestargetsduration)protocol_typeS4)serviceS11)flagS6	src_bytes	dst_byteslandwrong_fragmenturgenthotnum_failed_logins	logged_innum_compromised
root_shellsu_attemptednum_rootnum_file_creations
num_shellsnum_access_filesnum_outbound_cmdsis_host_loginis_guest_logincount	srv_countserror_ratesrv_serror_ratererror_ratesrv_rerror_ratesame_srv_ratediff_srv_ratesrv_diff_host_ratedst_host_countdst_host_srv_countdst_host_same_srv_ratedst_host_diff_srv_ratedst_host_same_src_port_ratedst_host_srv_diff_host_ratedst_host_serror_ratedst_host_srv_serror_ratedst_host_rerror_ratedst_host_srv_rerror_rate)labelsS16r   Nz7The cache for fetch_kddcup99 is invalid, please delete z! and run the fetch_kddcup99 againzDownloading %s)dirnamer+   r,   zextracting archiverQ   )r   mode
 ,zextraction done)dtype*   )compressz1Data not found and `download_if_missing` is False)r8   r9   r<   r;   )$r   r   ARCHIVE_10_PERCENTARCHIVEr   intrG   joblibload	ExceptionOSErrorstr_mkdirploggerinfor   r   r?   r   debugr   r   	readlinesdecodeappendreplacesplitcloseosremoveasarrayobjectrangerF   dumpr	   )r&   r(   r'   r+   r,   
dir_suffix
kddcup_dirarchivesamples_pathtargets_path	availabledtccolumn_namesr;   r<   XyeDTarchive_pathfile_Xylinejs                            rS   r>   r>     s   V 	2IJ)]Z%?@
$)Z*%<=

I.L
I.L|$I+
	S+
+
 	+
 		+

 
c+
 
c+
 
+
 
3+
 
3+
 
+
 
c"+
 
c+
 
C +
 
s+
 
+
  
S!+
" 
s##+
$ 
s%+
& 
S!'+
( 
c")+
* 
#++
, 
3-+
. 
#/+
0 
c1+
2 
3+
4 
E"5+
6 
7+
8 
E"9+
: 
% ;+
< 
% =+
> 
u%?+
@ 
3A+
B 
s#C+
D 
"5)E+
F 
"5)G+
H 
'.I+
J 
'.K+
L 
 'M+
N 
$U+O+
P 
 'Q+
R 
$U+S+
T 	U+
BZ #%%"QAaD"L%#L "%M	L)AL)A 

$w{{23gzYeTXXb\)*J(8(89,S9OO%D;;=DIIdll4,22378 & 	&'
		,ZZ&)rA!Q$xr!u-Bq!tH  q#2#vJq"uI
 	A|a0A|a0IJJ#"^	 [ &  	Iz?##DF 	s   N.8*N3 3	O<OOc                     	 t        j                  |        y# t        $ r(}|j                  t        j                  k7  r Y d}~yd}~ww xY w)zgEnsure directory d exists (like mkdir -p on Unix)
    No guarantee that the directory is writable.
    N)r   makedirsr   errnoEEXIST)dr   s     rS   r   r     s:    
A 77ell" #s    	A	AA	)NTTr.   r/   ))__doc__r   loggingr   gzipr   numbersr   r   os.pathr   r   r   numpyr?   utilsr	   r
   r   rH   utils._param_validationr   r   r   r   r   _baser   r   r   r   r   r   	getLogger__name__r   r   PathLiker7   r>   r    rT   rS   <module>r      s2     	  "     - - K K   8O (8O  
		8	$ :;TB2;;-;'([ ){ kKxD@A4d9=> #'" 
BBL RUXvrT   