
    {Kg0&                        d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
 ddlmZ ddlZddlZddlmZ dd	lmZmZ d
dlmZ d
dlmZmZmZ  eddd      Z eddd      ZdZ ej<                  e      Z dejB                  fdZ"d Z#d Z$ ee%edgdg eed
dd      g eeddd      gdd       ddd!d"dd#       Z&y)$a  
=============================
Species distribution dataset
=============================

This dataset represents the geographic distribution of species.
The dataset is provided by Phillips et. al. (2006).

The two species are:

 - `"Bradypus variegatus"
   <http://www.iucnredlist.org/details/3038/0>`_ ,
   the Brown-throated Sloth.

 - `"Microryzomys minutus"
   <http://www.iucnredlist.org/details/13408/0>`_ ,
   also known as the Forest Small Rice Rat, a rodent that lives in Peru,
   Colombia, Ecuador, Peru, and Venezuela.

References
----------

`"Maximum entropy modeling of species geographic distributions"
<http://rob.schapire.net/papers/ecolmod.pdf>`_ S. J. Phillips,
R. P. Anderson, R. E. Schapire - Ecological Modelling, 190:231-259, 2006.

Notes
-----

For an example of using this dataset, see
:ref:`examples/applications/plot_species_distribution_modeling.py
<sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.
    N)BytesIO)IntegralReal)PathLikemakedirsremove)exists   )Bunch)Intervalvalidate_params   )get_data_home)RemoteFileMetadata_fetch_remote_pkl_filepathzsamples.zipz.https://ndownloader.figshare.com/files/5976075@abb07ad284ac50d9e6d20f1c4211e0fd3c098f7f85955e89d321ee8efe37ac28)filenameurlchecksumzcoverages.zipz.https://ndownloader.figshare.com/files/5976078@4d862674d72e79d6cee77e63b98651ec7926043ba7d39dcb31329cf3f6073807zspecies_coverage.pkz   c                    t        |      D cg c]  }| j                          }}d }t        |D cg c]
  } ||       c}      }t        j                  | |      }t        |d         }|dk7  rd||<   |S c c}w c c}w )zjLoad a coverage file from an open file object.

    This will return a numpy array of the given dtype
    c                 `    | j                         d   t        | j                         d         fS )Nr   r   )splitfloat)ts    k/home/alanp/www/video.onchill/myenv/lib/python3.12/site-packages/sklearn/datasets/_species_distributions.py<lambda>z _load_coverage.<locals>.<lambda>R   s"    AGGIaL%	!*=>    dtypes   NODATA_valuei)rangereadlinedictnploadtxtint)	Fheader_lengthr"   _header
make_tuplelineMnodatas	            r   _load_coverager1   L   s    
 %*-$89$8qajjl$8F9>J7:d#78F


1E"A()F&	H :7s
   A:A?c                     | j                         j                  d      j                         j                  d      }t	        j
                  | ddd      }||j                  _        |S )zLoad csv file.

    Parameters
    ----------
    F : file object
        CSV file open in byte mode.

    Returns
    -------
    rec : np.ndarray
        record array representing the data
    ascii,r   z	S22,f4,f4)skiprows	delimiterr"   )r$   decodestripr   r&   r'   r"   names)r)   r9   recs      r   	_load_csvr;   \   sR     JJL(..066s;E
**Qc
ECCIIOJr    c                 b   | j                   | j                  z   }|| j                  | j                  z  z   }| j                  | j                  z   }|| j                  | j                  z  z   }t        j                  ||| j                        }t        j                  ||| j                        }||fS )a%  Construct the map grid from the batch object

    Parameters
    ----------
    batch : Batch object
        The object returned by :func:`fetch_species_distributions`

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages
    )x_left_lower_corner	grid_sizeNxy_left_lower_cornerNyr&   arange)batchxminxmaxyminymaxxgridygrids          r   construct_gridsrJ   p   s     $$u6D588eoo-.D$$u6D588eoo-.D IIdD%//2EIIdD%//2E5>r    booleanleft)closedg        neither)	data_homedownload_if_missing	n_retriesdelayT)prefer_skip_nested_validation   g      ?c                    t        |       } t        |       st        |        t        ddddd      }t        j
                  }t        | t              }t        |      s|st        d      t        j                  dt        j                  d	|        t        t        | ||
      }t	        j                  |      5 }|j                  D ]/  }	t!        ||	         }
d|	v rt#        |
      }d|	v s%t#        |
      }1 	 ddd       t%        |       t        j                  dt&        j                  d	|        t        t&        | ||
      }t	        j                  |      5 }g }|j                  D ]N  }	t!        ||	         }
t        j)                  dj+                  |	             |j-                  t/        |
             P t	        j0                  ||      }ddd       t%        |       t3        dd|}t5        j6                  ||d       |S t5        j                  |      }|S # 1 sw Y   0xY w# 1 sw Y   axY w)au  Loader for species distribution dataset from Phillips et. al. (2006).

    Read more in the :ref:`User Guide <species_distribution_dataset>`.

    Parameters
    ----------
    data_home : str or path-like, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

        .. versionadded:: 1.5

    delay : float, default=1.0
        Number of seconds between retries.

        .. versionadded:: 1.5

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        coverages : array, shape = [14, 1592, 1212]
            These represent the 14 features measured
            at each point of the map grid.
            The latitude/longitude values for the grid are discussed below.
            Missing data is represented by the value -9999.
        train : record array, shape = (1624,)
            The training points for the data.  Each point has three fields:

            - train['species'] is the species name
            - train['dd long'] is the longitude, in degrees
            - train['dd lat'] is the latitude, in degrees
        test : record array, shape = (620,)
            The test points for the data.  Same format as the training data.
        Nx, Ny : integers
            The number of longitudes (x) and latitudes (y) in the grid
        x_left_lower_corner, y_left_lower_corner : floats
            The (x,y) position of the lower-left corner, in degrees
        grid_size : float
            The spacing between points of the grid, in degrees

    Notes
    -----

    This dataset represents the geographic distribution of species.
    The dataset is provided by Phillips et. al. (2006).

    The two species are:

    - `"Bradypus variegatus"
      <http://www.iucnredlist.org/details/3038/0>`_ ,
      the Brown-throated Sloth.

    - `"Microryzomys minutus"
      <http://www.iucnredlist.org/details/13408/0>`_ ,
      also known as the Forest Small Rice Rat, a rodent that lives in Peru,
      Colombia, Ecuador, Peru, and Venezuela.

    - For an example of using this dataset with scikit-learn, see
      :ref:`examples/applications/plot_species_distribution_modeling.py
      <sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.

    References
    ----------

    * `"Maximum entropy modeling of species geographic distributions"
      <http://rob.schapire.net/papers/ecolmod.pdf>`_
      S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,
      190:231-259, 2006.

    Examples
    --------
    >>> from sklearn.datasets import fetch_species_distributions
    >>> species = fetch_species_distributions()
    >>> species.train[:5]
    array([(b'microryzomys_minutus', -64.7   , -17.85  ),
           (b'microryzomys_minutus', -67.8333, -16.3333),
           (b'microryzomys_minutus', -67.8833, -16.3   ),
           (b'microryzomys_minutus', -67.8   , -16.2667),
           (b'microryzomys_minutus', -67.9833, -15.9   )],
          dtype=[('species', 'S22'), ('dd long', '<f4'), ('dd lat', '<f4')])
    g33333Wi  gfffffLi8  g?)r=   r?   r@   rA   r>   z1Data not found and `download_if_missing` is FalsezDownloading species data from z to )dirnamerQ   rR   traintestNzDownloading coverage data from z - converting {}r!   )	coveragesrX   rW   	   )compress )r   r	   r   r%   r&   int16r   DATA_ARCHIVE_NAMEOSErrorloggerinfoSAMPLESr   r   loadfilesr   r;   r   	COVERAGESdebugformatappendr1   asarrayr   joblibdump)rO   rP   rQ   rR   extra_paramsr"   archive_pathsamples_pathXffhandlerW   rX   coverages_pathrY   bunchs                   r   fetch_species_distributionsrt      s   T i(I)
 !"L HHE ,=>L,"MNNYWX$Y)5
 WW\"aWW!!A$-a<%g.EQ;$W-D  # 	|9B	R	
 'yIU
 WW^$IWW!!A$-/66q9:  !89  

9E:I % 	~R	ER\RE<!4 L L)L; #" %$s   <1H*.H* A7H7*H47I )'__doc__loggingior   numbersr   r   osr   r   r   os.pathr	   rj   numpyr&   utilsr   utils._param_validationr   r    r   _baser   r   r   rb   re   r^   	getLogger__name__r`   r]   r1   r;   rJ   strrt   r\   r    r   <module>r      s    N   " ) )     ?  C C 8O 8O	 +  
		8	$ %&RXX  (6 8T* ){xD@A4d9=>	 #' 
WWr    