
    ,YHh                     x    d dl Zd dlZddi dfdedededed	ed
ej                  fdZdded
ej                  fdZ	y)    NlatestFTnameversionrawkwargs_read_csvverbosereturnc                 	   |dk(  rdn| }i dd| ddddd	d
ddddd| ddddd	d
ddddd| ddddd	d
ddddd| dddddd
ddddd| dddd d!d
d"d#dd$d| d%ddd d!d
d&d#dd'd| d(dddd)d
d*d+dd,d| d-dd.d d!d
d/d0dd1d| d2dd.d d!d
d3d0dd4d| d5dd.dd6d
d7d8dd9d| d:dd;d<dd
d=d>dd?d| d@ddddd
dAdBddCd| dDddEdFdGd
dHdIddJd| dKddddd
dLdMddNd| dOddddd
dPdQddRd| dSdTdUd ddVdWdXddYd| dZdTdUd d!dVd[dXdd\dTdUdddVd]d^dd_dTdUdddVd`daddbddUddd
dcdaddddTdUd d!dVdedaddfddUd d!d
dcdadg dgdTdUdddVdhdiddj| dkddd dld
dmdndd| doddd dld
dmdndd| dpddd dld
dqdnddj| drdddd	d
dsdtddj| duddddd
dvdwddx}| |j                         vr(t        dy|  dzt        |j                                      ||    d{   }||    d|   }t        |t              sl|j	                  d}      r|j                  d~      d   }d|z   }|d
k(  r!	 ||    d   }	t        j                  |fd|	i|}
|dVk(  r	 t        j                  |      }
nm	 g }
|D ]=  }d|j                  d~      d   z   }|
j                  t        j                  |             ? 	 t        j                  |
d      j                  d      }
|s	 ||    d   }||    d   }|d!k(  rt        j                  dk\  rd}||    d   }
j                  |      }
t        j                   |
j"                  |      |
_        |
j%                  |      }
|
j'                         }
|rt)        |        t)        dt+        |       z         t-        j.                  ||    d   d      }t-        j.                  ||    d   d      }t)        |       t)        |       t)        d
j0                          
S # t        $ r#}t        d|  d| dt        |       d      dT}~ww xY w# t        $ r#}t        d|  d| dt        |       d      dT}~ww xY w# t        $ r#}t        d|  d| dt        |       d      dT}~ww xY w#  Y  xY w)a  
    Fetch a dataset from the skforecast-datasets repository.

    Parameters
    ----------
    name: str
        Name of the dataset to fetch.
    version: str, int, default `'latest'`
        Version of the dataset to fetch. If 'latest', the latest version will be 
        fetched (the one in the main branch). For a list of available versions, 
        see the repository branches.
    raw: bool, default `False`
        If True, the raw dataset is fetched. If False, the preprocessed dataset 
        is fetched. The preprocessing consists of setting the column with the 
        date/time as index and converting the index to datetime. A frequency is 
        also set to the index.
    kwargs_read_csv: dict, default `{}`
        Kwargs to pass to pandas `read_csv` function.
    verbose: bool, default `True`
        If True, print information about the dataset.
    
    Returns
    -------
    df: pandas DataFrame
        Dataset.
    
    r   mainh2oAhttps://raw.githubusercontent.com/skforecast/skforecast-datasets//data/h2o.csv,fecha%Y-%m-%dMScsvzpMonthly expenditure ($AUD) on corticosteroid drugs that the Australian health system had between 1991 and 2008. zHyndman R (2023). fpp3: Data for Forecasting: Principles and Practice(3rd Edition). http://pkg.robjhyndman.com/fpp3package/,https://github.com/robjhyndman/fpp3package, http://OTexts.com/fpp3.)urlsep	index_coldate_formatfreq	file_typedescriptionsourceh2o_exogz/data/h2o_exog.csvzMonthly expenditure ($AUD) on corticosteroid drugs that the Australian health system had between 1991 and 2008. Two additional variables (exog_1, exog_2) are simulated.zHyndman R (2023). fpp3: Data for Forecasting: Principles and Practice (3rd Edition). http://pkg.robjhyndman.com/fpp3package/, https://github.com/robjhyndman/fpp3package, http://OTexts.com/fpp3.fuel_consumptionz'/data/consumos-combustibles-mensual.csvFechaz@Monthly fuel consumption in Spain from 1969-01-01 to 2022-08-01.u   Obtained from Corporación de Reservas Estratégicas de Productos Petrolíferos and Corporación de Derecho Público tutelada por el Ministerio para la Transición Ecológica y el Reto Demográfico. https://www.cores.es/es/estadisticasitems_salesz/data/simulated_items_sales.csvdateDz9Simulated time series for the sales of 3 different items.zSimulated data.air_quality_valenciaz/data/air_quality_valencia.csvdatetimez%Y-%m-%d %H:%M:%SHu+  Hourly measures of several air chemical pollutant at Valencia city (Avd. Francia) from 2019-01-01 to 20213-12-31. Including the following variables: pm2.5 (µg/m³), CO (mg/m³), NO (µg/m³), NO2 (µg/m³), PM10 (µg/m³), NOx (µg/m³), O3 (µg/m³), Veloc. (m/s), Direc. (degrees), SO2 (µg/m³).u   Red de Vigilancia y Control de la Contaminación Atmosférica, 46250047-València - Av. França, https://mediambient.gva.es/es/web/calidad-ambiental/datos-historicos.air_quality_valencia_no_missingz)/data/air_quality_valencia_no_missing.csvuh  Hourly measures of several air chemical pollutant at Valencia city (Avd. Francia) from 2019-01-01 to 20213-12-31. Including the following variables: pm2.5 (µg/m³), CO (mg/m³), NO (µg/m³), NO2 (µg/m³), PM10 (µg/m³), NOx (µg/m³), O3 (µg/m³), Veloc. (m/s), Direc. (degrees), SO2 (µg/m³). Missing values have been imputed using linear interpolation.website_visitsz,/data/visitas_por_dia_web_cienciadedatos.csv1Dz\Daily visits to the cienciadedatos.net website registered with the google analytics service.zdAmat Rodrigo, J. (2021). cienciadedatos.net (1.0.0). Zenodo. https://doi.org/10.5281/zenodo.10006330bike_sharingz$/data/bike_sharing_dataset_clean.csv	date_timezHourly usage of the bike share system in the city of Washington D.C. during the years 2011 and 2012. In addition to the number of users per hour, information about weather conditions and holidays is available.znFanaee-T,Hadi. (2013). Bike Sharing Dataset. UCI Machine Learning Repository. https://doi.org/10.24432/C5W894.bike_sharing_extended_featuresz(/data/bike_sharing_extended_features.csva   Hourly usage of the bike share system in the city of Washington D.C. during the years 2011 and 2012. In addition to the number of users per hour, the dataset was enriched by introducing supplementary features. Addition includes calendar-based variables (day of the week, hour of the day, month, etc.), indicators for sunlight, incorporation of rolling temperature averages, and the creation of polynomial features generated from variable pairs. All cyclic variables are encoded using sine and cosine functions to ensure accurate representation.australia_tourismz/data/australia_tourism.csvQa%  Quarterly overnight trips (in thousands) from 1998 Q1 to 2016 Q4 across Australia. The tourism regions are formed through the aggregation of Statistical Local Areas (SLAs) which are defined by the various State and Territory tourism authorities according to their research and marketing needs.zWang, E, D Cook, and RJ Hyndman (2020). A new tidy data structure to support exploration and modeling of temporal data, Journal of Computational and Graphical Statistics, 29:3, 466-478, doi:10.1080/10618600.2019.1695624.uk_daily_flightsz/data/uk_daily_flights.csvDatez%d/%m/%Yz<Daily number of flights in UK from 02/01/2019 to 23/01/2022.a  Experimental statistics published as part of the Economic activity and social change in the UK, real-time indicators release, Published 27 January 2022. Daily flight numbers are available in the dashboard provided by the European Organisation for the Safety of Air Navigation (EUROCONTROL). https://www.ons.gov.uk/economy/economicoutputandproductivity/output/bulletins/economicactivityandsocialchangeintheukrealtimeindicators/latestwikipedia_visitsz/data/wikipedia_visits.csvzsLog daily page views for the Wikipedia page for Peyton Manning. Scraped data using the Wikipediatrend package in R.zXhttps://github.com/facebook/prophet/blob/main/examples/example_wp_log_peyton_manning.csvvic_electricityz/data/vic_electricity.csvTimez%Y-%m-%dT%H:%M:%SZ30minz6Half-hourly electricity demand for Victoria, AustraliazO'Hara-Wild M, Hyndman R, Wang E, Godahewa R (2022).tsibbledata: Diverse Datasets for 'tsibble'. https://tsibbledata.tidyverts.org/, https://github.com/tidyverts/tsibbledata/. https://tsibbledata.tidyverts.org/reference/vic_elec.htmlstore_salesz/data/store_sales.csvzrThis dataset contains 913,000 sales transactions from 2013-01-01 to 2017-12-31 for 50 products (SKU) in 10 stores.zThe original data was obtained from: inversion. (2018). Store Item Demand Forecasting Challenge. Kaggle. https://kaggle.com/competitions/demand-forecasting-kernels-onlybicimadz/data/bicimad_users.csvzThis dataset contains the daily users of the bicycle rental service (BiciMad) in the city of Madrid (Spain) from 2014-06-23 to 2022-09-30.zThe original data was obtained from: Portal de datos abiertos del Ayuntamiento de Madrid https://datos.madrid.es/portal/site/egobm4_dailyz/data/m4_daily.parquetN	timestampparquetz9Time series with daily frequency from the M4 competition.a  Monash Time Series Forecasting Repository  (https://zenodo.org/communities/forecasting) Godahewa, R., Bergmeir, C., Webb, G. I., Hyndman, R. J., & Montero-Manso, P. (2021). Monash Time Series Forecasting Archive. In Neural Information Processing Systems Track on Datasets and Benchmarks. 
Raw data, available in .tsf format, has been converted to Pandas format using the code provided by the authors in https://github.com/rakshitha123/TSForecasting/blob/master/utils/data_loader.py 
The category of each time series has been included in the dataset. This information has been obtained from the Kaggle competition page: https://www.kaggle.com/datasets/yogesh94/m4-forecasting-competition-dataset	m4_hourlyz/data/m4_hourly.parquetz:Time series with hourly frequency from the M4 competition.zRhttps://drive.google.com/file/d/1fMsYjfhrFLmeFjKG3jenXjDa5s984ThC/view?usp=sharingzbDaily energy consumption data from the ASHRAE competition with building metadata and weather data.zKaggle competition Addison Howard, Chris Balbach, Clayton Miller, Jeff Haberl, Krishnan Gowri, Sohier Dane. (2019). ASHRAE - Great Energy Predictor III. Kaggle. https://www.kaggle.com/c/ashrae-energy-prediction/overviewzRhttps://drive.google.com/file/d/1KHYopzclKvS1F6Gt6GoJWKnxiuZ2aqen/view?usp=sharingzDaily energy consumption data from the The Building Data Genome Project 2 with building metadata and weather data. https://github.com/buds-lab/building-data-genome-project-2zMiller, C., Kathirgamanathan, A., Picchetti, B. et al. The Building Data Genome Project 2, energy meter data from the ASHRAE Great Energy Predictor III competition. Sci Data 7, 368 (2020). https://doi.org/10.1038/s41597-020-00712-xzkhttps://raw.githubusercontent.com/skforecast/skforecast-datasets/refs/heads/main/data/bdg2_daily_sample.csvzDaily energy consumption data of two buildings sampled from the The Building Data Genome Project 2. https://github.com/buds-lab/building-data-genome-project-2zRhttps://drive.google.com/file/d/1I2i5mZJ82Cl_SHPTaWJmLoaXnntdCgh7/view?usp=sharingzHourly energy consumption data from the The Building Data Genome Project 2 with building metadata and weather data. https://github.com/buds-lab/building-data-genome-project-2zlhttps://raw.githubusercontent.com/skforecast/skforecast-datasets/refs/heads/main/data/bdg2_hourly_sample.csv)zRhttps://drive.google.com/file/d/1JOqBsSHegly6iSJFgmkugAko734c6ZW5/view?usp=sharingzRhttps://drive.google.com/file/d/1BhO1BUvs-d7ipXrm7caC3Wd_d0C_6PZ8/view?usp=sharingzRhttps://drive.google.com/file/d/1oHwkQ_QycJVTZMb6bH8C2klQB971gXXA/view?usp=sharingzRhttps://drive.google.com/file/d/1OvYzFlDG04YgTvju2k02vHEOj0nIuwei/view?usp=sharingzQDaily sales data from the M5 competition with product metadata and calendar data.zAddison Howard, inversion, Spyros Makridakis, and vangelis. M5 Forecasting - Accuracy. https://kaggle.com/competitions/m5-forecasting-accuracy, 2020. Kaggle.zLhttps://raw.githubusercontent.com/skforecast/skforecast-datasets/refs/heads/z/data/ETTm1.csv15mina  Data from an electricity transformer station was collected between July 2016 and July 2018 (2 years x 365 days x 24 hours x 4 intervals per hour = 70,080 data points). Each data point consists of 8 features, including the date of the point, the predictive value "Oil Temperature (OT)", and 6 different types of external power load features: High UseFul Load (HUFL), High UseLess Load (HULL), Middle UseFul Load (MUFL), Middle UseLess Load (MULL), Low UseFul Load (LUFL), Low UseLess Load (LULL).a'  Zhou, Haoyi & Zhang, Shanghang & Peng, Jieqi & Zhang, Shuai & Li, Jianxin & Xiong, Hui & Zhang, Wancai. (2020). Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting. [10.48550/arXiv.2012.07436](https://arxiv.org/abs/2012.07436). https://github.com/zhouhaoyi/ETDatasetz/data/ETTm2.csvz/data/ETTm2_extended.csva  Data from an electricity transformer station was collected between July 2016 and July 2018 (2 years x 365 days x 24 hours x 4 intervals per hour = 70,080 data points). Each data point consists of 8 features, including the date of the point, the predictive value "Oil Temperature (OT)", and 6 different types of external power load features: High UseFul Load (HUFL), High UseLess Load (HULL), Middle UseFul Load (MUFL), Middle UseLess Load (MULL), Low UseFul Load (LUFL), Low UseLess Load (LULL). Additional variables are created based on calendar information (year, month, week, day of the week, and hour). These variables have been encoded using the cyclical encoding technique (sin and cos transformations) to preserve the cyclical nature of the data.z /data/expenditures_australia.csvz~Monthly expenditure on cafes, restaurants and takeaway food services in Victoria (Australia) from April 1982 up to April 2024.zAustralian Bureau of Statistics. Catalogue No. 8501.0 https://www.abs.gov.au/statistics/industry/retail-and-wholesale-trade/retail-trade-australia/apr-2024/8501011.xlsxz!/data/public-transport-madrid.csvzPDaily users of public transport in Madrid (Spain) from 2023-01-01 to 2024-12-15.zConsorcio Regional de Transportes de Madrid CRTM, CRTM Evolucion demanda diaria https://datos.crtm.es/documents/a7210254c4514a19a51b1617cfd61f75/about)ashrae_daily
bdg2_dailybdg2_daily_samplebdg2_hourlybdg2_hourly_samplem5ett_m1ett_m2ett_m2_extendedexpenditures_australiapublic_transport_madridz	Dataset 'z%' not found. Available datasets are: r   r   zhttps://drive.google.com/zhttps://drive.google.com/uc?id=r   zError reading dataset 'z' from z: .z/https://drive.google.com/uc?export=download&id=r   )axisT)dropr   r   z2.2.0hr   format-r   P   )widthr   zShape of the dataset: )keys
ValueErrorlist
isinstance
startswithsplitpdread_csv	Exceptionstrread_parquetappendconcatreset_index__version__	set_indexto_datetimeindexasfreq
sort_indexprintlentextwrapfillshape)r   r   r   r   r   datasetsr   r   file_idr   dfeurl_partitionpathr   r   r   r   r   s                      U/var/www/html/planif/env/lib/python3.12/site-packages/skforecast/datasets/datasets.pyfetch_datasetrp      s6	   F  8+fG9GQ	''.i}>  %GV
Q	* 	''.i/AC  %<
V!
+Q	T 	''.i/VX  %R7
UQ	| 	''.i/NP %V'
}Q	V 	''.i/MO #.!X%!
WQ	D 	*''.i/XZ #.^X%,
EQ	r 	''.i/[] %,:
sQ	X 	''.i/SU $.X
?!
YQ	@ 	)''.i/WY $.O?++
AQ	r 	''.i/JL $%65%
sQ	b 	''.i/IK %Y\
cQ	L 	''.i/IK %F4
MQ	r 	''.i/HJ /SL
sQ	V 	''.i/DF %AR
WQ	~ 	''.i/FH %!
V!
Q	f	 	''.i/EG $."V
^
g	Q	X
 	''.i/FH $."W
^
Y
Q	L h$%"6d
$ h$%"M
=
( A$%M
=
( h$."M
=
( B$.M
=
( $%"ct!
,229/K .C9)
:''.i@ .C9)
:''.i/GI .
91
B _);= %L?#
, _)<> %bY$
Q	Hf 8==?"vB4CXBYZ
 	
 4.
C{+Ic4 >>45iinR(G3g=CtnU+[[A#AA
 	!__S)	B!$ 1H=K^K^_bKcdfKgg		"//$/01 YYr"..D.9	 {3ID>&)Ds{r~~8"4.7Ki(B~~bhh{CBH4BB dcCIommHTN=$ALx~h7rBkf&rxxj12I]   -dV73%r#a&K    -dV73%r#a&K   	)$wse2c!fXQG 	"	sV   . O2 P! +AQ B
Q? 2	P;PP!	Q*QQ	Q<Q77Q<?Rc                    | dk(  rdn|  } d|  d}t        j                  |ddddg	      }t        j                  |d   d
      |d<   |j                  d      }|j	                  d      }|d   }|j                         }|S )a  
    Load demo data set with monthly expenditure ($AUD) on corticosteroid drugs that
    the Australian health system had between 1991 and 2008. Obtained from the book:
    Forecasting: Principles and Practice by Rob J Hyndman and George Athanasopoulos.
    Index is set to datetime with monthly frequency and sorted.

    Parameters
    ----------
    version: str, default `'latest'`
        Version of the dataset to fetch. If 'latest', the latest version will be
        fetched (the one in the main branch). For a list of available versions,
        see the repository branches.

    Returns
    -------
    df: pandas Series
        Dataset.
    
    r   r   r   r   r   r   yr#   )r   headernamesr   rK   r   )rV   rW   r`   r_   rb   rc   )r   r   rk   s      ro   load_demo_datasetru     s    *  8+fG9G LG9 U 	 
 
Sc!C3D	EB^^BzN:FBzN	j	!B	4B	CB	BI    )r   )
pandasrV   rf   rY   booldict	DataFramerp   Seriesru    rv   ro   <module>r}      s{     
 v

v
v
 
v
 	v

 v
 \\v
r#s #")) #rv   