
    *YHh6                    6(   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZmZ d dlmZ d Zej*                  j-                  d	g d
      d        Zej*                  j-                  d	g d
      d        Zej*                  j-                  dej2                  ej4                  ej6                  g      ej*                  j-                  dej2                  ej4                  ej6                  g      d               Zej*                  j-                  dej2                  ej4                  ej6                  g      d        Zd Zd Zd Z d Z!d Z"ej*                  j-                  dg dg dg ejF                  g dg dg       ejF                  g dg dge$       ejF                  g ddejJ                  d gge$       ejF                  g dd e&d!      d gge$       ejF                  g d"g d#ge$       ejF                  g d$dejJ                  dgge$       ejF                  g d$d e&d!      dgge$      gg d%&      d'        Z'ej*                  j-                  d	g d
      ej*                  j-                  d(d)d*g      ej*                  j-                  d+dd,g      d-                      Z(ej*                  j-                  d(d)d*g      ej*                  j-                  d.d/d0gd1d0gd/d0ggg d2g d3g d2gfd4dgd5dgd6d7gd5dggg d8g d9g d:gfg      d;               Z)d< Z*ej*                  j-                  d+g d=      ej*                  j-                  d>g d=      d?               Z+ej*                  j-                  d@dAdBg      ej*                  j-                  dd1d/g ejF                  dCdDg      g      dE               Z,ej*                  j-                  d@dAdBg      dF        Z-ej*                  j-                  dGdHd0gdId0ggdHdIgd0ggej\                  f ejF                  d1d/gdJd/gg      d1dJgd/ggej^                  f ejF                  dKd gdLd gge$      dKdLgd ggej\                  f ejF                  dKd gdLd gg      dKdLgd ggej`                  f ejF                  d1d/gejJ                  d/gg      d1ejJ                  gd/ggej6                  f ejF                  dKejJ                  gdejJ                  gge$      dKdgejJ                  ggej\                  f ejF                  dK e&d!      gd e&d!      gge$      dKdg e&d!      ggej\                  fgg dM&      dN        Z1ej*                  j-                  d	g d
      ej*                  j-                  dO ejF                  dd7gge$      jd                   ejF                  ddPgge$      jd                  g dQgej\                  f ejF                  d1d/ggdR      jd                   ejF                  d1dSggdR      jd                  g dTgejf                  f ejF                  dd7gge$      jd                   ejF                  ddPgge$      jd                   ejF                  g dQ      gej\                  f ejF                  ddgge$      jd                   ejF                  dd7gge$      jd                  g dUge$f ejF                  dd7gge$      jd                   ejF                  dejJ                  gge$      jd                  g dVge$f ejF                  ddgge$      jd                   ejF                  dejJ                  gge$      jd                  g dWge$fgg dX&      dY               Z4dZ Z5ej*                  j-                  d[e
eg      d\        Z6d] Z7d^ Z8ej*                  j-                  d_d,d`dagfdbg dcfg dddedfgfgg dg&      dh        Z9di Z:ej*                  j-                  dg dg dg ejF                  g djg dkg       ejF                  g dg dge$      gg dl&      dm        Z;ej*                  j-                  dO ejF                  dd7gge$      jd                   ejF                  ddPgge$      jd                  g dQgej\                  f ejF                  d1d/ggdR      jd                   ejF                  d1dSggdR      jd                  g dTgejf                  f ejF                  dd7gge$      jd                   ejF                  ddPgge$      jd                   ejF                  g dQ      gej\                  fgg dn&      do        Z<dp Z=dq Z>ej*                  j-                  dre&e?g      ds        Z@dt ZAdu ZBdv ZCdw ZDdx ZEdy ZFej*                  j-                  d+dbd,g      dz        ZGej*                  j-                  d{ejJ                  d e&d!      g      d|        ZHej*                  j-                  d+dHdJgg d}g      d~        ZIej*                  j-                  dd*d)gddg&      ej*                  j-                  d+d,g dgd,dg&      d               ZJej*                  j-                  d[e
eg      d        ZKej*                  j-                  ddd/iddiddid/dddSddg      ej*                  j-                  ddg dgg      d               ZLej*                  j-                  d+dbd,d7gg      d        ZMej*                  j-                  d+dgdPgg      d        ZNej*                  j-                  dddJiddiddiddiddidJdddSddg      d        ZOej*                  j-                  d+d,d7gg      d        ZPej*                  j-                  d+dgdPgg      d        ZQd ZRej*                  j-                  ddJd1dddSig      d        ZSd ZTd ZUd ZVd ZWd ZXej*                  j-                  ddd1dg      d        ZYej*                  j-                  dd/dJdg      d        ZZej*                  j-                  dg d      ej*                  j-                  dg d      d               Z[d Z\ej*                  j-                  d{ejJ                  dg      d        Z]d Z^ej*                  j-                  d	g d
      ej*                  j-                  dddg      d               Z_ej*                  j-                  d	g d
      d        Z`ej*                  j-                  d	g d
      d        Zaej*                  j-                  d	g d
      d        Zbd Zcd Zdej*                  j-                  dejJ                  dg      d        Zeej*                  j-                  dddg      ej*                  j-                  dejJ                  dg      d               Zfej*                  j-                  dO ejF                  dejJ                  gge$      jd                   ejF                  dd7gge$      jd                   ejF                  ddPejJ                  ge$      gej\                  f ejF                  dejJ                  gge$      jd                   ejF                  dd7gge$      jd                   ejF                  ddPejJ                  ge$      gej\                  f ejF                  dejJ                  ggej6                        jd                   ejF                  dCggej6                        jd                   ejF                  ddDejJ                  g      gej6                  fgg d&      d        Zgej*                  j-                  d[e
eg      d        Zhej*                  j-                  d ejF                  dejJ                  dCgg      jd                   ejF                  dejJ                  dgg      jd                   ejF                  dDgg      f ejF                  g d¢g      jd                   ejF                  g dâg      jd                   ejF                  ejJ                  gg      f ejF                  dejJ                  d7gge$      jd                   ejF                  dejJ                  dgg      jd                   ejF                  dPgge$      f ejF                  g dŢge$      jd                   ejF                  g dƢg      jd                   ejF                  ejJ                  gge$      fg      dǄ        Ziej*                  j-                  de      dɄ        Zjdʄ Zkej*                  j-                  dddLgg ejF                  ddLggdͬ       ejF                  ddLggdά      g      ej*                  j-                  ddKdLgg ejF                  dKdLggdͬ       ejF                  dKdLggdά      g      dЄ               Zldф Zmd҄ Zndӄ Zoej*                  j-                  dd*d)g      dՄ        Zpej*                  j-                  d ejF                  dgdgge$      d gejJ                  gejJ                  gg ej                  dgdgdgge$      f ejF                  ejJ                  gdgdgge$      d gejJ                  gejJ                  gg ej                  dgejJ                  gejJ                  gge$      fg      d؄        Zrdل Zsdڄ Ztdۄ Zud܄ Zvd݄ Zwej*                  j-                  dddJiddiddiddiddidJdddSddg      dބ        Zxd߄ Zyd Zzd Z{d Z|ej*                  j-                  dddidd/ig      d        Z}ej*                  j-                  ddd1iddig      d        Z~d Zd Zej*                  j-                  d[e
eg      d        Zy)    N)sparse)NotFittedError)OneHotEncoderOrdinalEncoder)is_scalar_nan)_convert_containerassert_allcloseassert_array_equal)CSR_CONTAINERSc                     t        j                  g dg dg      } t               }t        d      }|j                  |       }|j                  |       }|j                  dk(  sJ |j                  dk(  sJ t        j                  |      sJ t        j                  |      rJ t        |j                         g dg dg       t        |j                         |       y )N         r   r   r   Fsparse_outputr      )              ?r   r   r   )r   r   r   r   r   )	nparrayr   fit_transformshaper   issparser
   toarray)X
enc_sparse	enc_denseX_trans_sparseX_trans_denses        b/var/www/html/planif/env/lib/python3.12/site-packages/sklearn/preprocessing/tests/test_encoders.py!test_one_hot_encoder_sparse_denser$      s     	)Y'(AJE2I--a0N++A.M6)))&(((??>***}---  #<>W"X ~--/?    handle_unknown)ignoreinfrequent_if_existwarnc                    t        j                  g dg dg dg      }t        j                  g dg      }t        d      }|j                  |       t	        j
                  t        d      5  |j                  |       d d d        t        |       }|j                  |       |j                         }t        |j                  |      j                         t        j                  g d	g             t        ||       y # 1 sw Y   xY w)
N)r   r   r   )r   r   r   )r   r   r   )   r   r   errorr&   Found unknown categoriesmatch)r   r   r   r   r   r   r   )r   r   r   fitpytestraises
ValueError	transformcopyr
   r   r	   r&   r   X2oh	X2_passeds        r#   #test_one_hot_encoder_handle_unknownr;   *   s    
)Y	23A	9+	B 
g	.BFF1I	z)C	D 
R 
n	5BFF1I	I
Y'')
567
 B	" s   /DDc                    t        j                  g d      j                  d      }t        j                  ddg      j                  d      }t        |       }|j	                  |       |j                         }t        |j                  |      j                         t        j                  g dg dg             t        ||       y )N)11111111223334444)r   55555r>   r-   )r   r   r   r   r   r   r   r   )	r   r   reshaper   r1   r6   r
   r5   r   r7   s        r#   +test_one_hot_encoder_handle_unknown_stringsrE   B   s    
23;;GDA	7D/	"	*	*7	3B
 
n	5BFF1I	I
Y'')
&(<=>
 r9%r%   output_dtypeinput_dtypec                    t        j                  ddgg|       j                  }t        j                  ddgddgg|      }t        d|      }t	        |j                  |      j                         |       t	        |j                  |      j                  |      j                         |       t        d|d      }t	        |j                  |      |       t	        |j                  |      j                  |      |       y )Nr   r   dtypeauto)
categoriesrJ   F)rL   rJ   r   )	r   asarrayTr   r
   r   r   r1   r5   )rG   rF   r   
X_expectedr9   s        r#   test_one_hot_encoder_dtyperP   U   s     	

QF8;/11AaVaV,LAJ	&	=Br''*224jArvvay**1-557D	&E	RBr''*J7rvvay**1-z:r%   c                    t        j                  d      }|j                  ddgddgd      }t        j                  g dg dg| 	      }t        | 	      }t        |j                  |      j                         |       t        |j                  |      j                  |      j                         |       t        | d
      }t        |j                  |      |       t        |j                  |      j                  |      |       y )Npandasabr   r   ABr   r   r   r   r   r   r   r   rI   F)rJ   r   )r2   importorskip	DataFramer   r   r   r
   r   r   r1   r5   )rF   pdX_dfrO   r9   s        r#   !test_one_hot_encoder_dtype_pandasr^   d   s    			X	&B<<sCj1v67D<6lKJ	\	*Br''-557Drvvd|--d3;;=zJ	\	?Br''-z:rvvd|--d3Z@r%   c                  Z   t               } g dg dg dg dg}| j                  |       | j                         }t        g d|       | j                  g d      }t        g d|       t	        j
                  t        d	      5  | j                  d
dg       d d d        y # 1 sw Y   y xY w)N)Maler   girlr   r   )Female)   ra   r   
   )r`   3   boy   r   )r`   [   ra         )	x0_Femalex0_Malex1_1x1_41x1_51x1_91x2_boyx2_girlx3_1x3_2x3_12x3_21x4_3x4_10x4_30)onetwothreefourfive)
one_Femaleone_Maletwo_1two_41two_51two_91	three_boy
three_girlfour_1four_2four_12four_21five_3five_10five_30z!input_features should have lengthr/   rz   r{   )r   r1   get_feature_names_outr
   r2   r3   r4   )encr   feature_namesfeature_names2s       r#   "test_one_hot_encoder_feature_namesr   t   s    
/C!%"$		A GGAJ--/M	
" 	%* ../VWN	
" 	%* 
z)L	M 2!!5%.12 2 2s   B!!B*c                     t               } t        j                  ddggt              j                  }| j                  |       | j                         }t        ddg|       | j                  dg      }t        dd	g|       y )
Nu   c❤t1dat2rI   u	   x0_c❤t1x0_dat2u   n👍meinput_featuresu   n👍me_c❤t1u   n👍me_dat2)r   r   r   objectrN   r1   r   r
   )r   r   r   s      r#   *test_one_hot_encoder_feature_names_unicoder      st    
/C
8V$%V466AGGAJ--/MY/?--i[-IM(.9=Ir%   c                     d } t        |       }t        j                  ddggt              j                  }|j                  |       |j                         }t        ddg|       |j                  dg	      }t        d
dg|       d }t        |      j                  |      }d}t        j                  t        |      5  |j                          ddd       y# 1 sw Y   yxY w)z=Check the behaviour of `feature_name_combiner` as a callable.c                 $    | dz   t        |      z   S )N_)reprfeaturecategorys     r#   name_combinerzHtest_one_hot_encoder_custom_feature_name_combiner.<locals>.name_combiner   s    }tH~--r%   )feature_name_combinerNoneNrI   z	x0_'None'x0_NonerS   r   za_'None'a_Nonec                      y)Nr    r   s     r#   wrong_combinerzItest_one_hot_encoder_custom_feature_name_combiner.<locals>.wrong_combiner   s    r%   zMWhen `feature_name_combiner` is a callable, it should return a Python string.r/   )r   r   r   r   rN   r1   r   r
   r2   r3   	TypeError)r   r   r   r   r   err_msgs         r#   1test_one_hot_encoder_custom_feature_name_combinerr      s    . m
<C
64.!022AGGAJ--/MY/?--cU-CM
H-}= n
=
A
A!
DCW  
y	0 $!!#$ $ $s   CC&c                     t        j                  ddgg      j                  } t               }|j	                  g dg       |j                         d   g dgk(  sJ |j                  |       j                         j                  dk(  sJ |j	                  g dg       |j                  |       j                         j                  dk(  sJ y )	Nr   r   )r   r   r   r   rL   rL   )r   r+   )r   r   r   r   r+   r   )	r   r   rN   r   
set_params
get_paramsr   r   r   )r   r9   s     r#   test_one_hot_encoder_set_paramsr      s    
1a&A	BMMl^M,==?<(\N:::A&&(..&888MMo.M/A&&(..&888r%   c                    t        d      }|j                  |       }t        dd      }|j                  |       }t        |j                         |       t	        j
                  |      r|j                  dk(  sJ |j                         S )NrK   r   FrL   r   csr)r   r   r	   r   r   r   format)r   r   Xtr1Xtr2s       r#   check_categorical_onehotr      sq    
6
*CQD
6
?CQDDLLND)??4 T[[E%999<<>r%   r   defr   7   abcr   r   )rd   r   r   )r   r   r   )rT   rV   cat)rS   rW   r   rI   )rT   r   r   rS   r   nan)Nr   r   )rS   r   r   )Nr   N)mixednumericr   z	mixed-nanzmixed-float-nanz
mixed-Nonezmixed-None-nanzmixed-None-float-nan)idsc                 \   t        t        j                  |       d d dgf         }t        |ddgddgg       t        t        j                  |       d d ddgf         }t        |g dg dg       t	        d      j                  |       }t        |j                         g dg dg       y )	Nr   r   )r   r   r   r   r   r   r   r   rK   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r	   r   r   r   )r   Xtrs     r#   test_one_hot_encoderr      s    0 #288A;q1#v#6
7CC1a&1a&)*
"288A;q1a&y#9
:CC,56
6
*
8
8
;CCKKMO_#EFr%   sparse_FTdropfirstc                     g dg dg dg}t        ||      }|j                  |      }t        j                  |t              }t        |j                  |      |       ddgddgd	dgg}t        |d
|      }|j                  |      }t        j                  |      }t        |j                  |      |       |g dg dg dg}t        || ddgddgg dg      }|j                  |      }t        j                  |t              }d |d<   t        |j                  |      |       ddgddgd	dgg}t        |ddgddgg|       }|j                  |      }t        j                  |t              }d |d<   d |d d df<   t        |j                  |      |       t        j                  g dg dg      }t        j                  d      }t        j                  t        |      5  |j                  |       d d d        y # 1 sw Y   y xY w)Nr   r   )r   r   r   r   r   rI   r   r   r   r   rK   )r   rL   r   r   r   )6   r   8   )r   r&   rL   )r   r   r   r   )r   rL   r&   r   r   r   r   r   r   )Shape of the passed X data is not correctr/   )r   r   r   r   r   r
   inverse_transformreescaper2   r3   r4   )r&   r   r   r   r   X_trexpmsgs           r#   test_one_hot_encoder_inverser     s    
8A
gD
9CQD
((1F
#Cs,,T2C8
R1b'Ar7#A
g&t
LCQD
((1+Cs,,T2C8| ^^<!)A=

   #hhq'D	3006< Wq"g2w'!AR))

   #hhq'D	AqD	3006< 88Y	*+D
))?
@C	z	- $d#$ $ $s   )HHz
X, X_transr   r   r   r   r   r   r   rz   r{   r|   rT   r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   c                     t        |      j                  |       }d}|rt        |d      }t        j                  t
        |      5  |j                  |       ddd       y# 1 sw Y   yxY w)zCheck that `inverse_transform` raise an error with unknown samples, no
    dropped feature, and `handle_unknow="error`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14934
    r   zqSamples \[(\d )*\d\] can not be inverted when drop=None and handle_unknown='error' because they contain all zerosr   r/   N)r   r1   r   r2   r3   r4   r   )r   X_transr   r   r   s        r#   ?test_one_hot_encoder_inverse_transform_raise_error_with_unknownr   A  sf    & g
.
2
21
5C	A 
 $Wh7	z	- 'g&' ' '   A""A+c                      t        j                  ddgddgddggt              } t        dd	      }|j	                  |       }t        |j                  |      |        y )
Nr`   r   rb   r   r   rI   	if_binaryFr   r   )r   r   r   r   r   r
   r   )r   oher   s      r#   &test_one_hot_encoder_inverse_if_binaryr   a  sV    
61+!}xm<FKA
[
>CQDs,,T2A6r%   )r   r   N
reset_dropc                    t        j                  ddgddgddggt              }t        | d      }|j	                  |       |j                  |      }|j                         }|j                  |	       t        |j                  |      |       t        |j                  |      |       t        |j                         |       y )
Nr`   r   rb   r   r   rI   Fr   r   )r   r   r   r   r1   r5   r   r   r
   r   r	   )r   r   r   r   r   r   s         r#   test_one_hot_encoder_drop_resetr   h  s     	61+!}xm<FKA
T
7CGGAJ==D--/MNN
N#s,,T2A6CMM!$d+s002MBr%   methodr1   r         @      @c                     t               }d}t        j                  t        |      5   t	        ||      |        d d d        y # 1 sw Y   y xY w)Nz'Expected 2D array, got 1D array insteadr/   )r   r2   r3   r4   getattr)r   r   r9   r   s       r#   test_X_is_not_1Dr   w  sD     
B
3C	z	- FA  s   AAc                 
   t        j                  d      }|j                  g d      }t               }dt	        |       d}t        j
                  t        |      5   t        ||       |       d d d        y # 1 sw Y   y xY w)NrR   )   r   r+   r   z+Expected a 2-dimensional container but got z	 instead.r/   )r2   rZ   Seriesr   typer3   r4   r   )r   r\   r   r9   r   s        r#   test_X_is_not_1D_pandasr     sm    			X	&B
		,A	B7Qy	
JC	z	- FA  s   A99BzX, cat_exp, cat_dtyper   r   r   rV   rW   )r   r   r   stringzmissing-floatzmissing-np.nan-objectzmissing-float-nan-objectc                    | | d d d   fD ]  }t        d      }|j                  |       t        |j                  t              sJ t        |j                  |      D ]w  \  }}|j                         }t        |d         rt        |d         sJ |d d |d d k(  sJ |j                         |k(  sJ t        j                  |j                  |      rwJ   y )NrA   rK   r   )r   r1   
isinstancecategories_listziptolistr   r   
issubdtyperJ   )r   cat_exp	cat_dtypeXir   resr   res_lists           r#   test_one_hot_encoder_categoriesr     s    F !DbD'l 7v.#//4000COOW5 	7HCzz|HSW%$Xb\222}CR000zz|s***==I666	77r%   zX, X2, cats, cat_dtypedrS   rT   cint64r+   r   r   r   )NrS   z)rS   rT   r  )rS   Nr  )r   r   zobject-stringzobject-string-nonezobject-string-nanzobject-None-and-nanc                    t        |      }t        j                  g dg dg      }t        |j	                  |       j                         |       t        |j                  d         t        |d         k(  sJ |j                  d   j                         t        |d         k(  sJ |j                  d   j                  |k(  sJ t        |      }t        j                  t        d      5  |j                  |       d d d        t        ||      }t        j                  g dg dg      }t        |j                  |      j                  |      j                         |       y # 1 sw Y   jxY w)	Nr   r   r   r   r   r   r   r   r.   r/   rL   r&   )r   r   r   )r   r   r   r
   r   r   r   rL   r   r   rJ   r2   r3   r4   r1   r5   )r   r8   catsr   r&   r   r   s          r#   )test_one_hot_encoder_specified_categoriesr
    s+   f 4
(C
((O_5
6Cs((+335s;q!"d47m333??1$$&$tAw-777 ??1##y000 4
(C	z)C	D 
4
GC
((O_5
6Cswwr{,,R088:C@	 s   -E((E1c                     t        j                  ddggt              j                  } t	        g dg      }t        j                  g dg dg      }t        |j                  |       j                  |       j                         |       t        |j                  |       j                         |       |j                  d   j                         g dk(  sJ t        j                  |j                  d   j                  t         j                        sJ t        j                  d	d
gg      j                  } t	        g dg      }d}t        j                   t"        |      5  |j                  |        d d d        y # 1 sw Y   y xY w)NrS   rT   rI   )rT   rS   r  r   r  r  r   r   r   )r   r   r   z%Unsorted categories are not supportedr/   )r   r   r   rN   r   r
   r1   r5   r   r   r   r   r   rJ   object_r2   r3   r4   )r   r   r   r   s       r#   (test_one_hot_encoder_unsorted_categoriesr    s'   
3*V,..A
O#4
5C
((O_5
6Cswwqz++A.668#>s((+335s;??1$$&/999==+112::>>> 	1a&A
I;
/C
1C	z	- !  s   #E>>FEncoderc                 6   t        j                  dt         j                  dg      g} | |      }t        j                  ddggt              j                  }t        j                  t        d      5  |j                  |       ddd       y# 1 sw Y   yxY w)zTest encoder for specified categories that nan is at the end.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27088
    r   r   r   rI   zNan should be the last elementr/   N)	r   r   r   r   rN   r2   r3   r4   r1   r  r	  r   r   s       r#   ,test_encoder_nan_ending_specified_categoriesr    st     HHa^$%D
T
"C
1a&(**A	z)I	J 
  s   4BBc                     t        j                  ddgddggt              j                  } t	        g dg dg      }t        j                  g d	g d
g      }t        |j                  |       j                         |       |j                  d   j                         g dk(  sJ t        j                  |j                  d   j                  t         j                        sJ |j                  d   j                         g dk(  sJ t        j                  |j                  d   j                  t         j                        sJ y )NrS   rT   r   r   rI   r   )r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   rN   r   r
   r   r   r   r   r   rJ   r  r   r   r   s      r#   7test_one_hot_encoder_specified_categories_mixed_columnsr  $  s    
3*q!f%V466A
OY#?
@C
((24RS
TCs((+335s;??1$$&/999==+112::>>>??1$$&)333==+112::>>>r%   c                      t        j                  d      } | j                  ddgddgd      }t        |      }t	        |g dg dg       y )	NrR   rS   rT   r   r   rU   rX   rY   )r2   rZ   r[   r   r	   )r\   r]   r   s      r#   test_one_hot_encoder_pandasr  1  sF    			X	&B<<sCj1v67D
"4
(CC,56r%   zdrop, expected_namesx0_cx2_br   )r  x1_2r  )r  r   rT   x0_bx2_a)r   binarymanualc                     g dg dg}t        |       }|j                  |       |j                         }t        ||       y )N)r  r   rS   )rT   r   rT   r   )r   r1   r   r
   )r   expected_namesr   r   r   s        r#   'test_one_hot_encoder_feature_names_dropr   :  s;     
&A
T
"CGGAJ--/M~}5r%   c                     ddgddgddgg} t        j                  g dg dg dg      }t        j                  d d	g      }t        d
d      }|j                  |       }t	        |j
                  |       t        ||       ddgddgddgg} t        j                  ddgddgddgg      }t        j                  d	d g      }t        d
d      }|j                  |       }t	        |j
                  |       t        ||       y )Nrd   yes   norj   )r   r   r   r   rC   )r   r   r   r   r   r   Fr   truerS   falser   r   )r   r   r   r   r
   	drop_idx_r	   )r   expectedexpected_drop_idxr   results        r#   *test_one_hot_encoder_drop_equals_if_binaryr+  L  s   
er4j2u+.Axx	35IJH $+
[
>Cq!Fs}}&78FH% ###7Axx#sc3Z#s<=H!T+
[
>Cq!Fs}}&78FH%r%   )rd   r   r   )r#  r   r   )r   r   r   c                     t               }t        j                  g dg dgd      }t        |j	                  |       |j                  d             t        d      }t        |j	                  |       |       y )Nr   r   r   r   r   r   r  rI   float64)r   r   r   r
   r   astyper  s      r#   test_ordinal_encoderr1  d  s^     
C
((Iy)
9Cs((+SZZ	-BC
w
'Cs((+S1r%   )r   r   zobject-string-catc                    t        |      }t        j                  dgdgg      }t        |j	                  |       |       t        |j                  d         t        |d         k(  sJ |j                  d   j                         t        |d         k(  sJ |j                  d   j                  |k(  sJ t        |      }t        j                  t        d      5  |j                  |       d d d        y # 1 sw Y   y xY w)Nr   r   r   r   r.   r/   )r   r   r   r
   r   r   rL   r   r   rJ   r2   r3   r4   r1   )r   r8   r	  r   r   r   s         r#   )test_ordinal_encoder_specified_categoriesr3  u  s    2 D
)C
((SEC5>
"Cs((+S1q!"d47m333??1$$&$tAw-777 ??1##y000 D
)C	z)C	D   s   C88Dc                     g dg dg} t               }|j                  |       }t        j                  | t              }t        |j                  |      |       t        j                  g dg dg      }t        j                  d      }t        j                  t        |      5  |j                  |       d d d        y # 1 sw Y   y xY w)Nr   r   rI   )r   r   r   r   rX   r   r/   )r   r   r   r   r   r
   r   r   r   r2   r3   r4   )r   r   r   r   r   s        r#   test_ordinal_encoder_inverser5    s    	(A

CQD
((1F
#Cs,,T2C8 88\<01D
))?
@C	z	- $d#$ $ $s   %C  C	c                     t        dd      } t        j                  ddgddgdd	ggt        
      }t        j                  ddgddgddggt        
      }| j	                  |       | j                  |      }t        j                  ddgddgddggd
      }t        ||       | j                  |      }t        j                  dd gd dgddggt        
      }t        ||       y )Nuse_encoded_valuer&   unknown_valuerS   xrT   yr  r  rI   xyblar   r   r   r  )r   r   r   r   r1   r5   r
   r   )r   X_fitr   X_trans_encr   X_trans_invinv_exps          r#   +test_ordinal_encoder_handle_unknowns_stringrC    s    
(;2
NCHHsCj3*sCj9HEhhdeS\C:>fMGGGEN--(K
((QGb!Wq!f-W
=C{C(''4KhhddC[3*=VLG{G,r%   rJ   c                    t        dd      }t        j                  ddgddgdd	gg| 
      }t        j                  ddgddgddgg| 
      }|j                  |       |j	                  |      }t        j                  ddgddgddggd
      }t        ||       |j                  |      }t        j                  dd gd dgddggt        
      }t        ||       y )Nr7  r9  r      r      r   	   rI   rg      r   r  )r   r   r   r1   r5   r
   r   r   )rJ   r   r?  r   r@  r   rA  rB  s           r#   ,test_ordinal_encoder_handle_unknowns_numericrJ    s    
(;4
PCHHq!fq!fq!f-U;EhhB"a1a&1?GGGEN--(K
((QIay1a&1
AC{C(''4KhhD	D!9q!f5VDG{G,r%   c                      t        dt        j                        } t        j                  dgdgdgg      }| j	                  |       | j                  dgdgdgg      }t        |dgdgt        j                  gg       y )Nr7  r9  r   r   r   r+   r   )r   r   r   r   r1   r5   r
   )r   r?  r   s      r#   (test_ordinal_encoder_handle_unknowns_nanrL    so     (;266
RCHHqcA3_%EGGENmmaS1#sO,Gw!qcBFF8 45r%   c                      t        dt        j                  t              } t        j                  dgdgdgg      }t        j                  t        d      5  | j                  |       d d d        y # 1 sw Y   y xY w)Nr7  )r&   r:  rJ   r   r   r   z'dtype parameter should be a float dtyper/   )	r   r   r   intr   r2   r3   r4   r1   )r   r?  s     r#   8test_ordinal_encoder_handle_unknowns_nan_non_float_dtyperO    sd     *"&&C HHqcA3_%E	z)R	S   s   A22A;c                      t        j                  g dgt              j                  } g d}t	        |      }d}t        j                  t        |      5  |j                  |        d d d        y # 1 sw Y   y xY w)N)LowMediumHighrR  rQ  rI   )rQ  rR  rS  r   z*Shape mismatch: if categories is an array,r/   )	r   r   r   rN   r   r2   r3   r4   r1   )r   r	  r   r   s       r#   +test_ordinal_encoder_raise_categories_shaperT    s^    
<=VLNNA$D
D
)C
6C	z	- 
  s   A11A:c            	         t        d      } t        j                  g dg dgd      }t        j                  ddgd	d
ggd      t        j                  ddgd	d
ggd      t        j                  ddgddgg      t        j                  ddgddgg      t        j                  ddgd	dggd      fD ]  }| j                  |       t	        t        d      D cg c](  }| j                  |   j                  |j                  k(  * c}      sJ t        | j                  |      j                         |        ddgd	d
gg}| j                  |       t	        t        d      D cg c]=  }t        j                  | j                  |   j                  t        j                        ? c}      sJ t        | j                  |      j                         |       ddgd	dgg}| j                  |       t	        t        d      D cg c]  }| j                  |   j                  dk(    c}      sJ t        | j                  |      j                         |       y c c}w c c}w c c}w )NrK   r   )r   r   r   r   )r   r   r   r   r/  rI   r   r   r   r+   r  rS   rT   r  r      a   b   c   dr   )r   r   r   r1   allranger   rJ   r
   r5   r   r   integer)r   r   r   is       r#   test_encoder_dtypesr^    s
   
6
*C
(((*>?y
QC 	1a&1a&!1
1a&1a&!3
3*sCj)*
4,t-.
1c(QH%X6 	< 	
qJACOOA&,,7JKKK3==+335s;	< Q!QAGGAJUSTXVcooa066

CVWWWs}}Q'//137
SAs8AGGAJeAhG"((H4GHHHs}}Q'//137 K
 W
 Hs   -I
&AI #I%c                  B   t        j                  d      } t        d      }t        j                  g dg dgd      }| j                  dd	gd
dgddgdd      }|j                  |       t        t        d	      D cg c]  }|j                  |   j                  dk(    c}      sJ t        |j                  |      j                         |       | j                  dd	gddgddgd      }|d   j                  |d   j                  |d   j                  g}|j                  |       t        t        d
      D cg c]!  }|j                  |   j                  ||   k(  # c}      sJ t        |j                  |      j                         |       y c c}w c c}w )NrR   rK   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r/  rI   r   r   r   r+   r   r   rV   rW   Cr  rS   rT   r   r   rV   rW   ra  )r2   rZ   r   r   r   r[   r1   rZ  r[  r   rJ   r
   r5   r   )r\   r   r   r   r]  X_types         r#   test_encoder_dtypes_pandasrc    sb   			X	&B
6
*C
((	')GHC
 	Aq6AaV<GLAGGAJU1XF"((G3FGGGs}}Q'//137
Aq6c
#sDEAfllAcFLL!C&,,7FGGAJuQxH!"((F1I5HIIIs}}Q'//137 G Is   ?#F>&Fc                      t               } ddgddgg}t        j                         5  t        j                  d       | j	                  |       d d d        y # 1 sw Y   y xY w)Nr`   r   rb   r   r,   )r   warningscatch_warningssimplefilterr   )r   r   s     r#   test_one_hot_encoder_warningrh    sX    
/C
!xm$A		 	 	" g&!  s   'AA c                 >   ddgddgddgg}t        | ddddgddgg      }|j                  |       d	dgg}t        j                  ddgg      }d
}t	        j
                  t        |      5  |j                  |      }ddd       t        |       y# 1 sw Y   xY w)z,Check handle_unknown='warn' works correctly.rS   r   rT   r   r   Fr)   r   r   r&   rL   r  qFound unknown categories in columns \[0\] during transform. These unknown categories will be encoded as all zerosr/   N	r   r1   r   r   r2   warnsUserWarningr5   r	   )r   r   r   X_testrO   warn_msgr   s          r#   test_ohe_handle_unknown_warnrq  %  s     qC8c1X&A
#JA'	C GGAJAhZFAq6(#J	A  
k	2 (--'(GZ(( (   ,BBmissing_valuec                    dddd| g}t        |      }g dg ddddd| gg}|j                  |      j                         }g dg d	g d
g}t        ||       |j                  |u sJ t        |j                  |j                        D cg c]
  \  }}||    }}}|j                  |      }	t        j                  |t              }
t        |d         rt        |d d |d d        t        |d         sJ t        |d         sJ t        |
d d d df   |	d d d df          t        |
dd df   |	dd df          t        |
d         sJ t        |	d         sJ y t        ||       t        |
|	       y c c}}w )Nr   rg   r   r   r   )r   rg   r   r   rS   )r   rg   r   r   rS   )r   r   r   r   r   )r   r   r   r   r   r   rI   rA   )rA   rA   )r   r   r   r
   r   r   r   r'  r   r   r   r   r   )rs  cats_to_dropr   r   transr   r   r   dropped_catsX_inv_transX_arrays              r#    test_one_hot_encoder_drop_manualrz  ?  s   2q"m4L
\
*C	Ar=)	A
 a ((*EO_
=Cuc"88|### *-S__cmm)L%gGL  ''.Khhq'G \"%&<,l3B.?@\"-...\"-...71crc6?K3B3,?@ 	72ss7+[SbS-ABWV_---[0111<67K0)s   E;)r   r   rc   rS   c                     t        |       }d}t        j                  t        |      5  |j	                  g dg dg dg       d d d        y # 1 sw Y   y xY w)Nr   z-`drop` should have length equal to the numberr/   r   r   )r   r   ;   )r   r2   r3   r4   r1   )r   r   r   s      r#   test_invalid_drop_lengthr}  d  sK    
T
"C=G	z	1 B@AB B Bs   AAdensityr   denserS   r   rT   r  c                    t        |       }t        | |      }g dg dg}|j                  |       |j                  |       t        |j                  |j                         |dk(  rt        |j                  d       n=t        ||j                  |j                        D ]  \  }}}|t        |         |k(  rJ  t        |j                  t        j                        sJ |j                  j                  t        k(  sJ y )Nr   r   )r  r   rS   r  r   r   )r   r1   r
   r   r'  r   rN  r   r   ndarrayrJ   r   )r~  r   ohe_baseohe_testr   drop_catdrop_idxcat_lists           r#   test_categoriesr  l  s     73H7>H	&ALLOLLOx++X-A-ABw8--q1,/($$h&:&:-
 	7(Hh CM*h666	7 h(("**555##v---r%   c                 Z     |        j                         j                  j                  sJ y )N)__sklearn_tags__
input_tagscategorical)r  s    r#   "test_encoders_has_categorical_tagsr    s"    9%%'22>>>>r%   kwargsmax_categoriesmin_frequency   g(\?r   )r  r  rg   rL   rK   rS   rT   r  r   c                 .   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        d|d	d
d| j	                  |      }t        |j                  g dg       dgdgdgdgdgg}t        j                  ddgddgddgddgddgg      }|j                  |      }t        ||       dgdgdz  z   D cg c]  }|g }}|j                  |      }	t        ||	       |j                         }
t        ddg|
       yc c}w )zpTest that different parameters for combine 'a', 'c', and 'd' into
    the infrequent category works as expected.rS   r   rT   r#  r  rd   r   r   r(   F)rL   r&   r   rS   r  r   er   r   infrequent_sklearnr+   r  x0_infrequent_sklearnNr   r   r   rN   r   r1   r
   infrequent_categories_r5   r	   r   r   )r  rL   X_trainr   ro  r(  r   colexpected_invX_invr   s              r#   test_ohe_infrequent_two_levelsr    sI    hh	SEBJ.#;seaiGHIKKG
 , 	
 
c'l  s11O3DEecUSEC53%0Fxx!Q!Q!Q!Q!Q@AHmmF#GHg&&)U.B-Ca-G%GHcSEHLH!!'*E|U+--/M 78-H Is   

Dc                    t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        d	d
d|       j	                  |      }|j
                  d   |j                  d      dk(  sJ t        j                  dgdgg      }|j                  |      }t        dgdgg|       |j                         }t        dg|       |j                  |      }t        dgdgg|       y)z3Test two levels and dropping the frequent category.rS   r   rT   r#  r  rd   r   r   r(   Fr   r&   r   r  r   r   r   r  r  N)r   r   rN   r   r1   r   r'  r5   r	   r   r
   r   )r   r  r   ro  r   r   	X_inverses          r#   ,test_ohe_infrequent_two_levels_drop_frequentr    s    hh	SEBJ.#;seaiGHIKKG
,	
 
c'l  ??1cmmA./3666XXusen%FmmF#GaS1#J(--/M/0-@%%g.I 456	Br%   c                 (   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        d	d
d|       }d| d   d}t	        j
                  t        |      5  |j                  |       ddd       y# 1 sw Y   yxY w)z_Test two levels and dropping any infrequent category removes the
    whole infrequent category.rS   r   rT   r#  r  rd   r   r   r(   Fr   r  Unable to drop category r   ( from feature 0 because it is infrequentr/   Nr   r   rN   r   r2   r3   r4   r1   r   r  r   r   s       r#   5test_ohe_infrequent_two_levels_drop_infrequent_errorsr    s    
 hh	SEBJ.#;seaiGHIKKG
,	C %T!WK/W
XC	z	-      -BBrH  gQ?g{Gz?rG  c                 
   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        dd	d
d| j	                  |      }t        |j                  ddgg       dgdgdgdgdgg}t        j                  g dg dg dg dg dg      }|j                  |      }t        ||       dgdgdgdgdgg}|j                  |      }t        ||       |j                         }t        g d|       y)zkTest that different parameters for combing 'a', and 'd' into
    the infrequent category works as expected.rS   r   rT   r#  r  rd   r   r   r(   Fr&   r   r  r.  r   r   r   r-  r  )r  r  r  Nr   r  )	r  r  r   ro  r(  r   r  r  r   s	            r#    test_ohe_infrequent_three_levelsr    s'     hh	SEBJ.#;seaiGHIKKG
 ,EEK	c'l  s11S#J<@ecUSEC53%0FxxIy)YOPHmmF#GHg& 
				L !!'*E|U+--/M@-Pr%   c                 $   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        d	d
d|       j	                  |      }t        j                  dgdgdgg      }t        ddgddgddgg|j                  |             |j                  d      j	                  |       d}t        j                  t        |      5  |j                  dgdgg      }ddd       t        ddgddgg       y# 1 sw Y   xY w)z5Test three levels and dropping the frequent category.rS   r   rT   r#  r  rd   r   r   r(   Fr  r   r   r'   r-   r.   r/   r  N)r   r   rN   r   r1   r	   r5   r   r2   rm  rn  )r   r  r   ro  r   r   s         r#   .test_ohe_infrequent_three_levels_drop_frequentr    s    hh	SEBJ.#;seaiGHIKKG
,	
 
c'l  XXusecU+,FaVaVaV,cmmF.CD NN(N+//8
$C	k	- 0--#/0 aVaV$g.0 0s   DDc                 (   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        d	d
d|       }d| d   d}t	        j
                  t        |      5  |j                  |       ddd       y# 1 sw Y   yxY w)z7Test three levels and dropping the infrequent category.rS   r   rT   r#  r  rd   r   r   r(   Fr  r  r   r  r/   Nr  r  s       r#   7test_ohe_infrequent_three_levels_drop_infrequent_errorsr    s     hh	SEBJ.#;seaiGHIKKG
,	C %T!WK/W
XC	z	-   r  c                      t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  } t        d	d
d      j	                  |       }t        |j                  ddgg       dgdgdgdgg}t        j                  g dg dg dg dg      }|j                  |      }t        ||       dgg}d}t        j                  t        |      5  |j                  |       ddd       y# 1 sw Y   yxY w)zmTest that different parameters for combining 'a', and 'd' into
    the infrequent category works as expected.rS   r   rT   r#  r  rd   r   r   r,   F)r&   r   r  r.  r  r-  badz.Found unknown categories \['bad'\] in column 0r/   N)r   r   rN   r   r1   r
   r  r5   r	   r2   r3   r4   )r  r   ro  r(  r   r   s         r#   (test_ohe_infrequent_handle_unknown_errorr  '  s     hh	SEBJ.#;seaiGHIKKG
eA	c'l  s11S#J<@ ecUSEC5)FxxIy)DEHmmF#GHg& gYF
;C	z	- f  s   C44C=c                    t        j                  dgdz  dgdz  z   gt              j                  }t	        dg dgddd	| j                  |      }dgd
gdgdgdgg}t        j                  ddgddgddgddgddgg      }|j                  |      }t        ||       dddgg}dgdgg}|D ]B  }|j                  |      j                  |       t        dgdgg|j                  |             D y)zG'a' is the only frequent category, all other categories are infrequent.rS   r   r  rj   rI   r  r   rS   rT   Fr(   rL   r   r&   rT   r  r   r   r   r   r   r   Nr   )	r   r   r   rN   r   r1   r5   r	   r   )r  r  r   ro  r(  r   dropsr   s           r#   5test_ohe_infrequent_two_levels_user_cats_one_frequentr  ?  s"    hh	SEBJ./v>@@G
 (), 	
 
c'l  ecUSEC53%0Fxx!Q!Q!Q!Q!Q@AHmmF#GHg& kC5)EecU^F ;D!%%g.!qc
CMM&$9:;r%   c                     t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   gt        	      j                  } t	        g d
gddd      j                  |       }t        |j                  g dg       dgdgdgdgdgg}t        j                  ddgddgddgddgddgg      }|j                  |      }t        ||       dgdgdz  z   D cg c]  }|g }}|j                  |      }t        ||       yc c}w )zFTest that the order of the categories provided by a user is respected.rS   r   rT   r#  r  rd   r   r   rI   r  Fr(   r   rL   r   r&   r  )r  r   rS   r  r   r   r  r+   Nr   r   r   rN   r   r1   r
   r  r5   r	   r   )r  r   ro  r(  r   r  r  r  s           r#   (test_ohe_infrequent_two_levels_user_catsr  [  s+   hh
cURZ	3%"*	,uqy	89a  (),	
 
c'l  s11O3DEecUSEC53%0Fxx!Q!Q!Q!Q!Q@AHmmF#GHg& '*U.B-Ca-G%GHcSEHLH!!'*E|U+ Is   
C=c                     t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   gt        	      j                  } t	        g d
gddd      j                  |       }t        |j                  ddgg       dgdgdgdgdgg}t        j                  g dg dg dg dg dg      }|j                  |      }t        ||       dgdgdgdgdgg}|j                  |      }t        ||       y)zTest that the order of the categories provided by a user is respected.
    In this case 'c' is encoded as the first category and 'b' is encoded
    as the second one.rS   r   rT   r#  r  rd   r   r   rI   r  r   rT   rS   Fr(   r  r  r-  r  r.  r  Nr  )r  r   ro  r(  r   r  r  s          r#   *test_ohe_infrequent_three_levels_user_catsr  v  s   
 hh
cURZ	3%"*	,uqy	89a  (),	
 
c'l  s11S#J<@ecUSEC53%0FxxIy)YOPHmmF#GHg&
 
				L !!'*E|U+r%   c                      t         j                  g dg df   } t        ddd      }|j                  |        ddgddgg}|j	                  |      }t        |g d	g d
g       y)zaTest infrequent categories where feature 0 has infrequent categories,
    and feature 1 does not.	r   r   r   r   r   r   r   r   r   	r   r   r   r   r   r   r   r   r   r   r   F)r  r   r   r   r   r   r   r   r   )r   r   r   r   N)r   c_r   r1   r5   r	   )r   r   ro  r   s       r#   test_ohe_infrequent_mixedr    sc     	)+FFGA
q{%
PCGGAJ!fq!fFmmF#G GlL9:r%   c            
      b   t         j                  g dg dg df   } t        ddd      }|j                  |       j	                         }t        |j                  d   d	d
g       t        |j                  d	   d	dg       t        |j                  d
   d       |j                         }t        g d|       g dg dg dg dg dg dg dg dg dg	}t        ||       g dg dg}|j                  |      }g dg dg}t        ||j	                                |j                  |      }t        j                  g dg dgt              }t        ||       t        ddd      j                  |       }t        j                  t         d      5  |j                  |       ddd       g d g d!g}|j                  |      }g d"g dg}t        ||j	                                |j                  |      }t        j                  g d#g d$gt              }t        ||       y# 1 sw Y   xY w)%z?Test infrequent categories with feature matrix with 3 features.r  )	r   r   r   r   r   rd   r   r   r   )	r   r   r   r   r   r   r   r   r   rK   r   r(   rL   r  r&   r   r   r   rd   N)x0_0x0_3r  x1_0x1_5x1_infrequent_sklearnx2_0x2_1)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   )r+   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r  N)r  r   NrI   r,   r.   r/   )r   r   r   )r   rd   r   )r   r   r   r   r   r   r   r   )r  r  r   )r   r  r   )r   r  r   r   r   r
   r  r   r	   r5   r   r   r   r1   r2   r3   r4   )	r   r   r   r   r(  ro  X_test_transr  r  s	            r#   'test_ohe_infrequent_multiple_categoriesr    s    	#$#	%	A !<QC "**,Gs11!4q!f=s11!4q"g>s11!4d;
 --/M		
 	 	!        
H Hg&#F==(L )*BCHHl2245!!,/E88	(*IJRXL |U+ !G	c!f  
z)C	D f $F==(L(*BCHHl2245!!,/E88	8:VWL |U+! s   H%%H.c            
         t        j                  d      } | j                  g dg ddddg      }t        dd	d
      }|j	                  |      j                         }t        |j                  d   ddg       t        |j                  d   g d       g dg dg dg dg dg dg dg dg dg	}t        ||       | j                  ddgddgdddg      }g dg dg}|j                  |      }t        ||j                                |j                  |      }t        j                  ddgddggt              }t        ||       | j                  ddgddgdddg      }|j                  |      j                         }g dg dg}t        ||       |j                  |      }t        j                  ddgddggt              }t        ||       y)zHTest infrequent categories with a pandas dataframe with multiple dtypes.rR   	rS   fr  r  r  rS   r  rT   rT   	r   r   r   rd   rd   rg   r   r   r   )strrN  r  rN  columnsrK   r   r(   r  r   rS   rT   r   r   r   rg   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r     rg   r  rI   r  r   N)r2   rZ   r[   r   r   r   r
   r  r	   r5   r   r   r   r   )	r\   r   r   r   r(  ro  r  r  r  s	            r#   .test_ohe_infrequent_multiple_categories_dtypesr    s    
		X	&B
@1	
  	 	A !<QC "**,Gs11!4sCjAs11!4jA 	
H Hg&\\3*b"X>PU\WF"$67H==(LHl2245!!,/E88
 4	5=Q7RSL |U+ \\3*b!W=u~\VF==(002L"$67HHl+!!,/E88
#	$';Q&?@L |U+r%   ri   )r  r  c                     t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        dd	d
d| }|j	                  |       |j                  dgg      }t        |dgg       y),All user provided categories are infrequent.rS   r   rT   r#  r  rd   r   r   r(   Fr  r   Nr   )r   r   rN   r   r1   r5   r	   r  r  r   r   s       r#   $test_ohe_infrequent_one_level_errorsr  H  s     hh	SEBJ.#;seaiGHIKKG
 ,EEKC GGGmmcUG$GGqcU#r%   c                     t        j                  dgdz  gt              j                  }t	        dg dgddd| j                  |      }|j                  dgdgg      }t        |d	gd	gg       y
)r  r  r   rI   r  Fr(   r  rS   r   Nr   )r   r   r   rN   r   r1   r5   r	   r  s       r#   5test_ohe_infrequent_user_cats_unknown_training_errorsr  V  s     hh	{&133G
 (), 	
 
c'l  mmcUSEN+GGqcA3Z(r%   zinput_dtype, category_dtype)OOOUUOUUSOSUSS
array_type)r   r   	dataframec                    t        j                  dgdgg|       }t        j                  ddg|      g}t        |d      j                  |      }t	        dgdgdgdgg||       }|j                  |      }t        j                  ddgddgddgddgg      }t        ||       t        |      j                  |      }	|	j                  |      }t        j                  dgdgdgdgg      }t        ||       y	)
a"  Check that encoding work with object, unicode, and byte string dtypes.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15616
    https://github.com/scikit-learn/scikit-learn/issues/15726
    https://github.com/scikit-learn/scikit-learn/issues/19677
    rT   rS   rI   Fr   r   r   r   N)	r   r   r   r1   r   r5   r	   r   r
   )
rG   category_dtyper  r   rL   r   ro  r   r(  oes
             r#   test_encoders_string_categoriesr  g  s     	3%#{3A((C:^<=J
:U
C
G
G
JC
use$jF mmF#Gxx!Q!Q!Q!Q89HGX&	:	.	2	21	5Bll6"Gxx!qcA3,-Hw)r%   c                  4   t        j                  dgdggd      } t        j                  ddgd      g}t        |d      }t        j                  d      }t        j                  t        |	      5  |j                  |        d
d
d
       y
# 1 sw Y   y
xY w)zCheck that this mixture of predefined categories and X raises an error.

    Categories defined as bytes can not easily be compared to data that is
    a string.
    rT   rS   UrI   SFr   zjIn column 0, the predefined categories have type 'bytes' which is incompatible with values of type 'str_'.r/   N)	r   r   r   r   r   r2   r3   r4   r1   )r   rL   r   r   s       r#   $test_mixed_string_bytes_categoricalsr    s     	3%#s+A((C:S12J
:U
CC
))	'C
 
z	- 
  s   3BBc                     t        j                  dd| d| ggt              j                  }t	        dd      j                  |      }|j                         }t        |ddd	|  g       y )
NrS   rT   rI   Fr'   r   r&   x0_ar  x0_)r   r   r   rN   r   r1   r   r
   )rs  r   r   namess       r#   )test_ohe_missing_values_get_feature_namesr    se     	3]C?@OQQA
eH
E
I
I!
LC%%'Euvv]O/DEFr%   c            	      (   t        j                  d      } | j                  g dt        j                  dddt        j
                  gt              ddd	g
      }t        j                  g dg dg dg dg      }t        |      }t        ||       y )NrR   )dogr   Nr   r   r   r+   rI   )col1col2r  r  r  )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )	r2   rZ   r[   r   r   r   floatr   r	   )r\   dfexpected_df_transr   s       r#   %test_ohe_missing_value_support_pandasr     s    			X	&B	/HHaArvv.e<	
   
 
B !!!!		
 #2
&CC*+r%   pd_nan_typepd.NAznp.nanc           
         t        j                  d      }| dk(  r|j                  nt        j                  }|j                  d|j                  dd|ddgd      i      }t        j                  g d	g d
g dg dg d
g      }t        d|      }|j                  |      }t        ||       t        |j                        dk(  sJ t        |j                  d   d d g d       t        j                  |j                  d   d         sJ y )NrR   r  r  r  rS   rT   r   rI   )r   r   r   r   )r   r   r   r   )r   r   r   r   r  Fr  r   r   rA   r   )r2   rZ   NAr   r   r[   r   r   r   r   r	   lenr   r
   isnan)r  r&   r\   pd_missing_valuer  r  r   df_transs           r#   1test_ohe_missing_value_support_pandas_categoricalr	    s     
		X	&B +w 6ruuBFF	BIIsC)93DJIW	

B
 	
 eN
KC  $H%x0s1$$$sq)#2.@88COOA&r*+++r%   c                    ddgddgddgg}t        dd|       }|j                  |      }t        j                  g d	g d
g dg      }t	        ||       ddgg}t        j                  g d	g      }d}t        j                  t        |      5  |j                  |      }ddd       t	        ||       |j                  |      }t        |t        j                  ddggt                     y# 1 sw Y   OxY w)zZCheck drop='first' and handle_unknown='ignore'/'infrequent_if_exist'
    during transform.rS   r   rT   r   r   r   Fr   r   r&   r   r   )r   r   r   r  r   tFound unknown categories in columns \[0, 1\] during transform. These unknown categories will be encoded as all zerosr/   NrI   r   r   r   r   r	   r2   rm  rn  r5   r   r
   r   r&   r   r   r   rO   ro  rp  r  s           r#   /test_ohe_drop_first_handle_unknown_ignore_warnsr    s     qC8c1X&A
E.C "G	
J GZ( AhZF9+&J	 
 
k	2 (--'(GZ( !!*-Eubhhaz@A( (   C//C8c                    ddgddgddgg}t        dd|       }|j                  |      }t        j                  g d	g d
g dg      }t	        ||       ddgg}t        j                  g dg      }d}t        j                  t        |      5  |j                  |      }ddd       t	        ||       |j                  |      }t        |t        j                  ddggt                     y# 1 sw Y   OxY w)zDCheck drop='if_binary' and handle_unknown='ignore' during transform.rS   r   rT   r   r   r   Fr  r  r   rX   r  r   )r   r   r   r   r  r/   NrI   r  r  s           r#   3test_ohe_drop_if_binary_handle_unknown_ignore_warnsr    s     qC8c1X&A
nC "G	
J GZ( AhZF<.)J	 
 
k	2 (--'(GZ( !!*-Eubhhd}FCD( (r  c                 >   ddgddgddgg}t        dd| ddgddgg      }|j                  |       d	dgg}t        j                  ddgg      }d
}t	        j
                  t        |      5  |j                  |      }ddd       t        |       y# 1 sw Y   xY w)znCheck drop='first' and handle_unknown='ignore'/'infrequent_if_exist'
    during fit with categories passed in.rS   r   rT   r   r   r   Frj  r  rk  r/   Nrl  )r&   r   r   ro  rO   rp  r   s          r#   'test_ohe_drop_first_explicit_categoriesr  &  s    
 qC8c1X&A
%#JA'	C GGAJAhZFAq6(#J	A  
k	2 (--'(GZ(( (rr  c                     t        j                  d      } | j                  g dg ddddg      }t        d	      }|j	                  d
       d}t        j
                  t        |      5  |j                  |       ddd       |j                  |       t        j
                  t        |      5  |j                  |       ddd       y# 1 sw Y   PxY w# 1 sw Y   yxY w)zJRaise informative error message when pandas output and sparse_output=True.rR   r  )r  rT   rT   )rS   rT   rS   rT   r  Tr   r5   zxPandas output does not support sparse data. Set sparse_output=False to output pandas dataframes or disable Pandas outputr/   N)
r2   rZ   r[   r   
set_outputr3   r4   r   r1   r5   )r\   r  r   r   s       r#   'test_ohe_more_informative_error_messager  A  s    			X	&B	IO<sCj	QB
d
+CNNXN&	S  
z	- " GGBK	z	- b 	  s   -C3CCC#c                  D   t        j                  t         j                  dddgg      j                  } t	        t         j
                        }dt         j
                   }t        j                  t        |      5  |j                  |        ddd       y# 1 sw Y   yxY w)zDTest ordinal encoder with nan passthrough fails when dtype=np.int32.r   r   rI   zdThere are missing values in features \[0\]. For OrdinalEncoder to encode missing values with dtype: r/   N)
r   r   r   rN   r   int32r2   r3   r4   r1   )r   r  r   s      r#   Btest_ordinal_encoder_passthrough_missing_values_float_errors_dtyper  U  s~     	2663S)*+--A	bhh	'B	002z	;  
z	- 
q	  s   ;BBencoded_missing_valuer8  c                    t        j                  t         j                  dddggt         j                        j                  }t        |       j                  |      }t        |j                        dk(  sJ t        |j                  d   ddt         j                  g       |j                  |      }t        || gdgdgdgg       |j                  |      }t        ||       y)	z.Test ordinal encoder with nan on float dtypes.r   r   rI   r  r   r   r   N)r   r   r   r/  rN   r   r1   r  r   r	   r5   r   )r  r   r  r   r  s        r#   5test_ordinal_encoder_passthrough_missing_values_floatr  c  s     	2663S)*"**=??A	.C	D	H	H	KBr~~!###BNN1%S"&&'9:ll1oGG45usecUKL$$W-IIq!r%   c           
         t        j                  d      }| dk(  r|j                  nt        j                  }|j                  d|j                  dd|ddgd      i      }t        |	      j                  |      }t        |j                        d
k(  sJ t        |j                  d   dd g d       t        j                  |j                  d   d         sJ |j                  |      }t        |dgdg|gdgdgg       |j                  |      }|j                   dk(  sJ t        |dddf   ddg       t        |dddf   ddg       t        j                  |d         sJ y)z0Check ordinal encoder is compatible with pandas.rR   r  r  r  rS   rT   r   rI   r  r   r   Nr   r   rA          @r   r   )r   r   r   r   )r2   rZ   r  r   r   r[   r   r   r1   r  r   r
   r  r5   r	   r   r   )r  r  r\   r  r  r  r  r  s           r#   =test_ordinal_encoder_missing_value_support_pandas_categoricalr"  u  s`    
		X	&B +w 6ruuBFF	BIIsC)93DJIW	

B 
.C	D	H	H	LBr~~!###r~~a(!,o>88BNN1%b)***||BHHuse.C-DsecUST$$X.I??f$$$y!Q'#s4yQ'#s488IdO$$$r%   r!  )zobject-None-missing-valuezobject-nan-missing_valueznumeric-missing-valuec                 v   t        |      }t        j                  dgt        j                  gg      }t	        |j                  |       |       |j                  d   j                  |k(  sJ t        |      }t        j                  t        d      5  |j                  |       ddd       y# 1 sw Y   yxY w)z.Test ordinal encoder for specified categories.r   r   r   r.   r/   N)r   r   r   r   r
   r   r   rJ   r2   r3   r4   r1   )r   r8   r	  r   r  r   s         r#   =test_ordinal_encoder_specified_categories_missing_passthroughr$    s    L 
4	(B
((SEBFF8$
%Cr''*C0 >>!""i/// 
4	(B	z)C	D 
r
  s   B//B8c                 $   t        j                  g dt              g} | |      }t        j                  ddggt              j                  }t	        j
                  t        d      5  |j                  |       ddd       y# 1 sw Y   yxY w)	zTest encoder for specified categories have duplicate values.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27088
    )rS   rT   rS   rI   r   rS   rT   z5the predefined categories contain duplicate elements.r/   N)r   r   r   rN   r2   r3   r4   r1   r  s       r#   +test_encoder_duplicate_specified_categoriesr&    sq     HH_F34D
T
"C
3*V,..A	Q
  	
  s   +BBzX, expected_X_trans, X_testr   r   )r   r   r   )r   r!  r   r  )r  rS   rT   )r!  r   r   c                     t        dd      }|j                  |       }t        ||       t        |j                  |      dgg       y)z>Test the interaction between missing values and handle_unknownr7  rA   r9  g      N)r   r   r	   r5   )r   expected_X_transro  r  r   s        r#   /test_ordinal_encoder_handle_missing_and_unknownr)    sC    8 
':"	MBq!GG-.BLL(D6(3r%   csr_containerc                    t        j                  g dg dg      } | |      }t               }d}t        j                  t
        |      5  |j                  |       ddd       t        j                  t
        |      5  |j                  |       ddd       |j                  |      } | |      }t        j                  t
        |      5  |j                  |       ddd       y# 1 sw Y   xY w# 1 sw Y   dxY w# 1 sw Y   yxY w)zCheck that we raise proper error with sparse input in OrdinalEncoder.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19878
    r   r   z2Sparse data was passed, but dense data is requiredr/   N)	r   r   r   r2   r3   r   r1   r   r   )r*  r   X_sparseencoderr   r   r!   s          r#   test_ordinal_encoder_sparser.    s     	)Y'(AQHGBG	y	0 H	y	0 (h'( ##A&G"7+N	y	0 2!!.12 2 ( (
2 2s$   C) C5D)C25C>D
c                  B   t        j                  g d      ddt         j                  f   } t        g dgdd      }|j	                  |        t        g dgd      }t        j                  t        d	
      5  |j	                  |        ddd       y# 1 sw Y   yxY w)zCheck OrdinalEncoder.fit works with unseen category when
    `handle_unknown="use_encoded_value"`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19872
    )r   r   r   r   r   r   N)rA   r   r   r7  rE  )rL   r&   r:  r,   r  r.   r/   )r   r   newaxisr   r1   r2   r3   r4   )r   r  s     r#   -test_ordinal_encoder_fit_with_unseen_categoryr1    s     	#$Q

]3A	<0CSW
B FF1I	J<	HB	z)C	D 
q	  s   :BBr  AAOr  ro  c                     t        dd      }|j                  |        |j                  |      }t        |ddgg       y)zChecks that `OrdinalEncoder` transforms string dtypes.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19872
    r7  ir9  r   N)r   r1   r5   r	   )r  ro  r   r   s       r#   1test_ordinal_encoder_handle_unknown_string_dtypesr5  &  s;    * (;2
NCGGGmmF#GGr1gY'r%   c                  8   t        j                  g d      j                  dd      } t               j	                  |       }t        |j                  t        j                  | d      j                         |j                  |       }t        |dgdgdgdgg       y)	zCheck that `OrdinalEncoder` accepts Python integers that are potentially
    larger than 64 bits.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20721
    )l   	HP
1& l   	H]viel   	 :?i}Ga l   IRK2e6krA   r   r   )axisr   r   N)
r   r   rD   r   r1   r
   r   sortrN   r5   )r   r-  r   s      r#   #test_ordinal_encoder_python_integerr9  B  s     		
	 gb!n  ""1%Gw**BGGAA,>,@,@A"Gw!qcA3 45r%   c                      t        j                  d      } g d}| j                  g dg|      }t               j	                  |      }|j                         }t        ||       y)z-Check feature names out is same as the input.rR   )rT   r  rS   r  r  N)r2   rZ   r[   r   r1   r   r
   )r\   r  r   r   feature_names_outs        r#   .test_ordinal_encoder_features_names_out_pandasr<  V  sX    			X	&BE
i[%0A



q
!C113u/0r%   c                  &   t        j                  dgdgt         j                  ggt              } t	        dt         j                  d      j                  |       }|j                  |       }t        |dgdgdgg       t        j                  d	gt         j                  ggt              }|j                  |      }t        |t         j                  gdgg       |j                  |      }|d   d   J t        j                  |d   d         sJ y
)zECheck interactions between encode_unknown and missing value encoding.rS   rT   rI   r7  r&   r:  r  r   r   r  N)
r   r   r   r   r   r1   r5   r	   r   r  )r   r  r   ro  r  X_roundtrips         r#   0test_ordinal_encoder_unknown_missing_interactionrA  b  s     	3%#)8A	*ff 
 
c!f	  ll1oGGqcA3-. XXurvvh'v6F<<'LLBFF8bT"23 &&|4K q>!$$$ 88KN1%&&&r%   with_pandasc                 t   t        j                  ddgddgdt         j                  ggt              }d}| r0t	        j
                  d      }|j                  |d	d
g      }|dz   }n|dz   }t        d      }t	        j                  t        |      5  |j                  |       ddd       y# 1 sw Y   yxY w)zXCheck OrdinalEncoder errors when encoded_missing_value is used by
    an known category.rS   r  rT   r   r  rI   zTencoded_missing_value \(1\) is already used to encode a known category in features: rR   letterpetr  z	\['pet'\]z\[1\]r   r  r/   N)r   r   r   r   r2   rZ   r[   r   r3   r4   r1   )rB  r   	error_msgr\   r  s        r#   0test_ordinal_encoder_encoded_missing_value_errorrG    s     	3,esBFFm<FKA
	 
   *LLXu$5L6,	(		a	0B	z	3 
q	  s   B..B7z4X_train, X_test_trans_expected, X_roundtrip_expected1c                    t        dt        j                  t        j                        j                  |       }t        j                  dgt        j                  gdgg      }|j                  |      }t        ||       |j                  |      }|j                  d   }t        |      D ]A  }||df   }	||df   }
|	|
J t        |	      rt        j                  |
      r9J |
|	k(  rAJ  y)znCheck transform when unknown_value and encoded_missing_value is nan.

    Non-regression test for #24082.
    r7  r?  rH  rT   r   N)r   r   r   r1   r   r5   r	   r   r   r[  r   r  )r  X_test_trans_expectedX_roundtrip_expectedr  ro  r  r@  	n_samplesr]  expected_valvals              r#   9test_ordinal_encoder_unknown_missing_interaction_both_nanrO    s    4 
*ff ff
 
c'l	  XXurvvh./F<<'L L"78&&|4K$**1-I9 	'+AqD1!Q$;;<(88C= =,&&&	'r%   c                  L   t        j                  d      } | j                  ddgddgd      }t               }|j	                  d       d}t        j
                  t        |	      5  |j                  |       d
d
d
       t        d      j	                  d      }t        d      j	                  d      }|j                  |      }|j                  |      }t        |j                         |       t        |j                         |j                         y
# 1 sw Y   xY w)z*Check OneHotEncoder works with set_output.rR   rS   rT   r   r   rU   r  zCPandas output does not support sparse data. Set sparse_output=Falser/   NFr   default)r2   rZ   r[   r   r  r3   r4   r   r	   to_numpyr
   r   r  )r\   r]   r   r0   ohe_default
ohe_pandas	X_defaultX_pandass           r#   test_one_hot_encoder_set_outputrW    s    			X	&B<<sCj1v67D
/CNNXN&QE	z	/  $   e4??)?TKU3>>>RJ))$/I''-HH%%'3z7798;K;KL   s   'DD#c                     t        j                  d      } | j                  ddgddgd      }t               j	                  d      }t               j	                  d      }|j                  |      }|j                  |      }t        |j                         |       t        |j                         |j                         y	)
z+Check OrdinalEncoder works with set_output.rR   rS   rT   r   r   rU   rQ  r  N)r2   rZ   r[   r   r  r   r	   rR  r
   r   r  )r\   r]   ord_default
ord_pandasrU  rV  s         r#   test_ordinal_set_outputr[    s    			X	&B<<sCj1v67D "--	-BK!,,x,@J))$/I''-HH%%'3z7798;K;KLr%   c                     g dddgg} t        |       }|j                  ddgg       t        |       t        |j                        k(  sJ t	        |j                        D ])  \  }}|j
                  t        k(  sJ t        | |   |       + y)zjCheck that the categories_ dtype is `object` for string categories

    Regression test for gh-25171.
    )asmmaseasrasacsrH  2r   r]  N)r   r1   r  r   	enumeraterJ   r   r
   )rL   r   nr   s       r#    test_predefined_categories_dtypere    s    
 6SzBJ
:
.CGGdC[Mz?c#//2222COO, /3yyF""":a=#./r%   c                  `   t        j                  dgdgt         j                  ggt              } t	        d      j                  |       }t        |dgdgdgg       t	        dd	      j                  |       }t        j                  d
gg      }|j                  |      }t        |dgg       y)zBCheck missing value or unknown encoding can equal the cardinality.r  r   rI   r   r  r   r   r7  r9  snakeN)	r   r   r   r   r   r   r	   r1   r5   )r   r   r   ro  s       r#   1test_ordinal_encoder_missing_unknown_encoding_maxrh    s    
5'E7RVVH-V<A15CCAFGGqcA3_-
(;1
M
Q
QRS
TCXXyk"FmmF#GGqcU#r%   c                  H   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   dgdz  z   gt              j                  } t	        dd	d
      j                  |       }t        |j                         g d       |j                  d   |j                  d      dk(  sJ t        j                  dgdz  dgdz  z   dgdz  z   gt              j                  } t	        dd	d      j                  |       }t        |j                         dg       |j                  d   |j                  d      dk(  sJ t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   dgdz  z   gt              j                  } t	        dd	dg      j                  |       }t        |j                         g d       |j                  d   |j                  d      dk(  sJ t	        dd	d      j                  |       }t        |j                         g d       |j                  J y)zkCheck drop_idx is defined correctly with infrequent categories.

    Non-regression test for gh-25550.
    rS   r   rT   r+   r  r   r  rI   Fr   )r  r   r   )r  x0_dx0_er  r   rd   r   r  )r  r  rk  r  N)r  r  rj  rk  r  )
r   r   r   rN   r   r1   r
   r   r   r'  )r   r   s     r#   #test_drop_idx_infrequent_categoriesrl    s&   
 	
cUQY	#	*cUQY	6#	BC6	a  au7
K
O
OPQ
RC!!#%V ??1cmmA./3666
3%!)seai'3%"*45VDFFA
au;
O
S
STU
VCs0025L4MN??1cmmA./3666

cUQY	#	*cUQY	6#	BC6	a  auC5
I
M
Ma
PC!!#%V ??1cmmA./3666
au4
H
L
LQ
OC!!#A ==   r%   c                    t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   g      j                  }t        dd	d
d| j	                  |      }t        |j                  g dg       t        |j                  ddgg       dgdgdgdgdgg}dgdgdgdgd
gg}|j                  |      }t        ||       |j                  |      }dgdgdgdgdgg}t        ||       y)zGTest parameters for grouping 'a', and 'd' into the infrequent category.rS   r   rT   r#  r  rd   r   r   r7  rA   r9  r  r  r   r   r   r  Nr   )r   r   rN   r   r1   r
   r   r  r5   r	   r   )r  r  ordinalro  expected_transr   r  expected_inverses           r#   ,test_ordinal_encoder_infrequent_three_levelsrq  6  s$    hh	SEBJ.#;seaiGHIKKG *"@F	c'l  w**-A,BCw55c
|DecUSEC53%0FcA3aS2$/N'GG^,))'2I					 y"23r%   c                     t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   gt        	      j                  } t	        g d
gddd      j                  |       }t        |j                  g d
g       t        |j                  ddgg       dgdgdgdgdgg}dgdgdgdgdgg}|j                  |      }t        ||       |j                  |      }dgdgdgdgdgg}t        ||       y)zTest that the order of the categories provided by a user is respected.

    In this case 'c' is encoded as the first category and 'b' is encoded
    as the second one.
    rS   r   rT   r#  r  rd   r   r   rI   r  r7  rA   )rL   r  r&   r:  r  r   r   r   r  N)r   r   r   rN   r   r1   r
   r   r  r5   r	   r   )r  rn  ro  ro  r   r  rp  s          r#   6test_ordinal_encoder_infrequent_three_levels_user_catsrs  ]  s,    hh
cURZ	3%"*	,uqy	89a  ()*	
 
c'l  w**-A,BCw55c
|DecUSEC53%0FcA3aS2$/N'GG^,))'2I					 y"23r%   c                     t        j                  g dg df      } t        d      j                  |       }t	        |j
                  d   ddg       |j
                  d   J ddgddgg}ddgddgg}|j                  |      }t        ||       |j                  |      }t        j                  ddgd	dggt        
      }t	        ||       y)zETest when feature 0 has infrequent categories and feature 1 does not.r  r  r   r  r   r   r   Nr  rI   )r   column_stackr   r1   r
   r  r5   r	   r   r   r   )r   rn  ro  ro  r   r  rp  s          r#   %test_ordinal_encoder_infrequent_mixedrw    s     	46QRSAA.2215Gw55a81a&A))!,444!fq!fF!fq!f%N'GG^,))'2Ixx!Q*>)B C6Ry"23r%   c            	      z   t        j                  d      } | j                  g d      }| j                  g dg d| j	                  dgdz  dgdz  z   d	gz   d
gz   |      dg d      }t        d      j                  |      }t        |j                  d   ddg       t        |j                  d   g d       t        |j                  d   d
d	g       | j                  g dg d| j	                  dgd	gz   d
gz   dgz   |      dg d      }g dg dg dg dg}|j                  |      }t        ||       y)zHTest infrequent categories with a pandas DataFrame with multiple dtypes.rR   )birdr   r  rg  r  r  r  r+   r   r   rg  ry  rI   )r  rN  r  r  ru  r   rS   rT   r   r  r   )rS   rT   r  r  )rg   r   rd   r   )r   r   r   )r   r   r   )r   r   r   r  N)r2   rZ   CategoricalDtyper[   r   r   r1   r
   r  r5   r	   )r\   categorical_dtyper   rn  ro  ro  r   s          r#   :test_ordinal_encoder_infrequent_multiple_categories_dtypesr|    s[    
		X	&B++,KL
@199!ugk)WI5@' % 	
 . 	 
	A A.2215G w55a83*Ew55a8*Ew55a867:KL\\'!997)#vh.%8' % 	
 .  
F  IyAN'GG^,r%   c                     t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   t         j                  gz   gt        	      j                  } t        d
ddd      j                  |       }t        |j                  g dg       t        j                  dgdgdgdgdgt         j                  ggt        	      }dgdgdgdgdgdgg}|j                  |      }t        ||       y)zJCheck behavior of unknown_value and encoded_missing_value with infrequent.rS   r   rT   r#  r  rd   r   r   rI   r7  r   )r&   r:  r  r  r  r  r   r   N)r   r   r   r   rN   r   r1   r
   r  r5   r	   )r  rn  ro  ro  r   s        r#   .test_ordinal_encoder_infrequent_custom_mappingr~    s    hh
cURZ	3%"*	,uqy	8BFF8	CDFa  *	
 
c'l  w557HIXXusecUSEC5266(C6RFcA3aS1#s3N'GG^,r%   c                 d   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   gt        	      j                  }t	        di | d
ddj                  |      }t	        d
d      j                  |      }dgdgdgdgdgg}t        |j                  |      |j                  |             y)zMAll categories are considered frequent have same encoding as default encoder.rS   r   rT   r#  r  rd   r   r   rI   r7  rA   r9  r  Nr   r   r   r   rN   r   r1   r	   r5   )r  r  adjusted_encoderdefault_encoderro  s        r#   !test_ordinal_encoder_all_frequentr    s     hh
cURZ	3%"*	,uqy	89a  & 
!4B	c'l  %*"	c'l  ecUSEC53%0F""6*O,E,Ef,Mr%   d   c                 "   t        j                  dgdz  dgdz  z   dgdz  z   dgdz  z   gt        	      j                  }t	        di | d
ddj                  |      }dgdgdgdgdgg}t        |j                  |      dgdgdgdgdgg       y)zAWhen all categories are infrequent, they are all encoded as zero.rS   r   rT   r#  r  rd   r   r   rI   r7  rA   r9  r  r   Nr   r  )r  r  r-  ro  s       r#   #test_ordinal_encoder_all_infrequentr    s     hh
cURZ	3%"*	,uqy	89a   
!4B	c'l  ecUSEC53%0FG%%f-aS1#sRD/IJr%   c                     t        j                  t         j                  gdz  dgdz  z   dgdz  z   dgz   dgz   gt              j                  } t        d	
      j                  |       }t        j                  dddt         j                  ggt              j                  }|j                  |      }t        |dgdgdgt         j                  gg       y)z5Check behavior when missing value appears frequently.r#  r  rd   r   r   rg  deerrI   r   ru  r   r   r   N	r   r   r   r   rN   r   r1   r5   r	   r   rn  ro  r   s       r#   -test_ordinal_encoder_missing_appears_frequentr  
	  s    

&&B%2	%!	3wi	?6(	JK	 a  A.2215GXXrvv67vFHHF'GGqcA3bffX67r%   c            	         t        j                  t         j                  gdgdz  z   dgdz  z   dgz   dgz   dgdz  d	gdz  z   gt        
      j                  } t        d      j                  |       }t        j                  ddgdd	gt         j                  d	gdd	gddggt        
      }|j                  |      }t        |ddgddgt         j                  dgddgddgg       y)z7Check behavior when missing value appears infrequently.r  rd   r   r   rg  r  redrH  greenrI   r+   )r  r   r   r   Nr  r  s       r#   /test_ordinal_encoder_missing_appears_infrequentr  	  s    
 	VVHw|#ugk1WI=HGaK7)a-'	
 	 a  1-11!4GXXeWVVWGEN	
 	F 'GGq!fq!frvvqkAq6Aq6JKr%   c                     t        j                  dgdgdggt              } | g dg      }t        j                  t
              5  |j                  |       ddd       y# 1 sw Y   yxY w)a!  Check that we raise a `NotFittedError` by calling transform before fit with
    the encoders.

    One could expect that the passing the `categories` argument to the encoder
    would make it stateless. However, `fit` is making a couple of check, such as the
    position of `np.nan`.
    rV   rW   ra  rI   r`  r   N)r   r   r   r2   r3   r   r5   )r  r   r-  s      r#   test_encoder_not_fittedr  3	  s]     	3%#&f5A/!23G	~	& !  r   )r   re  numpyr   r2   scipyr   sklearn.exceptionsr   sklearn.preprocessingr   r   sklearn.utils._missingr   sklearn.utils._testingr   r	   r
   sklearn.utils.fixesr   r$   markparametrizer;   rE   r  float32r/  rP   r^   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r  r\  str_r   rN   r  r
  r  r  r  r  r   r+  r1  r3  r5  rC  rN  rJ  rL  rO  rT  r^  rc  rh  rq  rz  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r	  r  r  r  r  r  r  r"  r$  r&  r)  r.  r1  r5  r9  r<  rA  rG  rM   rO  rW  r[  re  rh  rl  rq  rs  rw  r|  r~  r  r  r  r  r  r   r%   r#   <module>r     s   	     - ? 0 
 /@. )+TU# V#. )+TU& V&$ "((BJJ

)KL2::rzz(JK
; L M
; "((BJJ

)KLA MA92xJ$4	9
 	(+z*+#%67vF/C#78G/Cuu#=>fM"O4FC/C#67vF/Cut#<=VL			  .G/.G )+TUUDM2$1,$ 2 3 V,$^ UDM2b'Ar7QG	$y)Y&GHS\E3<'3%Fo?	
	'	 3'*7 !=>'CD
C E ?
C E?#;<1vxrxxc
';<= > = E?#;< = "+r{	#uenrd%;RZZH	Aq6Aq6"	#q!fqc]BJJ?BHHsElS%L1@3Z%!JJ	

 
C<#u.	/3*ug1FP	Aq6BFFA;'	(Arvv;*<bjjIBHHsBFFmdBFF^4FC4[266(#JJ	
 BHHsE%L)D%,+?@O4[5<.)JJ	
*	/   B7C B7" )+TU BHHsCj\022BHHsCj\022JJ		
 BHHq!fXW-//BHHq!fXW-//KHH		
 BHHsCj\022BHHsCj\022RXXo&'JJ		
 BHHtSk]&133BHHtSk]&133		
 BHHsCj\022BHHsBFFm_F355		
 BHHsDk]&133BHHsBFFm_F355		
?%L	Q  0bAc0 VdA($ ]N$CD
 E

?7 	66"#	./	()
 	&  66&0 	(+{+,#%67vF
 	'  22  BHHsCj\022BHHsCj\022JJ		
 BHHq!fXW-//BHHq!fXW-//KHH		
 BHHsCj\022BHHsCj\022RXXo&'JJ		
( 	3-  010"$- 5#,/- 0-6	868, +w!78) 9)2 2664u*FG!1 H!1H 5!*.A!BCB DB T5M'7JK'=!9?RS. T L.$ ]N$CD? E? 	1	"	$q1r2	 1E0F'GHI I	I6 +w!>?C @C. 3%#0 1" 	1	!	!	$	$q1q1QQ< 'C5!12/ 3/. 3%#0 10 !a8?A:NO;;2,6!,H;$X,v>,B bA$N#OP
$ Q
$ a1$M#NO) P)  !#M 'EF* G*6* 2664.9G :G,. )+TU((;<, = V,< )+TU"B V"BJ )+TU!E V!EH )+TU) V)4( 02662,?" @"" ((;<02662,?% @ =%>  3-7993*V4663RVV,F;<

	 3-7993*V4663RVV,F;<

	 3-

;==3%

3553RVV,-.

	%4	9  !DE!D$ ]N$CD E ! BHHsBFFC()*,,BHHsBFFC()*,,BHHseW	
 BHHo&'))BHHo&'))BHHrvvhZ 	
 BHHsBFFC()8::BHHsBFFC()*,,BHHseWF+	
 BHHo&f577BHHo&'))BHHrvvhZv.	
!24324 .92 :2," 
4+c*4+c* 
s3*S)3*S)	( 	(6(	1'< u6 72 :
 BHHsecU^62S266(RVVH%BJJvv.f=	
 BHHrvvhu-V<S266(RVVH%BJJx"&&2&A	
&''&'BM.M /"	$!!H 	1	!	!	$	$q1q1446!4H4*--`-* 	1	!( 	1	#
K
K
8L8 ]N$CD Er%   