
    'YHh7                         d Z ddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZ g dZ G d d	      Z G d
 de	      Z G d de	      Zy)aY  
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types

References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
    Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
    and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
    http://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
    with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J. Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
    cumulative distribution function."
    Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
    with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
    regression" Statistica Sinica 14(2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
        Regression" Journal of Business & Economics Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
        Categorical Predictor Variables in Nonparametric Regression
        Models", 2006, Econometric Reviews 25, 523-544

    N)optimize)
mquantiles)KDEMultivariate	KernelReg)gpkeLeaveOneOut_get_type_pos_adjust_shape)SingleIndexModel
SemiLinear	TestFFormc                   $    e Zd ZdZddZd Zd Zy)r   a]  
    Nonparametric test for functional form.

    Parameters
    ----------
    endog : list
        Dependent variable (training set)
    exog : list of array_like objects
        The independent (right-hand-side) variables
    bw : array_like, str
        Bandwidths for exog or specify method for bandwidth selection
    fform : function
        The functional form ``y = g(b, x)`` to be tested. Takes as inputs
        the RHS variables `exog` and the coefficients ``b`` (betas)
        and returns a fitted ``y_hat``.
    var_type : str
        The type of the independent `exog` variables:

            - c: continuous
            - o: ordered
            - u: unordered

    estimator : function
        Must return the estimated coefficients b (betas). Takes as inputs
        ``(endog, exog)``.  E.g. least square estimator::

            lambda (x,y): np.dot(np.pinv(np.dot(x.T, x)), np.dot(x.T, y))

    References
    ----------
    See Racine, J.: "Consistent Significance Testing for Nonparametric
    Regression" Journal of Business & Economics Statistics.

    See chapter 12 in [1]  pp. 355-357.
    c                     || _         || _        || _        || _        || _        || _        t        |||      j                  | _        | j                         | _	        y )N)bwvar_type)
endogexogr   fform	estimatornbootr   r   _compute_sigsig)selfr   r   r   r   r   r   r   s           h/var/www/html/planif/env/lib/python3.12/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.py__init__zTestFForm.__init__P   sR    
	 
"
!$2ADD$$&    c                    | j                   }| j                  }| j                  ||      }| j                  ||      }t	        j
                  |      d   }||z
  }|t	        j                  |      z
  }| j                  |      | _        t	        j                  d      }d|z
  dz  }d|z   dz  }	||z  }
|	|z  }|	|z  }t	        j                  | j                  df      }t        | j                        D ]  }|j                         }t        j                  j                  dd|f      }||k  }|
|   ||<   ||z   }| j                  ||      }| j                  ||      }||z
  }| j                  |      ||<    || _        d}| j                  t#        |d      kD  rd}| j                  t#        |d	      kD  rd
}| j                  t#        |d      kD  rd}|S )Nr   g      @   g       @sizezNot Significantg?*gffffff?z**gGz?z***)r   r   r   r   npshapemean_compute_test_stat	test_statsqrtemptyr   rangecopyrandomuniformboots_resultsr   )r   YXbmnresidsqrt5fct1fct2u1u2rI_distju_bootprobindY_bootb_hatm_hat
u_boot_hatr   s                          r   r   zTestFForm._compute_sigZ   s   JJIINN1a JJq!HHQKNA&007E	RE	RE\E\5L4::a.)tzz" 
	<AWWYF99$$Qq!$6D(CS'F3KZFNN61-EJJq%(E%J//
;F1I
	< $>>Jvs33C>>Jvt44C>>Jvt44C
r   c           	      f   t        j                  |      d   }t        | j                        }t        |d d d f         j	                         }d}d}t        |      D ]  \  }}t        |      }	t        j                  |	      }	t        | j                  | | j                  |d d f    | j                  d      }
||   |	z  |
z  }|	j                  |
j                  k(  sJ ||j                         z  }||dz  j                         z  }t        j                  |      dk(  sJ t        j                  |      dk(  rJ  |d||dz
  z  z  z  }t        | j                        d   }| j                  |   j                         }|d|z  ||dz
  z  z  z  }||z  t        j                  ||z        z  }|S )Nr   F)datadata_predictr   tosum   r   g      ?)r"   r#   r   r   __iter__	enumeratenextsqueezer   r   r   sumr    r	   prodr'   )r   ur2   XLOOuLOOivalS2iX_not_iu_jKf_iix_conthpTs                  r   r%   zTestFForm._compute_test_stat   s   HHQKN499%1QtV9%..0#D/ 	$JAwt*C**S/CTWWG8499QT?:J"mm5:AQ4#:>C99'''CGGID36,,. B774=A%%%772;!###	$ 	a1q5k"".q1WWW""$
a"fQU$$HrwwrBw''r   N)d   )__name__
__module____qualname____doc__r   r   r%    r   r   r   r   ,   s    "F'$Lr   r   c                   0    e Zd ZdZd Zd Zd ZddZd Zy)	r   a  
    Single index semiparametric model ``y = g(X * b) + e``.

    Parameters
    ----------
    endog : array_like
        The dependent variable
    exog : array_like
        The independent variable(s)
    var_type : str
        The type of variables in X:

            - c: continuous
            - o: ordered
            - u: unordered

    Attributes
    ----------
    b : array_like
        The linear coefficients b (betas)
    bw : array_like
        Bandwidths

    Methods
    -------
    fit(): Computes the fitted values ``E[Y|X] = g(X * b)``
           and the marginal effects ``dY/dX``.

    References
    ----------
    See chapter on semiparametric models in [1]

    Notes
    -----
    This model resembles the binary choice models. The user knows
    that X and b interact linearly, but ``g(X * b)`` is unknown.
    In the parametric binary choice models the user usually assumes
    some distribution of g() such as normal or logistic.
    c                    || _         t        |      | _        | j                   d   | _         t        |d      | _        t        || j                        | _        t        j                  | j
                        d   | _        | j                   | _	        d| _
        d| _        d| _        | j                  | _        | j                         \  | _        | _        y )Nr   r   gaussian	wangryzinaitchisonaitken)r   lenrV   r
   r   r   r"   r#   nobs	data_typeckertypeokertypeukertype_est_loc_linearfunc	_est_b_bwr0   r   )r   r   r   r   s       r   r   zSingleIndexModel.__init__   s     Xa("5!,
!$/	HHTYY'*	"#)((	..*r   c                    t         j                  j                  | j                  dz   f      }t	        j
                  | j                  |d      }|d| j                   }|| j                  d  }| j                  |      }||fS )Nr   r   r   disp)r"   r+   r,   rV   r   fmincv_loo_set_bw_boundsr   params0b_bwr0   r   s        r   rn   zSingleIndexModel._est_b_bw   sp    ))##$&&1*#8}}T[[':466N$&&']  $"ur   c                 $   t        j                  |      }|d| j                   }|| j                  d  }t        | j                        }t        | j
                        j                         }d}t        |      D ]  \  }}t        |      }	| j                  ||	t        j                  ||      d d d f    t        j                  | j                  ||dz   d d f   |             d   }
|| j
                  |   |
z
  dz  z  } || j                  z  S )Nr   r   r   r   rE   rG   )r"   asarrayrV   r   r   r   rH   rI   rJ   rm   dotrg   )r   paramsr0   r   LOO_XLOO_YLrS   rT   r.   Gs              r   rs   zSingleIndexModel.cv_loo   s   F#1tvvDFFG_DII&DJJ'002#E* 	*JAwUA		"ARVVGQ-?$-G,G(*tyy1Q3/BA(F'F  HHIKA $**Q-!#))A	* 499}r   Nc                 h   || j                   }nt        || j                        }t        j                  |      d   }t        j
                  |f      }t        j
                  || j                  f      }t        |      D ]  }| j                  | j                  | j                  t        j                  | j                   | j                        d d d f   t        j                  |||dz   d d f   | j                              }|d   ||<   t        j                  |d         }|||d d f<    ||fS )Nr   r   rE   )r   r
   rV   r"   r#   r(   r)   rm   r   r   r{   r0   rK   )r   rE   N_data_predictr$   mfxrS   mean_mfxmfx_cs           r   fitzSingleIndexModel.fit   s   99L(tvv>L,/2xx)*hh/0~& 	Ayy$**!#		466!:1T6!B.0ff\!AaC%(5KDFF.S ! UH qkDGJJx{+EC1I	 Syr   c                     d}|dt        | j                        z   dz   z  }|dt        | j                        z   dz   z  }|d| j                  z   dz   z  }|dz  }|dz  }|S ) Provide something sane to print.zSingle Index Model 
Number of variables: K = 
zNumber of samples:   nobs = Variable types:      BW selection method: cv_ls
Estimator type: local constant
strrV   rg   r   r   reprs     r   __repr__zSingleIndexModel.__repr__  su    &+c$&&k9D@@.TYY?$FF'$--7$>>3377r   )N	r\   r]   r^   r_   r   rn   rs   r   r   r`   r   r   r   r      s!    &N+(&r   r   c                   0    e Zd ZdZd Zd Zd ZddZd Zy)	r   a}  
    Semiparametric partially linear model, ``Y = Xb + g(Z) + e``.

    Parameters
    ----------
    endog : array_like
        The dependent variable
    exog : array_like
        The linear component in the regression
    exog_nonparametric : array_like
        The nonparametric component in the regression
    var_type : str
        The type of the variables in the nonparametric component;

            - c: continuous
            - o: ordered
            - u: unordered

    k_linear : int
        The number of variables that comprise the linear component.

    Attributes
    ----------
    bw : array_like
        Bandwidths for the nonparametric component exog_nonparametric
    b : array_like
        Coefficients in the linear component
    nobs : int
        The number of observations.
    k_linear : int
        The number of variables that comprise the linear component.

    Methods
    -------
    fit
        Returns the fitted mean and marginal effects dy/dz

    Notes
    -----
    This model uses only the local constant regression estimator

    References
    ----------
    See chapter on Semiparametric Models in [1]
    c                    t        |d      | _        t        ||      | _        t        |      | _        t        || j                        | _        || _        t        j                  | j                        d   | _	        || _
        | j                  | _        d| _        d| _        d| _        | j                  | _        | j#                         \  | _        | _        y )Nr   r   rc   rd   re   )r
   r   r   rf   rV   exog_nonparametrick_linearr"   r#   rg   r   rh   ri   rj   rk   rl   rm   rn   r0   r   )r   r   r   r   r   r   s         r   r   zSemiLinear.__init__;  s    "5!,
!$1	X"/0BDFF"K HHTYY'*	 "#)((	..*r   c                     t         j                  j                  | j                  | j                  z   f      }t        j                  | j                  |d      }|d| j                   }|| j                  d }||fS )z
        Computes the (beta) coefficients and the bandwidths.

        Minimizes ``cv_loo`` with respect to ``b`` and ``bw``.
        r   r   rp   N)r"   r+   r,   r   rV   r   rr   rs   ru   s        r   rn   zSemiLinear._est_b_bwK  sj     ))##$--$&&*@)C#D}}T[[':T]]#$--.!"ur   c           
         t        j                  |      }|d| j                   }|| j                  d }t        | j                        }t        | j
                        j                         }t        | j                        j                         }t        j                  | j                  |      dddf   }d}t        |      D ]  \  }	}
t        |      }t        |      }t        j                  |
|      dddf   }||z
  }| j                  ||| | j                  |	ddf          d   }||	ddf   }|| j
                  |	   |z
  |z
  dz  z  } |S )a  
        Similar to the cross validation leave-one-out estimator.

        Modified to reflect the linear components.

        Parameters
        ----------
        params : array_like
            Vector consisting of the coefficients (b) and the bandwidths (bw).
            The first ``k_linear`` elements are the coefficients.

        Returns
        -------
        L : float
            The value of the objective function

        References
        ----------
        See p.254 in [1]
        r   Nry   rG   )r"   rz   r   r   r   r   rH   r   r{   rI   rJ   rm   )r   r|   r0   r   r}   r~   LOO_ZXbr   iirT   r.   ZXb_jYxr   lts                    r   rs   zSemiLinear.cv_looX  sM   * F#1t}}%DMMN#DII&DJJ'002D334==?VVDIIq!!D&)$U+ 	0KBUAUA66'1%af-DTB		"BaR(,(?(?A(F'F  HHIKABEB$**R.2%)a//A	0 r   Nc           
      v   || j                   }nt        || j                        }|| j                  }nt        || j                        }t        j                  |      d   }t        j                  |f      }t        j                  || j                  f      }| j                  t        j                  || j                        dddf   z
  }t        |      D ][  }| j                  | j                  || j                  ||ddf         }|d   ||<   t        j                  |d         }	|	||ddf<   ] ||fS )z+Computes fitted values and marginal effectsNr   r   r   )r   r
   r   r   rV   r"   r#   r(   r   r{   r0   r)   rm   r   rK   )
r   exog_predictexog_nonparametric_predictr   r$   r   r.   rS   r   r   s
             r   r   zSemiLinear.fit  s(    99L(t}}EL%-)-)@)@&)67QSWSYSY)Z&"<=a@xx)*hh/0JJdff5af==~& 	Ayy!T-D-D.HA.N ! PHqkDGJJx{+EC1I	 Syr   c                     d}|dt        | j                        z   dz   z  }|dt        | j                        z   dz   z  }|d| j                  z   dz   z  }|dz  }|dz  }|S )r   z'Semiparamatric Partially Linear Model 
r   r   zNumber of samples:   N = r   r   r   r   r   s     r   r   zSemiLinear.__repr__  su    9+c$&&k9D@@+c$))n<tCC'$--7$>>3377r   )NNr   r`   r   r   r   r     s"    ,\+ 'R4r   r   )r_   numpyr"   scipyr   scipy.stats.mstatsr   statsmodels.nonparametric.apir   r   &statsmodels.nonparametric._kernel_baser   r   r	   r
   __all__r   r   r   r`   r   r   <module>r      sS   >   ) D4 4 :l l^ny nbW Wr   