
    'YHh;<                         d Z ddlZddlZddlZddlmZ ddl	m
Z
 d ZddZddZddZdd	Zd
 ZddZd Zd Zd ZddZd Zd Zd Z G d de      ZddZddZy)z
Utility functions models code
    N)_is_using_pandas)
array_likec                 ~    t        | t              r| S t        | t              r| j                  d      S t        |       S )Nlatin1)
isinstancestrbytesdecode)ss    P/var/www/html/planif/env/lib/python3.12/site-packages/statsmodels/tools/tools.pyasstr2r      s3    !S	Au	xx!!1v    c                 @    i }t        |       D ]  \  }}||||z   <    |S )zd
    Helper function to create a dictionary mapping a column number
    to the name in tmp_arr.
    )	enumerate)tmp_arroffsetcol_mapicol_names        r   _make_dictnamesr      s4    
 G ) '8&F
'Nr   c                    t        j                  |       } | j                  dk(  r	| dddf   } |t        j                  |      }|j                  dk(  r	|dddf   }t        j                  t        j
                  |       j                  |       t        j
                  |      j                  |             }| |   ||   fS t        j
                  |       j                  |       }| |   S )a  
    Returns views on the arrays Y and X where missing observations are dropped.

    Y : array_like
    X : array_like, optional
    axis : int
        Axis along which to look for missing observations.  Default is 1, ie.,
        observations in rows.

    Returns
    -------
    Y : ndarray
        All Y where the
    X : ndarray

    Notes
    -----
    If either Y or X is 1d, it is reshaped to be 2d.
       N)npasarrayndimarraylogical_andisnanany)YXaxiskeepidxs       r   drop_missingr$       s    ( 	

1Avv{agJ}HHQK66Q;!T'
A.."((1+//$"7!7"$((1+//$"7!79z1W:%%88A;??4((zr   c                     t        d      )ax  
    Construct a dummy matrix from categorical variables

    .. deprecated:: 0.12

       Use pandas.get_dummies instead.

    Parameters
    ----------
    data : array_like
        An array, Series or DataFrame.  This can be either a 1d vector of
        the categorical variable or a 2d array with the column specifying
        the categorical variable specified by the col argument.
    col : {str, int, None}
        If data is a DataFrame col must in a column of data. If data is a
        Series, col must be either the name of the Series or None. For arrays,
        `col` can be an int that is the (zero-based) column index
        number.  `col` can only be None for a 1d array.  The default is None.
    dictnames : bool, optional
        If True, a dictionary mapping the column number to the categorical
        name is returned.  Used to have information about plain arrays.
    drop : bool
        Whether or not keep the categorical variable in the returned matrix.

    Returns
    -------
    dummy_matrix : array_like
        A matrix of dummy (indicator/binary) float variables for the
        categorical data.
    dictnames :  dict[int, str], optional
        Mapping between column numbers and categorical names.

    Notes
    -----
    This returns a dummy variable for *each* distinct variable.  If a
    a DaataFrame is provided, the names for the new variable is the
    old variable name - underscore - category name.  So if the a variable
    'vote' had answers as 'yes' or 'no' then the returned array would have to
    new variables-- 'vote_yes' and 'vote_no'.  There is currently
    no name checking.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm

    Univariate examples

    >>> import string
    >>> string_var = [string.ascii_lowercase[0:5],
    ...               string.ascii_lowercase[5:10],
    ...               string.ascii_lowercase[10:15],
    ...               string.ascii_lowercase[15:20],
    ...               string.ascii_lowercase[20:25]]
    >>> string_var *= 5
    >>> string_var = np.asarray(sorted(string_var))
    >>> design = sm.tools.categorical(string_var, drop=True)

    Or for a numerical categorical variable

    >>> instr = np.floor(np.arange(10,60, step=2)/10)
    >>> design = sm.tools.categorical(instr, drop=True)

    With a structured array

    >>> num = np.random.randn(25,2)
    >>> struct_ar = np.zeros((25,1),
    ...                      dtype=[('var1', 'f4'),('var2', 'f4'),
    ...                             ('instrument','f4'),('str_instr','a5')])
    >>> struct_ar['var1'] = num[:,0][:,None]
    >>> struct_ar['var2'] = num[:,1][:,None]
    >>> struct_ar['instrument'] = instr[:,None]
    >>> struct_ar['str_instr'] = string_var[:,None]
    >>> design = sm.tools.categorical(struct_ar, col='instrument', drop=True)

    Or

    >>> design2 = sm.tools.categorical(struct_ar, col='str_instr', drop=True)
    zcategorical has been removed)NotImplementedError)datacol	dictnamesdrops       r   categoricalr+   G   s    ` <
==r   c                    t        | d      rddlm}  || d||      S t        j                  |       }|j
                  }|dk(  r
|dddf   }n|j
                  dkD  rt        d      t        j                  |d	      dk(  }|t        j                  |d
k7  d	      z  }|j                         rx|dk(  r|S |dk(  rl|dk(  rt        d      t        j                  |j                  d         }dj                  ||   D cg c]  }t        |       c}      }	t        d|	 d      t        j                  |j                  d         |g}|r|n|ddd   }t        j                  |      S c c}w )aq  
    Add a column of ones to an array.

    Parameters
    ----------
    data : array_like
        A column-ordered design matrix.
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if any column has a constant value. Using 'add' will add a
        column of 1s if a constant column is present.

    Returns
    -------
    array_like
        The original values with a constant (column of ones) as the first or
        last column. Returned value type depends on input type.

    Notes
    -----
    When the input is a pandas Series or DataFrame, the added column's name
    is 'const'.
    Nr   )	add_trendc)trendprependhas_constantr      z)Only implemented for 2-dimensional arraysr"           skipraisezdata is constant.,z
Column(s) z are constant.)r   statsmodels.tsa.tsatoolsr-   r   r   r   
ValueErrorptpallr   arangeshapejoinr   onescolumn_stack)
r'   r0   r1   r-   xr   is_nonzero_constcolumnsr.   colss
             r   add_constantrF      sX   8 d#6S'UU 	

4A66DqyagJ	
!DEEvvaa(A-qCxa006!HW$qy !455))AGGAJ/xx9I1J KAQ KL :dV>!BCC		a A!DbD'A??1 !Ls    E-c                    t        | dd      } t        |dd      }| j                  dk(  r	| dddf   n| } | j                  d   |j                  d   k7  rt        d|j                  d   z        t	        j
                  | |g      }t        j                  j                  |      t        j                  j                  |      k7  ry	y
)a6  
    True if (Q, P) contrast `c` is estimable for (N, P) design `d`.

    From an Q x P contrast matrix `C` and an N x P design matrix `D`, checks if
    the contrast `C` is estimable by looking at the rank of ``vstack([C,D])``
    and verifying it is the same as the rank of `D`.

    Parameters
    ----------
    c : array_like
        A contrast matrix with shape (Q, P). If 1 dimensional assume shape is
        (1, P).
    d : array_like
        The design matrix, (N, P).

    Returns
    -------
    bool
        True if the contrast `c` is estimable on design `d`.

    Examples
    --------
    >>> d = np.array([[1, 1, 1, 0, 0, 0],
    ...               [0, 0, 0, 1, 1, 1],
    ...               [1, 1, 1, 1, 1, 1]]).T
    >>> isestimable([1, 0, 0], d)
    False
    >>> isestimable([1, -1, 0], d)
    True
    r.   r2   )maxdimdr   r   NzContrast should have %d columnsFT)r   r   r>   r:   r   vstacklinalgmatrix_rank)r.   rI   news      r   isestimablerO      s    > 	1c!$A1c"Affk$'
qAwwqzQWWQZ:QWWQZGHH
))QF
C	yyS!RYY%:%:1%==r   c           	      v   t        j                  |       } | j                         } t         j                  j	                  | d      \  }}}t        j
                  |      }|j                  d   }|j                  d   }|t         j                  j                  |      z  }t        t        ||            D ]  }	||	   |kD  rd||	   z  ||	<   d||	<    t        j                  t        j                  |      t        j                  |ddt         j                  f   t        j                  |                  }
|
|fS )z}
    Return the pinv of an array X as well as the singular values
    used in computation.

    Code adapted from numpy.
    Fr   r         ?r4   N)r   r   	conjugaterL   svdcopyr>   maximumreducerangemindot	transposemultiplynewaxis)rB   rcondur   vts_origmncutoffr   ress              r   pinv_extendedre      s    	

1A	Ayy}}Q&HAq"WWQZF	
A
ARZZ&&q))F3q!9 Q4&=ad7AaDAaD	
 &&b!2;;qBJJ/?/1||A$@ AC;r   c                 l   t        j                  |       } t        j                  | t         j                        }t        j                  | j
                        }| }||   | j
                  |   dkD  z  ||<   d| j
                  |   z  |j
                  |<   t         j                  |j
                  |<   |S )z
    Reciprocal of an array with entries less than or equal to 0 set to 0.

    Parameters
    ----------
    x : array_like
        The input array.

    Returns
    -------
    ndarray
        The array with 0-filled reciprocals.
    dtyper   rQ   r   r   
zeros_likefloat64r   flatnan)rB   outnansposs       r   reciprrq     s     	

1A
--
,C88AFFD%C3x166#;?+CH!&&+%CHHSMVVCHHTNJr   c                 l   t        j                  |       } t        j                  | t         j                        }t        j                  | j
                        }| }||   | j
                  |   dk7  z  ||<   d| j
                  |   z  |j
                  |<   t         j                  |j
                  |<   |S )z
    Reciprocal of an array with entries less than 0 set to 0.

    Parameters
    ----------
    x : array_like
        The input array.

    Returns
    -------
    ndarray
        The array with 0-filled reciprocals.
    rg   r   rQ   ri   )rB   rn   ro   non_zeros       r   recipr0rt   /  s     	

1A
--
,C88AFFDuH!(+qvvh/?1/DEHXqvvh//CHHXVVCHHTNJr   c                     t         j                  j                  | dz  d      }t        j                  |      D cg c]  }| dd|f    }}t        j                  t        j
                  |            S c c}w )z
    Erase columns of zeros: can save some time in pseudoinverse.

    Parameters
    ----------
    matrix : ndarray
        The array to clean.

    Returns
    -------
    ndarray
        The cleaned array.
    r2   r   N)r   addrV   flatnonzeror   rZ   )matrixcolsumr   vals       r   clean0r{   G  s]     VV]]619a(F!#!7
8A6!Q$<
8C
888BLL%&& 9s   A5c                    |t         j                  j                  |       }t         j                  j                  | d      \  }}}t        j                  |      }|ddd   }g }t        |      D ]  }|j                  |dd||   f           t        j                  t        j                  |            j                  t         j                        S )a  
    Return an array whose column span is the same as x.

    Parameters
    ----------
    x : ndarray
        The array to adjust, 2d.
    r : int, optional
        The rank of x. If not provided, determined by `np.linalg.matrix_rank`.

    Returns
    -------
    ndarray
        The array adjusted to have full rank.

    Notes
    -----
    If the rank of x is known it can be specified as r -- no check
    is made to ensure that this really is the rank of x.
    NF)full_matricesr8   )r   rL   rM   rS   argsortrW   appendr   rZ   astyperk   )rB   rvrI   r^   ordervaluer   s           r   fullrankr   Z  s    * 	yII!!!$iimmAUm3GAq!JJqME$B$KEE1X %Qq%({^$%::bll5)*11"**==r   c                 D    t        |      }d||<   | j                  |      S )aW  
    Unsqueeze a collapsed array.

    Parameters
    ----------
    data : ndarray
        The data to unsqueeze.
    axis : int
        The axis to unsqueeze.
    oldshape : tuple[int]
        The original shape before the squeeze or reduce operation.

    Returns
    -------
    ndarray
        The unsqueezed array.

    Examples
    --------
    >>> from numpy import mean
    >>> from numpy.random import standard_normal
    >>> x = standard_normal((3,4,5))
    >>> m = mean(x, axis=1)
    >>> m.shape
    (3, 5)
    >>> m = unsqueeze(m, 1, x.shape)
    >>> m.shape
    (3, 1, 5)
    >>>
    r   )listreshape)r'   r"   oldshapenewshapes       r   	unsqueezer   {  s%    > H~HHTN<<!!r   c                 ^   t        j                  t        j                  |       |dk7        }t        j                  | dk7  t        j                  |            }||z   }t        j                  t        j                  |       t        j                  |            }t         j                  ||<   |S )z
    Returns np.dot(left_matrix, right_matrix) with the convention that
    nan * 0 = 0 and nan * x = nan if x != 0.

    Parameters
    ----------
    A, B : ndarray
    r   )r   rY   r   
nan_to_numrm   )ABshould_be_nan_1should_be_nan_2should_be_nanCs         r   nan_dotr     s}     ffRXXa[163Offa1frxx{3O#o5M 	r}}Qq!12AvvAmHr   c                     t        | d|       S )z
    Gets raw results back from wrapped results.

    Can be used in plotting functions or other post-estimation type
    routines.
    _results)getattr)resultss    r   maybe_unwrap_resultsr     s     7J00r   c                   "     e Zd ZdZ fdZ xZS )Buncha  
    Returns a dict-like object with keys accessible via attribute lookup.

    Parameters
    ----------
    *args
        Arguments passed to dict constructor, tuples (key, value).
    **kwargs
        Keyword agument passed to dict constructor, key=value.
    c                 2    t        |   |i | | | _        y N)super__init____dict__)selfargskwargs	__class__s      r   r   zBunch.__init__  s    $)&)r   )__name__
__module____qualname____doc__r   __classcell__)r   s   @r   r   r     s    	 r   r   c                 *   | | S t        | d      }| j                  dk(  r|r| | j                  fS | dfS | j                  dkD  rt        d      |r| j                  nd}|rt        j                  |       dddf   |fS t        j                  |       |fS )a  

    Parameters
    ----------
    x : ndarray, Series, DataFrame or None
        Input to verify dimensions, and to transform as necesary
    ndarray : bool
        Flag indicating whether to always return a NumPy array. Setting False
        will return an pandas DataFrame when the input is a Series or a
        DataFrame.

    Returns
    -------
    out : ndarray, DataFrame or None
        array or DataFrame with 2 dimensiona.  One dimensional arrays are
        returned as nobs by 1. None is returned if x is None.
    names : list of str or None
        list containing variables names when the input is a pandas datatype.
        Returns None if the input is an ndarray.

    Notes
    -----
    Accepts None for simplicity
    Nr2   zx mst be 1 or 2-dimensional.)	r   r   rD   r:   namer   r   pd	DataFrame)rB   ndarray	is_pandasr   s       r   
_ensure_2dr     s    2 	y D)Ivv{aii<d7N	
!788166DDzz!}QW%t++||A$$r   c                    t        | dd      } |dk(  r|| ddt        j                  | dk7  d      f   } | t        j                  | dz  j	                  d            z  } | j
                  | z  } t        j                  j                  | |d	      S |d
k(  rt        j                  j                  | d      \  }t        j                  t        j                  |            }|9|d   | j                  d   z  t        j                  t              j                  z  }t!        ||kD  j	                               S t        j                  j                  | |      S )a(  
    Matrix rank calculation using QR or SVD

    Parameters
    ----------
    m : array_like
        A 2-d array-like object to test
    tol : float, optional
        The tolerance to use when testing the matrix rank. If not provided
        an appropriate value is selected.
    method : {"ip", "qr", "svd"}
        The method used. "ip" uses the inner-product of a normalized version
        of m and then computes the rank using NumPy's matrix_rank.
        "qr" uses a QR decomposition and is the default. "svd" defers to
        NumPy's matrix_rank.

    Returns
    -------
    int
        The rank of m.

    Notes
    -----
    When using a QR factorization, the rank is determined by the number of
    elements on the leading diagonal of the R matrix that are above tol
    in absolute value.
    ra   r2   rJ   ipNr   r3   T)tol	hermitianqrr   )moder   )r   )r   r   r   sqrtsumTrL   rM   scipyr   absdiagr>   finfofloatepsint)ra   r   methodr   abs_diags        r   rM   rM     s   8 	1c"A~aQQ''(aQ((CC!Gyy$$QC4$@@	4\\__QS_)66"''!*%;1+
*RXXe_-@-@@CHsN'')**yy$$QC$00r   )r   )Nr   )NFF)Tr5   )gV瞯<r   )F)Nr   )r   numpyr   pandasr   scipy.linalgr   statsmodels.tools.datar   statsmodels.tools.validationr   r   r   r$   r+   rF   rO   re   rq   rt   r{   r   r   r   r   dictr   r   rM    r   r   <module>r      s       3 3 NP>h7t'T000'&>B!"H.1D  (%V)1r   