o
    d8y                     @  sP  d Z ddlmZ ddlmZmZ ddlmZmZm	Z	 ddl
ZddlmZmZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZ ddlm Z m!Z!m"Z" er`ddl#m$Z$ dyddZ%dzddZ&d{d|ddZ'g dZ(g d Z)d}d$d%Z*d~d)d*Z+	+				,				ddd:d;Z,	<		,		ddd=d>Z-dd?d@Z.	<		,				dddEdFZ/			dddGdHZ0	dddLdMZ1dddNdOZ2		P	dddSdTZ3ddVdWZ4	+			dddYdZZ5	ddd\d]Z6dd`daZ7e7		dddcddZ8e7		dddedfZ9e7dddgdhZ:e7dddidjZ;e8e9dkZ<dddndoZ=ddpdqZ>ddsdtZ?ddwdxZ@dS )z$
Routines for filling missing data.
    )annotations)partialwraps)TYPE_CHECKINGAnycastN)NaTalgoslib)	ArrayLikeAxisAxisIntFnpt)import_optional_dependency)infer_dtype_from)is_array_likeis_numeric_v_string_likeis_object_dtypeneeds_i8_conversion)is_valid_na_for_dtypeisnana_value_for_dtype)Indexmasknpt.NDArray[np.bool_]lengthintc                 C  s8   t | rt| |krtdt|  d| | | } | S )zJ
    Validate the size of the values passed to ExtensionArray.fillna.
    z'Length of 'value' does not match. Got (z)  expected )r   len
ValueError)valuer   r    r!   G/app/.heroku/python/lib/python3.10/site-packages/pandas/core/missing.pycheck_value_size1   s   r#   arrr   returnc           
      C  s   t |\}}tj||d}d}t| rd}t|  }t|}||  }tj| jtd}|D ]1}t| |r5q-|rItj| jtj	d}	| | |k|	|< n| |k}	t
|	tjsZ|	jtdd}	||	O }q-| ri|t| O }|S )a	  
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    )dtypeFT)r&   Zna_value)r   nparrayr   r   Zzerosshapeboolr   Zbool_
isinstancendarrayZto_numpyany)
r$   Zvalues_to_maskr&   Zpotential_naZarr_maskZna_maskZnonnar   xZnew_maskr!   r!   r"   mask_missing@   s,   



r/   Fmethod
str | Noneallow_nearestr*   c                 C  sv   | dv rd S t | tr|  } | dkrd} n| dkrd} ddg}d}|r+|d d}| |vr9td	| d
|  | S )N)NZasfreqZffillpadZbfillbackfillzpad (ffill) or backfill (bfill)nearestz(pad (ffill), backfill (bfill) or nearestzInvalid fill method. Expecting z. Got )r+   strlowerappendr   )r0   r2   Zvalid_methodsZ	expectingr!   r!   r"   clean_fill_methody   s    

r9   )lineartimeindexvalues)r5   zeroslinear	quadraticcubicbarycentrickroghspline
polynomialfrom_derivativespiecewise_polynomialpchipakimacubicspliner6   r<   r   c                 K  sh   | d}| dv r|d u rtdtt }| |vr$td| d|  d| dv r2|js2t|  d| S )	Norder)rD   rE   z7You must specify the order of the spline or polynomial.zmethod must be one of z. Got 'z
' instead.)rC   rG   rH   z4 interpolation requires that the index be monotonic.)getr   
NP_METHODS
SP_METHODSZis_monotonic_increasing)r0   r<   kwargsrK   validr!   r!   r"   clean_interp_method   s   
rQ   howis_valid
int | Nonec                C  s   |dv sJ t | dkrdS | jdkr|jdd}|dkr&|dd  }n|dkr9t | d |ddd	   }|| }|sAdS |S )
aG  
    Retrieves the index of the first valid value.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    how : {'first', 'last'}
        Use this parameter to change between the first or last valid index.
    is_valid: np.ndarray
        Mask to find na_values.

    Returns
    -------
    int or None
    )firstlastr   N      axisrU   rV   )r   ndimr-   Zargmax)r=   rR   rS   ZidxposZ	chk_notnar!   r!   r"   find_valid_index   s   
r]   r3   forwarddata
np.ndarrayrZ   r   Index | Nonelimitlimit_direction
limit_area
fill_value
Any | NonecoercedowncastNonec
                 K  s   zt |}W n ty   d}Y nw |dur)|durtdt| ||||d dS |dus/J td| |||||||d|
 dS )z
    Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill.

    Notes
    -----
    Alters 'data' in-place.
    Nz&Cannot pass both fill_value and method)r0   rZ   rb   rd   )r_   r<   rZ   r0   rb   rc   rd   re   r!   )r9   r   interpolate_2d_interpolate_2d_with_fill)r_   r0   rZ   r<   rb   rc   rd   re   rg   rh   rO   mr!   r!   r"   interpolate_array_2d   s8   
	
rm   r:   c                   s   t |fi  t | jrt| jdd dkr%t|js#tddg d}	 |	vr<td|	 d d	d
urWddg}
 |
vrWtd|
 d dtjd
dt	|d fdd}t
|||  d
S )z
    Column-wise application of _interpolate_1d.

    Notes
    -----
    Alters 'data' in-place.

    The signature does differ from _interpolate_1d because it only
    includes what is needed for Block.interpolate.
    F)compatr;   zStime-weighted interpolation only works on Series or DataFrames with a DatetimeIndexr=   )r^   backwardZbothz*Invalid limit_direction: expecting one of z, got 'z'.Ninsideoutsidez%Invalid limit_area: expecting one of z, got .)Znobsrb   yvaluesr`   r%   ri   c                   s$   t d|  dd d S )NF)indicesrs   r0   rb   rc   rd   re   bounds_errorr!   )_interpolate_1d)rs   re   rt   rO   rb   rd   rc   r0   r!   r"   funcR  s   	
z'_interpolate_2d_with_fill.<locals>.func)rs   r`   r%   ri   )rQ   r   r&   r   r   r   r7   r	   Zvalidate_limit_index_to_interp_indicesr'   apply_along_axis)r_   r<   rZ   r0   rb   rc   rd   re   rO   Zvalid_limit_directionsZvalid_limit_areasrx   r!   rw   r"   rk     s@   

rk   c                 C  sb   | j }t|jr|d}|dkr|}ttj|}|S t|}|dv r/|jtjkr/t	
|}|S )zE
    Convert Index to ndarray of indices to pass to NumPy/SciPy.
    i8r:   )r=   r<   )_valuesr   r&   viewr   r'   r,   asarrayZobject_r
   Zmaybe_convert_objects)r<   r0   ZxarrZindsr!   r!   r"   ry   j  s   



ry   rt   rs   ru   rK   c	                 K  s  t |}
|
 }| sdS | rdS tt|
}t|d|d}|du r'd}tt|}t|d|d}|du r<t|}ttd| t|}|dkrV|tt	|
|dB }n|dkre|tt	|
d|B }ntt	|
||}|d	krx|||B O }n|d
kr|| | }||O }t
|}t|j}|r|d}|tv rt| | }t| |
 | | | || | ||
< nt| | || | |
 f||||d|	||
< |rtj||< dS tj||< dS )a  
    Logic for the 1-d interpolation.  The input
    indices and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place.
    NrU   rR   rS   r   rV   rX   r^   ro   rp   rq   r{   )r0   re   ru   rK   )r   r-   allsetr'   Zflatnonzeror]   ranger   _interp_limitsortedr   r&   r}   rM   ZargsortZinterp_interpolate_scipy_wrapperr   r    nan)rt   rs   r0   rb   rc   rd   re   ru   rK   rO   invalidrP   Zall_nansZfirst_valid_indexZ
start_nansZlast_valid_indexZend_nansZpreserve_nansZmid_nansZis_datetimelikeZindexerr!   r!   r"   rv     sf   





rv   c                 K  sn  | d}t d|d ddlm}	 t|}|	j|	jttd}
t| ddr1| j	
d	|
d	} }|d
kr;|	j|
d
< n|dkrDt|
d< n|dkrLt|
d< g d}||v rj|dkrZ|}|	j| ||||d}||}|S |dkrt|sv|dkr}td| |	j| |fd|i|}||}|S | jjs|  } |jjs| }|jjs| }|
| }|| ||fi |}|S )z
    Passed off to scipy.interpolate.interp1d. method is scipy's kind.
    Returns an array interpolated at new_x.  Add any new methods to
    the list in _clean_interp_method.
    z interpolation requires SciPy.scipy)extrar   interpolate)rB   rC   rF   rG   Z_is_all_datesFr{   rH   rI   rJ   )r5   r>   r?   r@   rA   rE   rE   )kindre   ru   rD   z;order needs to be specified and greater than 0; got order: k)r   r   r   r'   r~   Zbarycentric_interpolateZkrogh_interpolate_from_derivativesgetattrr|   ZastypeZpchip_interpolate_akima_interpolate_cubicspline_interpolateZinterp1dr   r   ZUnivariateSplineflagsZ	writeablecopy)r.   yZnew_xr0   re   ru   rK   rO   r   r   Zalt_methodsZinterp1d_methodsZterpZnew_yr!   r!   r"   r     sV   



r   derint | list[int] | Noneextrapolatec           	      C  s4   ddl m} |jj}|| |dd||d}||S )a  
    Convenience function for interpolate.BPoly.from_derivatives.

    Construct a piecewise polynomial in the Bernstein basis, compatible
    with the specified values and derivatives at breakpoints.

    Parameters
    ----------
    xi : array-like
        sorted 1D array of x-coordinates
    yi : array-like or list of array-likes
        yi[i][j] is the j-th derivative known at xi[i]
    order: None or int or array-like of ints. Default: None.
        Specifies the degree of local polynomials. If not None, some
        derivatives are ignored.
    der : int or list
        How many derivatives to extract; None for all potentially nonzero
        derivatives (that is a number equal to the number of points), or a
        list of derivatives to extract. This number includes the function
        value as 0th derivative.
     extrapolate : bool, optional
        Whether to extrapolate to ouf-of-bounds points based on first and last
        intervals, or to return NaNs. Default: True.

    See Also
    --------
    scipy.interpolate.BPoly.from_derivatives

    Returns
    -------
    y : scalar or array-like
        The result, of length R or length M or M by R.
    r   r   r[   rX   )Zordersr   )r   r   ZBPolyrF   reshape)	xiyir.   rK   r   r   r   r0   rl   r!   r!   r"   r   9  s   $r   c                 C  s(   ddl m} |j| ||d}|||dS )a[  
    Convenience function for akima interpolation.
    xi and yi are arrays of values used to approximate some function f,
    with ``yi = f(xi)``.

    See `Akima1DInterpolator` for details.

    Parameters
    ----------
    xi : array-like
        A sorted list of x-coordinates, of length N.
    yi : array-like
        A 1-D array of real values.  `yi`'s length along the interpolation
        axis must be equal to the length of `xi`. If N-D array, use axis
        parameter to select correct axis.
    x : scalar or array-like
        Of length M.
    der : int, optional
        How many derivatives to extract; None for all potentially
        nonzero derivatives (that is a number equal to the number
        of points), or a list of derivatives to extract. This number
        includes the function value as 0th derivative.
    axis : int, optional
        Axis in the yi array corresponding to the x-coordinate values.

    See Also
    --------
    scipy.interpolate.Akima1DInterpolator

    Returns
    -------
    y : scalar or array-like
        The result, of length R or length M or M by R,

    r   r   rY   )nu)r   r   ZAkima1DInterpolator)r   r   r.   r   rZ   r   Pr!   r!   r"   r   f  s   $r   
not-a-knotbc_typestr | tuple[Any, Any]c                 C  s(   ddl m} |j| ||||d}||S )aq  
    Convenience function for cubic spline data interpolator.

    See `scipy.interpolate.CubicSpline` for details.

    Parameters
    ----------
    xi : array-like, shape (n,)
        1-d array containing values of the independent variable.
        Values must be real, finite and in strictly increasing order.
    yi : array-like
        Array containing values of the dependent variable. It can have
        arbitrary number of dimensions, but the length along ``axis``
        (see below) must match the length of ``x``. Values must be finite.
    x : scalar or array-like, shape (m,)
    axis : int, optional
        Axis along which `y` is assumed to be varying. Meaning that for
        ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
        Default is 0.
    bc_type : string or 2-tuple, optional
        Boundary condition type. Two additional equations, given by the
        boundary conditions, are required to determine all coefficients of
        polynomials on each segment [2]_.
        If `bc_type` is a string, then the specified condition will be applied
        at both ends of a spline. Available conditions are:
        * 'not-a-knot' (default): The first and second segment at a curve end
          are the same polynomial. It is a good default when there is no
          information on boundary conditions.
        * 'periodic': The interpolated functions is assumed to be periodic
          of period ``x[-1] - x[0]``. The first and last value of `y` must be
          identical: ``y[0] == y[-1]``. This boundary condition will result in
          ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
        * 'clamped': The first derivative at curves ends are zero. Assuming
          a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
        * 'natural': The second derivative at curve ends are zero. Assuming
          a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
        If `bc_type` is a 2-tuple, the first and the second value will be
        applied at the curve start and end respectively. The tuple values can
        be one of the previously mentioned strings (except 'periodic') or a
        tuple `(order, deriv_values)` allowing to specify arbitrary
        derivatives at curve ends:
        * `order`: the derivative order, 1 or 2.
        * `deriv_value`: array-like containing derivative values, shape must
          be the same as `y`, excluding ``axis`` dimension. For example, if
          `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
          the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
          and have the shape (n0, n1).
    extrapolate : {bool, 'periodic', None}, optional
        If bool, determines whether to extrapolate to out-of-bounds points
        based on first and last intervals, or to return NaNs. If 'periodic',
        periodic extrapolation is used. If None (default), ``extrapolate`` is
        set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.

    See Also
    --------
    scipy.interpolate.CubicHermiteSpline

    Returns
    -------
    y : scalar or array-like
        The result, of shape (m,)

    References
    ----------
    .. [1] `Cubic Spline Interpolation
            <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
            on Wikiversity.
    .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
    r   r   )rZ   r   r   )r   r   ZCubicSpline)r   r   r.   rZ   r   r   r   r   r!   r!   r"   r     s
   M
r   r=   c                 C  s   t | }| }| sTt| d|d}|du rd}t| d|d}|du r't| }t| ||d |dkr;d|||d	 < n|d
krMd |d|< ||d	 d< tj| |< dS dS )a  
    Apply interpolation and limit_area logic to values along a to-be-specified axis.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: str
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place.
    rU   r   Nr   rV   )r0   rb   rp   FrX   rq   )r   r   r]   r   rj   r'   r   )r=   r0   rb   rd   r   rS   rU   rV   r!   r!   r"   _interpolate_with_limit_area  s(   r   r   c                 C  s   |durt tt|||d||  dS |dkrdd ndd }| jdkr6|dkr,td| td	| j } t	|}|| }|d
krJt
||d dS t||d dS )a  
    Perform an actual interpolation of values, values will be make 2-d if
    needed fills inplace, returns the result.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str, default "pad"
        Interpolation method. Could be "bfill" or "pad"
    axis: 0 or 1
        Interpolation axis
    limit: int, optional
        Index limit on interpolation.
    limit_area: str, optional
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place.
    N)r0   rb   rd   r   c                 S  s   | S Nr!   r.   r!   r!   r"   <lambda>I  s    z interpolate_2d.<locals>.<lambda>c                 S  s   | j S r   )Tr   r!   r!   r"   r   I  s    rX   z0cannot interpolate on a ndim == 1 with axis != 0rX   r3   rb   )r'   rz   r   r   r\   AssertionErrorr   tupler)   r9   _pad_2d_backfill_2d)r=   r0   rZ   rb   rd   ZtransfZtvaluesr!   r!   r"   rj     s0   	
rj   npt.NDArray[np.bool_] | Nonec                 C  s    |d u rt | }|tj}|S r   )r   r}   r'   Zuint8)r=   r   r!   r!   r"   _fillna_prep]  s   r   rx   r   c                   s    t  d fdd	}tt|S )z>
    Wrapper to handle datetime64 and timedelta64 dtypes.
    Nc                   sP   t | jr!|d u rt| } | d||d\}}|| j|fS  | ||dS )Nr{   )rb   r   )r   r&   r   r}   )r=   rb   r   resultrx   r!   r"   new_funcn  s   
z&_datetimelike_compat.<locals>.new_funcNN)r   r   r   )rx   r   r!   r   r"   _datetimelike_compati  s   
r   (tuple[np.ndarray, npt.NDArray[np.bool_]]c                 C  "   t | |}tj| ||d | |fS Nr   )r   r	   Zpad_inplacer=   rb   r   r!   r!   r"   _pad_1d}     
r   c                 C  r   r   )r   r	   Zbackfill_inplacer   r!   r!   r"   _backfill_1d  r   r   c                 C  8   t | |}t| jrtj| ||d | |fS 	 | |fS r   )r   r'   r   r)   r	   Zpad_2d_inplacer   r!   r!   r"   r        
r   c                 C  r   r   )r   r'   r   r)   r	   Zbackfill_2d_inplacer   r!   r!   r"   r     r   r   r3   r4   rX   r\   c                 C  s&   t | } |dkrt|  S ttd|  S )NrX   r   )r9   _fill_methodsr   r   )r0   r\   r!   r!   r"   get_fill_func  s   r   c                 C  s   t | ddS )NT)r2   )r9   )r0   r!   r!   r"   clean_reindex_fill_method  s   r   r   c                   s   t |  t }t } fdd}|dur'|dkr"tt| d }n|| |}|durN|dkr1|S t|| ddd |}t d t| }|dkrN|S ||@ S )ak  
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    c                   s`   t | }t| |d d}tt|d | tt| d |d    dkd B }|S )NrX   r   )min_rolling_windowr   r   r'   whereZcumsum)r   rb   ZwindowedidxNr!   r"   inner  s   
"z_interp_limit.<locals>.innerNr   r[   rX   )r   r   r'   r   listr~   )r   Zfw_limitZbw_limitZf_idxZb_idxr   Z	b_idx_invr!   r   r"   r     s    
r   awindowc                 C  sJ   | j dd | j d | d |f }| j| jd f }tjjj| ||dS )z
    [True, True, False, True, False], 2 ->

    [
        [True,  True],
        [True, False],
        [False, True],
        [True, False],
    ]
    Nr[   rX   )r)   strides)r)   r   r'   r
   Zstride_tricksZ
as_strided)r   r   r)   r   r!   r!   r"   r     s   $r   )r   r   r   r   )r$   r   r%   r   )F)r0   r1   r2   r*   )r0   r6   r<   r   r%   r6   )rR   r6   rS   r   r%   rT   )	r3   r   NNr^   NNFN)r_   r`   r0   r6   rZ   r   r<   ra   rb   rT   rc   r6   rd   r1   re   rf   rg   r*   rh   r1   r%   ri   )r:   Nr^   NN)r_   r`   r<   r   rZ   r   r0   r6   rb   rT   rc   r6   rd   r1   re   rf   r%   ri   )r<   r   r0   r6   r%   r`   )r:   Nr^   NNFN)rt   r`   rs   r`   r0   r1   rb   rT   rc   r6   rd   r1   re   rf   ru   r*   rK   rT   r%   ri   )NFN)ru   r*   )Nr   F)r   r   r   r*   )r   r   )r   r   rZ   r   )r   r   N)rZ   r   r   r   )
r=   r`   r0   r6   rb   rT   rd   r1   r%   ri   )r3   r   NN)r=   r`   r0   r6   rZ   r   rb   rT   rd   r1   r%   ri   r   )r   r   r%   r   )rx   r   r%   r   r   )r=   r`   rb   rT   r   r   r%   r   )r=   r`   r   r   )r   r   r   )r\   r   )r%   r1   )r   r   )r   r   r   r   r%   r   )A__doc__
__future__r   	functoolsr   r   typingr   r   r   numpyr'   Zpandas._libsr   r	   r
   Zpandas._typingr   r   r   r   r   Zpandas.compat._optionalr   Zpandas.core.dtypes.castr   Zpandas.core.dtypes.commonr   r   r   r   Zpandas.core.dtypes.missingr   r   r   Zpandasr   r#   r/   r9   rM   rN   rQ   r]   rm   rk   ry   rv   r   r   r   r   r   rj   r   r   r   r   r   r   r   r   r   r   r   r!   r!   r!   r"   <module>   s    

9

+9
RqN-/
V1H





?