o
    d                     @   sD  d Z ddlmZ ddlZddlZddlmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ dd Zdd Zg Zg Zd	D ]LZed
dD ]D\ZZeeeeZdededed  d dfZddddgfZeeeD ]\ZZ e!eee eef e!e de de  qiqAq:ej"j#ej"j$deedej"$dddgej"$dddgej"$dddgej"$dddgej"$dddgdd Z%ej"$d ddgd!d" Z&ej"$d#d$d%gg d&gd'd( Z'ej"$d#d$d%gg d&gd)d* Z(d+d, Z)d-d. Z*ej+d/d0 Z,d1d2 Z-d3d4 Z.d5d6 Z/d7d8 Z0ej"$d9g d:ej"$dddgej"$d;g d<ej"$d=ddgej"$d>ddgd?d@ Z1ej"$dAddgej"$dBddg dCg dDg dEfddg dFg dGg dHfddg dIg dJg dHfgdKdL Z2ej+dMdN Z3ej"$dOddddPg dQg dRfddddPg dSg dTfddddPg dUg dVfddddWg dXg dVfgdYdZ Z4ej+d[d\ Z5ej"$d]ddg d^g d_fddg d`g dafddg dbg dcfddg ddg defgdfdg Z6ej+dhdi Z7ej"$djdddgej8g dkg dldmfdg dneedgeg doedpdqej9ggg drg dsg dtgg dldufgej"$dddgdvdw Z:ej"$d=ddgej"$dxddgej"$dyddPej;g dzej<d{fddWe;g d|fgd}d~ Z=dd Z>ej"$d=ddgej"$dyddPej;g dzej<d{fddWe;g d|fgdd Z?ej"$d=ddgej"$dyddPej;g dej<d{fddWe;g dfgdd Z@ej"$d=ddgej"$ddg dfdg dfgej"$dyddPej;g dej<d{fddWe;g dfgdd ZAej"$d=ddgej"$dxddgej"$dyddPej;g dzej<d{fddWe;g d|fgdd ZBej"$dddPg dfddWg defgdd ZCej"$ddeDdg dfdeDddg g dfgej"$d=ddgdd ZEej"$dddgdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKej"$d ddgdd ZLdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    )productN)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetimec                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer	   tmassert_series_equal)dfresultZdf_mi_expectedZmi_expectedexpected r!   Z/app/.heroku/python/lib/python3.10/site-packages/pandas/tests/groupby/test_value_counts.py.tests_value_counts_index_names_category_column   s   
r#   c                 C   s   t jd tddd}tt jtd|t j||t jd|d |d}| rj|d d	|d< t j	|j
dd d
df< t j	|j
dd ddf< t j	|j
dd ddf< t j	|j
dd ddf< t j	|j
dd ddf< |S )Ni  z
2015-08-24
   )Zperiodsabcdr   )1st2nd3rdr(   float   r&         r'            	   )nprandomseedr
   r   choicelistrandintr   nanloc)	seed_nansnmdaysframer!   r!   r"   seed_df/   s    r>   TF)d   i  )      rA   r(   r      r&   r'   -zdf, keys, bins, n, m)idsisortTFznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   dd }|||	|
|d}| j ||d}|d jdi |}| j ||d}|d jtjfi |}|jjd d dg |j_||}t|||f\}}t	|
 |
  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r5   mapr   Zget_level_valuesrangeZnlevelsr   from_arraysrL   )r   Zarrr!   r!   r"   rebuild_index^   s   z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerH   rI   rJ   binsrH   r(   r!   )r   r   applyr	   r   rL   renamerM   r   r   
sort_index)r   keysrR   r:   r;   rF   rQ   r   rH   rI   rJ   rP   kwargsgrleftrightr!   r!   r"    test_series_groupby_value_countsT   s   

r]   utcc                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NiGI]i)J]iJ]iK]i)<M]iU=M]iN]applera   bananarb   orangerc   pear	TimestampFoodr+   rf   sr^   unitDatetime1Dfreqkeyrg   r   )r   dropr   r   r   r   rW   rU   r	   r   rL   rV   r   r   )r^   r   dfgr   r    r!   r!   r"   -test_series_groupby_value_counts_with_groupery   s   	
rr   r   AB)rs   rt   Cc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   rT   r   )dtyper   rK   )r   r   r   r	   rv   r   rO   lenr   r   r   r   r   rq   r   r    r!   r!   r"   &test_series_groupby_value_counts_empty   s   
ry   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   rT   )r   rN   rw   r   r   r   r   rx   r!   r!   r"   (test_series_groupby_value_counts_one_row   s
   r{   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r~   Zorderedrv   r   rz   r   r   )r	   r   r   r   r   rO   r1   arrayr   r   r   )rh   r   r    r!   r!   r"   /test_series_groupby_value_counts_on_categorical   s   r   c                  C   s   t g dg dg dd} | jddgddd	 }|jdd}td
dgddgg dgg dg dg dgg dd}tg d|dd}t|| d S )Nmaler   r   r   r   r   lowmediumhighr   r   r   r   FRr   r   r   r   r   	educationr   r   r   FrS   r   r   r   r   r   )r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   rC   r   rC   r   r   r   levelscodesrL   r   r   r   rC   r   r   r   )r   r   r   r   r	   r   r   )r   gbr   r   r    r!   r!   r"   (test_series_groupby_value_counts_no_sort   s   r   c                   C   s   t g dg dg ddS )Nr   r   r   r   r   r!   r!   r!   r"   education_df   s   r   c                 C   sJ   | j ddd}tjtdd |  W d    d S 1 sw   Y  d S )Nr   r   axisr   match)r   pytestraisesNotImplementedErrorr   r   gpr!   r!   r"   	test_axis   s   
"r   c                 C   sL   |  d}tjtdd |jdgd W d    d S 1 sw   Y  d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   r   r!   r!   r"   test_bad_subset   s   
"r   c                 C   sN   |  dddg jdd}tg dtjg dg dd	d
d}t|| d S )Nr   r   r   TrQ   )      ?      ?r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   rG   r   )r   r   r	   r   from_tuplesr   r   )r   r   r    r!   r!   r"   
test_basic   s   
r   c                 C   s   | | j |||dS )NrQ   rH   rI   )r   )r   rX   rQ   rH   rI   r!   r!   r"   _frame_value_counts  s   r   r   columnr   functionzsort, ascending))FN)TTr?   as_indexr=   c                    s  d d j  fddd| } j||d}	|	ddg j|||d}
|r|	tddg|||}|r9t|
| d S |r=d	nd
}| jd|idd}|dkrc|jddidd}t	
|d dd|d< n|dkrp|d dk|d< nt	
|d dd|d< t|
| d S  d d  d   d< |	d j|||d}||_|r|jjdd}|d jdjd|d< |d jdjd|d< |d= |jdd idd}t||_t|
| d S |dd|d jdjd |dd|d jdjd |d= t|
| d S )Nr   c                    s    d |  dkS )Nr   r   r!   )xr   r!   r"   <lambda>+      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>r   )byr   r   r   r   rG   r   r   r   r   r   level_0r   r   r   rD   ZbothFr   rC   )valuesr   r   rU   r   r   r   reset_indexrV   r1   whereassert_frame_equalr   r   Zto_framestrsplitgetr   r   insert)r   r   rQ   r   rH   rI   r   r=   r   r   r   r    Zindex_framer!   r   r"   $test_against_frame_and_seriesgroupby  sT   
""r   rQ   zCsort, ascending, expected_rows, expected_count, expected_group_size)r   r   rC   r+      r   )r   r+   r   r+   r   )r   r+   r   rC   r   )r   rC   r   r   r   )r   r+   r+   r   r   )r   r   r+   rC   r   )r   r   rC   r   r   c           
         s   j ddgddd}|d j|||d}t }	dD ]  fdd	|D |	 < q|r7||	d
< |	d
  |  < n||	d< t||	 d S )Nr   r   F)r   rH   r   r   r   c                       g | ]}  | qS r!   r!   .0rowr   r   r!   r"   
<listcomp>t      z!test_compound.<locals>.<listcomp>rG   r   )r   r   r   r   r   )
r   rQ   rH   rI   expected_rowsZexpected_countZexpected_group_sizer   r   r    r!   r   r"   test_compound[  s   r   c                   C   s$   t g dg dg ddg ddS )Nr   r   r   r   )rC   r   r      )rC   r   r   r   ro   Znum_legsZ	num_wings)Zfalcondogcatantr   r   r!   r!   r!   r"   
animals_df}  s   r   z?sort, ascending, normalize, name, expected_data, expected_indexr   r   rC   r   )r   r   r   )rC   r   r   rC   r   r   r   r   rC   )r   )rC   r   r   r   )rC   r   r   )r   )r   rC   r   )r   rC   r   rG   )r   r   r   c           
      C   s^   | j |||d}t|tj|g dd|d}t|| | dj |||d}	t|	| d S )N)rH   rI   rQ   r   rK   r   ro   )r   r	   r   rO   r   r   r   )
r   rH   rI   rQ   r   expected_dataexpected_indexresult_framer    result_frame_groupbyr!   r!   r"   test_data_frame_value_counts  s   
r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r+   rC   rA   r/   r-   )rs   rt   ru   D)r1   r7   r   )r:   r!   r!   r"   nulls_df  s   r   z:group_dropna, count_dropna, expected_rows, expected_values)	r   r   r+   rA   r-   r   r/   rC   r   )	r   r         ?r   r   r   r   r   r   )r   r   r+   rA   rC   r   )r   r   r   r   r   r   )r   r   rA   r-   r   r/   )r   r   r   r   r   r   )r   r   rA   )r   r   r   c           
         st   j ddg|d}|jdd|d}t }jD ]  fdd|D | < qt|}t||dd	}	t||	 d S )
Nrs   rt   )rJ   T)rQ   rH   rJ   c                    r   r!   r!   r   r   r   r!   r"   r     r   z,test_dropna_combinations.<locals>.<listcomp>rG   r   )	r   r   r   r   r   r   r	   r   r   )
r   Zgroup_dropnaZcount_dropnar   expected_valuesr   r   r   r   r    r!   r   r"   test_dropna_combinations  s   

r   c                 C   s    t g dg dd| | dgdS )Nr   )JohnAnner   BethSmithLouisero   Z
first_nameZmiddle_namer   )Znulls_fixturer!   r!   r"   names_with_nulls_df  s   
r   z%dropna, expected_data, expected_index)r   r   )r   r   )r   r   r   rK   r   )r   r   r   r   r   )r   r   r   r   )r   r   rC   rC   )rC   r   r   rC   r   c           	      C   s`   | j ||d}t|||d}|r|tt| }t|| | dj ||d}t|| d S )N)rJ   rQ   r   ro   )r   r	   r)   rw   r   r   r   )	r   rJ   rQ   r   r   r   r   r    r   r!   r!   r"   #test_data_frame_value_counts_dropna  s   !
r   observedznormalize, name, expected_data)rC   r   r   r   r   r   r   r   r   r   r   r   rv   )r   r   r           r   r   r   r   r   r   r   r   c                 C   s   |  djd||d}|j|d}tjg dg dd}t|||d}	td	D ]}
|	jjt	|	jj
|
 |
d
|	_q'|rCt||	 d S |	j|rIdndd}t|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   r+   levelrG   r   r   )r   r   r   r   r   r	   rN   r   
set_levelsr   r   r   r   r   r   r   r   r   rQ   r   r   r   r   r   expected_seriesir    r!   r!   r"   =test_categorical_single_grouper_with_only_observed_categories  s.   


r   c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|g dd|d}	t	d	D ] }
t
|	jj|
 }|
d
krI|| d jj}|	jj||
d|	_q2|r]t||	 d S |	j|d}t|| d S )Nr   r   ASIAr   r   r   rK   r   r+   r   r   r   )copyr   r   Zadd_categoriesr   r   r	   r   r   rN   r   r   r   Zset_categoriesr~   r   r   r   r   r   )r   r   r   r   rQ   r   r   r   r   r   r   Zindex_levelr    r!   r!   r"   !assert_categorical_single_grouperT  s.   
r  c              	   C   "   g d}t | |d||||d d S )Nr   Tr   r   r   r   rQ   r   r   r  r   r   rQ   r   r   r   r!   r!   r"   -test_categorical_single_grouper_observed_truew  s   
r  )rC   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   c              	   C   r  )N)r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   Fr  r  r  r!   r!   r"   .test_categorical_single_grouper_observed_false  s   ,
r  zobserved, expected_index)r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r	  r
  r  r  r  )r   r   rC   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r/||dk n|tj|g dd|d	}	td
D ]}
|	jj	t
|	jj|
 |
d|	_q@|r\t||	 d S |	j|rbdndd}t|| d S )Nr   r   r   r   r   r   )r   r   r   rK   r   rC   r   rG   r   r   r  r   r   r   r	   r   r   rN   r   r   r   r   r   r   r   r   )r   r   r   r   rQ   r   r   r   r   r   r   r    r!   r!   r"   "test_categorical_multiple_groupers  s2   7


r  c                 C   s   |   } | d d| d< | d d| d< | jd||d}|j|d}g d}t|tj|g dd	|d
}	tddD ]}
|	jj	t
|	jj|
 |
d|	_q;|rWt||	 d S |	j|r]dndd}t|| d S )Nr   r   r   r   r   r   r   r   rK   r   r   r+   r   rG   r   r   r  r   r!   r!   r"   test_categorical_non_groupersH  s0   

r  z*normalize, expected_label, expected_valuesr   c                 C   s   t g dg dd}|jg dddd gdd	}|jd
| d}t dtjg dtjddg ddg ddg d||i}t|| d S )Nr   r   rC   r+   )rs   rt   )r   rA   r   rs   c                 S   s   | dkrdS dS )Nr   r-   r/   r!   )r   r!   r!   r"   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   T)rH   rQ   r   )r   r   rA   r   r   Zlevel_2)r/   r/   r-   rt   )r   r+   rC   )r   r   r   r1   r   int_r   r   )rQ   expected_labelr   r   r   r   r    r!   r!   r"   test_mixed_groupings  s   		r  ztest, columns, expected_namesrepeatZabbde)r|   Ndr}   r}   er   r%   level_1)r|   Nr  r}   cr  c           
      C   s   t g dg dg|d}ddg}dtjddgtjd	d
g}|j||d }|r<tdtj||ddd}t	
|| d S dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )N)r   r+   rA   r-   r0   )rC   r   r   r/   r$   r   )r   r   r-   r+   rA   r0   )rC   r   r/   r   r   r$   r|   r   r   r   r  r  r   rK   r   r   c                 S   s   g | ]	}t |d g qS )r   )r5   r   r!   r!   r"   r     s    z0test_column_label_duplicates.<locals>.<listcomp>r  )r   r1   r   int64r   r   r	   r   r   r   r   r5   appendr   )
testr   Zexpected_namesr   r   r   rX   r   r    Zexpected_columnsr!   r!   r"   test_column_label_duplicates  s(   
r  znormalize, expected_labelc                 C   sn   t g dgdd|gdjddd}d| d}tjt|d	 |j| d
 W d    d S 1 s0w   Y  d S )Nr  r|   r}   r   Fr  zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rQ   r  r   msgr!   r!   r"   test_result_label_duplicates  s   	"r   c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nr|   r   r   rC   rK   r   r   )r   r   r1   r   r  r   r	   r   r   r   r   )r   r   r   r    r!   r!   r"   test_ambiguous_grouping  s   r!  c                  C   sj   t g dg ddg dd} d}tjt|d | djdgd	 W d    d S 1 s.w   Y  d S )
Nr|   r}   r  r   yr$  c1c2r   r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r&  r   r   r   r   r   r   r   r   r  r!   r!   r"   "test_subset_overlaps_gb_key_raises  
   "r+  c                  C   sj   t g dg ddg dd} d}tjt|d | djd	gd
 W d    d S 1 s.w   Y  d S )Nr"  r#  r%  r(  r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r&  c3r   r)  r*  r!   r!   r"   !test_subset_doesnt_exist_in_frame  r,  r.  c                  C   sp   t g dg ddg dd} | jddjdgd	}td
dgtjdd
gddggd dgddd}t|| d S )Nr"  r#  r%  r(  r   r   r   r'  r   r   rC   r   r$  rK   r   r   r   r   r   r	   r   rO   r   r   r   r   r    r!   r!   r"   test_subset  s   r1  c                  C   s   t g dg dg dgg dg dd} | jddjdgd	}td
dgtjdd
gddgddggg dddd}t|| d S )N)r|   r   r   )r}   r$  r$  r(  )r&  r'  r'  )r   r   r   r   r'  r   r   rC   r   r$  )Nr'  r'  rK   r   r   r/  r0  r!   r!   r"   test_subset_duplicate_columns  s   r2  c                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}| }tg d| d}|d  }t||g dgg dtdg dgg dd}t	d|dd}t
|| d S )Nr_   r`   re   r+   rf   rh   ri   rk   rl   rm   )z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)r^   )ra   rb   rc   rd   )r   r   r   rC   rC   r+   r   )r   r   r   rC   rC   r+   )rk   rf   rg   r   r   r   r   )r   rp   r   r   r   r   uniquer   rN   r	   r   r   )r^   r   r   r   datesZ
timestampsr   r    r!   r!   r"   test_value_counts_time_grouper  s*   	r5  )M__doc__	itertoolsr   numpyr1   r   Zpandasr   r   r   r   r   r   r	   r
   r   Zpandas._testingZ_testingr   r#   r>   ZbinnedrE   r9   r:   r;   r   ZarangemaxrR   rX   kr}   r  markZslowZparametrizer]   rr   ry   r{   r   r   Zfixturer   r   r   r   r   r   r   r   r   r   r   r   rO   r7   r   r   r  r   r  r  r  r  r  r  r5   r  r   r!  r+  r.  r1  r2  r5  r!   r!   r!   r"   <module>   s   ,$	




>





	
0#
&% %0


