o
    dX                     @   s  d Z ddlZddlmZ ddlZddlZddlmZ ddlZ	ddl
Z
ddlmZ ddlmZ ddlmZmZ ddlm  mZ ddlZddlmZ ddlmZ dd	lmZmZm Z m!Z!m"Z" zddl#Z#d
Z$W n e%yp   dZ$Y nw zddl&Z&d
Z'W n e%y   dZ'Y nw e
j(e
j)de
j*j+e' peddkddde
j)de
j*j+e$ dddgddd Z,e
j(dd Z-e
j(dd Z.e
j(dd Z/e
j(dd Z0e
j(dd  Z1e
j(ej2ej3j4ej2ej3j5ej2ej3j6ej7d!d"ej7d#d"ej7d$d"ej7d%d"gdd&d' Z8						
		
	(dEd)d*Z9d+d, Z:d-d. Z;d/d0 Z<d1d2 Z=d3d4 Z>d5d6 Z?d7d8 Z@d9d: ZAd;d< ZBG d=d> d>ZCG d?d@ d@eCZDG dAdB dBeCZEG dCdD dDeCZFdS )Fz test parquet compat     N)BytesIO)catch_warnings)
get_option)is_platform_windows)pa_version_under7p0pa_version_under8p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFfastparquetmode.data_managerarrayz4fastparquet is not installed or ArrayManager is usedreason)Zmarkspyarrowpyarrow is not installed)paramsc                 C      | j S Nparamrequest r   P/app/.heroku/python/lib/python3.10/site-packages/pandas/tests/io/test_parquet.pyengine3   s   r   c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr   r   r   r   paH   s   
r"   c                   C   s,   t s	td dS tddkrtd dS )Nzfastparquet is not installedr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr    r!   r   r   r   r   r   fpO   s   

r$   c                   C   s   t g dddS )N         fooAB)pd	DataFramer   r   r   r   	df_compatX   s   r/   c               
   C   sB   t tdttddtjddddg dt jd	d
dd} | S )Nabcr&            @      @float64dtypeTFT20130101r(   periods)abdef)r-   r.   listrangenparange
date_range)dfr   r   r   df_cross_compat]   s   rF   c                   C   s   t tddtjdgg dg dg dttddtd	d
dtjdddddtjdgg dt jdd	dt 	dt j
t 	dgdS )Nr0   r;   cr;   NrG   )   foo   bars   bazr)   barbazr&   r1   r(      u1r2   r3   r4   r5          @      @r7   r8   r9   Z20130103)stringZstring_with_nanZstring_with_nonebytesunicodeintZuintfloatZfloat_with_nanbooldatetimedatetime_with_nat)r-   r.   r@   rB   nanrA   rC   astyperD   	TimestampZNaTr   r   r   r   df_fullo   s$   

r]   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   r   r   r   r   r   r   r   timezone_aware_date_list   s   r^   r'   c
                    s   pddip	i du r|r|d< |d<  fdd}
du rFt  |
|	 W d   dS 1 s?w   Y  dS |
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr   c              	      sr   t | D ]2}jfi  tdd tfi }W d    n1 s'w   Y  tj| d qd S )NTrecord)check_names
check_likecheck_dtype)rA   r   r   r   tmassert_frame_equal)repeat_actualrd   rc   rb   rE   expectedpathread_kwargswrite_kwargsr   r   compare   s   z!check_round_trip.<locals>.compare)re   ensure_clean)rE   r   rl   rn   rm   rk   rb   rc   rd   rg   ro   r   rj   r   check_round_trip   s   "

"rq   c                 C   s~   t r'ddlm} |j| dd}t|jjt|ksJ |jjt|ks%J dS ddlm	} |j	| dd}|j
jj|ks=J dS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NF)Zvalidate_schemaZhive)partitioning)r   pyarrow.parquetparquetZParquetDatasetlenZ
partitionsZpartition_namessetZpyarrow.datasetdatasetrr   schemanames)rl   rk   pqrw   Zdsr   r   r   check_partition_names   s   
r{   c                 C   sD   d}t jt|d t| dd W d    d S 1 sw   Y  d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr)   rL   )r    raises
ValueErrorrq   )r/   msgr   r   r   test_invalid_engine   s   "r   c                 C   :   t dd t|  W d    d S 1 sw   Y  d S )Nio.parquet.enginer   r-   option_contextrq   )r/   r"   r   r   r   test_options_py      
"r   c                 C   r   )Nr   r   r   )r/   r$   r   r   r   test_options_fp   r   r   c                 C   r   )Nr   autor   )r/   r$   r"   r   r   r   test_options_auto  r   r   c                 C   sP  t tdts	J t tdtsJ tdd# t tdts"J t tdts+J t tdts4J W d    n1 s>w   Y  tdd# t tdtsSJ t tdts\J t tdtseJ W d    n1 sow   Y  tdd$ t tdtsJ t tdtsJ t tdtsJ W d    d S 1 sw   Y  d S )Nr   r   r   r   )
isinstancer   r
   r	   r-   r   )r$   r"   r   r   r   test_options_get_engine	  s"   "r   c                  C   s  ddl m}  | d}| d}tsdnttjt|k }ts!dnttjt|k }to.| }to3| }|s|s|r\d| d}t	j
t|d td	 W d    n1 sVw   Y  nd
}t	j
t|d td	 W d    n1 stw   Y  |rd| d}t	j
t|d td	 W d    d S 1 sw   Y  d S d}t	j
t|d td	 W d    d S 1 sw   Y  d S d S d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.r|   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr   r   r   __version__r#   r   r    r~   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr}   r   r   r   "test_get_engine_auto_error_message  sD   






"
"r   c                 C   s   | }t  0}|j||d d t||d}t || t||ddgd}t ||ddg  W d    d S 1 s9w   Y  d S )Nr   r_   r   r;   r=   r   columns)re   rp   r   r   rf   rF   r"   r$   rE   rl   resultr   r   r   test_cross_engine_pa_fpH  s   
"r   c              	   C   s   | }t  M}|j||d d tdd' t||d}t || t||ddgd}t ||ddg  W d    n1 s>w   Y  W d    d S W d    d S 1 sVw   Y  d S )Nr   Tr`   r   r;   r=   r   )re   rp   r   r   r   rf   r   r   r   r   test_cross_engine_fp_paV  s   
"r   c                   @   s:   e Zd Zdd Zdd Zejjejddddd	 Z	d
S )Basec              	   C   s|   t  0}tj||d t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s7w   Y  d S )Nr|   r_   )re   rp   r    r~   r   )selfrE   r   excerr_msgrl   r   r   r   check_error_on_writee  s   
"zBase.check_error_on_writec              	   C   sx   t  .}t | t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s5w   Y  d S )Nr   )re   rp   external_error_raisedr   )r   rE   r   r   rl   r   r   r   check_external_error_on_writek  s   
"z"Base.check_external_error_on_writedhttps://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/parquet/simple.parquetT)urlZcheck_before_testc                 C   s.   |dkr	t | d}t|}t|| d S )Nr   r   )r    importorskipr   re   rf   )r   r/   r   r   rE   r   r   r   test_parquet_read_from_urlq  s   	
zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r    marknetworkre   r   r   r   r   r   r   d  s    r   c                   @   s   e Zd Zdd Zdd Zejdg ddd Zd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejjedddd  Zejd!g d"d#d$ Zd%S )&	TestBasicc                 C   sF   t g dddt dtg dfD ]}d}| ||t| qd S )Nr%   r&   r)   r8   z+to_parquet only supports IO with DataFrames)r-   Seriesr\   rB   r   r   r   )r   r   objr   r   r   r   
test_error  s   zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr0   r&   r1   rR   rU   r)   rL   )r-   r.   r@   rA   r   rq   )r   r   rE   r   r   r   test_columns_dtypes  s   
zTestBasic.test_columns_dtypesr_   )Ngzipsnappybrotlic                 C   sN   |dkr
t d n	|dkrt d tdg di}t||d|id d S )Nr   r   r+   r%   r_   rn   )r    r   r-   r.   rq   )r   r   r_   rE   r   r   r   test_compression  s   
zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr0   r&   r1   r   rR   r   rk   rm   )r-   r.   r@   rA   rq   )r   r   rE   rk   r   r   r   test_read_columns  s
   
zTestBasic.test_read_columnsc                 C   s   |dk}t dg di}t|| g dt jdddtdg d	g}|D ]}||_t|t jr7|jd |_t|||d
 q%g d|_d|j_	t|| d S )Nr   r+   r%   )r'   r(   r1   r8   r(   r9   r0   )r&   r(   r1   )rb   )r   r&   r'   r)   )
r-   r.   rq   rD   r@   indexr   ZDatetimeIndex
_with_freqname)r   r   rb   rE   Zindexesr   r   r   r   test_write_index  s    

zTestBasic.test_write_indexc                 C   s:   |}t dg di}t jg d}||_t|| d S )Nr+   r%   )r;   r&   )r;   r'   )r<   r&   )r-   r.   
MultiIndexfrom_tuplesr   rq   )r   r"   r   rE   r   r   r   r   test_write_multiindex  s
   zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjdt| dtdd}t jj	d	d
g|gddgd}|j
d d}||fD ]}||_t|| t||dddgi|ddg d q4d S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr'   r(   ABCr   Level1Level2leveldate)ry   r   r+   r,   rm   rk   )r-   rD   r.   rB   randomrandnru   r@   r   Zfrom_productcopyr   rq   )r   r"   r   datesrE   Zindex1index2r   r   r   r   test_multiindex_with_columns  s   $
z&TestBasic.test_multiindex_with_columnsc                 C   s   t g dg dd}d dd}|jdd}t||||d t jg dg ddg d	d
}t||||d g dg dg}t jttddd tdD d|d
}|jdd}t||||d d S )Nr%   )qrs)r;   r<   F)r_   r   T)droprn   rk   )ZzyxZwvuZtsrr   rL   rL   rM   rM   r)   r)   quxr   onetwor   r   r   r   r   r      c                 S   s   g | ]}| qS r   r   ).0ir   r   r   
<listcomp>  s    z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r-   r.   Zreset_indexrq   r@   rA   )r   r   rE   rn   rk   arraysr   r   r   test_write_ignoring_index  s    
 z#TestBasic.test_write_ignoring_indexc                 C   s^   t jg d}t jtjdd|d}|dkr"| ||td d S |dkr-t	|| d S d S )Nr   r1   r(   r   r   Column name must be a stringr   )
r-   r   r   r.   rB   r   r   r   	TypeErrorrq   )r   r   Z
mi_columnsrE   r   r   r   test_write_column_multiindex  s   z&TestBasic.test_write_column_multiindexc                 C   s   g dg dg}t jtjdd|d}ddg|j_|dkr6ttj	tdk r*t
}nt}| |||d	 d S |d
krAt|| d S d S )Nr   )r&   r'   r&   r'   r&   r'   r&   r'   r   r   r   r   r   z0.7.0zColumn namer   )r-   r.   rB   r   r   r   ry   r   r   r   r   r   r   rq   )r   r   r   rE   errr   r   r   &test_write_column_multiindex_nonstring  s   z0TestBasic.test_write_column_multiindex_nonstringc                 C   sF   |}g dg dg}t jtjdd|d}ddg|j_t|| d S )Nr   r   r   r   Z	ColLevel1Z	ColLevel2)r-   r.   rB   r   r   r   ry   rq   r   r"   r   r   rE   r   r   r   #test_write_column_multiindex_string  s   z-TestBasic.test_write_column_multiindex_stringc                 C   s:   |}g d}t jtjdd|d}d|j_t|| d S )N)rL   rM   r)   r   r   r1   r   Z	StringCol)r-   r.   rB   r   r   r   r   rq   r   r   r   r   test_write_column_index_string*  s
   z(TestBasic.test_write_column_index_stringc                 C   sR   g d}t jtjdd|d}d|j_|dkr"| ||td d S t	|| d S )Nr&   r'   r(   r1   r   r1   r   ZNonStringColr   r   )
r-   r.   rB   r   r   r   r   r   r   rq   )r   r   r   rE   r   r   r   !test_write_column_index_nonstring6  s   z+TestBasic.test_write_column_index_nonstringzminimum pyarrow not installedr   c           
      C   s  dd l m} |dkrtjjdd}|j| tt	g ddt	g ddt	g dt	g d	t	g d
dt	g ddt	g ddd}t
 }||| t||d}t||dd}W d    n1 snw   Y  |d jtdksJ ttj	g dddtj	g dddtj	g dddtj	g d	ddtj	g d
ddtj	g dddtj	g dddd}	|dkr|jddd}|	jddd}	t
||	 d S )Nr   r   z.Fastparquet nullable dtype support is disabledr   r&   r'   r(   NZint64Zuint8)r;   r<   rG   N)TFTNr   )      ?rP   rQ   NZfloat32r4   )r;   r<   rG   r=   r>   r?   gr   numpy_nullabler   dtype_backendr;   Int64r5   UInt8rR   booleanZFloat32Float64rG   r&   )Zaxis)rs   rt   r    r   xfailnode
add_markerr   tabler   re   rp   Zwrite_tabler   r6   rB   r-   r.   r   rf   )
r   r   r   rz   r   r   rl   Zresult1Zresult2rk   r   r   r   test_dtype_backendD  sH   
zTestBasic.test_dtype_backendr6   )	r   r   r   objectzdatetime64[ns, UTC]rV   z	period[D]r   rR   c                 C   sT   t dt jg |di}d }|dkrt dt jg ddi}t||ddi|d d S )Nvaluer5   rV   r   r   r   r   )r-   r.   r   rq   )r   r"   r6   rE   rk   r   r   r   test_read_empty_arrayv  s   
zTestBasic.test_read_empty_arrayN)r   r   r   r   r   r    r   parametrizer   r   r   r   r   r   r   r   r   r   r   skipifr   r   r   r   r   r   r   r     s,    

		 
1r   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jjede
jddgg gdd Zedd d! Zedd"d# Zd$d% Zd&d' Ze
jjded(d) gd*d+gd,d-d. Zd/d0 Zd1d2 Z edd3d4 Z!edd5d6 Z"edd7d8 Z#d9d: Z$d;d< Z%edd=d> Z&d?d@ Z'dAdB Z(dCdD Z)dEdF Z*dGdH Z+dIS )JTestParquetPyArrowc                 C   s@   |}t jdddd}|d }||d< g d|d< t|| d S )Nr8   r(   Europe/Brusselsr:   tzdatetime_tzTNTbool_with_none)r-   rD   r   rq   )r   r"   r]   rE   dtir   r   r   
test_basic  s   
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr8   r(   r   r   r  rR   rU   r   r   )r-   rD   rq   )r   r"   r]   rE   r   r   r   test_basic_subset_columns  s   


z,TestParquetPyArrow.test_basic_subset_columnsc                 C   s:   |j |d}t|tsJ t|}t|}t|| d S )Nr   )r   r   rS   r   r   re   rf   )r   r"   r]   Z	buf_bytesZ
buf_streamresr   r   r   *test_to_bytes_without_path_or_buf_provided  s
   z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r1   r(   aaar   zDuplicate column names found	r-   r.   rB   rC   Zreshaper@   r   r   r   r   r"   rE   r   r   r   test_duplicate_columns  s   $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s<   t dt jdddi}tr| ||t d S t|| d S )Nr;   1 dayr(   r9   )r-   r.   timedelta_ranger   r   NotImplementedErrorrq   r  r   r   r   test_timedelta  s   z!TestParquetPyArrow.test_timedeltac                 C   s&   t dg di}| ||tj d S )Nr;   r;   r&   rP   )r-   r.   r   r   ArrowExceptionr  r   r   r   test_unsupported  s   z#TestParquetPyArrow.test_unsupportedc                 C   s6   t jddt jd}tj|dgd}| ||tj d S )Nr'   
   r5   fp16datar   )rB   rC   float16r-   r.   r   r   r  )r   r"   r  rE   r   r   r   test_unsupported_float16  s   z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   	path_typec              	   C   s   t jddt jd}tj|dgd}t 2}||}ttj	 |j
||d W d    n1 s2w   Y  tj|r?J W d    d S 1 sJw   Y  d S )Nr'   r  r5   r  r  )rl   r   )rB   rC   r  r-   r.   re   rp   r   r   r  r   osrl   isfile)r   r"   r  r  rE   Zpath_strrl   r   r   r    test_unsupported_float16_cleanup  s   
"z3TestParquetPyArrow.test_unsupported_float16_cleanupc                 C   sd   t  }t td|d< t jg dt g dd|d< t jg dg dd	d
|d< t|| d S )NZabcdefr;   )rL   r)   r)   rL   NrL   rK   r5   r<   )r;   r<   rG   r;   rG   r<   )r<   rG   r=   T)
categoriesZorderedrG   )r-   r.   Categoricalr@   ZCategoricalDtyperq   r  r   r   r   test_categorical  s   

z#TestParquetPyArrow.test_categoricalc                 C   s8   t d}|jdi |}d|i}t||d||d d S )Ns3fs
filesystemzpandas-test/pyarrow.parquetrl   rm   rn   r   )r    r   ZS3FileSystemrq   )r   r/   s3_resourcer"   s3sor"  Zs3kwr   r   r   test_s3_roundtrip_explicit_fs  s   

z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s   d|i}t ||d||d d S )Nstorage_optionsz s3://pandas-test/pyarrow.parquetr$  rq   )r   r/   r%  r"   r&  r   r   r   test_s3_roundtrip  s   
z$TestParquetPyArrow.test_s3_roundtripr"  partition_colr+   c              
   C   s^   |  }|r|t|tj}d}|| |||< t|||dd|i|d |dddd d S )Ncategoryzs3://pandas-test/parquet_dirr)  )partition_colsr_   r)  Tr&   )rk   rl   rm   rn   rc   rg   )r   r[   dictfromkeysrB   Zint32rq   )r   r/   r%  r"   r,  r&  Zexpected_dfZpartition_col_typer   r   r   test_s3_roundtrip_for_dir  s(   
z,TestParquetPyArrow.test_s3_roundtrip_for_dirr   c                 C   s(   t  }|| t|}t|| d S r   )r   r   r   re   rf   )r   r/   bufferZdf_from_bufr   r   r   test_read_file_like_obj_support>  s   
z2TestParquetPyArrow.test_read_file_like_obj_supportc                 C   s   | dd | dd tjtdd td W d    n1 s"w   Y  tjtdd |d W d    d S 1 s?w   Y  d S )NHOMEZTestingUserUSERPROFILEz.*TestingUser.*r|   z~/file.parquet)Zsetenvr    r~   OSErrorr   r   )r   r/   Zmonkeypatchr   r   r   test_expand_userE  s   
"z#TestParquetPyArrow.test_expand_userc                 C   s>   ddg}|}|j ||d d t|| t|j|jksJ d S )NrW   rU   r.  r_   r   r{   r   shape)r   tmp_pathr"   r]   r.  rE   r   r   r   test_partition_cols_supportedN  s
   
z0TestParquetPyArrow.test_partition_cols_supportedc                 C   s@   d}|g}|}|j ||d d t|| t|j|jksJ d S )NrW   r8  r9  )r   r;  r"   r]   r.  partition_cols_listrE   r   r   r   test_partition_cols_stringV  s   
z-TestParquetPyArrow.test_partition_cols_stringc                 C   s   | S r   r   )xr   r   r   <lambda>`  s    zTestParquetPyArrow.<lambda>rR   zpathlib.Path)Zidsc           	      C   s<   d}|g}|}||}|j ||d t|j|jksJ d S )Nr,   )r.  )r   r   r:  )	r   r;  r"   r/   r  r.  r=  rE   rl   r   r   r   test_partition_cols_pathlib_  s   z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t jg g d}t|| d S )N)r   r   )r-   r.   rq   r  r   r   r   test_empty_dataframem  s   z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   r?  r&   )typerx   r   )	r   r-   r.   rx   fieldZbool_r[   rW   rq   )r   r"   r   rE   rx   Zout_dfr   r   r   test_write_with_schemar  s
   
z)TestParquetPyArrow.test_write_with_schemac                 C   sp   t t jg dddt jg dddt jg dddd}t|| t dt jg d	ddi}t|| d S )
Nr%   r   r5   ZUInt32rH   rR   r;   r<   rG   r;   r   )r-   r.   r   rq   r  r   r   r    test_additional_extension_arraysz  s   
z3TestParquetPyArrow.test_additional_extension_arraysc              	   C   sj   t dt jg dddi}t d| t|||d| dd W d    d S 1 s.w   Y  d S )	Nr;   rH   zstring[pyarrow]r5   string_storagezstring[]rk   )r-   r.   r   r   rq   r[   )r   r"   rH  rE   r   r   r    test_pyarrow_backed_string_array  s   "z3TestParquetPyArrow.test_pyarrow_backed_string_arrayc                 C   sL   t t jg dt jddddt jt jddddd}t|| d S )N))r   r&   )r&   r'   )r(   r1   z
2012-01-01r(   D)r:   r   r1   )rG   r=   r>   )r-   r.   ZIntervalIndexr   period_rangeZfrom_breaksrD   rq   r  r   r   r   test_additional_extension_types  s   
z2TestParquetPyArrow.test_additional_extension_typesc                 C   s>   t sd}nd}tdtjddddi}t||d|id	 d S )
Nz2.6z2.0r;   z
2017-01-01Z1nr  r   r:   versionr   )r   r-   r.   rD   rq   )r   r"   verrE   r   r   r   test_timestamp_nanoseconds  s
   z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   sV   t s|jtjjkr|jtjj	dd d|g }t
j|d|id}t||dd d S )Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286r      index_as_colr   r  F)rd   )r   tzinforX   timezoneutcr   r   r    r   r   r-   r.   rq   )r   r   r"   r^   idxrE   r   r   r   test_timezone_aware_index  s   
z,TestParquetPyArrow.test_timezone_aware_indexc                 C   sr   t dttddi}t }||| t||dgdd}W d    n1 s*w   Y  t|dks7J d S )Nr;   r   r(   r;   z==r   F)filtersZuse_legacy_datasetr&   )	r-   r.   r@   rA   re   rp   r   r   ru   )r   r"   rE   rl   r   r   r   r   test_filter_row_groups  s   

z)TestParquetPyArrow.test_filter_row_groupsc                 C   s   t jtjddg dd}t }||| t||}W d    n1 s(w   Y  |r<t	|j
t jjjs:J d S t	|j
t jjjsGJ d S )Nr  r(   )r+   r,   Cr   )r-   r.   rB   r   r   re   rp   r   r   r   Z_mgrcoreZ	internalsZArrayManagerZBlockManager)r   r"   Zusing_array_managerrE   rl   r   r   r   r   test_read_parquet_manager  s   
z,TestParquetPyArrow.test_read_parquet_managerc                 C   s   dd l }|}tjdddd}|d }||d< g d|d< |j|}|jtjd	}|d
 d|d
< |d d|d< |d t|j	ddd|d< t
||ddi|d d S )Nr   r8   r(   r   r   r  r  r  )Ztypes_mapperrX   ztimestamp[us][pyarrow]rY   us)unitr   r   r   r   rm   rk   )r   r-   rD   r   TableZfrom_pandasZ	to_pandasZ
ArrowDtyper[   	timestamprq   )r   r"   r]   r   rE   r  Zpa_tablerk   r   r   r   &test_read_dtype_backend_pyarrow_config  s*   

z9TestParquetPyArrow.test_read_dtype_backend_pyarrow_configc                 C   sn   t jdddgit jddgdddd	}| }d
d l}t|jtdkr+|jd|_t	||ddi|d d S )Nr;   r&   r'   r(   r1   testr   zint64[pyarrow])r   r6   r   z11.0.0r   r   rc  )
r-   r.   Indexr   r   r   r   r   r[   rq   )r   r"   rE   rk   r   r   r   r   ,test_read_dtype_backend_pyarrow_config_index  s   
z?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexc                 C   s   t tdttddd}ddg|_t|| ddg|_tjtdd	 t|| W d    n1 s5w   Y  t		d
ddddt		d
ddddg|_t|| d S )Nr0   r&   r1   r   r   rI   rJ   z|S3r|     )
r-   r.   r@   rA   r   rq   r    r~   r  rX   r  r   r   r   test_columns_dtypes_not_invalid	  s   


z2TestParquetPyArrow.test_columns_dtypes_not_invalidc                 C   s(   t jt jg dddd}t|| d S )NrF  custom namerh  r   r-   r.   ri  rq   r  r   r   r   test_empty_columns  s   z%TestParquetPyArrow.test_empty_columnsN),r   r   r   r  r  r  r  r  r  r  r    r   r   r   r   strpathlibPathr  r!  
single_cpur(  r+  tdZ
skip_if_nor1  r3  r7  r<  r>  rA  rB  rE  rG  rK  rN  rR  rZ  r]  r`  rf  rj  rl  ro  r   r   r   r   r     sn    



	





r   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jjdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'S )(TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr8   r(   z
US/Easternr   r  r  r9   	timedelta)r-   rD   r   r  rq   )r   r$   r]   rE   r  r   r   r   r  $  s   
z!TestParquetFastParquet.test_basicc                 C   s   t tdttddd}t}d}ddg|_| |||| ddg|_| |||| td	ddddtd	ddddg|_| |||| d S )
Nr0   r&   r1   r   r   r   rI   rJ   rk  )r-   r.   r@   rA   r   r   r   rX   )r   r$   rE   r   r   r   r   r   test_columns_dtypes_invalid-  s   

z2TestParquetFastParquet.test_columns_dtypes_invalidc                 C   s<   t jtdddtdd }d}| ||t| d S )Nr	  r1   r(   r
  r   z9Cannot create parquet dataset with duplicate column namesr  r   r$   rE   r   r   r   r   r  B  s   $z-TestParquetFastParquet.test_duplicate_columnsc                 C   s@   t dg di}t jddtjdgidd}t|||dd d S )	Nr;   )TNFr   g        r  r5   F)rk   rd   )r-   r.   rB   rZ   rq   r   r$   rE   rk   r   r   r   test_bool_with_noneH  s   z*TestParquetFastParquet.test_bool_with_nonec                 C   sT   t dt jddddi}| ||td  t dg di}d}| ||t| d S )Nr;   Z2013Mr(   rO  r  z"Can't infer object conversion type)r-   r.   rM  r   r   rx  r   r   r   r  O  s
   z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr;   r0   )r-   r.   r   r@   rq   )r   r$   rE   r   r   r   r!  Z  s   z'TestParquetFastParquet.test_categoricalc                 C   sz   dt tddi}t|}t }|j||d dd t||dgd}W d    n1 s.w   Y  t|dks;J d S )Nr;   r   r(   r&   )r_   Zrow_group_offsetsr[  )r\  )	r@   rA   r-   r.   re   rp   r   r   ru   )r   r$   r=   rE   rl   r   r   r   r   r]  ^  s   

z-TestParquetFastParquet.test_filter_row_groupsc                 C   s    t ||dd|id |dd d S )Nz$s3://pandas-test/fastparquet.parquetr)  )r_   r)  r$  r*  )r   r/   r%  r$   r&  r   r   r   r+  f  s   
z(TestParquetFastParquet.test_s3_roundtripc                 C   s\   ddg}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks,J d S )NrW   rU   r   r   r.  r_   r   Fr'   	r   r  rl   existsr   ZParquetFilerp  Zcatsru   r   r;  r$   r]   r.  rE   r   Zactual_partition_colsr   r   r   r<  q     z4TestParquetFastParquet.test_partition_cols_supportedc                 C   sX   d}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks*J d S )NrW   r   r|  r   Fr&   r}  r  r   r   r   r>    s   z1TestParquetFastParquet.test_partition_cols_stringc                 C   s\   ddg}|}|j |dd |d tj|sJ dd l}|t|dj}t|dks,J d S )NrW   rU   r   )r   r_   partition_onr   Fr'   r}  r  r   r   r   test_partition_on_supported  r  z2TestParquetFastParquet.test_partition_on_supportedc                 C   sX   ddg}|}d}t jt|d |j|dd ||d W d    d S 1 s%w   Y  d S )NrW   rU   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datar|   r   )r   r_   r  r.  )r    r~   r   r   )r   r;  r$   r]   r.  rE   r   r   r   r   3test_error_on_using_partition_cols_and_partition_on  s   "zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onc                 C   s"   t  }| }t|||d d S )NrJ  )r-   r.   r   rq   ry  r   r   r   rB    s   z+TestParquetFastParquet.test_empty_dataframec                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )NrS  rT  rU  r   rJ  )r-   r.   r   r   r   rq   )r   r$   r^   rY  rE   rk   r   r   r   rZ    s
   
z0TestParquetFastParquet.test_timezone_aware_indexc              
   C   s   t dddgi}t g}|| tjtdd$ tt	 t
|ddd W d    n1 s2w   Y  W d    n1 sAw   Y  tjtdd t
|dd	d
 W d    n1 s_w   Y  W d    d S W d    d S 1 sww   Y  d S )Nr;   r&   r'   z!not supported for the fastparquetr|   r   T)r   Zuse_nullable_dtypesr   r   )r-   r.   re   rp   r   r    r~   r   Zassert_produces_warningFutureWarningr   )r   r$   rE   rl   r   r   r   &test_use_nullable_dtypes_not_supported  s   

"z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              	   C   s   t d7}t|d tjtdd t|dd W d    n1 s&w   Y  t|j	dd W d    d S 1 s?w   Y  d S )	Ntest.parquets   breakit r|   r   r   F)
missing_ok)
re   rp   rq  rr  write_bytesr    r~   	Exceptionr   unlink)r   rl   r   r   r   $test_close_file_handle_on_read_error  s   "z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              	   C   s   t jddgddgdd}td*}t| d}|| W d    n1 s*w   Y  t||d}W d    n1 s?w   Y  t|| d S )Nr   r&   r*   )r  r  wbr   )	r-   r.   re   rp   openencoder   r   rf   )r   r   rE   rl   r?   r   r   r   r   test_bytes_file_name  s   z+TestParquetFastParquet.test_bytes_file_namec              	   C   s   d}t dttddi}td3}|| tjt	|d t
|dd W d    n1 s1w   Y  W d    d S W d    d S 1 sIw   Y  d S )	NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.rU   r&   r1   ztmp.parquetr|   numpy)r   )r-   r.   r@   rA   re   rp   r   r    r~   r   r   )r   r   r   rE   rl   r   r   r   test_invalid_dtype_backend  s   
"z1TestParquetFastParquet.test_invalid_dtype_backendc                 C   sF   t jt jg dddd}t jt jg dddd}t|||d d S )NrF  rm  rh  r   rJ  rn  ry  r   r   r   ro    s   z)TestParquetFastParquet.test_empty_columnsN)r   r   r   r  rw  r  rz  r  r!  r]  r    r   rs  r+  r<  r>  r  r  rB  rZ  r  r  r  r  ro  r   r   r   r   ru  #  s*    	

	
ru  )	NNNNNTFTr'   )G__doc__rX   ior   r  rq  warningsr   r  rB   r    Zpandas._configr   Zpandas.compatr   Zpandas.compat.pyarrowr   r   Zpandas.util._test_decoratorsutilZ_test_decoratorsrt  Zpandasr-   Zpandas._testingZ_testingre   Zpandas.util.versionr   Zpandas.io.parquetr	   r
   r   r   r   r   r   r   r   r#   Zfixturer   r   r   r   r"   r$   r/   rF   r]   nowrW  rX  minmaxstrptimer^   rq   r{   r   r   r   r   r   r   r   r   r   r   r   ru  r   r   r   r   <module>   s    







A+      