o
    d7                     @   s  d dl Z d dlmZ d dlZd dlZd dlmZ d dlZd dlZ	d dl
Z
d dlmZ d dlm  mZ d dlZd dlmZ e
jdd Ze
jdeddfd	dgfgd
dd ZG dd dZdd Zdd Zdd Zdd Zdd Zdd Z e
j!"dddgdd Z#d d! Z$d"d# Z%d$d% Z&d&d' Z'd(d) Z(d*d+ Z)d,d- Z*d.d/ Z+d0d1 Z,d2d3 Z-e
j!"d4g d5d6d7 Z.d8d9 Z/d:d; Z0dS )<    N)datetime)Path)EmptyDataErrorc                 C   s   | dddS )Niosasdata )datapathr   r   U/app/.heroku/python/lib/python3.10/site-packages/pandas/tests/io/sas/test_sas7bdat.pydirpath   s   r            )paramsc                 C   s   | j \}}tj|d| d}t|}tddd}tj|d dd}|| |d< tj|d dd}|| |d< t|j	d D ]"}	|j
d d |	f }
|
jtjkrb||	|j
d d |	f tj q@||fS )	NZtest_sas7bdat_z.csvi  r   ZColumn4d)unitZColumn12)paramospathjoinpdread_csvr   Zto_timedeltarangeshapeilocdtypenpZint64Zisetitemastypefloat64)requestr   itest_ixfnamedfepocht1t2kcolr   r   r
   data_test_ix   s   

"r)   c                   @   s   e Zd Zejjdd Zejjdd Zejjdd Zejjdd Z	e
d	ejjd
d Zejjejddejdedddd Zdd ZdS )TestSAS7BDATc                 C   sF   |\}}|D ]}t j|d| d}tj|dd}t|| qd S Ntest	.sas7bdatutf-8encoding)r   r   r   r   read_sastmassert_frame_equalselfr   r)   df0r!   r'   r"   r#   r   r   r
   test_from_file*   s   zTestSAS7BDAT.test_from_filec              	   C   s   |\}}|D ]P}t j|d| d}t|d}| }W d    n1 s'w   Y  t|}	tj|	dddd}
|
 }W d    n1 sIw   Y  t	j
||dd	 qd S )
Nr,   r-   rbsas7bdatTr.   formatiteratorr0   FZcheck_exact)r   r   r   openreadr   BytesIOr   r1   r2   r3   )r5   r   r)   r6   r!   r'   r"   fZbytsbufrdrr#   r   r   r
   test_from_buffer2   s   


zTestSAS7BDAT.test_from_bufferc           	   	   C   s   |\}}|D ]M}t j|d| d}tj|ddd0}|d}t||jddd d f  |d}t||jdd	d d f  W d    n1 sNw   Y  qd S )
Nr,   r-   Tr.   r<   r0   r   r         )	r   r   r   r   r1   r?   r2   r3   r   )	r5   r   r)   r6   r!   r'   r"   rC   r#   r   r   r
   test_from_iterator@   s   

 zTestSAS7BDAT.test_from_iteratorc                 C   sJ   |\}}|D ]}t tj|d| d}tj|dd}t|| qd S r+   )r   r   r   r   r   r1   r2   r3   r4   r   r   r
   test_path_pathlibK   s   zTestSAS7BDAT.test_path_pathlibzpy.pathc           	      C   sV   ddl m} |\}}|D ]}|tj|d| d}tj|dd}t|| qd S )Nr   )localr,   r-   r.   r/   )	Zpy.pathrJ   r   r   r   r   r1   r2   r3   )	r5   r   r)   Z	LocalPathr6   r!   r'   r"   r#   r   r   r
   test_path_localpathS   s   z TestSAS7BDAT.test_path_localpath	chunksize)rF   rG   
      r'   r      c                 C   st   t j|d| d}tj||dd}d}|D ]	}||jd 7 }qW d    n1 s,w   Y  ||jks8J d S )Nr,   r-   r.   )rL   r0   r   )r   r   r   r   r1   r   	row_count)r5   r   r'   rL   r"   rC   yxr   r   r
   test_iterator_loop^   s   zTestSAS7BDAT.test_iterator_loopc                 C   s   t j|d}tj|dddd}||jd }W d    n1 s#w   Y  tj|ddd}||jd }W d    n1 sCw   Y  t|| d S )Ntest1.sas7bdatr9   Tr.   r:      rE   )	r   r   r   r   r1   r?   rP   r2   r3   )r5   r   r"   rC   Zd1Zd2r   r   r
   test_iterator_read_too_muchj   s   z(TestSAS7BDAT.test_iterator_read_too_muchN)__name__
__module____qualname__pytestmarkZslowr7   rD   rH   rI   tdZ
skip_if_norK   parametrizer   rS   rV   r   r   r   r
   r*   )   s"    




		r*   c           
   	   C   s   | dddd}t |}t j|dd}|jD ]}z|| jd||< W q ty-   Y qw t|| ddlm	} t
||d	d
}| }W d    n1 sRw   Y  t|j|jD ]\}}	||	 ksjJ q^d S )Nr   r   r   rT   r.   r/   r   )SAS7BDATReaderF)Zconvert_header_text)r   r1   columnsstrdecodeAttributeErrorr2   r3   Zpandas.io.sas.sas7bdatr^   
contextlibclosingr?   zip)
r	   r"   df1df2r(   r^   rC   Zdf3rR   rQ   r   r   r
   test_encoding_optionsw   s"   


rh   c                 C   s   | dddd}t j|ddd}|jdksJ | }W d    n1 s%w   Y  t j|ddd}| }W d    n1 sAw   Y  t|| d S )	Nr   r   r   rT   ZinferT)r0   r<   cp1252)r   r1   Zinferred_encodingr?   r2   r3   )r	   r"   Z
df1_readerrf   Z
df2_readerrg   r   r   r
   test_encoding_infer   s   

rj   c                 C   sf   | dddd}t j|dd}| dddd}t j|dgd	}g d
}|| tj||< t|| d S )Nr   r   r   zproductsales.sas7bdatr.   r/   zproductsales.csvZMONTHZparse_dates)ZACTUALZPREDICTZQUARTERZYEARr   r1   r   r   r   r   r2   r3   )r	   r"   r#   r6   Zvnr   r   r
   test_productsales   s   rm   c                 C   sL   | dddd}t |}| dddd}t |}|tj}t|| d S )Nr   r   r   ztest_12659.sas7bdatztest_12659.csvrl   r	   r"   r#   r6   r   r   r
   
test_12659   s   

ro   c                 C   sP   | dddd}t |}| dddd}t |}|tj}tj||dd d S )Nr   r   r   zairline.sas7bdatzairline.csvFr=   rl   rn   r   r   r
   test_airline   s   

rp   c                 C   sl   | dddd}t |}| dddd}t j|g dd}|jd d df jd	||jd < t|| d S )
Nr   r   r   zdatetime.sas7bdatzdatetime.csv)ZDate1ZDate2DateTimeZ
DateTimeHiZTaiwrk   rF   us)	r   r1   r   r   dtroundr_   r2   r3   rn   r   r   r
   test_date_time   s   
$ru   columnZWGTZCYLc                 C   sD   | dddd}t j|dd}|| }||  }tj||dd d S )	Nr   r   r   zcars.sas7bdatlatin-1r/   Tr=   )r   r1   rt   r2   Zassert_series_equal)r	   rv   r"   r#   resultexpectedr   r   r
   test_compact_numerical_values   s
   rz   c                 C   sH   | dddd}t j|dd}| dddd}t j|dd}t|| d S )Nr   r   r   zmany_columns.sas7bdatrw   r/   zmany_columns.csvr   r1   r   r2   r3   rn   r   r   r
   test_many_columns   s
   r|   c                 C   s0   | dddd}t j|dd}t|dksJ d S )Nr   r   r   zload_log.sas7bdatrw   r/   i1  r   r1   lenr	   r"   r#   r   r   r
    test_inconsistent_number_of_rows   s   r   c                 C   sL   | dddd}t jtdd t| W d    d S 1 sw   Y  d S )Nr   r   r   zzero_variables.sas7bdatzNo columns to parse from filematch)rZ   raisesr   r   r1   )r	   r"   r   r   r
   test_zero_variables   s   "r   c                 C   sD   | dddd}t |}t dddgjd d }t|| d S )	Nr   r   r   zzero_rows.sas7bdatag      ?)Z
char_fieldZ	num_fieldr   )r   r1   	DataFramer   r2   r3   )r	   r"   rx   ry   r   r   r
   test_zero_rows   s   
r   c                 C   sP   | dddd}d}t jt|d t| W d    d S 1 s!w   Y  d S )Nr   r   r   zcorrupt.sas7bdatz4'SAS7BDATReader' object has no attribute 'row_count'r   )rZ   r   rb   r   r1   )r	   r"   msgr   r   r
   test_corrupt_read   s
   "r   c                 C   sf   t | tr| jtt| jdd d dS t | tr1tjj	| d}|jtt|jdd d dS | S )N  )microsecond)Ztimestr)

isinstancer   replaceintrt   r   r`   dateutilparserparse)tsZ_tsr   r   r
   round_datetime_to_ms   s   
 
 r   c                 C   s   | dddd}t j|dd}|dd }z|d	 jd
|d	< W n# t jjjjy3   |t	}Y n t
yD   |d	 t	|d	< Y nw t jddgddgtdddddddtdddddddgddgtdddtdddgdg dd}t|| d S )Nr   r   r   max_sas_date.sas7bdat
iso-8859-1r/   c                 S      t | tr	|  S | S Nr   r`   lstriprR   r   r   r
   <lambda>      z#test_max_sas_date.<locals>.<lambda>dt_as_dtrr   maxnormal`MBA'           ;   X> i     r      qgFA    @@textZdt_as_floatr   Zdate_as_floatZdate_as_dater_   )r   r1   applymaprs   rt   _libstslibsnp_datetimeOutOfBoundsDatetimer   rb   applyr   r   r2   r3   r	   r"   r#   ry   r   r   r
   test_max_sas_date  s,   
r   c                 C   sF  g d}| dddd}g }t j|dddD ]E}|d	d
 }z|d jd|d< W n# t jjjjy<   |t	}Y n t
yM   |d t	|d< Y nw |jddd || qt jdgdgtdddddddgdgtdddgd|dt jdgdgtdgdgtdgd|dg}t||D ]
\}}t|| qd S )Nr   r   r   r   r   r   r   )r0   rL   c                 S   r   r   r   r   r   r   r
   r   1  r   z,test_max_sas_date_iterator.<locals>.<lambda>r   rr   T)ZinplaceZdropr   r   r   r   r   r   r   r   r   r   r   r   z2019-08-01 23:59:59.999r   z
2019-08-01)r   r1   r   rs   rt   r   r   r   r   r   rb   r   Zreset_indexappendr   r   r   Z
datetime64re   r2   r3   )r	   Z	col_orderr"   resultsr#   ry   rx   r   r   r
   test_max_sas_date_iterator'  sH   


r   c                 C   s`   | dddd}t j|dd}t tddd	t jgtddd	d
dddt jgd}t|| d S )Nr   r   r   zdates_null.sas7bdatr.   r/   r   r   r   r   r   iQ> )ZdatecolZdatetimecol)r   r1   r   r   ZNaTr2   r3   r   r   r   r
   test_null_dateU  s   
r   c                 C   s,   | dddd}t |}t|dksJ d S )Nr   r   r   ztest_meta2_page.sas7bdatr   r}   r   r   r   r
   test_meta2_pageh  s   
r   z8test_file, override_offset, override_value, expected_msg))test2.sas7bdat    Out of bounds)r   r   r   zunknown control byte)ztest3.sas7bdati    r   c                 C   s   t | ddd|d}t| }W d   n1 sw   Y  |||< tjt|d tjt	|dd W d   dS 1 sAw   Y  dS )	z1Errors in RLE/RDC decompression should propagate.r   r   r   r8   Nr   r9   )r;   )
r>   	bytearrayr?   rZ   r   	Exceptionr   r1   r   r@   )r	   Z	test_fileZoverride_offsetZoverride_valueZexpected_msgfdr   r   r   r
   test_rle_rdc_exceptionso  s   "r   c                 C   sH   | dddd}t j|dd}| dddd}t j|dd	}t|| d S )
Nr   r   r   z0x40controlbyte.sas7bdatasciir/   z0x40controlbyte.csvobject)r   r{   rn   r   r   r
   test_0x40_control_byte  s
   r   c                 C   s2   | dddd}t tj|dd}|jdksJ d S )Nr   r   r   z0x00controlbyte.sas7bdat.bz2*  )rL   )r   rU   )nextr   r1   r   r   r   r   r
   test_0x00_control_byte  s   r   )1rc   r   r   r   pathlibr   Zdateutil.parserr   numpyr   rZ   Zpandas.errorsr   Zpandas.util._test_decoratorsutilZ_test_decoratorsr\   Zpandasr   Zpandas._testingZ_testingr2   Zfixturer   r   r)   r*   rh   rj   rm   ro   rp   ru   r[   r]   rz   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   <module>   sT    

N
		
	
#.
	