o
    d                     @  sf  d dl mZ d dlmZmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlZd dlZd d	lmZ d d
lmZ d dlmZmZ d dlm  mZ d dlZd dlm Z m!Z!m"Z" d dl#m$Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, e!g dg ddej-dgdZ.dZ/dZ0e!ddddddddddddddddddd d d d d dd d d d d dd!d"d#d$d%dd&Z1ej2d'd(gd)d*d+ Z3ej2ej4d,e5d,d-d.gd)d/d0 Z6d1d2 Z7d3d4 Z8e5d,d5d6 Z9ej:j;ej:j<e%j;d7d8d9d:d; Z=d<d= Z>d>d? Z?d@dA Z@dBdC ZAdDdE ZBdFdG ZCdHdI ZDe5d,ej:EdJdKdLgdMdN ZFej:EdJdKdLgdOdP ZGe5d,dQdR ZHdSdT ZIej:j;e%j;dUd8d9e5d,dVdW ZJej:j;e%j;dXd8d9dYdZ ZKe5d,d[d\ ZLd]d^ ZMe5d,d_d` ZNdadb ZOdcdd ZPe5d,dedf ZQe5d,dgdh ZRdidj ZSdkdl ZTe5d,dmdn ZUe5d,ej:EdodKdgdpdq ZVdrds ZWdtdu ZXdvdw ZYdxdy ZZdzd{ Z[d|d} Z\e5d,d~d Z]dd Z^dd Z_dd Z`dd Zadd Zbdd Zcdd Zddd Zedd Zfdd Zge5d,dd Zhe5d,dd Zidd Zjekd,dd Zldd Zme5d,dd Zne5d,dd Zoe5d,dd Zpe5d,dd Zqe5d,dd Zre5d,dd Zse5d,dd Zte5d,dd Zue5d,dd Zve5d,dd Zwe5d,dd Zxe5d,dd Zye5d,ej:EdJdKdLgdd Zzdd Z{dd Z|dd Z}ej:j;e%j;dUd8d9dd Z~dd ZddÄ Zddń ZddǄ ZddɄ Zdd˄ Zdd̈́ Zddτ Zddф Zej:j;e5d,e%j;dd8d9ddԄ Zddք Zdd؄ Zddڄ Zej:j;e5dۡe5d,ej:je ddݍe%j;dd߄ Zdd Zdd ZdS )    )annotations)BytesIOStringIO)	LZMAErrorN)	ReadError)	HTTPError)
ParseError)
BadZipFile)is_ci_environment)import_optional_dependency)EmptyDataErrorParserError)NA	DataFrameSeries)ArrowStringArrayStringArray)
get_handle)read_xmlsquarecircletriangleh  r            shapedegreessidesa[  <?xml version='1.0' encoding='utf-8'?>
<data xmlns="http://example.com">
  <row>
    <shape>square</shape>
    <degrees>360</degrees>
    <sides>4</sides>
  </row>
  <row>
    <shape>circle</shape>
    <degrees>360</degrees>
    <sides/>
  </row>
  <row>
    <shape>triangle</shape>
    <degrees>180</degrees>
    <sides>3</sides>
  </row>
</data>a  <?xml version='1.0' encoding='utf-8'?>
<doc:data xmlns:doc="http://example.com">
  <doc:row>
    <doc:shape>square</doc:shape>
    <doc:degrees>360</doc:degrees>
    <doc:sides>4.0</doc:sides>
  </doc:row>
  <doc:row>
    <doc:shape>circle</doc:shape>
    <doc:degrees>360</doc:degrees>
    <doc:sides/>
  </doc:row>
  <doc:row>
    <doc:shape>triangle</doc:shape>
    <doc:degrees>180</doc:degrees>
    <doc:sides>3.0</doc:sides>
  </doc:row>
</doc:data>ZID_00001ZID_00002ZID_00003ZID_00004ZID_00005r         r   r   zBlue Line (Forest Park)zRed, Purple Linez#LineStyle01ZclampedToGroundz-87.77678526964958,41.8708863930319,0 -87.77826234150609,41.87097820122218,0 -87.78251583439344,41.87130129991005,0 -87.78418294588424,41.87145055520308,0 -87.7872369165933,41.8717239119163,0 -87.79160214925886,41.87210797280065,0a\  -87.65758750947528,41.96427269188822,0 -87.65802133507393,41.96581929055245,0 -87.65819033925305,41.96621846093642,0 -87.6583189819129,41.96650362897086,0 -87.65835858701473,41.96669002089185,0 -87.65838428411853,41.96688150295095,0 -87.65842208882658,41.96745896091846,0 -87.65846556843937,41.9683761425439,0 -87.65849296214573,41.96913893870342,0a  -87.65492939166126,41.95377494531437,0 -87.65557043199591,41.95376544118533,0 -87.65606302030132,41.95376391658746,0 -87.65623502146268,41.95377379126367,0 -87.65634748981634,41.95380103566435,0 -87.65646537904269,41.95387703994676,0 -87.65656532461145,41.95396622645799,0 -87.65664760856414,41.95404201996044,0 -87.65671750555913,41.95416647054043,0 -87.65673983607117,41.95429949810849,0 -87.65673866475777,41.95441024240925,0 -87.6567690255541,41.95490657227902,0 -87.65683672482363,41.95692259283837,0 -87.6568900886376,41.95861070983142,0 -87.65699865558875,41.96181418669004,0 -87.65756347177603,41.96397045777844,0 -87.65758750947528,41.96427269188822,0ah  -87.65362593118043,41.94742799535678,0 -87.65363554415794,41.94819886386848,0 -87.6536456393239,41.95059994675451,0 -87.65365831235026,41.95108288489359,0 -87.6536604873874,41.9519954657554,0 -87.65362592053201,41.95245597302328,0 -87.65367158496069,41.95311153649393,0 -87.65368468595476,41.9533202828916,0 -87.65369271253692,41.95343095587119,0 -87.65373335834569,41.95351536301472,0 -87.65378605844126,41.95358212680591,0 -87.65385067928185,41.95364452823767,0 -87.6539390793817,41.95370263886964,0 -87.6540786298351,41.95373403675265,0 -87.65430648647626,41.9537535411832,0 -87.65492939166126,41.95377494531437,0a   -87.65345391792157,41.94217681262115,0 -87.65342448305786,41.94237224420864,0 -87.65339745703922,41.94268217746244,0 -87.65337753982941,41.94288140770284,0 -87.65336256753105,41.94317369618263,0 -87.65338799707138,41.94357253961736,0 -87.65340240886648,41.94389158188269,0 -87.65341837392448,41.94406444407721,0 -87.65342275247338,41.94421065714904,0 -87.65347469646018,41.94434829382345,0 -87.65351486483024,41.94447699917548,0 -87.65353483605053,41.9453896864472,0 -87.65361975532807,41.94689193720703,0 -87.65362593118043,41.94742799535678,0idnameZstyleUrlZextrudeZaltitudeModeZcoordinatesrbr)paramsc                 C     | j S Nparamrequest r1   P/app/.heroku/python/lib/python3.10/site-packages/pandas/tests/io/xml/test_xml.pymode      r3   lxml)Zmarksetreec                 C  r+   r,   r-   r/   r1   r1   r2   parser   r4   r7   c              	   K  sp   t  *}t|d}||  W d    n1 sw   Y  t|fi |W  d    S 1 s1w   Y  d S )Nw)tmensure_cleanopenwriter   )datakwargspathfr1   r1   r2   read_xml_iterparse  s   
$rA   c              
   K  s   t | d|dI}t 4}t|d}||j  W d    n1 s%w   Y  t|fi |W  d    W  d    S 1 sCw   Y  W d    d S 1 sSw   Y  d S )Nr)   )compressionr8   )r   r9   r:   r;   r<   handlereadr   )	comp_pathcompression_onlyr>   Zhandlesr?   r@   r1   r1   r2   read_xml_iterparse_comp  s   
"rG   c                 C  sz   | dddd}t |dd}t |dd}t |ddg d	id
}t |ddg d	id
}t|| t|| t|| d S )Nior=   xml	books.xmlr5   r7   r6   bookcategorytitleyearauthorpricer7   	iterparser   r9   assert_frame_equal)datapathfilenameZdf_file_lxmlZdf_file_etreedf_iter_lxmldf_iter_etreer1   r1   r2   test_parser_consistency_file  s    

r[   Ohttps://data.cityofchicago.org/api/views/8pix-ypme/rows.xml?accessType=DOWNLOADT)urlZcheck_before_testc                 C  sz   d}t jdd%}t|d| dj|dd t|| d}t|| d	g d
id}W d    n1 s0w   Y  t || d S )Nr\   zcta.xmlrX   z
.//row/rowxpathr7   FindexrK   row)Z_id_uuidZ	_position_addressZstop_idZdirection_idZ	stop_nameZstation_nameZstation_descriptive_nameZmap_idadaredbluegZbrnpZpexpyZpnkolocationrS   )r9   r:   r   to_xmlrV   )r7   r]   r?   df_xpathdf_iterr1   r1   r2   test_parser_consistency_url,  s   "rq   c                 C  s|   | dddd}t ||}t||d}W d    n1 sw   Y  tg dg dg dg d	g d
d}t|| d S NrH   r=   rI   rJ   rK   cookingchildrenZwebzEveryday ItalianzHarry PotterzLearning XMLzGiada De LaurentiiszJ K. RowlingzErik T. Ray  ry   i  g      >@g=
ףp=@gC@rN   rO   rQ   rP   rR   )r;   r   r   r9   rV   )rW   r7   r3   rX   r@   df_filedf_expectedr1   r1   r2   test_file_like`  s   
r~   c                 C  s   | dddd}t ||}| }W d    n1 sw   Y  tt|tr*t|nt||d}tg dg dg dg d	g d
d}t	|| d S rr   )
r;   rD   r   
isinstancebytesr   r   r   r9   rV   )rW   r7   r3   rX   r@   xml_objZdf_ior}   r1   r1   r2   test_file_ior  s"   

r   c                 C  s   | dddd}t ||}| }W d    n1 sw   Y  t||d}tg dg dg dg d	g d
d}t|| d S rr   )r;   rD   r   r   r9   rV   rW   r7   r3   rX   r@   r   df_strr}   r1   r1   r2    test_file_buffered_reader_string  s   

r   c                 C  s   | dddd}t ||}t| | }W d    n1 sw   Y  t||d}tg dg dg dg d	g d
d}t|| d S rr   )r;   nextrD   r   r   r9   rV   r   r1   r1   r2   ,test_file_buffered_reader_no_xml_declaration  s   

r   c                 C  s4   d}t || d}tddddgd}t|| d S )N<   <中文標籤><row><c1>1</c1><c2>2</c2></row></中文標籤>rK   r#   r$   )c1c2r   ra   r   r   r9   rV   )r7   txtr   r}   r1   r1   r2   test_string_charset  s   r   c                 C  sJ   | dddd}t | ||d}tg dg dg dd	}t|| d S )
NrH   r=   rI   zdoc_ch_utf.xmlrK   )uP   問  若箇是邪而言破邪 何者是正而道(Sorry, this is Big5 only)申正u;   問 既破有得申無得 亦應但破性執申假名以不uO   問 既破性申假 亦應但破有申無 若有無兩洗 亦應性假雙破耶)uw   答  邪既無量 正亦多途  大略為言不出二種 謂有得與無得 有得是邪須破 無得是正須申
		故NuL   答  不例  有無皆是性 所以須雙破 既分性假異 故有破不破)NuV   答 性執是有得 假名是無得  今破有得申無得 即是破性執申假名也N)u   問u   答ar   )rW   r7   xml_filer|   r}   r1   r1   r2   test_file_charset  s   r   c                 C  s\   | dddd}t |d}tt| |d |jrJ W d    d S 1 s'w   Y  d S )NrH   r=   rI   rJ   r(   rK   )r;   r   r   rD   closed)rW   r7   r   r@   r1   r1   r2   test_file_handle_close  s
   "r   val     c                 C  sZ   ddl m} dddg}tj||d t| dd W d    d S 1 s&w   Y  d S )	Nr   XMLSyntaxError|zDocument is emptyzNone \(line 0\)matchr5   rK   )
lxml.etreer   joinpytestraisesr   )r   r   msgr1   r1   r2   test_empty_string_lxml  s   "r   c                 C  s@   t jtdd t| dd W d    d S 1 sw   Y  d S )Nzno element foundr   r6   rK   )r   r   r   r   )r   r1   r1   r2   test_empty_string_etree  s   "r   c                  C  s\   ddl m}  tjddd}tj| dd t|dd	 W d    d S 1 s'w   Y  d S )
Nr   r   r=   htmlrJ   !Start tag expected, '<' not foundr   r5   rK   r   r   osr?   r   r   r   r   )r   rX   r1   r1   r2   test_wrong_file_path_lxml  s   "r   c                  C  sP   t jddd} tjtdd t| dd W d    d S 1 s!w   Y  d S )Nr=   r   rJ   znot well-formedr   r6   rK   )r   r?   r   r   r   r   r   r^   r1   r1   r2   test_wrong_file_path_etree   s   "r   'https://www.w3schools.com/xml/books.xmlc                  C  sN   d} t | dd}tg dg dg dg dg dg d	d
}t|| d S )Nr   .//book[count(*)=4]r`   rs   rv   rw   rx   rz   )NNZ	paperback)rN   rO   rQ   rP   rR   Zcoverr   )r]   Zdf_urlr}   r1   r1   r2   test_url
  s   r   (https://www.w3schools.com/xml/python.xmlc                 C  sF   t jtdd d}t|d| d W d    d S 1 sw   Y  d S )NzHTTP Error 404: Not Foundr   r   r   r_   )r   r   r   r   r7   r]   r1   r1   r2   test_wrong_url"  s   "r   c                 C  P   | dddd}t jtdd t|ddd	 W d    d S 1 s!w   Y  d S )
NrH   r=   rI   rJ   xpath does not return any nodesr   z	.//pythonr5   r_   r   r   
ValueErrorr   rW   rX   r1   r1   r2   test_empty_xpath_lxml-  s   "r   c                 C  r   )
NrH   r=   rI   rJ   z/You have used an incorrect or unsupported XPathr   	.//[book]r6   r_   r   r   SyntaxErrorr   r   r1   r1   r2   test_bad_xpath_etree4  s   "r   c                 C  \   ddl m} | dddd}tj|dd t|d	d
d W d    d S 1 s'w   Y  d S )Nr   XPathEvalErrorrH   r=   rI   rJ   zInvalid expressionr   r   r5   r_   r   r   r   r   r   rW   r   rX   r1   r1   r2   test_bad_xpath_lxml<  
   "r   c                 C  h   t tdddi| d}tt| dg did}tg dg d	d
tddgd}t|| t|| d S )N	.//ns:rownshttp://example.comr`   
namespacesr7   rc   r   rS   r   r         @nan      @)r   xml_default_nmsprA   r   floatr9   rV   r7   Zdf_nmsprp   r}   r1   r1   r2   test_default_namespaceH  s&   
r   c                 C  r   )N
.//doc:rowdocr   r   rc   r   rS   r   r   r   r   r   )r   xml_prefix_nmsprA   r   r   r9   rV   r   r1   r1   r2   test_prefix_namespaceb  s"   r   c                  C  s8   t tdddidd} t tdddidd}t| | d S )	Nr   r   r   r5   r   r   r   r6   )r   r   r9   rV   df_lxmldf_etreer1   r1   r2   "test_consistency_default_namespacey     r   c                  C  s8   t tdddidd} t tdddidd}t| | d S )Nr   r   r   r5   r   r6   )r   r   r9   rV   r   r1   r1   r2   !test_consistency_prefix_namespace  r   r   c                 C  P   | dddd}t jtdd t|d|d W d    d S 1 s!w   Y  d S )	NrH   r=   rI   rJ   r   r   z.//Placemarkr_   r   rW   r7   rX   r1   r1   r2   *test_missing_prefix_with_default_namespace     "r   c                 C  r   )
NrH   r=   rI   cta_rail_lines.kmlz'you used an undeclared namespace prefixr   .//kml:Placemarkr6   r_   r   r   r1   r1   r2   $test_missing_prefix_definition_etree  r   r   c                 C  r   )Nr   r   rH   r=   rI   r   zUndefined namespace prefixr   r   r5   r_   r   r   r1   r1   r2   #test_missing_prefix_definition_lxml  r   r   keyc                 C  sH   t jtdd ttd| didd W d    d S 1 sw   Y  d S )Nz0empty namespace prefix is not supported in XPathr   r   http://www.opengis.net/kml/2.2r5   r   )r   r   	TypeErrorr   r   )r   r1   r1   r2   test_none_namespace_prefix  s   "r   c                 C  st   | dddd}t ||d}t ||dg did}tg d	g d
g dg dg dd}t|| t|| d S )NrH   r=   rI   rJ   rK   rL   r{   rS   rs   rv   rw   rx   rz   r   rW   r7   rX   r|   rp   r}   r1   r1   r2   test_file_elems_and_attrs  s"   

r   c                 C  s\   | dddd}t |d|d}t ||ddgid	}tdg d
i}t|| t|| d S )NrH   r=   rI   rJ   T)
attrs_onlyr7   rL   rN   rS   rs   r   r   r1   r1   r2   test_file_only_attrs  s   r   c                 C  sp   | dddd}t |d|d}t ||dg did	}tg d
g dg dg dd}t|| t|| d S )NrH   r=   rI   rJ   T)
elems_onlyr7   rL   )rO   rQ   rP   rR   rS   rv   rw   rx   rz   r   r   r1   r1   r2   test_file_only_elems  s    
	r   c                 C  sR   | dddd}t jtdd t|dd|d W d    d S 1 s"w   Y  d S )	NrH   r=   rI   r   z3Either element or attributes can be parsed not bothr   T)r   r   r7   r   r   r1   r1   r2   test_elem_and_attrs_only  s   "r   c                 C  H   d}t jtdd t|dd| d W d    d S 1 sw   Y  d S )NaU  
      <data>
        <row>
          <shape sides="4">square</shape>
          <degrees>360</degrees>
        </row>
        <row>
          <shape sides="0">circle</shape>
          <degrees>360</degrees>
        </row>
        <row>
          <shape sides="3">triangle</shape>
          <degrees>180</degrees>
        </row>
      </data>-xpath does not return any nodes or attributesr   ./rowT)r`   r   r7   r   r7   rI   r1   r1   r2   test_empty_attrs_only  s   "r   c                 C  r   )Nz
      <data>
        <row sides="4" shape="square" degrees="360"/>
        <row sides="0" shape="circle" degrees="360"/>
        <row sides="3" shape="triangle" degrees="180"/>
      </data>r   r   r   T)r`   r   r7   r   r   r1   r1   r2   test_empty_elems_only  s   "r   c                  C  sd   d} t | dd}t | ddd}t| dddgid	}t| ddddgid
}t|| t|| d S )Na  <?xml version="1.0" encoding="UTF-8"?>
<TrainSchedule>
      <Stations>
         <station Name="Manhattan" coords="31,460,195,498"/>
         <station Name="Laraway Road" coords="63,409,194,455"/>
         <station Name="179th St (Orland Park)" coords="0,364,110,395"/>
         <station Name="153rd St (Orland Park)" coords="7,333,113,362"/>
         <station Name="143rd St (Orland Park)" coords="17,297,115,330"/>
         <station Name="Palos Park" coords="128,281,239,303"/>
         <station Name="Palos Heights" coords="148,257,283,279"/>
         <station Name="Worth" coords="170,230,248,255"/>
         <station Name="Chicago Ridge" coords="70,187,208,214"/>
         <station Name="Oak Lawn" coords="166,159,266,185"/>
         <station Name="Ashburn" coords="197,133,336,157"/>
         <station Name="Wrightwood" coords="219,106,340,133"/>
         <station Name="Chicago Union Sta" coords="220,0,360,43"/>
      </Stations>
</TrainSchedule>z
.//stationr   r6   r_   stationNameZcoordsrT   rS   )r   rA   r9   rV   )rI   r   r   Z
df_iter_lxZ
df_iter_etr1   r1   r2   test_attribute_centric_xml-  s   r   c                 C  s   | dddd}t |g d|d}t ||g ddg did	}tg d
g dg dg dg dd}t|| t|| d S )NrH   r=   rI   rJ   )Col1Col2Col3ZCol4ZCol5namesr7   rL   r{   )r7   r   rT   rs   rv   rw   rx   rz   r   r   r1   r1   r2   test_names_option_outputR  s(   


r   c                 C  sl   d}t |d| g dd}t|| dg dig dd}tdd	gd
dgddgd}t|| t|| d S )Nz<shapes>
  <shape type="2D">
    <name>circle</name>
    <type>curved</type>
  </shape>
  <shape type="3D">
    <name>sphere</name>
    <type>curved</type>
  </shape>
</shapes>.//shape)Ztype_dimr   Z	type_edger`   r7   r   r   )typer'   r   r7   rT   r   2D3Dr   sphereZcurvedr   rA   r   r9   rV   r7   rI   ro   rp   r}   r1   r1   r2   test_repeat_namesl  s$   
r  c                 C  sf   d}t |d| ddgd}t|| dddgiddgd}tg d	g d
d}t|| t|| d S )NaB  <shapes>
  <shape>
    <name>rectangle</name>
    <family>rectangle</family>
  </shape>
  <shape>
    <name>square</name>
    <family>rectangle</family>
  </shape>
  <shape>
    <name>ellipse</name>
    <family>ellipse</family>
  </shape>
  <shape>
    <name>circle</name>
    <family>ellipse</family>
  </shape>
</shapes>r   r'   groupr   r   familyr   )	rectangler   ellipser   )r  r  r  r  )r'   r  r  r  r1   r1   r2   test_repeat_values_new_names  s   
r  c                 C  sr   d}t |d| g dd}t|| dg dig dd}tg dg d	g d
g dd}t|| t|| d S )Na  <shapes>
  <shape>
    <value item="name">circle</value>
    <value item="family">ellipse</value>
    <value item="degrees">360</value>
    <value item="sides">0</value>
  </shape>
  <shape>
    <value item="name">triangle</value>
    <value item="family">polygon</value>
    <value item="degrees">180</value>
    <value item="sides">3</value>
  </shape>
  <shape>
    <value item="name">square</value>
    <value item="family">polygon</value>
    <value item="degrees">360</value>
    <value item="sides">4</value>
  </shape>
</shapes>r   )r'   r  r    r!   r   r   )valuer	  r	  r	  r   )r   r   r   )r  polygonr
  )r   r   r   )r   r   r   r  r  r1   r1   r2   test_repeat_elements  s,   
	r  c                 C  sT   | dddd}t jtdd t|g d|d W d    d S 1 s#w   Y  d S )	NrH   r=   rI   rJ   znames does not match lengthr   )r   r   r   r   r   r   r1   r1   r2   test_names_option_wrong_length  s   "r  c                 C  r   )	NrH   r=   rI   rJ   zis not a valid type for namesr   zCol1, Col2, Col3r   r   r   r   r   r   r1   r1   r2   test_names_option_wrong_type  s   "r  c                 C  sN   | dddd}t jtdd t||d W d    d S 1 s w   Y  d S )NrH   r=   rI   baby_names.xmlz'utf-8' codec can't decoder   rK   r   r   UnicodeDecodeErrorr   r   r1   r1   r2   test_wrong_encoding  s   "r  c                 C  r   )	NrH   r=   rI   r  zIUTF-16 stream does not start with BOM|'utf-16-le' codec can't decode byter   zUTF-16encodingr7   )r   r   UnicodeErrorr   r   r1   r1   r2   test_utf16_encoding  s   "r  c                 C  r   )	NrH   r=   rI   r  zunknown encoding: UFT-8r   zUFT-8r  )r   r   LookupErrorr   r   r1   r1   r2   test_unknown_encoding  r   r  c                 C  r   )	NrH   r=   rI   r  z'ascii' codec can't decode byter   asciir  r  r   r1   r1   r2   test_ascii_encoding  r   r  c                 C  s   | dddd}t |ddd}t |dd	d}t |ddd
g did}t |ddd
g did}t|| t|| t|| d S )NrH   r=   rI   r  r5   z
ISO-8859-1r7   r  r6   z
iso-8859-1rc   )ZrankZmalenameZ
femalename)r7   r  rT   rU   )rW   rX   Zdf_xpath_lxmlZdf_xpath_etreerY   rZ   r1   r1   r2   %test_parser_consistency_with_encoding  s$   

r  c                  C  sJ   d} t jtdd tt| dd d W d    d S 1 sw   Y  d S )N-<data>
  <row>
    <a>c</a>
  </row>
</data>
zencoding Noner   r5   r  )r   r   r   r   r   )r=   r1   r1   r2   test_wrong_encoding_for_lxml/  s   "r  c                  C  s4   d} t t| dd d}tddgi}t|| d S )Nr  r6   r  r   c)r   r   r   r9   rV   )r=   resultexpectedr1   r1   r2   test_none_encoding_etree<  s   r"  c                 C  sJ   | dddd}t jtdd t| W d    d S 1 sw   Y  d S )NrH   r=   rI   rJ   z7lxml not found, please install or use the etree parser.r   )r   r   ImportErrorr   r   r1   r1   r2   test_default_parser_no_lxmlL  s   
"r$  c                 C  sN   | dddd}t jtdd t|dd W d    d S 1 s w   Y  d S )	NrH   r=   rI   rJ   z,Values for parser can only be lxml or etree.r   Zbs4rK   r   r   r1   r1   r2   test_wrong_parserV  s   "r%  c                 C  s`   | dddd}| dddd}t |dddi|d	}t |d
g did}tt| tt| d S )NrH   r=   rI   r   flatten_doc.xsl.//k:Placemarkkr   r`   r   
stylesheetZ	Placemarkr%   r   )r   r9   rV   df_kml)rW   kmlxsldf_stylerp   r1   r1   r2   test_stylesheet_fileb  s   r/  c                 C  sj   | dddd}| dddd}t ||}t|dddi|d	}W d    n1 s(w   Y  tt| d S 
NrH   r=   rI   r   r&  r'  r(  r   r)  )r;   r   r9   rV   r+  )rW   r3   r,  r-  r@   r.  r1   r1   r2   test_stylesheet_file_like  s   r1  c                 C  s   | dddd}| dddd}t ||}|dkrt| }nt| }W d    n1 s/w   Y  t|ddd	i|d
}tt| d S )NrH   r=   rI   r   r&  r(   r'  r(  r   r)  )r;   r   rD   r   r   r9   rV   r+  rW   r3   r,  r-  r@   xsl_objr.  r1   r1   r2   test_stylesheet_io  s   r4  c                 C  sr   | dddd}| dddd}t ||}| }W d    n1 s"w   Y  t|dddi|d	}tt| d S r0  )r;   rD   r   r9   rV   r+  r2  r1   r1   r2   test_stylesheet_buffered_reader  s   
r5  c                  C  s,   d} d}t | }t | |d}t|| d S )Nr   u  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <xsl:template match="node()|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
 </xsl:template>

 <xsl:template match="中文標籤">
     <根>
       <xsl:apply-templates />
     </根>
 </xsl:template>

</xsl:stylesheet>r*  rU   )rI   r-  Zdf_origr.  r1   r1   r2   test_style_charset  s
   r7  c                 C  sh   ddl m} | dddd}| dddd}tj|dd	 t||d
 W d    d S 1 s-w   Y  d S )Nr   XSLTParseErrorrH   r=   rI   r   rJ   zdocument is not a stylesheetr   r6  r   r9  r   r   r   )rW   r9  r,  r-  r1   r1   r2   test_not_stylesheet  s   "r;  c                 C  ^   ddl m} d}| dddd}tj|dd	 t||d
 W d    d S 1 s(w   Y  d S )Nr   r   a  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                              xmlns:k="http://www.opengis.net/kml/2.2"/>
    <xsl:output method="xml" omit-xml-declaration="yes"
                cdata-section-elements="k:description" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <xsl:template match="node()|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
    </xsl:template>

    <xsl:template match="k:MultiGeometry|k:LineString">
        <xsl:apply-templates select='*'/>
    </xsl:template>

    <xsl:template match="k:description|k:Snippet|k:Style"/>
</xsl:stylesheet>rH   r=   rI   r   z(Extra content at the end of the documentr   r6  )r   r   r   r   r   )rW   r   r-  r,  r1   r1   r2   test_incorrect_xsl_syntax  s   "r=  c                 C  r<  )Nr   r8  a  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                              xmlns:k="http://www.opengis.net/kml/2.2">
    <xsl:output method="xml" omit-xml-declaration="yes"
                cdata-section-elements="k:description" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <xsl:template match="node(*)|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
    </xsl:template>

    <xsl:template match="k:MultiGeometry|k:LineString">
        <xsl:apply-templates select='*'/>
    </xsl:template>

    <xsl:template match="k:description|k:Snippet|k:Style"/>
</xsl:stylesheet>rH   r=   rI   r   zfailed to compiler   r6  r:  )rW   r9  r-  r,  r1   r1   r2   test_incorrect_xsl_eval  s   "r>  c                 C  r<  )Nr   )XSLTApplyErrorag  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="xml" encoding="utf-8" indent="yes" />
    <xsl:strip-space elements="*"/>

    <xsl:template match="@*|node()">
        <xsl:copy>
            <xsl:copy-of select="document('non_existent.xml')/*"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>rH   r=   rI   r   zCannot resolve URIr   r6  )r   r?  r   r   r   )rW   r?  r-  r,  r1   r1   r2   test_incorrect_xsl_apply  s   "r@  c                  C  sl   ddl m}  tjddd}tjddd}tj| dd t||d	 W d    d S 1 s/w   Y  d S )
Nr   r   r=   rI   r   zflatten.xslr   r   r6  r   )r   r,  r-  r1   r1   r2   test_wrong_stylesheet5  s   "rA  c                 C  s   | dddd}| dddd}t ||%}|dkrt| }nt| }t||d |jr0J W d    d S 1 s;w   Y  d S )NrH   r=   rI   r   r&  r(   r6  )r;   r   rD   r   r   r   )rW   r3   r,  r-  r@   r3  r1   r1   r2   test_stylesheet_file_closeC  s   "rB  c                  C  sb   t jddd} t jddd}tjtdd t| d|d W d    d S 1 s*w   Y  d S )	Nr=   rI   r   r&  z*To use stylesheet, you need lxml installedr   r6   )r7   r*  )r   r?   r   r   r   r   r   )r,  r-  r1   r1   r2   test_stylesheet_with_etreeU  s   "rC  c                 C  s\   ddl m} tjddd}tj|dd t|| d W d    d S 1 s'w   Y  d S )	Nr   r   r=   rI   r   z3Document is empty|Start tag expected, '<' not foundr   r6  r   )r   r   r,  r1   r1   r2   test_empty_stylesheet`  s   "rD  c                 C  sJ   t jtdd tt| dg did W d    d S 1 sw   Y  d S )N)iterparse is designed for large XML filesr   rc   r   r    r!   daterS   )r   r   r   r   r   rK   r1   r1   r2   test_string_errorp  s   
"rH  c              	   C  s   | dddd}t ||F}|dkr@|dkr@tjtdd t||d	g d
id W d    n1 s2w   Y  	 W d    d S t||d	g d
id}W d    n1 sUw   Y  tg dg dg dg dg dd}t|| d S NrH   r=   rI   rJ   r)   r5   z.reading file objects must return bytes objectsr   rL   rM   rS   rs   rv   rw   rx   rz   r{   )r;   r   r   r   r   r   r9   rV   )rW   r7   r3   rX   r@   Zdf_fileliker}   r1   r1   r2   test_file_like_iterparse{  s<   


rJ  c           	   
   C  s4  | dddd}|dkrt nt}t||b}|| L}|dkrU|dkrUtjtdd t||d	g d
id W d    n1 sAw   Y  	 W d    W d    d S t||d	g d
id}W d    n1 sjw   Y  W d    n1 syw   Y  tg dg dg dg dg dd}t	
|| d S rI  )r   r   r;   rD   r   r   r   r   r   r9   rV   )	rW   r7   r3   rX   ZfuncIOr@   bZ	df_fileior}   r1   r1   r2   test_file_io_iterparse  sH   

rL  c                 C  sN   d}t jtdd t|| dg did W d    d S 1 s w   Y  d S )Nr   rE  r   rc   rF  rS   r   r   r   r   r   r1   r1   r2   test_url_path_error  s   
"rN  c              	   C  s   t jdd<}tj|| |d tjtdd t|| dg di|d W d    n1 s-w   Y  W d    d S W d    d S 1 sEw   Y  d S )	Nzgeom_xml.zipr^   r7   rB   rE  r   rc   rF  r7   rT   rB   )r9   r:   geom_dfrn   r   r   r   r   )r7   rF   r?   r1   r1   r2   test_compression_error  s   
"rR  c                 C  sT   | dddd}t jtdd t||g dd W d    d S 1 s#w   Y  d S )	NrH   r=   rI   rJ   z&list is not a valid type for iterparser   rM   rS   r  r   r1   r1   r2   test_wrong_dict_type  s   "rS  c                 C  sT   | dddd}t jtdd t||ddid	 W d    d S 1 s#w   Y  d S )
NrH   r=   rI   rJ   z8<class 'str'> is not a valid type for value in iterparser   rL   rN   rS   r  r   r1   r1   r2   test_wrong_dict_value  s   "rT  c              	   C  s   d}t jddO}t|d}|| W d    n1 sw   Y  tjtdd t|| dgdg d	id
 W d    n1 sBw   Y  W d    d S W d    d S 1 sZw   Y  d S )Na  <?xml version='1.0' encoding='utf-8'?>
  <row>
    <shape>square</shape>
    <degrees>00360</degrees>
    <sides>4.0</sides>
    <date>2020-01-01</date>
   </row>
  <row>
    <shape>circle</shape>
    <degrees>00360</degrees>
    <sides/>
    <date>2021-01-01</date>
  </row>
  <row>
    <shape>triangle</shape>
    <degrees>00180</degrees>
    <sides>3.0</sides>
    <date>2022-01-01</date>
  </row>
zbad.xmlr^   r8   zDExtra content at the end of the document|junk after document elementr   rG  rc   rF  )r7   Zparse_datesrT   )r9   r:   r;   r<   r   r   r   r   )r7   Zbad_xmlr?   r@   r1   r1   r2   test_bad_xml  s&   
"rU  c                 C  Z   d}t |d| d}t|| dddgid}tdd	gd
dgd}t|| t|| d S )Na-  <!-- comment before root -->
<shapes>
  <!-- comment within root -->
  <shape>
    <name>circle</name>
    <type>2D</type>
  </shape>
  <shape>
    <name>sphere</name>
    <type>3D</type>
    <!-- comment within child -->
  </shape>
  <!-- comment within root -->
</shapes>
<!-- comment after root -->r   r_   r   r'   r   rS   r   r   r   r   r'   r   r  r  r1   r1   r2   test_comment  s   rX  c                 C  rV  )Na8  <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE non-profits [
    <!ELEMENT shapes (shape*) >
    <!ELEMENT shape ( name, type )>
    <!ELEMENT name (#PCDATA)>
]>
<shapes>
  <shape>
    <name>circle</name>
    <type>2D</type>
  </shape>
  <shape>
    <name>sphere</name>
    <type>3D</type>
  </shape>
</shapes>r   r_   r   r'   r   rS   r   r   r   r   rW  r  r  r1   r1   r2   test_dtdB     rY  c                 C  rV  )Nam  <?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<?display table-view?>
<?sort alpha-ascending?>
<?textinfo whitespace is allowed ?>
<?elementnames <shape>, <name>, <type> ?>
<shapes>
  <shape>
    <name>circle</name>
    <type>2D</type>
  </shape>
  <shape>
    <name>sphere</name>
    <type>3D</type>
  </shape>
</shapes>r   r_   r   r'   r   rS   r   r   r   r   rW  r  r  r1   r1   r2   test_processing_instructionf  rZ  r[  c                 C  X   | dddd}t jtdd t||dg did	 W d    d S 1 s%w   Y  d S )
NrH   r=   rI   rJ   z+No result from selected items in iterparse.r   nodeZattr1Zelem1Zelem2Zelem3rS   rM  r   r1   r1   r2   test_no_result  s   
"r_  c                 C  r\  )
NrH   r=   rI   rJ   zNo columns to parse from filer   rL   r^  rS   )r   r   r   r   r   r1   r1   r2   test_empty_data  s   
"r`  4https://www.w3schools.com/xml/cdcatalog_with_xsl.xmlc                  C  sR   d} d}t | dddg|d}tddd	d
ddddddddd}t|| d S )Nra  z+https://www.w3schools.com/xml/cdcatalog.xslz.//tr[td and position() <= 6]rO   artist)r`   r   r*  zEmpire BurlesquezHide your heartzGreatest HitszStill got the bluesZErosr"   z	Bob DylanzBonnie TylerzDolly Partonz
Gary MoorezEros Ramazzotti)rO   rb  r   )rI   r-  Zdf_xslr}   r1   r1   r2   test_online_stylesheet  s0   rc  c                 C  s~   t  %}tj|d| |d t|| |d}t||| dg di|d}W d    n1 s,w   Y  t |t t |t d S )NF)rb   r7   rB   rO  rc   r   rP  )r9   r:   rQ  rn   r   rG   rV   )r7   rF   rE   ro   rp   r1   r1   r2   test_compression_read  s   

rd  c              	   C  s
  |}|}||kr
d S t dft dftdftdfd}tddd}|d ur*|jd	f|d
< tddd}|d ur:tdf|d< || \}}	t 7}
tj	|
| |d t
j||	d t|
| |d W d    n1 sfw   Y  W d    d S W d    d S 1 s~w   Y  d S )NzInvalid data streamzNot a gzipped filezFile is not a zip filez%file could not be opened successfully)bz2gzipziptarZ	zstandardignore)errorszUnknown frame descriptorzstdlzmaz%Input format not supported by decoderxzrO  r   )OSErrorr	   r   r   Z	ZstdErrorr   r9   r:   rQ  rn   r   r   r   )r7   rB   rF   Zactual_compressionZattempted_compressionrj  rk  rl  Z	error_clsZ	error_strr?   r1   r1   r2   test_wrong_compression  s0   
"ro  c              	   C  sz   t jtdd, t }t|| dd W d    n1 sw   Y  W d    d S W d    d S 1 s6w   Y  d S )NzUnrecognized compression typer   Z7zrO  )r   r   r   r9   r:   r   )r7   r?   r1   r1   r2   test_unsuported_compression  s   
"rp  Zs3fsz02022.1.17: Hanging on the CI min versions build.)reasonc                  C  sH   d} t | dddidddid}t | dddid	ddid}t|| d S )
Nz/s3://irs-form-990/201923199349319487_public.xmlz .//irs:Form990PartVIISectionAGrpZirszhttp://www.irs.gov/efiler5   ZanonT)r`   r   r7   Zstorage_optionsr6   rU   )Zs3r   r   r1   r1   r2   test_s3_parser_consistency  s    
rr  c                   sv  d}|dkrt tjddgtjd}t tjdtgtjd}ntdtddg}tdd g}t	d| t
|| |d}W d    n1 sNw   Y  t|td	d
gddtddgdd|td
tgddtdtgddtttgddtddgddtdtgddd	|dkrtdddlm  t fddjD  d d gd< t| d S )Na;  <?xml version='1.0' encoding='utf-8'?>
<data xmlns="http://example.com">
<row>
  <a>x</a>
  <b>1</b>
  <c>4.0</c>
  <d>x</d>
  <e>2</e>
  <f>4.0</f>
  <g></g>
  <h>True</h>
  <i>False</i>
</row>
<row>
  <a>y</a>
  <b>2</b>
  <c>5.0</c>
  <d></d>
  <e></e>
  <f></f>
  <g></g>
  <h>False</h>
  <i></i>
</row>
</data>pythonxrk   )ZdtypeZpyarrowzmode.string_storage)r7   dtype_backendr#   r$   ZInt64r   g      @ZFloat64TFboolean)	r   rK  r  der@   ri   hir   )ArrowExtensionArrayc                   s$   i | ]}| j | d dqS )T)Zfrom_pandas)array).0colr{  r!  par1   r2   
<dictcomp>\  s    z1test_read_xml_nullable_dtypes.<locals>.<dictcomp>ri   )r   npr|  Zobject_r   r   Zimportorskipr   pdZoption_contextr   r   r   Zpandas.arraysr{  columnsr9   rV   )r7   Zstring_storageru  r=   Zstring_arrayZstring_array_nar   r1   r  r2   test_read_xml_nullable_dtypes!  s@   

r  c                  C  sD   d} t jt| d tddd W d    d S 1 sw   Y  d S )NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r   testnumpy)ru  r   )r   r1   r1   r2   test_invalid_dtype_backendf  s
   "r  )
__future__r   rH   r   r   rl  r   r   tarfiler   urllib.errorr   Zxml.etree.ElementTreer   zipfiler	   r  r  r   Zpandas.compatr
   Zpandas.compat._optionalr   Zpandas.errorsr   r   Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandasr  r   r   r   Zpandas._testingZ_testingr9   Zpandas.core.arraysr   r   Zpandas.io.commonr   Zpandas.io.xmlr   r   rQ  r   r   r+  Zfixturer3   r.   Z
skip_if_nor7   rA   rG   r[   marknetworkZslowrq   r~   r   r   r   r   r   r   Zparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r  r  r"  Zskip_if_installedr$  r%  r/  r1  r4  r5  r7  r;  r=  r>  r@  rA  rB  rC  rD  rH  rJ  rL  rN  rR  rS  rT  rU  rX  rY  r[  r_  r`  rc  rd  ro  rp  Zskipifrr  r  r  r1   r1   r1   r2   <module>   s   6
	m


+
	


	





	
$#(1



	













$(
)#$$
$	E