
    Mhz                       S r SSKJr  SSKJr  SSKrSSKrSSKJr  SSKJ	r	J
r
Jr  SSKrSSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSK J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'J(r(J)r)J*r*J+r+J,r,J-r-  SSK.J/r/  SSK0J1r1  \	(       a  SSK2J3r3J4r4  SSK5J6r6J7r7J8r8J9r9J:r:J;r;  SSKJ<r<  \Rz                  " S5      r>\>4S6S jjr?S7S jr@        S8S jrA " S S5      rB " S  S!\B5      rCS9S" jrDS#S$0rE " S% S&\B5      rFS:S' jrGS( rH\F\F\C\CS).rIS;S* jrJS9S+ jrKS, rLS- rM\" \%S   S.9S/SSSSSS0S1SS2SSS3S3S\R                  SS4.                                     S<S5 jj5       rOg)=zW
:mod:`pandas.io.html` is a module containing functionality for dealing with
HTML IO.

    )annotations)abcN)Pattern)TYPE_CHECKINGLiteralcast)lib)import_optional_dependency)AbstractMethodErrorEmptyDataError)doc)find_stack_level)check_dtype_backend)is_list_likeisna)Index)
MultiIndex)Series)_shared_docs)file_exists
get_handleis_file_likeis_fsspec_urlis_urlstringify_pathvalidate_header_arg)pprint_thing)
TextParser)IterableSequence)
BaseBufferDtypeBackendFilePathHTMLFlavors
ReadBufferStorageOptions)	DataFramez[\r\n]+|\s{2,}c                B    UR                  SU R                  5       5      $ )aZ  
Replace extra whitespace inside of a string with a single space.

Parameters
----------
s : str or unicode
    The string from which to remove extra whitespace.
regex : re.Pattern
    The regular expression to use to remove extra whitespace.

Returns
-------
subd : str or unicode
    `s` with all extra whitespace replaced with a single space.
 )substrip)sregexs     @/var/www/html/env/lib/python3.13/site-packages/pandas/io/html.py_remove_whitespacer0   I   s      99S!'')$$    c                |   [        U [        5      (       aH  U R                  =(       d    SU R                  =(       d    Sp![	        [        XR                  U5      5      $ [        U [        R                  5      (       d  [        U 5      (       a  [        SU 5      $ U c  g[        [        U 5      R                   S35      e)aV  
Get an iterator given an integer, slice or container.

Parameters
----------
skiprows : int, slice, container
    The iterator to use to skip rows; can also be a slice.

Raises
------
TypeError
    * If `skiprows` is not a slice, integer, or Container

Returns
-------
it : iterable
    A proper iterator to use to skip rows of a DataFrame.
r      int | Sequence[int]z& is not a valid type for skipping rows)
isinstanceslicestartsteplistrangestopnumbersIntegralr   r   	TypeErrortype__name__)skiprowsr7   r8   s      r/   _get_skiprowsrB   \   s    & (E""nn)8==+=AtE%566	Hg..	/	/<3I3I)844		
tH~..//UV
WWr1   storage_optionsc                   [        U 5      (       d6  [        U S5      (       d%  [        U [        5      (       a@  [	        U 5      (       a0  [        U SXS9 nUR                  R                  5       nSSS5        U$ [        U [        [        45      (       a  U nU$ [        S[        U 5      R                   S35      e! , (       d  f       W$ = f)z
Try to read from a url, file or string.

Parameters
----------
obj : str, unicode, path object, or file-like object

Returns
-------
raw_text : str
readr)encodingrC   NzCannot read object of type '')r   hasattrr5   strr   r   handlerE   bytesr>   r?   r@   )objrG   rC   handlestexts        r/   _readrP   y   s    $ 	s3sC  [%5%5x
>>&&(D
 K	 
C#u	&	& K 6tCy7I7I6J!LMM
 
 Ks   B88
Cc                      \ rS rSrSr S               SS jjrS rS rSS jrS r	S	 r
S
 rS rS rS rSS jrS rS r  SS jrSS jrSrg)_HtmlFrameParser   a  
Base class for parsers that parse HTML into DataFrames.

Parameters
----------
io : str or file-like
    This can be either a string of raw HTML, a valid URL using the HTTP,
    FTP, or FILE protocols or a file-like object.

match : str or regex
    The text to match in the document.

attrs : dict
    List of HTML <table> element attributes to match.

encoding : str
    Encoding to be used by parser

displayed_only : bool
    Whether or not items with "display:none" should be ignored

extract_links : {None, "all", "header", "body", "footer"}
    Table elements in the specified section(s) with <a> tags will have their
    href extracted.

    .. versionadded:: 1.5.0

Attributes
----------
io : str or file-like
    raw HTML, URL, or file-like object

match : regex
    The text to match in the raw HTML

attrs : dict-like
    A dictionary of valid table attributes to use to search for table
    elements.

encoding : str
    Encoding to be used by parser

displayed_only : bool
    Whether or not items with "display:none" should be ignored

extract_links : {None, "all", "header", "body", "footer"}
    Table elements in the specified section(s) with <a> tags will have their
    href extracted.

    .. versionadded:: 1.5.0

Notes
-----
To subclass this class effectively you must override the following methods:
    * :func:`_build_doc`
    * :func:`_attr_getter`
    * :func:`_href_getter`
    * :func:`_text_getter`
    * :func:`_parse_td`
    * :func:`_parse_thead_tr`
    * :func:`_parse_tbody_tr`
    * :func:`_parse_tfoot_tr`
    * :func:`_parse_tables`
    * :func:`_equals_tag`
See each method's respective documentation for details on their
functionality.
Nc                X    Xl         X l        X0l        X@l        XPl        X`l        Xpl        g N)iomatchattrsrG   displayed_onlyextract_linksrC   )selfrV   rW   rX   rG   rY   rZ   rC   s           r/   __init___HtmlFrameParser.__init__   s*     

 ,*.r1   c                   ^  T R                  T R                  5       T R                  T R                  5      nU 4S jU 5       $ )zv
Parse and return all tables from the DOM.

Returns
-------
list of parsed (header, body, footer) tuples from tables.
c              3  F   >#    U  H  nTR                  U5      v   M     g 7frU   )_parse_thead_tbody_tfoot).0tabler[   s     r/   	<genexpr>0_HtmlFrameParser.parse_tables.<locals>.<genexpr>   s     I&--e44&s   !)_parse_tables
_build_docrW   rX   )r[   tabless   ` r/   parse_tables_HtmlFrameParser.parse_tables   s3     ##DOO$5tzz4::NI&IIr1   c                $    UR                  U5      $ )z
Return the attribute value of an individual DOM node.

Parameters
----------
obj : node-like
    A DOM node.

attr : str or unicode
    The attribute, such as "colspan"

Returns
-------
str or unicode
    The attribute value.
)get)r[   rM   attrs      r/   _attr_getter_HtmlFrameParser._attr_getter   s    $ wwt}r1   c                    [        U 5      e)z
Return a href if the DOM node contains a child <a> or None.

Parameters
----------
obj : node-like
    A DOM node.

Returns
-------
href : str or unicode
    The href from the <a> child of the DOM node.
r   r[   rM   s     r/   _href_getter_HtmlFrameParser._href_getter       "$''r1   c                    [        U 5      e)z
Return the text of an individual DOM node.

Parameters
----------
obj : node-like
    A DOM node.

Returns
-------
text : str or unicode
    The text from an individual DOM node.
rp   rq   s     r/   _text_getter_HtmlFrameParser._text_getter   rt   r1   c                    [        U 5      e)z
Return the td elements from a row element.

Parameters
----------
obj : node-like
    A DOM <tr> node.

Returns
-------
list of node-like
    These are the elements of each row, i.e., the columns.
rp   rq   s     r/   	_parse_td_HtmlFrameParser._parse_td0  rt   r1   c                    [        U 5      e)z
Return the list of thead row elements from the parsed table element.

Parameters
----------
table : a table element that contains zero or more thead elements.

Returns
-------
list of node-like
    These are the <tr> row elements of a table.
rp   r[   rb   s     r/   _parse_thead_tr _HtmlFrameParser._parse_thead_tr@       "$''r1   c                    [        U 5      e)a  
Return the list of tbody row elements from the parsed table element.

HTML5 table bodies consist of either 0 or more <tbody> elements (which
only contain <tr> elements) or 0 or more <tr> elements. This method
checks for both structures.

Parameters
----------
table : a table element that contains row elements.

Returns
-------
list of node-like
    These are the <tr> row elements of a table.
rp   r|   s     r/   _parse_tbody_tr _HtmlFrameParser._parse_tbody_trO      " "$''r1   c                    [        U 5      e)z
Return the list of tfoot row elements from the parsed table element.

Parameters
----------
table : a table element that contains row elements.

Returns
-------
list of node-like
    These are the <tr> row elements of a table.
rp   r|   s     r/   _parse_tfoot_tr _HtmlFrameParser._parse_tfoot_trb  r   r1   c                    [        U 5      e)a  
Return all tables from the parsed DOM.

Parameters
----------
document : the DOM from which to parse the table element.

match : str or regular expression
    The text to search for in the DOM tree.

attrs : dict
    A dictionary of table attributes that can be used to disambiguate
    multiple tables on a page.

Raises
------
ValueError : `match` does not match any text in the document.

Returns
-------
list of node-like
    HTML <table> elements to be parsed into raw data.
rp   )r[   documentrW   rX   s       r/   re   _HtmlFrameParser._parse_tablesq  s    0 "$''r1   c                    [        U 5      e)z
Return whether an individual DOM node matches a tag

Parameters
----------
obj : node-like
    A DOM node.

tag : str
    Tag name to be checked for equality.

Returns
-------
boolean
    Whether `obj`'s tag name is `tag`
rp   r[   rM   tags      r/   _equals_tag_HtmlFrameParser._equals_tag  r   r1   c                    [        U 5      e)z
Return a tree-like object that can be used to iterate over the DOM.

Returns
-------
node-like
    The DOM from which to parse the table element.
rp   )r[   s    r/   rf   _HtmlFrameParser._build_doc  s     "$''r1   c                  ^  T R                  U5      nT R                  U5      nT R                  U5      nU 4S jnU(       dP  U(       aI  U" US   5      (       a9  UR                  UR	                  S5      5        U(       a  U" US   5      (       a  M9  T R                  USS9nT R                  USS9nT R                  USS9nXgU4$ )au  
Given a table, return parsed header, body, and foot.

Parameters
----------
table_html : node-like

Returns
-------
tuple of (header, body, footer), each a list of list-of-text rows.

Notes
-----
Header and body are lists-of-lists. Top level list is a list of
rows. Each row is a list of str text.

Logic: Use <thead>, <tbody>, <tfoot> elements to identify
       header, body, and footer, otherwise:
       - Put all rows into body
       - Move rows from top of body to header only if
         all elements inside row are <th>
       - Move rows from bottom of body to footer only if
         all elements inside row are <th>
c                L   > [        U4S jTR                  U 5       5       5      $ )Nc              3  H   >#    U  H  nTR                  US 5      v   M     g7f)thN)r   )ra   tr[   s     r/   rc   S_HtmlFrameParser._parse_thead_tbody_tfoot.<locals>.row_is_all_th.<locals>.<genexpr>  s#     N:MQt''400:Ms   ")allry   )rowr[   s    r/   row_is_all_th@_HtmlFrameParser._parse_thead_tbody_tfoot.<locals>.row_is_all_th  s    N$..:MNNNr1   r   header)sectionbodyfooter)r}   r   r   appendpop_expand_colspan_rowspan)	r[   
table_htmlheader_rows	body_rowsfooter_rowsr   r   r   r   s	   `        r/   r`   )_HtmlFrameParser._parse_thead_tbody_tfoot  s    2 **:6((4	**:6	O  il ; ;""9==#34 il ; ; --k8-L++Iv+F--k8-LV##r1   c           	     <   / n/ nU GH  n/ n/ nSnU R                  U5      n	U	 GH>  n
U(       ag  US   S   U::  a[  UR                  S5      u  pnUR                  U5        US:  a  UR                  XUS-
  45        US-  nU(       a  US   S   U::  a  M[  [        U R	                  U
5      5      nU R
                  SU4;   a  U R                  U
5      nX4n[        U R                  U
S5      =(       d    S5      n[        U R                  U
S5      =(       d    S5      n[        U5       H5  nUR                  U5        US:  a  UR                  XUS-
  45        US-  nM7     GMA     U H5  u  pnUR                  U5        US:  d  M  UR                  XUS-
  45        M7     UR                  U5        UnGM     U(       a[  / n/ nU H5  u  pnUR                  U5        US:  d  M  UR                  XUS-
  45        M7     UR                  U5        UnU(       a  M[  U$ )a  
Given a list of <tr>s, return a list of text rows.

Parameters
----------
rows : list of node-like
    List of <tr>s
section : the section that the rows belong to (header, body or footer).

Returns
-------
list of list
    Each returned row is a list of str text, or tuple (text, link)
    if extract_links is not None.

Notes
-----
Any cell with ``rowspan`` or ``colspan`` will have its contents copied
to subsequent cells.
r   r3   r   rowspancolspan)
ry   r   r   r0   rv   rZ   rr   intrm   r:   )r[   rowsr   	all_texts	remaindertrtextsnext_remainderindextdstdprev_i	prev_textprev_rowspanrO   hrefr   r   _s                      r/   r   (_HtmlFrameParser._expand_colspan_rowspan  s3   . 	  	 BENE..$C  IaLOu$<6?mmA6F3F|LL+#a'&--v,QRBR.STQJE  IaLOu$< *$*;*;B*?@%%%)99,,R0D <Dd//I>C!Dd//I>C!DwALL&{&--uGaK.HIQJE	 (% 2 4=/<Y'!#"))6lQ>N*OP 4=
 U#&IK R NE3</<Y'!#"))6lQ>N*OP 4= U#&I i r1   c                    U R                   (       d  U$ U Vs/ s H6  nS[        X25      R                  SS5      R                  SS5      ;  d  M4  UPM8     sn$ s  snf )a>  
Return list of tables, potentially removing hidden elements

Parameters
----------
tbl_list : list of node-like
    Type of list elements will vary depending upon parser used
attr_name : str
    Name of the accessor for retrieving HTML attributes

Returns
-------
list of node-like
    Return type matches `tbl_list`
display:nonestyle r*   )rY   getattrrk   replace)r[   tbl_list	attr_namexs       r/   _handle_hidden_tables&_HtmlFrameParser._handle_hidden_tables(  sd      ""O 
1(,,Wb9AA#rJK 
 	
 
s   3AA)rX   rY   rG   rZ   rV   rW   rC   rU   )rV   z.FilePath | ReadBuffer[str] | ReadBuffer[bytes]rW   str | PatternrX   dict[str, str] | NonerG   rJ   rY   boolrZ   0Literal[None, 'header', 'footer', 'body', 'all']rC   r'   returnNoner   
str | Noner   r   )r   z#Literal['header', 'footer', 'body'])r   rJ   )r@   
__module____qualname____firstlineno____doc__r\   rh   rm   rr   rv   ry   r}   r   r   re   r   rf   r`   r   r   __static_attributes__ r1   r/   rR   rR      s    BX +//:/ / %	/
 / / H/ (/ 
/$	J(( ( ( ((&((4(&	(+$ZP@Pd
r1   rR   c                  \    \ rS rSrSrS rSS jrS rSS jrS r	S r
S	 rS
 rS rS rSrg)!_BeautifulSoupHtml5LibFrameParseriC  a  
HTML to DataFrame parser that uses BeautifulSoup under the hood.

See Also
--------
pandas.io.html._HtmlFrameParser
pandas.io.html._LxmlFrameParser

Notes
-----
Documentation strings for this class are in the base class
:class:`pandas.io.html._HtmlFrameParser`.
c                >   SnUR                  XCS9nU(       d  [        S5      e/ n[        5       nU R                  US5      nU H  nU R                  (       ab  UR                  S5       H  n	U	R                  5         M     UR                  [        R                  " S5      S9 H  n	U	R                  5         M     X;  a!  UR                  US9b  UR                  U5        UR                  U5        M     U(       d!  [        S	[        UR                  5       35      eU$ )
Nrb   )rX   zNo tables foundrX   r   zdisplay:\s*none)r   )stringz!No tables found matching pattern )find_all
ValueErrorsetr   rY   	decomposerecompilefindr   addreprpattern)
r[   r   rW   rX   element_namerg   resultunique_tablesrb   elems
             r/   re   /_BeautifulSoupHtml5LibFrameParser._parse_tablesR  s    ""<"=.//++FG<E""!NN73DNN$ 4 "NN<N1ONPDNN$ Q )ejjj.F.Re$e$  @emmAT@UVWWr1   c                >    UR                  SSS9nU(       d  S $ US   $ )NaT)r   r   )r   )r[   rM   r   s      r/   rr   ._BeautifulSoupHtml5LibFrameParser._href_getterk  s%    HHStH$t+!F)+r1   c                    UR                   $ rU   )rO   rq   s     r/   rv   ._BeautifulSoupHtml5LibFrameParser._text_gettero  s    xxr1   c                     UR                   U:H  $ rU   )namer   s      r/   r   -_BeautifulSoupHtml5LibFrameParser._equals_tagr  s    xx3r1   c                "    UR                  SSS9$ )N)r   r   F	recursive)r   r[   r   s     r/   ry   +_BeautifulSoupHtml5LibFrameParser._parse_tdu  s    ||LE|::r1   c                $    UR                  S5      $ )Nzthead trselectr|   s     r/   r}   1_BeautifulSoupHtml5LibFrameParser._parse_thead_trx      ||J''r1   c                L    UR                  S5      nUR                  SSS9nX#-   $ )Nztbody trr   Fr   )r   r   r[   rb   
from_tbody	from_roots       r/   r   1_BeautifulSoupHtml5LibFrameParser._parse_tbody_tr{  s+    \\*-
NN45N9	%%r1   c                $    UR                  S5      $ )Nztfoot trr   r|   s     r/   r   1_BeautifulSoupHtml5LibFrameParser._parse_tfoot_tr  r   r1   c                    [        U R                  U R                  U R                  5      nU(       d  [	        SU R                   35      eU$ )NzNo text parsed from document: )rP   rV   rG   rC   r   )r[   raw_texts     r/   _setup_build_doc2_BeautifulSoupHtml5LibFrameParser._setup_build_doc  s=    $--1E1EF=dggYGHHr1   c                J   SSK Jn  U R                  5       n[        U[        5      (       a+  U R
                  b  UR                  U R
                  5      nS nOUnU R
                  nU" USUS9nUR                  S5       H!  nUR                  SUR                  -   5        M#     U$ )Nr   )BeautifulSouphtml5lib)featuresfrom_encodingbr
)
bs4r  r  r5   rL   rG   decoder   replace_withrO   )r[   r  bdocudocr  soupr	  s          r/   rf   ,_BeautifulSoupHtml5LibFrameParser._build_doc  s    %$$&dE""t}}'@;;t}}-D MD MMMTJmT--%BOOD277N+ & r1   r   Nr   r   )r@   r   r   r   r   re   rr   rv   r   ry   r}   r   r   r  rf   r   r   r1   r/   r   r   C  s9    2,;(&(r1   r   c                    SU ;   a  U R                  S5      U S'   SR                  U R                  5        VVs/ s H  u  pSU S[        U5       3PM     snn5      nSU S3$ s  snnf )aF  
Build an xpath expression to simulate bs4's ability to pass in kwargs to
search for attributes when using the lxml parser.

Parameters
----------
attrs : dict
    A dict of HTML attributes. These are NOT checked for validity.

Returns
-------
expr : unicode
    An XPath expression that checks for the given HTML attributes.
class_classz and @=[])r   joinitemsr   )rX   kvr-   s       r/   _build_xpath_exprr    sg      58,gEKKMBMDA!Ad1gY'MBCAqc8O Cs   A$
r   z$http://exslt.org/regular-expressionsc                  V    \ rS rSrSrSS jrS rS rS rSS jr	S r
S	 rS
 rS rSrg)_LxmlFrameParseri  a%  
HTML to DataFrame parser that uses lxml under the hood.

Warning
-------
This parser can only handle HTTP, FTP, and FILE urls.

See Also
--------
_HtmlFrameParser
_BeautifulSoupLxmlFrameParser

Notes
-----
Documentation strings for this class are in the base class
:class:`_HtmlFrameParser`.
c                @    UR                  S5      nU(       d  S $ US   $ )Nz
.//a/@hrefr   xpath)r[   rM   r   s      r/   rr   _LxmlFrameParser._href_getter  s!    yy&t,T!W,r1   c                "    UR                  5       $ rU   )text_contentrq   s     r/   rv   _LxmlFrameParser._text_getter  s    !!r1   c                $    UR                  S5      $ )N	./td|./thr!  r   s     r/   ry   _LxmlFrameParser._parse_td  s     yy%%r1   c                &   UR                   nS[        U5       S3nU(       a  U[        U5      -  nUR                  U[        S9nU R                  US5      nU R                  (       a  U H  nUR                  S5       H  nUR                  5         M     UR                  S5       HE  nSUR                  R                  SS	5      R                  S
S	5      ;   d  M5  UR                  5         MG     M     U(       d  [        S[        U5       35      eU$ )Nz//table[.//text()[re:test(., z)]])
namespacesattribz.//stylez.//*[@style]r   r   r   r*   zNo tables found matching regex )r   r   r  r"  _re_namespacer   rY   	drop_treer,  rk   r   r   )	r[   r   rW   kwargsr   
xpath_exprrg   rb   r   s	            r/   re   _LxmlFrameParser._parse_tables  s    -- 5T']O3G
 +F33J
}E++FH= "KK
3DNN$ 4!KK7D%")E)M)McSU)VV( 8   >tG}oNOOr1   c                     UR                   U:H  $ rU   )r   r   s      r/   r   _LxmlFrameParser._equals_tag  s    ww#~r1   c                   SSK Jn  SSKJnJnJn  U" SU R                  S9n [        U R                  5      (       a:  [        U R                  SU R                  S9 nU" UR                  US9nS	S	S	5        OU" U R                  US9n WR                  5       n[        WS
5      (       d  U" SSSS5      e UR#                  S5       H   n	SU	R$                  =(       d    S-   U	l        M"     U$ ! , (       d  f       Ns= f! [         a     Nqf = f! [        [         4 a\  n[        U R                  5      (       d;  U" U R                  US9n UR                  5       n S	nAN! [         a      S	nANf = fUeS	nAff = f)a;  
Raises
------
ValueError
    * If a URL that lxml cannot parse is passed.

Exception
    * Any other ``Exception`` thrown. For example, trying to parse a
      URL that is syntactically correct on a machine with no internet
      connection will fail.

See Also
--------
pandas.io.html._HtmlFrameParser._build_doc
r   )XMLSyntaxError)
HTMLParser
fromstringparseT)recoverrG   rF   rC   )parserNr%  zno text parsed from documentz*//brr
  r   )
lxml.etreer5  	lxml.htmlr6  r7  r8  rG   r   rV   r   rC   rK   getrootAttributeErrorrI   UnicodeDecodeErrorOSErrorr"  tail)
r[   r5  r6  r7  r8  r;  frF   er	  s
             r/   rf   _LxmlFrameParser._build_doc  s\     	.	
 	
 D4==A	NdggGGS$2F2Fahhv6A  $''&1IIK 1n--$%CQ1MM . '''"Bbggm,BG # ;  " "G, 
	$''??twwv6		A%  
	sk   9D C-.D 	C> -
C;7D >
DD 
DD E:+E5
E
E0)E5/E00E55E:c                    / nUR                  S5       HN  nUR                  UR                  S5      5        UR                  S5      nU(       d  M=  UR                  U5        MP     U$ )Nz.//thead./trr(  )r"  extendr   )r[   rb   r   theadelements_at_roots        r/   r}    _LxmlFrameParser._parse_thead_tr0  sY    [[,EKKF+,  %{{;7E" - r1   c                N    UR                  S5      nUR                  S5      nX#-   $ )Nz.//tbody//trrG  r!  r   s       r/   r    _LxmlFrameParser._parse_tbody_trB  s'    [[0
KK'	%%r1   c                $    UR                  S5      $ )Nz.//tfoot//trr!  r|   s     r/   r    _LxmlFrameParser._parse_tfoot_trH  s    {{>**r1   r   Nr   r   )r@   r   r   r   r   rr   rv   ry   re   r   rf   r}   r   r   r   r   r1   r/   r  r    s5    $-"&
:8t$&+r1   r  c                    U  Vs/ s H  n[        U5      PM     nn[        U5      nUR                  5       nX3U:g     nS/nUR                  5        H  u  pxX==   XdU-
  -  -  ss'   M     g s  snf )Nr   )lenr   maxr  )	r   r   datalenslens_maxnot_maxemptyindlengths	            r/   _expand_elementsrZ  L  sm    "&'$$CI$D'$<DxxzH8#$GDE}}	U/00	 ' (s   A,c                    U R                  S5      u  pnU R                  S5      n[        U S   5      U S'   U(       aQ  X-   nUcJ  [        U5      S:X  a  SnO8[        U5       VVs/ s H   u  pV[	        S U 5       5      (       d  M  UPM"     nnnU(       a  X#-  n[        U5        [        U4SU0U D6 nUR                  5       sS S S 5        $ s  snnf ! , (       d  f       g = f)NrS  r   rA   r3   r   c              3  $   #    U  H  ov   M     g 7frU   r   )ra   rO   s     r/   rc   !_data_to_frame.<locals>.<genexpr>d  s     @VRU$RUs   )r   rB   rQ  	enumerateanyrZ  r   rE   )r/  headr   footr   ir   tps           r/   _data_to_framerd  W  s    zz&)DZZ!F&vj'9:F:{ >4yA~ +4D/W/S@VRU@V=V!/W T	D	2	26	2bwwy 
3	2 X 
3	2s   %CC3C
C!)lxmlNr  r  c                    [        [        R                  5       5      nX;  a  [        [	        U 5       SU 35      eU S;   a  [        S5        [        S5        [        U    $ [        S5        [        U    $ )a  
Choose the parser based on the input flavor.

Parameters
----------
flavor : {{"lxml", "html5lib", "bs4"}} or None
    The type of parser to use. This must be a valid backend.

Returns
-------
cls : _HtmlFrameParser subclass
    The parser class based on the requested input flavor.

Raises
------
ValueError
    * If `flavor` is not a valid backend.
ImportError
    * If you do not have the requested `flavor`
z* is not a valid flavor, valid flavors are )r  r  r  r  z
lxml.etree)r9   _valid_parserskeysr   r   r
   )flavorvalid_parserss     r/   _parser_dispatchrk  w  sz    * ,,./M"F|nF}oV
 	
 $$":."5) &!! 	#<0&!!r1   c                n    SR                  U  Vs/ s H  n[        U5      PM     sn5      nSU S3$ s  snf )Nz, {})r  r   )r-   elargs      r/   _print_as_setrq    s7    
))2"\"%2
3CuB< 3s   2c                   U c  Sn O[        U [        5      (       a  U 4n O[        U [        R                  5      (       aC  [	        S U  5       5      (       d+  [        S[        [        U 5      R                  5       S35      eO;[        U [        5      (       a  [        U 5      O
[        U 5      nUS-  n[        U5      e[        U 5      n [        [        5      n[        U 5      nX2-  (       d"  [        [        U5       S[        U5       35      eU $ )N)re  r  c              3  B   #    U  H  n[        U[        5      v   M     g 7frU   )r5   rJ   )ra   flavs     r/   rc   #_validate_flavor.<locals>.<genexpr>  s     <VT:dC((Vs   zObject of type z is not an iterable of stringsz is not a valid flavorz2 is not a valid set of flavors, valid flavors are )r5   rJ   r   r    r   r>   r   r?   r@   r   tupler   rg  rq  )ri  msgvalid_flavors
flavor_sets       r/   _validate_flavorrz    s    ~	FC	 	 	FCLL	)	)<V<<<!$tF|'<'<"=!> ?0 1  = )55d6l3v;''o6]F'MVJ%Z() *(78:
 	
 Mr1   c           
        [        U 5      n [        R                  " U5      n	S n
U  H-  n[        U5      nU" UU	UUUUU5      n UR	                  5       n  O	   U
c   eU
e/ nU Hg  n [        S
SU0UD6nUS;   a?  [        UR                  [        5      (       a   [        S UR                   5       SS	9Ul        UR                  U5        Mi     U$ ! [
         a|  n[        US5      (       a'  UR                  5       (       a  UR                  S5        O6[        US5      (       a%  UR                  5       (       d  [        SU S35      UeUn
 S nAGM*  S nAff = f! [         a     GM  f = f)Nseekabler   zThe flavor z failed to parse your input. Since you passed a non-rewindable file object, we can't rewind it to try another parser. Try read_html() with a different flavor.rS  )r   r   c              3  ^   #    U  H#  oS    [        US   5      (       a  SOUS   4v   M%     g7f)r   r3   Nr   )ra   cols     r/   rc   _parse.<locals>.<genexpr>  s)     V:C!fd3q6lldA?:s   +-F)tupleize_colsr   )rz  r   r   rk  rh   r   rI   r|  seekrd  r5   columnsr   r   r   r   )ri  rV   rW   rX   rG   rY   rZ   rC   r/  compiled_matchretainedrt  r;  prg   caughtretrb   dfs                      r/   _parser    s    f%FZZ&NH!$'
	^^%F" = @ ###
C	5U5f5B  11j

J7 7 #V2::V"'

 JJrN " JO  	 r:&&2;;==
Z(( !$ (O O
  H	J  		s,   C&A#E
EA0EE
E'&E'r:  z.+F,.T)rW   ri  r   	index_colrA   rX   parse_dates	thousandsrG   decimal
converters	na_valueskeep_default_narY   rZ   dtype_backendrC   c               *   [        U[        R                  5      (       a  US:  a  [        S5      eUS;  a  [        SU S35      e[	        U5        [        U5        [        U 5      n [        U [        5      (       aZ  [        [        U 5      [        U 5      [        U 5      [        U 5      /5      (       d"  [        R                  " S[        [!        5       S9  [#        S0 SU_S	U _S
U_SU_SU_SU_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_6$ )a  
Read HTML tables into a ``list`` of ``DataFrame`` objects.

Parameters
----------
io : str, path object, or file-like object
    String, path object (implementing ``os.PathLike[str]``), or file-like
    object implementing a string ``read()`` function.
    The string can represent a URL or the HTML itself. Note that
    lxml only accepts the http, ftp and file url protocols. If you have a
    URL that starts with ``'https'`` you might try removing the ``'s'``.

    .. deprecated:: 2.1.0
        Passing html literal strings is deprecated.
        Wrap literal string/bytes input in ``io.StringIO``/``io.BytesIO`` instead.

match : str or compiled regular expression, optional
    The set of tables containing text matching this regex or string will be
    returned. Unless the HTML is extremely simple you will probably need to
    pass a non-empty string here. Defaults to '.+' (match any non-empty
    string). The default value will return all tables contained on a page.
    This value is converted to a regular expression so that there is
    consistent behavior between Beautiful Soup and lxml.

flavor : {{"lxml", "html5lib", "bs4"}} or list-like, optional
    The parsing engine (or list of parsing engines) to use. 'bs4' and
    'html5lib' are synonymous with each other, they are both there for
    backwards compatibility. The default of ``None`` tries to use ``lxml``
    to parse and if that fails it falls back on ``bs4`` + ``html5lib``.

header : int or list-like, optional
    The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
    make the columns headers.

index_col : int or list-like, optional
    The column (or list of columns) to use to create the index.

skiprows : int, list-like or slice, optional
    Number of rows to skip after parsing the column integer. 0-based. If a
    sequence of integers or a slice is given, will skip the rows indexed by
    that sequence.  Note that a single element sequence means 'skip the nth
    row' whereas an integer means 'skip n rows'.

attrs : dict, optional
    This is a dictionary of attributes that you can pass to use to identify
    the table in the HTML. These are not checked for validity before being
    passed to lxml or Beautiful Soup. However, these attributes must be
    valid HTML table attributes to work correctly. For example, ::

        attrs = {{'id': 'table'}}

    is a valid attribute dictionary because the 'id' HTML tag attribute is
    a valid HTML attribute for *any* HTML tag as per `this document
    <https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::

        attrs = {{'asdf': 'table'}}

    is *not* a valid attribute dictionary because 'asdf' is not a valid
    HTML attribute even if it is a valid XML attribute.  Valid HTML 4.01
    table attributes can be found `here
    <http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2>`__. A
    working draft of the HTML 5 spec can be found `here
    <https://html.spec.whatwg.org/multipage/tables.html>`__. It contains the
    latest information on table attributes for the modern web.

parse_dates : bool, optional
    See :func:`~read_csv` for more details.

thousands : str, optional
    Separator to use to parse thousands. Defaults to ``','``.

encoding : str, optional
    The encoding used to decode the web page. Defaults to ``None``.``None``
    preserves the previous encoding behavior, which depends on the
    underlying parser library (e.g., the parser library will try to use
    the encoding provided by the document).

decimal : str, default '.'
    Character to recognize as decimal point (e.g. use ',' for European
    data).

converters : dict, default None
    Dict of functions for converting values in certain columns. Keys can
    either be integers or column labels, values are functions that take one
    input argument, the cell (not column) content, and return the
    transformed content.

na_values : iterable, default None
    Custom NA values.

keep_default_na : bool, default True
    If na_values are specified and keep_default_na is False the default NaN
    values are overridden, otherwise they're appended to.

displayed_only : bool, default True
    Whether elements with "display: none" should be parsed.

extract_links : {{None, "all", "header", "body", "footer"}}
    Table elements in the specified section(s) with <a> tags will have their
    href extracted.

    .. versionadded:: 1.5.0

dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
    Back-end data type applied to the resultant :class:`DataFrame`
    (still experimental). Behaviour is as follows:

    * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
      (default).
    * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
      DataFrame.

    .. versionadded:: 2.0

{storage_options}

    .. versionadded:: 2.1.0

Returns
-------
dfs
    A list of DataFrames.

See Also
--------
read_csv : Read a comma-separated values (csv) file into DataFrame.

Notes
-----
Before using this function you should read the :ref:`gotchas about the
HTML parsing libraries <io.html.gotchas>`.

Expect to do some cleanup after you call this function. For example, you
might need to manually assign column names if the column names are
converted to NaN when you pass the `header=0` argument. We try to assume as
little as possible about the structure of the table and push the
idiosyncrasies of the HTML contained in the table to the user.

This function searches for ``<table>`` elements and only for ``<tr>``
and ``<th>`` rows and ``<td>`` elements within each ``<tr>`` or ``<th>``
element in the table. ``<td>`` stands for "table data". This function
attempts to properly handle ``colspan`` and ``rowspan`` attributes.
If the function has a ``<thead>`` argument, it is used to construct
the header, otherwise the function attempts to find the header within
the body (by putting rows with only ``<th>`` elements into the header).

Similar to :func:`~read_csv` the `header` argument is applied
**after** `skiprows` is applied.

This function will *always* return a list of :class:`DataFrame` *or*
it will fail, e.g., it will *not* return an empty list.

Examples
--------
See the :ref:`read_html documentation in the IO section of the docs
<io.read_html>` for some examples of reading in HTML tables.
r   zPcannot skip rows starting from the end of the data (you passed a negative value))Nr   r   r   r   zO`extract_links` must be one of {None, "header", "footer", "body", "all"}, got ""zPassing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.)
stacklevelri  rV   rW   r   r  rA   r  r  rX   rG   r  r  r  r  rY   rZ   r  rC   r   )r5   r<   r=   r   r   r   r   rJ   r_  r   r   r   r   warningswarnFutureWarningr   r  )rV   rW   ri  r   r  rA   rX   r  r  rG   r  r  r  r  rY   rZ   r  rC   s                     r/   	read_htmlr    s   j (G,,--(Q,1
 	
 EEa!
 	
 &		B"c3O2J"		
$ $ 	> ')	
    	
           ( &  $!" $#$ (% r1   )r-   rJ   r.   r   r   rJ   )rA   "int | Sequence[int] | slice | Noner   r4   )rM   zFilePath | BaseBufferrG   r   rC   zStorageOptions | Noner   zstr | bytes)r   rJ   )r   r   )ri  zHTMLFlavors | Noner   ztype[_HtmlFrameParser])&rV   zFilePath | ReadBuffer[str]rW   r   ri  z*HTMLFlavors | Sequence[HTMLFlavors] | Noner   int | Sequence[int] | Noner  r  rA   r  rX   r   r  r   r  r   rG   r   r  rJ   r  zdict | Noner  zIterable[object] | Noner  r   rY   r   rZ   r   r  zDtypeBackend | lib.NoDefaultrC   r'   r   zlist[DataFrame])Pr   
__future__r   collectionsr   r<   r   r   typingr   r   r   r  pandas._libsr	   pandas.compat._optionalr
   pandas.errorsr   r   pandas.util._decoratorsr   pandas.util._exceptionsr   pandas.util._validatorsr   pandas.core.dtypes.commonr   pandasr   pandas.core.indexes.baser   pandas.core.indexes.multir   pandas.core.seriesr   pandas.core.shared_docsr   pandas.io.commonr   r   r   r   r   r   r   pandas.io.formats.printingr   pandas.io.parsersr   collections.abcr    r!   pandas._typingr"   r#   r$   r%   r&   r'   r(   r   _RE_WHITESPACEr0   rB   rP   rR   r   r  r-  r  rZ  rd  rg  rk  rq  rz  r  
no_defaultr  r   r1   r/   <module>r     sj   #   	  
   > ( 4 7 2  * 0 % 0   4 (
  !
 -. 1? %&X:	 + 	Bf
 f
RW(8 Wt. =>R+' R+j12 
1,	 "F
8DN \"345  9=)-,037#'")- FJ25..&*'h"h h 7	h
 'h *h 1h !h h h h h h 'h h  !h" D#h$ 0%h& $'h( )h 6hr1   