
    ,h'                        d Z g dZddlZddlmZmZ 	 ddlmZmZm	Z	m
Z
mZmZmZ eefZddZddZdd	Zd
 Z ej(                  dej*                        j,                  Z G d d      Zd Zd Z	 ddlmZ  ej(                  d      j<                  Z	 e  d Z#y# e$ r ddlmZmZm	Z	m
Z
mZmZ eZY w xY w# e$ r	 ddlmZ Y Mw xY w# e!$ r e"Z Y d Z#yw xY w)z5External interface to the BeautifulSoup HTML parser.
)
fromstringparseconvert_tree    N)etreehtml)BeautifulSoupTagCommentProcessingInstructionNavigableStringDeclarationDoctype)r   r	   r
   r   r   r   c                     t        | ||fi |S )a  Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    )_parse)databeautifulsoupmakeelementbsargss       V/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/lxml/html/soupparser.pyr   r      s     ${=f==    c                 v    t        | d      st        |       } t        | ||fi |}t        j                  |      S )aY  Parse a file into an ElemenTree using the BeautifulSoup parser.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    read)hasattropenr   r   ElementTree)filer   r   r   roots        r   r   r   $   s;     4 Dz${=f=DT""r   c                 n    t        | |      }|j                         }|D ]  }|j                  |        |S )a  Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.

    You can pass a different Element factory through the `makeelement`
    keyword.
    )_convert_treegetchildrenremove)beautiful_soup_treer   r   childrenchilds        r   r   r   3   s?     ,k:D!H EOr   c                     |t         }t        |d      r	d|vrd|d<   t        |d      r	d|vrd|d<    || fi |}t        ||      }t        |      dk(  r|d   j                  dk(  r|d   S d|_        |S )	NHTML_ENTITIESconvertEntitiesr   DEFAULT_BUILDER_FEATURESfeatureszhtml.parser   r   )r   r   r   lentag)sourcer   r   r   treer   s         r   r   r   E   s    %}o.F*(.F$%}89V#!.F:*6*D{+D
4yA~$q'++/AwDHKr   z`(?:\s|[<!])*DOCTYPE\s*HTML(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?(?:\s+(\'[^\']*\'|"[^"]*"))?c                       e Zd Zd Zd Zy)
_PseudoTagc                 .    d| _         g | _        || _        y )Nr   )nameattrscontents)selfr4   s     r   __init__z_PseudoTag.__init__`   s    	
 r   c                 6    | j                   j                         S N)r4   __iter__)r5   s    r   r9   z_PseudoTag.__iter__e   s    }}%%''r   N)__name__
__module____qualname__r6   r9    r   r   r0   r0   ^   s    !
(r   r0   c                    |t         j                  j                  }d x}}d x}}t        |       D ]b  \  }}t	        |t
              r7||}|}||j                  s,|j                  j                         dk(  sJ|}M|Pt	        |t              sa|}d |g x}}	| j                  }
n3| j                  d | }| j                  ||dz    }
| j                  |dz   d  }	|2|
j                  |      }|
d | |j                  z   |
|dz   d  z   |_	        nt        |
      }t        |      } ||      }|}t        |      D ]   } ||      }||j                  |       |}" |}|	D ]   } ||      }||j                  |       |}" |j	 |j!                         }t'        |      }|s	 |S |j)                         \  }}|j+                         j,                  }|xr |dd |_        |xr |dd |_        |S # t"        $ r |j$                  }Y rw xY w)Nr   r*   )r   html_parserr   	enumerate
isinstancer	   r2   lower_DECLARATION_OR_DOCTYPEr4   indexr0   _init_node_convertersreversedaddpreviousaddnextoutput_readyAttributeErrorstring_parse_doctype_declarationgroupsgetroottreedocinfo	public_id
system_url)r"   r   first_element_idxlast_element_idx	html_rootdeclarationiepre_root	post_rootrootsconvert_noderes_rootprev	converteddoctype_stringmatchexternal_idsys_urirP   s                       r   r   r   i   sv   &&22 ,0/(""I-. 1a ($%!  QVV&0H	 Z3J%KK  !!9#,,&//0B1BC#,,->?OPQ?QR'001A!1C1DE	 KK	""2AY););;eAaCDkI	 u%	(5L I&HDh  O	 Y'D	 D  O	 LL#D	 	0(557N +>:  O $)<<> K**,44G + AAb0AG!(!:WQr]GO  	0(//N	0s   G2 2H
	H
c                     	
 i g fd}fd	d
	fd	d 
d  |t         t               
fd       } |t              d        } |t              d        } |t              fd	       }S )Nc                        fd}|S )Nc                 B    D ]  }| |<   j                  |        | S r8   )append)handlert
convertersordered_node_typestypess     r   addz5_init_node_converters.<locals>.converter.<locals>.add   s0     - '
1"))!,- Nr   r=   )rl   rm   rj   rk   s   ` r   	converterz(_init_node_converters.<locals>.converter   s    	
 
r   c                 :    D ]  }t        | |      s|   c S  y r8   )rB   )noderi   rj   rk   s     r   find_best_converterz2_init_node_converters.<locals>.find_best_converter   s+    # 	%A$"!!}$	% r   c                     	 t        |          }|y  || |      S # t        $ r  |       x}t        |       <   Y -w xY wr8   )typeKeyError)bs_nodeparentrh   rj   rq   s      r   r\   z+_init_node_converters.<locals>.convert_node   sY    	O g/G ?w''	  	O2Eg2NNGjg/	Os    ??c                    t        | t              rKi }| j                         D ]4  \  }}t        |t              rdj	                  |      }t        |      ||<   6 |S | D ci c]  \  }}|t        |       }}}|S c c}}w )N )rB   dictitemslistjoinunescape)bs_attrsattribskvs       r   	map_attrsz(_init_node_converters.<locals>.map_attrs   s    h%G ( )1a&A%a[
)  3;;$!Qq(1+~;G; <s   !A<c                     t        |       dk(  r| j                  xs d|z   | _        y | d   j                  xs d|z   | d   _        y )Nr    r?   )r+   texttail)rv   r   s     r   append_textz*_init_node_converters.<locals>.append_text   s@    v;!!;;,"4FK%bz4"<F2JOr   c                 6   | j                   }|/|r 
|      nd }t        j                  || j                  |      }n |r 
|      ni } 	| j                  |      }| D ]  }	 t	        |         }|	 |||        |S # t
        $ r Y nw xY w ||       :)N)attrib)r3   r   
SubElementr2   rs   rt   )ru   rv   r3   r   resr$   rh   r\   rj   r   r   s          r   convert_tagz*_init_node_converters.<locals>.convert_tag   s    */i&TG""67<<HC*/i&RGgll7;C 
	%E$T%[1 &E3'
	% 
   $s   %B	BBc                 V    t        j                  |       }||j                  |       |S r8   )r   HtmlCommentrg   ru   rv   r   s      r   convert_commentz._init_node_converters.<locals>.convert_comment  s)    w'MM#
r   c                     | j                  d      r| d d } t        j                  | j                  dd       }||j	                  |       |S )N?r?   rx   r*   )endswithr   r   splitrg   r   s      r   
convert_piz)_init_node_converters.<locals>.convert_pi  sO    C  crlG))7==a+@AMM#
r   c                 .    | |t        |              y r8   )r}   )ru   rv   r   s     r   convert_textz+_init_node_converters.<locals>.convert_text  s     12r   r8   )r	   r0   r
   r   r   )r   rn   r   r   r   r   r   r\   rj   rq   r   rk   s   `     @@@@@@r   rF   rF      s    J(	= sJ  , w  $% &   
 r   )name2codepointz&(\w+);c                 &    | syd }t        ||       S )Nr   c                     	 t        t        | j                  d               S # t        $ r | j                  d      cY S w xY w)Nr*   r   )unichrr   grouprt   )ms    r   unescape_entityz!unescape.<locals>.unescape_entity5  s<    	.455 	771:	s    # A A )handle_entities)rL   r   s     r   r}   r}   1  s    
 ?F33r   )NNr8   )$__doc____all__relxmlr   r   bs4r   r	   r
   r   r   r   r   rD   ImportErrorr   r   r   r   compile
IGNORECASEra   rM   r0   r   rF   html.entitiesr   htmlentitydefssubr   r   	NameErrorchrr}   r=   r   r   <module>r      s   2 	 	*    +G4>#$$ (RZZ$ MM	 5	 ( (Rj^F.,
 "**Z(,,
	4G	  *  *	*j  .-.  F	4s5   B 1B7 C B43B47CCCC