
    ,h/                         d Z ddgZ	 ddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZmZmZmZmZ dd
lmZmZmZmZmZmZmZ ddlmZ dZd Z G d de      Z G d dee      Zy# e$ rZddlmZ Y dZ[edZ[ww xY w)MITLXMLTreeBuilderForXMLLXMLTreeBuilder    )CallableN)BytesIO)StringIO)etree)CommentDoctypeNamespacedAttributeProcessingInstructionXMLProcessingInstruction)FASTHTMLHTMLTreeBuilder
PERMISSIVEParserRejectedMarkupTreeBuilderXML)EncodingDetectorlxmlc                 T    t        d t        | j                               D              S )zInvert a dictionary.c              3   *   K   | ]  \  }}||f  y wN ).0kvs      S/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/bs4/builder/_lxml.py	<genexpr>z_invert.<locals>.<genexpr>&   s     2$!Q12s   )dictlistitems)ds    r   _invertr%   $   s    2$qwwy/222    c                        e Zd Zej                  ZdZeZdZ	dgZ
e	eeeegZdZ ed      Z ee      Z fdZd Zd	 Zd
 Zd fd	Zd Z	 	 	 ddZd Zd Zi fdZd Zd Z d Z!d Z"d Z#d Z$d Z% xZ&S )r   Tzlxml-xmlxmli   z$http://www.w3.org/XML/1998/namespace)r(   c                 b    t         t        |   |       | j                  | j                         y)zLet the BeautifulSoup object know about the standard namespace
        mapping.

        :param soup: A `BeautifulSoup`.
        N)superr   initialize_soup_register_namespacesDEFAULT_NSMAPS)selfsoup	__class__s     r   r+   z%LXMLTreeBuilderForXML.initialize_soupC   s)     	#T:4@!!$"5"56r&   c                     t        |j                               D ]:  \  }}|s	|| j                  j                  vs"|| j                  j                  |<   < y)zLet the BeautifulSoup object know about namespaces encountered
        while parsing the document.

        This might be useful later on when creating CSS selectors.

        :param mapping: A dictionary mapping namespace prefixes to URIs.
        N)r"   r#   r/   _namespaces)r.   mappingkeyvalues       r   r,   z*LXMLTreeBuilderForXML._register_namespacesL   sL     w}}/ 	3JCs$))"7"77 .3		%%c*	3r&   c                 d    | j                   | j                   S t        j                  | dd|      S )zFind the default parser for the given encoding.

        :param encoding: A string.
        :return: Either a parser object or a class, which
          will be instantiated with default arguments.
        FTtargetstrip_cdatarecoverencoding)_default_parserr	   	XMLParserr.   r;   s     r   default_parserz$LXMLTreeBuilderForXML.default_parser[   s8     +'''UD8M 	Mr&   c                 `    | j                  |      }t        |t              r || dd|      }|S )zInstantiate an appropriate parser for the given encoding.

        :param encoding: A string.
        :return: A parser object such as an `etree.XMLParser`.
        FTr7   )r?   
isinstancer   )r.   r;   parsers      r   
parser_forz LXMLTreeBuilderForXML.parser_forg   s8     $$X.fh'xF r&   c                     || _         |t        |      | _        d | _        | j                  g| _        t        t        | "  di | y )Nr   )	r<   setempty_element_tagsr/   DEFAULT_NSMAPS_INVERTEDnsmapsr*   r   __init__)r.   rB   rF   kwargsr0   s       r   rI   zLXMLTreeBuilderForXML.__init__w   sK      &)&)*<&=D#	334#T3=f=r&   c                 V    |d   dk(  rt        |dd  j                  dd            S d |fS )Nr   {   })tuplesplit)r.   tags     r   	_getNsTagzLXMLTreeBuilderForXML._getNsTag   s5     q6S=QRsA.//#;r&   c              #   @  K   | j                    }|rt        | _        nt        | _        t	        |t
              r|d|df t	        |t
              r|j                  d      d|df ||g}t        ||||      }|j                  D ]  }|j                  ||df  yw)aA  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        lxml really wants to get a bytestring and convert it to
        Unicode itself. So instead of using UnicodeDammit to convert
        the bytestring to Unicode using different encodings, this
        implementation uses EncodingDetector to iterate over the
        encodings, and tell lxml to try to parse the document as each
        one in turn.

        :param markup: Some markup -- hopefully a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples:
         (markup, encoding, declared encoding,
          has undergone character replacement)

         Each 4-tuple represents a strategy for converting the
         document to Unicode and parsing it. Each strategy will be tried 
         in turn.
        NFutf8)
is_xmlr   processing_instruction_classr   rA   strencoder   	encodingsmarkup)	r.   rZ   user_specified_encodingexclude_encodingsdocument_declared_encodingis_htmltry_encodingsdetectorr;   s	            r   prepare_markupz$LXMLTreeBuilderForXML.prepare_markup   s     8 kk/0ED-0HD-fc" $ :EAAfc" ==(&-u6 6 12LM#M7,=? ** 	QH??H.H%PP	Qs   BBc                    t        |t              rt        |      }nt        |t              rt	        |      }|j                  | j                        }	 | j                  | j                  j                        | _
        | j                  j                  |       t        |      dk7  rS|j                  | j                        }t        |      dk7  r| j                  j                  |       t        |      dk7  rS| j                  j                          y # t        t        t         j"                  f$ r}t%        |      d }~ww xY w)Nr   )rA   bytesr   rW   r   read
CHUNK_SIZErC   r/   original_encodingrB   feedlencloseUnicodeDecodeErrorLookupErrorr	   ParserErrorr   )r.   rZ   dataes       r   rg   zLXMLTreeBuilderForXML.feed   s    fe$V_F$f%F {{4??+
	*//$))*E*EFDKKKT"d)q.{{4??3t9>KK$$T*	 d)q.
 KK"K1B1BC 	*&q))	*s   B%D :D E3D>>Ec                 (    | j                   g| _        y r   )rG   rH   )r.   s    r   ri   zLXMLTreeBuilderForXML.close   s    334r&   c                    t        |      }d }t        |      dk(  r4t        | j                        dkD  r| j                  j                  d        nt        |      dkD  rx| j	                  |       | j                  j                  t        |             |j                         }t        |j                               D ]  \  }}t        d|d      }|||<    i }t        |j                               D ]D  \  }	}
| j                  |	      \  }}	||
||	<   "| j                  |      }t        ||	|      }	|
||	<   F |}| j                  |      \  }}| j                  |      }| j                  j                  ||||       y )Nr   rM   xmlnszhttp://www.w3.org/2000/xmlns/)r!   rh   rH   appendr,   r%   copyr"   r#   r   rR   _prefix_for_namespacer/   handle_starttag)r.   nameattrsnsmapnsprefixprefix	namespace	attribute	new_attrsattrr5   s              r   startzLXMLTreeBuilderForXML.start   si   Uu:?s4;;/!3 ""4(Z!^ %%e, KKwu~. JJLE%)%++-%8 -!	/V%DF	#,i - 	. 	(KD%"nnT2OIt "'	$55i@*8T9E"'	$	( ...	4--i8		!!$	8UCr&   c                 Z    |yt        | j                        D ]  }|||v s||   c S  y)z9Find the currently active prefix for the given namespace.N)reversedrH   )r.   r{   inverted_nsmaps      r   rt   z+LXMLTreeBuilderForXML._prefix_for_namespace  s@    &t{{3 	1N)i>.I%i00	1 r&   c                    | j                   j                          | j                   j                  d   }| j                  |      \  }}d }|(t	        | j
                        D ]  }|||v s||   } n | j                   j                  ||       t        | j
                        dkD  r| j
                  j                          y y )NrM   )	r/   endDatatagStackrR   r   rH   handle_endtagrh   pop)r.   rv   completed_tagr{   ry   r   s         r   endzLXMLTreeBuilderForXML.end  s    				**2....	4 "*4;;"7 !-)~2M-i8H 			h/t{{a KKOO  r&   c                     | j                   j                          | j                   j                  |dz   |z          | j                   j                  | j                         y )N )r/   r   handle_datarV   )r.   r8   rm   s      r   pizLXMLTreeBuilderForXML.pi  sE    				fslT12		$;;<r&   c                 :    | j                   j                  |       y r   )r/   r   r.   contents     r   rm   zLXMLTreeBuilderForXML.data!  s    		g&r&   c                     | j                   j                          t        j                  |||      }| j                   j	                  |       y r   )r/   r   r   for_name_and_idsobject_was_parsed)r.   rv   pubidsystemdoctypes        r   r   zLXMLTreeBuilderForXML.doctype$  s8    		**4?		##G,r&   c                     | j                   j                          | j                   j                  |       | j                   j                  t               y)z#Handle comments as Comment objects.N)r/   r   r   r
   r   s     r   commentzLXMLTreeBuilderForXML.comment)  s6    				g&		'"r&   c                     d|z  S )See `TreeBuilder`.z)<?xml version="1.0" encoding="utf-8"?>
%sr   r.   fragments     r   test_fragment_to_documentz/LXMLTreeBuilderForXML.test_fragment_to_document/  s    ;hFFr&   )NN)NNN)'__name__
__module____qualname__r	   r=   DEFAULT_PARSER_CLASSrU   r   rV   NAMEALTERNATE_NAMESLXMLr   r   r   featuresre   r!   r-   r%   rG   r+   r,   r?   rC   rI   rR   ra   rg   ri   r   rt   r   r   rm   r   r   r   __classcell__)r0   s   @r   r   r   (   s     ??F#; DgO dCz2HJ DEN%n573
M 	> >B)-261Qf**5 (* ,D\ =
'-
#Gr&   c                   B    e Zd ZeZdgZeeeeegz   Z	dZ
eZd Zd Zd Zy)r   z	lxml-htmlFc                 "    t         j                  S r   )r	   
HTMLParserr>   s     r   r?   zLXMLTreeBuilder.default_parser=  s    r&   c                 *   | j                   j                  }	 | j                  |      | _        | j                  j	                  |       | j                  j                          y # t        t        t        j                  f$ r}t        |      d }~ww xY wr   )r/   rf   rC   rB   rg   ri   rj   rk   r	   rl   r   )r.   rZ   r;   rn   s       r   rg   zLXMLTreeBuilder.feed@  sp    99..	*//(3DKKKV$KK"K1B1BC 	*&q))	*s   AA$ $BBBc                     d|z  S )r   z<html><body>%s</body></html>r   r   s     r   r   z)LXMLTreeBuilder.test_fragment_to_documentJ  s    -88r&   N)r   r   r   r   r   r   r   r   r   r   rU   r   rV   r?   rg   r   r   r&   r   r   r   4  s;    D"mO$dJ!??HF#8  *9r&   ) __license____all__collections.abcr   ImportErrorrn   collectionsior   r   r   r	   bs4.elementr
   r   r   r   r   bs4.builderr   r   r   r   r   r   r   
bs4.dammitr   r   r%   r   r   r   r&   r   <module>r      s    
%(    	 	 	 (3IGK IGX9o'< 9S	  %$$%s   A# #A8(A33A8