
    ,hG                     ^   d Z ddlmZ ddlZddlZddlmZmZmZ g dZ	dZ
dZdZd	Zd
ZdZ G d de      Z e       Z G d de      Z G d de      Z G d de      Zd Z G d de      ZddlmZ  ee       	 ddlmZ  ee       	 ddlmZ  ee       y# e$ r Y w xY w# e$ r Y yw xY w)MIT    )defaultdictN)CharsetMetaAttributeValueContentMetaAttributeValuenonwhitespace_re)HTMLTreeBuilderSAXTreeBuilderTreeBuilderTreeBuilderRegistryfast
permissivestrictxmlhtmlhtml5c                   "    e Zd ZdZd Zd Zd Zy)r   zYA way of looking up TreeBuilder subclasses by their name or by desired
    features.
    c                 :    t        t              | _        g | _        y N)r   listbuilders_for_featurebuildersselfs    V/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/bs4/builder/__init__.py__init__zTreeBuilderRegistry.__init__"   s    $/$5!    c                     |j                   D ]!  }| j                  |   j                  d|       # | j                  j                  d|       y)zRegister a treebuilder based on its advertised features.

        :param treebuilder_class: A subclass of Treebuilder. its .features
           attribute should list its features.
        r   N)featuresr   insertr   )r   treebuilder_classfeatures      r   registerzTreeBuilderRegistry.register&   sL     )11 	LG%%g.55a9JK	LQ 12r   c                    t        | j                        dk(  ryt        |      dk(  r| j                  d   S t        |      }|j                          d}d}t        |      dkD  rs|j	                         }| j
                  j                  |g       }t        |      dkD  r*||}t        |      }n|j                  t        |            }t        |      dkD  rs|y|D ]
  }||v s|c S  y)aj  Look up a TreeBuilder subclass with the desired features.

        :param features: A list of features to look for. If none are
            provided, the most recently registered TreeBuilder subclass
            will be used.
        :return: A TreeBuilder subclass, or None if there's no
            registered subclass with all the requested features.
        r   N)	lenr   r   reversepopr   getsetintersection)r   r   
candidatescandidate_setr!   we_have_the_feature	candidates          r   lookupzTreeBuilderRegistry.lookup0   s     t}}"x=A ==## >
(mallnG"&";";"?"?"L&'!+%!4J$'
OM %2$>$>/0%2M (ma  # 	!IM)  	! r   N)__name__
__module____qualname____doc__r   r"   r.    r   r   r   r      s    3,r   r   c                       e Zd ZdZdZg Zg ZdZdZdZ	i Z
 e       Z e       ZdZeeefdZd Zd Zd Zd	 Z	 	 dd
Zd Zd Zd Zy)r
   z:Turn a textual document into a Beautiful Soup object tree.z[Unknown tree builder]FNc                     d| _         || j                  u r| j                  }|| _        || j                  u r| j                  }|| _        || j                  k(  r| j                  }|| _        y)a  Constructor.

        :param multi_valued_attributes: If this is set to None, the
         TreeBuilder will not turn any values for attributes like
         'class' into lists. Setting this do a dictionary will
         customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES
         for an example.

         Internally, these are called "CDATA list attributes", but that
         probably doesn't make sense to an end-user, so the argument name
         is `multi_valued_attributes`.

        :param preserve_whitespace_tags: A list of tags to treat
         the way <pre> tags are treated in HTML. Tags in this list
         are immune from pretty-printing; their contents will always be
         output as-is.

        :param store_line_numbers: If the parser keeps track of the
         line numbers and positions of the original markup, that
         information will, by default, be stored in each corresponding
         `Tag` object. You can turn this off by passing
         store_line_numbers=False. If the parser you're using doesn't 
         keep track of this information, then setting store_line_numbers=True
         will do nothing.
        N)soupUSE_DEFAULTDEFAULT_CDATA_LIST_ATTRIBUTEScdata_list_attributes DEFAULT_PRESERVE_WHITESPACE_TAGSpreserve_whitespace_tagsTRACKS_LINE_NUMBERSstore_line_numbers)r   multi_valued_attributesr;   r=   s       r   r   zTreeBuilder.__init__y   sq    8 	"d&6&66&*&H&H#%<"#t'7'77'+'L'L$(@%!1!11!%!9!9"4r   c                     || _         y)zThe BeautifulSoup object has been initialized and is now
        being associated with the TreeBuilder.

        :param soup: A BeautifulSoup object.
        N)r6   )r   r6   s     r   initialize_soupzTreeBuilder.initialize_soup   s     	r   c                      y)zDo any work necessary to reset the underlying parser
        for a new document.

        By default, this does nothing.
        Nr3   r   s    r   resetzTreeBuilder.reset   s     	r   c                 8    | j                   y|| j                   v S )a  Might a tag with this name be an empty-element tag?

        The final markup may or may not actually present this tag as
        self-closing.

        For instance: an HTMLBuilder does not consider a <p> tag to be
        an empty-element tag (it's not in
        HTMLBuilder.empty_element_tags). This means an empty <p> tag
        will be presented as "<p></p>", not "<p/>" or "<p>".

        The default implementation has no opinion about which tags are
        empty-element tags, so a tag will be presented as an
        empty-element tag if and only if it has no children.
        "<foo></foo>" will become "<foo/>", and "<foo>bar</foo>" will
        be left alone.

        :param tag_name: The name of a markup tag.
        T)empty_element_tags)r   tag_names     r   can_be_empty_elementz TreeBuilder.can_be_empty_element   s$    & ""*42222r   c                     t               )zRun some incoming markup through some parsing process,
        populating the `BeautifulSoup` object in self.soup.

        This method is not implemented in TreeBuilder; it must be
        implemented in subclasses.

        :return: None.
        NotImplementedErrorr   markups     r   feedzTreeBuilder.feed   s     "##r   c              #      K   |dddf yw)a  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples:
         (markup, encoding, declared encoding,
          has undergone character replacement)

         Each 4-tuple represents a strategy for converting the
         document to Unicode and parsing it. Each strategy will be tried 
         in turn.

         By default, the only strategy is to parse the markup
         as-is. See `LXMLTreeBuilderForXML` and
         `HTMLParserTreeBuilder` for implementations that take into
         account the quirks of particular parsers.
        NFr3   )r   rK   user_specified_encodingdocument_declared_encodingexclude_encodingss        r   prepare_markupzTreeBuilder.prepare_markup   s     2 dD%''s   
c                     |S )a  Wrap an HTML fragment to make it look like a document.

        Different parsers do this differently. For instance, lxml
        introduces an empty <head> tag, and html5lib
        doesn't. Abstracting this away lets us write simple tests
        which run HTML fragments through the parser and compare the
        results against other HTML fragments.

        This method should not be used outside of tests.

        :param fragment: A string -- fragment of HTML.
        :return: A string -- a full HTML document.
        r3   )r   fragments     r   test_fragment_to_documentz%TreeBuilder.test_fragment_to_document   s	     r   c                      y)a2  Set up any substitutions that will need to be performed on 
        a `Tag` when it's output as a string.

        By default, this does nothing. See `HTMLTreeBuilder` for a
        case where this is used.

        :param tag: A `Tag`
        :return: Whether or not a substitution was performed.
        Fr3   )r   tags     r   set_up_substitutionsz TreeBuilder.set_up_substitutions   s     r   c                 j   |s|S | j                   r| j                   j                  dg       }| j                   j                  |j                         d      }t        |j	                               D ]@  }||v s|s
||v s||   }t        |t              rt        j                  |      }n|}|||<   B |S )a  When an attribute value is associated with a tag that can
        have multiple values for that attribute, convert the string
        value to a list of strings.

        Basically, replaces class="foo bar" with class=["foo", "bar"]

        NOTE: This method modifies its input in place.

        :param tag_name: The name of a tag.
        :param attrs: A dictionary containing the tag's attributes.
           Any appropriate attribute values will be modified in place.
        *N)	r9   r'   lowerr   keys
isinstancestrr   findall)r   rE   attrs	universaltag_specificattrvaluevaluess           r   $_replace_cdata_list_attribute_valuesz0TreeBuilder._replace_cdata_list_attribute_values	  s     L%%2266sB?I5599 $(LUZZ\* )9$$,:N "$KE!%-!1!9!9%!@ "'"(E$K!)" r   )NNN)r/   r0   r1   r2   NAMEALTERNATE_NAMESr   is_xml	picklablerD   r8   r(   r:   objectr7   r<   r   r@   rB   rF   rL   rQ   rT   rW   re   r3   r   r   r
   r
   b   s    D#DOHFI
 %'!'*u$(K  /:*5$/%5N3.	$ >BJN(6 
$r   r
   c                   R    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zy)r	   zA Beautiful Soup treebuilder that listens for SAX events.

    This is not currently used for anything, but it demonstrates
    how a simple TreeBuilder would work.
    c                     t               r   rH   rJ   s     r   rL   zSAXTreeBuilder.feed6  s    !##r   c                      y r   r3   r   s    r   closezSAXTreeBuilder.close9      r   c                     t        d t        |j                               D              }| j                  j	                  ||       y )Nc              3   0   K   | ]  \  }}|d    |f  yw)   Nr3   ).0keyrc   s      r   	<genexpr>z.SAXTreeBuilder.startElement.<locals>.<genexpr>=  s     Kec!fe_Ks   )dictr   itemsr6   handle_starttag)r   namer_   s      r   startElementzSAXTreeBuilder.startElement<  s1    KtEKKM7JKK		!!$.r   c                 :    | j                   j                  |       y r   )r6   handle_endtag)r   ry   s     r   
endElementzSAXTreeBuilder.endElementA  s    		%r   c                 (    | j                  ||       y r   )rz   )r   nsTuplenodeNamer_   s       r   startElementNSzSAXTreeBuilder.startElementNSE  s    (E*r   c                 &    | j                  |       y r   )r}   )r   r   r   s      r   endElementNSzSAXTreeBuilder.endElementNSI  s    !r   c                      y r   r3   )r   prefix	nodeValues      r   startPrefixMappingz!SAXTreeBuilder.startPrefixMappingN  s    r   c                      y r   r3   )r   r   s     r   endPrefixMappingzSAXTreeBuilder.endPrefixMappingR  s     	r   c                 :    | j                   j                  |       y r   )r6   handle_data)r   contents     r   
characterszSAXTreeBuilder.charactersW  s    		g&r   c                      y r   r3   r   s    r   startDocumentzSAXTreeBuilder.startDocumentZ  ro   r   c                      y r   r3   r   s    r   endDocumentzSAXTreeBuilder.endDocument]  ro   r   N)r/   r0   r1   r2   rL   rn   rz   r}   r   r   r   r   r   r   r   r3   r   r   r	   r	   /  s>    $/
&+"

'r   r	   c                       e Zd ZdZ eg d      Z eg d      Zg dddgddgdgdgdgdgd	gdgd
gdgdgdZ eddg      Zd Z	y)r   z]This TreeBuilder knows facts about HTML.

    Such as which tags are empty-element tags.
    )areabasebrcolembedhrimginputkeygenlinkmenuitemmetaparamsourcetrackwbrbasefontbgsoundcommandframeimageisindexnextidspacer)#addressarticleaside
blockquotecanvasdddivdldtfieldset
figcaptionfigurefooterformh1h2h3h4h5h6headerr   limainnavnoscriptoloutputppresectiontabletfootulvideo)class	accesskeydropzonerelrevheaderszaccept-charsetarchivesizessandboxfor)rY   ar   tdthr   r   rj   r   iconiframer   r   textareac                 
   |j                   dk7  ry|j                  d      }|j                  d      }|j                  d      }d}||}t        |      |d<   |duS |#|!|j                         dk(  rt	        |      |d<   |duS )a  Replace the declared encoding in a <meta> tag with a placeholder,
        to be substituted when the tag is output to a string.

        An HTML document may come in to Beautiful Soup as one
        encoding, but exit in a different encoding, and the <meta> tag
        needs to be changed to reflect this.

        :param tag: A `Tag`
        :return: Whether or not a substitution was performed.
        r   Fz
http-equivr   charsetNzcontent-type)ry   r'   r   rZ   r   )r   rV   
http_equivr   r   meta_encodings         r   rW   z$HTMLTreeBuilder.set_up_substitutions  s     88vWW\*
'')$'')$  $M6w?C	N T)* !j&<""$6 7w?C	NT)*r   N)
r/   r0   r1   r2   r(   rD   block_elementsr8   r:   rW   r3   r   r   r   r   a  s    
      }  ~N 1en%.{{{"#; ;7%
!" (+E:+>'?$)+r   r   c                    t         j                  d   }| j                  D ]b  }t        | |      }t	        |t
              s t        |||       |j                  j                  |       |j                  j                  |       d y)z9Copy TreeBuilders from the given module into this module.zbs4.builderN)
sysmodules__all__getattr
issubclassr
   setattrappendbuilder_registryr"   )modulethis_modulery   objs       r   register_treebuilders_fromr     so     ++m,K 7fd#c;'Ks+&&t,((11#67r   c                   "     e Zd ZdZ fdZ xZS )ParserRejectedMarkupzgAn Exception to be raised when the underlying parser simply
    refuses to parse the given markup.
    c                     t        |t              r&|}|j                  j                  dt	        |      }t
        t        |   |       y)zzExplain why the parser rejected the given markup, either
        with a textual explanation or another exception.
        z: N)r\   	Exception	__class__r/   r]   superr   r   )r   message_or_exceptioner   s      r   r   zParserRejectedMarkup.__init__  s?     *I6$A/0{{/C/CSV#L "D23GHr   )r/   r0   r1   r2   r   __classcell__)r   s   @r   r   r     s    I Ir   r   rr   )_htmlparser)	_html5lib)_lxml)__license__collectionsr   	itertoolsr   bs4.elementr   r   r   r   FAST
PERMISSIVESTRICTXMLHTMLHTML_5rj   r   r   r
   r	   r   r   r   r    r   r   ImportErrorr   r3   r   r   <module>r     s    #  
  
		?& ?F '( K& KZ/[ /dW+k W+r7I9 I"  ; '	y)	u%  		  		s$   ;B 
B$ B! B!$B,+B,