
    ,hsA                     @   d Z dZdgZddlmZ 	 ddlmZ ddl	Z	ddl
Z
e	j                  dd	 \  ZZZed	k(  xr ed
k(  xr ed	k\  Zed	k(  xr ed	k(  Zed	k(  xr edk\  ZddlmZmZmZmZmZ ddlmZmZ ddlmZmZmZ dZ G d de      Z  G d de      Z!ed	k(  ried
k(  rces`ddl"Z" e"jF                  d      Z$e$e!_$         e"jF                  de"jJ                        Z&e&e _&        ddlm'Z'm(Z( d Z)d Z*e)e _)        e*e _*        dZyyyy# e$ rZ G d de      ZY dZ[dZ[ww xY w)zCUse the HTMLParser library to parse HTML files that aren't too bad.MITHTMLParserTreeBuilder    )
HTMLParser)HTMLParseErrorc                       e Zd Zy)r   N)__name__
__module____qualname__     Y/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/bs4/builder/_htmlparser.pyr   r      s    r   r   N         )CDataCommentDeclarationDoctypeProcessingInstruction)EntitySubstitutionUnicodeDammit)HTMLHTMLTreeBuilderSTRICTzhtml.parserc                   \    e Zd ZdZd Zd Zd ZddZddZd Z	d Z
d	 Zd
 Zd Zd Zd Zy)BeautifulSoupHTMLParserzA subclass of the Python standard library's HTMLParser class, which
    listens for HTMLParser events and translates them into calls
    to Beautiful Soup's tree construction API.
    c                 D    t        j                  | g|i | g | _        y )N)r   __init__already_closed_empty_element)selfargskwargss      r   r   z BeautifulSoupHTMLParser.__init__=   s$    D24262 -/)r   c                 .    t        j                  |       y)a  In Python 3, HTMLParser subclasses must implement error(), although
        this requirement doesn't appear to be documented.

        In Python 2, HTMLParser implements error() by raising an exception,
        which we don't want to do.

        In any event, this method is called only on very strange
        markup and our best strategy is to pretend it didn't happen
        and keep going.
        N)warningswarn)r    msgs     r   errorzBeautifulSoupHTMLParser.errorI   s     	cr   c                 N    | j                  ||d      }| j                  |       y)zHandle an incoming empty-element tag.

        This is only called when the markup looks like <tag/>.

        :param name: Name of the tag.
        :param attrs: Dictionary of the tag's attributes.
        F)handle_empty_elementN)handle_starttaghandle_endtag)r    nameattrstags       r   handle_startendtagz*BeautifulSoupHTMLParser.handle_startendtagV   s)     ""4U"K4 r   c                    i }|D ]  \  }}|d}|||<   d} | j                         \  }}	| j                  j                  |dd|||	      }
|
r?|
j                  r2|r/| j	                  |d       | j
                  j                  |       yyyy)a3  Handle an opening tag, e.g. '<tag>'

        :param name: Name of the tag.
        :param attrs: Dictionary of the tag's attributes.
        :param handle_empty_element: True if this tag is known to be
            an empty-element tag (i.e. there is not expected to be any
            closing tag).
        N z"")
sourceline	sourceposF)check_already_closed)getpossoupr*   is_empty_elementr+   r   append)r    r,   r-   r)   	attr_dictkeyvalue	attrvaluer2   r3   r.   s              r   r*   z'BeautifulSoupHTMLParser.handle_starttage   s     	 	JC }"IcNI	 !%
Iii''$iJ ( 
 3'',@ t%@ --44T: -A'3r   c                     |r*|| j                   v r| j                   j                  |       y| j                  j                  |       y)zHandle a closing tag, e.g. '</tag>'
        
        :param name: A tag name.
        :param check_already_closed: True if this tag is expected to
           be the closing portion of an empty-element tag,
           e.g. '<tag></tag>'.
        N)r   remover6   r+   )r    r,   r4   s      r   r+   z%BeautifulSoupHTMLParser.handle_endtag   s<      DD,M,M$M
 --44T:II##D)r   c                 :    | j                   j                  |       y)z4Handle some textual data that shows up between tags.N)r6   handle_datar    datas     r   r@   z#BeautifulSoupHTMLParser.handle_data   s    		d#r   c                    |j                  d      rt        |j                  d      d      }n8|j                  d      rt        |j                  d      d      }nt        |      }d}|dk  r<| j                  j                  dfD ]!  }|s	 t        |g      j                  |      }# |s	 t        |      }|xs d}| j                  |       y# t        $ r
}Y d}~Xd}~ww xY w# t        t        f$ r
}Y d}~Bd}~ww xY w)zHandle a numeric character reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Character number, possibly in hexadecimal.
        x   XN   zwindows-1252u   �)
startswithintlstripr6   original_encoding	bytearraydecodeUnicodeDecodeErrorchr
ValueErrorOverflowErrorr@   )r    r,   	real_namerB   encodinges         r   handle_charrefz&BeautifulSoupHTMLParser.handle_charref   s     ??3DKK,b1I__S!DKK,b1ID	Is? "YY88.I $i[188BD	 9~ 22 * 
 . s$   C,C% 	C"C"%C>9C>c                 x    t         j                  j                  |      }||}nd|z  }| j                  |       y)zHandle a named entity reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Name of the entity reference.
        Nz&%s)r   HTML_ENTITY_TO_CHARACTERgetr@   )r    r,   	characterrB   s       r   handle_entityrefz(BeautifulSoupHTMLParser.handle_entityref   s>     '??CCDI	 D 4<Dr   c                     | j                   j                          | j                   j                  |       | j                   j                  t               y)zOHandle an HTML comment.

        :param data: The text of the comment.
        N)r6   endDatar@   r   rA   s     r   handle_commentz&BeautifulSoupHTMLParser.handle_comment   s8    
 					d#		'"r   c                     | j                   j                          |t        d      d }| j                   j                  |       | j                   j                  t               y)zYHandle a DOCTYPE declaration.

        :param data: The text of the declaration.
        zDOCTYPE N)r6   r\   lenr@   r   rA   s     r   handle_declz#BeautifulSoupHTMLParser.handle_decl   sI    
 			C
O$%		d#		'"r   c                    |j                         j                  d      rt        }|t        d      d }nt        }| j
                  j                          | j
                  j                  |       | j
                  j                  |       y)z{Handle a declaration of unknown type -- probably a CDATA block.

        :param data: The text of the declaration.
        zCDATA[N)upperrH   r   r_   r   r6   r\   r@   )r    rB   clss      r   unknown_declz$BeautifulSoupHTMLParser.unknown_decl   sf    
 ::<""8,CH'DC				d#		#r   c                     | j                   j                          | j                   j                  |       | j                   j                  t               y)z\Handle a processing instruction.

        :param data: The text of the instruction.
        N)r6   r\   r@   r   rA   s     r   	handle_piz!BeautifulSoupHTMLParser.handle_pi   s9    
 					d#		/0r   N)T)r   r	   r
   __doc__r   r'   r/   r*   r+   r@   rU   rZ   r]   r`   rd   rf   r   r   r   r   r   7   sE    

/!&;P*$$%N&##1r   r   c                   P     e Zd ZdZdZdZeZeee	gZ
dZd fd	Z	 	 ddZd Z xZS )	r   zpA Beautiful soup `TreeBuilder` that uses the `HTMLParser` parser,
    found in the Python standard library.
    FTc                     t        t        | 
  di | |xs g }|xs i }t        rt        sd|d<   t
        rd|d<   ||f| _        y)a  Constructor.

        :param parser_args: Positional arguments to pass into 
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param parser_kwargs: Keyword arguments to pass into 
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param kwargs: Keyword arguments for the superclass constructor.
        Fstrictconvert_charrefsNr   )superr   r   CONSTRUCTOR_TAKES_STRICT CONSTRUCTOR_STRICT_IS_DEPRECATED"CONSTRUCTOR_TAKES_CONVERT_CHARREFSparser_args)r    rp   parser_kwargsr"   	__class__s       r   r   zHTMLParserTreeBuilder.__init__  sW     	#T3=f=!'R%+#,L&+M(#-05M,-'7r   c              #      K   t        |t              r	|dddf y||g}t        ||d|      }|j                  |j                  |j
                  |j                  f yw)a  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples:
         (markup, encoding, declared encoding,
          has undergone character replacement)

         Each 4-tuple represents a strategy for converting the
         document to Unicode and parsing it. Each strategy will be tried 
         in turn.
        NFT)is_htmlexclude_encodings)
isinstancestrr   markuprK   declared_html_encodingcontains_replacement_characters)r    rx   user_specified_encodingdocument_declared_encodingru   try_encodingsdammits          r   prepare_markupz$HTMLParserTreeBuilder.prepare_markup)  sp     * fc"4u-- 12LMv}d1BD}}f66,,557 	7s   AA c                    | j                   \  }}t        |i |}| j                  |_        	 |j                  |       |j	                          g |_	        y# t
        $ r%}t        j                  t        d             |d}~ww xY w)z{Run some incoming markup through some parsing process,
        populating the `BeautifulSoup` object in self.soup.
        a*  Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.N)
rp   r   r6   feedcloser   r$   r%   RuntimeWarningr   )r    rx   r!   r"   parserrT   s         r   r   zHTMLParserTreeBuilder.feedK  s     ''f($9&9ii	KKLLN
 /1+	  	MM. }~ G	s   !A 	B A??B)NN)NNN)r   r	   r
   rg   is_xml	picklable
HTMLPARSERNAMEr   r   featuresTRACKS_LINE_NUMBERSr   r   r   __classcell__)rr   s   @r   r   r     sF     FIDdF#H 8( >BJN 7D1r   zQ\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?a  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
)tagfindattrfindc                    d | _         | j                  |      }|dk  r|S | j                  }||| | _         g }t        j                  ||dz         }|sJ d       |j                         }||dz   | j                         x| _        }||k  r| j                  rt        j                  ||      }nt        j	                  ||      }|sn|j                  ddd      \  }	}
}|
sd }n,|d d dcxk(  r|dd  k(  sn |d d dcxk(  r|dd  k(  rn n|dd }|r| j                  |      }|j                  |	j                         |f       |j                         }||k  r||| j                         }|d	vr| j                         \  }}d
| j                   v rP|| j                   j!                  d
      z   }t#        | j                         | j                   j%                  d
      z
  }n|t#        | j                         z   }| j                  r| j'                  d||| d d        | j)                  |||        |S |j+                  d      r| j-                  ||       |S | j/                  ||       || j0                  v r| j3                  |       |S )Nr      z#unexpected call to parse_starttag()r   r   '")>/>
zjunk characters in start tag:    r   )__starttag_textcheck_for_whole_start_tagrawdatar   matchendlowerlasttagrj   r   attrfind_tolerantgroupunescaper8   stripr5   countr_   rfindr'   r@   endswithr/   r*   CDATA_CONTENT_ELEMENTSset_cdata_mode)r    iendposr   r-   r   kr.   mattrnamerestr<   r   linenooffsets                  r   parse_starttagr   y  s   #//2A:M,,&q0 gqs+;;;IIK$QqS^1133s&j{{NN7A.%++GQ7()1a(8%HdI 	2A$8)BC.82A#7237%aO	 MM)4	LL(..*I67A! &j$ a%%'k!![[]NFFt+++$"6"6"<"<T"BBT112//55d;<  #d&:&:";;{{

%a/47 8WQv./M<<##C/
    e,d111##C(r   c                     |j                         | _        t        j                  d| j                  z  t        j                        | _        y )Nz</\s*%s\s*>)r   
cdata_elemrecompileIinteresting)r    elems     r   r   r     s/    **,::nt&FMr   T)+rg   __license____all__html.parserr   r   ImportErrorrT   	Exceptionsysr$   version_infomajorminorreleaserm   rn   ro   bs4.elementr   r   r   r   r   
bs4.dammitr   r   bs4.builderr   r   r   r   r   r   r   r   r   VERBOSElocatestarttagendr   r   r   r   r   r   r   <module>r      s   I   #*   ((!, ug A:E%1*EA #(A:#<%1*  %*aZ%>EQJ "  9  
N1j N1bQ1O Q1p 	A:%1*%="

	34 /@+"

 $ ZZ 1B--5nN .<*-;*#o &>*:c
    s   D DDD