
    ,hF                     R   d Z dgZddlZddlZddlmZmZmZmZ ddl	m
Z
mZ ddlZddlmZmZ ddl	mZmZmZmZ 	 ddlmZ d	Z G d de      Z G d dej8                        Z G d de      Z G d dej@                        Z! G d de!      Z"y# e$ rZdd
lmZ dZY dZ[^dZ[ww xY w)MITHTML5TreeBuilder    N)
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributenonwhitespace_re)
namespacesprefixes)CommentDoctypeNavigableStringTag)_baseF)baseTc                   @    e Zd ZdZdZeeeegZdZ		 d	dZ
d Zd Zd Zy)
r   zUse html5lib to build a tree.html5libTNc              #   X   K   || _         |rt        j                  d       |d d df y w)NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.F)user_specified_encodingwarningswarn)selfmarkupr   document_declared_encodingexclude_encodingss        W/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/bs4/builder/_html5lib.pyprepare_markupzHTML5TreeBuilder.prepare_markup4   s7      (?$
 MM  G  HtT5))s   (*c                 >   | j                   j                  t        j                  d       t	        j
                  | j                        }|| j                  _        t               }t        |t              s%t        r| j                  |d<   n| j                  |d<    |j                  |fi |}t        |t              rd |_        nF|j                   j"                  j$                  d   }t        |t              s|j&                  }||_        d | j                  _        y )NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.)treeoverride_encodingencodingr   )soup
parse_onlyr   r   r   
HTMLParsercreate_treebuilderunderlying_builderparserdict
isinstancestrnew_html5libr   parseoriginal_encoding	tokenizerstreamcharEncodingname)r   r   r(   extra_kwargsdocr.   s         r   feedzHTML5TreeBuilder.feedA   s    99+MM  _  `$$$*A*AB)/&v&#&484P4P01+/+G+GZ(fll62\2 fc" %)C! & 0 0 7 7 D DQ G/5 %6$:$:!$5C!)-&    c                 h    t        || j                  | j                        | _        | j                  S )N)store_line_numbers)TreeBuilderForHtml5libr#   r8   r'   )r   namespaceHTMLElementss     r   r&   z#HTML5TreeBuilder.create_treebuilder]   s/    "8!499#66#
 &&&r6   c                     d|z  S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html> )r   fragments     r   test_fragment_to_documentz*HTML5TreeBuilder.test_fragment_to_documentd   s    :XEEr6   )NN)__name__
__module____qualname____doc__NAMEr   r   r   featuresTRACKS_LINE_NUMBERSr   r5   r&   r>   r<   r6   r   r   r   )   s<    'Dj&$/H  KO
*.8'Fr6   c                   Z     e Zd Z	 	 d fd	Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Z xZS )r9   c                     |r|| _         nddlm}  |	 dd|i|| _         t        t        |   |       d | _        || _        y )Nr   BeautifulSoupr8   ) html.parser)r#   bs4rI   superr9   __init__r(   r8   )r   r:   r#   r8   kwargsrI   	__class__s         r   rN   zTreeBuilderForHtml5lib.__init__k   sU    DI) &!6HDI 	$d45JK "4r6   c                 x    | j                   j                          t        | j                   | j                   d       S N)r#   resetElementr   s    r   documentClassz$TreeBuilderForHtml5lib.documentClass~   s'    		tyy$))T22r6   c                     |d   }|d   }|d   }t        j                  |||      }| j                  j                  |       y )Nr2   publicIdsystemId)r   for_name_and_idsr#   object_was_parsed)r   tokenr2   rX   rY   doctypes         r   insertDoctypez$TreeBuilderForHtml5lib.insertDoctype   sE    V}$$**48D		##G,r6   c                    i }| j                   rJ| j                  r>| j                   j                  j                  j	                         \  }}||d<   |dz
  |d<    | j
                  j                  ||fi |}t        || j
                  |      S )N
sourceline   	sourcepos)r(   r8   r/   r0   positionr#   new_tagrT   )r   r2   	namespacerO   r`   rb   tags          r   elementClassz#TreeBuilderForHtml5lib.elementClass   s    ;;422 %)KK$9$9$@$@$I$I$K!J	#-F< "+A+F;diii:6:sDIIy11r6   c                 @    t        t        |      | j                        S rR   )TextNoder   r#   )r   datas     r   commentClassz#TreeBuilderForHtml5lib.commentClass   s    tyy11r6   c                     ddl m}  |dd      | _        d| j                  _        t	        | j                  | j                  d       S )Nr   rH   rJ   rK   z[document_fragment])rL   rI   r#   r2   rT   )r   rI   s     r   fragmentClassz$TreeBuilderForHtml5lib.fragmentClass   s7    % ""m4	.		tyy$))T22r6   c                 N    | j                   j                  |j                         y rR   )r#   appendelementr   nodes     r   appendChildz"TreeBuilderForHtml5lib.appendChild   s    		&r6   c                     | j                   S rR   )r#   rU   s    r   getDocumentz"TreeBuilderForHtml5lib.getDocument   s    yyr6   c                 T    t         j                  j                  |       j                  S rR   )treebuilder_baseTreeBuildergetFragmentrp   rU   s    r   ry   z"TreeBuilderForHtml5lib.getFragment   s    ++77=EEEr6   c                     ddl m g t        j                  d      dfd	 |d       dj	                        S )Nr   rH   z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c                    t        | 	      r	 t        | t              rǉ
j                  |       }|r|j                  d      }|j                  dkD  r_|j                  d      xs d}|j                  d      xs |j                  d      xs d}j                  dd|z  d|d	|d
|d	       y j                  dd|z  d|d       y j                  dd|z  d       y t        | t              rj                  dd|z  d| d       y t        | t              rj                  dd|z  d| d       y | j                  r#t        | j                     d| j                  }n| j                  }j                  dd|z  d|d       | j                  rg }t        | j                  j                               D ]k  \  }}t        |t              r"t        |j                     d|j                  }t        |t              rdj                  |      }|j                  ||f       m t!        |      D ]%  \  }}j                  dd|dz   z  |d|d       ' |dz  }| j"                  D ]  } ||        y )Nra      rJ         | z
<!DOCTYPE z "z" "z">>z<!DOCTYPE >z<!-- z -->"<z=")r*   r   matchgroup	lastindexro   r   r   re   r   r2   attrslistitemsr	   joinsortedchildren)rp   indentmr2   rX   rY   
attributesvaluechildrI   
doctype_rervserializeElements            r   r   z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElement   s   '=1'7+$$W-771:D{{Q#$771:#3#$771:#A#Ar		#&<x#K L 		fd"KLII#,@AGW-		cFlGDEG_5		vw?@$$&.w/@/@&A&-ll4D #<<D		sV|T:;==!#J'+GMM,?,?,A'B 9e%d,?@.6t~~.F		#RD%eT2$'HHUOE"))4-89 (.j'9 Te		#!2DdE"RST!$-- 4E$UF34r6   
)r   )rL   rI   recompiler   )r   rp   rI   r   r   r   s     @@@@r   testSerializerz%TreeBuilderForHtml5lib.testSerializer   s?    %ZZ [\
(	4 (	4R 	!$yy}r6   )NT)r?   r@   rA   rN   rV   r^   rg   rk   rm   rs   ru   ry   r   __classcell__)rP   s   @r   r9   r9   i   s<    37$(5&3-223'F0r6   r9   c                   <    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
y	)
AttrListc                 Z    || _         t        | j                   j                        | _        y rR   )rp   r)   r   )r   rp   s     r   rN   zAttrList.__init__   s    $,,,,-
r6   c                 d    t        | j                  j                               j                         S rR   )r   r   r   __iter__rU   s    r   r   zAttrList.__iter__   s#    DJJ$$&'0022r6   c                    | j                   j                  }||d   v s3| j                   j                  |v r@||| j                   j                     v r%t        |t              st        j                  |      }|| j                   |<   y )N*)rp   cdata_list_attributesr2   r*   r   r
   findall)r   r2   r   	list_attrs       r   __setitem__zAttrList.__setitem__   sr     LL66	IcN"!!Y.Idll&7&788 eT*(007"Tr6   c                 H    t        | j                  j                               S rR   )r   r   r   rU   s    r   r   zAttrList.items   s    DJJ$$&''r6   c                 H    t        | j                  j                               S rR   r   r   keysrU   s    r   r   zAttrList.keys   s    DJJOO%&&r6   c                 ,    t        | j                        S rR   )lenr   rU   s    r   __len__zAttrList.__len__   s    4::r6   c                      | j                   |   S rR   )r   r   r2   s     r   __getitem__zAttrList.__getitem__   s    zz$r6   c                 L    |t        | j                  j                               v S rR   r   r   s     r   __contains__zAttrList.__contains__   s    tDJJOO-...r6   N)r?   r@   rA   rN   r   r   r   r   r   r   r   r<   r6   r   r   r      s*    .3#(' /r6   r   c                   r    e Zd Zd Zd Zd Zd Z eee      ZddZ	d Z
d Zd	 Zd
 Zd Zd Z ee      Zy)rT   c                     t         j                  j                  | |j                         || _        || _        || _        y rR   )rw   NoderN   r2   rp   r#   re   )r   rp   r#   re   s       r   rN   zElement.__init__   s1    &&tW\\:	"r6   c                    d x}}t        |t              r|x}}nYt        |t              r|}nF|j                  j                  t
        k(  r|j                  x}}| |_        n|j                  }| |_        t        |t              s&|j                  |j                  j                          || j                  j                  r| j                  j                  d   j                  t
        k(  rZ| j                  j                  d   }| j                  j                  ||z         }|j                  |       || j                  _        y t        |t              r| j                  j                  |      }| j                  j                  r| j                  j                  d      }n=| j                  j                  | j                  j                         }n| j                  }| j                  j                  || j                  |       y )NF)parentmost_recent_element)r*   r+   r   rp   rP   r   r   extractcontentsr#   
new_stringreplace_with_most_recent_element_last_descendantnext_elementr[   )r   rr   string_childr   old_elementnew_elementr   s          r   rs   zElement.appendChild  s   ##udC  $('L5c" E\\##6#'<</L5DKLLEDK%%%,,*BLL  "$)>)>%%b)33F ,,//3K))..{\/IJK$$[1-8DII*$$		,,T2
 ||$$&*ll&C&CE&J#**6
 '+ii&@&@&B#&*ll#II''dll$7 ( 9r6   c                 d    t        | j                  t              ri S t        | j                        S rR   )r*   rp   r   r   rU   s    r   getAttributeszElement.getAttributes9  s$    dllG,I%%r6   c                    |t        |      dkD  rg }t        |j                               D ]&  \  }}t        |t              st        | }||= |||<   ( | j                  j                  j                  | j                  |       t        |j                               D ]  \  }}|| j                  |<    | j                  j                  j                  | j                         y y y )Nr   )r   r   r   r*   tupler	   r#   builder$_replace_cdata_list_attribute_valuesr2   rp   set_up_substitutions)r   r   converted_attributesr2   r   new_names         r   setAttributeszElement.setAttributes>  s    !c*o&9#% #J$4$4$67 1edE*2D9H"4(+0Jx(	1 IIBB		:'#J$4$4$67 +e%*T"+ II224<<@% ':!r6   Nc                     t        | j                  j                  |      | j                        }|r| j                  ||       y | j	                  |       y rR   )ri   r#   r   insertBeforers   )r   rj   r   texts       r   
insertTextzElement.insertTextT  sB    		,,T2DII>dL1T"r6   c                    | j                   j                  |j                         }|j                   j                  t        k(  r| j                   j                  r| j                   j                  |dz
     j                  t        k(  rV| j                   j                  |dz
     }| j
                  j                  ||j                   z         }|j                  |       y | j                   j                  ||j                          | |_	        y )Nra   )
rp   indexrP   r   r   r#   r   r   insertr   )r   rr   refNoder   old_nodenew_strs         r   r   zElement.insertBefore[  s    ""7??3LL""o5$,,:O:O%%eAg.88OK||,,U1W5Hii**8dll+BCG!!'*LLt||4DKr6   c                 8    |j                   j                          y rR   )rp   r   rq   s     r   removeChildzElement.removeChildg  s    r6   c                 \   | j                   }|j                   }|j                  }|j                  dd      }t        |j                        dkD  r|j                  d   }|j
                  }nd}|j
                  }|j                  }t        |      dkD  rc|d   }	|||	_        n||	_        ||	_        ||	|_        n|	|_        ||	|_        |d   j                  dd      }
||
_        ||
|_        d|
_        |D ]$  }||_        |j                  j                  |       & g |_        ||_        y)z1Move all of this tag's children into another tag.Fr   r   NT)
rp   next_siblingr   r   r   r   previous_elementprevious_siblingr   ro   )r   
new_parentrp   new_parent_elementfinal_next_elementnew_parents_last_descendantnew_parents_last_child(new_parents_last_descendant_next_element	to_appendfirst_childlast_childs_last_descendantr   s               r   reparentChildrenzElement.reparentChildrenj  sd    ,,'// %11&8&I&I%QV&W#!**+a/ &8%@%@%D"7R7_7_4 &*"7I7V7V4$$	y>A $A,K*6/J,/A,+AK(*6;F+82="/%16A&3 +4B-*H*HPT*U'7_'47C Mh8I7;'4 	6E-EL''..u5	6
 1r6   c                    | j                   j                  | j                  j                  | j                        }t        || j                   | j                        }| j                  D ]  \  }}||j                  |<    |S rR   )r#   rd   rp   r2   re   rT   r   )r   rf   rr   keyr   s        r   	cloneNodezElement.cloneNode  sf    ii 1 14>>BsDIIt~~6 	)IC#(DOOC 	)r6   c                 .    | j                   j                  S rR   )rp   r   rU   s    r   
hasContentzElement.hasContent  s    ||$$$r6   c                 z    | j                   d k(  rt        d   | j                  fS | j                   | j                  fS )Nhtml)re   r   r2   rU   s    r   getNameTuplezElement.getNameTuple  s5    >>T!f%tyy00>>499,,r6   rR   )r?   r@   rA   rN   rs   r   r   propertyr   r   r   r   r   r   r   r   	nameTupler<   r6   r   rT   rT      sV    #49l&
A( -7J#
<2D%- &Ir6   rT   c                       e Zd Zd Zd Zy)ri   c                 `    t         j                  j                  | d        || _        || _        y rR   )rw   r   rN   rp   r#   )r   rp   r#   s      r   rN   zTextNode.__init__  s&    &&tT2	r6   c                     t         rR   )NotImplementedErrorrU   s    r   r   zTextNode.cloneNode  s    !!r6   N)r?   r@   rA   rN   r   r<   r6   r   ri   ri     s    
"r6   ri   )#__license____all__r   r   bs4.builderr   r   r   r   bs4.elementr	   r
   r   html5lib.constantsr   r   r   r   r   r   html5lib.treebuildersr   rw   r,   ImportErrorer   r   rx   r9   objectr   r   rT   ri   r<   r6   r   <module>r      s      	   ?L=F =F@s-99 sj/v /<@'## @'D"w "u  >Ls   B B&B!!B&