
    ,h!                         d Z dZddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZ ddlmZ eZdZ G d	 d
ej                        Z G d de      Z G d de      Z G d de      Zd Zy)zHelper classes for tests.MIT    N)TestCase)BeautifulSoup)CharsetMetaAttributeValueCommentContentMetaAttributeValueDoctypeSoupStrainerTag)HTMLParserTreeBuilderu-
  A bare string
<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
<div><![CDATA[A CDATA section where it doesn't belong]]></div>
<div><svg><![CDATA[HTML5 does allow CDATA sections in SVG]]></svg></div>
<div>A <meta> tag</div>
<div>A <br> tag that supposedly has contents.</br></div>
<div>AT&T</div>
<div><textarea>Within a textarea, markup like <b> tags and <&<&amp; should be treated as literal</textarea></div>
<div><script>if (i < 2) { alert("<b>Markup within script tags should be treated as literal.</b>"); }</script></div>
<div>This numeric entity is missing the final semicolon: <x t="pi&#241ata"></div>
<div><a href="http://example.com/</a> that attribute value never got closed</div>
<div><a href="foo</a>, </a><a href="bar">that attribute value was closed by the subsequent tag</a></div>
<! This document starts with a bogus declaration ><div>a</div>
<div>This document contains <!an incomplete declaration <div>(do you see it?)</div>
<div>This document ends with <!an incomplete declaration
<div><a style={height:21px;}>That attribute value was bogus</a></div>
<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">The doctype is invalid because it contains extra whitespace
<div><table><td nowrap>That boolean attribute had no value</td></table></div>
<div>Here's a nonexistent entity: &#foo; (do you see it?)</div>
<div>This document ends before the entity finishes: &gt
<div><p>Paragraphs shouldn't contain block display elements, but this one does: <dl><dt>you see?</dt></p>
<b b="20" a="1" b="10" a="2" a="3" a="4">Multiple values for the same attribute.</b>
<div><table><tr><td>Here's a table</td></tr></table></div>
<div><table id="1"><tr><td>Here's a nested table:<table id="2"><tr><td>foo</td></tr></table></td></div>
<div>This tag contains nothing but whitespace: <b>    </b></div>
<div><blockquote><p><b>This p tag is cut off by</blockquote></p>the end of the blockquote tag</div>
<div><table><div>This table contains bare markup</div></table></div>
<div><div id="1">
 <a href="link1">This link is never closed.
</div>
<div id="2">
 <div id="3">
   <a href="link2">This link is closed.</a>
  </div>
</div></div>
<div>This document contains a <!DOCTYPE surprise>surprise doctype</div>
<div><a><B><Cd><EFG>Mixed case tags are folded to lowercase</efg></CD></b></A></div>
<div><our☃>Tag name contains Unicode characters</our☃></div>
<div><a ☃="snowman">Attribute name contains Unicode characters</a></div>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
c                   >    e Zd Zed        Zd Zd ZddZd Zd	dZ	y)
SoupTestc                     t         S N)default_builderselfs    M/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/bs4/testing.pyr   zSoupTest.default_builder@   s        c                 V    |j                  d| j                        }t        |fd|i|S )z*Build a Beautiful Soup object from markup.builder)popr   r   )r   markupkwargsr   s       r   soupzSoupTest.soupD   s-    **Y(<(<=V?W???r   c                 D     | j                   di |j                  |      S )z[Turn an HTML fragment into a document.

        The details depend on the builder.
         )r   test_fragment_to_document)r   r   r   s      r   document_forzSoupTest.document_forI   s%    
 $t##-f-GGOOr   Nc                     | j                   }t        ||      }||}| j                  |j                         | j	                  |             y )N)r   )r   r   assertEqualdecoder   )r   to_parsecompare_parsed_tor   objs        r   assertSoupEqualszSoupTest.assertSoupEqualsP   sG    &&Hg6$ (t'8'89J'KLr   c                     d}|j                   D ]>  }|r8| j                  ||j                         | j                  ||j                         |}@ y)zyEnsure that next_element and previous_element are properly
        set for all descendants of the given element.
        N)descendantsr!   next_elementprevious_element)r   elementearlieres       r   assertConnectednesszSoupTest.assertConnectednessX   sR     $$ 	A  G$8$89  !*<*<=G		r   c           	         d}|j                   |j                  "J dj                  ||j                  d             |j                  "J dj                  ||j                  d             |j                  "J dj                  ||j                  d             d}d}d}t        |j                        dz
  }|j                  D ]h  }d}|dk(  r|j                   |j                  |u s"J dj                  ||j                  |             |j                  |u s"J dj                  ||j                  |             |j                  IJ dj                  ||j                  d             |j                  |j                  |dz
     u s2J d	j                  ||j                  |j                  |dz
                  |j                  |dz
     j                  |u sBJ d
j                  |j                  |dz
     |j                  |dz
     j                  |             |u|j                  |u s7J dj                  ||j                  ||j                   j                               |j                  |u s"J dj                  ||j                  |             t        |t              rL|j                  r@| j                  |d      }|j                  "J d
j                  ||j                  d             ||}n|}||k(  r.|j                  "J d
j                  ||j                  d             |dz  }k ||n|}||}|s||}	 |0|j                  "J dj                  ||j                  d             	 y|j                  F|j                  |j                  u s,J dj                  ||j                  |j                               	 y|j                   }|S )z.Ensure proper linkage throughout the document.Nz3Bad previous_element
NODE: {}
PREV: {}
EXPECTED: {}z3Bad previous_sibling
NODE: {}
PREV: {}
EXPECTED: {}z/Bad next_sibling
NODE: {}
NEXT: {}
EXPECTED: {}r      z/Bad next_element
NODE: {}
NEXT: {}
EXPECTED: {}z2Bad previous_sibling
NODE: {}
PREV {}
EXPECTED: {}z1Bad previous_sibling
NODE: {}
PREV {}
EXPECTED {}z-Bad next_sibling
NODE: {}
NEXT {}
EXPECTED {}z=Bad previous_element
NODE: {}
PREV {}
EXPECTED {}
CONTENTS {}z-Bad next_element
NODE: {}
NEXT {}
EXPECTED {}T)parentr*   formatprevious_siblingnext_siblinglencontentsr)   
isinstancer   linkage_validator)	r   el_recursive_call
descendantidxchild
last_childlast_idxtargets	            r   r8   zSoupTest.linkage_validatorc   s8   
 99&&. HOO++T &&. HOO++T ??* DKK
 
r{{#a'[[ >	EJ ax99(??e3 KRR !11R7 OVV!5#9#92 !119 NUU!5#9#94 --S1W1EE JQQu55r{{377K {{37+88EA FMMC!G,bkk#'.B.O.OQV
 ) 11Z? [bb!5#9#9:u||G\G\ &22e; JQQ&
(?(?
 %%%..!33E4@
!..6 FMM"J$;$;T %'
"
 h))1 FMMu114 1HC}>	@ )4
%=E5#4F> --5 JQQ!5#5#5t   ((4 --1D1DD JQQ!5#5#5v7J7J     & Lr   r   )F)
__name__
__module____qualname__propertyr   r   r   r&   r.   r8   r   r   r   r   r   >   s1     @
PM	pr   r   c                   z   e Zd ZdZd Zd Zd Zd?dZd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d  Z"d! Z#d" Z$d# Z%d$ Z&d% Z'd& Z(d' Zd( Z)d) Z*d* Z+d+ Z,d, Z-d- Z.d. Z/d/ Z0d0 Z1d1 Z2d2 Z3d3 Z4d4 Z5d5 Z6d6 Z7d7 Z8d8 Z9d9 Z:d: Z;d; Z<d< Z=d= Z>y>)@HTMLTreeBuilderSmokeTestaC  A basic test of a treebuilder's competence.

    Any HTML treebuilder, present or future, should be able to pass
    these tests. With invalid markup, there's room for interpretation,
    and different parsers can handle it differently. But with the
    markup in these tests, there's not much room for interpretation.
    c                     dD ]@  }| j                  d      }|j                  |      }| j                  d|j                         B y)zmVerify that all HTML4 and HTML5 empty element (aka void element) tags
        are handled correctly.
        )areabasebrcolembedhrimginputkeygenlinkmenuitemmetaparamsourcetrackwbrspacerframe TN)r   new_tagr!   is_empty_element)r   namer   r[   s       r   test_empty_element_tagsz0HTMLTreeBuilderSmokeTest.test_empty_element_tags   sF    
 	=D 99R=Dll4(GT7#;#;<	=r   c                    | j                  d      }t        j                  |d      }t        j                  |      }| j	                  |j
                  t               | j	                  |j                         |j                                y Nz<a><b>foo</a>   r   pickledumpsloadsr!   	__class__r   r"   r   treedumpedloadeds       r   !test_pickle_and_unpickle_identityz:HTMLTreeBuilderSmokeTest.test_pickle_and_unpickle_identity   b     yy)dA&f%))=9$++-8r   c                 Z   | j                  |      \  }}|j                  d   }| j                  |j                  t               | j                  ||       | j                  |j                  d      dt        |       |       | j                  |j                  j                  d   d       y)z8Assert that a given doctype string is handled correctly.r   utf8Nfoo_document_with_doctyper6   r!   rf   r	   encoder5   pr   doctype_fragmentdoctype_strr   doctypes        r   assertDoctypeHandledz-HTMLTreeBuilderSmokeTest.assertDoctypeHandled   s     778HIT --"**G4"23KK 1[!12	
 	+U3r   c                 f    d|d|d}|dz   }| j                  |      }|j                  d      |fS )z5Generate and parse a document with the given doctype.z<! >z
<p>foo</p>rn   )r   rr   )r   ru   doctype_stringrw   r   r   s         r   rq   z/HTMLTreeBuilderSmokeTest._document_with_doctype  s9     .0@A>)yy ~~f%t++r   c                 H    | j                  d       | j                  d       y)z?Make sure normal, everyday HTML doctypes are handled correctly.htmlz4html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"Nrx   r   s    r   test_normal_doctypesz-HTMLTreeBuilderSmokeTest.test_normal_doctypes  s"    !!&)!!B	Dr   c                     | j                  d      }|j                  d   }| j                  d|j                                y )Nz
<!DOCTYPE>r   rZ   )r   r6   r!   strip)r   r   rw   s      r   test_empty_doctypez+HTMLTreeBuilderSmokeTest.test_empty_doctype  s4    yy&--"W]]_-r   c                 j   dD ]  }| j                  d|      \  }}|j                  d   }| j                  |j                  t               | j                  |d       | j                  |j                  d      d t        |       d       | j                  |j                  j                  d   d        y )N)rw   DocTyper~   r   rn   s   <!DOCTYPE html>ro   rp   rt   s        r   test_mixed_case_doctypez0HTMLTreeBuilderSmokeTest.test_mixed_case_doctype  s     6 	8 $ ; ;(!K mmA&GW..8Wf-F#$5S%56" TVV__Q/7#	8r   c                 *    d}| j                  |       y )Nznhtml PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"r   )r   rw   s     r   test_public_doctype_with_urlz5HTMLTreeBuilderSmokeTest.test_public_doctype_with_url-  s     C!!'*r   c                 &    | j                  d       y )Nz$foo SYSTEM "http://www.example.com/"r   r   s    r   test_system_doctypez,HTMLTreeBuilderSmokeTest.test_system_doctype1  s    !!"HIr   c                 &    | j                  d       y )Nz#xsl:stylesheet SYSTEM "htmlent.dtd"r   r   s    r   test_namespaced_system_doctypez7HTMLTreeBuilderSmokeTest.test_namespaced_system_doctype4      !!"GHr   c                 &    | j                  d       y )Nz#xsl:stylesheet PUBLIC "htmlent.dtd"r   r   s    r   test_namespaced_public_doctypez7HTMLTreeBuilderSmokeTest.test_namespaced_public_doctype8  r   r   c                     d}| j                  |      }| j                  |j                  d      j                  dd      |j                  dd             y)zJA real XHTML document should come out more or less the same as it went in.   <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>utf-8   
r   N)r   r!   rr   replacer   r   r   s      r   test_real_xhtml_documentz1HTMLTreeBuilderSmokeTest.test_real_xhtml_document<  sN     yy KK ((4NN5#&	(r   c                 ~    d}| j                  |      }| j                  dt        |j                  d                   y)ztWhen a namespaced XML document is parsed as HTML it should
        be treated as HTML with weird tag names.
        s.   <ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>ra   zns1:fooNr   r!   r5   find_allr   s      r   test_namespaced_htmlz-HTMLTreeBuilderSmokeTest.test_namespaced_htmlI  s6     Gyy Ci 89:r   c                     d}| j                  |      }| j                  ||j                                d}| j                  |      }| j                  ||j                  d             y )Nz<?PITarget PIContent?>s   <?PITarget PIContent?>rn   )r   r!   r"   rr   r   s      r   test_processing_instructionz4HTMLTreeBuilderSmokeTest.test_processing_instructionQ  sY    
 .yy /.yy V!45r   c                 B    t        j                  | j                         y)zMake sure you can copy the tree builder.

        This is important because the builder is part of a
        BeautifulSoup object, and we want to be able to copy that.
        N)copydeepcopyr   r   s    r   test_deepcopyz&HTMLTreeBuilderSmokeTest.test_deepcopy^  s     	d**+r   c                     | j                  d      }| j                  |j                  j                         | j	                  t        |j                        d       y)zA <p> tag is never designated as an empty-element tag.

        Even if the markup shows it as an empty-element tag, it
        shouldn't be presented that way.
        <p/><p></p>N)r   assertFalsers   r\   r!   strr   r   s     r   !test_p_tag_is_never_empty_elementz:HTMLTreeBuilderSmokeTest.test_p_tag_is_never_empty_elementf  sB     yy 001TVVi0r   c                 p    | j                  dd       | j                  dd       | j                  dd       y)zA tag that's not closed by the end of the document should be closed.

        This applies to all tags except empty-element tags.
        <p>r   z<b>z<b></b>z<br><br/>Nr&   r   s    r   test_unclosed_tags_get_closedz6HTMLTreeBuilderSmokeTest.test_unclosed_tags_get_closedp  s4    
 	eY/eY/fg.r   c                     | j                  d      }| j                  |j                  j                         | j	                  t        |j                        d       y)zA <br> tag is designated as an empty-element tag.

        Some parsers treat <br></br> as one <br/> tag, some parsers as
        two tags, but it should always be an empty-element tag.
        z	<br></br>r   N)r   
assertTruerJ   r\   r!   r   r   s     r   #test_br_is_always_empty_element_tagz<HTMLTreeBuilderSmokeTest.test_br_is_always_empty_element_tagz  s@     yy%001TWWw/r   c                 &    | j                  d       y )Nz<em><em></em></em>r   r   s    r   test_nested_formatting_elementsz8HTMLTreeBuilderSmokeTest.test_nested_formatting_elements  s    23r   c                 r    d}| j                  |      }| j                  d|j                  d      d          y )Nz<!DOCTYPE html>
<html>
<head>
<title>Ordinary HEAD element test</title>
</head>
<script type="text/javascript">
alert("Help!");
</script>
<body>
Hello, world!
</body>
</html>
ztext/javascriptscripttype)r   r!   find)r   r~   r   s      r   test_double_headz)HTMLTreeBuilderSmokeTest.test_double_head  s7     yy*DIIh,?,GHr   c                 h   d}| j                  |       | j                  |      }|j                  d      }| j                  |j                  t
               |j                  d      }| j                  ||j                         |j                  d      }| j                  ||j                         y )Nz<p>foo<!--foobar-->baz</p>foobar)textro   baz)r&   r   r   r!   rf   r   r)   r*   )r   r   r   commentro   r   s         r   test_commentz%HTMLTreeBuilderSmokeTest.test_comment  s    -f%yy )))***G4 iiUi##"2"23iiUi##"6"67r   c                    d}d}| j                  |       | j                  |       | j                  |      }| j                  |j                  j	                         |       | j                  |      }| j                  |j
                  j	                         |       | j                  d      }| j                  |j
                  j	                         d       y)zWhitespace must be preserved in <pre> and <textarea> tags,
        even if that would mean not prettifying the markup.
        z<pre>   </pre>z<textarea> woo
woo  </textarea>z<textarea></textarea>N)r&   r   r!   preprettifytextarea)r   
pre_markuptextarea_markupr   s       r   -test_preserved_whitespace_in_pre_and_textareazFHTMLTreeBuilderSmokeTest.test_preserved_whitespace_in_pre_and_textarea  s     &
<j)o.yy$**,j9yy)//1?Cyy01//13JKr   c                 v    d}| j                  |       d}| j                  |       d}| j                  |       y)z+Inline elements can be nested indefinitely.z<b>Inside a B tag</b>z!<p>A <i>nested <b>tag</b></i></p>z/<p>A <a>doubly <i>nested <b>tag</b></i></a></p>Nr   )r   b_tagnested_b_tagdouble_nested_b_tags       r   test_nested_inline_elementsz4HTMLTreeBuilderSmokeTest.test_nested_inline_elements  s<    'e$:l+Ol+r   c                     | j                  d      }|j                  }| j                  |j                  j                  j
                  d       | j                  |j                  j
                  d       y)zBlock elements can be nested.z*<blockquote><p><b>Foo</b></p></blockquote>FooN)r   
blockquoter!   rs   bstring)r   r   r   s      r    test_nested_block_level_elementsz9HTMLTreeBuilderSmokeTest.test_nested_block_level_elements  sR    yyEF__
..6,,e4r   c                 N    d}| j                  |d       | j                  d       y)z$One table can go inside another one.z[<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td>zh<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td></tr></table>z{<table><thead><tr><td>Foo</td></tr></thead><tbody><tr><td>Bar</td></tr></tbody><tfoot><tr><td>Baz</td></tr></tfoot></table>Nr   )r   r   s     r   test_correctly_nested_tablesz5HTMLTreeBuilderSmokeTest.test_correctly_nested_tables  s6    " 	!	" 	;	<r   c                     d}| j                  |      }| j                  ddg|j                  d          | j                  |j                  |j                  dd             y )Nz<div class=" foo bar	 "></a>ro   barclassdivzfoo bar)class_)r   r!   r   r   r   s      r   *test_multivalued_attribute_with_whitespacezCHTMLTreeBuilderSmokeTest.test_multivalued_attribute_with_whitespace  sW     0yy %'):; 	499U99#EFr   c                 ~    d}| j                  |      }| j                  dg|j                  j                  d          y )Nz1<table><div><div class="css"></div></div></table>cssr   )r   r!   r   r   s      r   (test_deeply_nested_multivalued_attributezAHTMLTreeBuilderSmokeTest.test_deeply_nested_multivalued_attribute  s7     Eyy %$((,,w"78r   c                 l    d}| j                  |      }| j                  ddg|j                  d          y )Nz<html class="a b"></html>ar   r   )r   r!   r~   r   s      r   "test_multivalued_attribute_on_htmlz;HTMLTreeBuilderSmokeTest.test_multivalued_attribute_on_html  s4     -yy #sTYYw%78r   c                 (    | j                  dd       y )Nz<a b="<a>"></a>z<a b="&lt;a&gt;"></a>r   r   s    r   3test_angle_brackets_in_attribute_values_are_escapedzLHTMLTreeBuilderSmokeTest.test_angle_brackets_in_attribute_values_are_escaped  s    /1HIr   c                 (    | j                  dd       y )Nz$<p>&bull; AT&T is in the s&p 500</p>u)   <p>• AT&amp;T is in the s&amp;p 500</p>r   r   s    r   3test_strings_resembling_character_entity_referenceszLHTMLTreeBuilderSmokeTest.test_strings_resembling_character_entity_references  s     	2:	
r   c                 (    | j                  dd       y )Nz<p>Bob&apos;s Bar</p>z<p>Bob's Bar</p>r   r   s    r   test_apos_entityz)HTMLTreeBuilderSmokeTest.test_apos_entity  s    #	
r   c                 v    d}| j                  |      }| j                  d|j                  j                         y )Nz%<p>&#147;Hello&#148; &#45;&#9731;</p>u   “Hello” -☃r   r!   rs   r   r   s      r   *test_entities_in_foreign_document_encodingzCHTMLTreeBuilderSmokeTest.test_entities_in_foreign_document_encoding  s0     9yy +TVV]];r   c                     d}| j                  d|       | j                  d|       | j                  d|       | j                  d|       y )Nu   <p id="piñata"></p>z<p id="pi&#241;ata"></p>z<p id="pi&#xf1;ata"></p>z<p id="pi&#Xf1;ata"></p>z<p id="pi&ntilde;ata"></p>r   r   expects     r   0test_entities_in_attributes_converted_to_unicodezIHTMLTreeBuilderSmokeTest.test_entities_in_attributes_converted_to_unicode  sK    H8&A8&A8&A:FCr   c                     d}| j                  d|       | j                  d|       | j                  d|       | j                  d|       y )Nu   <p>piñata</p>z<p>pi&#241;ata</p>z<p>pi&#xf1;ata</p>z<p>pi&#Xf1;ata</p>z<p>pi&ntilde;ata</p>r   r   s     r   *test_entities_in_text_converted_to_unicodezCHTMLTreeBuilderSmokeTest.test_entities_in_text_converted_to_unicode  sK    B2F;2F;2F;4f=r   c                 (    | j                  dd       y )Nz#<p>I said &quot;good day!&quot;</p>z<p>I said "good day!"</p>r   r   s    r   ,test_quot_entity_converted_to_quotation_markzEHTMLTreeBuilderSmokeTest.test_quot_entity_converted_to_quotation_mark&  s    C9	;r   c                 t    d}| j                  d|       | j                  d|       | j                  d|       y )Nu   �z&#10000000000000;z&#x10000000000000;z&#1000000000;r   r   s     r   test_out_of_range_entityz1HTMLTreeBuilderSmokeTest.test_out_of_range_entity*  s9    ,16:2F;ov6r   c                    | j                  d      }| j                  d|j                  j                  j                  j
                         | j                  d|j                  j
                         | j                  |       y)zDMostly to prevent a recurrence of a bug in the html5lib treebuilder.z!<html><h2>
foo</h2><p></p></html>rs   N)r   r!   h2r   r)   r]   rs   r.   r   s     r   test_multipart_stringsz/HTMLTreeBuilderSmokeTest.test_multipart_strings0  s[    yy=>dggnn99>>?dffkk*  &r   c                 L    | j                  dd       | j                  dd       y)zqVerify consistent handling of empty-element tags,
        no matter how they come in through the markup.
        z<br/><br/><br/>z<br /><br /><br />Nr   r   s    r   r^   z0HTMLTreeBuilderSmokeTest.test_empty_element_tags7  s(     	/1BC24EFr   c                     d}| j                  |      }| j                  d|j                  j                         | j	                  |       y)8Prevent recurrence of a bug in the html5lib treebuilder.z?<html><head></head>
  <link></link>
  <body>foo</body>
</html>
N)r   assertNotEqualr~   bodyr.   r   contentr   s      r   #test_head_tag_between_head_and_bodyz<HTMLTreeBuilderSmokeTest.test_head_tag_between_head_and_body>  s?    
 yy!D$))..1  &r   c                 `    d}| j                  |      }| j                  |j                         y)r   z<!DOCTYPE html>
<html>
 <body>
   <article id="a" >
   <div><a href="1"></div>
   <footer>
     <a href="2"></a>
   </footer>
  </article>
  </body>
</html>
N)r   r.   articler   s      r   test_multiple_copies_of_a_tagz6HTMLTreeBuilderSmokeTest.test_multiple_copies_of_a_tagI  s+     yy!  .r   c                 <   d}| j                  |      }| j                  ||j                                |j                  }| j                  d|j                  d          | j                  d|j                  d          | j                  d|j                  d          y)	zParsers don't need to *understand* namespaces, but at the
        very least they should not choke on namespaces or lose
        data.s   <html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>http://www.w3.org/1999/xhtmlxmlns"http://www.w3.org/1998/Math/MathMLzxmlns:mathmlhttp://www.w3.org/2000/svgz	xmlns:svgN)r   r!   rr   r~   )r   r   r   r~   s       r   test_basic_namespacesz.HTMLTreeBuilderSmokeTest.test_basic_namespacesZ  s    
 nyy /yy779KL0$))N2K	M($))K*@	Br   c                 l    d}| j                  |      }| j                  ddg|j                  d          y )Ns   <a class="foo bar">ro   r   r   )r   r!   r   r   s      r   -test_multivalued_attribute_value_becomes_listzFHTMLTreeBuilderSmokeTest.test_multivalued_attribute_value_becomes_listi  s1    'yy %9r   c                 v    d}| j                  |      }| j                  d|j                  j                         y )NuD   <html><head><meta encoding="euc-jp"></head><body>Sacré bleu!</body>   Sacré bleu!)r   r!   r   r   r   s      r   test_can_parse_unicode_documentz8HTMLTreeBuilderSmokeTest.test_can_parse_unicode_documentu  s3     yyy )499+;+;<r   c                     t        d      }| j                  d|      }| j                  |j                         d       y)z2Parsers should be able to work with SoupStrainers.r   z&A <b>bold</b> <meta/> <i>statement</i>)
parse_onlyz<b>bold</b>N)r
   r   r!   r"   )r   strainerr   s      r   test_soupstrainerz*HTMLTreeBuilderSmokeTest.test_soupstrainer}  s;    $yyA$,  .6r   c                 (    | j                  dd       y )Nz<foo attr='bar'></foo>z<foo attr="bar"></foo>r   r   s    r   7test_single_quote_attribute_values_become_double_quoteszPHTMLTreeBuilderSmokeTest.test_single_quote_attribute_values_become_double_quotes  s    66	8r   c                 *    d}| j                  |       y )N'<foo attr='bar "brawls" happen'>a</foo>r   )r   r   s     r   7test_attribute_values_with_nested_quotes_are_left_alonezPHTMLTreeBuilderSmokeTest.test_attribute_values_with_nested_quotes_are_left_alone  s    <d#r   c                     d}| j                  |      }d|j                  d<   | j                  |j                  j                         d       y )Nr  zBrawls happen at "Bob's Bar"attrz:<foo attr="Brawls happen at &quot;Bob's Bar&quot;">a</foo>)r   ro   r&   r"   )r   r   r   s      r   :test_attribute_values_with_double_nested_quotes_get_quotedzSHTMLTreeBuilderSmokeTest.test_attribute_values_with_double_nested_quotes_get_quoted  sB    <yy:HHOOM	Or   c                 L    | j                  dd       | j                  dd       y )Nz+<this is="really messed up & stuff"></this>z/<this is="really messed up &amp; stuff"></this>z.<a href="http://example.org?a=1&b=2;3">foo</a>z2<a href="http://example.org?a=1&amp;b=2;3">foo</a>r   r   s    r   .test_ampersand_in_attribute_value_gets_escapedzGHTMLTreeBuilderSmokeTest.test_ampersand_in_attribute_value_gets_escaped  s.    KO	Q 	<@	Br   c                 &    | j                  d       y )Nz/<a href="http://example.org?a=1&amp;b=2;3"></a>r   r   s    r   7test_escaped_ampersand_in_attribute_value_is_left_alonezPHTMLTreeBuilderSmokeTest.test_escaped_ampersand_in_attribute_value_is_left_alone  s    OPr   c                 0    d}d}| j                  ||       y )N-<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>#   <p>&lt;&lt;sacré bleu!&gt;&gt;</p>r   )r   r   expecteds      r   1test_entities_in_strings_converted_during_parsingzJHTMLTreeBuilderSmokeTest.test_entities_in_strings_converted_during_parsing  s     ?YdH-r   c                 v    d}| j                  |      }| j                  |j                  j                  d       y )Ns   <p>Foo</p>u	   ‘Foo’r   )r   quoter   s      r   )test_smart_quotes_converted_on_the_way_inzBHTMLTreeBuilderSmokeTest.test_smart_quotes_converted_on_the_way_in  s4     &yyFFMMN	Pr   c                 r    | j                  d      }| j                  |j                  j                  d       y )Nz<a>&nbsp;&nbsp;</a>u     )r   r!   r   r   r   s     r   0test_non_breaking_spaces_converted_on_the_way_inzIHTMLTreeBuilderSmokeTest.test_non_breaking_spaces_converted_on_the_way_in  s*    yy./(@Ar   c                     d}dj                  d      }| j                  |      }| j                  |j                  j                  d      |       y )Nr  r  r   )rr   r   r!   rs   )r   r   r  r   s       r   &test_entities_converted_on_the_way_outz?HTMLTreeBuilderSmokeTest.test_entities_converted_on_the_way_out  sB    >Y``ahiyyw/:r   c                     d}|j                  d      }| j                  |      }|j                  d      }|j                  dd      }|j                  d      }| j                  ||       y )Nu   <html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacré bleu!</p></body></html>z
iso-8859-1r   zISO-Latin-1)rr   r   r   r!   )r   unicode_htmliso_latin_htmlr   resultr  s         r   test_real_iso_latin_documentz5HTMLTreeBuilderSmokeTest.test_real_iso_latin_document  sp    
  &,,\: yy(W%
  ''w? ??7+ 	*r   c                    d}|j                  d      }| j                  |      }| j                  |j                  d      |j                  d             | j                  |j                  d      |j                  d             y )Nsk   <html><head></head><body><pre>Shift-JISŃR[fBOꂽ{̃t@CłB</pre></body></html>z	shift-jisr   euc_jp)r"   r   r!   rr   )r   shift_jis_htmlr&  r   s       r   test_real_shift_jis_documentz5HTMLTreeBuilderSmokeTest.test_real_shift_jis_document  sv    $ 	 &,,[9yy& 	W-|/B/B7/KLX.0C0CH0MNr   c                     d}| j                  |d      }|j                  dv sJ | j                  |j                  d      |j	                  d      j                  d             y )Ns   <html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1></body></html>	iso8859-8)from_encoding)r/  z
iso-8859-8r   )r   original_encodingr!   rr   r"   )r   hebrew_documentr   s      r   test_real_hebrew_documentz2HTMLTreeBuilderSmokeTest.test_real_hebrew_document  sj     Eyy;  8 %%)DDDKK "";/66w?	Ar   c                 
   d}d|z  }| j                  |      }|j                  dddi      }|d   }| j                  d|       | j                  t	        |t
                     | j                  d|j                  d	             y )
NzE<meta content="text/html; charset=x-sjis" http-equiv="Content-type"/>j<html><head>
%s
<meta http-equiv="Content-language" content="ja"/></head><body>Shift-JIS markup goes here.rS   z
http-equivzContent-typer   ztext/html; charset=x-sjisztext/html; charset=utf8rn   )r   r   r!   r   r7   r   rr   )r   meta_tagr,  r   parsed_metar   s         r   'test_meta_tag_reflects_current_encodingz@HTMLTreeBuilderSmokeTest.test_meta_tag_reflects_current_encoding  s    2
7:BC yy( ii~(FGi(4g> 	
7,EFG 	2GNN64JKr   c                    d}d|z  }| j                  |      }|j                  dd      }|d   }| j                  d|       | j                  t	        |t
                     | j                  d|j                  d             y )	Nz'<meta id="encoding" charset="x-sjis" />r5  rS   encoding)idcharsetzx-sjisrn   )r   r   r!   r   r7   r   rr   )r   r6  r,  r   r7  r<  s         r   3test_html5_style_meta_tag_reflects_current_encodingzLHTMLTreeBuilderSmokeTest.test_html5_style_meta_tag_reflects_current_encoding  s     >7:BC yy( ii:i6i(7+ 	
7,EFG 	!78r   c                     | j                  d      }d|j                  d<   | j                  d|j                  j                                y )Nz<a>text</a>r   ro   z<a foo="bar">text</a>)r   r   r!   r"   )r   datas     r   5test_tag_with_no_attributes_can_have_attributes_addedzNHTMLTreeBuilderSmokeTest.test_tag_with_no_attributes_can_have_attributes_added&  s8    yy'u0$&&--/Br   c                 P    | j                  t              }| j                  |       yz3Test the worst case (currently) for linking issues.Nr   BAD_DOCUMENTr8   r   s     r   test_worst_casez(HTMLTreeBuilderSmokeTest.test_worst_case+        yy&t$r   N)DOCTYPE)?rA   rB   rC   __doc__r^   rk   rx   rq   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r  r   r"  r$  r)  r-  r3  r8  r=  r@  rE  r   r   r   rF   rF      sG   
=94",D.
8*+JII(;6,1/04I"8L$	,5<(
G99J


<D>;7'G	'/"B:=78$OBQ.PB;+6O"AL890C
%r   rF   c                   ~    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)XMLTreeBuilderSmokeTestc                    | j                  d      }t        j                  |d      }t        j                  |      }| j	                  |j
                  t               | j	                  |j                         |j                                y r`   rb   rg   s       r   rk   z9XMLTreeBuilderSmokeTest.test_pickle_and_unpickle_identity4  rl   r   c                 f    | j                  d      }| j                  |j                         d       y )N<root/>s.   <?xml version="1.0" encoding="utf-8"?>
<root/>r   r!   rr   r   s     r   test_docstring_generatedz0XMLTreeBuilderSmokeTest.test_docstring_generated=  s+    yy#KKMM	Or   c                 l    d}| j                  |      }| j                  ||j                  d             y )Ns,   <?xml version="1.0" encoding="utf8"?>
<foo/>rn   rN  r   s      r   test_xml_declarationz,XMLTreeBuilderSmokeTest.test_xml_declarationB  s.    Eyy V!45r   c                 l    d}| j                  |      }| j                  ||j                  d             y )Ns<   <?xml version="1.0" encoding="utf8"?>
<?PITarget PIContent?>rn   rN  r   s      r   r   z3XMLTreeBuilderSmokeTest.test_processing_instructionG  s.    Uyy V!45r   c                 l    d}| j                  |      }| j                  |j                  d      |       y)zGA real XHTML document should come out *exactly* the same as it went in.r   r   NrN  r   s      r   r   z0XMLTreeBuilderSmokeTest.test_real_xhtml_documentL  s5     yy KK &	*r   c                 j    d}| j                  |      }| j                  ||j                                y )Ns  <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<parent xmlns="http://ns1/">
<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
<grandchild ns3:attr="value" xmlns="http://ns4/"/>
</child>
</parent>rN  r   docr   s      r   test_nested_namespacesz.XMLTreeBuilderSmokeTest.test_nested_namespacesX  s.     yy~dkkm,r   c                     d}t        |d      }d|j                  _        |j                         }| j	                  d|v        y )Nz/
  <script type="text/javascript">
  </script>
zlxml-xmlzconsole.log("< < hey > > ");s   &lt; &lt; hey &gt; &gt;)r   r   r   rr   r   )r   rV  r   encodeds       r   5test_formatter_processes_script_tag_for_xml_documentszMXMLTreeBuilderSmokeTest.test_formatter_processes_script_tag_for_xml_documentsc  sB     S*- <++-2g=>r   c                 v    d}| j                  |      }| j                  d|j                  j                         y )Nu?   <?xml version="1.0" encoding="euc-jp"><root>Sacré bleu!</root>r  )r   r!   rootr   r   s      r   r  z7XMLTreeBuilderSmokeTest.test_can_parse_unicode_documento  s0    syy )499+;+;<r   c                 t    d}| j                  |      }| j                  t        |j                        |       y )Nz<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>)r   r!   r   rssr   s      r   test_popping_namespaced_tagz3XMLTreeBuilderSmokeTest.test_popping_namespaced_tagt  s3     Wyy M6	#r   c                 h    | j                  d      }| j                  |j                  d      d       y )NrM  latin1s/   <?xml version="1.0" encoding="latin1"?>
<root/>rN  r   s     r   (test_docstring_includes_correct_encodingz@XMLTreeBuilderSmokeTest.test_docstring_includes_correct_encodingz  s.    yy#KK!?	Ar   c                 l    d}| j                  |      }| j                  |j                  d      |       y)z<A large XML document should come out the same as it went in.s4  <?xml version="1.0" encoding="utf-8"?>
<root>0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000</root>r   NrN  r   s      r   test_large_xml_documentz/XMLTreeBuilderSmokeTest.test_large_xml_document  s2     yy W-v6r   c                 J    | j                  dd       | j                  d       y )Nr   r   z
<p>foo</p>r   r   s    r   9test_tags_are_empty_element_if_and_only_if_they_are_emptyzQXMLTreeBuilderSmokeTest.test_tags_are_empty_element_if_and_only_if_they_are_empty  s     eV,l+r   c                     d}| j                  |      }|j                  }| j                  d|d          | j                  d|d          y )Nz<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>zhttp://example.com/zxmlns:azhttp://example.net/zxmlns:b)r   r\  r!   )r   r   r   r\  s       r   test_namespaces_are_preservedz5XMLTreeBuilderSmokeTest.test_namespaces_are_preserved  sK     wyy yy.Y@.Y@r   c                 t    d}| j                  |      }| j                  t        |j                        |       y )NzN<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>)r   r!   r   rs   r   s      r   test_closing_namespaced_tagz3XMLTreeBuilderSmokeTest.test_closing_namespaced_tag  s-    ayy TVVf-r   c                 t    d}| j                  |      }| j                  t        |j                        |       y )Nzs<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>r   r!   r   ro   r   s      r   test_namespaced_attributesz2XMLTreeBuilderSmokeTest.test_namespaced_attributes  s0     Gyy TXX/r   c                 t    d}| j                  |      }| j                  t        |j                        |       y )Nz<foo xml:lang="fr">bar</foo>rl  r   s      r   (test_namespaced_attributes_xml_namespacez@XMLTreeBuilderSmokeTest.test_namespaced_attributes_xml_namespace  s-    /yy TXX/r   c           	         d}| j                  |      }| j                  dt        |j                  d                   | j                  dt        |j                  d                   | j                  dt        |j                  d                   | j                  dt        |j                  dd	                   | j                  dt        |j                  ddg                   y )
Na  <?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
    xmlns:ns1="http://example.com/ns1"
    xmlns:ns2="http://example.com/ns2"
    <ns1:tag>foo</ns1:tag>
    <ns1:tag>bar</ns1:tag>
    <ns2:tag key="value">baz</ns2:tag>
</Document>
   tagra   zns1:tagr0   zns2:tagvalue)keyr   rU  s      r   test_find_by_prefixed_namez2XMLTreeBuilderSmokeTest.test_find_by_prefixed_name  s     yy~ 	Ce 456 	Ci 89:Ci 89:CiW EFGCy).D EFGr   c                     d}| j                  |      }|j                  }t        j                  |      }| j                  |j                  |j                         y )Nzf<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://example.com/ns0"/>)r   documentr   r!   prefix)r   xmlr   rr  	duplicates        r   !test_copy_tag_preserves_namespacez9XMLTreeBuilderSmokeTest.test_copy_tag_preserves_namespace  sJ    2 yy~mmIIcN	 	Y%5%56r   c                 P    | j                  t              }| j                  |       yrB  rC  r   s     r   rE  z'XMLTreeBuilderSmokeTest.test_worst_case  rF  r   N)rA   rB   rC   rk   rO  rQ  r   r   rW  rZ  r  r_  rb  rd  rf  rh  rj  rm  ro  ru  r{  rE  r   r   r   rJ  rJ  2  se    9O
6
6

*	-
?=
#A7,A.
0
0
H,	7%r   rJ  c                   .    e Zd ZdZd Zd Zd Zd Zd Zy)HTML5TreeBuilderSmokeTestz2Smoke test for a tree builder that supports HTML5.c                      y r   r   r   s    r   r   z2HTML5TreeBuilderSmokeTest.test_real_xhtml_document  s     	r   c                 v    d}| j                  |      }| j                  d|j                  j                         y )Nz<a>r   )r   r!   r   	namespacer   s      r   test_html_tags_have_namespacez7HTML5TreeBuilderSmokeTest.test_html_tags_have_namespace  s0    yy 79I9IJr   c                     d}| j                  |      }d}| j                  ||j                  j                         | j                  ||j                  j                         y )Nz<svg><circle/></svg>r  )r   r!   svgr  circler   r   r   r  s       r   test_svg_tags_have_namespacez6HTML5TreeBuilderSmokeTest.test_svg_tags_have_namespace  sN    'yy 0	DHH$6$67DKK$9$9:r   c                     d}| j                  |      }d}| j                  ||j                  j                         | j                  ||j                  j                         y )Nz<math><msqrt>5</msqrt></math>r  )r   r!   mathr  msqrtr  s       r   test_mathml_tags_have_namespacez9HTML5TreeBuilderSmokeTest.test_mathml_tags_have_namespace  sN    0yy 8	DII$7$78DJJ$8$89r   c                 &   d}| j                  |      }| j                  t        |j                  d   t                     | j                  |j                  d   d       | j                  d|j                  d   j                  j                         y )Nz3<?xml version="1.0" encoding="utf-8"?><html></html>r   z$?xml version="1.0" encoding="utf-8"?r~   )r   r   r7   r6   r   r!   r)   r]   r   s      r   $test_xml_declaration_becomes_commentz>HTML5TreeBuilderSmokeTest.test_xml_declaration_becomes_comment  sp    Fyy 
4==#3W=>q)+QRq!1!>!>!C!CDr   N)	rA   rB   rC   rH  r   r  r  r  r  r   r   r   r~  r~    s     <
K
;:Er   r~  c                      d  fd}|S )Nc                      y r   r   )testargsr   s      r   nothingzskipIf.<locals>.nothing  s    r   c                     rS | S r   r   )	test_item	conditionr  s    r   	decoratorzskipIf.<locals>.decorator  s    
>r   r   )r  reasonr  r  s   `  @r   skipIfr    s     r   )rH  __license__rc   r   	functoolsunittestr   bs4r   bs4.elementr   r   r   r	   r
   r   bs4.builderr   r   rD  r   objectrF   rJ  r~  r  r   r   r   <module>r     s             .'"JUx   UpY	%v Y	%xV%f V%r!E 8 !EF
r   