
    ,hj0                        d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZ ddlmZmZ dd	lmZmZmZ  G d
 d      Z G d d      Zeeef   Zee   Z G d d      Zy)    )annotations)aliases)sha256)dumps)sub)AnyIteratorListTuple   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                     e Zd Z	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZedd       ZddZddZ	ddZ
edd	       Zed d
       Zed!d       Zed!d       Zed d       Zedd       Zedd       Zedd       Zedd       Zedd       Zed"d       Zed#d       Zed!d       Zed d       Zed d       Zd$d%dZedd       Zy)&CharsetMatchNc                    || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        || _        y )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesdecoded_payloadpreemptive_declarations           [/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   s_      '.'6,5%315+-,/"-1,0#23I$    c                    t        |t              s)t        |t              rt        |      | j                  k(  S y| j                  |j                  k(  xr | j
                  |j
                  k(  S )NF)
isinstancer   strr   encodingfingerprintr"   others     r*   __eq__zCharsetMatch.__eq__*   sV    %.%% '4==88}}.X43C3CuGXGX3XXr,   c                   t        |t              st        t        | j                  |j                  z
        }t        | j
                  |j
                  z
        }|dk  r|dkD  r| j
                  |j
                  kD  S |dk  rS|dk  rNt        | j                        t        k\  r| j                  |j                  k  S | j                  |j                  kD  S | j                  |j                  k  S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r.   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r"   r3   chaos_differencecoherence_differences       r*   __lt__zCharsetMatch.__lt__1   s     %."%djj5;;&>"?&)$..5??*J&K d"';d'B>>EOO33$)=)E 4==!%55zzEKK//((5+A+AAAzzEKK''r,   c                \    dt        t        |             t        | j                        z  z
  S )Ng      ?)r:   r/   rawr"   s    r*   r;   zCharsetMatch.multi_byte_usageG   s"    c#d)ns488}455r,   c                ~    | j                   &t        | j                  | j                  d      | _         | j                   S )Nstrict)r    r/   r   r   rA   s    r*   __str__zCharsetMatch.__str__K   s.    <<t}}dnnhGDL||r,   c                <    d| j                    d| j                   dS )Nz<CharsetMatch 'z' bytes(z)>)r0   r1   rA   s    r*   __repr__zCharsetMatch.__repr__Q   s"     x8H8H7ILLr,   c                    t        |t              r|| k(  r$t        dj                  |j                              d |_        | j                  j                  |       y )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r.   r   r6   format	__class__r    r   appendr2   s     r*   add_submatchzCharsetMatch.add_submatchT   sO    %.%4-MTTOO  E"r,   c                    | j                   S N)r   rA   s    r*   r0   zCharsetMatch.encoding_   s    ~~r,   c                    g }t        j                         D ]G  \  }}| j                  |k(  r|j                  |       '| j                  |k(  s7|j                  |       I |S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr0   rJ   )r"   also_known_asups       r*   encoding_aliaseszCharsetMatch.encoding_aliasesc   s^    
 $&MMO 	(DAq}}!$$Q'!#$$Q'		(
 r,   c                    | j                   S rM   r   rA   s    r*   bomzCharsetMatch.bomp       ###r,   c                    | j                   S rM   rU   rA   s    r*   byte_order_markzCharsetMatch.byte_order_markt   rW   r,   c                F    | j                   D cg c]  }|d   	 c}S c c}w )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        r   r   )r"   es     r*   r'   zCharsetMatch.languagesx   s      #oo.!...s   c                   | j                   shd| j                  v ryddlm}m} t        | j                        r || j                        n || j                        }t        |      dk(  sd|v ry|d   S | j                   d   d   S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r   could_be_from_charsetcharset_normalizer.cdr`   ra   r   r0   r:   )r"   r`   ra   r'   s       r*   languagezCharsetMatch.language   s      $444  X *$--8 &dmm4'6  9~"my&@ Q<q!!$$r,   c                    | j                   S rM   )r   rA   s    r*   r8   zCharsetMatch.chaos   s    $$$r,   c                @    | j                   sy| j                   d   d   S )Nr   r   r   r[   rA   s    r*   r9   zCharsetMatch.coherence   s     q!!$$r,   c                6    t        | j                  dz  d      S Nd      )ndigits)roundr8   rA   s    r*   percent_chaoszCharsetMatch.percent_chaos   s    TZZ#%q11r,   c                6    t        | j                  dz  d      S ri   )rm   r9   rA   s    r*   percent_coherencezCharsetMatch.percent_coherence   s    T^^c)155r,   c                    | j                   S )z+
        Original untouched bytes.
        )r   rA   s    r*   r@   zCharsetMatch.raw   s    
 }}r,   c                    | j                   S rM   )r   rA   s    r*   submatchzCharsetMatch.submatch   s    ||r,   c                2    t        | j                        dkD  S Nr   )r:   r   rA   s    r*   has_submatchzCharsetMatch.has_submatch   s    4<< 1$$r,   c                    | j                   | j                   S t        |       D cg c]  }t        |       }}t        t	        |D ch c]  }|s|	 c}            | _         | j                   S c c}w c c}w rM   )r   r/   r   sortedlist)r"   chardetected_rangesrs       r*   	alphabetszCharsetMatch.alphabets   sk    +'''MPQUY,WT]4-@,W,W%d+L!!A+L&MN### -X+Ls   A0A5A5c                p    | j                   g| j                  D cg c]  }|j                   c}z   S c c}w )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        )r   r   r0   )r"   ms     r*   rc   z"CharsetMatch.could_be_from_charset   s,     t||"D!1::"DDD"Ds   3c                6     j                    j                   |k7  rr| _         t               } j                  = j                  j                         dvr!t	        t
         fd|dd d      }||dd z   }|j                  |d       _         j                  S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8utf_8c                    | j                   | j                         d   | j                         d    j                  | j                         d   t	        j
                        j                  dd            S )Nr   r   _-)stringspanreplacegroupsr   r   )r   r"   s    r*   <lambda>z%CharsetMatch.output.<locals>.<lambda>   sY    ahhqvvx{QVVXa[AII
1!$"7"78@@cJ r,   i    r   )countr   )r   r/   r!   lowerr   r   encoder   )r"   r0   decoded_stringpatched_headers   `   r*   outputzCharsetMatch.output   s    
   (D,A,AX,M$,D! YN,,80066812 "%3 #5D)" "0.2G!G#1#8#89#MD ###r,   c                P    t        | j                               j                         S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrA   s    r*   r1   zCharsetMatch.fingerprint   s    
 dkkm$..00r,   )NN)r#   bytesr$   r/   r%   floatr&   boolr'   CoherenceMatchesr(   
str | Noner)   r   )r3   objectreturnr   )r   r   r   r/   )r3   r   r   None)r   	list[str]r   r   )r   r   )r   zlist[CharsetMatch])r   )r0   r/   r   r   )__name__
__module____qualname__r+   r4   r>   propertyr;   rD   rF   rK   r0   rS   rV   rY   r'   re   r8   r9   rn   rp   r@   rs   rv   r}   rc   r   r1    r,   r*   r   r      s    '+-1JJ J 	J
 J $J $J !+J8Y(, 6 6M	#   
 
 $ $ $ $ / / % %6 % % % %
 2 2 6 6     % % $ $ E E$: 1 1r,   r   c                  R    e Zd ZdZdddZddZddZddZddZddZ	dd	Z
dd
Zy)CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nc                8    |rt        |      | _        y g | _        y rM   )rx   _results)r"   resultss     r*   r+   zCharsetMatches.__init__   s    ?FF7OBr,   c              #  8   K   | j                   E d {    y 7 wrM   r   rA   s    r*   __iter__zCharsetMatches.__iter__   s     ==  s   c                    t        |t              r| j                  |   S t        |t              r/t	        |d      }| j                  D ]  }||j
                  v s|c S  t        )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r.   intr   r/   r   rc   KeyError)r"   itemresults      r*   __getitem__zCharsetMatches.__getitem__  s`    
 dC ==&&dC T5)D-- "6777!M" r,   c                ,    t        | j                        S rM   r:   r   rA   s    r*   __len__zCharsetMatches.__len__  s    4==!!r,   c                2    t        | j                        dkD  S ru   r   rA   s    r*   __bool__zCharsetMatches.__bool__  s    4==!A%%r,   c                   t        |t              s-t        dj                  t	        |j
                                    t        |j                        t        k  rW| j                  D ]H  }|j                  |j                  k(  s|j                  |j                  k(  s7|j                  |        y | j                  j                  |       t        | j                        | _	        y)z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r.   r   r6   rH   r/   rI   r:   r@   r   r   r1   r8   rK   rJ   rx   )r"   r   matchs      r*   rJ   zCharsetMatches.append  s    
 $-?FF'  txx=++ $$(8(88U[[DJJ=V&&t, 	T"t}}-r,   c                :    | j                   sy| j                   d   S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rA   s    r*   bestzCharsetMatches.best)  s     }}}}Qr,   c                "    | j                         S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rA   s    r*   firstzCharsetMatches.first1  s     yy{r,   rM   )r   zlist[CharsetMatch] | None)r   zIterator[CharsetMatch])r   z	int | strr   r   )r   r   r   )r   r   r   r   )r   zCharsetMatch | None)r   r   r   __doc__r+   r   r   r   r   rJ   r   r   r   r,   r*   r   r      s0    
O!"&.( r,   r   c                  Z    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZedd       ZddZy)CliDetectionResultc                    || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        y rM   )pathunicode_pathr0   rS   alternative_encodingsre   r}   r&   r8   r9   is_preferred)r"   r   r0   rS   r   re   r}   r&   r8   r9   r   r   s               r*   r+   zCliDetectionResult.__init__=  sV     	(4$,+;0E"%$-$2!
 )".r,   c                    | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  dS )Nr   r0   rS   r   re   r}   r&   r8   r9   r   r   r   rA   s    r*   __dict__zCliDetectionResult.__dict__W  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r,   c                2    t        | j                  dd      S )NT   )ensure_asciiindent)r   r   rA   s    r*   to_jsonzCliDetectionResult.to_jsong  s    T]]a@@r,   N)r   r/   r0   r   rS   r   r   r   re   r/   r}   r   r&   r   r8   r   r9   r   r   r   r   r   )r   zdict[str, Any]r   )r   r   r   r+   r   r   r   r   r,   r*   r   r   <  s    // / $	/
  )/ / / / / / !/ /4 
 
Ar,   r   N)
__future__r   encodings.aliasesr   hashlibr   jsonr   rer   typingr   r	   r
   r   constantr   r   utilsr   r   r   r   r   r/   r   CoherenceMatchr   r   r   r,   r*   <module>r      sd    " %    - - G C Ce1 e1P@ @F sEz"' ,A ,Ar,   