
    *,h9!                     B    d dl mZmZ ddlmZ ddlmZ  G d de      Zy)    )ListUnion   )CharSetProber)ProbingStatec                       e Zd ZdZdZdZd fdZd fdZede	fd       Z
ede	fd	       Zdefd
ZdefdZdefdZdefdZdefdZdefdZdee   ddfdZdee   ddfdZdeeef   defdZedefd       ZdefdZ xZS )UTF1632Proberad  
    This class simply looks for occurrences of zero bytes, and infers
    whether the file is UTF16 or UTF32 (low-endian or big-endian)
    For instance, files looking like (       [nonzero] )+
    have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
    may be guessed to be UTF16BE, and inversely for little-endian varieties.
       gGz?returnNc                    t         |           d| _        dgdz  | _        dgdz  | _        t
        j                  | _        g d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        | j                          y )Nr      r   r   r   r   F)super__init__positionzeros_at_modnonzeros_at_modr   	DETECTING_statequadinvalid_utf16beinvalid_utf16leinvalid_utf32beinvalid_utf32le'first_half_surrogate_pair_detected_16be'first_half_surrogate_pair_detected_16leresetself	__class__s    c/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pip/_vendor/chardet/utf1632prober.pyr   zUTF1632Prober.__init__)   s~    C!G !sQw",, 	$$$$7<47<4

    c                     t         |           d| _        dgdz  | _        dgdz  | _        t
        j                  | _        d| _        d| _	        d| _
        d| _        d| _        d| _        g d| _        y )Nr   r   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s    r!   r   zUTF1632Prober.reset8   ss    C!G !sQw",,$$$$7<47<4 	r"   c                     | j                         ry| j                         ry| j                         ry| j                         ryy)Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)is_likely_utf32beis_likely_utf32leis_likely_utf16beis_likely_utf16ler   s    r!   charset_namezUTF1632Prober.charset_nameF   sA    !!#!!#!!#!!#r"   c                      y)N  r)   s    r!   languagezUTF1632Prober.languageS   s    r"   c                 4    t        d| j                  dz        S )N      ?g      @maxr   r)   s    r!   approx_32bit_charsz UTF1632Prober.approx_32bit_charsW       3+,,r"   c                 4    t        d| j                  dz        S )Nr0   g       @r1   r)   s    r!   approx_16bit_charsz UTF1632Prober.approx_16bit_charsZ   r4   r"   c                 f   | j                         }|| j                  k\  xr | j                  d   |z  | j                  kD  xrp | j                  d   |z  | j                  kD  xrO | j                  d   |z  | j                  kD  xr. | j                  d   |z  | j                  kD  xr | j
                   S Nr   r         )r3   MIN_CHARS_FOR_DETECTIONr   EXPECTED_RATIOr   r   r   approx_charss     r!   r%   zUTF1632Prober.is_likely_utf32be]   s    ..0t;;; 
a </$2E2EE )!!!$|3d6I6II)!!!$|3d6I6II) $$Q',69L9LL) (((	
r"   c                 f   | j                         }|| j                  k\  xr | j                  d   |z  | j                  kD  xrp | j                  d   |z  | j                  kD  xrO | j                  d   |z  | j                  kD  xr. | j                  d   |z  | j                  kD  xr | j
                   S r8   )r3   r;   r   r<   r   r   r=   s     r!   r&   zUTF1632Prober.is_likely_utf32leg   s    ..0t;;; 
  #l2T5H5HH )!!!$|3d6I6II)!!!$|3d6I6II) !!!$|3d6I6II) (((	
r"   c                 "   | j                         }|| j                  k\  xro | j                  d   | j                  d   z   |z  | j                  kD  xr> | j                  d   | j                  d   z   |z  | j                  kD  xr | j
                   S )Nr   r:   r   r9   )r6   r;   r   r<   r   r   r=   s     r!   r'   zUTF1632Prober.is_likely_utf16beq       ..0t;;; 
!!!$t';';A'>>,N!!" )""1%(9(9!(<<L!!") (((	
r"   c                 "   | j                         }|| j                  k\  xro | j                  d   | j                  d   z   |z  | j                  kD  xr> | j                  d   | j                  d   z   |z  | j                  kD  xr | j
                   S )Nr   r9   r   r:   )r6   r;   r   r<   r   r   r=   s     r!   r(   zUTF1632Prober.is_likely_utf16le{   rA   r"   r   c                     |d   dk7  s)|d   dkD  s!|d   dk(  r |d   dk(  rd|d   cxk  rdk  r
n nd| _         |d   dk7  s)|d   dkD  s!|d   dk(  r#|d   dk(  rd|d   cxk  rdk  rn y	d| _        y	y	y	y	)
z
        Validate if the quad of bytes is valid UTF-32.

        UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
        excluding 0x0000D800 - 0x0000DFFF

        https://en.wikipedia.org/wiki/UTF-32
        r   r         r9      Tr:   N)r   r   )r   r   s     r!   validate_utf32_charactersz'UTF1632Prober.validate_utf32_characters   s     GqLAw~Q1aA$$q'2IT2I#'D GqLAw~Q1aA$$q'2IT2I#'D  3Jr"   pairc                 z   | j                   s2d|d   cxk  rdk  rn nd| _         n9d|d   cxk  rdk  r+n n(d| _        n d|d   cxk  rdk  rn nd| _         nd| _        | j                  s3d|d   cxk  rdk  rn nd| _        y	d|d   cxk  rdk  rn y	d| _        y	y	d|d   cxk  rdk  r	d| _        y	 d| _        y	)
a9  
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        rE   r      T   rF   Fr   N)r   r   r   r   )r   rH   s     r!   validate_utf16_charactersz'UTF1632Prober.validate_utf16_characters   s     ;;tAw&$&?C<a(D('+$tAw&$&?D<'+$;;tAw&$&?C<a(D('+$ ) tAw&$&?D< ' (,$r"   byte_strc                    |D ]  }| j                   dz  }|| j                  |<   |dk(  rW| j                  | j                         | j                  | j                  dd        | j                  | j                  dd        |dk(  r| j                  |xx   dz  cc<   n| j
                  |xx   dz  cc<   | xj                   dz  c_          | j                  S )Nr   r:   r   r9   r   )r   r   rG   rL   r   r   state)r   rM   cmod4s       r!   feedzUTF1632Prober.feed   s     	A==1$DDIIdOqy..tyy9..tyy1~>..tyy1~>Av!!$'1,'$$T*a/*MMQM	 zzr"   c                 :   | j                   t        j                  t        j                  hv r| j                   S | j	                         dkD  r!t        j                  | _         | j                   S | j
                  dkD  rt        j                  | _         | j                   S )Ng?i   )r   r   NOT_MEFOUND_ITget_confidencer   r)   s    r!   rO   zUTF1632Prober.state   sz    ;;<..0E0EFF;; 4'&//DK
 {{	 ]]X% '--DK{{r"   c                     | j                         s0| j                         s | j                         s| j                         rdS dS )Ng333333?g        )r(   r'   r&   r%   r)   s    r!   rV   zUTF1632Prober.get_confidence   sH     &&())+))+))+ 		
 		
r"   )r   N) __name__
__module____qualname____doc__r;   r<   r   r   propertystrr*   r.   floatr3   r6   boolr%   r&   r'   r(   r   intrG   rL   r   bytes	bytearrayr   rR   rO   rV   __classcell__)r    s   @r!   r	   r	      s    !N! 
c 
 
 #  -E --E -
4 

4 

4 

4 
(d3i (D (,,d3i ,D ,@U5)#34   
| 
 


 

r"   r	   N)typingr   r   charsetproberr   enumsr   r	   r-   r"   r!   <module>rg      s   *  ( F
M F
r"   