
    *,h,                     l    d dl Z d dlZd dlmZmZ ddlmZmZ  ej                  d      Z	 G d d      Z
y)    N)OptionalUnion   )LanguageFilterProbingStates%   [a-zA-Z]*[-]+[a-zA-Z]*[^a-zA-Z-]?c                       e Zd ZdZej
                  fdeddfdZddZede	e
   fd       Zede	e
   fd       Zd	eeef   defd
Zedefd       ZdefdZedeeef   defd       Zedeeef   defd       Zedeeef   defd       Zy)CharSetProbergffffff?lang_filterreturnNc                     t         j                  | _        d| _        || _        t        j                  t              | _        y )NT)	r   	DETECTING_stateactiver
   logging	getLogger__name__logger)selfr
   s     c/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pip/_vendor/chardet/charsetprober.py__init__zCharSetProber.__init__,   s0    ",,&''1    c                 .    t         j                  | _        y N)r   r   r   r   s    r   resetzCharSetProber.reset2   s    ",,r   c                      y r    r   s    r   charset_namezCharSetProber.charset_name5   s    r   c                     t         r   NotImplementedErrorr   s    r   languagezCharSetProber.language9   s    !!r   byte_strc                     t         r   r    )r   r#   s     r   feedzCharSetProber.feed=   s    !!r   c                     | j                   S r   )r   r   s    r   statezCharSetProber.state@   s    {{r   c                      y)Ng        r   r   s    r   get_confidencezCharSetProber.get_confidenceD   s    r   bufc                 4    t        j                  dd|       } | S )Ns   ([ -])+    )resub)r*   s    r   filter_high_byte_onlyz#CharSetProber.filter_high_byte_onlyG   s    ff&c2
r   c                     t               }t        j                  |       }|D ]C  }|j                  |dd        |dd }|j	                         s|dk  rd}|j                  |       E |S )u7  
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [-ÿ]
        marker: everything else [^a-zA-Z-ÿ]
        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.
        This filter applies to all scripts which do not use English characters.
        N   r,   )	bytearrayINTERNATIONAL_WORDS_PATTERNfindallextendisalpha)r*   filteredwordsword	last_chars        r   filter_international_wordsz(CharSetProber.filter_international_wordsL   sv     ;
 ,33C8 
	'DOOD"I& RS	I$$&9w+> 	OOI&
	' r   c                 *   t               }d}d}t        |       j                  d      } t        |       D ]F  \  }}|dk(  r|dz   }d}|dk(  s||kD  r'|s%|j	                  | ||        |j	                  d       d}H |s|j	                  | |d	        |S )
a[  
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        Fr   c   >r      <r,   TN)r3   
memoryviewcast	enumerater6   )r*   r8   in_tagprevcurrbuf_chars         r   remove_xml_tagszCharSetProber.remove_xml_tagsn   s     ;o""3''n 	ND( 4axT!$;v OOCTN3OOD)	$  OOCJ'r   )r   N)r   
__module____qualname__SHORTCUT_THRESHOLDr   NONEr   r   propertyr   strr   r"   r   bytesr3   r   r%   r'   floatr)   staticmethodr/   r<   rH   r   r   r   r	   r	   (   s0   5C5H5H 2N 2T 2- hsm   "(3- " ""U5)#34 " " |    5	)9#: u   eY.>(? I  B $U5)#34 $ $ $r   r	   )r   r-   typingr   r   enumsr   r   compiler4   r	   r   r   r   <module>rU      s3   :  	 " /(bjj8 
k kr   