
    ,h/                       d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d	d
lmZmZmZmZmZmZmZ  e
e      d+d       Z e
e      d,d       Z e
e      d-d       Z e
e      d+d       Z e
e      d+d       Z e
e      d+d       Z e
e      d+d       Z e
e      d+d       Z  e
e      d+d       Z! e
e      d+d       Z" e
e      d+d       Z# e
e      d+d       Z$ e
e      d+d       Z% e
e      d+d       Z& e
e      d+d       Z' e
e      d+d       Z( e
e      d+d       Z) e
 e*e            d.d       Z+ e
e      d+d       Z,d/d0dZ- e
d       d1d!       Z.d2d"Z/d3d#Z0d4d5d$Z1d6d%Z2d7d&Z3d'ejh                  d(f	 	 	 	 	 	 	 d8d)Z5	 d9	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d:d*Z6y);    )annotationsN)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS)maxsizec                    	 t        j                  |       }d|v xs( d|v xs" d|v xs d|v xs d|v xs d|v xs
 d|v xs d	|v S # t        $ r Y yw xY w)
NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEzWITH MACRONzWITH RING ABOVEunicodedataname
ValueError	characterdescriptions     Z/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/charset_normalizer/utils.pyis_accentuatedr      s    &++I6 	# 	,;&	,[(	, {*	, +		,
 ;&	, K'	, +	  s   A 	AAc                    t        j                  |       }|s| S |j                  d      }t        t	        |d   d            S )N r      )r   decompositionsplitchrint)r   
decomposedcodess      r   remove_accentr&   -   sA    !//	:J!'',Es58R !!    c                b    t        |       }t        j                         D ]  \  }}||v s|c S  y)zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranges       r   unicode_ranger.   8   s=    
 YM!8!>!>!@ 
II% r'   c                T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFLATINr   r   s     r   is_latinr1   F   s8    &++I6 k!!      	''c                Z    t        j                  |       }d|v ryt        |       }|yd|v S )NPTFPunctuationr   categoryr.   r   character_categorycharacter_ranges      r   is_punctuationr;   O   s=    )229=
  "/	":OO++r'   c                p    t        j                  |       }d|v sd|v ryt        |       }|yd|v xr |dk7  S )NSNTFFormsLor6   r8   s      r   	is_symbolrA   ^   sP    )229=
  C+=$="/	":Oo%D*<*DDr'   c                2    t        |       }|yd|v xs d|v S )NF	EmoticonsPictographs)r.   )r   r:   s     r   is_emoticonrE   m   s*    "/	":O/)M]o-MMr'   c                j    | j                         s| dv ryt        j                  |       }d|v xs |dv S )N>      ｜+<>TZ>   PcPdPo)isspacer   r7   )r   r9   s     r   is_separatorrP   w   sB    i+AA)229=$$P(:>P(PPr'   c                D    | j                         | j                         k7  S N)islowerisupperr   s    r   is_case_variablerV      s    )"3"3"555r'   c                T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFCJKr   r   character_names     r   is_cjkr[      s8    $)))4 N""  r2   c                T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHIRAGANAr   rY   s     r   is_hiraganar^      8    $)))4 ''  r2   c                T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFKATAKANAr   rY   s     r   is_katakanarb      r_   r2   c                T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFHANGULr   rY   s     r   	is_hangulre      8    $)))4 ~%%  r2   c                T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFTHAIr   rY   s     r   is_thairi      s8    $)))4 ^##  r2   c                T    	 t        j                  |       }d|v S # t        $ r Y yw xY w)NFARABICr   rY   s     r   	is_arabicrl      rf   r2   c                `    	 t        j                  |       }d|v xr d|v S # t        $ r Y yw xY w)NFrk   zISOLATED FORMr   rY   s     r   is_arabic_isolated_formrn      sB    $)))4 ~%K/^*KK  s   ! 	--c                    | t         vS rR   )r   rU   s    r   is_cjk_uncommonrp      s    111r'   c                4     t         fdt        D              S )Nc              3  &   K   | ]  }|v  
 y wrR    ).0keywordr,   s     r   	<genexpr>z-is_unicode_range_secondary.<locals>.<genexpr>   s     Tw*$Ts   )anyr   )r,   s   `r   is_unicode_range_secondaryrx      s    T4STTTr'   c                j    | j                         du xr  | j                         du xr | dk7  xr | dk7  S )NFu   ﻿)rO   isprintablerU   s    r   is_unprintabler|      sL     	u$ 	"!!#u,	"	" !	r'   c           	     r   t        | t              st        t        |       }t	        t
        | dt        ||       j                  dd            }t        |      dk(  ry|D ]T  }|j                         j                  dd      }t        j                         D ]  \  }}||k(  r|c c S ||k(  s|c c S  V y)zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   mindecodelowerreplacer   r*   )sequencesearch_zoneseq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r   any_specified_encodingr      s     h&x=G ',3w,-44WX4NG
 7|q% 
%/557??SI
 .5]]_ 	%)NM!33$$ 22$$		%
% r'      c                n    | dv xs0 t        t        j                  d|        j                  t              S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sig
encodings.)
issubclass	importlibimport_moduler   r	   )r   s    r   is_multi_byte_encodingr     sC    
  
 
  
*TF 34GG#
r'   c                    t         D ]>  }t         |   }t        |t              r|g}|D ]  }| j                  |      s||fc c S  @ y)z9
    Identify and extract SIG/BOM in given sequence.
    )Nr'   )r   r   r   
startswith)r   iana_encodingmarksmarks       r   identify_sig_or_bomr     s[    
 ( +%3M%BeU#GE 	+D""4($d**	++ r'   c                
    | dvS )N>   r   r   rs   )r   s    r   should_strip_sig_or_bomr   (  s     444r'   c                    | j                         j                  dd      } t        j                         D ]  \  }}| ||fv s|c S  |rt	        d|  d      | S )zIReturns the Python normalized encoding name (Not the IANA official name).r   r   zUnable to retrieve IANA for '')r   r   r   r*   r   )cp_namestrictr   r   s       r   	iana_namer   ,  sh    mmo%%c3/G
 *1 !%~}55  ! 8	CDDNr'   c                t   t        |       st        |      ryt        j                  d|        j                  }t        j                  d|       j                  } |d      } |d      }d}t	        d      D ]7  }t        |g      }|j                  |      |j                  |      k(  s3|dz  }9 |dz  S )	Ng        r   r   r   r      r
      )r   r   r   r   ranger   r   )	iana_name_aiana_name_b	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r   cp_similarityr   =  s    k*.D[.Q''*[M(BCVVI''*[M(BCVVI(9D(9D!"3Z '$aSz;;}%])CC!Q&!'
 !3&&r'   c                ,    | t         v xr |t         |    v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r   is_cp_similarr   Q  s%     	-- 	?1+>>r'   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)sc                    t        j                  |       }|j                  |       t        j                         }|j	                  t        j
                  |             |j                  |       y rR   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   levelformat_stringloggerhandlers        r   set_logging_handlerr   \  sU    
 t$F
OOE##%G**=9:
gr'   c	              #    K   |r|du r|D ]  }	||	|	|z    }
|
s y |
  y |D ]  }	|	|z   }|t        |       dz   kD  r| |	|	|z    }|r	|du r||z   }|j                  ||rdnd      }
|r[|	dkD  rVt        |d      }|rH|
d | |vrAt        |	|	dz
  d	      D ].  }| || }|r	|du r||z   }|j                  |d      }
|
d | |v s. n |
  y w)
NF   r   r   r   r   r      )r   r   r   r   )	sequencesr   offsets
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadr   chunk	chunk_endcut_sequencechunk_partial_size_chkjs                  r   cut_sequence_chunksr   i  sK     0E9 	A#AJ7EK		  #	AJI3y>A--$QZ8L#(8E(A*\9 ''#8xh ( E %Q.1*b.A& $556oM"1a!eR0 	"'09'=/4D4M+6+EL , 3 3M( 3 S !8"89_L!	" KG#	s   B>C
C)r   strreturnbool)r   r   r   r   )r   r   r   
str | None)r,   r   r   r   )i    )r   r   r   r#   r   r   )r   r   r   r   )r   r   r   ztuple[str | None, bytes])r   r   r   r   )T)r   r   r   r   r   r   )r   r   r   r   r   float)r   r   r   r   r   r   )r   r   r   r#   r   r   r   NonerR   )r   r   r   r   r   r   r   r#   r   r   r   r   r   r   r   r   r   r   r   zGenerator[str, None, None])7
__future__r   r   r   r   codecsr   encodings.aliasesr   	functoolsr   rer   typingr   _multibytecodecr	   constantr   r   r   r   r   r   r   r   r&   r.   r1   r;   rA   rE   rP   rV   r[   r^   rb   re   ri   rl   rn   rp   r   rx   r|   r   r   r   r   r   r   r   INFOr   r   rs   r'   r   <module>r      s   "    % %      *+ ," *+" ," *+
 ,
 *+" ," *+, ,, *+E ,E *+N ,N *+Q ,Q *+6 ,6 *+# ,# *+( ,( *+( ,( *+& ,& *+$ ,$ *+& ,& *+L ,L *+2 ,2 3./0U 1U *+ ,@ 3 ($5"'( %D



 
 
	
, #'555 5 	5
 5 5 5  5  5  5r'   