
    ,hYX                    @   d dl mZ d dlZd dlmZ d dlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZmZmZmZ  ej<                  d      Z ej@                         Z!e!jE                   ejF                  d             	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ$	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ%	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ&	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ'y)    )annotationsN)PathLike)BinaryIO   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_cp_similaris_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)sc
                    t        | t        t        f      s#t        dj	                  t        |                   |rBt        j                  }
t        j                  t               t        j                  t               t        |       }|dk(  rqt        j                  d       |r@t        j                  t               t        j                  
xs t        j                          t#        t%        | dddg d      g      S |Dt        j'                  t        d	d
j)                  |             |D cg c]  }t+        |d       }}ng }|Dt        j'                  t        dd
j)                  |             |D cg c]  }t+        |d       }}ng }|||z  k  r!t        j'                  t        d|||       d}|}|dkD  r||z  |k  rt-        ||z        }t        |       t.        k  }t        |       t0        k\  }|r*t        j'                  t        dj	                  |             n+|r)t        j'                  t        dj	                  |             g }|rt3        |       nd}|,|j5                  |       t        j'                  t        d|       t7               }g }g }d}d}d}t#               }t#               }t9        |       \  }}|6|j5                  |       t        j'                  t        dt        |      |       |j5                  d       d|vr|j5                  d       |t:        z   D ]^  }|r||vr|r||v r||v r|j=                  |       d}||k(  }|xr t?        |      }|dv r|st        j'                  t        d|       `|dv r|st        j'                  t        d|       	 tA        |      }	 |r9|du r5tG        |du r| dt-        d       n| t        |      t-        d       |       ntG        |du r| n| t        |      d |      }d}!|D ]  }"tM        ||"      sd}! n |!rt        j'                  t        d|"       "tO        |sdn
t        |      |t-        ||z              }#|xr |duxr t        |      |k  }$|$rt        j'                  t        d|       t-        t        |#      dz        }%tQ        |%d      }%d}&d}'g }(g })	 tS        | ||#||||||	      D ]g  }*|(j5                  |*       |)j5                  tU        |*||du xr dt        |      cxk  xr dk  nc              |)d    |k\  r|&dz  }&|&|%k\  s|sb|du sg n |'s$|r"|s 	 | t-        d"      d jW                  |d#$       |)rtY        |)      t        |)      z  nd}+|+|k\  s|&|%k\  rm|j5                  |       t        j'                  t        d&||&t[        |+d'z  d()             |	r-|dd|fv r&|'s$t%        | ||dg ||*      },||k(  r|,}n
|dk(  r|,}n|,}t        j'                  t        d+|t[        |+d'z  d()             |st]        |      }-nt_        |      }-|-r3t        j'                  t        d,j	                  |tG        |-                   g }.|dk7  r8|(D ]3  }*ta        |*||-rd-j)                  |-      nd      }/|.j5                  |/       5 tc        |.      }0|0r*t        j'                  t        d.j	                  |0|             t%        | ||+||0|du s||ddfv r|nd|*      }1|j5                  |1       ||ddfv ry|+d/k  rt|+dk(  r^t        j                  d0|1jd                         |r.t        j                  t               t        j                  
       t#        |1g      c S |j5                  |1       t        |      r||||v rvd|v rrd|v rn|jg                         }2t        j                  d0|2jd                         |r.t        j                  t               t        j                  
       t#        |2g      c S ||k(  s
t        j                  d1|       |r.t        j                  t               t        j                  
       t#        ||   g      c S  t        |      dk(  r|s|s|rt        j'                  t        d2       |r2t        j                  d3|jd                         |j5                  |       nr|r||r|r|jh                  |jh                  k7  s|'t        j                  d4       |j5                  |       n(|r&t        j                  d5       |j5                  |       |r<t        j                  d6|jg                         jd                  t        |      dz
         nt        j                  d7       |r.t        j                  t               t        j                  
       |S c c}w c c}w # tB        tD        f$ r t        j'                  t        d|       Y w xY w# tH        tJ        f$ rQ} t        | tJ              s%t        j'                  t        d|tG        |              |j5                  |       Y d} ~ Qd} ~ ww xY w# tH        $ r4} t        j'                  t        d!|tG        |              |%}&d}'Y d} ~ kd} ~ ww xY w# tH        $ rA} t        j'                  t        d%|tG        |              |j5                  |       Y d} ~ d} ~ ww xY w)8af  
    Given a raw bytes sequence, return the best possibles charset usable to render str objects.
    If there is no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
    but never take it for granted. Can improve the performance.

    You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.
    z3Expected object of type bytes or bytearray, got: {}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.z2Encoding %s does not provide an IncrementalDecoderg    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %sTzW%s is deemed too similar to code page %s and was consider unsuited already. Continuing!zpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.      zaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.d      )ndigits)preemptive_declarationz=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}皙?z.Encoding detection: %s is most likely the one.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)5
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerloggingWARNINGr   r   logjoinr   intr   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorstrUnicodeDecodeErrorLookupErrorr   rangemaxr   r   decodesumroundr   r	   r   r
   r!   bestfingerprint)3	sequencessteps
chunk_size	thresholdcp_isolationcp_exclusionpreemptive_behaviourexplainlanguage_thresholdenable_fallbackprevious_logger_levellengthcpis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failurefallback_asciifallback_u8fallback_specifiedresultsearly_stop_resultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderesimilar_soft_failure_testencoding_soft_failedr_multi_byte_bonusmax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiofallback_entrytarget_languages	cd_ratioschunk_languagescd_ratios_mergedcurrent_matchprobable_results3                                                      X/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/charset_normalizer/api.py
from_bytesr   !   sZ   < i)U!34AHHY
 	
 %+\\/*i.F{ST  1OO1DW__E|IwUBPRSTUU

5IIl#		
 8DD	"e,DD

6IIl#		
 8DD	"e,DD*u$%

l	
 
qyVe^j0%(
"%i.3E"E"%i.4D"D

LSS	
 


W^^	
 (* .By)t  %$$%78

N	
 uF)+)+*.N'+K.2,.G)7)9 3I >L+$$\2

W		
   )++$$W-.? ~<M=M\9F"

=!&*%1]%B!5 "
:Q;
 009MJJn
 I%.BJJd
 	*@*O!	$)>%)G ,u4 "+CI.&s;'7#d)D* #& ,u4 "&s;'7'9:*#& +0!$; 	 ],@A,0)	
 %JJi$	 )As;/?
 " .t+.O$v- 	 JJ-	 "%SWq[!1 115 ! %!		'	),$ %
    '  !4GA\1B,Ga,G R=I-$)$$(99(-=-F7V &%)
#d)+&--mH-M ENY#i.!@SVi'+;?P+P#**=9JJ0 o+Q7  !gw8J%KK-!-!#+=" !$66)7&"g-%3N"0K

K/C'3		
 %*<]*K4]CJJ8??!3'7#8 	 G#" 2"1&2BCHH-.#   12 2)<JJ299$m %  *U2$);Wg(NN  
 #5
" 	}% 0'7CC#% #%D!** ((9OO$9:%}o66%%m4 "##+/AV/K6!6!,>,C,C,EOLL@(( $$_5 56!?"344L(LL1
 $$_5 56!7=#9":;;}	~<@
 7|q.,>JJa
 LLI"++ NN-.^3"++~/I/II'LLUVNN;'LLUVNN>*kLLN##L1	
 	TU_--.NE E Eb $[1 	JJD
 	6 #K0 		a-

O!F	 $**=9		l 
	) JJsA	  1$(!
	)* & 

t!F	 (..}=su   /g%7g*-g/9Ah A4jjjk/*hh j /Ai;;j 	k )j;;k 	l6llc
                F    t        | j                         |||||||||	
      S )z
    Same thing than the function from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    )r   read)
fprP   rQ   rR   rS   rT   rU   rV   rW   rX   s
             r   from_fpr      s5      
	     c
                n    t        | d      5 }
t        |
|||||||||	
      cddd       S # 1 sw Y   yxY w)z
    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
    Can raise IOError.
    rbN)openr   )pathrP   rQ   rR   rS   rT   rU   rV   rW   rX   r   s              r   	from_pathr   >  sK      
dD	 
R 

 
 
s   +4c
                    t        | t        t        f      rt        | |||||||||	
      }
|
 S t        | t        t
        f      rt        | |||||||||	
      }
|
 S t        | |||||||||	
      }
|
 S )a)  
    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
    )	rP   rQ   rR   rS   rT   rU   rV   rW   rX   )r-   rE   r   r   r/   r.   r   r   )fp_or_path_or_payloadrP   rQ   rR   rS   rT   rU   rV   rW   rX   guessess              r   	is_binaryr   ]  s    " '#x9!!%%!51+
Z ;C 
	

 !!%%!51+
4 ; !!%%!51+
 ;r   )	      皙?NNTFr,   T)rO   zbytes | bytearrayrP   r?   rQ   r?   rR   floatrS   list[str] | NonerT   r   rU   boolrV   r   rW   r   rX   r   returnr   )r   r   rP   r?   rQ   r?   rR   r   rS   r   rT   r   rU   r   rV   r   rW   r   rX   r   r   r   )r   zstr | bytes | PathLikerP   r?   rQ   r?   rR   r   rS   r   rT   r   rU   r   rV   r   rW   r   rX   r   r   r   )	r   r   r   NNTFr,   F)r   z!PathLike | str | BinaryIO | bytesrP   r?   rQ   r?   rR   r   rS   r   rT   r   rU   r   rV   r   rW   r   rX   r   r   r   )(
__future__r   r;   osr   typingr   cdr   r   r	   r
   constantr   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   r   	getLoggerr3   StreamHandlerr6   setFormatter	Formatterr   r   r   r    r   r   <module>r      s   "     R Q  0   
		/	0'''')   GAB %)%)!% # | || | 	|
 #| #| | | | | |B %)%)!% #   	
 # #     @ %)%)!% # 

 

 
 	

 #
 #
 
 
 
 
 
B %)%)!% #!?<?? ? 	?
 #? #? ? ? ? ? 
?r   