
    ,h$                        d dl mZmZmZ d dlmZmZmZ d dlm	Z	m
Z
mZ d dlmZmZmZ d dlmZmZmZ  G d de      Z G d d	e      Z G d
 de      Z G d de      Z G d de      Z G d de      Z G d de      Zy)   )EUCTW_CHAR_TO_FREQ_ORDEREUCTW_TABLE_SIZE EUCTW_TYPICAL_DISTRIBUTION_RATIO)EUCKR_CHAR_TO_FREQ_ORDEREUCKR_TABLE_SIZE EUCKR_TYPICAL_DISTRIBUTION_RATIO)GB2312_CHAR_TO_FREQ_ORDERGB2312_TABLE_SIZE!GB2312_TYPICAL_DISTRIBUTION_RATIO)BIG5_CHAR_TO_FREQ_ORDERBIG5_TABLE_SIZEBIG5_TYPICAL_DISTRIBUTION_RATIO)JIS_CHAR_TO_FREQ_ORDERJIS_TABLE_SIZEJIS_TYPICAL_DISTRIBUTION_RATIOc                   @    e Zd ZdZdZdZdZd Zd Zd Z	d Z
d	 Zd
 Zy)CharDistributionAnalysisi   gGz?g{Gz?   c                 x    d | _         d | _        d | _        d | _        d | _        d | _        | j                          y N)_char_to_freq_order_table_sizetypical_distribution_ratio_done_total_chars_freq_charsresetselfs    Z/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/chardet/chardistribution.py__init__z!CharDistributionAnalysis.__init__.   s>     $( 
 +/'
 

    c                 .    d| _         d| _        d| _        y)zreset analyser, clear any stateF    N)r   r   r   r   s    r    r   zCharDistributionAnalysis.reset=   s     
r"   c                     |dk(  r| j                  |      }nd}|dk\  rN| xj                  dz  c_        || j                  k  r)d| j                  |   kD  r| xj                  dz  c_        yyyy)z"feed a character with known length   r$   r   i   N)	get_orderr   r   r   r   )r   charchar_lenorders       r    feedzCharDistributionAnalysis.feedF   su    q=NN4(EEA:"t'''11%88$$)$ 9 ( r"   c                 <   | j                   dk  s| j                  | j                  k  r| j                  S | j                   | j                  k7  rD| j                  | j                   | j                  z
  | j                  z  z  }|| j
                  k  r|S | j
                  S )z(return confidence based on existing datar$   )r   r   MINIMUM_DATA_THRESHOLDSURE_NOr   SURE_YES)r   rs     r    get_confidencez'CharDistributionAnalysis.get_confidenceT   s     !T%5%59T9T%T<< 0 00!!d&7&7$:J:J&J22&3 4A4==  }}r"   c                 4    | j                   | j                  kD  S r   )r   ENOUGH_DATA_THRESHOLDr   s    r    got_enough_dataz(CharDistributionAnalysis.got_enough_datad   s       4#=#===r"   c                      y)Nr'    )r   byte_strs     r    r(   z"CharDistributionAnalysis.get_orderi   s    
 r"   N)__name__
__module____qualname__r4   r0   r/   r.   r!   r   r,   r2   r5   r(   r7   r"   r    r   r   (   s6     HG* >
r"   r   c                   $     e Zd Z fdZd Z xZS )EUCTWDistributionAnalysisc                 l    t         t        |           t        | _        t
        | _        t        | _        y r   )	superr=   r!   r   r   r   r   r   r   r   	__class__s    r    r!   z"EUCTWDistributionAnalysis.__init__r   )    '79#; +*J'r"   c                 :    |d   }|dk\  rd|dz
  z  |d   z   dz
  S y)Nr$      ^   r      r'   r7   r   r8   
first_chars      r    r(   z#EUCTWDistributionAnalysis.get_orderx   6    
 a[
d*+hqk9D@@r"   r9   r:   r;   r!   r(   __classcell__rA   s   @r    r=   r=   q       K	r"   r=   c                   $     e Zd Z fdZd Z xZS )EUCKRDistributionAnalysisc                 l    t         t        |           t        | _        t
        | _        t        | _        y r   )	r?   rO   r!   r   r   r   r   r   r   r@   s    r    r!   z"EUCKRDistributionAnalysis.__init__   rB   r"   c                 :    |d   }|dk\  rd|dz
  z  |d   z   dz
  S y)Nr$      rE   r   rF   r'   r7   rG   s      r    r(   z#EUCKRDistributionAnalysis.get_order   rI   r"   rJ   rL   s   @r    rO   rO      rM   r"   rO   c                   $     e Zd Z fdZd Z xZS )GB2312DistributionAnalysisc                 l    t         t        |           t        | _        t
        | _        t        | _        y r   )	r?   rT   r!   r	   r   r
   r   r   r   r@   s    r    r!   z#GB2312DistributionAnalysis.__init__   s)    ($8:#< ,*K'r"   c                 H    |d   |d   }}|dk\  r|dk\  rd|dz
  z  |z   dz
  S y)Nr$   r   rR   rF   rE   r'   r7   r   r8   rH   second_chars       r    r(   z$GB2312DistributionAnalysis.get_order   sA    
 #+1+x{K
$[D%8d*+k9D@@r"   rJ   rL   s   @r    rT   rT      s    L	r"   rT   c                   $     e Zd Z fdZd Z xZS )Big5DistributionAnalysisc                 l    t         t        |           t        | _        t
        | _        t        | _        y r   )	r?   rZ   r!   r   r   r   r   r   r   r@   s    r    r!   z!Big5DistributionAnalysis.__init__   s)    &68#: **I'r"   c                 j    |d   |d   }}|dk\  r$|dk\  rd|dz
  z  |z   dz
  dz   S d|dz
  z  |z   dz
  S y)	Nr$   r      rF      ?   @   r'   r7   rW   s       r    r(   z"Big5DistributionAnalysis.get_order   sa    
 #+1+x{K
d"j4/0;>EJJj4/0;>EEr"   rJ   rL   s   @r    rZ   rZ      s    Jr"   rZ   c                   $     e Zd Z fdZd Z xZS )SJISDistributionAnalysisc                 l    t         t        |           t        | _        t
        | _        t        | _        y r   )	r?   rb   r!   r   r   r   r   r   r   r@   s    r    r!   z!SJISDistributionAnalysis.__init__   s)    &68#9 )*H'r"   c                     |d   |d   }}|dk\  r|dk  r	d|dz
  z  }n|dk\  r|dk  rd|dz
  dz   z  }ny	||z   d
z
  }|dkD  rd	}|S )Nr$   r                     r'   r`      r7   )r   r8   rH   rX   r+   s        r    r(   z"SJISDistributionAnalysis.get_order   sz    
 #+1+x{K
$Z4%7:,-ED zT'9:,r12E#d*Er"   rJ   rL   s   @r    rb   rb      s    Ir"   rb   c                   $     e Zd Z fdZd Z xZS )EUCJPDistributionAnalysisc                 l    t         t        |           t        | _        t
        | _        t        | _        y r   )	r?   rm   r!   r   r   r   r   r   r   r@   s    r    r!   z"EUCJPDistributionAnalysis.__init__   s)    '79#9 )*H'r"   c                 :    |d   }|dk\  rd|dz
  z  |d   z   dz
  S y)Nr$      rE   rF   r   r'   r7   )r   r8   r)   s      r    r(   z#EUCJPDistributionAnalysis.get_order   s4    
 {4<%3d::r"   rJ   rL   s   @r    rm   rm      s    I	r"   rm   N)	euctwfreqr   r   r   	euckrfreqr   r   r   
gb2312freqr	   r
   r   big5freqr   r   r   jisfreqr   r   r   objectr   r=   rO   rT   rZ   rb   rm   r7   r"   r    <module>rw      s   8: :: :< <8 86 6Fv FR 8 & 8 &!9 &7 ,7 2 8 r"   