
    ,h                         d dl Z d dlZd dlZddlmZ ddlmZ ddlmZ  ej                  d      Z
 e j                  e      Zd Zd Z G d	 d
e      Zy)    N   )glyphname2unicode)ENCODING)	PSLiteralz[0-9a-fA-F]+c           	      n   | j                  d      d   } | j                  d      }t        |      dkD  rdj                  t        t        |            S | t
        v rt        j                  |       S | j                  d      r| j                  d      }t        j                  |      rt        |      dz  dk(  rt        dt        |      d      D cg c]  }t        |||dz    d	       }}|D ]  }t        |        t        t        j                  |      }dj                  |      S | j                  d
      rj| j                  d
      }t        j                  |      rDdt        |      cxk  rdk  r0n n-t        |d	      }t        |       t        j                  |      S t!        d| z        c c}w )a  Converts Adobe glyph names to Unicode numbers.

    In contrast to the specification, this raises a KeyError instead of return an empty string when the key is unknown.
    This way the caller must explicitly define what to do when there is not a match.

    Reference: https://github.com/adobe-type-tools/agl-specification#2-the-mapping

    :returns unicode character if name resembles something, otherwise a KeyError
    .r   _r    uni      )baseu   zXCould not convert unicode name "%s" to character because it does not match specification)splitlenjoinmapname2unicoder   get
startswithstripHEXADECIMALmatchrangeint#raise_key_error_for_invalid_unicodesixunichrKeyError)	name
componentsname_without_uniiunicode_digitsdigit
charactersname_without_uunicode_digits	            U/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pdfminer/encodingdb.pyr   r      s    ::c?1DCJ
:wws<455 $$$((..__U##zz%0  !12s;K7Lq7PTU7USXYZ\_`p\qstSu!va#&6qQ&?b"I!v!v+ ?E7>? ^<
wwz**__S!!ZZ_N  0Q#n:M5RQR5R #N <3MBzz-00
mptt
uu "ws   F2c                 <    d| cxk  rdk  rn yt        d| z        y)zUnicode values should not be in the range D800 through DFFF because that is used for surrogate pairs in UTF-16

    :raises KeyError if unicode digit is invalid
    i  i   zHUnicode digit %d is invalid because it is in the range D800 through DFFFN)r    )r)   s    r*   r   r   8   s(    
 }$u$adqqrr %    c                       e Zd Zi Zi Zi Zi ZeD ]-  \  ZZ	Z
ZZ ee      Ze	reee	<   e
reee
<   ereee<   es)eee<   / eeeedZedd       Zy)
EncodingDB)StandardEncodingMacRomanEncodingWinAnsiEncodingPDFDocEncodingNc                 t   | j                   j                  || j                        }|r[|j                         }d}|D ]D  }t	        |t
              r|}t	        |t              s'	 t        |j                        ||<   |dz  }F |S # t        $ r(}t        j                  t        |             Y d }~5d }~ww xY w)Nr   r   )	encodingsr   std2unicodecopy
isinstancer   r   r   r!   r    logdebugstr)klassr!   diffcid2unicodecidxes          r*   get_encodingzEncodingDB.get_encodingY   s    oo))$0A0AB%**,KC a%C9-*+7+?C( 1HC  $ *		#a&))*s   %B	B7B22B7)N)__name__
__module____qualname__r5   mac2unicodewin2unicodepdf2unicoder   r!   stdmacwinpdfr   cr4   classmethodrA    r,   r*   r.   r.   A   s    KKKK&. 	!"sCc K K K K	! ('&%	I  r,   r.   )loggingrer   	glyphlistr   	latin_encr   psparserr   compiler   	getLoggerrB   r8   r   r   objectr.   rN   r,   r*   <module>rW      sR     	 
 (  bjj)g!&vRs' 'r,   