
    ,h/                     X   d Z ddlZddlZddlZddlZ	 ddlZddlZddl	Z	ddl
mZ ddl
mZ ddl
mZ ddl
mZ ddl
mZ dd	l
mZ dd
lmZ ddlmZ ddlmZ ddlZ e	j.                  e      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z  G d de      Z! G d de      Z" G d de!      Z# G d de      Z$ G d d e!      Z% G d! d"e      Z& G d# d$e      Z'd% Z(ed&k(  r# ejR                   e(ejT                               yy# e$ r ddlZY w xY w)'a   Adobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on the Adobe website:

  http://opensource.adobe.com/wiki/display/cmap/CMap+Resources

    N   )PSStackParser)PSSyntaxError)PSEOF)	PSLiteral)literal_name)KWD)name2unicode)choplist)nunpackc                       e Zd Zy)	CMapErrorN__name__
__module____qualname__     Q/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pdfminer/cmapdb.pyr   r   %   s    r   r   c                   4    e Zd ZdZd Zd Zd Zd Zd Zd Z	y)	CMapBaser   c                 .    |j                         | _        y N)copyattrsselfkwargss     r   __init__zCMapBase.__init__-   s    [[]
r   c                 @    | j                   j                  dd      dk7  S )NWModer   r   getr   s    r   is_verticalzCMapBase.is_vertical1   s    zz~~gq)Q..r   c                 "    || j                   |<   y r   )r   )r   kvs      r   set_attrzCMapBase.set_attr4   s    

1r   c                      y r   r   )r   codecids      r   add_code2cidzCMapBase.add_code2cid8       r   c                      y r   r   r   r,   r+   s      r   add_cid2unichrzCMapBase.add_cid2unichr;   r.   r   c                      y r   r   )r   cmaps     r   use_cmapzCMapBase.use_cmap>   r.   r   N)
r   r   r   debugr   r%   r)   r-   r1   r4   r   r   r   r   r   )   s%    E/r   r   c                   F    e Zd Zd Zd Zd Zd Zej                  ddfdZ	y)CMapc                 >    t        j                  | fi | i | _        y r   )r   r   code2cidr   s     r   r   zCMap.__init__D   s    $)&)r   c                 >    d| j                   j                  d      z  S )Nz
<CMap: %s>CMapNamer"   r$   s    r   __repr__zCMap.__repr__I   s    djjnnZ888r   c                     t        |t              sJ t        t        |                   fd | j                  |j                         y )Nc                     t        j                  |      D ]+  \  }}t        |t              ri }|| |<    ||       '|| |<   - y r   )six	iteritems
isinstancedict)dstsrcr'   r(   dr   s        r   r   zCMap.use_cmap.<locals>.copyO   sI    --, Aa&ACFAJCFr   )rA   r7   strtyper9   )r   r3   r   s     @r   r4   zCMap.use_cmapL   s9    $%6s4:6	 	T]]DMM*r   c              #      K   t         j                  d| |       | j                  }t        j                  |      D ]9  }||v r'||   }t        |t              s| | j                  }.| j                  }; y w)Nzdecode: %r, %r)logr5   r9   r?   	iterbytesrA   int)r   r+   rE   is       r   decodezCMap.decodeZ   sn     		"D$/MMt$ 	"AAvaDa%GAMM	" 	s   AA8 A8Nc                     || j                   }d}t        t        j                  |            D ]F  \  }}||fz   }t	        |t
              r|j                  d||fz         3| j                  |||       H y )Nr   zcode %r = cid %d
)outr9   r+   )r9   sortedr?   r@   rA   rK   writedump)r   rO   r9   r+   r'   r(   cs          r   rR   z	CMap.dumpg   sy    }}HDS]]845 	7FQaT	A!S!		.!Q78		cAA	6	7 	r   )
r   r   r   r   r<   r4   rM   sysstdoutrR   r   r   r   r7   r7   B   s(    
9 zzDt 
r   r7   c                       e Zd Zd Zy)IdentityCMapc                 V    t        |      dz  }|rt        j                  d|z  |      S y)N   z>%dHr   lenstructunpackr   r+   ns      r   rM   zIdentityCMap.decodev   s*    IqL==!T22r   Nr   r   r   rM   r   r   r   rW   rW   t       r   rW   c                       e Zd Zd Zy)IdentityCMapBytec                 P    t        |      }|rt        j                  d|z  |      S y)Nz>%dBr   rZ   r^   s      r   rM   zIdentityCMapByte.decode   s&    I==!T22r   Nr`   r   r   r   rc   rc   ~   ra   r   rc   c                   <    e Zd Zd Zd Zd Zej                  fdZy)
UnicodeMapc                 >    t        j                  | fi | i | _        y r   )r   r   
cid2unichrr   s     r   r   zUnicodeMap.__init__   s    $)&)r   c                 >    d| j                   j                  d      z  S )Nz<UnicodeMap: %s>r;   r"   r$   s    r   r<   zUnicodeMap.__repr__   s    !DJJNN:$>>>r   c                 N    t         j                  d| |       | j                  |   S )Nzget_unichr: %r, %r)rI   r5   rh   )r   r,   s     r   
get_unichrzUnicodeMap.get_unichr   s"    		&c2s##r   c                     t        t        j                  | j                              D ]  \  }}|j	                  d||fz          y )Nzcid %d = unicode %r
)rP   r?   r@   rh   rQ   )r   rO   r'   r(   s       r   rR   zUnicodeMap.dump   s@    S]]4??;< 	8FQII-A67	8r   N)	r   r   r   r   r<   rk   rT   rU   rR   r   r   r   rf   rf      s    
?$ zz r   rf   c                       e Zd Zd Zy)FileCMapc                    t        |t              rt        |t              s$J t        t        |      t        |      f             | j                  }|d d D ]   }t        |      }||v r||   }i }|||<   |}" t        |d         }|||<   y )N)rA   rF   rK   rG   r9   ord)r   r+   r,   rE   rS   ts         r   r-   zFileCMap.add_code2cid   s    $$C)=[sDJPTUXPYCZ?[[MMcr 	AAAAvaD!	 RM!r   N)r   r   r   r-   r   r   r   rn   rn      s    r   rn   c                       e Zd Zd Zy)FileUnicodeMapc                    t        |t              sJ t        t        |                   t        |t              r#t        |j                        | j                  |<   y t        |t              r |j                  dd      | j                  |<   y t        |t              r#t        j                  |      | j                  |<   y t        |      )NzUTF-16BEignore)rA   rK   rF   rG   r   r
   namerh   bytesrM   r?   unichr	TypeErrorr0   s      r   r1   zFileUnicodeMap.add_cid2unichr   s    #s#3Sc^3dI&#/		#:DOOC  	 e$#';;z8#DDOOC 
 		 c"#&::d#3DOOC  	 D/!r   N)r   r   r   r1   r   r   r   rt   rt      s    r   rt   c                       e Zd Zd Zy)PyCMapc                     t         j                  | |       |j                  | _        |j                  rd| j
                  d<   y N)r;   r   r!   )r7   r   CODE2CIDr9   IS_VERTICALr   )r   rw   modules      r   r   zPyCMap.__init__   s7    dT*"#DJJwr   Nr   r   r   r   r   r   r   r|   r|      s    r   r|   c                       e Zd Zd Zy)PyUnicodeMapc                     t         j                  | |       |r!|j                  | _        d| j                  d<   y |j
                  | _        y r~   )rf   r   CID2UNICHR_Vrh   r   CID2UNICHR_H)r   rw   r   verticals       r   r   zPyUnicodeMap.__init__   sJ    D40$11DO"#DJJw 	 %11DOr   Nr   r   r   r   r   r      s    r   r   c                   \    e Zd Zi Zi Z G d de      Zed        Zed        Z	edd       Z
y)CMapDBc                       e Zd Zy)CMapDB.CMapNotFoundNr   r   r   r   CMapNotFoundr      s    r   r   c           	         |j                  dd      }d|z  }t        j                  d|       t        j                  j                  dd      t        j                  j                  t        j                  j                  t              d      f}|D ]  }t        j                  j                  ||      }t        j                  j                  |      sCt        j                  |      }	 t        t        |      dt        j                   |j#                                     |j%                          c S  t&        j)                  |      # |j%                          w xY w)	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/r3   r   )replacerI   infoosenvironr#   pathjoindirname__file__existsgzipopenrG   rF   pickleloadsreadcloser   r   )klassrw   filename
cmap_paths	directoryr   gzfiles          r   
_load_datazCMapDB._load_data   s    ||D"%!D(%jjnn[2HIggll277??8#<fEH
# 		,I77<<	84Dww~~d#4#D	2v||FKKM/JKLLN		, %%d++ LLNs   *6E		Ec                 $   |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S 	 | j                  |   S # t        $ r Y nw xY w| j	                  |      }t        ||      x| j                  |<   }|S )Nz
Identity-Hr   )r!   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rW   rc   _cmap_cacheKeyErrorr   r|   )r   rw   datar3   s       r   get_cmapzCMapDB.get_cmap   s    <a((\!a((''#!,,''#!,,	$$T** 		%)/d);;$$s   A 	A! A!c                     	 | j                   |   |   S # t        $ r Y nw xY w| j                  d|z        }dD cg c]  }t        |||       nc c}w c}x| j                   |<   }||   S )Nzto-unicode-%s)FT)_umap_cacher   r   r   )r   rw   r   r   r(   umapss         r   get_unicode_mapzCMapDB.get_unicode_map  sz    	$$T*844 		$ 67P]*^1<dA+F*^*^^$%Xs    	  AN)F)r   r   r   r   r   r   r   classmethodr   r   r   r   r   r   r   r      sT    KKy  , ,"  "  r   r   c                      e Zd Zd Zd Z ed      Z ed      Z ed      Z ed      Z	 ed      Z
 ed      Z ed	      Z ed
      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Zd Zy)
CMapParserc                 L    t        j                  | |       || _        d| _        y )NT)r   r   r3   _in_cmap)r   r3   fps      r   r   zCMapParser.__init__  s#    tR(	r   c                 D    	 | j                          y # t        $ r Y y w xY wr   )
nextobjectr   r$   s    r   runzCMapParser.run  s-    	OO 	  		s    	s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangec                    || j                   u rd| _        | j                          y || j                  u rd| _        y | j                  sy || j                  u rA	 | j                  d      \  \  }}\  }}| j                  j                  t        |      |       y || j                  u rO	 | j                  d      \  \  }}| j                  j                  t        j                  t        |                   y || j                  u r| j                          y || j                   u r| j                          y || j"                  u r| j                          y || j$                  u r| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]  \  }
}}t)        |
t*              r7t)        |t*              r't)        |t,              rt/        |
      t/        |      k7  rO|
d d }|d d }||k7  r_|
dd  }|dd  }t1        |      }t1        |      }t/        |      }t3        ||z
  dz         D ]A  }|t5        j6                  d||z         | d  z   }| j                  j9                  |||z          C  y || j:                  u r| j                          y || j<                  u r~| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]L  \  }}t)        |t*              st)        |t*              s(| j                  j9                  |t1        |             N y || j>                  u r| j                          y || j@                  u rF| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]  \  }
}}t)        |
tB              r't)        |tB              rt/        |
      t/        |      k7  r@t1        |
      }t1        |      }t)        |tD              r9t3        ||z
  dz         D ]$  }| j                  jG                  ||z   ||          & |dd  }t1        |      }|d d }t/        |      }t3        ||z
  dz         D ]A  }|t5        j6                  d||z         | d  z   }| j                  jG                  ||z   |       C  y || jH                  u r| j                          y || jJ                  u r~| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]L  \  }}t)        |tB              st)        |tB              s(| j                  jG                  t1        |      |       N y || jL                  u r| j                          y || jN                  u r| j                          y | jQ                  ||f       y # t        $ r Y y w xY w# t        $ r Y y t        j                  $ r Y y w xY wc c}}w c c}}w c c}}w c c}}w )NTFrY   r      z>L))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpopr3   r)   r   r   KEYWORD_USECMAPr4   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rA   rF   rK   r[   r   ranger\   packr-   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGErx   listr1   KEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r   postoken_r'   r(   cmapname__objobjsser,   sprefixeprefixsvarevars1e1vlenrL   xr+   varbaseprefixs                             r   
do_keywordzCMapParser.do_keyword-  sk   D*** DMKKMd***!DM}}D$$$#'88A; !Q!Q		""<?A6 D(((#'88A; !X		""6??<3I#JK
 D444KKMD222KKMD...KKMD,,,)-7IRC7D7'40 5As"1c**Q2D!#s+s1vQ/?CR&CR&g%vvT]T]4yr"uQw 5AD"Q$ 7 ??AII**1c!e455  D---KKMD+++)-7IRC7D7'40 ?ddC(ZS-AII**4>? D---KKMD+++)-7IRC7D7 (D 1 :At"1e,Jq%4Hq6SV# QZQZdD)"2b57^ @		00AtAw?@ rs)C"3<D!#2YFs8D"2b57^ :"6;;tT!V#<dUV#DD		00Aq9::" D,,,KKMD***)-7IRC7D7'40 Adc5)ju.EII,,WS\4@A D111KKMD///KKM		3,A !  !   &&  8. 8 80 8sC   ?V (AV 5W 2WWW	VV	V=(V=<V=N)r   r   r   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s     L)*oO*oOf+K"%&<"= #$8 9 01n-/]+/]+n-L)"#67 !23or   r   c                     | dd  }|D ]R  }t        |d      }t               }t        ||      j                          |j	                          |j                          T y )Nr   rb)r   rt   r   r   r   rR   )argvargsfnamer   r3   s        r   mainr     sZ    8D %4  "

		 r   __main__)+__doc__rT   r   os.pathr   cPickler   ImportErrorr\   loggingpsparserr   r   r   r   r   r	   
encodingdbr
   utilsr   r   r?   	getLoggerr   rI   	Exceptionr   objectr   r7   rW   rc   rf   rn   rt   r|   r   r   r   r   exitr   r   r   r   <module>r      s,  	  	     # #   "  $   
g!		 	v 2/8 /d8 |  (t $Z "T 	: 	4V 4nP Pf zCHHT#((^ q  s   D 	D)(D)