
    ,hP                        d dl Z d dlZd dlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ d dlZ e j8                  e      Z G d de      Z G d de      Z  G d de      Z! G d de!      Z" G d d e!      Z# G d! d"e!      Z$y)#    N   )PDFTextDevice)PDFUnicodeNotDefined)LTContainer)LTPage)LTText)LTLine)LTRect)LTCurve)LTFigure)LTImage)LTChar)
LTTextLine)	LTTextBox)LTTextBoxVertical)LTTextGroup)apply_matrix_pt)mult_matrix)enc)bbox2str)utilsc                   J    e Zd ZddZd Zd Zd Zd Zd Zd Z	d	 Z
d
 Zd Zy)PDFLayoutAnalyzerNc                 Z    t        j                  | |       || _        || _        g | _        y N)r   __init__pagenolaparams_stackselfrsrcmgrr   r   s       T/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pdfminer/converter.pyr   zPDFLayoutAnalyzer.__init__$   s*    tW-     c                     |j                   \  }}}}t        |||f      \  }}t        |||f      \  }}ddt        ||z
        t        ||z
        f}t        | j                  |      | _        y )Nr   )mediaboxr   absr   r   cur_item)r!   pagectmx0y0x1y1r&   s           r#   
begin_pagezPDFLayoutAnalyzer.begin_page+   so    ==RR"3R1R"3R1Rq#be*c"R%j1t{{H5r$   c                    | j                   r#J t        t        | j                                      t        | j                  t
              s#J t        t        | j                                     | j                  %| j                  j                  | j                         | xj                  dz  c_	        | j                  | j                         y )Nr   )r   strlen
isinstancer(   r   typer   analyzer   receive_layout)r!   r)   s     r#   end_pagezPDFLayoutAnalyzer.end_page3   s    ;;5C$4 55$--0J#d4==6I2JJ==$MM!!$--0qDMM*r$   c                     | j                   j                  | j                         t        ||t	        || j
                              | _        y r   )r   appendr(   r   r   r*   )r!   namebboxmatrixs       r#   begin_figurezPDFLayoutAnalyzer.begin_figure<   s6    4==) t[-JKr$   c                 
   | j                   }t        | j                   t              s#J t        t	        | j                                      | j
                  j                         | _         | j                   j                  |       y r   )r(   r3   r   r1   r4   r   popadd)r!   _figs      r#   
end_figurezPDFLayoutAnalyzer.end_figureA   sV    mm$--2LCT]]8K4LL)#r$   c                 v   t        | j                  t              s#J t        t	        | j                                     t        ||| j                  j                  | j                  j                  | j                  j                  | j                  j                  f      }| j                  j                  |       y r   )r3   r(   r   r1   r4   r   r+   r,   r-   r.   r@   )r!   r:   streamitems       r#   render_imagezPDFLayoutAnalyzer.render_imageH   s    $--2LCT]]8K4LLtV(($--*:*:(($--*:*:<= 	$r$   c                    dj                  d |D              }|dk(  r|d   \  }}}	|d   \  }}
}t        | j                  ||	f      \  }}	t        | j                  |
|f      \  }
}||
k(  s|	|k(  rN| j                  j	                  t        |j                  ||	f|
|f||||j                  |j                               y |dk(  r|d   \  }}}	|d   \  }}
}|d   \  }}}|d   \  }}}t        | j                  ||	f      \  }}	t        | j                  |
|f      \  }
}t        | j                  ||f      \  }}t        | j                  ||f      \  }}||
k(  r||k(  r
||k(  r||	k(  s|	|k(  r\|
|k(  rW||k(  rR||k(  rM| j                  j	                  t        |j                  ||	||f||||j                  |j                               y g }|D ]M  }t        dt        |      d      D ]2  }|j                  t        | j                  ||   ||dz      f             4 O | j                  j	                  t        |j                  |||||j                  |j                               y )	N c              3   &   K   | ]	  }|d      yw)r   N ).0xs     r#   	<genexpr>z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>Q   s     +!+s   mlr   r   mlllh      )joinr   r*   r(   r@   r	   	linewidthscolorncolorr
   ranger2   r9   r   )r!   gstatestrokefillevenoddpathshaperA   r+   r,   r-   r.   x2y2x3y3ptspis                      r#   
paint_pathzPDFLayoutAnalyzer.paint_pathP   sm   +d++D=q'KQBq'KQB&txx"b:HR&txx"b:HRRx28!!&)9)9B8b"XD'6==&--#I JGq'KQBq'KQBq'KQBq'KQB&txx"b:HR&txx"b:HR&txx"b:HR&txx"b:HRrbBh28brbBh28b!!&)9)9BB;KD'6==&--#I J 	FA1c!fa( F

?488adAacF^DEF	F 	'&"2"2CV]]FMM3 	4r$   c	                    	 |j                  |      }	t        |	t        j                        sJ t	        t        |	                   	 |j                  |      }
|j                  |      }t        ||||||	|
|||
      }| j                  j                  |       |j                  S # t        $ r | j                  ||      }	Y zw xY wr   )	to_unichrr3   six	text_typer1   r4   r   handle_undefined_char
char_width	char_dispr   r(   r@   adv)r!   r<   fontfontsizescalingrisecidncsgraphicstatetext	textwidthtextdisprF   s                r#   render_charzPDFLayoutAnalyzer.render_chart   s    	9>>#&DdCMM2CCT
OC2 OOC(	>>#&fdHgtT9hX[]ij$xx $ 	9--dC8D	9s   AB$ $CCc                 :    t         j                  d||       d|z  S )Nzundefined: %r, %rz(cid:%d))loginfo)r!   rn   rr   s      r#   rj   z'PDFLayoutAnalyzer.handle_undefined_char   s    $dC0Cr$   c                      y r   rK   r!   ltpages     r#   r6   z PDFLayoutAnalyzer.receive_layout       r$   r   N)__name__
__module____qualname__r   r/   r7   r=   rC   rG   re   rx   rj   r6   rK   r$   r#   r   r   "   s5    
"H
 r$   r   c                        e Zd ZddZd Zd Zy)PDFPageAggregatorNc                 D    t         j                  | |||       d | _        y )Nr   r   )r   r   resultr    s       r#   r   zPDFPageAggregator.__init__   s#    ""4("Sr$   c                     || _         y r   r   r}   s     r#   r6   z PDFPageAggregator.receive_layout   s    r$   c                     | j                   S r   r   r!   s    r#   
get_resultzPDFPageAggregator.get_result   s    {{r$   r   )r   r   r   r   r6   r   rK   r$   r#   r   r      s    
r$   r   c                       e Zd ZddZy)PDFConverterNc                    t         j                  | |||       || _        || _        t	        | j                  d      r(d| j                  j
                  v rd| _        y d| _        y dd l}t        | j                  |j                        rd| _        y t        | j                  |j                        rd| _        y 	 | j                  j                  d       d| _        y # t        $ r
 d| _        Y y w xY w)Nr   modebTFr      é)r   r   outfpcodechasattrr   outfp_binaryior3   BytesIOStringIOwrite	TypeError)r!   r"   r   r   r   r   r   s          r#   r   zPDFConverter.__init__   s    ""4("S

4::v&djjoo%$(! 	 %*! 	 $**bjj1$(! 	 DJJ4$)! 	-JJ$$U+(-D% 	 ! -(,D%-s   ;"C C10C1)utf-8r   N)r   r   r   r   rK   r$   r#   r   r      s    r$   r   c                   0    e Zd Z	 	 ddZd Zd Zd Zd Zy)TextConverterNc                 V    t         j                  | |||||       || _        || _        y N)r   r   r   )r   r   
showpagenoimagewriter)r!   r"   r   r   r   r   r   r   s           r#   r   zTextConverter.__init__   s0    dGU%Yab$&r$   c                     t        j                  || j                  d      }t        j                  r| j
                  r|j                         }| j                  j                  |       y )Nignore)	r   compatible_encode_methodr   rh   PY3r   encoder   r   r!   ru   s     r#   
write_textzTextConverter.write_text   sI    --dDJJI77t((;;=D

r$   c                       fd j                   r j                  d|j                  z          |        j                  d       y )Nc                 Z   t        | t              r| D ]
  } |        n/t        | t              rj                  | j	                                t        | t
              rj                  d       y t        | t              r)j                  j                  j                  |        y y y )N
)	r3   r   r   r   get_textr   r   r   export_image)rF   childrenderr!   s     r#   r   z,TextConverter.receive_layout.<locals>.render   s    $,! "E5M"D&)0$	*%D'*##/$$11$7 0 +r$   zPage %s
)r   r   pageid)r!   r~   r   s   ` @r#   r6   zTextConverter.receive_layout   s<    
	8 ??OOK&--78vr$   c                 L    | j                   y t        j                  | ||       y r   )r   r   rG   )r!   r:   rE   s      r#   rG   zTextConverter.render_image   s&    #!!$f5r$   c                      y r   rK   )r!   rX   rY   rZ   r[   r\   s         r#   re   zTextConverter.paint_path   r   r$   )r   r   NFN)r   r   r   r   r   r6   rG   re   rK   r$   r#   r   r      s!    IM/3*r$   r   c                       e Zd ZdddddddZddd	Zd
ddddddddddddddifdZd Zd Zd Zd Z	d Z
d Zd Zd Zd#dZd Zd Zd  Zd! Zd" Zy)$HTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupcurver)   blue)r   charr   r   Ng      ?normalT2   r   )r   r)   r   c                    t         j                  | |||||       || _        || _        || _        |	| _        |
| _        || _        || _        || _	        |rJ| j                  j                  | j                         | j                  j                  | j                         | j                  | _        d | _        g | _        | j!                          y r   )r   r   scale	fontscale
layoutmoder   
pagemarginr   rect_colorstext_colorsupdateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)r!   r"   r   r   r   r   r   r   r   r   r   r   debugr   r   s                  r#   r   zHTMLConverter.__init__   s    
 	dGU%Yab
"$$$&&&##D$4$45##D$4$45
r$   c                     | j                   r|j                  | j                         }t        j                  dk  rt	        |      }| j
                  j                  |       y )N)rR   r   )r   r   sysversion_infor1   r   r   r   s     r#   r   zHTMLConverter.write  sG    ::;;tzz*Df$t9D

r$   c                     | j                  d       | j                  r| j                  d| j                  z         n| j                  d       | j                  d       y )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   r   s    r#   r   zHTMLConverter.write_header  sL    

#$::JJ[^b^h^hhiJJOP

$%r$   c           
          | j                  ddj                  d t        d| j                        D              z         | j                  d       y )Nz8<div style="position:absolute; top:0px;">Page: %s</div>
z, c              3   .   K   | ]  }d |d|d  yw)z
<a href="#z">z</a>NrK   )rL   rd   s     r#   rN   z-HTMLConverter.write_footer.<locals>.<genexpr>#  s     \q!<\s   r   z</body></html>
)r   rS   rW   r   r   s    r#   write_footerzHTMLConverter.write_footer!  sE    

N99\eAt{{F[\\] 	^

%&r$   c                 :    | j                  t        |d              y r   )r   r   r   s     r#   r   zHTMLConverter.write_text'  s    

3tT?#r$   c           
          | j                   j                  |      }|[| j                  d|||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz         y )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r   getr   r   r   )r!   colorborderwidthrM   ywhs          r#   
place_rectzHTMLConverter.place_rect+  sw      $$U+JJ R{$**t}}Q

&B$**a

l44 5
 	r$   c                     | j                  |||j                  |j                  |j                  |j                         y r   )r   r+   r.   widthheight)r!   r   r   rF   s       r#   place_borderzHTMLConverter.place_border5  s+    {DGGTWWdjj$++Vr$   c           
         | j                   | j                   j                  |      }| j                  dt        |d       ||| j                  z  | j
                  |z
  | j                  z  || j                  z  || j                  z  fz         y )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r   r   r   r   )r!   rF   r   rM   r   r   r   r:   s           r#   place_imagezHTMLConverter.place_image9  s    '##006DJJ 5D$$**t}}Q

&B$**a

l44 5
 	r$   c                 4   | j                   j                  |      }|{| j                  d||| j                  z  | j                  |z
  | j                  z  || j                  z  | j
                  z  fz         | j                  |       | j                  d       y )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;">z</span>
)r   r   r   r   r   r   r   )r!   r   ru   rM   r   sizes         r#   
place_textzHTMLConverter.place_textC  s      $$U+JJiq|dmmAotzz-I4PTPZPZ?[_[i[iKijk lOOD!JJ{#r$   c                    | j                   j                  | j                         d | _        | j                  d||||| j                  z  | j
                  |z
  | j                  z  || j                  z  || j                  z  fz         y )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r   r9   r   r   r   r   )r!   r   r   rM   r   r   r   writing_modes           r#   	begin_divzHTMLConverter.begin_divL  sz    tzz*


 E;djjL4==?DJJ">djjL!DJJ,00 	1
 	r$   c                     | j                   | j                  d       | j                  j                         | _         | j                  d       y )N</span>z</div>)r   r   r   r?   )r!   r   s     r#   end_divzHTMLConverter.end_divV  s;    ::!JJy!__((*


8r$   c                    ||f}|| j                   k7  r]| j                   | j                  d       | j                  dt        |      || j                  z  | j                  z  fz         || _         | j                  |       y )Nr   z.<span style="font-family: %s; font-size:%dpx">)r   r   r   r   r   r   )r!   ru   fontnamero   rn   s        r#   put_textzHTMLConverter.put_text]  sx    (#4::zz%

9%JJGHx$**'<t~~'MNO PDJr$   c                 &    | j                  d       y )Nz<br>r   r   s    r#   put_newlinezHTMLConverter.put_newlineh  s    

6r$   c                 r      fd fd |        xj                    j                  z  c_         y )Nc                 j    t        | t              r"j                  dd|        | D ]
  } |        y )Nr   r   )r3   r   r   rF   r   r!   
show_groups     r#   r   z0HTMLConverter.receive_layout.<locals>.show_groupm  s9    $,!!+q$7! &Eu%&r$   c           
      0   t        | t              r׉xj                  | j                  z  c_        j	                  dd|        j
                  rdj                  dj                  | j                  z
  j                  z  z         j                  d| j                  d| j                  d       | D ]
  } |        | j                  | j                  D ]
  } |        y t        | t              rj	                  dd|        y t        | t              r_j                  dd| j                  | j                  | j                  | j                         | D ]
  } |        j!                  d       y t        | t"              r?j%                  | d| j                  | j                  | j                  | j                         y j&                  d	k(  r
t        | t(              r#j	                  d
d|        | D ]
  } |        y t        | t*              rbj	                  dd|        j-                  dt/        | j0                  dz         | j                  | j                  d       | D ]
  } |        y t        | t2              rTj	                  dd|        j-                  d| j5                         | j                  | j                  | j6                         y t        | t(              r/| D ]
  } |        j&                  dk7  rj9                          y t        | t*              rnj                  dd| j                  | j                  | j                  | j                  | j;                                | D ]
  } |        j!                  d       y t        | t2              r6j=                  | j5                         | j>                  | j6                         y t        | t@              rjC                  | j5                                y )Nr)   r   z*<div style="position:absolute; top:%dpx;">z	<a name="z">Page z</a></div>
r   r   exactr   r      r   loose)"r3   r   r   r.   r   r   r   r   r   groupsr   r   r   r+   r   r   r   r   r   r   r   r   r   r1   indexr   r   r   r   get_writing_moder   r   r   r   )rF   r   groupr   r!   r   s      r#   r   z,HTMLConverter.receive_layout.<locals>.rendert  sl   $'(!!&!T2??JJK!%tww!6

 B D EJJT[[RVR]R]^_! "E5M";;*!% *"5)*R O D'*!!'1d3L K D(+xDGGTWWdjj$++V! "E5M"X&B A D'*  q$''477DJJT> ; ??g-!$
3))*a>%) *E"5M*4 1 $D)4)))Q=	3tzz!|3DdggtwwXZ[%) *E"5M** ' $D&1))&!T:$''SWS\S\]"  "$
3%) *E"5M*??g5 ,,.  $D)4y!TWWdggtzzSWS^S^'+'<'<'>@%) *E"5M*Y/
 	 $D&1dmmot}}diiP  $D&18r$   )r   r   r!   r~   r   r   s   ` @@r#   r6   zHTMLConverter.receive_layoutl  s,    	4	j 	v(r$   c                 $    | j                          y r   r   r   s    r#   closezHTMLConverter.close      r$   )F)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r6   r
  rK   r$   r#   r   r      s     K K
 .5QCHD'.?$g.	.	?Br$   r   c                   `    e Zd Z ej                  d      Z	 	 d
dZd Zd Zd Z	d Z
d Zd	 Zy)XMLConverterz[ ---]Nc                 v    t         j                  | |||||       || _        || _        | j	                          y r   )r   r   r   stripcontrolr   )r!   r"   r   r   r   r   r   r  s           r#   r   zXMLConverter.__init__  s=    dGU%Yab&(r$   c                     | j                   r|j                  | j                         }| j                  j                  |       y r   )r   r   r   r   r   s     r#   r   zXMLConverter.write  s0    ::;;tzz*D

r$   c                     | j                   r| j                  d| j                   z         n| j                  d       | j                  d       y )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
)r   r   r   s    r#   r   zXMLConverter.write_header  s;    ::JJ?$**LMJJ12

;r$   c                 &    | j                  d       y )Nz	</pages>
r   r   s    r#   r   zXMLConverter.write_footer  s    

< r$   c                     | j                   r| j                  j                  d|      }| j                  t	        |d              y )NrI   )r  CONTROLsubr   r   r   s     r#   r   zXMLConverter.write_text  s6    <<##C.D

3tT?#r$   c                 4      fd fd |       y )Nc                 <   t        | t              r4j                  d| j                  t	        | j
                        fz         y t        | t              rGj                  dt	        | j
                        z         | D ]
  } |        j                  d       y )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)r3   r   r   r  r   r;   r   r   s     r#   r   z/XMLConverter.receive_layout.<locals>.show_group  s    $	*

<"&**htyy.A!BC D  D+.

4x		7JJK! &Eu%&

+,r$   c                    t        | t              rj                  d| j                  t	        | j
                        | j                  fz         | D ]
  } |        | j                  ;j                  d       | j                  D ]
  } |        j                  d       j                  d       y t        | t              r4j                  d| j                  t	        | j
                        fz         y t        | t              r4j                  d| j                  t	        | j
                        fz         y t        | t              rCj                  d| j                  t	        | j
                        | j                         fz         y t        | t              rVj                  d| j                  d	t	        | j
                        d
       | D ]
  } |        j                  d       y t        | t              rHj                  dt	        | j
                        z         | D ]
  } |        j                  d       y t        | t               rid}t        | t"              rd}j                  d| j$                  t	        | j
                        |fz         | D ]
  } |        j                  d       y t        | t&              rj                  dt)        | j*                  d       t	        | j
                        | j,                  j                  | j.                  j0                  | j2                  fz         j5                  | j7                                j                  d       y t        | t8              r#j                  d| j7                         z         y t        | t:              rj<                  Qj<                  j?                  |       }j                  dt)        |d       | j@                  | jB                  fz         y j                  d| j@                  | jB                  fz         y J tE        d| f             )Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="z" bbox="z">
z
</figure>
z<textline bbox="%s">
z</textline>
rI   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
	Unhandled)#r3   r   r   r   r   r;   rotater  r	   rT   r
   r   get_ptsr   r:   r   r   r   r  r   r   r   rs   rt   rV   r   r   r   r   r   r   r   r   r   r1   )rF   r   r  wmoder:   r   r!   r   s        r#   r   z+XMLConverter.receive_layout.<locals>.render  s   $'

C"&++x		/BDKK!PQ R! "E5M";;*JJ|,!% *"5)*JJ}-

;'` _ D&)

@"&..(4992E!FG H\ Y D&)

@"&..(4992E!FG HV S D'*

I"&..(4992Et||~!VW XP M D(+

"&))Xdii-@B C! "E5M"

=)B A D*-

3htyy6IIJ! "E5M"

?+8 7 D),d$56/E

<"&**htyy.A5!IJ K! "E5M"

>*& % D&)

at4htyy6I HHMM4+<+<+C+CTYYPP Q 0

;'  D&)

.@A  D'*##/++88>DJJL&)$otzz4;;%O P Q 	 JJC&*jj$++%> ? @  7c;"566r$   rK   r  s   ` @@r#   r6   zXMLConverter.receive_layout  s    		;	x 	vr$   c                 $    | j                          y r   r	  r   s    r#   r
  zXMLConverter.close#  r  r$   )r   r   NNF)r   r   r   recompiler  r   r   r   r   r   r6   r
  rK   r$   r#   r  r    s>    bjj9:G=>?DIVr$   r  )%loggingr  r   	pdfdevicer   pdffontr   layoutr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   rI   rh   	getLoggerr   rz   r   r   r   r   r   r  rK   r$   r#   <module>r%     s     	 
 $ )            %  "     
g!
c cP) "$ 8,L ,bGL GXq< qr$   