
    ,he                        d dl Z d dlZddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 dd	lm
Z
 dd
lmZ  ej                  e      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d dee      Z G d d ee      Z G d! d"e      Z G d# d$e      Z G d% d&ee      Z G d' d(e      Z G d) d*e      Z G d+ d,e      Z  G d- d.e      Z! G d/ d0e!      Z" G d1 d2e!      Z# G d3 d4e      Z$ G d5 d6e$      Z% G d7 d8e$      Z& G d9 d:e      Z' G d; d<e'      Z( G d= d>e'      Z)y)?    N   )INFshorten_str)Planeapply_matrix_pt)bbox2str)fsplit)	get_bound)
matrix2str)uniqc                       e Zd ZddZd Zy)IndexAssignerc                     || _         y Nindex)selfr   s     Q/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pdfminer/layout.py__init__zIndexAssigner.__init__   s    
    c                     t        |t              r'| j                  |_        | xj                  dz  c_        y t        |t              r|D ]  }| j	                  |        y Nr   )
isinstance	LTTextBoxr   LTTextGrouprun)r   objxs      r   r   zIndexAssigner.run   sR    c9%

CIJJ!OJ 	 [) r   Nr   )__name__
__module____qualname__r   r    r   r   r   r      s    r   r   c                   ,    e Zd ZdZ	 	 	 	 	 	 	 ddZd Zy)LAParamsa  Parameters for layout analysis

    :param line_overlap: If two characters have more overlap than this they
        are considered to be on the same line. The overlap is specified
        relative to the minimum height of both characters.
    :param char_margin: If two characters are closer together than this
        margin they are considered to be part of the same word. If
        characters are on the same line but not part of the same word, an
        intermediate space is inserted. The margin is specified relative to
        the width of the character.
    :param word_margin: If two words are are closer together than this
        margin they are considered to be part of the same line. A space is
        added in between for readability. The margin is specified relative
        to the width of the word.
    :param line_margin: If two lines are are close together they are
        considered to be part of the same paragraph. The margin is
        specified relative to the height of a line.
    :param boxes_flow: Specifies how much a horizontal and vertical position
        of a text matters when determining the order of text boxes. The value
        should be within the range of -1.0 (only horizontal position
        matters) to +1.0 (only vertical position matters).
    :param detect_vertical: If vertical text should be considered during
        layout analysis
    :param all_texts: If layout analysis should be performed on text in
        figures.
    c                 f    || _         || _        || _        || _        || _        || _        || _        y r   )line_overlapchar_marginline_marginword_margin
boxes_flowdetect_vertical	all_texts)r   r(   r)   r*   r+   r,   r-   r.   s           r   r   zLAParams.__init__<   s=     )&&&$."r   c                 d    d| j                   | j                  | j                  | j                  fz  S )NzM<LAParams: char_margin=%.1f, line_margin=%.1f, word_margin=%.1f all_texts=%r>)r)   r*   r+   r.   r   s    r   __repr__zLAParams.__repr__M   s3    _!!4#3#3T5E5Et~~VW 	Xr   N)      ?g       @r2   g?r2   FFr!   r"   r#   __doc__r   r1   r$   r   r   r&   r&       s)    8 "   !& "Xr   r&   c                       e Zd ZdZd Zy)LTItemz)Interface for things that can be analyzedc                      y)zPerform the layout analysis.Nr$   r   laparamss     r   analyzezLTItem.analyzeU   s    r   N)r!   r"   r#   r4   r:   r$   r   r   r6   r6   R   s
    3r   r6   c                       e Zd ZdZd Zd Zy)LTTextz#Interface for things that have textc                 X    d| j                   j                  d| j                         dS N< >)	__class__r!   get_textr0   s    r   r1   zLTText.__repr__]   s!    (($--/; 	<r   c                     t         )zText contained in this objectNotImplementedErrorr0   s    r   rC   zLTText.get_texta   s    !!r   N)r!   r"   r#   r4   r1   rC   r$   r   r   r<   r<   Z   s    -<"r   r<   c                   d    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zy)LTComponentzObject with a bounding boxc                 P    t         j                  |        | j                  |       y r   )r6   r   set_bboxr   bboxs     r   r   zLTComponent.__init__i   s    dr   c                 b    d| j                   j                  dt        | j                        dS r>   )rB   r!   r	   rL   r0   s    r   r1   zLTComponent.__repr__n   s%    (((499*=? 	@r   c                     t         r   
ValueErrorr   _s     r   __lt__zLTComponent.__lt__s       r   c                     t         r   rO   rQ   s     r   __le__zLTComponent.__le__v   rT   r   c                     t         r   rO   rQ   s     r   __gt__zLTComponent.__gt__y   rT   r   c                     t         r   rO   rQ   s     r   __ge__zLTComponent.__ge__|   rT   r   c                     |\  }}}}|| _         || _        || _        || _        ||z
  | _        ||z
  | _        || _        y r   )x0y0x1y1widthheightrL   )r   rL   r\   r]   r^   r_   s         r   rJ   zLTComponent.set_bbox   sL    RRU
e	r   c                 B    | j                   dk  xs | j                  dk  S Nr   )r`   ra   r0   s    r   is_emptyzLTComponent.is_empty   s    zzQ2$++"22r   c                     t        |t              sJ t        t        |                   |j                  | j
                  k  xr | j                  |j
                  k  S r   )r   rH   strtyper\   r^   r   r   s     r   is_hoverlapzLTComponent.is_hoverlap   A    #{+;Sc^;vv 6TWW%66r   c                    t        |t              sJ t        t        |                   | j	                  |      ryt        t        | j                  |j                  z
        t        | j                  |j                  z
              S rc   	r   rH   rf   rg   ri   minabsr\   r^   rh   s     r   	hdistancezLTComponent.hdistance   \    #{+;Sc^;C s477366>*C,?@@r   c                    t        |t              sJ t        t        |                   | j	                  |      rLt        t        | j                  |j                  z
        t        | j                  |j                  z
              S yrc   rl   rh   s     r   hoverlapzLTComponent.hoverlap   \    #{+;Sc^;C s477366>*C,?@@r   c                     t        |t              sJ t        t        |                   |j                  | j
                  k  xr | j                  |j
                  k  S r   )r   rH   rf   rg   r]   r_   rh   s     r   is_voverlapzLTComponent.is_voverlap   rj   r   c                    t        |t              sJ t        t        |                   | j	                  |      ryt        t        | j                  |j                  z
        t        | j                  |j                  z
              S rc   	r   rH   rf   rg   ru   rm   rn   r]   r_   rh   s     r   	vdistancezLTComponent.vdistance   rp   r   c                    t        |t              sJ t        t        |                   | j	                  |      rLt        t        | j                  |j                  z
        t        | j                  |j                  z
              S yrc   rw   rh   s     r   voverlapzLTComponent.voverlap   rs   r   N)r!   r"   r#   r4   r   r1   rS   rV   rX   rZ   rJ   rd   ri   ro   rr   ru   rx   rz   r$   r   r   rH   rH   f   sN    $
@
	37A7Ar   rH   c                       e Zd ZdZddZd Zy)LTCurvezA generic Bezier curveNc                     t         j                  | t        |             || _        || _        || _        || _        || _        || _        || _	        y r   )
rH   r   r   pts	linewidthstrokefillevenoddstroking_colornon_stroking_color)r   r   r~   r   r   r   r   r   s           r   r   zLTCurve.__init__   sL    T9S>2"	,"4r   c                 F    dj                  d | j                  D              S )N,c              3   &   K   | ]	  }d |z    yw)z	%.3f,%.3fNr$   ).0ps     r   	<genexpr>z"LTCurve.get_pts.<locals>.<genexpr>   s     :Aa:s   )joinr~   r0   s    r   get_ptszLTCurve.get_pts   s    xx::::r   FFFNN)r!   r"   r#   r4   r   r   r$   r   r   r|   r|      s     	;r   r|   c                       e Zd ZdZddZy)LTLinezOA single straight line.

    Could be used for separating text or figures.
    Nc	           
      @    t         j                  | |||g|||||       y r   r|   r   )	r   r   p0p1r   r   r   r   r   s	            r   r   zLTLine.__init__   s&    y2r(FD'>[mnr   r   r!   r"   r#   r4   r   r$   r   r   r   r      s    
r   r   c                       e Zd ZdZddZy)LTRectzMA rectangle.

    Could be used for framing another pictures or figures.
    Nc           
      b    |\  }}	}
}t         j                  | |||	f|
|	f|
|f||fg|||||       y r   r   )r   r   rL   r   r   r   r   r   r\   r]   r^   r_   s               r   r   zLTRect.__init__   sT    RRyB8b"XBx"b*RTZ\`biky  |N  	Or   r   r   r$   r   r   r   r      s    
r   r   c                       e Zd ZdZd Zd Zy)LTImagezKAn image object.

    Embedded images can be in JPEG, Bitmap or JBIG2.
    c                 x   t         j                  | |       || _        || _        |j	                  d      |j	                  d      f| _        |j	                  d      | _        |j	                  dd      | _        |j	                  d      | _        t        | j                  t              s| j                  g| _        y )N)WWidth)HHeight)IM	ImageMask)BPCBitsPerComponentr   )CS
ColorSpace)rH   r   namestreamget_anysrcsize	imagemaskbits
colorspacer   list)r   r   r   rL   s       r   r   zLTImage.__init__   s    T4(	~679(;<NN#>B	 ..)=>$//40#/DOr   c           	          d| j                   j                  d| j                  dt        | j                        d| j
                  d	S Nr?   () r@   rA   )rB   r!   r   r	   rL   r   r0   s    r   r1   zLTImage.__repr__   s1    (($))$))$dll4 	5r   Nr3   r$   r   r   r   r      s    
5r   r   c                       e Zd ZdZd Zd Zy)LTAnnoa  Actual letter in the text as a Unicode string.

    Note that, while a LTChar object has actual boundaries, LTAnno objects does
    not, as these are "virtual" characters, inserted by a layout analyzer
    according to the relationship between two characters (e.g. a space).
    c                     || _         y r   _text)r   texts     r   r   zLTAnno.__init__   s    
r   c                     | j                   S r   r   r0   s    r   rC   zLTAnno.get_text       zzr   N)r!   r"   r#   r4   r   rC   r$   r   r   r   r      s    r   r   c                   (    e Zd ZdZd Zd Zd Zd Zy)LTCharz.Actual letter in the text as a Unicode string.c                 f   t         j                  |        || _        || _        |j                  | _        |	| _        |
| _        ||z  |z  | _        |j                         rT|j                         |z  }|\  }}||dz  }n||z  dz  }d|z
  |z  dz  }| }||z   }||| j                  z   f}||z   |f}n@|j                         |z  }|j                         |z  }||z   }d|f}| j                  ||z   f}| j                  \  }}}}}}d||z  |z  k  xr ||z  dk  | _        t        | j                  |      \  }}t        | j                  |      \  }}||k  r||}}||k  r||}}t        j                  | ||||f       |j                         r| j                  | _        y | j"                  | _        y )Nr2   gMbP?i  r   )r<   r   r   matrixfontnamencsgraphicstateadvis_vertical	get_width
get_heightget_descentuprightr   rH   r`   sizera   )r   r   fontfontsizescalingriser   	textwidthtextdispr   r   r`   vxvytxtybllburra   descentabcdefr\   r]   r^   r_   s                                 r   r   zLTChar.__init__  s   
(x''1NN$x/EHRzS[(]T))x'$.BBdBr$(({#Ce8R.C __&1F&&(83G4Bb'C88RY'C![[Aq!QAaCK4AaC1H"4;;4R"4;;4R7BR7BRTBB#34

DI 	 DIr   c                     d| j                   j                  dt        | j                        dt	        | j
                        d| j                  d| j                  d| j                         dS )Nr?   r@    matrix=z font=z adv=z text=rA   )	rB   r!   r	   rL   r   r   r   r   rC   r0   s    r   r1   zLTChar.__repr__4  sE    (((499*=DKK($--" 	#r   c                     | j                   S r   r   r0   s    r   rC   zLTChar.get_text:  r   r   c                      y)z<Returns True if two characters can coexist in the same line.Tr$   rh   s     r   is_compatiblezLTChar.is_compatible=  s    r   N)r!   r"   r#   r4   r   r1   rC   r   r$   r   r   r   r     s    8+Z#r   r   c                   4    e Zd ZdZd Zd Zd Zd Zd Zd Z	y)	LTContainerz(Object that can be extended and analyzedc                 >    t         j                  | |       g | _        y r   )rH   r   _objsrK   s     r   r   zLTContainer.__init__E  s    T4(
r   c                 ,    t        | j                        S r   )iterr   r0   s    r   __iter__zLTContainer.__iter__J  s    DJJr   c                 ,    t        | j                        S r   )lenr   r0   s    r   __len__zLTContainer.__len__M  s    4::r   c                 :    | j                   j                  |       y r   )r   appendrh   s     r   addzLTContainer.addP  s    

#r   c                 4    |D ]  }| j                  |        y r   )r   )r   objsr   s      r   extendzLTContainer.extendT  s     	CHHSM	r   c                 H    | j                   D ]  }|j                  |        y r   )r   r:   )r   r9   r   s      r   r:   zLTContainer.analyzeY  s$    :: 	"CKK!	"r   N)
r!   r"   r#   r4   r   r   r   r   r   r:   r$   r   r   r   r   B  s#    2
 
r   r   c                       e Zd Zd Zd Zy)LTExpandableContainerc                 `    t         j                  | t        t        t         t         f       y r   )r   r   r   r0   s    r   r   zLTExpandableContainer.__init__`  s&    TSD3$sd#;<r   c           	      J   t         j                  | |       | j                  t        | j                  |j                        t        | j
                  |j
                        t        | j                  |j                        t        | j                  |j                        f       y r   )	r   r   rJ   rm   r\   r]   maxr^   r_   rh   s     r   r   zLTExpandableContainer.addd  sk    c"s477CFF+S#&&-A477CFF+S#&&-AC 	Dr   N)r!   r"   r#   r   r   r$   r   r   r   r   _  s    r   r   c                       e Zd Zd Zd Zy)LTTextContainerc                 X    t         j                  |        t        j                  |        y r   )r<   r   r   r0   s    r   r   zLTTextContainer.__init__l  s    &&t,r   c                 2    dj                  d | D              S )N c              3   ^   K   | ]%  }t        |t              s|j                          ' y wr   )r   r<   rC   )r   r   s     r   r   z+LTTextContainer.get_text.<locals>.<genexpr>r  s     Q#C9Ps||~Qs   --)r   r0   s    r   rC   zLTTextContainer.get_textq  s    wwQQQQr   N)r!   r"   r#   r   rC   r$   r   r   r   r   k  s    
Rr   r   c                   (    e Zd ZdZd Zd Zd Zd Zy)
LTTextLinezContains a list of LTChar objects that represent a single text line.

    The characters are aligned either horizontally or vertically, depending on
    the text's writing mode.
    c                 <    t         j                  |        || _        y r   )r   r   r+   r   r+   s     r   r   zLTTextLine.__init__|  s      &&r   c                     d| j                   j                  dt        | j                        d| j	                         dS r>   )rB   r!   r	   rL   rC   r0   s    r   r1   zLTTextLine.__repr__  s-    (((499*=" 	#r   c                 n    t         j                  | |       t        j                  | t	        d             y )N
)r   r:   r   r   r   r8   s     r   r:   zLTTextLine.analyze  s'    h/fTl+r   c                     t         r   rE   )r   planeratios      r   find_neighborszLTTextLine.find_neighbors  s    !!r   N)r!   r"   r#   r4   r   r1   r:   r  r$   r   r   r   r   u  s    
#

"r   r   c                       e Zd Zd Zd Zd Zy)LTTextLineHorizontalc                 H    t         j                  | |       t        | _        y r   )r   r   r   _x1r   s     r   r   zLTTextLineHorizontal.__init__      D+.4r   c                 Z   t        |t              rt| j                  rh| j                  t        |j                  |j
                        z  }| j                  |j                  |z
  k  rt        j                  | t        d             |j                  | _        t        j                  | |       y Nr@   )r   r   r+   r   r`   ra   r	  r\   r   r   r   r^   r   r   r   margins      r   r   zLTTextLineHorizontal.add  su    c6"t'7'7%%CIIszz(BBFxx#&&-'fSk266tS!r   c                    || j                   z  }|j                  | j                  | j                  |z
  | j                  | j
                  |z   f      }|D cg c]  }t        |t              rqt        |j                   | j                   z
        |k  rLt        |j                  | j                  z
        |k  s%t        |j                  | j                  z
        |k  r| c}S c c}w r   )	ra   findr\   r]   r^   r_   r   r  rn   r   r  r  r   r   r   s         r   r  z#LTTextLineHorizontal.find_neighbors  s    $++zz477DGGAItww	BC# 0s$89

4;;./!3(1,(1,	  0 	0 0   BC"Nr!   r"   r#   r   r   r  r$   r   r   r  r        
0r   r  c                       e Zd Zd Zd Zd Zy)LTTextLineVerticalc                 H    t         j                  | |       t         | _        y r   )r   r   r   _y0r   s     r   r   zLTTextLineVertical.__init__  r
  r   c                 Z   t        |t              rt| j                  rh| j                  t        |j                  |j
                        z  }|j                  |z   | j                  k  rt        j                  | t        d             |j                  | _        t        j                  | |       y r  )r   r   r+   r   r`   ra   r_   r  r   r   r   r]   r   r  s      r   r   zLTTextLineVertical.add  su    c6"t'7'7%%CIIszz(BBFvvf}txx'fSk266tS!r   c                    || j                   z  }|j                  | j                  |z
  | j                  | j                  |z   | j
                  f      }|D cg c]  }t        |t              rqt        |j                   | j                   z
        |k  rLt        |j                  | j                  z
        |k  s%t        |j
                  | j
                  z
        |k  r| c}S c c}w r   )	r`   r  r\   r]   r^   r_   r   r  rn   r  s         r   r  z!LTTextLineVertical.find_neighbors  s    $**zz47719dggtwwqy$''BC# 0s$67		$**,-1(1,(1,	  0 	0 0r  Nr  r$   r   r   r  r    r  r   r  c                       e Zd ZdZd Zd Zy)r   zRepresents a group of text chunks in a rectangular area.

    Note that this box is created by geometric analysis and does not necessarily
    represents a logical boundary of the text. It contains a list of
    LTTextLine objects.
    c                 <    t         j                  |        d| _        y )N)r   r   r   r0   s    r   r   zLTTextBox.__init__  s      &
r   c           	          d| j                   j                  d| j                  dt        | j                        d| j                         d	S r   )rB   r!   r   r	   rL   rC   r0   s    r   r1   zLTTextBox.__repr__  s5    ((Xdii0$--/C 	Dr   Nr3   r$   r   r   r   r     s    
Dr   r   c                       e Zd Zd Zd Zy)LTTextBoxHorizontalc                 j    t         j                  | |       | j                  j                  d        y )Nc                     | j                    S r   )r_   r   s    r   <lambda>z-LTTextBoxHorizontal.analyze.<locals>.<lambda>       r   keyr   r:   r   sortr8   s     r   r:   zLTTextBoxHorizontal.analyze  (    $)

/0r   c                      y)Nzlr-tbr$   r0   s    r   get_writing_modez$LTTextBoxHorizontal.get_writing_mode      r   Nr!   r"   r#   r:   r,  r$   r   r   r   r         
r   r   c                       e Zd Zd Zd Zy)LTTextBoxVerticalc                 j    t         j                  | |       | j                  j                  d        y )Nc                     | j                    S r   )r^   r#  s    r   r$  z+LTTextBoxVertical.analyze.<locals>.<lambda>  r%  r   r&  r(  r8   s     r   r:   zLTTextBoxVertical.analyze  r*  r   c                      y)Nztb-rlr$   r0   s    r   r,  z"LTTextBoxVertical.get_writing_mode  r-  r   Nr.  r$   r   r   r1  r1    r/  r   r1  c                       e Zd Zd Zy)r   c                 P    t         j                  |        | j                  |       y r   )r   r   r   )r   r   s     r   r   zLTTextGroup.__init__  s      &Dr   N)r!   r"   r#   r   r$   r   r   r   r     s    r   r   c                       e Zd Zd Zy)LTTextGroupLRTBc                 p    t         j                  |        | j                  j                  fd       y )Nc                     dj                   z
  | j                  z  dj                   z   | j                  | j                  z   z  z
  S r   )r,   r\   r]   r_   r   r9   s    r   r$  z)LTTextGroupLRTB.analyze.<locals>.<lambda>  sD    h111CFF;h111CFF366MBC r   r&  r   r:   r   r)  r8   s    `r   r:   zLTTextGroupLRTB.analyze  s1    D(+

 C 	D 	r   Nr!   r"   r#   r:   r$   r   r   r8  r8        r   r8  c                       e Zd Zd Zy)LTTextGroupTBRLc                 p    t         j                  |        | j                  j                  fd       y )Nc                     dj                   z    | j                  | j                  z   z  dj                   z
  | j                  z  z
  S r   )r,   r\   r^   r_   r;  s    r   r$  z)LTTextGroupTBRL.analyze.<locals>.<lambda>  sF    x2223SVVCFF]Cx222SVV<= r   r&  r<  r8   s    `r   r:   zLTTextGroupTBRL.analyze  s/    D(+

 = 	> 	r   Nr=  r$   r   r   r@  r@    r>  r   r@  c                   *    e Zd Zd Zd Zd Zd Zd Zy)LTLayoutContainerc                 >    t         j                  | |       d | _        y r   )r   r   groupsrK   s     r   r   zLTLayoutContainer.__init__  s    T4(r   c              #     K   d }d }|D ]E  }|=|j                  |      xr |j                  |      xr t        |j                  |j                        |j                  z  |j                  |      k  xr? |j                  |      t        |j                  |j                        |j                  z  k  }|j                  xr |j                  |      xr |j                  |      xr t        |j                  |j                        |j                  z  |j                  |      k  xr? |j                  |      t        |j                  |j                        |j                  z  k  }|rt        |t              s|r"t        |t               r|j#                  |       n|| d }n|r:|s8t!        |j$                        }|j#                  |       |j#                  |       nh|r:|s8t        |j$                        }|j#                  |       |j#                  |       n,t        |j$                        }|j#                  |       | d }|}H |&t        |j$                        }|j#                  |       | y wr   )r   ru   rm   ra   r(   rz   ro   r   r`   r)   r-   ri   rr   rx   r   r  r  r   r+   )r   r9   r   obj0lineobj1halignvaligns           r   group_objectszLTLayoutContainer.group_objects
  sA     >	D ,,T2 O**40Ot{{DKK88;P;PP==./O  >>$/tzz4::69M9MMN	 * #22 Q,,T2Q**40Q tzz4::69N9NN==./Q
  >>$/t{{DKK88;O;OOP  
41E F
41C DHHTN%JDf1(2F2FG3H4H4HI3H4H4HI"
#D}>	~ <'(<(<=DHHTN
s   I=I?c              #   L  K   t        | j                        }|j                  |       i }|D ]  }|j                  ||j                        }||vr$g }|D ]8  }|j                  |       ||v s|j                  |j                  |             : t        |t              rt               }	n
t               }	t        |      D ]  }
|	j                  |
       |	||
<     t               }|D ]7  }||vr||   }	|	|v r|j                  |	       |	j                         r4|	 9 y wr   )r   rL   r   r  r*   r   popr   r  r   r1  r   r   setrd   )r   r9   linesr  boxesrI  	neighborsmembersrJ  boxr   dones               r   group_textlinesz!LTLayoutContainer.group_textlinesS  s(    dii U 	!D++E83G3GHI9$hG! 4t$5=NN599T?34 $ 45)+')G} ! c
!	! u 	D5 (+Cd{HHSM<<>		 	s   A+D$.B.D$D$c           
          d }fd}g }t        t        |            D ]Y  }||   }t        |dz   t        |            D ]7  }||   }	|j                  d |||	      t        |      t        |	      ||	f       9 [ t	        j
                  |       t        | j                        j                  |       t               }
t        |      dkD  r%t	        j                  |      \  }}}}}}	||
vr||
vr|r& |||	      rt	        j                  |d|||||	f       [t        |t        t        f      st        |	t        t        f      rt        ||	g      }nt        ||	g      }j!                  |       j!                  |	       |
j#                  ||g       D ]7  }t	        j                  |d |||      t        |      t        |      ||f       9 j%                  |       t        |      dkD  r%t'              S )aq  Group textboxes hierarchically.

         Get pair-wise distances, via dist func defined below, and then merge from the closest textbox pair. Once
         obj1 and obj2 are merged / grouped, the resulting group is considered as a new object, and its distances to
         other objects & groups are added to the process queue.

         For performance reason, pair-wise distances and object pair info are maintained in a heap of
         (idx, dist, id(obj1), id(obj2), obj1, obj2) tuples. It ensures quick access to the smallest element. Note that
         since comparison operators, e.g., __lt__, are disabled for LTComponent, id(obj) has to appear before obj in
         element tuples.

        :param laparams: LAParams object.
        :param boxes: All textbox objects to be grouped.
        :return: a list that has only one element, the final top level textbox.
        c                    t        | j                  |j                        }t        | j                  |j                        }t        | j                  |j                        }t        | j
                  |j
                        }||z
  ||z
  z  | j                  | j                  z  z
  |j                  |j                  z  z
  S )a  A distance function between two TextBoxes.

            Consider the bounding rectangle for obj1 and obj2.
            Return its area less the areas of obj1 and obj2,
            shown as 'www' below. This value may be negative.
                    +------+..........+ (x1, y1)
                    | obj1 |wwwwwwwwww:
                    +------+www+------+
                    :wwwwwwwwww| obj2 |
            (x0, y0) +..........+------+
            )rm   r\   r]   r   r^   r_   r`   ra   )rJ  obj2r\   r]   r^   r_   s         r   distz/LTLayoutContainer.group_textboxes.<locals>.dist  s     TWWdgg&BTWWdgg&BTWWdgg&BTWWdgg&BURUOdjj&<<tzz$++?UUVr   c                 f   t        | j                  |j                        }t        | j                  |j                        }t        | j                  |j                        }t        | j
                  |j
                        }t        j                  ||||f            }|j                  | |f      S )z8Check if there's any other object between obj1 and obj2.)	rm   r\   r]   r   r^   r_   rP  r  
difference)rJ  rZ  r\   r]   r^   r_   r   r  s          r   isanyz0LTLayoutContainer.group_textboxes.<locals>.isany  s    TWWdgg&BTWWdgg&BTWWdgg&BTWWdgg&Buzz2r2r"234D??D$<00r   r   Fr   T)ranger   r   idheapqheapifyr   rL   r   rP  heappopheappushr   r1  r@  r8  removeupdater   r   )r   r9   rR  r[  r^  distsirJ  jrZ  rV  
skip_isanyr   id1id2groupotherr  s                    @r   group_textboxesz!LTLayoutContainer.group_textboxesq  s   "	W$	1 s5z" 	+A8D1Q3E
+ +QxeT$%5r$xD"D* ++	+ 	edii Uu%j1n49MM%4H1ZCdD4co%d"3NN54CdD*IJd%6$HI"4*;_)MN+T4L9E+T4L9ET"T"S#J'" kENN55$ue2DbiQSTYQZ\ach*ijk		% % %j1n& E{r   c                 j   t        d |       \  }}|D ]  }|j                  |        |sy t        | j                  ||            }t        d |      \  }}|D ]  }|j                  |        t        | j	                  ||            }d|j
                  k  ry|j
                  dk  rj|rh| j                  ||      | _        t               }| j                  D ]$  }	|	j                  |       |j                  |	       & |j                  d        nd }
|j                  |
       ||z   |z   | _        y )Nc                 "    t        | t              S r   )r   r   r#  s    r   r$  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    :c63J r   c                 "    | j                         S r   )rd   r#  s    r   r$  z+LTLayoutContainer.analyze.<locals>.<lambda>  s    #,,. r   r  r   c                     | j                   S r   r   rU  s    r   r$  z+LTLayoutContainer.analyze.<locals>.<lambda>  s
    399 r   r&  c                     t        | t              rd| j                   | j                  fS d| j                  | j                  fS )Nr   r   )r   r1  r^   r]   r\   rt  s    r   getkeyz)LTLayoutContainer.analyze.<locals>.getkey  s9    c#45w//svvsvv..r   )r
   r:   r   rM  rW  r,   ro  rF  r   r   r)  r   )r   r9   textobjs	otherobjsr   	textlinesempties	textboxesassignerrm  rv  s              r   r:   zLTLayoutContainer.analyze  s/    !''JD Q9 	"CKK!	"++Hh?@	%&@)L) 	"CKK!	"--h	BC	$$$)<)<)By..xCDK$H $h'U#$ NN4N5/
 NNvN&*W4
r   N)r!   r"   r#   r   rM  rW  ro  r:   r$   r   r   rD  rD    s    FR<KZr   rD  c                   "    e Zd ZdZd Zd Zd Zy)LTFigurezRepresents an area used by PDF Form objects.

    PDF Forms can be used to present figures or pictures by embedding yet
    another PDF document within a page. Note that LTFigure objects can appear
    recursively.
    c           	          || _         | _        |\  }}}}t        fd||f||z   |f|||z   f||z   ||z   ffD              }t        j	                  | |       y )Nc              3   @   K   | ]  \  }}t        ||f        y wr   r   )r   r   qr   s      r   r   z$LTFigure.__init__.<locals>.<genexpr>  s*      Q#a )!Q8 Qs   )r   r   r   rD  r   )r   r   rL   r   r   ywhs      `    r   r   zLTFigure.__init__  st    	Aq! Q)*A1a1ac(QqS!A#J'OQ Q""4.r   c           
          d| j                   j                  d| j                  dt        | j                        dt        | j                        d	S )Nr?   r   r   r   rA   )rB   r!   r   r	   rL   r   r   r0   s    r   r1   zLTFigure.__repr__  s7    (($))$))$j&=? 	@r   c                 J    |j                   sy t        j                  | |       y r   )r.   rD  r:   r8   s     r   r:   zLTFigure.analyze  s!    !!!!$1r   N)r!   r"   r#   r4   r   r1   r:   r$   r   r   r~  r~    s    @
r   r~  c                       e Zd ZdZddZd Zy)LTPagezRepresents an entire page.

    May contain child objects like LTTextBox, LTFigure, LTImage, LTRect,
    LTCurve and LTLine.
    c                 L    t         j                  | |       || _        || _        y r   )rD  r   pageidrotate)r   r  rL   r  s       r   r   zLTPage.__init__  s#    ""4.r   c           	          d| j                   j                  d| j                  dt        | j                        d| j
                  d	S )Nr?   r   r   z rotate=rA   )rB   r!   r  r	   rL   r  r0   s    r   r1   zLTPage.__repr__  s1    (($++$))$dkk3 	4r   Nr    r3   r$   r   r   r  r    s    4r   r  )*ra  loggingutilsr   r   r   r   r	   r
   r   r   r   	getLoggerr!   loggerobjectr   r&   r6   r<   rH   r|   r   r   r   r   r   r   r   r   r   r  r  r   r   r1  r   r8  r@  rD  r~  r  r$   r   r   <module>r     s     #  "     			8	$F  /Xv /XdV 	"V 	"I& IX;k ;$W 	W 	5k 52VV  ;[& ;|+ :	K 	R+V R" "40: 020 02D D&) 	 / k k W Wt  :4 4r   