
    ,hL                         d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZ ddl	m
Z
 d Zd ZddZd Zd	 Zdd
Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z e!dk(  r e         yy)    N)DictListSet   )pymupdfc                 ,    d| z  j                  dd      S )Nz %s K   -)center)xs    R/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pymupdf/__main__.pymycenterr      s    QJr3''    c                     |d   }|d   }|dk(  r| j                  |      S d }t        j                  | |      }t        j                  | |      }	 |j                  |j                  k(  r1|j                  |j                  cxk(  rdk(  rn n|j
                  dk(  s9t        j                  d||fz         t        j                  |       d} ||      S t        j                  |      }|j                  |j                         dx}} ||      S )zReturn image for a given XREF.r   r   c                     | j                   j                  dk7  r| S t        j                  t        j                  |       }|S )N   )
colorspacenr   PixmapcsRGB)pixtpixs     r   getimagezrecoverpix.<locals>.getimage   s2    >>q J~~gmmS1r   z&Warning: unsupported /SMask %i for %i:N)	extract_imager   r   irectalphar   message	set_alphasamples)docitemr   sr   pix1pix2r   s           r   
recoverpixr%      s    QAQAAv  ## >>#q!D>>#q!D
 JJ$**$tzz)FQ)F466UV;@Aq6IJ~
..
CMM$,,D4 C=r   c                 |   t        j                  |       }|j                  s|du rt        j                  d       d}|j
                  s|S |rM|j                  |      }|st        j                  d       |du rt        j                  |dkD  rddz  nd       |S t        j                  d	|j                  z         |S )
z!Open and authenticate a document.Tz$this command supports PDF files onlyzauthentication unsuccessful   zauthenticated as %sowneruserz'%s' requires a password)	r   openis_pdfsysexit
needs_passauthenticater   name)filenamepasswordshowpdfr    rcs         r   	open_filer7   9   s    
,,x
 C::#+78	B>>
h'HH234<OOrAv1G;6R J 	+chh67Jr   c                     t        | j                         D cg c]  }t        |       c}      dz   }| j                         D ]0  \  }}|j	                  |      d|}t        j                  |       2 yc c}w )zPrint a Python dictionary.r   : N)maxkeyslenitemsrjustr   r   )r!   klvmsgs        r   
print_dictrC   L   sb    TYY[)SV)*Q.A

 1''!*a( *s   A8c                    t        j                  d|z         | j                  |      }t        j                  |       | j                  |      rj|j	                         }	 |j                  d      dz   }||   }|j                  d      rd}t        j                  d|z         t        j                  d       t        j                  d       y	# t        $ r d}Y Pw xY w)
zPrint an object given by XREF number.

    Simulate the PDF source in "pretty" format.
    For a stream also print its size.
    z%i 0 objz/Lengthr   z0 Runknownzstream
...%s bytes	endstreamendobjN)r   r   xref_objectxref_is_streamsplitindexendswith	Exception)r    xrefxref_strtempidxsizes         r   
print_xrefrS   T   s     OOJ%&t$HOOH
$~~	**Y'!+C9D}}U#  	-45$OOH	  	D	s   !,C CCc           	      :   t        |dz
        }| j                  d|      j                  dd      } | j                  d      }g }t        |      D ]  \  }}|dz   }|j	                         rOt        |      }	d|	cxk  r|k  rn n|j                  t        |             nt        j                  d||fz         k	 |j                  d      \  }
}t        |
      }
t        |      }d
cxk  r|k  rn ndcxk  r|k  sn t        j                  d||fz         |
k(  r|j                  |
       |
|k  r|t        t        |
|dz               z  }|t        t        |
|dz
  d	            z  }" |S # t        $ r t        j                  d||fz         Y w xY w)
aK  Transform a page / xref specification into a list of integers.

    Args
    ----
        rlist: (str) the specification
        limit: maximum number, i.e. number of pages, number of objects
        what: a string to be used in error messages
    Returns
    -------
        A list of integers representing the specification.
    r   N  ,zbad %s specification at item %ir
   z%bad %s range specification at item %ir'   )strreplacerJ   	enumerate	isdecimalintappendr-   r.   rM   listrange)rlistlimitwhatrU   	rlist_arrout_listseqr!   r   ii1i2s               r   get_listrj   k   s    	EAIAMM#q!))#r2EC IHy) 4	T!G>>D	AA~~D	*:dAYFG	JZZ_FBRBRB R%AOeOHH<ayHI8OOB7U2rAv.//HU2rAvr233H548 O  	JHH<ayHI	Js   ;*E44#FFc                    t        | j                  | j                  d      }t        j                  j                  | j                        dz  }d}|dkD  r|dz  }d}t        |d      }|j                  }t        j                  d| j                  |j                  |j                         dz
  |||d   |d	   fz         |j                  }|d
kD  r1|j                         }t        j                  d||dk7  rdndfz         |j                         }|d
kD  rt        j                  d|z         t        j                          | j                  rNt        j                  t!        d             |j#                         }t%        ||       t        j                          | j                  rGt        j                  t!        d             t'        |j                         t        j                          | j(                  rkt        j                  t!        d             t+        | j(                  |j                         d      }|D ]"  }t%        ||       t        j                          $ | j,                  rt        j                  t!        d             t+        | j,                  |j                  dz         }	|	D ]P  }
|
dz
  }|j/                  |      }t        j                  d|
z         t%        ||       t        j                          R | j0                  rUt        j                  t!        d             t        j                  |j3                                t        j                          |j5                          y )NTi   KBi  MBr   z7'%s', pages: %i, objects: %i, %g %s, %s, encryption: %sformat
encryptionr   z5document contains %i root form fields and is %ssigned   znot rW   z#document contains %i embedded fileszPDF catalogzPDF metadatazobject informationrN   )rc   zpage informationzPage %i:zPDF trailer)r7   inputr3   ospathgetsizeroundmetadatar   r   
page_countxref_lengthis_form_pdfget_sigflagsembfile_countcatalogr   pdf_catalogrS   rC   xrefsrj   pages	page_xreftrailerpdf_trailerclose)argsr    rR   flagmetar   r"   rN   xreflpagelpnos              r   r4   r4      s   
DJJt
4C77??4::&-DDd{q>D<<DOOAJJNNOO!N
		
 	A1uCAF&+,	
 	A1u=ABOO||/0 3}}013<< zz!567S__%6VD 	DsD!OO	 zz!345S^^a%78 	CaA==#DOOJ,-sD!OO	 ||/0)*IIKr   c                    t        | j                  | j                  d      }| j                  }dj	                  |      }| j
                  s|j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  || j                  | j                  | j                          y t#        | j
                  |j$                  dz         }t'        j(                         }|D ]  }|dz
  }|j+                  |||        |j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  || j                  | j                  | j                          |j-                          |j-                          y )NTr5   keepnonezrc4-40zrc4-128zaes-128zaes-256)
garbagedeflateprettycleanasciilinearro   owner_pwuser_pwpermissionsr   	from_pageto_page)r7   rq   r3   ro   rK   r   saveoutputr   compressr   sanitizer   r   r)   r*   
permissionrj   rw   r   r+   
insert_pdfr   )r   r    ro   encryptr   outdocr   r   s           r   r   r      sb   
DJJ4
8CJIOOG ::KKLLMM;;--**;;ZZII 	 	
 	 TZZ!!34E\\^F 7!G#A67 KK{{mmjj{{		OO   IIK
LLN
r   c                 4   | j                   }t        j                         }|D ]  }|j                  d      }t	        |      dkD  r|d   nd}t        |d   |d      }dj                  |dd       }|r,t        dj                  |dd       |j                  dz         }nt        d|j                  dz         }|D ]  }	|j                  ||	dz
  |	dz
          |j                           |j                  | j                  d	d
       |j                          y)z&Join pages from several PDF documents.rX   r   Nr   Tr   r(   r   r   )r   r   )rq   r   r+   rJ   r<   r7   joinrj   rw   r`   r   r   r   r   )
r   doc_listr    src_itemsrc_listr3   srcr   	page_listrg   s
             r   doc_joinr     s    zzH
,,.C >>#&"%h-!"38A;X48!"& (12,!7!9KLIa!!34I 	@ANN3!a%QN?	@		 HHT[[!TH2IIKr   c           	      P   t        | j                  | j                  d      }|j                         s:| j                  r| j                  | j                  k(  rt        j                  d       t        | j                  | j                        }| j                  rt        | j                        n	t               }t        |j                               }|r||k  st        j                  d       n|}|st        j                  d       |t        |j                               z  }|r!t        j                  dt        |      z         |D ]i  }|j                  |      }|j                  |      }|j                  |||d   |d   |d	   
       t!        j"                  d|d|j                  d       k |j%                          | j                  r7| j                  | j                  k7  r|j'                  | j                  d       n|j)                          |j%                          y)z!Copy embedded files between PDFs.Tr   cannot save PDF incrementallyz%not all names are contained in sourceznothing to copyz2following names already exist in receiving PDF: %sr2   	ufilenamedescr2   r   r   zcopied entry 'z' from ''rp   r   N)r7   rq   r3   can_save_incrementallyr   r-   r.   source	pwdsourcer1   setembfile_namesrY   embfile_infoembfile_getembfile_addr   r   r   r   saveIncr)	r   r    r   names	src_names	intersectr!   infobuffs	            r   embedded_copyr     s   
DJJ4
8C%%'KK4;;$**401
DKK
0C"iiC		NSUEC%%'(I	!HH<="#C--/00IEIVW 
J%t$*%;'f 	 	
 	sxxHI
J IIK{{t{{djj0a(IIKr   c                    t        | j                  | j                  d      }|j                         s:| j                  r| j                  | j                  k(  rt        j                  d       	 |j                  | j                         | j                  r| j                  | j                  k(  r|j                          n|j                  | j                  d       |j                          y# t        t        j                  j                  f$ r/}t        j                  d| j                  d|        Y d}~d}~ww xY w)	zDelete an embedded file entry.Tr   r   no such embedded file r9   Nr   r   )r7   rq   r3   r   r   r-   r.   embfile_delr1   
ValueErrorr   mupdfFzErrorBaser   r   r   )r   r    es      r   embedded_delr   E  s    
DJJ4
8C%%'KK4;;$**401>		" ;;$++3a(IIK 112 >)$))b<==>s   .C- -#D?%D::D?c                 j   t        | j                  | j                  d      }	 |j                  | j                        }|j                  | j                        }| j                  r| j                  nd   }t        |d      5 }|j                         ddd       t        j                  d| j                  d	|d
       |j!                          y# t        t        j                  j                  f$ r/}t        j                  d| j                  d|        Y d}~d}~ww xY w# 1 sw Y   xY w)z&Retrieve contents of an embedded file.Tr   r   r9   Nr2   wbzsaved entry 'z' as 'r   )r7   rq   r3   r   r1   r   r   r   r   r   r-   r.   r   r+   writer   r   )r   r    streamdr   r2   r   s          r   embedded_getr   X  s    
DJJ4
8C>+TYY' #kkt{{q}H	h	 VOO$))XFGIIK 112 >)$))b<==> s#   6C D)#D&7%D!!D&)D2c                 .   t        | j                  | j                  d      }|j                         s:| j                  | j                  | j                  k(  rt        j                  d       	 |j                  | j                         t        j                  d| j                  z         t        j                  j                  | j                        r)t        j                  j                  | j                        s"t        j                  d| j                  z         t        | j                  d      5 }|j                         }ddd       | j                  }|}| j                   s|}n| j                   }|j#                  | j                  |||       | j                  r| j                  | j                  k(  r|j%                          n|j'                  | j                  d	
       |j)                          y# t        $ r Y \w xY w# 1 sw Y   xY w)zInsert a new embedded file.Tr   Nr   zentry '%s' already existszno such file '%s'rbr   rp   r   )r7   rq   r3   r   r   r-   r.   r   r1   rM   rr   rs   existsisfiler+   readr   r   r   r   r   r   r    fr   r2   r   r   s          r   embedded_addr   g  sy   
DJJ4
8C%%'t{{djj801		",tyy89 77>>$))$BGGNN499,E$tyy01	dii	 !yyHI99yyOO		6H	   ;;$++3a(IIK)  
 s   .=G; 5H;	HHHc                    t        | j                  | j                  d      }|j                         s:| j                  | j                  | j                  k(  rt        j                  d       	 |j                  | j                         | j                  t        j                  j                  | j                        rYt        j                  j                  | j                        r0t        | j                  d      5 }|j                         }ddd       nd}| j                   r| j                   }nd}| j"                  r| j"                  }n| j                   r| j                   }nd}| j$                  r| j$                  }nd}|j'                  | j                  |||       | j                  | j                  | j                  k(  r|j)                          n|j+                  | j                  d	       |j-                          y# t        $ r& t        j                  d| j                  z         Y w xY w# 1 sw Y   *xY w)
z0Update contents or metadata of an embedded file.Tr   Nr   no such embedded file '%s'r   r   rp   r   )r7   rq   r3   r   r   r-   r.   r   r1   rM   rs   rr   r   r   r+   r   r2   r   r   embfile_updr   r   r   r   s          r   embedded_updr     s   
DJJ4
8C%%'t{{djj801;#
 			GGNN499%GGNN499%$))T" 	aVVXF	 	 }}==~~NN		MM		yyyyOO		6H	   {{dkkTZZ7a(IIKK  ;-		9:;	 	s   .H =H9+H65H69Ic                    t        | j                  | j                  d      }|j                         }| j                  | j                  |vr#t        j                  d| j                  z         nt        j                          t        j                  dt        |      t        |      dkD  rdndfz         t        j                          t        |j                  | j                               t        j                          y|s#t        j                  d	|j                  z         yt        |      dkD  rd
|j                  t        |      fz  }nd|j                  z  }t        j                  |       t        j                          |D ]c  }| j                  st        j                  |       %|j                  |      }t        |j                  |             t        j                          e |j                          y)zList embedded files.Tr   Nr   z!printing 1 of %i embedded file%s:r   r"   rW   z'%s' contains no embedded filesz-'%s' contains the following %i embedded filesz)'%s' contains the following embedded file)r7   rq   r3   r   r1   r-   r.   r   r   r<   rC   r   detailr   )r   r    r   rB   r1   _s         r   embedded_listr     sw   
DJJ4
8CEyy99E!HH1DII=>OOOO3u:c%j1ns"=> OOs''		23OO9CHHDE
5zA~=3u:@VV9CHHDOOCOO {{OOD!T"3##D)* IIKr   c           
         | j                   s!| j                  st        j                  d       t	        | j
                  | j                  d      }| j                  r$t        | j                  |j                  dz         }nt        d|j                  dz         }| j                  s.t        j                  j                  t        j                        }nb| j                  }t        j                  j!                  |      rt        j                  j#                  |      st        j                  d|z         t%               }t%               }|D ]  }| j                   r|j'                  |dz
        }|D ]  }|d   }	|	|vs|j)                  |	       |j+                  |	      \  }
}}}|dk(  s|s<t        j                  j-                  ||
j/                  dd	       d	|	 d
|       }t1        |d      5 }|j3                  |       ddd       d} | j                  s|j5                  |dz
        }|D ]  }|d   }	|	|vs|j)                  |	       t7        ||      }t9        |      t:        u rV|d   }|d   }t        j                  j-                  |d|	|fz        }t1        |d      5 }|j3                  |       ddd       t        j                  j-                  |d|	z        }|j<                  j>                  dk  r|n#tA        jB                  t@        jD                  |      }|jG                  |         | j                   r#tA        jH                  dtK        |      |fz         | j                  r#tA        jH                  dtK        |      |fz         |jM                          y# 1 sw Y   xY w# 1 sw Y   xY w)z)Extract images and / or fonts from a PDF.z"neither fonts nor images requestedTr   r   z"output directory %s does not existr   zn/arV   r
   .r   Nextimagez	img-%i.%sz
img-%i.pngr   zsaved %i fonts to '%s'zsaved %i images to '%s')'fontsimagesr-   r.   r7   rq   r3   r   rj   rw   r`   r   rr   rs   abspathcurdirr   isdirr   get_page_fontsaddextract_fontr   rZ   r+   r   get_page_imagesr%   typedictr   r   r   r   r   r   r   r<   r   )r   r    r   out_dir
font_xrefsimage_xrefsr   itemlistr!   rN   fontnamer   r   bufferoutnameoutfiler   imgdatar$   s                      r   extract_objectsr     s   ::dkk56
DJJ4
8CzzS^^a%78a!+,;;''//")),++w'BGGMM',BHH9GCDJ%K $+::))#'2H  "Awz)NN4(/2/?/?/E,Hc1fe|6  ggllH$4$4S#$>#?qau!MG gt, .f-.!F" ;;**373H  +Aw{*OOD)$S$/CCyD(!%j"%g,"$'',,wtSk8Q"R!'40 3G#MM'23 3 #%'',,wt8K"L  #~~//!3  !(s!C 
 		'*%+%$+L zz0C
OW3MMN{{1S5Ew4OOPIIK;. .3 3s   1N,)N9,N69Oc                     |rdnt        dg      }| j                  d|      }|s|s|j                  |       y |j                  |j                  dd             |j                  |       y )N   
   textflagsutf8surrogatepasserrors)bytesget_textr   encode)	pagetextoutGRIDfontsize
noformfeed
skip_emptyr   eopr   s	            r   page_simpler  !  s`    %5";C==u=-DMM#MM$++f_+=>MM#
r   c                    |rdnt        dg      }| j                  d|      }|g k(  r|s|j                  |       y |j                  d        |D ]'  }	|j                  |	d   j	                  dd	
             ) |j                  |       y )Nr   r   blocksr   c                     | d   | d   fS )Nrp   r    )bs    r   <lambda>z page_blocksort.<locals>.<lambda>4  s    qtQqTl r   keyr   r   r   r   )r  r  r   sortr  )
r  r  r  r  r	  r
  r   r  r  r  s
             r   page_blocksortr  -  s    %5";C]]85]1F|MM#
KK*K+ Cadkk&kABCMM#
r   c                   ! |rdnt        dg      }dt        t           dt        dt        fd}dt        t           dt        fd}	d	t        t           d
t
        j                  f!fd}
dt        dt        fd!d }| j                  d|      d	   } |
||       \  }}}}}|g k(  r|s|j                  |       y  |	||      }|j                  d        i }|D ]:  }|\  }}}} |||      }|j                  |g       }|j                  |       |||<   < t        |j                               }|j                          ||z
  }i }|D ]c  }||   }t        |      }|dk  rd||<   |D cg c]  }|d   	 }}|j                          t!        j"                  |      }||k  r|}|d   ||<   e ||d   |d   z
  z  |t        |      z  z  dz  }|d   }|j                  d       |D ]^  }||k  r|j                  d       ||z  }||k  r |||||   ||         } |j                  | dz   j%                  dd             ||z   }` |j                  |       y c c}w )Nr   r   valuesvaluereturnc                 b    t        j                  | |      }|r| |dz
     S t        d|| fz        )zFind the right row coordinate.

        Args:
            values: (list) y-coordinates of rows.
            value: (int) lookup for this value (y-origin of char).
        Returns:
            y-ccordinate of appropriate line for value.
        r   zLine for %g not found in %s)bisectbisect_rightRuntimeError)r  r  rg   s      r   find_line_indexz$page_layout.<locals>.find_line_index?  s<     .!a%= 8E6?JKKr   rowsc                     t        |       } | j                          | d   g}| dd  D ]  }||d   |z   k\  s|j                  |       ! |S )Nr   r   r'   )r_   r  r^   )r   r  nrowshs       r   curate_rowsz page_layout.<locals>.curate_rowsN  sW    Dz		a	ab 	 AE"I$$Q	  r   r  r  c           
         t               }|j                  j                  }|j                  j                  }|}|}d}g }| D ]4  }	|	d   D ](  }
|
d   dk7  r|
d   \  }}}}|dk  s||j                  j                  kD  r6||z
  }||kD  r|}|
d   D ]  }|d   k  r|d   D ]  }|d   \  }}}}||z
  }|d	   \  }}t	        t        |            }|j                  |       |d
   }||kD  r|dk7  r|}||k  r|}|dk(  r\|g k7  rW|d   \  }}}}||k(  rH|t        d      k7  r ||z         }n$|dk(  rt        d      }n|dk(  rt        d      }n|}||||f|d<   |j                  ||||f         + 7 |||||fS )Nr   linesdir)r   r   bboxspansrR   charsorigincrV   r'      rg     r@     )	r   rectwidthheightr]   ru   r   chrr^   )r  r  r   
page_widthpage_height	rowheightleftrightr*  blocklinex0y0x1y1r2  spanr,  r   cwidthoxoychold_chold_oxold_oy
old_cwidthligr  joinligatures                               r   process_blocksz#page_layout.<locals>.process_blocksW  s   uYY__
ii&&	 )	;Eg (;;&(!%fBB6R$))"2"22bv% &I M ;DF|x/ !'] ;'(yAr1!#b!"8B r^sV"9s#%D 2:$&E!Q;5B;AFr>FFFJ%|#)S[#8*6v{*CC%'3Y*-f+C%'3Y*-f+C*0C-0&&*,Mb	 (b"b&%9:5;;(;)	;T dD%22r   rH  c                     | dk(  rt        d      S | dk(  rt        d      S | dk(  rt        d      S | dk(  rt        d      S | d	k(  rt        d
      S | dk(  rt        d      S | dk(  rt        d      S | S )zReturn ligature character for a given pair / triple of characters.

        Args:
            lig: (str) 2/3 characters, e.g. "ff"
        Returns:
            Ligature, e.g. "ff" -> chr(0xFB00)
        ffr-  fii  fli  ffir.  fflr/  fti  sti  )r3  )rH  s    r   rI  z!page_layout.<locals>.joinligature  s     $;v;D[v;D[v;E\v;E\v;D[v;D[v;
r   c                    d}d}d}d}|t         j                  k  rt        d|z        |D ]  }|\  }	}
}}|
| z
  }
|
|z   }||	k(  r|
|z
  |dz  k  r%|	dk(  r||
z
  |z  dkD  r6|	}|
||z   k  r
||	z  }|}|
}J|	dk(  rPt        |
|z        t	        |      z
  }|
|kD  r|dkD  r|d|z  z  }||	z  }|}|
} |j                         S )a  Produce the text of one output line.

        Args:
            left: (float) left most coordinate used on page
            slot: (float) avg width of one character in any font in use.
            minslot: (float) min width for the characters in this line.
            chars: (list[tuple]) characters of this line.
        Returns:
            text: (str) text string for this line
        rW   r   z%program error: minslot too small = %gg?rV   g?r   )r   EPSILONr  r]   r<   rstrip)r7  slotminslotlcharsr   old_charold_x1rE  r,  charrA  r   r@  r=  deltas                  r   make_textlinez"page_layout.<locals>.make_textline  s$    goo%FPQQ  	A"#D"adBfB 4BK6C<$? s{v5;HFW$$
 s{T	NSY.EF{uqye#DLDFFA 	B {{}r   rawdictr   c                     | d   S )Nr   r  )r,  s    r   r  zpage_layout.<locals>.<lambda>  s
    QqT r   r  r(   r   rp   r   r'   g333333?
r   r   r   )r  r   r]   r   r   r   PagerY   r  r   r  getr^   r_   r;   r<   
statisticsmedianr  )"r  r  r  r  r	  r
  r   r  r  r$  rJ  r]  r  r*  r   r7  r8  r6  r&  r,  r   rB  yrX  r;   rV  minslotsr?   ccountwidths	this_slotrowposr   rI  s"      `                             @r   page_layoutrk  ;  s   %5";CLS	 L# L# L#c( T 23tDz 23 23h# # 43l ]]9E]28<F*8*F'E4ui{MM#tT"D 
JJ>J" E 1b!D"%1b!aa 

DIIK 4<DH  qVA:HQK &'1!A$''%%f-	tDQi  T"XQ/0ID	4IJSPI!WFMM% qjMM% iF qj T4!eAh?td{**6/*JKY MM#' (s   -Ic                    t        | j                  | j                  d      }t        | j                  |j
                  dz         }| j                  }|1t        j                  j                  |j                        \  }}|dz   }t        |d      5 }t        j                  t        j                  z  }| j                  r|t        j                  z  }| j                   r|t        j                  z  }| j"                  r|t        j$                  z  }t&        t(        t*        d}|D ]N  }	||	dz
     }
 || j,                     |
|| j.                  | j0                  | j2                  | j4                  |       P 	 d d d        y # 1 sw Y   y xY w)NFr   r   z.txtr   simpler  layoutr   )r7   rq   r3   rj   r   rw   r   rr   rs   splitextr1   r+   r   TEXT_PRESERVE_LIGATURESTEXT_PRESERVE_WHITESPACEconvert_whitenoligaturesextra_spacesTEXT_INHIBIT_SPACESr  r  rk  modegridr  r	  r
  )r   r    r   r   r2   r   r  r   funcr   r  s              r   gettextrz    sL   
DJJ5
9CTZZ!!34E[[F~gg&&sxx0!F"	fd	 w//'2R2RRW555EW444EW000E!$!

  
	CsQw<DDO		
	  s   C$E??Fc                 X    t        j                  d       t        j                  d       y )NzThis is from PyMuPDF message().zThis is from PyMuPDF log().)r   r   log)r   s    r   	_internalr}  =  s    OO56KK-.r   c                     t        j                  dt        d            } | j                  dd      }|j	                  dt        d      	      }|j                  d
t        d       |j                  dd       |j                  ddd       |j                  ddd       |j                  ddd       |j                  dt        d       |j                  dt        d       |j                  t               |j	                  dt        d      	      }|j                  d
t        d       |j                  dt        d        |j                  dd       |j                  d!d"d#d$%       |j                  d&t        d'       |j                  d(t        d)       |j                  d*t        d+t        d,      d-.       |j                  d/dd0d12       |j                  d3dd0d42       |j                  d5dd0d62       |j                  d7t        d8d9:       |j                  d;dd0d<2       |j                  d=dd0d>2       |j                  dd?       |j                  t               |j	                  d@t        dA      dBC      }|j                  d
dDdEF       |j                  dGdHdIJ       |j                  t               |j	                  dKt        dL      	      }|j                  d
t        d       |j                  dMddN       |j                  dOddP       |j                  dGdQ       |j                  dd       |j                  dt        dR       |j                  t               |j	                  dSt        dT      	      }|j                  d
d       |j                  dUdV       |j                  dWddX       |j                  dd       |j                  t               |j	                  dYt        dZ      	      }|j                  d
d       |j                  dd       |j                  dGd[       |j                  dUdHd\J       |j                  d]dHd^J       |j                  d_d`       |j                  t               |j	                  dat        db      	      }|j                  d
d       |j                  dd       |j                  dGd[       |j                  dUdHdcJ       |j                  t                |j	                  ddt        de      dfC      }|j                  d
d       |j                  dUdHdgJ       |j                  dd       |j                  dGdh       |j                  d]di       |j                  djdk       |j                  dldm       |j                  d_dn       |j                  t"               |j	                  dot        dp      	      }	|	j                  d
t        d       |	j                  dUdHdgJ       |	j                  dd       |	j                  dGdq       |	j                  t$               |j	                  drt        ds      	      }
|
j                  d
t        dt       |
j                  ddu       |
j                  dGdv       |
j                  dwdHdxJ       |
j                  dydz       |
j                  dUdDd{F       |
j                  t&               |j	                  d|t        d}      	      }|j                  d
t        d~       |j                  dd       |j                  dt        ddd.       |j                  dt        dd       |j                  dddd0       |j                  dddd0       |j                  dddd0       |j                  dddd0       |j                  dddd0       |j                  dGd       |j                  dt(        dd       |j                  dt(        dd       |j                  t*               |j	                  dt        d      	      }|j                  t,               | j/                         }t1        |d      s| j3                          y|j5                  |       y)zDefine command configurations.r   zBasic PyMuPDF Functions)progdescriptionSubcommandsz/Enter 'command -h' for subcommand specific help)titlehelpr4   zdisplay PDF information)r  rq   zPDF filename)r   r  z	-passwordr3   )r  z-catalog
store_truezshow PDF catalog)actionr  z-trailerzshow PDF trailerz	-metadatazshow PDF metadataz-xrefsz&show selected objects, format: 1,5-7,Nz-pagesz'show selected pages, format: 1,5-7,50-N)ry  r   z.optimize PDF, or create sub-PDF if pages givenr   zoutput PDF filenamez-encryptionzencryption methodr   r   )r  choicesdefaultz-ownerzowner passwordz-userzuser passwordz-garbagezgarbage collection level   r   )r   r  r  r  z	-compressFzcompress (deflate) output)r  r  r  z-asciizASCII encode binary dataz-linearzformat for fast web displayz-permissionr'   zinteger with permission levels)r   r  r  z	-sanitizezsanitize / clean contentsz-prettyzprettify PDF structurez/output selected pages pages, format: 1,5-7,50-Nr   zjoin PDF documentsz3specify each input as 'filename[,password[,pages]]')r  epilog*zinput filenames)nargsr  z-outputTzoutput filename)requiredr  extractz extract images and fonts to diskz-imageszextract imagesz-fontszextract fontsz-folder to receive output, defaults to currentz-consider these pages only, format: 1,5-7,50-Nz
embed-infozlist embedded filesz-namezif given, report only this onez-detailzdetail informationz	embed-addzadd embedded filez-output PDF filename, incremental save if nonezname of new entryz-pathzpath to data for new entryz-desczdescription of new entryz	embed-delzdelete embedded filezname of entry to deletez	embed-updzupdate embedded filez*except '-name' all parameters are optionalzname of entryz-Output PDF filename, incremental save if nonezpath to new data for entryz	-filenameznew filename to store in entryz
-ufilenamez&new unicode filename to store in entryz!new description to store in entryzembed-extractzextract embedded file to diskz'output filename, default is stored namez
embed-copyz copy embedded files between PDFszPDF to receive embedded fileszpassword of inputz2output PDF, incremental save to 'input' if omittedz-sourcezcopy embedded files from herez
-pwdsourcezpassword of 'source' PDFzrestrict copy to these entriesrz  z(extract text in various formatting modeszinput document filenamezpassword for input documentz-modez-mode: simple, block sort, or layout (default)rm  ro  z select pages, format: 1,5-7,50-Nz1-N)r   r  r  z-noligaturesz*expand ligature characters (default False))r  r  r  z-convert-whitez6convert whitespace characters to white (default False)z-extra-spacesz%fill gaps with spaces (default False)z-noformfeedz-write linefeeds, no formfeeds (default False)z-skip-emptyz+suppress pages with no text (default False)z3store text in this file (default inputfilename.txt)z-gridz+merge lines if closer than this (default 2)r(   z	-fontsizez4only include text with a larger fontsize (default 3)rp   internalzinternal testingry  N)argparseArgumentParserr   add_subparsers
add_parseradd_argumentrY   set_defaultsr4   r]   r`   r   r   r   r   r   r   r   r   r   floatrz  r}  
parse_argshasattr
print_helpry  )parsersubpsps_showps_cleanps_join
ps_extractps_embed_addps_embed_delps_embed_updps_embed_extractps_embed_copy
ps_gettextps_internalr   s                 r   mainr  A  s
   $$67F !!"S " E v8<U3VWGs@:6L?QRL?QR\@STs!I   s!J   d#
 X&VW   H '.A(3HI+J7 K	   (3CD'/B'a   (	   u;U   *	   C2R   (	   ,<T   H   u%
 12D  G
 2CDT8IJh'
 !!x(JK " J G#NCIlAQRH\PG   Kj9s!P   1
 (+@"A  G ~6'GH<>RS:6m,
 ##*=!> $ L gN;k
;G   g;NOg;WXg,FG<0
 ##*@!A $ L gN;k
;G   g;TU<0
 ##34; $ L
 gN;g?Kk
;G   g,HIk0PQC   g,OP<0
 ''X.M%N (  !!'.!I!!'D!O!!+J!?!!A "  !!|!4
 $$(+M"N % M wS7VW{1DEL   D'F   |2LMs!A   M2
 !!x(RS " J G#4MNK.KL<.   /	   9	   E	   4	   <	   :	   B   :	   C	   )
 ""); < # K ),
 D4 		$r   __main__)FT)r  )"r  r  rr   r-   rc  typingr   r   r   rW   r   r   r%   r7   rC   rS   rj   r4   r   r   r   r   r   r   r   r   r   r  r  rk  rz  r}  r  __name__r  r   r   <module>r     s      	 
  " " (!H&.,^<~,^*%P&D/d!H?D	^BB/od	 zF r   